Skip to content

Commit

Permalink
Deduplicate strings inside json_string_unescape
Browse files Browse the repository at this point in the history
  • Loading branch information
byroot committed Nov 20, 2020
1 parent 1e95cf1 commit a137e56
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 61 deletions.
1 change: 1 addition & 0 deletions ext/json/ext/parser/extconf.rb
Expand Up @@ -2,6 +2,7 @@
require 'mkmf'

have_func("rb_enc_raise", "ruby.h")
have_func("rb_enc_interned_str", "ruby.h")

# checking if String#-@ (str_uminus) dedupes... '
begin
Expand Down
73 changes: 40 additions & 33 deletions ext/json/ext/parser/parser.c
Expand Up @@ -2343,7 +2343,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
}

static const size_t MAX_STACK_BUFFER_SIZE = 128;
static VALUE json_string_unescape(char *string, char *stringEnd)
static VALUE json_string_unescape(char *string, char *stringEnd, int intern)
{
VALUE result = Qnil;
size_t bufferSize = stringEnd - string;
Expand Down Expand Up @@ -2442,15 +2442,38 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
buffer += pe - p;
}

#ifdef HAVE_RUBY_ENCODING_H
result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
#else
result = rb_str_new(bufferStart, buffer - bufferStart);
#endif
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));

if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}

if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly;
// otherwise it would be duplicated first, which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif

return result;
}

Expand Down Expand Up @@ -2479,7 +2502,7 @@ static const char _JSON_string_nfa_pop_trans[] = {
};


#line 575 "parser.rl"
#line 598 "parser.rl"


static int
Expand All @@ -2504,7 +2527,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
cs = (int)JSON_string_start;
}

#line 595 "parser.rl"
#line 618 "parser.rl"

json->memo = p;

Expand Down Expand Up @@ -2565,9 +2588,9 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
ctr2:
{
#line 562 "parser.rl"
#line 585 "parser.rl"

*result = json_string_unescape(json->memo + 1, p);
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze);
if (NIL_P(*result)) {
{p = p - 1; }
{p+= 1; cs = 8; goto _out;}
Expand All @@ -2577,7 +2600,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
{
#line 572 "parser.rl"
#line 595 "parser.rl"
{p = p - 1; } {p+= 1; cs = 8; goto _out;} }

goto st8;
Expand Down Expand Up @@ -2692,7 +2715,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
_out: {}
}

#line 597 "parser.rl"
#line 620 "parser.rl"


if (json->create_additions && RTEST(match_string = json->match_string)) {
Expand All @@ -2708,24 +2731,8 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu

if (json->symbolize_names && json->parsing_name) {
*result = rb_str_intern(*result);
} else if (RB_TYPE_P(*result, T_STRING)) {
# if STR_UMINUS_DEDUPE_FROZEN
if (json->freeze) {
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly;
// otherwise it would be duplicated first, which is wasteful.
*result = rb_funcall(rb_str_freeze(*result), i_uminus, 0);
}
# elif STR_UMINUS_DEDUPE
if (json->freeze) {
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
*result = rb_funcall(*result, i_uminus, 0);
}
# else
rb_str_resize(*result, RSTRING_LEN(*result));
# endif
}

if (cs >= JSON_string_first_final) {
return p + 1;
} else {
Expand Down Expand Up @@ -2925,7 +2932,7 @@ static const char _JSON_nfa_pop_trans[] = {
};


#line 818 "parser.rl"
#line 825 "parser.rl"


/*
Expand All @@ -2946,7 +2953,7 @@ static VALUE cParser_parse(VALUE self)
cs = (int)JSON_start;
}

#line 834 "parser.rl"
#line 841 "parser.rl"

p = json->source;
pe = p + json->len;
Expand Down Expand Up @@ -3039,7 +3046,7 @@ static VALUE cParser_parse(VALUE self)
goto _out;
ctr2:
{
#line 810 "parser.rl"
#line 817 "parser.rl"

char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { {p = p - 1; } {p+= 1; cs = 10; goto _out;} } else {p = (( np))-1;}
Expand Down Expand Up @@ -3193,7 +3200,7 @@ static VALUE cParser_parse(VALUE self)
_out: {}
}

#line 837 "parser.rl"
#line 844 "parser.rl"


if (cs >= JSON_first_final && p == pe) {
Expand Down
2 changes: 1 addition & 1 deletion ext/json/ext/parser/parser.h
Expand Up @@ -63,7 +63,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
static VALUE json_string_unescape(char *string, char *stringEnd);
static VALUE json_string_unescape(char *string, char *stringEnd, int intern);
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
static VALUE convert_encoding(VALUE source);
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
Expand Down
61 changes: 34 additions & 27 deletions ext/json/ext/parser/parser.rl
Expand Up @@ -442,7 +442,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
}

static const size_t MAX_STACK_BUFFER_SIZE = 128;
static VALUE json_string_unescape(char *string, char *stringEnd)
static VALUE json_string_unescape(char *string, char *stringEnd, int intern)
{
VALUE result = Qnil;
size_t bufferSize = stringEnd - string;
Expand Down Expand Up @@ -541,15 +541,38 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
buffer += pe - p;
}

#ifdef HAVE_RUBY_ENCODING_H
result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
#else
result = rb_str_new(bufferStart, buffer - bufferStart);
#endif
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));

if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}

if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif

if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}
return result;
}

Expand All @@ -560,7 +583,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
write data;

action parse_string {
*result = json_string_unescape(json->memo + 1, p);
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze);
if (NIL_P(*result)) {
fhold;
fbreak;
Expand Down Expand Up @@ -608,24 +631,8 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu

if (json->symbolize_names && json->parsing_name) {
*result = rb_str_intern(*result);
} else if (RB_TYPE_P(*result, T_STRING)) {
# if STR_UMINUS_DEDUPE_FROZEN
if (json->freeze) {
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly;
// otherwise it would be duplicated first, which is wasteful.
*result = rb_funcall(rb_str_freeze(*result), i_uminus, 0);
}
# elif STR_UMINUS_DEDUPE
if (json->freeze) {
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
*result = rb_funcall(*result, i_uminus, 0);
}
# else
rb_str_resize(*result, RSTRING_LEN(*result));
# endif
}

if (cs >= JSON_string_first_final) {
return p + 1;
} else {
Expand Down

0 comments on commit a137e56

Please sign in to comment.