Skip to content

Commit

Permalink
Use rb_enc_interned_str when available
Browse files Browse the repository at this point in the history
  • Loading branch information
byroot committed Nov 30, 2020
1 parent eeff7ad commit 483917c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 73 deletions.
40 changes: 37 additions & 3 deletions ext/msgpack/buffer.h
Expand Up @@ -438,7 +438,7 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
return rb_str_substr(b->head->mapped_string, offset, length);
}

static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
{
#ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
/* optimize */
Expand All @@ -451,11 +451,45 @@ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_
}
#endif

VALUE result = rb_str_new(b->read_buffer, length);
VALUE result;

#ifdef HAVE_RB_ENC_INTERNED_STR
if (will_be_frozen) {
result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
} else {
if (utf8) {
result = rb_utf8_str_new(b->read_buffer, length);
} else {
result = rb_str_new(b->read_buffer, length);
}
}
_msgpack_buffer_consumed(b, length);
return result;
}

#else

    /* Fallback for Rubies without rb_enc_interned_str(): build a plain string
     * from the read buffer, tag its encoding, and (when the caller wants a
     * frozen string) run it through the s_uminus call for deduplication. */
    result = rb_str_new(b->read_buffer, length);
#ifdef COMPAT_HAVE_ENCODING
    /* The encoding must be set on `result`, the string created just above;
     * this function has no `str` variable. */
    ENCODING_SET(result, utf8 ? msgpack_rb_encindex_utf8 : msgpack_rb_encindex_ascii8bit);
#endif

#if STR_UMINUS_DEDUPE
    if (will_be_frozen) {
#if STR_UMINUS_DEDUPE_FROZEN
        // Starting from MRI 2.8 it is preferable to freeze the string
        // before deduplication so that it can be interned directly
        // otherwise it would be duplicated first which is wasteful.
        rb_str_freeze(result);
#endif //STR_UMINUS_DEDUPE_FROZEN
        // MRI 2.5 and older do not deduplicate strings that are already
        // frozen.
        result = rb_funcall(result, s_uminus, 0);
    }
#endif // STR_UMINUS_DEDUPE
    /* Advance the buffer past the bytes that were just copied out. */
    _msgpack_buffer_consumed(b, length);
    return result;

#endif // HAVE_RB_ENC_INTERNED_STR
}

#endif
1 change: 1 addition & 0 deletions ext/msgpack/extconf.rb
Expand Up @@ -4,6 +4,7 @@
have_header("st.h")
have_func("rb_str_replace", ["ruby.h"])
have_func("rb_intern_str", ["ruby.h"])
have_func("rb_enc_interned_str", "ruby.h")
have_func("rb_sym2str", ["ruby.h"])
have_func("rb_str_intern", ["ruby.h"])
have_func("rb_block_lambda", ["ruby.h"])
Expand Down
82 changes: 12 additions & 70 deletions ext/msgpack/unpacker.c
Expand Up @@ -152,62 +152,8 @@ static inline int object_complete(msgpack_unpacker_t* uk, VALUE object)
return PRIMITIVE_OBJECT_COMPLETE;
}

static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str)
{
#ifdef COMPAT_HAVE_ENCODING
ENCODING_SET(str, msgpack_rb_encindex_utf8);
#endif

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
rb_str_freeze(str);
# endif
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
str = rb_funcall(str, s_uminus, 0);
}
#endif

return object_complete(uk, str);
}

static inline int object_complete_binary(msgpack_unpacker_t* uk, VALUE str)
{
#ifdef COMPAT_HAVE_ENCODING
ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
#endif

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
rb_str_freeze(str);
# endif
str = rb_funcall(str, s_uminus, 0);
}
#endif

return object_complete(uk, str);
}

static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALUE str)
{
#ifdef COMPAT_HAVE_ENCODING
ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
#endif

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
rb_str_freeze(str);
# endif
str = rb_funcall(str, s_uminus, 0);
}
#endif

VALUE proc = msgpack_unpacker_ext_registry_lookup(&uk->ext_registry, ext_type);
if(proc != Qnil) {
VALUE obj = rb_funcall(proc, s_call, 1, str);
Expand Down Expand Up @@ -309,10 +255,8 @@ static int read_raw_body_cont(msgpack_unpacker_t* uk)
} while(length > 0);

int ret;
if(uk->reading_raw_type == RAW_TYPE_STRING) {
ret = object_complete_string(uk, uk->reading_raw);
} else if(uk->reading_raw_type == RAW_TYPE_BINARY) {
ret = object_complete_binary(uk, uk->reading_raw);
if(uk->reading_raw_type == RAW_TYPE_STRING || uk->reading_raw_type == RAW_TYPE_BINARY) {
ret = object_complete(uk, uk->reading_raw);
} else {
ret = object_complete_ext(uk, uk->reading_raw_type, uk->reading_raw);
}
Expand All @@ -330,12 +274,10 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type)
/* don't use zerocopy for hash keys but get a frozen string directly
* because rb_hash_aset freezes keys and it causes copying */
bool will_freeze = uk->freeze || is_reading_map_key(uk);
VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze);
VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING);
int ret;
if(raw_type == RAW_TYPE_STRING) {
ret = object_complete_string(uk, string);
} else if(raw_type == RAW_TYPE_BINARY) {
ret = object_complete_binary(uk, string);
if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) {
ret = object_complete(uk, string);
} else {
ret = object_complete_ext(uk, raw_type, string);
}
Expand Down Expand Up @@ -374,7 +316,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr
int count = b & 0x1f;
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand Down Expand Up @@ -559,7 +501,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
uint8_t count = cb->u8;
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -571,7 +513,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
uint16_t count = _msgpack_be16(cb->u16);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -583,7 +525,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -595,7 +537,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
uint8_t count = cb->u8;
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -607,7 +549,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
uint16_t count = _msgpack_be16(cb->u16);
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -619,7 +561,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand Down

0 comments on commit 483917c

Please sign in to comment.