diff --git a/ext/msgpack/buffer.c b/ext/msgpack/buffer.c index f765139e..6d202794 100644 --- a/ext/msgpack/buffer.c +++ b/ext/msgpack/buffer.c @@ -27,12 +27,16 @@ int msgpack_rb_encindex_utf8; int msgpack_rb_encindex_usascii; int msgpack_rb_encindex_ascii8bit; +ID s_uminus; + #ifndef DISABLE_RMEM static msgpack_rmem_t s_rmem; #endif void msgpack_buffer_static_init() { + s_uminus = rb_intern("-@"); + msgpack_rb_encindex_utf8 = rb_utf8_encindex(); msgpack_rb_encindex_usascii = rb_usascii_encindex(); msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex(); diff --git a/ext/msgpack/buffer.h b/ext/msgpack/buffer.h index 60ce79a0..b933cc0d 100644 --- a/ext/msgpack/buffer.h +++ b/ext/msgpack/buffer.h @@ -53,6 +53,8 @@ extern int msgpack_rb_encindex_utf8; extern int msgpack_rb_encindex_usascii; extern int msgpack_rb_encindex_ascii8bit; +extern ID s_uminus; + struct msgpack_buffer_chunk_t; typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t; @@ -436,7 +438,7 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b return rb_str_substr(b->head->mapped_string, offset, length); } -static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen) +static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8) { #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE /* optimize */ @@ -449,11 +451,46 @@ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_ } #endif - VALUE result = rb_str_new(b->read_buffer, length); + VALUE result; + +#ifdef HAVE_RB_ENC_INTERNED_STR + if (will_be_frozen) { + result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding()); + } else { + if (utf8) { + result = rb_utf8_str_new(b->read_buffer, length); + } else { + result = rb_str_new(b->read_buffer, length); + } + } _msgpack_buffer_consumed(b, length); return result; -} +#else -#endif + if (utf8) { + result = rb_utf8_str_new(b->read_buffer, length); + } else { + result = rb_str_new(b->read_buffer, length); + } + +#if STR_UMINUS_DEDUPE + if (will_be_frozen) { +#if STR_UMINUS_DEDUPE_FROZEN + // Starting from MRI 2.8 it is preferable to freeze the string + // before deduplication so that it can be interned directly + // otherwise it would be duplicated first which is wasteful. + rb_str_freeze(result); +#endif //STR_UMINUS_DEDUPE_FROZEN + // MRI 2.5 and older do not deduplicate strings that are already + // frozen. + result = rb_funcall(result, s_uminus, 0); + } +#endif // STR_UMINUS_DEDUPE + _msgpack_buffer_consumed(b, length); + return result; + +#endif // HAVE_RB_ENC_INTERNED_STR +} +#endif diff --git a/ext/msgpack/extconf.rb b/ext/msgpack/extconf.rb index 4d56706a..558afd49 100644 --- a/ext/msgpack/extconf.rb +++ b/ext/msgpack/extconf.rb @@ -4,6 +4,7 @@ have_header("st.h") have_func("rb_str_replace", ["ruby.h"]) have_func("rb_intern_str", ["ruby.h"]) +have_func("rb_enc_interned_str", "ruby.h") have_func("rb_sym2str", ["ruby.h"]) have_func("rb_str_intern", ["ruby.h"]) have_func("rb_block_lambda", ["ruby.h"]) diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index 18c053c5..8f500f1b 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -28,7 +28,7 @@ static int RAW_TYPE_STRING = 256; static int RAW_TYPE_BINARY = 257; -static ID s_call, s_uminus; +static ID s_call; #ifdef UNPACKER_STACK_RMEM static msgpack_rmem_t s_stack_rmem; @@ -41,7 +41,6 @@ void msgpack_unpacker_static_init() #endif s_call = rb_intern("call"); - s_uminus = rb_intern("-@"); } void msgpack_unpacker_static_destroy() @@ -152,56 +151,8 @@ static inline int object_complete(msgpack_unpacker_t* uk, VALUE object) return PRIMITIVE_OBJECT_COMPLETE; } -static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str) -{ - ENCODING_SET(str, msgpack_rb_encindex_utf8); - -#if STR_UMINUS_DEDUPE - if(uk->freeze) { -# if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - rb_str_freeze(str); -# endif - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. - str = rb_funcall(str, s_uminus, 0); - } -#endif - - return object_complete(uk, str); -} - -static inline int object_complete_binary(msgpack_unpacker_t* uk, VALUE str) -{ - ENCODING_SET(str, msgpack_rb_encindex_ascii8bit); - -#if STR_UMINUS_DEDUPE - if(uk->freeze) { -# if STR_UMINUS_DEDUPE_FROZEN - rb_str_freeze(str); -# endif - str = rb_funcall(str, s_uminus, 0); - } -#endif - - return object_complete(uk, str); -} - static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALUE str) { - ENCODING_SET(str, msgpack_rb_encindex_ascii8bit); - -#if STR_UMINUS_DEDUPE - if(uk->freeze) { -# if STR_UMINUS_DEDUPE_FROZEN - rb_str_freeze(str); -# endif - str = rb_funcall(str, s_uminus, 0); - } -#endif - VALUE proc = msgpack_unpacker_ext_registry_lookup(&uk->ext_registry, ext_type); if(proc != Qnil) { VALUE obj = rb_funcall(proc, s_call, 1, str); @@ -304,9 +255,10 @@ static int read_raw_body_cont(msgpack_unpacker_t* uk) int ret; if(uk->reading_raw_type == RAW_TYPE_STRING) { - ret = object_complete_string(uk, uk->reading_raw); - } else if(uk->reading_raw_type == RAW_TYPE_BINARY) { - ret = object_complete_binary(uk, uk->reading_raw); + ENCODING_SET(uk->reading_raw, msgpack_rb_encindex_utf8); + ret = object_complete(uk, uk->reading_raw); + } else if (uk->reading_raw_type == RAW_TYPE_BINARY) { + ret = object_complete(uk, uk->reading_raw); } else { ret = object_complete_ext(uk, uk->reading_raw_type, uk->reading_raw); } @@ -324,12 +276,10 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type) /* don't use zerocopy for hash keys but get a frozen string directly * because rb_hash_aset freezes keys and it causes copying */ bool will_freeze = uk->freeze || is_reading_map_key(uk); - VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze); + VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING); int ret; - if(raw_type == RAW_TYPE_STRING) { - ret = object_complete_string(uk, string); - } else if(raw_type == RAW_TYPE_BINARY) { - ret = object_complete_binary(uk, string); + if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) { + ret = object_complete(uk, string); } else { ret = object_complete_ext(uk, raw_type, string); } @@ -368,7 +318,7 @@ static int read_primitive(msgpack_unpacker_t* uk) SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr int count = b & 0x1f; if(count == 0) { - return object_complete_string(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_utf8_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -553,7 +503,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1); uint8_t count = cb->u8; if(count == 0) { - return object_complete_string(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_utf8_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -565,7 +515,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); uint16_t count = _msgpack_be16(cb->u16); if(count == 0) { - return object_complete_string(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_utf8_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -577,7 +527,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); uint32_t count = _msgpack_be32(cb->u32); if(count == 0) { - return object_complete_string(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_utf8_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -589,7 +539,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1); uint8_t count = cb->u8; if(count == 0) { - return object_complete_binary(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -601,7 +551,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); uint16_t count = _msgpack_be16(cb->u16); if(count == 0) { - return object_complete_binary(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; @@ -613,7 +563,7 @@ static int read_primitive(msgpack_unpacker_t* uk) READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); uint32_t count = _msgpack_be32(cb->u32); if(count == 0) { - return object_complete_binary(uk, rb_str_buf_new(0)); + return object_complete(uk, rb_str_new_static("", 0)); } /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count;