Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use rb_enc_interned_str when available #196

Merged
merged 1 commit into from Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions ext/msgpack/buffer.c
Expand Up @@ -27,12 +27,16 @@ int msgpack_rb_encindex_utf8;
int msgpack_rb_encindex_usascii;
int msgpack_rb_encindex_ascii8bit;

ID s_uminus;

#ifndef DISABLE_RMEM
static msgpack_rmem_t s_rmem;
#endif

void msgpack_buffer_static_init()
{
s_uminus = rb_intern("-@");

msgpack_rb_encindex_utf8 = rb_utf8_encindex();
msgpack_rb_encindex_usascii = rb_usascii_encindex();
msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex();
Expand Down
45 changes: 41 additions & 4 deletions ext/msgpack/buffer.h
Expand Up @@ -53,6 +53,8 @@ extern int msgpack_rb_encindex_utf8;
extern int msgpack_rb_encindex_usascii;
extern int msgpack_rb_encindex_ascii8bit;

extern ID s_uminus;

struct msgpack_buffer_chunk_t;
typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t;

Expand Down Expand Up @@ -436,7 +438,7 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
return rb_str_substr(b->head->mapped_string, offset, length);
}

static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
{
#ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
/* optimize */
Expand All @@ -449,11 +451,46 @@ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_
}
#endif

VALUE result = rb_str_new(b->read_buffer, length);
VALUE result;

#ifdef HAVE_RB_ENC_INTERNED_STR
if (will_be_frozen) {
result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
} else {
if (utf8) {
result = rb_utf8_str_new(b->read_buffer, length);
} else {
result = rb_str_new(b->read_buffer, length);
}
}
_msgpack_buffer_consumed(b, length);
return result;
}

#else

#endif
if (utf8) {
result = rb_utf8_str_new(b->read_buffer, length);
} else {
result = rb_str_new(b->read_buffer, length);
}

#if STR_UMINUS_DEDUPE
if (will_be_frozen) {
#if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
rb_str_freeze(result);
#endif //STR_UMINUS_DEDUPE_FROZEN
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, s_uminus, 0);
}
#endif // STR_UMINUS_DEDUPE
_msgpack_buffer_consumed(b, length);
return result;

#endif // HAVE_RB_ENC_INTERNED_STR
}

#endif
1 change: 1 addition & 0 deletions ext/msgpack/extconf.rb
Expand Up @@ -4,6 +4,7 @@
have_header("st.h")
have_func("rb_str_replace", ["ruby.h"])
have_func("rb_intern_str", ["ruby.h"])
have_func("rb_enc_interned_str", "ruby.h")
have_func("rb_sym2str", ["ruby.h"])
have_func("rb_str_intern", ["ruby.h"])
have_func("rb_block_lambda", ["ruby.h"])
Expand Down
80 changes: 15 additions & 65 deletions ext/msgpack/unpacker.c
Expand Up @@ -28,7 +28,7 @@
static int RAW_TYPE_STRING = 256;
static int RAW_TYPE_BINARY = 257;

static ID s_call, s_uminus;
static ID s_call;

#ifdef UNPACKER_STACK_RMEM
static msgpack_rmem_t s_stack_rmem;
Expand All @@ -41,7 +41,6 @@ void msgpack_unpacker_static_init()
#endif

s_call = rb_intern("call");
s_uminus = rb_intern("-@");
}

void msgpack_unpacker_static_destroy()
Expand Down Expand Up @@ -152,56 +151,8 @@ static inline int object_complete(msgpack_unpacker_t* uk, VALUE object)
return PRIMITIVE_OBJECT_COMPLETE;
}

static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str)
{
ENCODING_SET(str, msgpack_rb_encindex_utf8);

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
rb_str_freeze(str);
# endif
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
str = rb_funcall(str, s_uminus, 0);
}
#endif

return object_complete(uk, str);
}

static inline int object_complete_binary(msgpack_unpacker_t* uk, VALUE str)
{
ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
rb_str_freeze(str);
# endif
str = rb_funcall(str, s_uminus, 0);
}
#endif

return object_complete(uk, str);
}

static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALUE str)
{
ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);

#if STR_UMINUS_DEDUPE
if(uk->freeze) {
# if STR_UMINUS_DEDUPE_FROZEN
rb_str_freeze(str);
# endif
str = rb_funcall(str, s_uminus, 0);
}
#endif

VALUE proc = msgpack_unpacker_ext_registry_lookup(&uk->ext_registry, ext_type);
if(proc != Qnil) {
VALUE obj = rb_funcall(proc, s_call, 1, str);
Expand Down Expand Up @@ -304,9 +255,10 @@ static int read_raw_body_cont(msgpack_unpacker_t* uk)

int ret;
if(uk->reading_raw_type == RAW_TYPE_STRING) {
ret = object_complete_string(uk, uk->reading_raw);
} else if(uk->reading_raw_type == RAW_TYPE_BINARY) {
ret = object_complete_binary(uk, uk->reading_raw);
ENCODING_SET(uk->reading_raw, msgpack_rb_encindex_utf8);
ret = object_complete(uk, uk->reading_raw);
} else if (uk->reading_raw_type == RAW_TYPE_BINARY) {
ret = object_complete(uk, uk->reading_raw);
} else {
ret = object_complete_ext(uk, uk->reading_raw_type, uk->reading_raw);
}
Expand All @@ -324,12 +276,10 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type)
/* don't use zerocopy for hash keys but get a frozen string directly
* because rb_hash_aset freezes keys and it causes copying */
bool will_freeze = uk->freeze || is_reading_map_key(uk);
VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze);
VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING);
int ret;
if(raw_type == RAW_TYPE_STRING) {
ret = object_complete_string(uk, string);
} else if(raw_type == RAW_TYPE_BINARY) {
ret = object_complete_binary(uk, string);
if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) {
ret = object_complete(uk, string);
} else {
ret = object_complete_ext(uk, raw_type, string);
}
Expand Down Expand Up @@ -368,7 +318,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr
int count = b & 0x1f;
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand Down Expand Up @@ -553,7 +503,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
uint8_t count = cb->u8;
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -565,7 +515,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
uint16_t count = _msgpack_be16(cb->u16);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -577,7 +527,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete(uk, rb_utf8_str_new_static("", 0));
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things here.

Since I removed object_complete_string and object_complete_binary we have to use API that set the proper encoding directly.

And also rb_str_buf_new is meant for string that will be concatenated to, so they are instantiated with a capacity of 63 bytes (so 103bytes total):

#define STR_BUF_MIN_SIZE 63

Since here we only want an empty string, it's a bit of a waste.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be misunderstanding your comment. Do you mean:

  • the previous code using rb_str_buf_new instantiated 63 bytes larger value
  • the newer code will not consume additional 63 bytes
    Is that correct?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first one. rb_str_buf_new(0) will always instantiate a string with 63 bytes of capacity, so a total of 103 bytes.

The new code will directly right size the string, so saving up to 63 bytes.

}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -589,7 +539,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
uint8_t count = cb->u8;
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -601,7 +551,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
uint16_t count = _msgpack_be16(cb->u16);
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -613,7 +563,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
if(count == 0) {
return object_complete_binary(uk, rb_str_buf_new(0));
return object_complete(uk, rb_str_new_static("", 0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand Down