From 4fdd25d408336ab05ae3d16bb81fb790850d9074 Mon Sep 17 00:00:00 2001 From: Watson Date: Sun, 8 Aug 2021 21:55:28 +0900 Subject: [PATCH] Use `RSTRING_PTR()` to retrieve C string pointer (#684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `rb_string_value_ptr()` attempts an implicit type conversion via the `#to_str` method. ```c char * rb_string_value_ptr(volatile VALUE *ptr) { VALUE str = rb_string_value(ptr); return RSTRING_PTR(str); } ``` Refer: https://github.com/ruby/ruby/blob/f81964568f954495ad9a517066bd241f5db22059/string.c#L2319-L2324 However, at these call sites we have already converted the value to a string using the `#to_s` method or checked its type. Therefore, this patch calls `RSTRING_PTR()` directly to reduce overhead. − | before | after | result -- | -- | -- | -- Oj.load | 419.273k | 421.858k | - Oj.dump | 1.233M | 1.303M | 1.06x ### Environment - MacBook Air (M1, 2020) - macOS 12.0 beta 3 - Apple M1 - Ruby 3.0.2 ### Before ``` Warming up -------------------------------------- Oj.load 42.892k i/100ms Oj.dump 124.603k i/100ms Calculating ------------------------------------- Oj.load 419.273k (± 0.6%) i/s - 2.102M in 5.012916s Oj.dump 1.233M (± 0.5%) i/s - 6.230M in 5.051849s ``` ### After ``` Warming up -------------------------------------- Oj.load 42.986k i/100ms Oj.dump 132.017k i/100ms Calculating ------------------------------------- Oj.load 421.858k (± 0.4%) i/s - 2.149M in 5.094940s Oj.dump 1.303M (± 0.8%) i/s - 6.601M in 5.068088s ``` ### Test code ```ruby require 'benchmark/ips' require 'oj' json =<<-EOF { "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name." }, "lastName": { "type": "string", "description": "The person's last name." 
}, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } } EOF Benchmark.ips do |x| x.report('Oj.load') { Oj.load(json) } data = Oj.load(json) x.report('Oj.dump') { Oj.dump(data) } end ``` --- ext/oj/custom.c | 14 +++++++------- ext/oj/dump.c | 16 ++++++++-------- ext/oj/dump_compat.c | 6 +++--- ext/oj/dump_object.c | 14 +++++++------- ext/oj/dump_strict.c | 6 +++--- ext/oj/odd.c | 2 +- ext/oj/parse.c | 2 +- ext/oj/rails.c | 8 ++++---- ext/oj/wab.c | 2 +- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ext/oj/custom.c b/ext/oj/custom.c index e17f4a0c..98909e1b 100644 --- a/ext/oj/custom.c +++ b/ext/oj/custom.c @@ -31,14 +31,14 @@ static void dump_obj_str(VALUE obj, int depth, Out out) { static void dump_obj_as_str(VALUE obj, int depth, Out out) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - const char * str = rb_string_value_ptr((VALUE *)&rstr); + const char * str = RSTRING_PTR(rstr); oj_dump_cstr(str, RSTRING_LEN(rstr), 0, 0, out); } static void bigdecimal_dump(VALUE obj, int depth, Out out) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - const char * str = rb_string_value_ptr((VALUE *)&rstr); + const char * str = RSTRING_PTR(rstr); int len = (int)RSTRING_LEN(rstr); if (0 == strcasecmp("Infinity", str)) { @@ -123,7 +123,7 @@ static void date_dump(VALUE obj, int depth, Out out) { case RubyTime: case XmlTime: v = rb_funcall(obj, rb_intern("iso8601"), 0); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&v), (int)RSTRING_LEN(v), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(v), (int)RSTRING_LEN(v), 0, 0, out); break; case UnixZTime: v = rb_funcall(obj, rb_intern("to_time"), 0); @@ -420,7 +420,7 @@ static void dump_odd(VALUE obj, Odd odd, VALUE clas, int depth, Out out) { if (Qundef == v || T_STRING != rb_type(v)) { rb_raise(rb_eEncodingError, "Invalid type for raw JSON.\n"); } else { - const char *s = rb_string_value_ptr((VALUE *)&v); + const char *s = RSTRING_PTR(v); int len 
= (int)RSTRING_LEN(v); const char *name = rb_id2name(*odd->attrs); size_t nlen = strlen(name); @@ -510,7 +510,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) { if (Yes == out->opts->trace) { oj_trace("to_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyOut); } - s = rb_string_value_ptr((VALUE *)&rs); + s = RSTRING_PTR(rs); len = (int)RSTRING_LEN(rs); assure_size(out, len + 1); @@ -537,7 +537,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) { if (aj == obj) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&rstr), + oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), false, false, @@ -835,7 +835,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { if (ma != Qnil) { volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); - name = rb_string_value_ptr((VALUE *)&s); + name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); } else { len = snprintf(num_id, sizeof(num_id), "%d", i); diff --git a/ext/oj/dump.c b/ext/oj/dump.c index 981306c2..07b4eec2 100644 --- a/ext/oj/dump.c +++ b/ext/oj/dump.c @@ -472,7 +472,7 @@ void oj_dump_time(VALUE obj, Out out, int withZone) { void oj_dump_ruby_time(VALUE obj, Out out) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out); } void oj_dump_xml_time(VALUE obj, Out out) { @@ -713,7 +713,7 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) { if (rb_utf8_encoding() != enc) { obj = rb_str_conv_enc(obj, enc, rb_utf8_encoding()); } - oj_dump_cstr(rb_string_value_ptr((VALUE *)&obj), (int)RSTRING_LEN(obj), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(obj), (int)RSTRING_LEN(obj), 0, 0, out); } void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) { @@ -722,7 +722,7 @@ void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) { volatile VALUE s = rb_sym_to_s(obj); - 
oj_dump_cstr(rb_string_value_ptr((VALUE *)&s), (int)RSTRING_LEN(s), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 0, 0, out); } static void debug_raise(const char *orig, size_t cnt, int line) { @@ -760,7 +760,7 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) { if (Yes == out->opts->trace) { oj_trace("raw_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyOut); } - oj_dump_raw(rb_string_value_ptr((VALUE *)&jv), (size_t)RSTRING_LEN(jv), out); + oj_dump_raw(RSTRING_PTR(jv), (size_t)RSTRING_LEN(jv), out); } } @@ -958,7 +958,7 @@ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) { void oj_dump_obj_to_s(VALUE obj, Out out) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out); } void oj_dump_raw(const char *str, size_t cnt, Out out) { @@ -1075,7 +1075,7 @@ void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) { } else { assure_size(out, cnt); } - memcpy(out->cur, rb_string_value_ptr((VALUE *)&rs), cnt); + memcpy(out->cur, RSTRING_PTR(rs), cnt); out->cur += cnt; if (dump_as_string) { *out->cur++ = '"'; @@ -1203,7 +1203,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) { if ((int)sizeof(buf) <= cnt) { cnt = sizeof(buf) - 1; } - memcpy(buf, rb_string_value_ptr((VALUE *)&rstr), cnt); + memcpy(buf, RSTRING_PTR(rstr), cnt); buf[cnt] = '\0'; } else { cnt = oj_dump_float_printf(buf, sizeof(buf), obj, d, out->opts->float_fmt); @@ -1223,7 +1223,7 @@ int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char if (17 <= cnt && (0 == strcmp("0001", buf + cnt - 4) || 0 == strcmp("9999", buf + cnt - 4))) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - strcpy(buf, rb_string_value_ptr((VALUE *)&rstr)); + strcpy(buf, RSTRING_PTR(rstr)); cnt = (int)RSTRING_LEN(rstr); } return cnt; diff --git a/ext/oj/dump_compat.c b/ext/oj/dump_compat.c 
index a5fac992..4b1b599c 100644 --- a/ext/oj/dump_compat.c +++ b/ext/oj/dump_compat.c @@ -129,7 +129,7 @@ dump_to_json(VALUE obj, Out out) { oj_trace("to_json", obj, __FILE__, __LINE__, 0, TraceRubyOut); } - s = rb_string_value_ptr((VALUE*)&rs); + s = RSTRING_PTR(rs); len = (int)RSTRING_LEN(rs); assure_size(out, len + 1); @@ -635,7 +635,7 @@ dump_float(VALUE obj, int depth, Out out, bool as_ok) { } else { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - strcpy(buf, rb_string_value_ptr((VALUE*)&rstr)); + strcpy(buf, RSTRING_PTR(rstr)); cnt = (int)RSTRING_LEN(rstr); } assure_size(out, cnt); @@ -886,7 +886,7 @@ dump_bignum(VALUE obj, int depth, Out out, bool as_ok) { } else { assure_size(out, cnt); } - memcpy(out->cur, rb_string_value_ptr((VALUE*)&rs), cnt); + memcpy(out->cur, RSTRING_PTR(rs), cnt); out->cur += cnt; if (dump_as_string) { *out->cur++ = '"'; diff --git a/ext/oj/dump_object.c b/ext/oj/dump_object.c index 17ed099b..d7079c60 100644 --- a/ext/oj/dump_object.c +++ b/ext/oj/dump_object.c @@ -36,7 +36,7 @@ static void dump_data(VALUE obj, int depth, Out out, bool as_ok) { } else { if (oj_bigdecimal_class == clas) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - const char * str = rb_string_value_ptr((VALUE *)&rstr); + const char * str = RSTRING_PTR(rstr); int len = (int)RSTRING_LEN(rstr); if (No != out->opts->bigdec_as_num) { @@ -65,7 +65,7 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) { if (oj_bigdecimal_class == clas) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - const char * str = rb_string_value_ptr((VALUE *)&rstr); + const char * str = RSTRING_PTR(rstr); int len = (int)RSTRING_LEN(rstr); if (0 == strcasecmp("Infinity", str)) { @@ -195,7 +195,7 @@ static void dump_str_class(VALUE obj, VALUE clas, int depth, Out out) { if (Qundef != clas && rb_cString != clas) { dump_obj_attrs(obj, clas, 0, depth, out); } else { - const char *s = rb_string_value_ptr((VALUE *)&obj); + const char *s = RSTRING_PTR(obj); size_t 
len = (int)RSTRING_LEN(obj); char s1 = s[1]; @@ -210,7 +210,7 @@ static void dump_str(VALUE obj, int depth, Out out, bool as_ok) { static void dump_sym(VALUE obj, int depth, Out out, bool as_ok) { volatile VALUE s = rb_sym_to_s(obj); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&s), (int)RSTRING_LEN(s), 1, 0, out); + oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 1, 0, out); } static int hash_cb(VALUE key, VALUE value, VALUE ov) { @@ -414,7 +414,7 @@ static void dump_odd(VALUE obj, Odd odd, VALUE clas, int depth, Out out) { if (Qundef == v || T_STRING != rb_type(v)) { rb_raise(rb_eEncodingError, "Invalid type for raw JSON."); } else { - const char *s = rb_string_value_ptr((VALUE *)&v); + const char *s = RSTRING_PTR(v); int len = (int)RSTRING_LEN(v); const char *name = rb_id2name(*odd->attrs); size_t nlen = strlen(name); @@ -532,7 +532,7 @@ static void dump_obj_attrs(VALUE obj, VALUE clas, slot_t id, int depth, Out out) *out->cur++ = 'f'; *out->cur++ = '"'; *out->cur++ = ':'; - oj_dump_cstr(rb_string_value_ptr((VALUE *)&obj), (int)RSTRING_LEN(obj), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(obj), (int)RSTRING_LEN(obj), 0, 0, out); break; case T_ARRAY: assure_size(out, d2 * out->indent + 14); @@ -696,7 +696,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { for (i = 0; i < cnt; i++) { volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); - name = rb_string_value_ptr((VALUE *)&s); + name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); size = len + 3; assure_size(out, size); diff --git a/ext/oj/dump_strict.c b/ext/oj/dump_strict.c index 50303629..90243178 100644 --- a/ext/oj/dump_strict.c +++ b/ext/oj/dump_strict.c @@ -98,7 +98,7 @@ static void dump_float(VALUE obj, int depth, Out out, bool as_ok) { if ((int)sizeof(buf) <= cnt) { cnt = sizeof(buf) - 1; } - memcpy(buf, rb_string_value_ptr((VALUE *)&rstr), cnt); + memcpy(buf, RSTRING_PTR(rstr), cnt); buf[cnt] = '\0'; } else { cnt = oj_dump_float_printf(buf, sizeof(buf), obj, d, out->opts->float_fmt); @@ 
-304,7 +304,7 @@ static void dump_data_strict(VALUE obj, int depth, Out out, bool as_ok) { if (oj_bigdecimal_class == clas) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_raw(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), out); + oj_dump_raw(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), out); } else { raise_strict(obj); } @@ -316,7 +316,7 @@ static void dump_data_null(VALUE obj, int depth, Out out, bool as_ok) { if (oj_bigdecimal_class == clas) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_raw(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), out); + oj_dump_raw(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), out); } else { oj_dump_nil(Qnil, depth, out, false); } diff --git a/ext/oj/odd.c b/ext/oj/odd.c index a536c899..513e0be1 100644 --- a/ext/oj/odd.c +++ b/ext/oj/odd.c @@ -206,7 +206,7 @@ void oj_reg_odd(VALUE clas, *fp = 0; switch (rb_type(*members)) { case T_STRING: - if (NULL == (*np = strdup(rb_string_value_ptr(members)))) { + if (NULL == (*np = strdup(RSTRING_PTR(*members)))) { rb_raise(rb_eNoMemError, "for attribute name."); } break; diff --git a/ext/oj/parse.c b/ext/oj/parse.c index e64ad3c9..9094d6a5 100644 --- a/ext/oj/parse.c +++ b/ext/oj/parse.c @@ -969,7 +969,7 @@ static void oj_pi_set_input_str(ParseInfo pi, volatile VALUE *inputp) { if (rb_utf8_encoding() != enc) { *inputp = rb_str_conv_enc(*inputp, enc, rb_utf8_encoding()); } - pi->json = rb_string_value_ptr((VALUE *)inputp); + pi->json = RSTRING_PTR(*inputp); pi->end = pi->json + RSTRING_LEN(*inputp); } diff --git a/ext/oj/rails.c b/ext/oj/rails.c index f59d6f27..cd204c3b 100644 --- a/ext/oj/rails.c +++ b/ext/oj/rails.c @@ -159,7 +159,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { for (i = 0; i < cnt; i++) { volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); - name = rb_string_value_ptr((VALUE *)&s); + name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); assure_size(out, size + sep_len + 6); if (0 < i) { @@ -202,7 
+202,7 @@ static void dump_enumerable(VALUE obj, int depth, Out out, bool as_ok) { static void dump_bigdecimal(VALUE obj, int depth, Out out, bool as_ok) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - const char * str = rb_string_value_ptr((VALUE *)&rstr); + const char * str = RSTRING_PTR(rstr); if ('I' == *str || 'N' == *str || ('-' == *str && 'I' == str[1])) { oj_dump_nil(Qnil, depth, out, false); @@ -355,7 +355,7 @@ static void dump_timewithzone(VALUE obj, int depth, Out out, bool as_ok) { static void dump_to_s(VALUE obj, int depth, Out out, bool as_ok) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_cstr(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), 0, 0, out); + oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out); } static ID parameters_id = 0; @@ -1224,7 +1224,7 @@ static void dump_float(VALUE obj, int depth, Out out, bool as_ok) { } else { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - strcpy(buf, rb_string_value_ptr((VALUE *)&rstr)); + strcpy(buf, RSTRING_PTR(rstr)); cnt = (int)RSTRING_LEN(rstr); } } diff --git a/ext/oj/wab.c b/ext/oj/wab.c index ade3511f..c46af69e 100644 --- a/ext/oj/wab.c +++ b/ext/oj/wab.c @@ -233,7 +233,7 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) { } else if (oj_bigdecimal_class == clas) { volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0); - oj_dump_raw(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), out); + oj_dump_raw(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), out); } else if (resolve_wab_uuid_class() == clas) { oj_dump_str(rb_funcall(obj, oj_to_s_id, 0), depth, out, false); } else if (resolve_uri_http_class() == clas) {