From bbb011b71b417095e61183d1ad85744062adc6fa Mon Sep 17 00:00:00 2001 From: Watson Date: Mon, 9 Aug 2021 14:25:46 +0900 Subject: [PATCH] Use `rb_sym2str()` to retrieve String object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When it retrieves String object using `rb_sym_to_s()`, it duplicate the internal String of Symbol. ```c VALUE rb_sym_to_s(VALUE sym) { return str_new_shared(rb_cString, rb_sym2str(sym)); } ``` Refer: https://github.com/ruby/ruby/blob/6f6a84f2f3e82f9554618f080f9b66ded52884cc/string.c#L11136-L11140 There will be no need to duplicate it, since we haven't changed the retrieved String. This patch will improve performance where it handles Symbols. − | before | after | result -- | -- | -- | -- Oj.dump | 807.090k | 1.173M | 1.45x Oj.dump (compat) | 760.372k | 1.080M | 1.42x Oj.dump (rails) | 656.976k | 880.951k | 1.34x ### Environment - MacBook Air (M1, 2020) - macOS 12.0 beta 4 - Apple M1 - Ruby 3.0.2 ### Before ``` Warming up -------------------------------------- Oj.dump 80.910k i/100ms Oj.dump (compat) 75.874k i/100ms Oj.dump (rails) 65.977k i/100ms Calculating ------------------------------------- Oj.dump 807.090k (± 0.9%) i/s - 4.046M in 5.012848s Oj.dump (compat) 760.372k (± 0.8%) i/s - 3.870M in 5.089417s Oj.dump (rails) 656.976k (± 0.8%) i/s - 3.299M in 5.021613s ``` ### After ``` Warming up -------------------------------------- Oj.dump 119.895k i/100ms Oj.dump (compat) 107.509k i/100ms Oj.dump (rails) 87.631k i/100ms Calculating ------------------------------------- Oj.dump 1.173M (± 0.5%) i/s - 5.875M in 5.007040s Oj.dump (compat) 1.080M (± 0.7%) i/s - 5.483M in 5.075621s Oj.dump (rails) 880.951k (± 0.9%) i/s - 4.469M in 5.073530s ``` ### Test code ```ruby require 'benchmark/ips' require 'oj' json =<<-EOF { "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name." }, "lastName": { "type": "string", "description": "The person's last name." }, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } } EOF Benchmark.ips do |x| data = Oj.load(json, symbol_keys: true) x.report('Oj.dump') { Oj.dump(data) } x.report('Oj.dump (compat)') { Oj.dump(data, mode: :compat) } x.report('Oj.dump (rails)') { Oj.dump(data, mode: :rails) } end ``` --- ext/oj/custom.c | 2 +- ext/oj/dump.c | 5 +---- ext/oj/dump_object.c | 4 ++-- ext/oj/parser.c | 4 ++-- ext/oj/rails.c | 2 +- ext/oj/usual.c | 4 ++-- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/ext/oj/custom.c b/ext/oj/custom.c index 0bc215a1..c5adb46a 100644 --- a/ext/oj/custom.c +++ b/ext/oj/custom.c @@ -833,7 +833,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { v = rb_struct_aref(obj, INT2FIX(i)); #endif if (ma != Qnil) { - volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); + volatile VALUE s = rb_sym2str(rb_ary_entry(ma, i)); name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); diff --git a/ext/oj/dump.c b/ext/oj/dump.c index 1d449388..eef52402 100644 --- a/ext/oj/dump.c +++ b/ext/oj/dump.c @@ -717,10 +717,7 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) { } void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) { - // This causes a memory leak in 2.5.1. Maybe in other versions as well. - // const char *sym = rb_id2name(SYM2ID(obj)); - - volatile VALUE s = rb_sym_to_s(obj); + volatile VALUE s = rb_sym2str(obj); oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 0, 0, out); } diff --git a/ext/oj/dump_object.c b/ext/oj/dump_object.c index d7079c60..6c7f8219 100644 --- a/ext/oj/dump_object.c +++ b/ext/oj/dump_object.c @@ -208,7 +208,7 @@ static void dump_str(VALUE obj, int depth, Out out, bool as_ok) { } static void dump_sym(VALUE obj, int depth, Out out, bool as_ok) { - volatile VALUE s = rb_sym_to_s(obj); + volatile VALUE s = rb_sym2str(obj); oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 1, 0, out); } @@ -694,7 +694,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { *out->cur++ = '['; for (i = 0; i < cnt; i++) { - volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); + volatile VALUE s = rb_sym2str(rb_ary_entry(ma, i)); name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); diff --git a/ext/oj/parser.c b/ext/oj/parser.c index bc3bf827..80e92024 100644 --- a/ext/oj/parser.c +++ b/ext/oj/parser.c @@ -1217,7 +1217,7 @@ static VALUE parser_new(VALUE self, VALUE mode) { switch (rb_type(mode)) { case RUBY_T_SYMBOL: - mode = rb_sym_to_s(mode); + mode = rb_sym2str(mode); // fall through case RUBY_T_STRING: ms = RSTRING_PTR(mode); break; default: @@ -1290,7 +1290,7 @@ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) { #endif switch (rb_type(rkey)) { case RUBY_T_SYMBOL: - rkey = rb_sym_to_s(rkey); + rkey = rb_sym2str(rkey); // fall through case RUBY_T_STRING: key = rb_string_value_ptr(&rkey); break; default: rb_raise(rb_eArgError, "option method must be a symbol or string"); diff --git a/ext/oj/rails.c b/ext/oj/rails.c index cd204c3b..ca5a1d61 100644 --- a/ext/oj/rails.c +++ b/ext/oj/rails.c @@ -157,7 +157,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) { assure_size(out, 2); *out->cur++ = '{'; for (i = 0; i < cnt; i++) { - volatile VALUE s = rb_sym_to_s(rb_ary_entry(ma, i)); + volatile VALUE s = rb_sym2str(rb_ary_entry(ma, i)); name = RSTRING_PTR(s); len = (int)RSTRING_LEN(s); diff --git a/ext/oj/usual.c b/ext/oj/usual.c index 488f695b..a836816c 100644 --- a/ext/oj/usual.c +++ b/ext/oj/usual.c @@ -903,7 +903,7 @@ static VALUE opt_decimal_set(ojParser p, VALUE value) { switch (rb_type(value)) { case T_STRING: mode = RSTRING_PTR(value); break; case T_SYMBOL: - s = rb_sym_to_s(value); + s = rb_sym2str(value); mode = RSTRING_PTR(s); break; default: @@ -1020,7 +1020,7 @@ static VALUE opt_missing_class_set(ojParser p, VALUE value) { switch (rb_type(value)) { case T_STRING: mode = RSTRING_PTR(value); break; case T_SYMBOL: - s = rb_sym_to_s(value); + s = rb_sym2str(value); mode = RSTRING_PTR(s); break; default: