From 62aea593dcd7c2102600eeb8dd244f1b39e4088d Mon Sep 17 00:00:00 2001 From: Watson Date: Fri, 6 Aug 2021 21:15:58 +0900 Subject: [PATCH] Improve performance of `Oj.load` with symbol_keys mode (#681) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert from String to Symbol, seems that it has some overhead. This patch will generate Symbol via `rb_intern3()` and `ID2SYM()`. The same approach seems to be favored by other gems. (https://github.com/brianmario/mysql2/blob/ca883e1a359a10f48ac5d0ce649827b424110cd7/ext/mysql2/result.c#L171-L172) − | before | after | result -- | -- | -- | -- Oj.load | 339.194k | 387.595k | 1.14x ### Environment - MacBook Air (M1, 2020) - macOS 12.0 beta 3 - Apple M1 - Ruby 3.0.2 ### Before ``` Warming up -------------------------------------- Oj.load 33.731k i/100ms Calculating ------------------------------------- Oj.load 339.194k (± 0.9%) i/s - 1.720M in 5.072086s ``` ### After ``` Warming up -------------------------------------- Oj.load 39.129k i/100ms Calculating ------------------------------------- Oj.load 387.595k (± 1.0%) i/s - 1.956M in 5.048159s ``` ### Test code ```ruby require 'benchmark/ips' require 'oj' json =<<-EOF { "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name." }, "lastName": { "type": "string", "description": "The person's last name." }, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } } EOF Benchmark.ips do |x| x.report('Oj.load') { Oj.load(json, symbol_keys: true) } end ``` --- ext/oj/compat.c | 11 +++++------ ext/oj/custom.c | 9 +++++---- ext/oj/object.c | 19 +++++++------------ ext/oj/strict.c | 11 +++++------ 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/ext/oj/compat.c b/ext/oj/compat.c index a4467225..0a8a6b90 100644 --- a/ext/oj/compat.c +++ b/ext/oj/compat.c @@ -27,19 +27,18 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c if (Qundef == rkey) { if (Yes != pi->options.cache_keys) { - rkey = rb_str_new(key, klen); - rkey = oj_encode(rkey); if (Yes == pi->options.sym_key) { - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding)); + } else { + rkey = rb_str_new(key, klen); + rkey = oj_encode(rkey); } } else { VALUE *slot; if (Yes == pi->options.sym_key) { if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) { - rkey = rb_str_new(key, klen); - rkey = oj_encode(rkey); - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding)); *slot = rkey; rb_gc_register_address(slot); } diff --git a/ext/oj/custom.c b/ext/oj/custom.c index b4c6893d..e17f4a0c 100644 --- a/ext/oj/custom.c +++ b/ext/oj/custom.c @@ -959,12 +959,13 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c volatile VALUE rstr = rb_str_new(str, len); if (Qundef == rkey) { - rkey = rb_str_new(key, klen); - rstr = oj_encode(rstr); - rkey = oj_encode(rkey); if (Yes == pi->options.sym_key) { - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding)); + } else { + rkey = rb_str_new(key, klen); + rkey = oj_encode(rkey); } + rstr = oj_encode(rstr); } if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) { VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len); diff --git a/ext/oj/object.c b/ext/oj/object.c index 95e78ebd..7a925efe 100644 --- a/ext/oj/object.c +++ b/ext/oj/object.c @@ -59,14 +59,13 @@ static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) { } #else if (':' == k1) { - rkey = rb_str_new(kval->key + 1, kval->klen - 1); - rkey = oj_encode(rkey); - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding)); } else { - rkey = rb_str_new(kval->key, kval->klen); - rkey = oj_encode(rkey); if (Yes == pi->options.sym_key) { - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding)); + } else { + rkey = rb_str_new(kval->key, kval->klen); + rkey = oj_encode(rkey); } } #endif @@ -78,9 +77,7 @@ static VALUE str_to_value(ParseInfo pi, const char *str, size_t len, const char volatile VALUE rstr = Qnil; if (':' == *orig && 0 < len) { - rstr = rb_str_new(str + 1, len - 1); - rstr = oj_encode(rstr); - rstr = rb_funcall(rstr, oj_to_sym_id, 0); + rstr = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding)); } else if (pi->circ_array && 3 <= len && '^' == *orig && 'r' == orig[1]) { long i = read_long(str + 2, len - 2); @@ -259,9 +256,7 @@ static int hat_cstr(ParseInfo pi, Val parent, Val kval, const char *str, size_t parent->odd_args = oj_odd_alloc_args(odd); } break; case 'm': - parent->val = rb_str_new(str + 1, len - 1); - parent->val = oj_encode(parent->val); - parent->val = rb_funcall(parent->val, oj_to_sym_id, 0); + parent->val = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding)); break; case 's': parent->val = rb_str_new(str, len); diff --git a/ext/oj/strict.c b/ext/oj/strict.c index 00f786d3..3e836b94 100644 --- a/ext/oj/strict.c +++ b/ext/oj/strict.c @@ -39,10 +39,11 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) { return rkey; } if (Yes != pi->options.cache_keys) { - rkey = rb_str_new(parent->key, parent->klen); - rkey = oj_encode(rkey); if (Yes == pi->options.sym_key) { - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(parent->key, parent->klen, oj_utf8_encoding)); + } else { + rkey = rb_str_new(parent->key, parent->klen); + rkey = oj_encode(rkey); } OBJ_FREEZE(rkey); return rkey; @@ -51,9 +52,7 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) { if (Yes == pi->options.sym_key) { if (Qnil == (rkey = oj_sym_hash_get(parent->key, parent->klen, &slot))) { - rkey = rb_str_new(parent->key, parent->klen); - rkey = oj_encode(rkey); - rkey = rb_str_intern(rkey); + rkey = ID2SYM(rb_intern3(parent->key, parent->klen, oj_utf8_encoding)); *slot = rkey; rb_gc_register_address(slot); }