From aa0df5e743429302cb6f6cbdeed6e682dd17918d Mon Sep 17 00:00:00 2001 From: Watson Date: Fri, 6 Aug 2021 01:03:03 +0900 Subject: [PATCH] Improve `Oj.load` performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a non-frozen string is used as a hash key with rb_hash_aset(), Ruby duplicates and freezes the string internally. ```c static int hash_aset_str(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing) { if (!existing && !RB_OBJ_FROZEN(*key)) { *key = rb_hash_key_str(*key); } return hash_aset(key, val, arg, existing); } ``` See: https://github.com/ruby/ruby/blob/bda56a03a625793cb3fd110458c3f7323d73705e/hash.c#L2890-L2897 To avoid that duplication and freeze, this patch passes an already-frozen string to rb_hash_aset(). FYI: when a string is used as a hash key, the key stored in the hash is always frozen. ``` irb(main):001:0> hash = { "foo" => 42, bar: 55 } => {"foo"=>42, :bar=>55} irb(main):002:0> hash.keys[0].frozen? => true irb(main):003:0> hash.keys[1].frozen? => true ``` This patch takes the same approach as https://github.com/flori/json/pull/345 − | before | after | result -- | -- | -- | -- Oj.load | 335.122k | 422.081k | 1.26x ### Environment - MacBook Air (M1, 2020) - macOS 12.0 beta 3 - Apple M1 - Ruby 3.0.2 ### Before ``` Warming up -------------------------------------- Oj.load 33.829k i/100ms Calculating ------------------------------------- Oj.load 335.122k (± 0.9%) i/s - 1.691M in 5.047682s ``` ### After ``` Warming up -------------------------------------- Oj.load 42.573k i/100ms Calculating ------------------------------------- Oj.load 422.081k (± 0.5%) i/s - 2.129M in 5.043373s ``` ### Test code ```ruby require 'benchmark/ips' require 'oj' json =<<-EOF { "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name."
}, "lastName": { "type": "string", "description": "The person's last name." }, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } } EOF Benchmark.ips do |x| x.report('Oj.load') { Oj.load(json) } end ``` --- ext/oj/object.c | 1 + ext/oj/strict.c | 2 ++ ext/oj/wab.c | 1 + 3 files changed, 4 insertions(+) diff --git a/ext/oj/object.c b/ext/oj/object.c index 22c2df2b..95e78ebd 100644 --- a/ext/oj/object.c +++ b/ext/oj/object.c @@ -70,6 +70,7 @@ static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) { } } #endif + OBJ_FREEZE(rkey); return rkey; } diff --git a/ext/oj/strict.c b/ext/oj/strict.c index 9f81cadf..00f786d3 100644 --- a/ext/oj/strict.c +++ b/ext/oj/strict.c @@ -44,6 +44,7 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) { if (Yes == pi->options.sym_key) { rkey = rb_str_intern(rkey); } + OBJ_FREEZE(rkey); return rkey; } VALUE *slot; @@ -64,6 +65,7 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) { rb_gc_register_address(slot); } } + OBJ_FREEZE(rkey); return rkey; } diff --git a/ext/oj/wab.c b/ext/oj/wab.c index 44e9dd3a..ade3511f 100644 --- a/ext/oj/wab.c +++ b/ext/oj/wab.c @@ -318,6 +318,7 @@ static VALUE calc_hash_key(ParseInfo pi, Val parent) { *slot = rkey; rb_gc_register_address(slot); } + OBJ_FREEZE(rkey); return rkey; }