Skip to content

Commit

Permalink
Improve performance of Oj.load with symbol_keys mode (#681)
Browse files Browse the repository at this point in the history
Convert from String to Symbol, seems that it has some overhead.
This patch will generate Symbol via `rb_intern3()` and `ID2SYM()`.

The same approach seems to be favored by other gems.
(https://github.com/brianmario/mysql2/blob/ca883e1a359a10f48ac5d0ce649827b424110cd7/ext/mysql2/result.c#L171-L172)

−               | before   | after    | result
--               | --       | --       | --
Oj.load          | 339.194k | 387.595k | 1.14x

### Environment
- MacBook Air (M1, 2020)
- macOS 12.0 beta 3
- Apple M1
- Ruby 3.0.2

### Before
```
Warming up --------------------------------------
             Oj.load    33.731k i/100ms
Calculating -------------------------------------
             Oj.load    339.194k (± 0.9%) i/s -      1.720M in   5.072086s
```

### After
```
Warming up --------------------------------------
             Oj.load    39.129k i/100ms
Calculating -------------------------------------
             Oj.load    387.595k (± 1.0%) i/s -      1.956M in   5.048159s
```

### Test code
```ruby
require 'benchmark/ips'
require 'oj'

json =<<-EOF
{
  "$id": "https://example.com/person.schema.json",
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Person",
  "type": "object",
  "properties": {
    "firstName": {
      "type": "string",
      "description": "The person's first name."
    },
    "lastName": {
      "type": "string",
      "description": "The person's last name."
    },
    "age": {
      "description": "Age in years which must be equal to or greater than zero.",
      "type": "integer",
      "minimum": 0
    }
  }
}
EOF

Benchmark.ips do |x|
  x.report('Oj.load') { Oj.load(json, symbol_keys: true) }
end
```
  • Loading branch information
Watson1978 committed Aug 6, 2021
1 parent 2f8cf2b commit 62aea59
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 28 deletions.
11 changes: 5 additions & 6 deletions ext/oj/compat.c
Expand Up @@ -27,19 +27,18 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c

if (Qundef == rkey) {
if (Yes != pi->options.cache_keys) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
} else {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
}
} else {
VALUE *slot;

if (Yes == pi->options.sym_key) {
if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
*slot = rkey;
rb_gc_register_address(slot);
}
Expand Down
9 changes: 5 additions & 4 deletions ext/oj/custom.c
Expand Up @@ -959,12 +959,13 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
volatile VALUE rstr = rb_str_new(str, len);

if (Qundef == rkey) {
rkey = rb_str_new(key, klen);
rstr = oj_encode(rstr);
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
} else {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
}
rstr = oj_encode(rstr);
}
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
Expand Down
19 changes: 7 additions & 12 deletions ext/oj/object.c
Expand Up @@ -59,14 +59,13 @@ static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
}
#else
if (':' == k1) {
rkey = rb_str_new(kval->key + 1, kval->klen - 1);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
} else {
rkey = rb_str_new(kval->key, kval->klen);
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
} else {
rkey = rb_str_new(kval->key, kval->klen);
rkey = oj_encode(rkey);
}
}
#endif
Expand All @@ -78,9 +77,7 @@ static VALUE str_to_value(ParseInfo pi, const char *str, size_t len, const char
volatile VALUE rstr = Qnil;

if (':' == *orig && 0 < len) {
rstr = rb_str_new(str + 1, len - 1);
rstr = oj_encode(rstr);
rstr = rb_funcall(rstr, oj_to_sym_id, 0);
rstr = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding));
} else if (pi->circ_array && 3 <= len && '^' == *orig && 'r' == orig[1]) {
long i = read_long(str + 2, len - 2);

Expand Down Expand Up @@ -259,9 +256,7 @@ static int hat_cstr(ParseInfo pi, Val parent, Val kval, const char *str, size_t
parent->odd_args = oj_odd_alloc_args(odd);
} break;
case 'm':
parent->val = rb_str_new(str + 1, len - 1);
parent->val = oj_encode(parent->val);
parent->val = rb_funcall(parent->val, oj_to_sym_id, 0);
parent->val = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding));
break;
case 's':
parent->val = rb_str_new(str, len);
Expand Down
11 changes: 5 additions & 6 deletions ext/oj/strict.c
Expand Up @@ -39,10 +39,11 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) {
return rkey;
}
if (Yes != pi->options.cache_keys) {
rkey = rb_str_new(parent->key, parent->klen);
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(parent->key, parent->klen, oj_utf8_encoding));
} else {
rkey = rb_str_new(parent->key, parent->klen);
rkey = oj_encode(rkey);
}
OBJ_FREEZE(rkey);
return rkey;
Expand All @@ -51,9 +52,7 @@ VALUE oj_calc_hash_key(ParseInfo pi, Val parent) {

if (Yes == pi->options.sym_key) {
if (Qnil == (rkey = oj_sym_hash_get(parent->key, parent->klen, &slot))) {
rkey = rb_str_new(parent->key, parent->klen);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
rkey = ID2SYM(rb_intern3(parent->key, parent->klen, oj_utf8_encoding));
*slot = rkey;
rb_gc_register_address(slot);
}
Expand Down

0 comments on commit 62aea59

Please sign in to comment.