Skip to content

Commit

Permalink
Cache (#670)
Browse files Browse the repository at this point in the history
Add hash key and string caching.
  • Loading branch information
ohler55 committed Jul 6, 2021
1 parent b5d0574 commit 78074e4
Show file tree
Hide file tree
Showing 19 changed files with 272 additions and 159 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG

## 3.12.0 - 2021-07-05

- Added string and symbol caching options that give Oj about a 20% parse performance boost.

## 3.11.8 - 2021-07-03

- Fixed or reverted change that set the default mode when optimize_Rails was called.
Expand Down
49 changes: 23 additions & 26 deletions ext/oj/compat.c
Expand Up @@ -23,14 +23,26 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
parent->classname = oj_strndup(str, len);
parent->clen = len;
} else {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

if (Qundef == rkey) {
rkey = rb_str_new(key, klen);
rstr = oj_encode(rstr);
rkey = oj_encode(rkey);
VALUE *slot;

if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
} else {
if (Qnil == (rkey = oj_str_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
}
}
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
Expand Down Expand Up @@ -93,23 +105,9 @@ static void end_hash(struct _parseInfo *pi) {
}
}

// Produce the Ruby object to use as the hash key for the entry described by
// parent.
//
// Starts from parent->key_val; when that is Qundef a new Ruby String is built
// from the raw key bytes (parent->key, parent->klen). The result is then passed
// through oj_encode() and, if the sym_key option is Yes, converted with
// rb_str_intern().
//
// pi     - parse state; only pi->options.sym_key is read here.
// parent - supplies key_val, key and klen.
//
// Returns the key as a VALUE.
static VALUE calc_hash_key(ParseInfo pi, Val parent) {
volatile VALUE rkey = parent->key_val;

if (Qundef == rkey) {
// No pre-built key value, so create one from the raw key bytes.
rkey = rb_str_new(parent->key, parent->klen);
}
// NOTE(review): oj_encode() is applied even when key_val was already set --
// confirm that is intended.
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
}
return rkey;
}

static void add_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

rstr = oj_encode(rstr);
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);

Expand Down Expand Up @@ -142,10 +140,10 @@ static void hash_set_num(struct _parseInfo *pi, Val parent, NumInfo ni) {
rb_funcall(stack_peek(&pi->stack)->val,
rb_intern("[]="),
2,
calc_hash_key(pi, parent),
oj_calc_hash_key(pi, parent),
rval);
} else {
rb_hash_aset(stack_peek(&pi->stack)->val, calc_hash_key(pi, parent), rval);
rb_hash_aset(stack_peek(&pi->stack)->val, oj_calc_hash_key(pi, parent), rval);
}
if (Yes == pi->options.trace) {
oj_trace_parse_call("set_number", pi, __FILE__, __LINE__, rval);
Expand All @@ -161,10 +159,10 @@ static void hash_set_value(ParseInfo pi, Val parent, VALUE value) {
rb_funcall(stack_peek(&pi->stack)->val,
rb_intern("[]="),
2,
calc_hash_key(pi, parent),
oj_calc_hash_key(pi, parent),
value);
} else {
rb_hash_aset(stack_peek(&pi->stack)->val, calc_hash_key(pi, parent), value);
rb_hash_aset(stack_peek(&pi->stack)->val, oj_calc_hash_key(pi, parent), value);
}
if (Yes == pi->options.trace) {
oj_trace_parse_call("set_value", pi, __FILE__, __LINE__, value);
Expand Down Expand Up @@ -199,9 +197,8 @@ static void array_append_num(ParseInfo pi, NumInfo ni) {
}

static void array_append_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

rstr = oj_encode(rstr);
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);

Expand Down
18 changes: 3 additions & 15 deletions ext/oj/custom.c
Expand Up @@ -955,6 +955,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
}
}
} else {
//volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
volatile VALUE rstr = rb_str_new(str, len);

if (Qundef == rkey) {
Expand Down Expand Up @@ -1010,19 +1011,6 @@ static void end_hash(struct _parseInfo *pi) {
}
}

// Produce the Ruby object to use as the hash key for the entry described by
// parent.
//
// Starts from parent->key_val; when that is Qundef a new Ruby String is built
// from the raw key bytes (parent->key, parent->klen). The result is then passed
// through oj_encode() and, if the sym_key option is Yes, converted with
// rb_str_intern().
//
// pi     - parse state; only pi->options.sym_key is read here.
// parent - supplies key_val, key and klen.
//
// Returns the key as a VALUE.
static VALUE calc_hash_key(ParseInfo pi, Val parent) {
volatile VALUE rkey = parent->key_val;

if (Qundef == rkey) {
// No pre-built key value, so create one from the raw key bytes.
rkey = rb_str_new(parent->key, parent->klen);
}
// NOTE(review): oj_encode() is applied even when key_val was already set --
// confirm that is intended.
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
}
return rkey;
}

static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
Val parent = stack_peek(&pi->stack);
volatile VALUE rval = oj_num_as_value(ni);
Expand Down Expand Up @@ -1067,7 +1055,7 @@ static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
}
rval = parent->val;
} else {
rb_hash_aset(parent->val, calc_hash_key(pi, kval), rval);
rb_hash_aset(parent->val, oj_calc_hash_key(pi, kval), rval);
}
break;
default: break;
Expand All @@ -1082,7 +1070,7 @@ static void hash_set_value(ParseInfo pi, Val kval, VALUE value) {

switch (rb_type(parent->val)) {
case T_OBJECT: oj_set_obj_ivar(parent, kval, value); break;
case T_HASH: rb_hash_aset(parent->val, calc_hash_key(pi, kval), value); break;
case T_HASH: rb_hash_aset(parent->val, oj_calc_hash_key(pi, kval), value); break;
default: break;
}
if (Yes == pi->options.trace) {
Expand Down
17 changes: 16 additions & 1 deletion ext/oj/hash.c
Expand Up @@ -20,6 +20,8 @@ struct _hash {
};

struct _hash class_hash;
struct _hash str_hash;
struct _hash sym_hash;
struct _hash intern_hash;

// almost the Murmur hash algorithm
Expand Down Expand Up @@ -64,6 +66,8 @@ static uint32_t hash_calc(const uint8_t *key, size_t len) {

void oj_hash_init() {
memset(class_hash.slots, 0, sizeof(class_hash.slots));
memset(str_hash.slots, 0, sizeof(str_hash.slots));
memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
memset(intern_hash.slots, 0, sizeof(intern_hash.slots));
}

Expand Down Expand Up @@ -117,7 +121,18 @@ oj_class_hash_get(const char *key, size_t len, VALUE **slotp) {
return hash_get(&class_hash, key, len, slotp, Qnil);
}

ID oj_attr_hash_get(const char *key, size_t len, ID **slotp) {
// Look up key (len bytes) in the str_hash table.
//
// Thin wrapper around hash_get() that passes Qnil as its final argument.
// The compat.c caller treats a Qnil return as a miss: it builds the String,
// stores it through *slotp and registers the slot with rb_gc_register_address().
// NOTE(review): presumably hash_get() points *slotp at the slot for key on a
// miss -- confirm against hash_get().
VALUE
oj_str_hash_get(const char *key, size_t len, VALUE **slotp) {
return hash_get(&str_hash, key, len, slotp, Qnil);
}

// Look up key (len bytes) in the sym_hash table.
//
// Thin wrapper around hash_get() that passes Qnil as its final argument.
// The compat.c caller treats a Qnil return as a miss: it builds and interns the
// key, stores the result through *slotp and registers the slot with
// rb_gc_register_address().
// NOTE(review): presumably hash_get() points *slotp at the slot for key on a
// miss -- confirm against hash_get().
VALUE
oj_sym_hash_get(const char *key, size_t len, VALUE **slotp) {
return hash_get(&sym_hash, key, len, slotp, Qnil);
}

ID
oj_attr_hash_get(const char *key, size_t len, ID **slotp) {
return (ID)hash_get(&intern_hash, key, len, (VALUE **)slotp, 0);
}

Expand Down
2 changes: 2 additions & 0 deletions ext/oj/hash.h
Expand Up @@ -11,6 +11,8 @@ typedef struct _hash *Hash;
extern void oj_hash_init();

extern VALUE oj_class_hash_get(const char *key, size_t len, VALUE **slotp);
extern VALUE oj_str_hash_get(const char *key, size_t len, VALUE **slotp);
extern VALUE oj_sym_hash_get(const char *key, size_t len, VALUE **slotp);
extern ID oj_attr_hash_get(const char *key, size_t len, ID **slotp);

extern void oj_hash_print();
Expand Down
10 changes: 6 additions & 4 deletions ext/oj/mimic_json.c
Expand Up @@ -389,9 +389,9 @@ static VALUE mimic_generate_core(int argc, VALUE *argv, Options copts) {
} else {
VALUE active_hack[1];

if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
}
if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
}
active_hack[0] = rb_funcall(state_class, oj_new_id, 0);
oj_dump_obj_to_json_using_params(*argv, copts, &out, 1, active_hack);
}
Expand Down Expand Up @@ -480,7 +480,7 @@ oj_mimic_pretty_generate(int argc, VALUE *argv, VALUE self) {
rb_hash_aset(h, oj_array_nl_sym, rb_str_new2("\n"));
}
if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
oj_define_mimic_json(0, NULL, Qnil);
}
rargs[1] = rb_funcall(state_class, oj_new_id, 1, h);

Expand Down Expand Up @@ -713,6 +713,8 @@ static struct _options mimic_object_to_json_options = {0, // indent
No, // safe
false, // sec_prec_set
No, // ignore_under
Yes, // cache_keys
3, // cache_str
0, // int_range_min
0, // int_range_max
oj_json_class, // create_id
Expand Down
30 changes: 29 additions & 1 deletion ext/oj/object.c
Expand Up @@ -30,18 +30,46 @@ inline static long read_long(const char *str, size_t len) {

static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
volatile VALUE rkey;
#if 0
VALUE *slot;

if (':' == k1) {
if (Qnil == (rkey = oj_sym_hash_get(kval->key + 1, kval->klen - 1, &slot))) {
rkey = rb_str_new(kval->key + 1, kval->klen - 1);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
} else if (Yes == pi->options.sym_key) {
if (Qnil == (rkey = oj_sym_hash_get(kval->key, kval->klen, &slot))) {
rkey = rb_str_new(kval->key, kval->klen);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
} else {
if (Qnil == (rkey = oj_str_hash_get(kval->key, kval->klen, &slot))) {
rkey = rb_str_new(kval->key, kval->klen);
rkey = oj_encode(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
}
#else
if (':' == k1) {
rkey = rb_str_new(kval->key + 1, kval->klen - 1);
rkey = oj_encode(rkey);
rkey = rb_funcall(rkey, oj_to_sym_id, 0);
rkey = rb_str_intern(rkey);
} else {
rkey = rb_str_new(kval->key, kval->klen);
rkey = oj_encode(rkey);
if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
}
}
#endif
return rkey;
}

Expand Down
45 changes: 43 additions & 2 deletions ext/oj/oj.c
Expand Up @@ -106,6 +106,8 @@ static VALUE auto_sym;
static VALUE bigdecimal_as_decimal_sym;
static VALUE bigdecimal_load_sym;
static VALUE bigdecimal_sym;
static VALUE cache_keys_sym;
static VALUE cache_str_sym;
static VALUE circular_sym;
static VALUE class_cache_sym;
static VALUE compat_bigdecimal_sym;
Expand Down Expand Up @@ -186,6 +188,8 @@ struct _options oj_default_options = {
No, // safe
false, // sec_prec_set
No, // ignore_under
Yes, // cache_keys
3, // cache_str
0, // int_range_min
0, // int_range_max
oj_json_class, // create_id
Expand Down Expand Up @@ -279,9 +283,11 @@ struct _options oj_default_options = {
*used
* - *:array_class* [_Class_|_nil_] Class to use instead of Array on load
* - *:omit_nil* [_true_|_false_] if true Hash and Object attributes with nil values are omitted
* - *:ignore* [_nil_|Array] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [Boolean] if true then attributes that start with _ are ignored when dumping in
* - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when dumping in
*object or custom mode.
* - *:cache_keys* [_Boolean_] if true then hash keys are cached
* - *:cache_str* [_Fixnum_] maximum string value length to cache
* - *:integer_range* [_Range_] Dump integers outside range as strings.
* - *:trace* [_true_|_false_] Trace all load and dump calls, default is false (trace is off)
* - *:safe* [_true_|_false_] Safe mimic breaks JSON mimic to be safer, default is false (safe is
Expand Down Expand Up @@ -389,11 +395,17 @@ static VALUE get_def_opts(VALUE self) {
? Qtrue
: ((No == oj_default_options.safe) ? Qfalse : Qnil));
rb_hash_aset(opts, float_prec_sym, INT2FIX(oj_default_options.float_prec));
rb_hash_aset(opts, cache_str_sym, INT2FIX(oj_default_options.cache_str));
rb_hash_aset(opts,
ignore_under_sym,
(Yes == oj_default_options.ignore_under)
? Qtrue
: ((No == oj_default_options.ignore_under) ? Qfalse : Qnil));
rb_hash_aset(opts,
cache_keys_sym,
(Yes == oj_default_options.cache_keys)
? Qtrue
: ((No == oj_default_options.cache_keys) ? Qfalse : Qnil));
switch (oj_default_options.mode) {
case StrictMode: rb_hash_aset(opts, mode_sym, strict_sym); break;
case CompatMode: rb_hash_aset(opts, mode_sym, compat_sym); break;
Expand Down Expand Up @@ -557,6 +569,8 @@ static VALUE get_def_opts(VALUE self) {
* - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when
*dumping in object or custom mode.
* - *:cache_keys* [_Boolean_] if true then hash keys are cached
* - *:cache_str* [_Fixnum_] maximum string value length to cache
* - *:integer_range* [_Range_] Dump integers outside range as strings.
* - *:trace* [_Boolean_] turn trace on or off.
* - *:safe* [_Boolean_] turn safe mimic on or off.
Expand Down Expand Up @@ -589,6 +603,7 @@ void oj_parse_options(VALUE ropts, Options copts) {
{oj_safe_sym, &copts->safe},
{ignore_under_sym, &copts->ignore_under},
{oj_create_additions_sym, &copts->create_ok},
{cache_keys_sym, &copts->cache_keys},
{Qnil, 0}};
YesNoOpt o;
volatile VALUE v;
Expand Down Expand Up @@ -647,6 +662,28 @@ void oj_parse_options(VALUE ropts, Options copts) {
copts->float_prec = n;
}
}
if (Qnil != (v = rb_hash_lookup(ropts, cache_str_sym))) {
int n;

#ifdef RUBY_INTEGER_UNIFICATION
if (rb_cInteger != rb_obj_class(v)) {
rb_raise(rb_eArgError, ":cache_str must be a Integer.");
}
#else
if (T_FIXNUM != rb_type(v)) {
rb_raise(rb_eArgError, ":cache_str must be a Fixnum.");
}
#endif
n = FIX2INT(v);
if (0 >= n) {
copts->cache_str = 0;
} else {
if (32 < n) {
n = 32;
}
copts->cache_str = (char)n;
}
}
if (Qnil != (v = rb_hash_lookup(ropts, sec_prec_sym))) {
int n;

Expand Down Expand Up @@ -1816,6 +1853,10 @@ void Init_oj() {
rb_gc_register_address(&bigdecimal_load_sym);
bigdecimal_sym = ID2SYM(rb_intern("bigdecimal"));
rb_gc_register_address(&bigdecimal_sym);
cache_keys_sym = ID2SYM(rb_intern("cache_keys"));
rb_gc_register_address(&cache_keys_sym);
cache_str_sym = ID2SYM(rb_intern("cache_str"));
rb_gc_register_address(&cache_str_sym);
circular_sym = ID2SYM(rb_intern("circular"));
rb_gc_register_address(&circular_sym);
class_cache_sym = ID2SYM(rb_intern("class_cache"));
Expand Down
2 changes: 2 additions & 0 deletions ext/oj/oj.h
Expand Up @@ -143,6 +143,8 @@ typedef struct _options {
char safe; // YesNo
char sec_prec_set; // boolean (0 or 1)
char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
char cache_keys; // YesNo
char cache_str; // strings shorter than or equal to this length are cached
int64_t int_range_min; // dump numbers below as string
int64_t int_range_max; // dump numbers above as string
const char * create_id; // 0 or string
Expand Down

0 comments on commit 78074e4

Please sign in to comment.