Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache #670

Merged
merged 3 commits into from Jul 6, 2021
Merged

Cache #670

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG

## 3.12.0 - 2021-07-05

- Added string and symbol caching options that give Oj about a 20% parse performance boost.

## 3.11.8 - 2021-07-03

- Fixed or reverted change that set the default mode when optimize_Rails was called.
Expand Down
49 changes: 23 additions & 26 deletions ext/oj/compat.c
Expand Up @@ -23,14 +23,26 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
parent->classname = oj_strndup(str, len);
parent->clen = len;
} else {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

if (Qundef == rkey) {
rkey = rb_str_new(key, klen);
rstr = oj_encode(rstr);
rkey = oj_encode(rkey);
VALUE *slot;

if (Yes == pi->options.sym_key) {
rkey = rb_str_intern(rkey);
if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
rkey = rb_str_intern(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
} else {
if (Qnil == (rkey = oj_str_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
}
}
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
Expand Down Expand Up @@ -93,23 +105,9 @@ static void end_hash(struct _parseInfo *pi) {
}
}

// Produce the Ruby key for the hash member described by parent.
//
// A key value already stored on parent (key_val) is reused; when it is
// Qundef a new String is built from the raw key bytes. The key is always
// passed through oj_encode(), and is additionally handed to rb_str_intern()
// when the sym_key option is Yes.
static VALUE calc_hash_key(ParseInfo pi, Val parent) {
    volatile VALUE key = parent->key_val;

    if (Qundef == key) {
        key = rb_str_new(parent->key, parent->klen);
    }
    key = oj_encode(key);
    if (Yes != pi->options.sym_key) {
        return key;
    }
    return rb_str_intern(key);
}

static void add_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

rstr = oj_encode(rstr);
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);

Expand Down Expand Up @@ -142,10 +140,10 @@ static void hash_set_num(struct _parseInfo *pi, Val parent, NumInfo ni) {
rb_funcall(stack_peek(&pi->stack)->val,
rb_intern("[]="),
2,
calc_hash_key(pi, parent),
oj_calc_hash_key(pi, parent),
rval);
} else {
rb_hash_aset(stack_peek(&pi->stack)->val, calc_hash_key(pi, parent), rval);
rb_hash_aset(stack_peek(&pi->stack)->val, oj_calc_hash_key(pi, parent), rval);
}
if (Yes == pi->options.trace) {
oj_trace_parse_call("set_number", pi, __FILE__, __LINE__, rval);
Expand All @@ -161,10 +159,10 @@ static void hash_set_value(ParseInfo pi, Val parent, VALUE value) {
rb_funcall(stack_peek(&pi->stack)->val,
rb_intern("[]="),
2,
calc_hash_key(pi, parent),
oj_calc_hash_key(pi, parent),
value);
} else {
rb_hash_aset(stack_peek(&pi->stack)->val, calc_hash_key(pi, parent), value);
rb_hash_aset(stack_peek(&pi->stack)->val, oj_calc_hash_key(pi, parent), value);
}
if (Yes == pi->options.trace) {
oj_trace_parse_call("set_value", pi, __FILE__, __LINE__, value);
Expand Down Expand Up @@ -199,9 +197,8 @@ static void array_append_num(ParseInfo pi, NumInfo ni) {
}

static void array_append_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
volatile VALUE rstr = rb_str_new(str, len);
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);

rstr = oj_encode(rstr);
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);

Expand Down
18 changes: 3 additions & 15 deletions ext/oj/custom.c
Expand Up @@ -955,6 +955,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
}
}
} else {
//volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
volatile VALUE rstr = rb_str_new(str, len);

if (Qundef == rkey) {
Expand Down Expand Up @@ -1010,19 +1011,6 @@ static void end_hash(struct _parseInfo *pi) {
}
}

// Return the Ruby hash key for parent.
//
// Uses parent->key_val when it is set, otherwise creates a String from
// parent->key / parent->klen. The result goes through oj_encode() and, if
// pi->options.sym_key is Yes, through rb_str_intern() as well.
static VALUE calc_hash_key(ParseInfo pi, Val parent) {
    volatile VALUE hkey;

    if (Qundef != parent->key_val) {
        hkey = parent->key_val;
    } else {
        hkey = rb_str_new(parent->key, parent->klen);
    }
    hkey = oj_encode(hkey);

    return (Yes == pi->options.sym_key) ? rb_str_intern(hkey) : hkey;
}

static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
Val parent = stack_peek(&pi->stack);
volatile VALUE rval = oj_num_as_value(ni);
Expand Down Expand Up @@ -1067,7 +1055,7 @@ static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
}
rval = parent->val;
} else {
rb_hash_aset(parent->val, calc_hash_key(pi, kval), rval);
rb_hash_aset(parent->val, oj_calc_hash_key(pi, kval), rval);
}
break;
default: break;
Expand All @@ -1082,7 +1070,7 @@ static void hash_set_value(ParseInfo pi, Val kval, VALUE value) {

switch (rb_type(parent->val)) {
case T_OBJECT: oj_set_obj_ivar(parent, kval, value); break;
case T_HASH: rb_hash_aset(parent->val, calc_hash_key(pi, kval), value); break;
case T_HASH: rb_hash_aset(parent->val, oj_calc_hash_key(pi, kval), value); break;
default: break;
}
if (Yes == pi->options.trace) {
Expand Down
17 changes: 16 additions & 1 deletion ext/oj/hash.c
Expand Up @@ -20,6 +20,8 @@ struct _hash {
};

struct _hash class_hash;
struct _hash str_hash;
struct _hash sym_hash;
struct _hash intern_hash;

// almost the Murmur hash algorithm
Expand Down Expand Up @@ -64,6 +66,8 @@ static uint32_t hash_calc(const uint8_t *key, size_t len) {

void oj_hash_init() {
memset(class_hash.slots, 0, sizeof(class_hash.slots));
memset(str_hash.slots, 0, sizeof(str_hash.slots));
memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
memset(intern_hash.slots, 0, sizeof(intern_hash.slots));
}

Expand Down Expand Up @@ -117,7 +121,18 @@ oj_class_hash_get(const char *key, size_t len, VALUE **slotp) {
return hash_get(&class_hash, key, len, slotp, Qnil);
}

ID oj_attr_hash_get(const char *key, size_t len, ID **slotp) {
// Look up key (len bytes) in the string cache table, str_hash.
//
// Delegates to hash_get() with Qnil as its final argument and returns its
// result unchanged; slotp is forwarded as-is. The compat.c caller treats a
// Qnil return as a miss and stores the newly built value through *slotp.
VALUE
oj_str_hash_get(const char *key, size_t len, VALUE **slotp) {
    struct _hash *table = &str_hash;

    return hash_get(table, key, len, slotp, Qnil);
}

// Look up key (len bytes) in the symbol cache table, sym_hash.
//
// Delegates to hash_get() with Qnil as its final argument and returns its
// result unchanged; slotp is forwarded as-is. The compat.c caller treats a
// Qnil return as a miss and stores the newly interned value through *slotp.
VALUE
oj_sym_hash_get(const char *key, size_t len, VALUE **slotp) {
    struct _hash *table = &sym_hash;

    return hash_get(table, key, len, slotp, Qnil);
}

// Look up key (len bytes) in the intern_hash table.
//
// Delegates to hash_get() with 0 as its final argument. The table is shared
// with the VALUE-based lookups, so slotp is cast to VALUE ** on the way in
// and the result is cast back to ID on the way out.
ID
oj_attr_hash_get(const char *key, size_t len, ID **slotp) {
    VALUE **vslotp = (VALUE **)slotp;

    return (ID)hash_get(&intern_hash, key, len, vslotp, 0);
}

Expand Down
2 changes: 2 additions & 0 deletions ext/oj/hash.h
Expand Up @@ -11,6 +11,8 @@ typedef struct _hash *Hash;
extern void oj_hash_init();

extern VALUE oj_class_hash_get(const char *key, size_t len, VALUE **slotp);
extern VALUE oj_str_hash_get(const char *key, size_t len, VALUE **slotp);
extern VALUE oj_sym_hash_get(const char *key, size_t len, VALUE **slotp);
extern ID oj_attr_hash_get(const char *key, size_t len, ID **slotp);

extern void oj_hash_print();
Expand Down
10 changes: 6 additions & 4 deletions ext/oj/mimic_json.c
Expand Up @@ -389,9 +389,9 @@ static VALUE mimic_generate_core(int argc, VALUE *argv, Options copts) {
} else {
VALUE active_hack[1];

if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
}
if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
}
active_hack[0] = rb_funcall(state_class, oj_new_id, 0);
oj_dump_obj_to_json_using_params(*argv, copts, &out, 1, active_hack);
}
Expand Down Expand Up @@ -480,7 +480,7 @@ oj_mimic_pretty_generate(int argc, VALUE *argv, VALUE self) {
rb_hash_aset(h, oj_array_nl_sym, rb_str_new2("\n"));
}
if (Qundef == state_class) {
oj_define_mimic_json(0, NULL, Qnil);
oj_define_mimic_json(0, NULL, Qnil);
}
rargs[1] = rb_funcall(state_class, oj_new_id, 1, h);

Expand Down Expand Up @@ -713,6 +713,8 @@ static struct _options mimic_object_to_json_options = {0, // indent
No, // safe
false, // sec_prec_set
No, // ignore_under
Yes, // cache_keys
3, // cache_str
0, // int_range_min
0, // int_range_max
oj_json_class, // create_id
Expand Down
30 changes: 29 additions & 1 deletion ext/oj/object.c
Expand Up @@ -30,18 +30,46 @@ inline static long read_long(const char *str, size_t len) {

// Build the Ruby hash key for kval.
//
// pi   - parse state; only pi->options.sym_key is read here.
// kval - supplies the raw key bytes (kval->key, kval->klen).
// k1   - when it is ':', the first byte of the key is skipped and the
//        remainder is always converted with rb_str_intern().
//
// Otherwise the full key becomes an encoded String, converted with
// rb_str_intern() only when the sym_key option is Yes.
//
// The first preprocessor branch is a cached variant built on
// oj_sym_hash_get()/oj_str_hash_get(); it is compiled out with #if 0.
// NOTE(review): the reason it is disabled is not visible here.
static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
    volatile VALUE rkey;
#if 0
    VALUE *slot;

    if (':' == k1) {
        if (Qnil == (rkey = oj_sym_hash_get(kval->key + 1, kval->klen - 1, &slot))) {
            rkey = rb_str_new(kval->key + 1, kval->klen - 1);
            rkey = oj_encode(rkey);
            rkey = rb_str_intern(rkey);
            *slot = rkey;
            rb_gc_register_address(slot);
        }
    } else if (Yes == pi->options.sym_key) {
        if (Qnil == (rkey = oj_sym_hash_get(kval->key, kval->klen, &slot))) {
            rkey = rb_str_new(kval->key, kval->klen);
            rkey = oj_encode(rkey);
            rkey = rb_str_intern(rkey);
            *slot = rkey;
            rb_gc_register_address(slot);
        }
    } else {
        if (Qnil == (rkey = oj_str_hash_get(kval->key, kval->klen, &slot))) {
            rkey = rb_str_new(kval->key, kval->klen);
            rkey = oj_encode(rkey);
            *slot = rkey;
            rb_gc_register_address(slot);
        }
    }
#else
    if (':' == k1) {
        rkey = rb_str_new(kval->key + 1, kval->klen - 1);
        rkey = oj_encode(rkey);
        // Convert exactly once. The earlier rb_funcall(rkey, oj_to_sym_id, 0)
        // already yielded a Symbol, which was then passed to rb_str_intern()
        // even though that function expects a String.
        rkey = rb_str_intern(rkey);
    } else {
        rkey = rb_str_new(kval->key, kval->klen);
        rkey = oj_encode(rkey);
        if (Yes == pi->options.sym_key) {
            rkey = rb_str_intern(rkey);
        }
    }
#endif
    return rkey;
}

Expand Down
45 changes: 43 additions & 2 deletions ext/oj/oj.c
Expand Up @@ -106,6 +106,8 @@ static VALUE auto_sym;
static VALUE bigdecimal_as_decimal_sym;
static VALUE bigdecimal_load_sym;
static VALUE bigdecimal_sym;
static VALUE cache_keys_sym;
static VALUE cache_str_sym;
static VALUE circular_sym;
static VALUE class_cache_sym;
static VALUE compat_bigdecimal_sym;
Expand Down Expand Up @@ -186,6 +188,8 @@ struct _options oj_default_options = {
No, // safe
false, // sec_prec_set
No, // ignore_under
Yes, // cache_keys
3, // cache_str
0, // int_range_min
0, // int_range_max
oj_json_class, // create_id
Expand Down Expand Up @@ -279,9 +283,11 @@ struct _options oj_default_options = {
*used
* - *:array_class* [_Class_|_nil_] Class to use instead of Array on load
* - *:omit_nil* [_true_|_false_] if true Hash and Object attributes with nil values are omitted
* - *:ignore* [_nil_|Array] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [Boolean] if true then attributes that start with _ are ignored when dumping in
* - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when dumping in
*object or custom mode.
* - *:cache_keys* [_Boolean_] if true then hash keys are cached
* - *:cache_str* [_Fixnum_] maximum string value length to cache
* - *:integer_range* [_Range_] Dump integers outside range as strings.
* - *:trace* [_true_|_false_] Trace all load and dump calls, default is false (trace is off)
* - *:safe* [_true_|_false_] Safe mimic breaks JSON mimic to be safer, default is false (safe is
Expand Down Expand Up @@ -389,11 +395,17 @@ static VALUE get_def_opts(VALUE self) {
? Qtrue
: ((No == oj_default_options.safe) ? Qfalse : Qnil));
rb_hash_aset(opts, float_prec_sym, INT2FIX(oj_default_options.float_prec));
rb_hash_aset(opts, cache_str_sym, INT2FIX(oj_default_options.cache_str));
rb_hash_aset(opts,
ignore_under_sym,
(Yes == oj_default_options.ignore_under)
? Qtrue
: ((No == oj_default_options.ignore_under) ? Qfalse : Qnil));
rb_hash_aset(opts,
cache_keys_sym,
(Yes == oj_default_options.cache_keys)
? Qtrue
: ((No == oj_default_options.cache_keys) ? Qfalse : Qnil));
switch (oj_default_options.mode) {
case StrictMode: rb_hash_aset(opts, mode_sym, strict_sym); break;
case CompatMode: rb_hash_aset(opts, mode_sym, compat_sym); break;
Expand Down Expand Up @@ -557,6 +569,8 @@ static VALUE get_def_opts(VALUE self) {
* - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
* - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when
*dumping in object or custom mode.
* - *:cache_keys* [_Boolean_] if true then hash keys are cached
* - *:cache_str* [_Fixnum_] maximum string value length to cache
* - *:integer_range* [_Range_] Dump integers outside range as strings.
* - *:trace* [_Boolean_] turn trace on or off.
* - *:safe* [_Boolean_] turn safe mimic on or off.
Expand Down Expand Up @@ -589,6 +603,7 @@ void oj_parse_options(VALUE ropts, Options copts) {
{oj_safe_sym, &copts->safe},
{ignore_under_sym, &copts->ignore_under},
{oj_create_additions_sym, &copts->create_ok},
{cache_keys_sym, &copts->cache_keys},
{Qnil, 0}};
YesNoOpt o;
volatile VALUE v;
Expand Down Expand Up @@ -647,6 +662,28 @@ void oj_parse_options(VALUE ropts, Options copts) {
copts->float_prec = n;
}
}
if (Qnil != (v = rb_hash_lookup(ropts, cache_str_sym))) {
int n;

#ifdef RUBY_INTEGER_UNIFICATION
if (rb_cInteger != rb_obj_class(v)) {
rb_raise(rb_eArgError, ":cache_str must be a Integer.");
}
#else
if (T_FIXNUM != rb_type(v)) {
rb_raise(rb_eArgError, ":cache_str must be a Fixnum.");
}
#endif
n = FIX2INT(v);
if (0 >= n) {
copts->cache_str = 0;
} else {
if (32 < n) {
n = 32;
}
copts->cache_str = (char)n;
}
}
if (Qnil != (v = rb_hash_lookup(ropts, sec_prec_sym))) {
int n;

Expand Down Expand Up @@ -1816,6 +1853,10 @@ void Init_oj() {
rb_gc_register_address(&bigdecimal_load_sym);
bigdecimal_sym = ID2SYM(rb_intern("bigdecimal"));
rb_gc_register_address(&bigdecimal_sym);
cache_keys_sym = ID2SYM(rb_intern("cache_keys"));
rb_gc_register_address(&cache_keys_sym);
cache_str_sym = ID2SYM(rb_intern("cache_str"));
rb_gc_register_address(&cache_str_sym);
circular_sym = ID2SYM(rb_intern("circular"));
rb_gc_register_address(&circular_sym);
class_cache_sym = ID2SYM(rb_intern("class_cache"));
Expand Down
2 changes: 2 additions & 0 deletions ext/oj/oj.h
Expand Up @@ -143,6 +143,8 @@ typedef struct _options {
char safe; // YesNo
char sec_prec_set; // boolean (0 or 1)
char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
char cache_keys; // YesNo
char cache_str; // strings shorter than or equal to this length are cached
int64_t int_range_min; // dump numbers below as string
int64_t int_range_max; // dump numbers above as string
const char * create_id; // 0 or string
Expand Down