From b2347f35ab2628b1b3742f12798eb6590e81b5b4 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Wed, 7 Apr 2021 08:58:46 +0200 Subject: [PATCH 1/2] Make compaction available for `Oj::Doc` Introduce compaction to `fast.c`, only if `rb_gc_mark_movable` is available. Otherwise, keep a version that supports ruby <2.7. Since `rb_data_object_wrap` and `TypedData_Wrap_Struct` is available in every supported versions (eg >=2.4.10), I've also removed support for earlier versions. Result for compaction is great. I've tested it by allocating and removing arbitrary sized json. For a count of 70 pages, we can see that we used to need 68 pages after compaction,and now 61 pages. Of course this test include also lots of noise, but if we count pinned objects (objects that are marked in C extension as uncollectible), we can see that for a benchmark with no allocations, we have the same amount as a benchmark with this commit with thousands of allocations (499 pinned objects total), and before this, `Oj::Doc` used to generate 5k more objects (5499 pinned objects total). Hence this is a win, see ohler55/oj#650 for a graphical result. Signed-off-by: Ulysse Buonomo --- CHANGELOG.md | 2 +- Rakefile | 1 - ext/oj/extconf.rb | 3 ++- ext/oj/fast.c | 61 ++++++++++++++++++++++++++++++++++++++++------- lib/oj/bag.rb | 1 + test/test_scp.rb | 2 +- 6 files changed, 58 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e422bfcf..73b17e34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ - Code re-formatted with clang-format. Thanks goes to BuonOmo for suggesting and encouraging the use of a formatter and getting the effort started. - +- Added support for `GC.compact` on `Oj::Doc` ## 3.11.3 - 2021-03-09 diff --git a/Rakefile b/Rakefile index 1e27641f..25b5d58f 100644 --- a/Rakefile +++ b/Rakefile @@ -92,4 +92,3 @@ begin rescue LoadError end - diff --git a/ext/oj/extconf.rb b/ext/oj/extconf.rb index e48ff165..eadb4a16 100644 --- a/ext/oj/extconf.rb +++ b/ext/oj/extconf.rb @@ -26,8 +26,9 @@ have_func('rb_time_timespec') have_func('rb_ivar_count') have_func('rb_ivar_foreach') +# Support for compaction. +have_func('rb_gc_mark_movable') have_func('stpcpy') -have_func('rb_data_object_wrap') have_func('pthread_mutex_init') dflags['OJ_DEBUG'] = true unless ENV['OJ_DEBUG'].nil? diff --git a/ext/oj/fast.c b/ext/oj/fast.c index db99ad9c..c64ccccd 100644 --- a/ext/oj/fast.c +++ b/ext/oj/fast.c @@ -20,6 +20,13 @@ //#define BATCH_SIZE (4096 / sizeof(struct _leaf) - 1) #define BATCH_SIZE 100 +// Support for compaction +#ifdef HAVE_RB_GC_MARK_MOVABLE +#define mark rb_gc_mark_movable +#else +#define mark rb_gc_mark +#endif + typedef struct _batch { struct _batch *next; int next_avail; @@ -699,7 +706,7 @@ static void mark_leaf(Leaf leaf) { } while (e != first); } break; - case RUBY_VAL: rb_gc_mark(leaf->value); break; + case RUBY_VAL: mark(leaf->value); break; default: break; } @@ -709,10 +716,53 @@ static void mark_doc(void *ptr) { if (NULL != ptr) { Doc doc = (Doc)ptr; - rb_gc_mark(doc->self); + mark(doc->self); mark_leaf(doc->data); } } +#ifdef HAVE_RB_GC_MARK_MOVABLE +static void compact_leaf(Leaf leaf) { + switch (leaf->value_type) { + case COL_VAL: + if (NULL != leaf->elements) { + Leaf first = leaf->elements->next; + Leaf e = first; + + do { + compact_leaf(e); + e = e->next; + } while (e != first); + } + break; + case RUBY_VAL: leaf->value = rb_gc_location(leaf->value); break; + + default: break; + } +} + +static void compact_doc(void *ptr) { + Doc doc = (Doc)ptr; + + if (doc) { + doc->self = rb_gc_location(doc->self); + compact_leaf(doc->data); + } +} +#endif + +static const rb_data_type_t oj_doc_type = { + "Oj/doc", + { + mark_doc, + free_doc_cb, + NULL, +#ifdef HAVE_RB_GC_MARK_MOVABLE + compact_doc, +#endif + }, + 0, + 0, +}; static VALUE parse_json(VALUE clas, char *json, bool given, bool allocated) { struct _parseInfo pi; @@ -752,12 +802,7 @@ static VALUE parse_json(VALUE clas, char *json, bool given, bool allocated) { } } #endif - // last arg is free func void* func(void*) -#ifdef HAVE_RB_DATA_OBJECT_WRAP - self = rb_data_object_wrap(clas, doc, mark_doc, free_doc_cb); -#else - self = rb_data_object_alloc(clas, doc, mark_doc, free_doc_cb); -#endif + self = TypedData_Wrap_Struct(clas, &oj_doc_type, doc); doc->self = self; doc->json = json; DATA_PTR(doc->self) = doc; diff --git a/lib/oj/bag.rb b/lib/oj/bag.rb index 528ccb54..0791630c 100644 --- a/lib/oj/bag.rb +++ b/lib/oj/bag.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module Oj diff --git a/test/test_scp.rb b/test/test_scp.rb index bd0451bf..02822bf6 100755 --- a/test/test_scp.rb +++ b/test/test_scp.rb @@ -328,7 +328,7 @@ def test_pipe IO.pipe do |read_io, write_io| if fork write_io.close - Oj.sc_parse(handler, read_io) {|v| p v} + Oj.sc_parse(handler, read_io) read_io.close assert_equal([[:hash_start], [:hash_key, 'one'], From be8283ccfbdad4be81e90180a1099754b67bcbba Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Fri, 9 Apr 2021 10:47:15 +0200 Subject: [PATCH 2/2] Change `clang-format` -> `.clang-format` According to clang's documentation: > When using `-style=file`, **clang-format** for each input file will > try to find the `.clang-format` file located in the closest parent > directory of the input file. When the standard input is used, the > search is started from the current directory. Hence the `.clang-format` seems more appropriate Signed-off-by: Ulysse Buonomo --- clang-format => .clang-format | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename clang-format => .clang-format (100%) diff --git a/clang-format b/.clang-format similarity index 100% rename from clang-format rename to .clang-format