New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New parser #682
Merged
Merged
New parser #682
Changes from 53 commits
Commits
Show all changes
57 commits
Select commit
Hold shift + click to select a range
0c59a6f
WIP
ohler55 7122ada
WIP
ohler55 0b26b15
Add saj with new parser
ohler55 19a8e06
WIP
ohler55 30dd2cf
Update saj and perf tests
ohler55 b1c205e
Stack for keys with saj parser
ohler55 5524feb
Start new parse saj test
ohler55 b734143
WIP
ohler55 8f501e1
test multiple json in one string
ohler55 5b45984
Start on load
ohler55 4452ed1
WIP
ohler55 459ef00
WIP
ohler55 38b64dd
Merge branch 'develop' into new-parser
ohler55 1ef7751
Implement parser_load
ohler55 7dc579a
thread safe option for caching
ohler55 e78d2f1
Intern prep
ohler55 12a1e23
Simpler str intern
ohler55 479b07d
Rework hash for sym and attr
ohler55 3c283bf
WIP
ohler55 26f8239
WIP
ohler55 ec0bd8f
notes
ohler55 da27522
WIP
ohler55 ac13469
Add usual parser array
ohler55 1b8e525
Longer int type needed
ohler55 f697cee
Fix big decimal parse error
ohler55 2f34e18
WIP
ohler55 53b7b33
Usual parser basically working
ohler55 d5bd500
WIP
ohler55 18516d8
WIP
ohler55 3c837ba
WIP
ohler55 0ea2d82
Merge branch 'develop' into new-parser
ohler55 bd47386
WIP
ohler55 fd45b5e
New cache
ohler55 3fa2c72
More options for usual parser
ohler55 6ebf43a
Add decimal option to usual parser
ohler55 01ca0d3
Add docs
ohler55 5d509ad
WIP
ohler55 c37922d
Merge branch 'develop' into new-parser
ohler55 92dd7f1
WIP
ohler55 43e6b05
Usual object handling started
ohler55 4f44759
Usual parser mostly complete
ohler55 fecd7af
New parser cleanup
ohler55 0f41be0
merge in develop
ohler55 9f6b7a3
notes
ohler55 3f7b1ab
Add stdbool.h
ohler55 6b2ae5a
Add stdbool.h
ohler55 479b844
Add stdbool.h
ohler55 cd1ce4b
Support older versions of Ruby
ohler55 5e7bf26
Add macos 3.0 CI
ohler55 6efaf30
Bypass broken have_func check for Ruby 2.6.x
ohler55 7bb4b6f
Fiddle with CI
ohler55 28f9769
Merge branch 'develop' into new-parser
ohler55 b28afef
Fix memory leaks
ohler55 24c66a2
Mark cache instead of register
ohler55 fff36d8
Add default parsers
ohler55 3cd2b86
Update parser docs
ohler55 9a931bf
Update ext/oj/parser.c
ohler55 File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
// Copyright (c) 2011, 2021 Peter Ohler. All rights reserved. | ||
// Licensed under the MIT License. See LICENSE file in the project root for license details. | ||
|
||
#include "cache.h" | ||
|
||
#define REHASH_LIMIT 64 | ||
#define MIN_SHIFT 8 | ||
|
||
typedef struct _slot { | ||
struct _slot *next; | ||
uint32_t hash; | ||
uint8_t klen; | ||
char key[CACHE_MAX_KEY]; | ||
VALUE val; | ||
} * Slot; | ||
|
||
typedef struct _cache { | ||
Slot * slots; | ||
size_t cnt; | ||
VALUE (*form)(const char *str, size_t len); | ||
uint32_t size; | ||
uint32_t mask; | ||
bool reg; | ||
} * Cache; | ||
|
||
// almost the Murmur hash algorithm | ||
// M is the multiplier hash_calc() applies in each of its mixing steps. | ||
#define M 0x5bd1e995 | ||
// NOTE(review): C1, C2 and N are not referenced by any code visible in this file - confirm before removing. | ||
#define C1 0xCC9E2D51 | ||
#define C2 0x1B873593 | ||
#define N 0xE6546B64 | ||
|
||
// Replaces the callback the cache uses to build the value for a key that is
// not cached yet. Entries already stored in the cache are not touched.
void
cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
    c->form = form;
}
|
||
#if 0
// For debugging only.
// Prints one line per bucket: the bucket index followed by every key chained
// in that bucket.
static void cache_print(Cache c) {
    for (uint32_t i = 0; i < c->size; i++) {
        // The index is unsigned, so use an unsigned conversion.
        printf("%4u:", (unsigned int)i);
        for (Slot s = c->slots[i]; NULL != s; s = s->next) {
            // Keys are stored without a terminating NUL; bound the output by
            // klen rather than copying into a scratch buffer first.
            printf(" %.*s", (int)s->klen, s->key);
        }
        printf("\n");
    }
}
#endif
|
||
static uint32_t hash_calc(const uint8_t *key, size_t len) { | ||
const uint8_t *end = key + len; | ||
const uint8_t *endless = key + (len & 0xFFFFFFFC); | ||
uint32_t h = (uint32_t)len; | ||
uint32_t k; | ||
|
||
while (key < endless) { | ||
k = (uint32_t)*key++; | ||
k |= (uint32_t)*key++ << 8; | ||
k |= (uint32_t)*key++ << 16; | ||
k |= (uint32_t)*key++ << 24; | ||
|
||
k *= M; | ||
k ^= k >> 24; | ||
h *= M; | ||
h ^= k * M; | ||
} | ||
if (1 < end - key) { | ||
uint16_t k16 = (uint16_t)*key++; | ||
|
||
k16 |= (uint16_t)*key++ << 8; | ||
h ^= k16 << 8; | ||
} | ||
if (key < end) { | ||
h ^= *key; | ||
} | ||
h *= M; | ||
h ^= h >> 13; | ||
h *= M; | ||
h ^= h >> 15; | ||
|
||
return h; | ||
} | ||
|
||
// Allocates an empty cache.
//
// size - hint for the expected number of entries. It is halved until it no
//        longer exceeds REHASH_LIMIT and the number of halvings becomes the
//        power of two used for the bucket count, never less than MIN_SHIFT.
// form - callback that builds the value for a key that is not cached yet.
// reg  - when true, cache_intern() passes each stored value to
//        rb_gc_register_address().
//
// Returns the new cache; release it with cache_free().
Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool reg) {
    // Largest shift for which (uint32_t)1 << shift still fits in c->size.
    enum { MAX_SHIFT = 31 };

    Cache c     = ALLOC(struct _cache);
    int   shift = 0;

    // Stop at MAX_SHIFT so that a huge size hint cannot push the shift below
    // past the width of the type.
    for (; REHASH_LIMIT < size && shift < MAX_SHIFT; size /= 2, shift++) {
    }
    if (shift < MIN_SHIFT) {
        shift = MIN_SHIFT;
    }
    // Shift an unsigned one: shifting a signed 1 by 31 is undefined.
    c->size  = (uint32_t)1 << shift;
    c->mask  = c->size - 1;
    c->slots = ALLOC_N(Slot, c->size);
    memset(c->slots, 0, sizeof(Slot) * c->size);
    c->form = form;
    c->cnt  = 0;
    c->reg  = reg;

    return c;
}
|
||
// Grows the bucket array to four times its current size and redistributes
// every slot using the hash stored in the slot.
static void rehash(Cache c) {
    uint32_t osize = c->size;
    uint32_t nsize = osize * 4;

    // REALLOC_N raises instead of returning when memory runs out, so grow the
    // array first and only then publish the new size and mask. Updating them
    // beforehand would leave the cache describing buckets that do not exist.
    REALLOC_N(c->slots, Slot, nsize);
    memset(c->slots + osize, 0, sizeof(Slot) * (size_t)(nsize - osize));
    c->size = nsize;
    c->mask = nsize - 1;

    Slot *end = c->slots + osize;
    for (Slot *sp = c->slots; sp < end; sp++) {
        Slot s    = *sp;
        Slot next = NULL;

        // Detach the old chain before re-inserting. The new mask only adds
        // high bits to the old one, so each slot either returns to this
        // bucket or moves to one of the newly added buckets.
        *sp = NULL;
        for (; NULL != s; s = next) {
            next = s->next;

            Slot *bucket = c->slots + (s->hash & c->mask);

            s->next = *bucket;
            *bucket = s;
        }
    }
}
|
||
// Releases every slot, the bucket array, and the cache itself. The cache must
// not be used after this call.
void cache_free(Cache c) {
    for (uint32_t i = 0; i < c->size; i++) {
        Slot next;
        for (Slot s = c->slots[i]; NULL != s; s = next) {
            next = s->next;
            if (c->reg) {
                // cache_intern() registered &s->val with the GC. Drop that
                // registration before the memory behind the address is
                // released, otherwise the GC keeps a dangling pointer.
                rb_gc_unregister_address(&s->val);
            }
            xfree(s);
        }
    }
    xfree(c->slots);
    xfree(c);
}
|
||
VALUE | ||
cache_intern(Cache c, const char *key, size_t len) { | ||
if (CACHE_MAX_KEY < len) { | ||
return c->form(key, len); | ||
} | ||
uint32_t h = hash_calc((const uint8_t *)key, len); | ||
Slot * bucket = c->slots + (h & c->mask); | ||
Slot b; | ||
Slot tail = NULL; | ||
|
||
for (b = *bucket; NULL != b; b = b->next) { | ||
if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) { | ||
return b->val; | ||
} | ||
tail = b; | ||
} | ||
b = ALLOC(struct _slot); | ||
b->hash = h; | ||
b->next = NULL; | ||
if (NULL == tail) { | ||
*bucket = b; | ||
} else { | ||
tail->next = b; | ||
} | ||
memcpy(b->key, key, len); | ||
b->klen = (uint8_t)len; | ||
b->val = c->form(key, len); | ||
if (c->reg) { | ||
rb_gc_register_address(&b->val); | ||
} | ||
c->cnt++; | ||
if (REHASH_LIMIT < c->cnt / c->size) { | ||
rehash(c); | ||
} | ||
return b->val; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (c) 2021 Peter Ohler. All rights reserved. | ||
// Licensed under the MIT License. See LICENSE file in the project root for license details. | ||
|
||
#ifndef CACHE_H
#define CACHE_H

#include <stdbool.h>
#include <ruby.h>

// Longest key, in bytes, that the cache stores. cache_intern() hands longer
// keys straight to the form callback without caching the result.
#define CACHE_MAX_KEY 35

struct _cache;

// Creates an empty cache. size is a hint for the expected number of entries,
// form builds the value for a key that is not cached yet, and reg selects
// whether each stored value is passed to rb_gc_register_address().
struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool reg);

// Releases the cache and every entry it holds.
void cache_free(struct _cache *c);

// Replaces the callback used to build values for keys that are not cached yet.
void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));

// Returns the value for the len bytes at key, building and caching it with
// the form callback the first time the key is seen.
VALUE cache_intern(struct _cache *c, const char *key, size_t len);

#endif /* CACHE_H */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the object passed to `rb_gc_register_address()` is no longer needed, you have to call `rb_gc_unregister_address()` in `cache_free()` to release the wasted object.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We might need to check other places that use `rb_gc_register_address()` as well to avoid memory leaks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. The cache_free will have to do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to rb_gc_mark. That seems to be faster as well in the grand scheme of things.
It was a bit surprising to note that the GC runs concurrently with the main thread. I wonder if a lock is needed for the parser to avoid a race condition with marking and parsing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added `Oj::Parser.usual`, which can be used like `Oj::Parser.usual.parse('[true]')`, similar to `Oj::load`.