Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New parser #682

Merged
merged 57 commits into from Aug 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
0c59a6f
WIP
ohler55 Jul 11, 2021
7122ada
WIP
ohler55 Jul 12, 2021
0b26b15
Add saj with new parser
ohler55 Jul 13, 2021
19a8e06
WIP
ohler55 Jul 15, 2021
30dd2cf
Update saj and perf tests
ohler55 Jul 15, 2021
b1c205e
Stack for keys with saj parser
ohler55 Jul 16, 2021
5524feb
Start new parse saj test
ohler55 Jul 16, 2021
b734143
WIP
ohler55 Jul 16, 2021
8f501e1
test multiple json in one string
ohler55 Jul 18, 2021
5b45984
Start on load
ohler55 Jul 22, 2021
4452ed1
WIP
ohler55 Jul 23, 2021
459ef00
WIP
ohler55 Jul 25, 2021
38b64dd
Merge branch 'develop' into new-parser
ohler55 Jul 25, 2021
1ef7751
Implement parser_load
ohler55 Jul 26, 2021
7dc579a
thread safe option for caching
ohler55 Jul 27, 2021
e78d2f1
Intern prep
ohler55 Jul 29, 2021
12a1e23
Simpler str intern
ohler55 Jul 29, 2021
479b07d
Rework hash for sym and attr
ohler55 Jul 29, 2021
3c283bf
WIP
ohler55 Jul 30, 2021
26f8239
WIP
ohler55 Jul 30, 2021
ec0bd8f
notes
ohler55 Jul 30, 2021
da27522
WIP
ohler55 Aug 1, 2021
ac13469
Add usual parser array
ohler55 Aug 1, 2021
1b8e525
Longer int type needed
ohler55 Aug 1, 2021
f697cee
Fix big decimal parse error
ohler55 Aug 1, 2021
2f34e18
WIP
ohler55 Aug 2, 2021
53b7b33
Usual parser basically working
ohler55 Aug 2, 2021
d5bd500
WIP
ohler55 Aug 3, 2021
18516d8
WIP
ohler55 Aug 3, 2021
3c837ba
WIP
ohler55 Aug 3, 2021
0ea2d82
Merge branch 'develop' into new-parser
ohler55 Aug 3, 2021
bd47386
WIP
ohler55 Aug 3, 2021
fd45b5e
New cache
ohler55 Aug 3, 2021
3fa2c72
More options for usual parser
ohler55 Aug 4, 2021
6ebf43a
Add decimal option to usual parser
ohler55 Aug 4, 2021
01ca0d3
Add docs
ohler55 Aug 5, 2021
5d509ad
WIP
ohler55 Aug 5, 2021
c37922d
Merge branch 'develop' into new-parser
ohler55 Aug 5, 2021
92dd7f1
WIP
ohler55 Aug 6, 2021
43e6b05
Usual object handling started
ohler55 Aug 6, 2021
4f44759
Usual parser mostly complete
ohler55 Aug 7, 2021
fecd7af
New parser cleanup
ohler55 Aug 7, 2021
0f41be0
merge in develop
ohler55 Aug 7, 2021
9f6b7a3
notes
ohler55 Aug 7, 2021
3f7b1ab
Add stdbool.h
ohler55 Aug 7, 2021
6b2ae5a
Add stdbool.h
ohler55 Aug 7, 2021
479b844
Add stdbool.h
ohler55 Aug 7, 2021
cd1ce4b
Support older versions of Ruby
ohler55 Aug 7, 2021
5e7bf26
Add macos 3.0 CI
ohler55 Aug 7, 2021
6efaf30
Bypass broken have_func check for Ruby 2.6.x
ohler55 Aug 7, 2021
7bb4b6f
Fiddle with CI
ohler55 Aug 7, 2021
28f9769
Merge branch 'develop' into new-parser
ohler55 Aug 7, 2021
b28afef
Fix memory leaks
ohler55 Aug 7, 2021
24c66a2
Mark cache instead of register
ohler55 Aug 8, 2021
fff36d8
Add default parsers
ohler55 Aug 8, 2021
3cd2b86
Update parser docs
ohler55 Aug 8, 2021
9a931bf
Update ext/oj/parser.c
ohler55 Aug 8, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .clang-format
Expand Up @@ -50,7 +50,7 @@ BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/CI.yml
Expand Up @@ -31,11 +31,10 @@ jobs:
exclude:
- os: macos
ruby: head
- os: macos
ruby: '3.0'
- os: macos
ruby: '2.5'
- ruby: '3.0'
gemfile: rails_5
- ruby: '3.0'
gemfile: rails_6

env:
BUNDLE_GEMFILE: gemfiles/${{ matrix.gemfile }}.gemfile
Expand Down
9 changes: 9 additions & 0 deletions ext/oj/buf.h
Expand Up @@ -19,6 +19,10 @@ inline static void buf_init(Buf buf) {
buf->tail = buf->head;
}

// Discard the buffered content by moving the write position (tail) back to the start (head).
// Nothing is freed or reallocated here; whatever storage head points at is kept as is.
inline static void buf_reset(Buf buf) {
buf->tail = buf->head;
}

inline static void buf_cleanup(Buf buf) {
if (buf->base != buf->head) {
xfree(buf->head);
Expand All @@ -29,6 +33,11 @@ inline static size_t buf_len(Buf buf) {
return buf->tail - buf->head;
}

// Return the buffered content as a C string.
// A '\0' is written at the current write position (tail) and the start of the buffer (head) is
// returned. tail itself is not advanced.
// NOTE(review): assumes there is always at least one writable byte at tail - confirm that every
// append path leaves room for the terminator.
inline static const char *buf_str(Buf buf) {
*buf->tail = '\0';
return buf->head;
}

inline static void buf_append_string(Buf buf, const char *s, size_t slen) {
if (buf->end <= buf->tail + slen) {
size_t len = buf->end - buf->head;
Expand Down
187 changes: 187 additions & 0 deletions ext/oj/cache.c
@@ -0,0 +1,187 @@
// Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
// Licensed under the MIT License. See LICENSE file in the project root for license details.

#include "cache.h"

// cache_intern() grows the table once the number of entries divided by the number of buckets
// (integer division) exceeds this. cache_create() also halves the requested size until it is no
// larger than this when deriving the initial bucket count.
#define REHASH_LIMIT 64
// Lower bound on the table size exponent: cache_create() never uses fewer than 1 << MIN_SHIFT
// buckets.
#define MIN_SHIFT 8

// One cached key/value entry. Entries that land in the same bucket are chained through next.
typedef struct _slot {
// Next entry in the same bucket, NULL at the end of the chain.
struct _slot *next;
// Value the cache's form callback produced for this key.
VALUE val;
// Full hash of the key as computed by hash_calc(); rehash() masks it again to pick a new bucket.
uint32_t hash;
// Number of key bytes stored in key.
uint8_t klen;
// Inline copy of the key bytes; cache_intern() also writes a '\0' right after them.
char key[CACHE_MAX_KEY];
} * Slot;

// A chained hash table mapping short keys to Ruby VALUEs.
typedef struct _cache {
// Array of size bucket heads; an empty bucket is NULL.
Slot * slots;
// Number of entries currently stored across all buckets.
size_t cnt;
// Callback that builds the VALUE for a key that is not in the cache.
VALUE (*form)(const char *str, size_t len);
// Number of buckets. Starts as a power of two and is multiplied by 4 on each rehash().
uint32_t size;
// size - 1, ANDed with a hash to select a bucket.
uint32_t mask;
// When true, cache_mark() marks every cached value; when false cache_mark() does nothing.
bool mark;
} * Cache;

// almost the Murmur hash algorithm
// M is the multiplier hash_calc() uses for both the per-word mixing and the final mixing steps.
// C1, C2 and N are not referenced by any code in this file.
#define M 0x5bd1e995
#define C1 0xCC9E2D51
#define C2 0x1B873593
#define N 0xE6546B64

// Replace the callback used to build the VALUE for keys that are not yet cached.
// Only the callback pointer is changed; entries already stored keep the value they were created
// with.
void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
c->form = form;
}

#if 0
// For debugging only.
// Print one line per bucket: the bucket index followed by every key chained in that bucket.
static void cache_print(Cache c) {
    for (uint32_t i = 0; i < c->size; i++) {
        // The index is unsigned, so it is printed with an unsigned conversion and a matching
        // argument type instead of %d.
        printf("%4u:", (unsigned int)i);
        for (Slot s = c->slots[i]; NULL != s; s = s->next) {
            // Limit the output to klen bytes directly rather than copying the key into a
            // fixed-size scratch buffer first.
            printf(" %.*s", (int)s->klen, s->key);
        }
        printf("\n");
    }
}
#endif

// Compute a 32 bit hash of len bytes starting at key (a Murmur-like mix).
//
// The key is consumed as little-endian 4 byte words, then an optional 2 byte remainder, then an
// optional final byte, and the accumulator is scrambled once more before it is returned. The
// accumulator is seeded with the key length.
static uint32_t hash_calc(const uint8_t *key, size_t len) {
    const uint8_t *cursor    = key;
    const uint8_t *stop      = key + len;
    const uint8_t *word_stop = key + (len & 0xFFFFFFFC);
    uint32_t       acc       = (uint32_t)len;

    // Whole 4 byte words.
    for (; cursor < word_stop; cursor += 4) {
        uint32_t word = (uint32_t)cursor[0] | ((uint32_t)cursor[1] << 8) | ((uint32_t)cursor[2] << 16) |
                        ((uint32_t)cursor[3] << 24);

        word *= M;
        word ^= word >> 24;
        acc *= M;
        acc ^= word * M;
    }
    // Two of the remaining (at most three) bytes.
    if (1 < stop - cursor) {
        uint16_t pair = (uint16_t)cursor[0];

        pair |= (uint16_t)cursor[1] << 8;
        acc ^= pair << 8;
        cursor += 2;
    }
    // The last odd byte, if any.
    if (cursor < stop) {
        acc ^= *cursor;
    }
    // Final avalanche.
    acc *= M;
    acc ^= acc >> 13;
    acc *= M;
    acc ^= acc >> 15;

    return acc;
}

// Allocate and return an empty cache.
//
// size - hint used to pick the number of buckets: it is halved until it is no larger than
//        REHASH_LIMIT and the number of halvings becomes the table size exponent, with
//        MIN_SHIFT as the lower bound.
// form - callback that builds the VALUE for a key that is not cached yet.
// mark - stored in the cache; cache_mark() only marks values when it is true.
Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark) {
    Cache cache    = ALLOC(struct _cache);
    int   exponent = 0;

    while (REHASH_LIMIT < size) {
        size /= 2;
        exponent++;
    }
    if (exponent < MIN_SHIFT) {
        exponent = MIN_SHIFT;
    }
    cache->form = form;
    cache->mark = mark;
    cache->cnt  = 0;
    cache->size = 1 << exponent;
    cache->mask = cache->size - 1;

    // Every bucket starts out empty.
    cache->slots = ALLOC_N(Slot, cache->size);
    memset(cache->slots, 0, sizeof(Slot) * cache->size);

    return cache;
}

// Grow the bucket array to four times its size and redistribute every entry.
//
// Each old bucket is detached and its entries are pushed onto the head of the bucket selected by
// their stored hash and the new mask. An entry either returns to the bucket it came from or moves
// to one of the newly added buckets, so walking only the old range visits every entry once.
static void rehash(Cache c) {
    const uint32_t old_size = c->size;

    c->size = old_size * 4;
    c->mask = c->size - 1;
    REALLOC_N(c->slots, Slot, c->size);
    // Only the newly added three quarters need clearing.
    memset(&c->slots[old_size], 0, sizeof(Slot) * old_size * 3);

    for (uint32_t i = 0; i < old_size; i++) {
        Slot pending = c->slots[i];

        c->slots[i] = NULL;
        while (NULL != pending) {
            Slot moving = pending;

            pending = pending->next;

            Slot *home = &c->slots[moving->hash & c->mask];

            moving->next = *home;
            *home        = moving;
        }
    }
}

// Release everything owned by the cache: each entry in each bucket, the bucket array, and the
// cache struct itself. The cache must not be used after this call.
void cache_free(Cache c) {
    Slot *const stop = c->slots + c->size;

    for (Slot *bucket = c->slots; bucket < stop; bucket++) {
        Slot entry = *bucket;

        while (NULL != entry) {
            Slot doomed = entry;

            // Step forward before the entry is released.
            entry = entry->next;
            xfree(doomed);
        }
    }
    xfree(c->slots);
    xfree(c);
}

// Mark every cached value with rb_gc_mark(), but only if the cache was created with mark set.
// Otherwise this is a no-op.
void cache_mark(Cache c) {
    if (!c->mark) {
        return;
    }
    Slot *const stop = c->slots + c->size;

    for (Slot *bucket = c->slots; bucket < stop; bucket++) {
        for (Slot entry = *bucket; NULL != entry; entry = entry->next) {
            rb_gc_mark(entry->val);
        }
    }
}

// Return the VALUE associated with key (len bytes, not required to be NUL terminated).
//
// On a hit the cached value is returned. On a miss the value is built with the cache's form
// callback, stored in a new slot appended to the end of the bucket's chain, and returned. The
// table is grown with rehash() once the average number of entries per bucket exceeds
// REHASH_LIMIT.
//
// Keys that do not fit in a slot (CACHE_MAX_KEY or more bytes) are never cached; the result of
// the form callback is returned directly each time.
VALUE
cache_intern(Cache c, const char *key, size_t len) {
    // A slot's key buffer is CACHE_MAX_KEY bytes and a '\0' is stored after the key, so the key
    // itself must be strictly shorter than CACHE_MAX_KEY or the terminator would be written past
    // the end of the buffer.
    if (CACHE_MAX_KEY <= len) {
        return c->form(key, len);
    }
    uint32_t h      = hash_calc((const uint8_t *)key, len);
    Slot *   bucket = c->slots + (h & c->mask);
    Slot     b;
    Slot     tail = NULL;

    for (b = *bucket; NULL != b; b = b->next) {
        // Compare all len bytes. strncmp() would stop at an embedded '\0' and treat two
        // different keys of the same length as equal when they only differ after it.
        if ((uint8_t)len == b->klen && 0 == memcmp(b->key, key, len)) {
            return b->val;
        }
        tail = b;
    }
    // Miss: build a new entry.
    b       = ALLOC(struct _slot);
    b->hash = h;
    b->next = NULL;
    memcpy(b->key, key, len);
    b->klen     = (uint8_t)len;
    b->key[len] = '\0';
    b->val      = c->form(key, len);
    // Append after the last entry visited, or start the chain if the bucket was empty.
    if (NULL == tail) {
        *bucket = b;
    } else {
        tail->next = b;
    }
    c->cnt++;
    if (REHASH_LIMIT < c->cnt / c->size) {
        // rehash() relinks slots but does not free them, so b stays valid.
        rehash(c);
    }
    return b->val;
}
20 changes: 20 additions & 0 deletions ext/oj/cache.h
@@ -0,0 +1,20 @@
// Copyright (c) 2021 Peter Ohler. All rights reserved.
// Licensed under the MIT License. See LICENSE file in the project root for license details.

#ifndef CACHE_H
#define CACHE_H

#include <ruby.h>
#include <stdbool.h>

// Size in bytes of the key buffer embedded in each cache entry. Keys too long to fit are not
// cached; cache_intern() hands them straight to the form callback.
#define CACHE_MAX_KEY 35

// Opaque cache handle; the struct is defined in cache.c.
struct _cache;

// Create an empty cache. size is a hint used to pick the initial number of buckets (never fewer
// than 256), form builds the VALUE for a key that is not cached yet, and mark selects whether
// cache_mark() marks the cached values.
extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
// Free every cached entry, the bucket array and the cache itself.
extern void cache_free(struct _cache *c);
// Call rb_gc_mark() on every cached value if the cache was created with mark set; otherwise do
// nothing.
extern void cache_mark(struct _cache *c);
// Replace the form callback. Values already cached are left unchanged.
extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
// Return the value for the len bytes at key, creating it with the form callback and caching it
// when it is not already present.
extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);

#endif /* CACHE_H */
21 changes: 4 additions & 17 deletions ext/oj/compat.c
Expand Up @@ -5,7 +5,7 @@

#include "encode.h"
#include "err.h"
#include "hash.h"
#include "intern.h"
#include "oj.h"
#include "parse.h"
#include "resolve.h"
Expand Down Expand Up @@ -33,23 +33,10 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
}
} else if (Yes == pi->options.sym_key) {
rkey = oj_sym_intern(key, klen);
} else {
VALUE *slot;

if (Yes == pi->options.sym_key) {
if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
*slot = rkey;
rb_gc_register_address(slot);
}
} else {
if (Qnil == (rkey = oj_str_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
}
rkey = oj_str_intern(key, klen);
}
}
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
Expand Down
2 changes: 1 addition & 1 deletion ext/oj/custom.c
Expand Up @@ -8,7 +8,7 @@
#include "dump.h"
#include "encode.h"
#include "err.h"
#include "hash.h"
#include "intern.h"
#include "odd.h"
#include "oj.h"
#include "parse.h"
Expand Down