Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some errors #183

Merged
merged 9 commits into from Dec 18, 2017
116 changes: 68 additions & 48 deletions ext/yajl/yajl_ext.c
Expand Up @@ -45,12 +45,12 @@ static void yajl_check_and_fire_callback(void * ctx) {

/* No need to do any of this if the callback isn't even set up */
if (wrapper->parse_complete_callback != Qnil) {
int len = RARRAY_LEN(wrapper->builderStack);
long len = RARRAY_LEN(wrapper->builderStack);
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack));
}
} else {
int len = RARRAY_LEN(wrapper->builderStack);
long len = RARRAY_LEN(wrapper->builderStack);
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
wrapper->objectsFound++;
if (wrapper->objectsFound > 1) {
Expand Down Expand Up @@ -82,7 +82,7 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
static void yajl_set_static_value(void * ctx, VALUE val) {
yajl_parser_wrapper * wrapper;
VALUE lastEntry, hash;
int len;
long len;

GetParser((VALUE)ctx, wrapper);

Expand Down Expand Up @@ -204,29 +204,29 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
case T_BIGNUM:
str = rb_funcall(obj, intern_to_s, 0);
cptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
len = (unsigned int)RSTRING_LEN(str);
if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
rb_raise(cEncodeError, "'%s' is an invalid number", cptr);
}
CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
break;
case T_STRING:
cptr = RSTRING_PTR(obj);
len = RSTRING_LEN(obj);
len = (unsigned int)RSTRING_LEN(obj);
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
break;
default:
if (rb_respond_to(obj, intern_to_json)) {
str = rb_funcall(obj, intern_to_json, 0);
Check_Type(str, T_STRING);
cptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
len = (unsigned int)RSTRING_LEN(str);
CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
} else {
str = rb_funcall(obj, intern_to_s, 0);
Check_Type(str, T_STRING);
cptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
len = (unsigned int)RSTRING_LEN(str);
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
}
break;
Expand Down Expand Up @@ -495,13 +495,13 @@ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self) {

if (TYPE(input) == T_STRING) {
cptr = RSTRING_PTR(input);
len = RSTRING_LEN(input);
len = (unsigned int)RSTRING_LEN(input);
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
} else if (rb_respond_to(input, intern_io_read)) {
VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize));
while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) {
cptr = RSTRING_PTR(parsed);
len = RSTRING_LEN(parsed);
len = (unsigned int)RSTRING_LEN(parsed);
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
}
} else {
Expand Down Expand Up @@ -541,7 +541,7 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {

if (wrapper->parse_complete_callback != Qnil) {
const char * cptr = RSTRING_PTR(chunk);
len = RSTRING_LEN(chunk);
len = (unsigned int)RSTRING_LEN(chunk);
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
} else {
rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
Expand Down Expand Up @@ -616,11 +616,11 @@ static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop)
yajl_tok token;
if (pop == 0) {
//printf("peeking %p %ld %d\n", RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset);
//printf("peeked event %d\n", token);

if (token == yajl_tok_eof) {
parser->offset = RSTRING_LEN(parser->buffer);
parser->offset = (unsigned int)RSTRING_LEN(parser->buffer);
continue;
}

Expand All @@ -630,7 +630,7 @@ static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop)
}

//printf("popping\n");
token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
//printf("popped event %d\n", token);

if (token == yajl_tok_eof) {
Expand All @@ -652,20 +652,19 @@ static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser);
static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event);
static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event);

static VALUE rb_yajl_projector_filter_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
assert(parser->stream);

if (event.token == yajl_tok_left_brace) {
return rb_yajl_projector_filter_array_subtree(parser, schema, event);
}

if (event.token == yajl_tok_left_bracket) {
return rb_yajl_projector_filter_object_subtree(parser, schema, event);
switch(event.token) {
case yajl_tok_left_brace:
return rb_yajl_projector_filter_array_subtree(parser, schema, event);
break;
case yajl_tok_left_bracket:
return rb_yajl_projector_filter_object_subtree(parser, schema, event);
break;
default:
return rb_yajl_projector_build_simple_value(parser, event);
}

rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));

return Qnil;
}

static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
Expand All @@ -680,12 +679,7 @@ static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser,
break;
}

VALUE val;
if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
val = rb_yajl_projector_filter_subtree(parser, schema, event);
} else {
val = rb_yajl_projector_build_simple_value(parser, event);
}
VALUE val = rb_yajl_projector_filter(parser, schema, event);
rb_ary_push(ary, val);

event = yajl_event_stream_next(parser, 0);
Expand Down Expand Up @@ -738,20 +732,16 @@ static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser,

yajl_event_t value_event = yajl_event_stream_next(parser, 1);

VALUE val;
if (value_event.token == yajl_tok_left_bracket || value_event.token == yajl_tok_left_brace) {
VALUE key_schema;
if (schema == Qnil) {
key_schema = Qnil;
} else {
key_schema = rb_hash_aref(schema, key);
}

val = rb_yajl_projector_filter_subtree(parser, key_schema, value_event);
VALUE key_schema;
if (schema == Qnil) {
key_schema = Qnil;
} else {
val = rb_yajl_projector_build_simple_value(parser, value_event);
key_schema = rb_hash_aref(schema, key);
}


VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event);

rb_str_freeze(key);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

rb_hash_aset(hsh, key, val);

peek_comma:
Expand Down Expand Up @@ -802,7 +792,7 @@ static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) {
return;
}

rb_raise(cStandardError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
}

/*
Expand Down Expand Up @@ -858,7 +848,7 @@ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, ya
val = rb_cstr2inum(buf, 10);
}
free(buf);

return val;

case yajl_tok_string:;
Expand All @@ -868,6 +858,12 @@ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, ya
case yajl_tok_eof:;
rb_raise(cParseError, "unexpected eof while constructing value");

case yajl_tok_comma:
rb_raise(cParseError, "unexpected comma while constructing value");

case yajl_tok_colon:
rb_raise(cParseError, "unexpected colon while constructing value");

default:;
assert(0);
}
Expand Down Expand Up @@ -912,6 +908,13 @@ static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_eve
}
}

/*
 * Single-argument trampoline around rb_yajl_projector_filter, so the filter
 * can be invoked through a callback that only receives one VALUE.
 *
 * `pointer` is a VALUE-cast pointer to three VALUE slots:
 *   [0] pointer to the event stream (struct yajl_event_stream_s *)
 *   [1] the schema VALUE
 *   [2] pointer to the yajl_event_t to start from (passed on by value)
 *
 * Returns whatever rb_yajl_projector_filter returns.
 */
static VALUE rb_protected_yajl_projector_filter(VALUE pointer) {
    VALUE *packed = (VALUE *)pointer;
    struct yajl_event_stream_s *event_stream = (struct yajl_event_stream_s *)packed[0];
    VALUE schema = packed[1];
    yajl_event_t *first_event = (yajl_event_t *)packed[2];

    return rb_yajl_projector_filter(event_stream, schema, *first_event);
}

/*
* Document-method: project
*/
Expand All @@ -930,18 +933,35 @@ static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) {
.stream = stream,

.buffer = buffer,
.offset = buffer_size,
.offset = (unsigned int)buffer_size,

.lexer = yajl_lex_alloc(&allocFuncs, 0, 1),
};

VALUE result = rb_yajl_projector_filter_subtree(&parser, schema, yajl_event_stream_next(&parser, 1));

yajl_lex_free(parser.lexer);
yajl_event_t event = yajl_event_stream_next(&parser, 1);

RB_GC_GUARD(stream);
RB_GC_GUARD(buffer);

VALUE result;
int state = 0;

if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
VALUE args[3];
args[0] = (VALUE)&parser;
args[1] = schema;
args[2] = (VALUE)&event;
result = rb_protect(rb_protected_yajl_projector_filter,
(VALUE)args,
&state);
} else {
yajl_lex_free(parser.lexer);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch 👍

rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
}

yajl_lex_free(parser.lexer);
if (state) rb_jump_tag(state);

return result;
}

Expand Down
59 changes: 58 additions & 1 deletion spec/projection/projection.rb
Expand Up @@ -18,6 +18,63 @@
expect(projection['age']).to eql(nil)
end

# A bare word is not a JSON array or object; projecting it must raise ParseError.
it "should raise an exception and not leak memory" do
  projector = Yajl::Projector.new(StringIO.new('foo'))
  schema = {"name" => nil}
  expect { projector.project(schema) }.to raise_error(Yajl::ParseError)
end

# An array containing only commas must be reported as a ParseError.
it "should raise an exception and not segv" do
  projector = Yajl::Projector.new(StringIO.new('[,,,,]'))
  schema = {"name" => nil}
  expect { projector.project(schema) }.to raise_error(Yajl::ParseError)
end

# An array containing only colons must be reported as a ParseError.
it "should raise an exception and not segv on colons" do
  projector = Yajl::Projector.new(StringIO.new('[::::]'))
  schema = {"name" => nil}
  expect { projector.project(schema) }.to raise_error(Yajl::ParseError)
end

# A stray comma where a value belongs: the projector must raise the same
# exception class as the regular parser does for the identical input.
it "should behave the same way as the regular parser on bad tokens like comma" do
  bad_json = '{"name": "keith", "age":, 27}'
  projector = Yajl::Projector.new(StringIO.new(bad_json))
  expected_class = capture_exception_for(bad_json).class
  expect { projector.project({"name" => nil}) }.to raise_error(expected_class)
end

# A doubled colon after a key: the projector must raise the same exception
# class as the regular parser does for the identical input.
it "should behave the same way as the regular parser on bad tokens like colon" do
  bad_json = '{"name": "keith", "age":: 27}'
  projector = Yajl::Projector.new(StringIO.new(bad_json))
  expected_class = capture_exception_for(bad_json).class
  expect { projector.project({"name" => nil}) }.to raise_error(expected_class)
end

# Input that ends right after a key's colon: the projector must raise the same
# exception class as the regular parser does for the identical input.
it "should behave the same way as the regular parser on not enough json" do
  bad_json = '{"name": "keith", "age":'
  projector = Yajl::Projector.new(StringIO.new(bad_json))
  expected_class = capture_exception_for(bad_json).class
  expect { projector.project({"name" => nil}) }.to raise_error(expected_class)
end

# Feeds +bad_json+ to a fresh Yajl::Parser and returns the exception it raises,
# so specs can compare the projector's error class with the regular parser's.
# If parsing raises nothing, the parse result itself is returned.
def capture_exception_for(bad_json)
  begin
    Yajl::Parser.new.parse(bad_json)
  rescue Exception => error
    error
  end
end

def project(schema, over: "", json: nil, stream: nil)
if stream.nil?
if json.nil?
Expand Down Expand Up @@ -438,4 +495,4 @@ def project(schema, over: "", json: nil, stream: nil)
it "handles objects whose second key has escape sequences" do
expect(project(nil, json: '{"foo": "bar", "\ud83d\ude00": "grinning face"}')).to eql({"foo" => "bar", "😀" => "grinning face"})
end
end
end