# Reviewer summary. Text ahead of the first "diff --git" header is not part of
# any hunk.
#
# ext/yajl/yajl_ext.c
#   * Builder-stack lengths read with RARRAY_LEN are now stored in `long`
#     instead of `int`.
#   * Every RSTRING_LEN result that is stored in `len`, stored in
#     parser->offset, or handed to yajl_lex_peek / yajl_lex_lex is cast to
#     `unsigned int`.
#     NOTE(review): presumably RSTRING_LEN is wider than unsigned int, in
#     which case the cast silently truncates very large strings -- confirm.
#   * rb_yajl_projector_filter_subtree is renamed rb_yajl_projector_filter.
#     For tokens other than left brace / left bracket it now calls
#     rb_yajl_projector_build_simple_value instead of raising, so the array
#     and object subtree builders drop their own container checks.
#     NOTE(review): the `break;` after each `return` in the new switch is
#     unreachable.
#   * Object keys are passed to rb_str_freeze before rb_hash_aset.
#   * rb_yajl_projector_ignore_value raises cParseError rather than
#     cStandardError.
#   * rb_yajl_projector_build_simple_value raises cParseError for comma and
#     colon tokens; other unlisted tokens still reach `assert(0)`.
#   * rb_yajl_projector_project reads the first event itself, raises
#     cParseError (after freeing the lexer) unless it is a left brace or left
#     bracket, and otherwise runs the filter through rb_protect so the lexer
#     is freed before rb_jump_tag re-raises.
#
# spec/projection/projection.rb
#   * Adds specs for non-container input, '[,,,,]', '[::::]', and three
#     malformed documents whose exception class is compared with the one
#     captured from Yajl::Parser.
#     NOTE(review): capture_exception_for rescues Exception and returns the
#     parsed value when nothing is raised.
#   * Adds the missing newline at end of file.
#
# NOTE(review): indentation and blank context lines below follow the hunk
# line counts and the usual 4-space (C) / 2-space (Ruby) layout. The two bare
# "-"/"+" pairs are whitespace-only changes whose exact whitespace is not
# shown. Verify against upstream before applying.
diff --git a/ext/yajl/yajl_ext.c b/ext/yajl/yajl_ext.c
index f9e91c14..a8faea0f 100644
--- a/ext/yajl/yajl_ext.c
+++ b/ext/yajl/yajl_ext.c
@@ -45,12 +45,12 @@ static void yajl_check_and_fire_callback(void * ctx) {
 
     /* No need to do any of this if the callback isn't even setup */
     if (wrapper->parse_complete_callback != Qnil) {
-        int len = RARRAY_LEN(wrapper->builderStack);
+        long len = RARRAY_LEN(wrapper->builderStack);
         if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
             rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack));
         }
     } else {
-        int len = RARRAY_LEN(wrapper->builderStack);
+        long len = RARRAY_LEN(wrapper->builderStack);
         if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
             wrapper->objectsFound++;
             if (wrapper->objectsFound > 1) {
@@ -82,7 +82,7 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
 static void yajl_set_static_value(void * ctx, VALUE val) {
     yajl_parser_wrapper * wrapper;
     VALUE lastEntry, hash;
-    int len;
+    long len;
 
     GetParser((VALUE)ctx, wrapper);
 
@@ -204,7 +204,7 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
         case T_BIGNUM:
             str = rb_funcall(obj, intern_to_s, 0);
             cptr = RSTRING_PTR(str);
-            len = RSTRING_LEN(str);
+            len = (unsigned int)RSTRING_LEN(str);
             if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
                 rb_raise(cEncodeError, "'%s' is an invalid number", cptr);
             }
@@ -212,7 +212,7 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
             break;
         case T_STRING:
             cptr = RSTRING_PTR(obj);
-            len = RSTRING_LEN(obj);
+            len = (unsigned int)RSTRING_LEN(obj);
             CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
             break;
         default:
@@ -220,13 +220,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
                 str = rb_funcall(obj, intern_to_json, 0);
                 Check_Type(str, T_STRING);
                 cptr = RSTRING_PTR(str);
-                len = RSTRING_LEN(str);
+                len = (unsigned int)RSTRING_LEN(str);
                 CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
             } else {
                 str = rb_funcall(obj, intern_to_s, 0);
                 Check_Type(str, T_STRING);
                 cptr = RSTRING_PTR(str);
-                len = RSTRING_LEN(str);
+                len = (unsigned int)RSTRING_LEN(str);
                 CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
             }
             break;
@@ -495,13 +495,13 @@ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self) {
 
     if (TYPE(input) == T_STRING) {
         cptr = RSTRING_PTR(input);
-        len = RSTRING_LEN(input);
+        len = (unsigned int)RSTRING_LEN(input);
         yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
     } else if (rb_respond_to(input, intern_io_read)) {
         VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize));
         while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) {
             cptr = RSTRING_PTR(parsed);
-            len = RSTRING_LEN(parsed);
+            len = (unsigned int)RSTRING_LEN(parsed);
             yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
         }
     } else {
@@ -541,7 +541,7 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
 
     if (wrapper->parse_complete_callback != Qnil) {
         const char * cptr = RSTRING_PTR(chunk);
-        len = RSTRING_LEN(chunk);
+        len = (unsigned int)RSTRING_LEN(chunk);
         yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
     } else {
         rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
@@ -616,11 +616,11 @@ static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop)
         yajl_tok token;
         if (pop == 0) {
             //printf("peeking %p %ld %d\n", RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
-            token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
+            token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset);
             //printf("peeked event %d\n", token);
 
             if (token == yajl_tok_eof) {
-                parser->offset = RSTRING_LEN(parser->buffer);
+                parser->offset = (unsigned int)RSTRING_LEN(parser->buffer);
                 continue;
             }
 
@@ -630,7 +630,7 @@ static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop)
         }
 
         //printf("popping\n");
-        token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
+        token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
         //printf("popped event %d\n", token);
 
         if (token == yajl_tok_eof) {
@@ -652,20 +652,19 @@ static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser);
 static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event);
 static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event);
 
-static VALUE rb_yajl_projector_filter_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
+static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
     assert(parser->stream);
 
-    if (event.token == yajl_tok_left_brace) {
-        return rb_yajl_projector_filter_array_subtree(parser, schema, event);
-    }
-
-    if (event.token == yajl_tok_left_bracket) {
-        return rb_yajl_projector_filter_object_subtree(parser, schema, event);
+    switch(event.token) {
+        case yajl_tok_left_brace:
+            return rb_yajl_projector_filter_array_subtree(parser, schema, event);
+            break;
+        case yajl_tok_left_bracket:
+            return rb_yajl_projector_filter_object_subtree(parser, schema, event);
+            break;
+        default:
+            return rb_yajl_projector_build_simple_value(parser, event);
     }
-
-    rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
-
-    return Qnil;
 }
 
 static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
@@ -680,12 +679,7 @@ static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser,
             break;
         }
 
-        VALUE val;
-        if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
-            val = rb_yajl_projector_filter_subtree(parser, schema, event);
-        } else {
-            val = rb_yajl_projector_build_simple_value(parser, event);
-        }
+        VALUE val = rb_yajl_projector_filter(parser, schema, event);
         rb_ary_push(ary, val);
 
         event = yajl_event_stream_next(parser, 0);
@@ -738,20 +732,16 @@ static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser,
 
         yajl_event_t value_event = yajl_event_stream_next(parser, 1);
 
-        VALUE val;
-        if (value_event.token == yajl_tok_left_bracket || value_event.token == yajl_tok_left_brace) {
-            VALUE key_schema;
-            if (schema == Qnil) {
-                key_schema = Qnil;
-            } else {
-                key_schema = rb_hash_aref(schema, key);
-            }
-
-            val = rb_yajl_projector_filter_subtree(parser, key_schema, value_event);
+        VALUE key_schema;
+        if (schema == Qnil) {
+            key_schema = Qnil;
         } else {
-            val = rb_yajl_projector_build_simple_value(parser, value_event);
+            key_schema = rb_hash_aref(schema, key);
         }
-
+
+        VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event);
+
+        rb_str_freeze(key);
         rb_hash_aset(hsh, key, val);
 
         peek_comma:
@@ -802,7 +792,7 @@ static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) {
         return;
     }
 
-    rb_raise(cStandardError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
+    rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
 }
 
 /*
@@ -858,7 +848,7 @@ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, ya
                 val = rb_cstr2inum(buf, 10);
             }
             free(buf);
-
+
             return val;
 
         case yajl_tok_string:;
@@ -868,6 +858,12 @@ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, ya
         case yajl_tok_eof:;
             rb_raise(cParseError, "unexpected eof while constructing value");
 
+        case yajl_tok_comma:
+            rb_raise(cParseError, "unexpected comma while constructing value");
+
+        case yajl_tok_colon:
+            rb_raise(cParseError, "unexpected colon while constructing value");
+
         default:;
             assert(0);
     }
@@ -912,6 +908,13 @@ static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_eve
     }
 }
 
+static VALUE rb_protected_yajl_projector_filter(VALUE pointer) {
+    VALUE *args = (VALUE *)pointer;
+    return rb_yajl_projector_filter((struct yajl_event_stream_s *)args[0],
+                                    args[1],
+                                    *(yajl_event_t *)args[2]);
+}
+
 /*
  * Document-method: project
  */
@@ -930,18 +933,35 @@ static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) {
         .stream = stream,
 
         .buffer = buffer,
-        .offset = buffer_size,
+        .offset = (unsigned int)buffer_size,
 
         .lexer = yajl_lex_alloc(&allocFuncs, 0, 1),
     };
 
-    VALUE result = rb_yajl_projector_filter_subtree(&parser, schema, yajl_event_stream_next(&parser, 1));
-
-    yajl_lex_free(parser.lexer);
+    yajl_event_t event = yajl_event_stream_next(&parser, 1);
 
     RB_GC_GUARD(stream);
     RB_GC_GUARD(buffer);
 
+    VALUE result;
+    int state = 0;
+
+    if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
+        VALUE args[3];
+        args[0] = (VALUE)&parser;
+        args[1] = schema;
+        args[2] = (VALUE)&event;
+        result = rb_protect(rb_protected_yajl_projector_filter,
+                            (VALUE)args,
+                            &state);
+    } else {
+        yajl_lex_free(parser.lexer);
+        rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
+    }
+
+    yajl_lex_free(parser.lexer);
+    if (state) rb_jump_tag(state);
+
     return result;
 }
 
diff --git a/spec/projection/projection.rb b/spec/projection/projection.rb
index 09a34cb0..9558507c 100644
--- a/spec/projection/projection.rb
+++ b/spec/projection/projection.rb
@@ -18,6 +18,63 @@
     expect(projection['age']).to eql(nil)
   end
 
+  it "should raise an exception and not leak memory" do
+    stream = StringIO.new('foo')
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(Yajl::ParseError)
+  end
+
+  it "should raise an exception and not segv" do
+    stream = StringIO.new('[,,,,]')
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(Yajl::ParseError)
+  end
+
+  it "should raise an exception and not segv on colons" do
+    stream = StringIO.new('[::::]')
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(Yajl::ParseError)
+  end
+
+  it "should behave the same way as the regular parser on bad tokens like comma" do
+    bad_json = '{"name": "keith", "age":, 27}'
+    stream = StringIO.new(bad_json)
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(capture_exception_for(bad_json).class)
+  end
+
+  it "should behave the same way as the regular parser on bad tokens like colon" do
+    bad_json = '{"name": "keith", "age":: 27}'
+    stream = StringIO.new(bad_json)
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(capture_exception_for(bad_json).class)
+  end
+
+  it "should behave the same way as the regular parser on not enough json" do
+    bad_json = '{"name": "keith", "age":'
+    stream = StringIO.new(bad_json)
+    projector = Yajl::Projector.new(stream)
+    expect {
+      projector.project({"name" => nil})
+    }.to raise_error(capture_exception_for(bad_json).class)
+  end
+
+  def capture_exception_for(bad_json)
+    Yajl::Parser.new.parse(bad_json)
+  rescue Exception => e
+    e
+  end
+
   def project(schema, over: "", json: nil, stream: nil)
     if stream.nil?
       if json.nil?
@@ -438,4 +495,4 @@ def project(schema, over: "", json: nil, stream: nil)
   it "handles objects whose second key has escape sequences" do
     expect(project(nil, json: '{"foo": "bar", "\ud83d\ude00": "grinning face"}')).to eql({"foo" => "bar", "😀" => "grinning face"})
   end
-end
\ No newline at end of file
+end