diff --git a/ext/yajl/yajl_encode.c b/ext/yajl/yajl_encode.c index 716ddded..16b2a472 100644 --- a/ext/yajl/yajl_encode.c +++ b/ext/yajl/yajl_encode.c @@ -59,12 +59,18 @@ yajl_string_encode2(const yajl_print_t print, unsigned int htmlSafe) { unsigned int beg = 0; - unsigned int end = 0; + unsigned int end = 0; + unsigned int increment = 0; char hexBuf[7]; + char entityBuffer[7]; hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; hexBuf[6] = 0; + entityBuffer[0] = '\\'; entityBuffer[1] = 'u'; entityBuffer[2] = '2'; entityBuffer[3] = '0'; + entityBuffer[6] = 0; + while (end < len) { + increment = 1; const char * escaped = NULL; switch (str[end]) { case '\r': escaped = "\\r"; break; @@ -76,10 +82,39 @@ yajl_string_encode2(const yajl_print_t print, case '\b': escaped = "\\b"; break; case '\t': escaped = "\\t"; break; case '/': - if (htmlSafe) { + if (htmlSafe == 1) { escaped = "\\/"; } break; + /* Escaping 0xe280a8 0xe280a9 */ + case 0xe2: + if (htmlSafe == 2) { + if (len - end >= 2 && str[end + 1] == 0x80) { + if (str[end + 2] == 0xa8) { + increment = 3; + entityBuffer[4] = '2'; + entityBuffer[5] = '8'; + escaped = entityBuffer; + break; + } + + if (str[end + 2] == 0xa9) { + increment = 3; + entityBuffer[4] = '2'; + entityBuffer[5] = '9'; + escaped = entityBuffer; + break; + } + } + } + case '<': + case '>': + case '&': + if (htmlSafe == 2) { + CharToHex(str[end], hexBuf + 4); + escaped = hexBuf; + } + break; default: if ((unsigned char) str[end] < 32) { CharToHex(str[end], hexBuf + 4); @@ -90,7 +125,8 @@ yajl_string_encode2(const yajl_print_t print, if (escaped != NULL) { print(ctx, (const char *) (str + beg), end - beg); print(ctx, escaped, (unsigned int)strlen(escaped)); - beg = ++end; + end += increment; + beg = end; } else { ++end; } diff --git a/ext/yajl/yajl_ext.c b/ext/yajl/yajl_ext.c index c9593bb8..9dd7def9 100644 --- a/ext/yajl/yajl_ext.c +++ b/ext/yajl/yajl_ext.c @@ -1030,9 +1030,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) { actualIndent = indentString; } } + if (rb_hash_aref(opts, sym_html_safe) == Qtrue) { htmlSafe = 1; } + + if (rb_hash_aref(opts, sym_entities) == Qtrue) { + htmlSafe = 2; + } } if (!indentString) { indentString = defaultIndentString; @@ -1356,6 +1361,7 @@ void Init_yajl() { sym_pretty = ID2SYM(rb_intern("pretty")); sym_indent = ID2SYM(rb_intern("indent")); sym_html_safe = ID2SYM(rb_intern("html_safe")); + sym_entities = ID2SYM(rb_intern("entities")); sym_terminator = ID2SYM(rb_intern("terminator")); sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); diff --git a/ext/yajl/yajl_ext.h b/ext/yajl/yajl_ext.h index 3c51ae7d..bab2543b 100644 --- a/ext/yajl/yajl_ext.h +++ b/ext/yajl/yajl_ext.h @@ -56,7 +56,7 @@ static rb_encoding *utf8Encoding; static VALUE cStandardError, cParseError, cEncodeError, mYajl, cParser, cProjector, cEncoder; static ID intern_io_read, intern_call, intern_keys, intern_to_s, intern_to_json, intern_has_key, intern_to_sym, intern_as_json; -static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe; +static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe, sym_entities; #define GetParser(obj, sval) Data_Get_Struct(obj, yajl_parser_wrapper, sval); #define GetEncoder(obj, sval) Data_Get_Struct(obj, yajl_encoder_wrapper, sval); diff --git a/spec/encoding/encoding_spec.rb b/spec/encoding/encoding_spec.rb index d31d213a..85c9ec7a 100644 --- a/spec/encoding/encoding_spec.rb +++ b/spec/encoding/encoding_spec.rb @@ -275,6 +275,14 @@ def to_s expect(safe_encoder.encode("")).to eql("\"<\\/script>\"") end + it "should not encode characters with entities by default" do + expect(Yajl.dump("\u2028\u2029><&")).to eql("\"\u2028\u2029><&\"") + end + + it "should encode characters with entities when enabled" do + expect(Yajl.dump("\u2028\u2029><&", entities: true)).to eql("\"\\u2028\\u2029\\u003E\\u003C\\u0026\"") + end + it "should default to *not* escaping / characters" do unsafe_encoder = Yajl::Encoder.new expect(unsafe_encoder.encode("")).not_to eql("\"<\\/script>\"")