Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added option to do entity encoding #187

Merged
merged 1 commit into from Jul 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 39 additions & 3 deletions ext/yajl/yajl_encode.c
Expand Up @@ -59,12 +59,18 @@ yajl_string_encode2(const yajl_print_t print,
unsigned int htmlSafe)
{
unsigned int beg = 0;
unsigned int end = 0;
unsigned int end = 0;
unsigned int increment = 0;
char hexBuf[7];
char entityBuffer[7];
hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
hexBuf[6] = 0;

entityBuffer[0] = '\\'; entityBuffer[1] = 'u'; entityBuffer[2] = '2'; entityBuffer[3] = '0';
entityBuffer[6] = 0;

while (end < len) {
increment = 1;
const char * escaped = NULL;
switch (str[end]) {
case '\r': escaped = "\\r"; break;
Expand All @@ -76,10 +82,39 @@ yajl_string_encode2(const yajl_print_t print,
case '\b': escaped = "\\b"; break;
case '\t': escaped = "\\t"; break;
case '/':
if (htmlSafe) {
if (htmlSafe == 1 || htmlSafe == 2) {
escaped = "\\/";
}
break;
/* Escaping 0xe280a8 0xe280a9 */
case 0xe2:
if (htmlSafe == 2) {
if (len - end >= 2 && str[end + 1] == 0x80) {
if (str[end + 2] == 0xa8) {
increment = 3;
entityBuffer[4] = '2';
entityBuffer[5] = '8';
escaped = entityBuffer;
break;
}

if (str[end + 2] == 0xa9) {
increment = 3;
entityBuffer[4] = '2';
entityBuffer[5] = '9';
escaped = entityBuffer;
break;
}
}
}
case '<':
case '>':
case '&':
if (htmlSafe == 2) {
CharToHex(str[end], hexBuf + 4);
escaped = hexBuf;
}
break;
default:
if ((unsigned char) str[end] < 32) {
CharToHex(str[end], hexBuf + 4);
Expand All @@ -90,7 +125,8 @@ yajl_string_encode2(const yajl_print_t print,
if (escaped != NULL) {
print(ctx, (const char *) (str + beg), end - beg);
print(ctx, escaped, (unsigned int)strlen(escaped));
beg = ++end;
end += increment;
beg = end;
} else {
++end;
}
Expand Down
6 changes: 6 additions & 0 deletions ext/yajl/yajl_ext.c
Expand Up @@ -1030,9 +1030,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
actualIndent = indentString;
}
}

if (rb_hash_aref(opts, sym_html_safe) == Qtrue) {
htmlSafe = 1;
}

if (rb_hash_aref(opts, sym_entities) == Qtrue) {
htmlSafe = 2;
}
}
if (!indentString) {
indentString = defaultIndentString;
Expand Down Expand Up @@ -1356,6 +1361,7 @@ void Init_yajl() {
sym_pretty = ID2SYM(rb_intern("pretty"));
sym_indent = ID2SYM(rb_intern("indent"));
sym_html_safe = ID2SYM(rb_intern("html_safe"));
sym_entities = ID2SYM(rb_intern("entities"));
sym_terminator = ID2SYM(rb_intern("terminator"));
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
Expand Down
2 changes: 1 addition & 1 deletion ext/yajl/yajl_ext.h
Expand Up @@ -56,7 +56,7 @@ static rb_encoding *utf8Encoding;
static VALUE cStandardError, cParseError, cEncodeError, mYajl, cParser, cProjector, cEncoder;
static ID intern_io_read, intern_call, intern_keys, intern_to_s,
intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe, sym_entities;

#define GetParser(obj, sval) Data_Get_Struct(obj, yajl_parser_wrapper, sval);
#define GetEncoder(obj, sval) Data_Get_Struct(obj, yajl_encoder_wrapper, sval);
Expand Down
16 changes: 16 additions & 0 deletions spec/encoding/encoding_spec.rb
Expand Up @@ -275,11 +275,27 @@ def to_s
expect(safe_encoder.encode("</script>")).to eql("\"<\\/script>\"")
end

it "should not encode characters with entities by default" do
expect(Yajl.dump("\u2028\u2029><&")).to eql("\"\u2028\u2029><&\"")
end

it "should encode characters with entities when enabled" do
expect(Yajl.dump("\u2028\u2029><&", entities: true)).to eql("\"\\u2028\\u2029\\u003E\\u003C\\u0026\"")
end

it "should default to *not* escaping / characters" do
unsafe_encoder = Yajl::Encoder.new
expect(unsafe_encoder.encode("</script>")).not_to eql("\"<\\/script>\"")
end

it "should encode slashes when enabled" do
unsafe_encoder = Yajl::Encoder.new(:entities => false)
safe_encoder = Yajl::Encoder.new(:entities => true)

expect(unsafe_encoder.encode("</script>")).not_to eql("\"<\\/script>\"")
expect(safe_encoder.encode("</script>")).to eql("\"\\u003C\\/script\\u003E\"")
end

it "return value of #to_json must be a string" do
expect {
Yajl::Encoder.encode(TheMindKiller.new)
Expand Down