Skip to content

Commit

Permalink
Merge pull request #118 from alcionai/std-lib-string-serializer
Browse files Browse the repository at this point in the history
Use golang standard library to serialize strings to JSON
  • Loading branch information
baywet committed Jan 10, 2024
2 parents b9ef405 + d232012 commit 8c3bb9d
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.0.5] - 2024-01-10

### Changed

- Fixes some special character escaping when serializing strings to JSON. Previous incorrect escaping could lead to deserialization errors if old serialized data is read again.

## [1.0.4] - 2023-07-12

### Changed
Expand Down
33 changes: 26 additions & 7 deletions json_serialization_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,32 @@ func (w *JsonSerializationWriter) writeRawValue(value ...string) {
}
}
// writeStringValue writes value to the output as a quoted JSON string.
//
// All escaping (quotes, backslashes, control characters, invalid UTF-8) is
// delegated to encoding/json so the result is always a valid JSON string
// token. The value is written exactly once; no manual pre-escaping is done
// because the encoder would otherwise escape the inserted backslashes again.
func (w *JsonSerializationWriter) writeStringValue(value string) {
	builder := &strings.Builder{}
	// Reserve room for the string, the two surrounding quotes and the trailing
	// newline the encoder appends. Strings that need escape sequences may still
	// trigger one more allocation.
	builder.Grow(len(value) + 3)

	// Turning off HTML escaping may not be strictly necessary but it matches
	// the previous behavior of this writer. Testing with Exchange mail showed
	// that data is accepted and interpreted properly with and without HTML
	// escaping when creating emails with HTML body content.
	enc := json.NewEncoder(builder)
	enc.SetEscapeHTML(false)

	if err := enc.Encode(value); err != nil {
		// Not expected in practice: encoding a plain string does not fail and
		// strings.Builder never returns a write error. Bail out instead of
		// emitting a partially written token.
		return
	}

	// builder.String() shares memory with builder, so the result must not be
	// retained beyond this call if the builder is ever pooled or reused.
	// NOTE(review): this relies on writeRawValue copying the bytes into the
	// writer's own buffer; revisit if that helper changes.
	//
	// The encoder terminates every value with a newline, which is not part of
	// the JSON string token. TrimSuffix is a no-op (rather than a panic) if the
	// newline is ever absent.
	w.writeRawValue(strings.TrimSuffix(builder.String(), "\n"))
}
func (w *JsonSerializationWriter) writePropertyName(key string) {
w.writeRawValue("\"", key, "\":")
Expand Down
200 changes: 200 additions & 0 deletions json_serialization_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,206 @@ func TestEscapeTabAndCarriageReturnInStrings(t *testing.T) {
assert.Equal(t, expected, converted)
}

// TestShortEscapeSequencesInString tests that strings containing characters
// with 2-character escape sequences according to RFC 8259 section 7 are
// properly encoded as JSON.
//
// Some characters have more than one valid JSON encoding and the one chosen
// by the standard library depends on the Go version, so each case lists every
// accepted encoding. A decode round trip then confirms that whichever form
// was produced still represents the original character.
func TestShortEscapeSequencesInString(t *testing.T) {
	// Accepted results for each test are quoted since it's a JSON string.
	table := []struct {
		input    byte
		expected []string
	}{
		{
			input:    0x22, // " character
			expected: []string{`"\""`},
		},
		{
			input:    0x5c, // \ character
			expected: []string{`"\\"`},
		},
		{
			input: 0x08, // backspace character
			// Before go1.22 this is the generic \u0008 escape code; from go1.22
			// on the short \b form is used.
			expected: []string{`"\u0008"`, `"\b"`},
		},
		{
			input: 0x0c, // form feed character
			// Before go1.22 this is the generic \u000c escape code; from go1.22
			// on the short \f form is used.
			expected: []string{`"\u000c"`, `"\f"`},
		},
		{
			input:    0x0a, // line feed character
			expected: []string{`"\n"`},
		},
		{
			input:    0x0d, // carriage return character
			expected: []string{`"\r"`},
		},
		{
			input:    0x09, // tab character
			expected: []string{`"\t"`},
		},
	}

	for _, test := range table {
		t.Run(fmt.Sprintf("0x%02X", test.input), func(t *testing.T) {
			stringInput := string(test.input)

			serializer := NewJsonSerializationWriter()
			err := serializer.WriteStringValue("", &stringInput)
			assert.NoError(t, err)

			result, err := serializer.GetSerializedContent()
			assert.NoError(t, err)

			// The output must be one of the accepted encodings for this input.
			assert.Contains(t, test.expected, string(result))

			assert.True(t, json.Valid(result), "valid JSON")

			// Decoding must yield the original character regardless of which
			// escape form the encoder picked.
			var decoded string
			assert.NoError(t, json.Unmarshal(result, &decoded))
			assert.Equal(t, stringInput, decoded)
		})
	}
}

// TestLongEscapeSequencesInString verifies that control characters which have
// no 2-character escape sequence in RFC 8259 section 7 are serialized using
// the six-character \u00XX form.
func TestLongEscapeSequencesInString(t *testing.T) {
	// The expectations are spelled out by hand on purpose: generating them in a
	// loop would mirror the escaping logic under test and could share its bugs.
	cases := []struct {
		input byte
		want  string
	}{
		{0x00, `"\u0000"`},
		{0x01, `"\u0001"`},
		{0x02, `"\u0002"`},
		{0x03, `"\u0003"`},
		{0x04, `"\u0004"`},
		{0x05, `"\u0005"`},
		{0x06, `"\u0006"`},
		{0x07, `"\u0007"`},
		{0x0b, `"\u000b"`},
		{0x0e, `"\u000e"`},
		{0x0f, `"\u000f"`},
		{0x10, `"\u0010"`},
		{0x11, `"\u0011"`},
		{0x12, `"\u0012"`},
		{0x13, `"\u0013"`},
		{0x14, `"\u0014"`},
		{0x15, `"\u0015"`},
		{0x16, `"\u0016"`},
		{0x17, `"\u0017"`},
		{0x18, `"\u0018"`},
		{0x19, `"\u0019"`},
		{0x1a, `"\u001a"`},
		{0x1b, `"\u001b"`},
		{0x1c, `"\u001c"`},
		{0x1d, `"\u001d"`},
		{0x1e, `"\u001e"`},
		{0x1f, `"\u001f"`},
	}

	for _, tc := range cases {
		name := fmt.Sprintf("0x%02X", tc.input)

		t.Run(name, func(t *testing.T) {
			raw := string(tc.input)
			writer := NewJsonSerializationWriter()

			// An empty key makes the writer emit only the bare string value.
			writeErr := writer.WriteStringValue("", &raw)
			assert.NoError(t, writeErr)

			got, contentErr := writer.GetSerializedContent()
			assert.NoError(t, contentErr)

			assert.Equal(t, []byte(tc.want), got)

			assert.True(t, json.Valid(got), "valid JSON")
		})
	}
}

func TestWriteValuesConcurrently(t *testing.T) {
instances := 100
output := make([][]byte, instances)
Expand Down

0 comments on commit 8c3bb9d

Please sign in to comment.