/
diagnostic_test.go
58 lines (55 loc) · 1.43 KB
/
diagnostic_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package toproto
import (
"testing"
)
// TestForceValidUTF8 is a table-driven test for forceValidUTF8. The table
// expects well-formed UTF-8 input to come back unchanged and expects every
// invalid byte to be replaced by one U+FFFD replacement character.
func TestForceValidUTF8(t *testing.T) {
	tests := []struct {
		Input string
		Want  string
	}{
		{
			Input: "hello",
			Want:  "hello",
		},
		{
			Input: "こんにちは",
			Want:  "こんにちは",
		},
		{
			// U+FB04 (LATIN SMALL LIGATURE FFL) is spelled as an escape so
			// that the single-character ligature cannot be silently folded
			// into the three ASCII letters "ffl" by a tool that normalizes
			// the file's text.
			Input: "ba\ufb04e",
			Want:  "ba\ufb04e", // ligature is preserved exactly
		},
		{
			Input: "wé́́é́́é́́!", // NOTE: These "e" have multiple combining diacritics
			Want:  "wé́́é́́é́́!", // diacritics are preserved exactly
		},
		{
			Input: "😸😾", // Astral-plane characters
			Want:  "😸😾", // preserved exactly
		},
		{
			Input: "\xff\xff",       // neither byte is valid UTF-8
			Want:  "\ufffd\ufffd", // both are replaced by replacement character
		},
		{
			Input: "\xff\xff\xff\xff\xff",                // more than three invalid bytes
			Want:  "\ufffd\ufffd\ufffd\ufffd\ufffd", // still expanded even though it exceeds our initial slice capacity in the implementation
		},
		{
			Input: "t\xffe\xffst",       // invalid bytes interleaved with other content
			Want:  "t\ufffde\ufffdst", // the valid content is preserved
		},
		{
			Input: "\xffこんにちは\xffこんにちは",       // invalid bytes interacting with multibyte sequences
			Want:  "\ufffdこんにちは\ufffdこんにちは", // the valid content is preserved
		},
	}

	for _, tc := range tests {
		// Each case runs as a subtest named after its raw input, so a
		// failure report identifies the offending string directly.
		t.Run(tc.Input, func(t *testing.T) {
			got := forceValidUTF8(tc.Input)
			if got != tc.Want {
				t.Errorf("wrong result\ngot: %q\nwant: %q", got, tc.Want)
			}
		})
	}
}