-
Notifications
You must be signed in to change notification settings - Fork 310
/
utf8.spec.ts
74 lines (64 loc) · 2.92 KB
/
utf8.spec.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import { toAscii } from "./ascii";
import { fromUtf8, toUtf8 } from "./utf8";
describe("utf8", () => {
it("encodes ascii strings", () => {
expect(toUtf8("")).toEqual(new Uint8Array([]));
expect(toUtf8("abc")).toEqual(new Uint8Array([0x61, 0x62, 0x63]));
expect(toUtf8(" ?=-n|~+-*/\\")).toEqual(
new Uint8Array([0x20, 0x3f, 0x3d, 0x2d, 0x6e, 0x7c, 0x7e, 0x2b, 0x2d, 0x2a, 0x2f, 0x5c]),
);
});
it("decodes ascii string", () => {
expect(fromUtf8(new Uint8Array([]))).toEqual("");
expect(fromUtf8(new Uint8Array([0x61, 0x62, 0x63]))).toEqual("abc");
expect(
fromUtf8(new Uint8Array([0x20, 0x3f, 0x3d, 0x2d, 0x6e, 0x7c, 0x7e, 0x2b, 0x2d, 0x2a, 0x2f, 0x5c])),
).toEqual(" ?=-n|~+-*/\\");
});
it("encodes null character", () => {
expect(toUtf8("\u0000")).toEqual(new Uint8Array([0x00]));
});
it("decodes null byte", () => {
expect(fromUtf8(new Uint8Array([0x00]))).toEqual("\u0000");
});
it("encodes Basic Multilingual Plane strings", () => {
expect(toUtf8("ö")).toEqual(new Uint8Array([0xc3, 0xb6]));
expect(toUtf8("¥")).toEqual(new Uint8Array([0xc2, 0xa5]));
expect(toUtf8("Ф")).toEqual(new Uint8Array([0xd0, 0xa4]));
expect(toUtf8("ⱴ")).toEqual(new Uint8Array([0xe2, 0xb1, 0xb4]));
expect(toUtf8("ⵘ")).toEqual(new Uint8Array([0xe2, 0xb5, 0x98]));
});
it("decodes Basic Multilingual Plane strings", () => {
expect(fromUtf8(new Uint8Array([0xc3, 0xb6]))).toEqual("ö");
expect(fromUtf8(new Uint8Array([0xc2, 0xa5]))).toEqual("¥");
expect(fromUtf8(new Uint8Array([0xd0, 0xa4]))).toEqual("Ф");
expect(fromUtf8(new Uint8Array([0xe2, 0xb1, 0xb4]))).toEqual("ⱴ");
expect(fromUtf8(new Uint8Array([0xe2, 0xb5, 0x98]))).toEqual("ⵘ");
});
it("encodes Supplementary Multilingual Plane strings", () => {
// U+1F0A1
expect(toUtf8("🂡")).toEqual(new Uint8Array([0xf0, 0x9f, 0x82, 0xa1]));
// U+1034A
expect(toUtf8("𐍊")).toEqual(new Uint8Array([0xf0, 0x90, 0x8d, 0x8a]));
});
it("decodes Supplementary Multilingual Plane strings", () => {
// U+1F0A1
expect(fromUtf8(new Uint8Array([0xf0, 0x9f, 0x82, 0xa1]))).toEqual("🂡");
// U+1034A
expect(fromUtf8(new Uint8Array([0xf0, 0x90, 0x8d, 0x8a]))).toEqual("𐍊");
});
it("throws on invalid utf8 bytes", () => {
// Broken UTF8 example from https://github.com/nodejs/node/issues/16894
expect(() => fromUtf8(new Uint8Array([0xf0, 0x80, 0x80]))).toThrow();
});
describe("fromUtf8", () => {
it("replaces characters in lossy mode", () => {
expect(fromUtf8(new Uint8Array([]), true)).toEqual("");
expect(fromUtf8(new Uint8Array([0x61, 0x62, 0x63]), true)).toEqual("abc");
// Example from https://doc.rust-lang.org/stable/std/string/struct.String.html#method.from_utf8_lossy
expect(
fromUtf8(new Uint8Array([...toAscii("Hello "), 0xf0, 0x90, 0x80, ...toAscii("World")]), true),
).toEqual("Hello �World");
});
});
});