Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UTF-8 Encoding to OrderedCode #5817

Merged
merged 8 commits into from Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
111 changes: 105 additions & 6 deletions packages/firestore/src/index/ordered_code_writer.ts
Expand Up @@ -17,6 +17,17 @@
import { debugAssert, fail } from '../util/assert';
import { ByteString } from '../util/byte_string';

/**
 * These constants are taken from the backend.
 *
 * MIN_SURROGATE and MAX_SURROGATE are the first and last UTF-16 high
 * (leading) surrogate code units, U+D800 and U+DBFF. The UTF-8 writers use
 * them to decide between the 3-byte and the 4-byte encoding.
 */
const MIN_SURROGATE = '\uD800';
const MAX_SURROGATE = '\uDBFF';

// ESCAPE1 is the first byte of two different two-byte sequences; the second
// byte selects which one.
const ESCAPE1 = 0x00;
const NULL_BYTE = 0xff; // Combined with ESCAPE1: stands for a payload byte of 0x00
const SEPARATOR = 0x01; // Combined with ESCAPE1: written after the bytes of a UTF-8 string

// ESCAPE2 is the first byte of the two-byte sequence for a payload byte of 0xff.
const ESCAPE2 = 0xff;
const FF_BYTE = 0x00; // Combined with ESCAPE2

// NOTE(review): presumably the bit widths of a long and of a byte; their uses
// are outside this excerpt - confirm against the number writers.
const LONG_SIZE = 64;
const BYTE_SIZE = 8;

Expand Down Expand Up @@ -100,6 +111,54 @@ export class OrderedCodeWriter {
// Backing store for the encoded output; the byte writers store into it at
// index `position`.
buffer = new Uint8Array(DEFAULT_BUFFER_SIZE);
// Index of the next byte to be written; advanced by one per stored byte.
position = 0;

/**
 * Writes utf8 bytes into this byte sequence, ascending.
 *
 * Iterates `sequence` by code point, emits the UTF-8 bytes of each one
 * through `writeByteAscending` (which escapes 0x00 and 0xff), and finishes
 * with the ascending separator.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Ascending(sequence: string): void {
  // Surrogate bounds as numeric UTF-16 code units. The check below must be
  // numeric: comparing the iterated string `c` with the one-unit string
  // MAX_SURROGATE is lexicographic, so every pair that starts with U+DBFF
  // sorts after the constant (it is a proper prefix). Code points
  // U+10FC00..U+10FFFF would then take the 3-byte path and lose their low
  // surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // 1 byte: 0xxxxxxx
      this.writeByteAscending(charCode);
    } else if (charCode < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      this.writeByteAscending(0xc0 | (charCode >>> 6));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      this.writeByteAscending(0xe0 | (charCode >>> 12));
      this.writeByteAscending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else {
      // The leading unit is a high surrogate; codePointAt(0) combines it with
      // the following low surrogate when one is present.
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const codePoint = c.codePointAt(0)!;
      this.writeByteAscending(0xf0 | (codePoint >>> 18));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorAscending();
}

/**
 * Writes utf8 bytes into this byte sequence, descending.
 *
 * Iterates `sequence` by code point, emits the UTF-8 bytes of each one
 * through `writeByteDescending` (which escapes 0x00 and 0xff and stores the
 * bitwise complement), and finishes with the descending separator.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Descending(sequence: string): void {
  // Surrogate bounds as numeric UTF-16 code units. The check below must be
  // numeric: comparing the iterated string `c` with the one-unit string
  // MAX_SURROGATE is lexicographic, so every pair that starts with U+DBFF
  // sorts after the constant (it is a proper prefix). Code points
  // U+10FC00..U+10FFFF would then take the 3-byte path and lose their low
  // surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // 1 byte: 0xxxxxxx
      this.writeByteDescending(charCode);
    } else if (charCode < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      this.writeByteDescending(0xc0 | (charCode >>> 6));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      this.writeByteDescending(0xe0 | (charCode >>> 12));
      this.writeByteDescending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else {
      // The leading unit is a high surrogate; codePointAt(0) combines it with
      // the following low surrogate when one is present.
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const codePoint = c.codePointAt(0)!;
      this.writeByteDescending(0xf0 | (codePoint >>> 18));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorDescending();
}

writeNumberAscending(val: number): void {
// Values are encoded with a single byte length prefix, followed by the
// actual value in big-endian format with leading 0 bytes dropped.
Expand Down Expand Up @@ -155,19 +214,59 @@ export class OrderedCodeWriter {
return this.buffer.slice(0, this.position);
}

writeBytesAscending(value: ByteString): void {
fail('Not implemented');
/**
 * Appends one payload byte in ascending form, escaping the reserved values.
 *
 * Only the low 8 bits of `b` are considered. A 0x00 byte is emitted as
 * ESCAPE1 followed by NULL_BYTE, a 0xff byte as ESCAPE2 followed by FF_BYTE,
 * and any other value is stored as-is.
 */
private writeByteAscending(b: number): void {
  const byte = b & 0xff;
  switch (byte) {
    case ESCAPE1:
      this.writeEscapedByteAscending(ESCAPE1);
      this.writeEscapedByteAscending(NULL_BYTE);
      return;
    case ESCAPE2:
      this.writeEscapedByteAscending(ESCAPE2);
      this.writeEscapedByteAscending(FF_BYTE);
      return;
    default:
      this.writeEscapedByteAscending(byte);
  }
}

writeBytesDescending(value: ByteString): void {
fail('Not implemented');
/**
 * Writes a single byte descending to the buffer.
 *
 * Only the low 8 bits of `b` are considered. A 0x00 byte is emitted as
 * ESCAPE1 followed by NULL_BYTE, a 0xff byte as ESCAPE2 followed by FF_BYTE,
 * and any other value is passed through unchanged;
 * `writeEscapedByteDescending` stores the complement of whatever it is given.
 */
private writeByteDescending(b: number): void {
  const masked = b & 0xff;
  if (masked === ESCAPE1) {
    this.writeEscapedByteDescending(ESCAPE1);
    this.writeEscapedByteDescending(NULL_BYTE);
  } else if (masked === ESCAPE2) {
    this.writeEscapedByteDescending(ESCAPE2);
    this.writeEscapedByteDescending(FF_BYTE);
  } else {
    // Pass the masked value, as writeByteAscending does, so the byte that is
    // stored does not depend on the Uint8Array store truncating an
    // out-of-range argument.
    this.writeEscapedByteDescending(masked);
  }
}

writeUtf8Ascending(sequence: string): void {
/** Emits the two-byte ascending separator: ESCAPE1 followed by SEPARATOR. */
private writeSeparatorAscending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteAscending(marker);
  }
}

/**
 * Emits the two-byte descending separator: ESCAPE1 followed by SEPARATOR,
 * each handed to the descending byte writer.
 */
private writeSeparatorDescending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteDescending(marker);
  }
}

/**
 * Stores `b` at the current write position, after asking for room for one
 * more byte, and advances the position. No escaping is applied here.
 */
private writeEscapedByteAscending(b: number): void {
  this.ensureAvailable(1);
  this.buffer[this.position] = b;
  this.position += 1;
}

/**
 * Stores the bitwise complement of `b` at the current write position, after
 * asking for room for one more byte, and advances the position. No escaping
 * is applied here.
 */
private writeEscapedByteDescending(b: number): void {
  this.ensureAvailable(1);
  const inverted = ~b;
  this.buffer[this.position] = inverted;
  this.position += 1;
}

/**
 * Placeholder for the ascending encoding of a ByteString. It is not
 * implemented yet: the body ignores `value` and only calls
 * `fail('Not implemented')`.
 */
writeBytesAscending(value: ByteString): void {
fail('Not implemented');
}

writeUtf8Descending(sequence: string): void {
/**
 * Placeholder for the descending encoding of a ByteString. It is not
 * implemented yet: the body ignores `value` and only calls
 * `fail('Not implemented')`.
 */
writeBytesDescending(value: ByteString): void {
fail('Not implemented');
}

Expand Down
78 changes: 65 additions & 13 deletions packages/firestore/test/unit/index/ordered_code_writer.test.ts
Expand Up @@ -76,6 +76,38 @@ const NUMBER_TEST_CASES: Array<ValueTestCase<number>> = [
new ValueTestCase(Number.NaN, '08fff8000000000000', 'f70007ffffffffffff')
];

// String fixtures for the UTF-8 writers. Each case carries the input string
// followed by two hex strings: the expected ascending encoding and the
// expected descending encoding (read back by the tests as `ascString` and
// `descString`).
//
// The 'orders strings correctly' test passes this array to verifyOrdering,
// which expects the ascending bytes of an earlier entry to compare less than
// those of every later entry, so new cases must be inserted in sorted order.
const STRING_TEST_CASES: Array<ValueTestCase<string>> = [
// Empty string: only the separator is written.
new ValueTestCase('', '0001', 'fffe'),
// A 0x00 payload byte is escaped as 00 ff.
new ValueTestCase('\u0000', '00ff0001', 'ff00fffe'),
new ValueTestCase('\u0000\u0000', '00ff00ff0001', 'ff00ff00fffe'),
new ValueTestCase('abc', '6162630001', '9e9d9cfffe'),
// Mixes 1-, 2-, 3- and 4-byte UTF-8 sequences.
new ValueTestCase(
'xy¢z𠜎€𠜱あ𠝹',
'7879c2a27af0a09c8ee282acf0a09cb1e38182f0a09db90001',
'87863d5d850f5f63711d7d530f5f634e1c7e7d0f5f6246fffe'
),
new ValueTestCase(
'¬˚ß∂∆ç',
'c2accb9ac39fe28882e28886c3a70001',
'3d5334653c601d777d1d77793c58fffe'
),
new ValueTestCase(
'œ∑´´ß™£',
'c593e28891c2b4c2b4c39fe284a2c2a30001',
'3a6c1d776e3d4b3d4b3c601d7b5d3d5cfffe'
),
new ValueTestCase(
'πåçasdl߬µœ∑âsldalskdåßµ∂π',
'cf80c3a5c3a76173646cc39fc2acc2b5c593e28891c3a2736c64616c736b64c3a5c39fc2b5e28882cf800001',
'307f3c5a3c589e8c9b933c603d533d4a3a6c1d776e3c5d8c939b9e938c949b3c5a3c603d4a1d777d307ffffe'
),
new ValueTestCase(
'†¥¬´´`',
'e280a0c2a5c2acc2b4c2b4600001',
'1d7f5f3d5a3d533d4b3d4b9ffffe'
)
];

describe('Ordered Code Writer', () => {
it('computes number of leading zeros', () => {
for (let i = 0; i < 0xff; ++i) {
Expand All @@ -92,25 +124,42 @@ describe('Ordered Code Writer', () => {
});

it('converts numbers to bits', () => {
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
const bytes = getBytes(NUMBER_TEST_CASES[i].val);
verifyEncoding(NUMBER_TEST_CASES);
});

it('orders numbers correctly', () => {
verifyOrdering(NUMBER_TEST_CASES);
});

it('converts strings to bits', () => {
verifyEncoding(STRING_TEST_CASES);
});

it('orders strings correctly', () => {
verifyOrdering(STRING_TEST_CASES);
});

function verifyEncoding(testCases: Array<ValueTestCase<unknown>>): void {
for (let i = 0; i < testCases.length; ++i) {
const bytes = getBytes(testCases[i].val);
expect(bytes.asc).to.deep.equal(
fromHex(NUMBER_TEST_CASES[i].ascString),
'Ascending for ' + NUMBER_TEST_CASES[i].val
fromHex(testCases[i].ascString),
'Ascending for ' + testCases[i].val
);
expect(bytes.desc).to.deep.equal(
fromHex(NUMBER_TEST_CASES[i].descString),
'Descending for ' + NUMBER_TEST_CASES[i].val
fromHex(testCases[i].descString),
'Descending for ' + testCases[i].val
);
}
});
}

it('orders numbers correctly', () => {
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
for (let j = i; j < NUMBER_TEST_CASES.length; ++j) {
const left = NUMBER_TEST_CASES[i].val;
function verifyOrdering(testCases: Array<ValueTestCase<unknown>>): void {
for (let i = 0; i < testCases.length; ++i) {
for (let j = i; j < testCases.length; ++j) {
const left = testCases[i].val;
const leftBytes = getBytes(left);
const right = NUMBER_TEST_CASES[j].val;
const right = testCases[j].val;

const rightBytes = getBytes(right);
expect(compare(leftBytes.asc, rightBytes.asc)).to.equal(
i === j ? 0 : -1,
Expand All @@ -122,7 +171,7 @@ describe('Ordered Code Writer', () => {
);
}
}
});
}
});

function fromHex(hexString: string): Uint8Array {
Expand Down Expand Up @@ -151,6 +200,9 @@ function getBytes(val: unknown): { asc: Uint8Array; desc: Uint8Array } {
if (typeof val === 'number') {
ascWriter.writeNumberAscending(val);
descWriter.writeNumberDescending(val);
} else if (typeof val === 'string') {
ascWriter.writeUtf8Ascending(val);
descWriter.writeUtf8Descending(val);
} else {
throw new Error('Encoding not yet supported for ' + val);
}
Expand Down