Skip to content

Commit

Permalink
Add UTF-8 Encoding to OrderedCode (#5817)
Browse files Browse the repository at this point in the history
  • Loading branch information
schmidt-sebastian committed Jan 4, 2022
1 parent 1ef1341 commit 4f3662b
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 19 deletions.
111 changes: 105 additions & 6 deletions packages/firestore/src/index/ordered_code_writer.ts
Expand Up @@ -17,6 +17,17 @@
import { debugAssert, fail } from '../util/assert';
import { ByteString } from '../util/byte_string';

/** These constants are taken from the backend. */
// Bounds of the UTF-16 high (leading) surrogate range. The UTF-8 writers use
// them to decide between the three-byte and the four-byte encoding.
const MIN_SURROGATE = '\uD800';
const MAX_SURROGATE = '\uDBFF';

// A raw 0x00 byte is written as ESCAPE1 followed by NULL_BYTE. ESCAPE1
// followed by SEPARATOR is written after the last byte of a UTF-8 string.
const ESCAPE1 = 0x00;
const NULL_BYTE = 0xff; // Combined with ESCAPE1
const SEPARATOR = 0x01; // Combined with ESCAPE1

// A raw 0xff byte is written as ESCAPE2 followed by FF_BYTE.
const ESCAPE2 = 0xff;
const FF_BYTE = 0x00; // Combined with ESCAPE2

// Bit widths. Their use sites are outside this view — presumably the number
// encoding; TODO confirm.
const LONG_SIZE = 64;
const BYTE_SIZE = 8;

Expand Down Expand Up @@ -100,6 +111,54 @@ export class OrderedCodeWriter {
buffer = new Uint8Array(DEFAULT_BUFFER_SIZE);
position = 0;

/**
 * Writes the UTF-8 encoding of `sequence` into this byte sequence in
 * ascending order, followed by the ascending separator.
 *
 * The string is iterated by code point, so a well-formed surrogate pair
 * arrives as one two-unit string and is emitted as a single four-byte
 * sequence. Every byte is handed to `writeByteAscending`, which keeps only the
 * low 8 bits; that is why the lead-byte markers can be spelled `0x0f << n`.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Ascending(sequence: string): void {
  // Compare the leading code unit numerically. Comparing the iterated string
  // itself against MAX_SURROGATE is wrong for pairs whose high surrogate is
  // exactly U+DBFF: '\uDBFF' is a proper prefix of such a pair and therefore
  // sorts before it, which sent U+10FC00..U+10FFFF down the three-byte branch
  // and dropped their low surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // One byte: 0xxxxxxx
      this.writeByteAscending(charCode);
    } else if (charCode < 0x800) {
      // Two bytes: 110xxxxx 10xxxxxx
      this.writeByteAscending((0x0f << 6) | (charCode >>> 6));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. Unpaired low surrogates
      // also land here, as they did before.
      this.writeByteAscending((0x0f << 5) | (charCode >>> 12));
      this.writeByteAscending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else {
      // Leading unit is a high surrogate: encode the full code point in four
      // bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const codePoint = c.codePointAt(0)!;
      this.writeByteAscending((0x0f << 4) | (codePoint >>> 18));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorAscending();
}

/**
 * Writes the UTF-8 encoding of `sequence` into this byte sequence in
 * descending order, followed by the descending separator.
 *
 * The string is iterated by code point, so a well-formed surrogate pair
 * arrives as one two-unit string and is emitted as a single four-byte
 * sequence. Every byte is handed to `writeByteDescending`.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Descending(sequence: string): void {
  // Compare the leading code unit numerically. Comparing the iterated string
  // itself against MAX_SURROGATE is wrong for pairs whose high surrogate is
  // exactly U+DBFF: '\uDBFF' is a proper prefix of such a pair and therefore
  // sorts before it, which sent U+10FC00..U+10FFFF down the three-byte branch
  // and dropped their low surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // One byte: 0xxxxxxx
      this.writeByteDescending(charCode);
    } else if (charCode < 0x800) {
      // Two bytes: 110xxxxx 10xxxxxx
      this.writeByteDescending((0x0f << 6) | (charCode >>> 6));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. Unpaired low surrogates
      // also land here, as they did before.
      this.writeByteDescending((0x0f << 5) | (charCode >>> 12));
      this.writeByteDescending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else {
      // Leading unit is a high surrogate: encode the full code point in four
      // bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const codePoint = c.codePointAt(0)!;
      this.writeByteDescending((0x0f << 4) | (codePoint >>> 18));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorDescending();
}

writeNumberAscending(val: number): void {
// Values are encoded with a single byte length prefix, followed by the
// actual value in big-endian format with leading 0 bytes dropped.
Expand Down Expand Up @@ -155,19 +214,59 @@ export class OrderedCodeWriter {
return this.buffer.slice(0, this.position);
}

writeBytesAscending(value: ByteString): void {
fail('Not implemented');
/**
 * Appends one byte in ascending order. Only the low 8 bits of `b` are used;
 * a value equal to one of the escape markers is expanded into its two-byte
 * escaped form.
 */
private writeByteAscending(b: number): void {
  const octet = b & 0xff;
  switch (octet) {
    case ESCAPE1:
      // 0x00 -> ESCAPE1, NULL_BYTE
      this.writeEscapedByteAscending(ESCAPE1);
      this.writeEscapedByteAscending(NULL_BYTE);
      return;
    case ESCAPE2:
      // 0xff -> ESCAPE2, FF_BYTE
      this.writeEscapedByteAscending(ESCAPE2);
      this.writeEscapedByteAscending(FF_BYTE);
      return;
    default:
      this.writeEscapedByteAscending(octet);
  }
}

writeBytesDescending(value: ByteString): void {
fail('Not implemented');
/**
 * Appends one byte in descending order. Only the low 8 bits of `b` are used;
 * a value equal to one of the escape markers is expanded into its two-byte
 * escaped form.
 *
 * @param b - The byte to write; bits above the lowest 8 are ignored.
 */
private writeByteDescending(b: number): void {
  const masked = b & 0xff;
  if (masked === ESCAPE1) {
    // 0x00 -> ESCAPE1, NULL_BYTE
    this.writeEscapedByteDescending(ESCAPE1);
    this.writeEscapedByteDescending(NULL_BYTE);
  } else if (masked === ESCAPE2) {
    // 0xff -> ESCAPE2, FF_BYTE
    this.writeEscapedByteDescending(ESCAPE2);
    this.writeEscapedByteDescending(FF_BYTE);
  } else {
    // Forward the masked value, as writeByteAscending does, instead of
    // relying on the typed-array store to drop the high bits of `b`.
    this.writeEscapedByteDescending(masked);
  }
}

writeUtf8Ascending(sequence: string): void {
/** Appends the two-byte separator (ESCAPE1, SEPARATOR) in ascending order. */
private writeSeparatorAscending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteAscending(marker);
  }
}

/** Appends the two-byte separator (ESCAPE1, SEPARATOR) in descending order. */
private writeSeparatorDescending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteDescending(marker);
  }
}

/**
 * Stores `b` verbatim at the current position and advances the position by
 * one, after making sure one more byte is available.
 */
private writeEscapedByteAscending(b: number): void {
  this.ensureAvailable(1);
  const slot = this.position;
  this.buffer[slot] = b;
  this.position = slot + 1;
}

/**
 * Stores the bitwise complement of `b` at the current position and advances
 * the position by one, after making sure one more byte is available.
 */
private writeEscapedByteDescending(b: number): void {
  this.ensureAvailable(1);
  const slot = this.position;
  const inverted = ~b;
  this.buffer[slot] = inverted;
  this.position = slot + 1;
}

/**
 * Placeholder for writing a ByteString in ascending order. The body only
 * calls `fail('Not implemented')`; `value` is not read.
 */
writeBytesAscending(value: ByteString): void {
fail('Not implemented');
}

writeUtf8Descending(sequence: string): void {
/**
 * Placeholder for writing a ByteString in descending order. The body only
 * calls `fail('Not implemented')`; `value` is not read.
 */
writeBytesDescending(value: ByteString): void {
fail('Not implemented');
}

Expand Down
78 changes: 65 additions & 13 deletions packages/firestore/test/unit/index/ordered_code_writer.test.ts
Expand Up @@ -76,6 +76,38 @@ const NUMBER_TEST_CASES: Array<ValueTestCase<number>> = [
new ValueTestCase(Number.NaN, '08fff8000000000000', 'f70007ffffffffffff')
];

// String fixtures: each case pairs a value with the hex of its expected
// ascending and descending encodings (read back below as `ascString` and
// `descString`). The ordering test expects the ascending encoding of each
// entry to sort before that of every later entry, so keep the list in order.
const STRING_TEST_CASES: Array<ValueTestCase<string>> = [
new ValueTestCase('', '0001', 'fffe'),
new ValueTestCase('\u0000', '00ff0001', 'ff00fffe'),
new ValueTestCase('\u0000\u0000', '00ff00ff0001', 'ff00ff00fffe'),
new ValueTestCase('abc', '6162630001', '9e9d9cfffe'),
new ValueTestCase(
'xy¢z𠜎€𠜱あ𠝹',
'7879c2a27af0a09c8ee282acf0a09cb1e38182f0a09db90001',
'87863d5d850f5f63711d7d530f5f634e1c7e7d0f5f6246fffe'
),
new ValueTestCase(
'¬˚ß∂∆ç',
'c2accb9ac39fe28882e28886c3a70001',
'3d5334653c601d777d1d77793c58fffe'
),
new ValueTestCase(
'œ∑´´ß™£',
'c593e28891c2b4c2b4c39fe284a2c2a30001',
'3a6c1d776e3d4b3d4b3c601d7b5d3d5cfffe'
),
new ValueTestCase(
'πåçasdl߬µœ∑âsldalskdåßµ∂π',
'cf80c3a5c3a76173646cc39fc2acc2b5c593e28891c3a2736c64616c736b64c3a5c39fc2b5e28882cf800001',
'307f3c5a3c589e8c9b933c603d533d4a3a6c1d776e3c5d8c939b9e938c949b3c5a3c603d4a1d777d307ffffe'
),
new ValueTestCase(
'†¥¬´´`',
'e280a0c2a5c2acc2b4c2b4600001',
'1d7f5f3d5a3d533d4b3d4b9ffffe'
)
];

describe('Ordered Code Writer', () => {
it('computes number of leading zeros', () => {
for (let i = 0; i < 0xff; ++i) {
Expand All @@ -92,25 +124,42 @@ describe('Ordered Code Writer', () => {
});

it('converts numbers to bits', () => {
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
const bytes = getBytes(NUMBER_TEST_CASES[i].val);
verifyEncoding(NUMBER_TEST_CASES);
});

it('orders numbers correctly', () => {
verifyOrdering(NUMBER_TEST_CASES);
});

it('converts strings to bits', () => {
verifyEncoding(STRING_TEST_CASES);
});

it('orders strings correctly', () => {
verifyOrdering(STRING_TEST_CASES);
});

function verifyEncoding(testCases: Array<ValueTestCase<unknown>>): void {
for (let i = 0; i < testCases.length; ++i) {
const bytes = getBytes(testCases[i].val);
expect(bytes.asc).to.deep.equal(
fromHex(NUMBER_TEST_CASES[i].ascString),
'Ascending for ' + NUMBER_TEST_CASES[i].val
fromHex(testCases[i].ascString),
'Ascending for ' + testCases[i].val
);
expect(bytes.desc).to.deep.equal(
fromHex(NUMBER_TEST_CASES[i].descString),
'Descending for ' + NUMBER_TEST_CASES[i].val
fromHex(testCases[i].descString),
'Descending for ' + testCases[i].val
);
}
});
}

it('orders numbers correctly', () => {
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
for (let j = i; j < NUMBER_TEST_CASES.length; ++j) {
const left = NUMBER_TEST_CASES[i].val;
function verifyOrdering(testCases: Array<ValueTestCase<unknown>>): void {
for (let i = 0; i < testCases.length; ++i) {
for (let j = i; j < testCases.length; ++j) {
const left = testCases[i].val;
const leftBytes = getBytes(left);
const right = NUMBER_TEST_CASES[j].val;
const right = testCases[j].val;

const rightBytes = getBytes(right);
expect(compare(leftBytes.asc, rightBytes.asc)).to.equal(
i === j ? 0 : -1,
Expand All @@ -122,7 +171,7 @@ describe('Ordered Code Writer', () => {
);
}
}
});
}
});

function fromHex(hexString: string): Uint8Array {
Expand Down Expand Up @@ -151,6 +200,9 @@ function getBytes(val: unknown): { asc: Uint8Array; desc: Uint8Array } {
if (typeof val === 'number') {
ascWriter.writeNumberAscending(val);
descWriter.writeNumberDescending(val);
} else if (typeof val === 'string') {
ascWriter.writeUtf8Ascending(val);
descWriter.writeUtf8Descending(val);
} else {
throw new Error('Encoding not yet supported for ' + val);
}
Expand Down

0 comments on commit 4f3662b

Please sign in to comment.