Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve unicode string handling in linkifier #1678

Merged
merged 29 commits into from Sep 14, 2018
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a8fe410
Buffer.stringIndexToBufferIndex method
jerch Sep 9, 2018
a494e80
BufferStringIterator
jerch Sep 9, 2018
4aa8e40
fix MockBuffer
jerch Sep 9, 2018
daa6fa0
change Linkifier._doLinkifyRow
jerch Sep 9, 2018
08af72b
linter errors
jerch Sep 9, 2018
a76d4a6
fix multiple row linkifier attempts; tests for unicode before a match
jerch Sep 10, 2018
3289eed
merge master
jerch Sep 10, 2018
c35306a
handle unicode within a match; test cases
jerch Sep 10, 2018
a2bfbea
Merge branch 'master' into fix_linkifier
jerch Sep 10, 2018
0068c47
Merge branch 'master' into fix_linkifier
jerch Sep 12, 2018
7e69c5d
apply linkifier only to rows in viewport
jerch Sep 12, 2018
c8c22e3
docs
jerch Sep 13, 2018
2708a76
fix next() invocation; handle wild matches
jerch Sep 13, 2018
30f74bf
cover & comment right border trim behavior to be fixed
jerch Sep 13, 2018
bd68853
Merge branch 'master' into fix_linkifier
jerch Sep 13, 2018
a8d9c92
make linter happy
jerch Sep 13, 2018
7401567
Remove unsafe cast in Buffer.test.ts
Tyriar Sep 14, 2018
cd2ac92
:lipstick:
Tyriar Sep 14, 2018
50d6008
Use common TestTerminal for writeSync across Buffer/Linkifier test
Tyriar Sep 14, 2018
9ce2894
remove toArray; interface type for iterator result
jerch Sep 14, 2018
9514bfd
cleanup BufferStringIterator
jerch Sep 14, 2018
8664f54
rename contents to iterator
jerch Sep 14, 2018
57bbbda
rename stringWidth; add some comments
jerch Sep 14, 2018
14c20c5
simplify stringIndexToBufferIndex, fix docs
jerch Sep 14, 2018
70be55d
test cases for getStringCellWidth
jerch Sep 14, 2018
631e331
fix test cases
jerch Sep 14, 2018
3143ad7
Merge branch 'master' into fix_linkifier
jerch Sep 14, 2018
4b68976
hide regex error behind debug flag
jerch Sep 14, 2018
2f66ec4
:lipstick:
Tyriar Sep 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
165 changes: 164 additions & 1 deletion src/Buffer.test.ts
Expand Up @@ -5,14 +5,22 @@

import { assert } from 'chai';
import { ITerminal } from './Types';
import { Buffer, DEFAULT_ATTR } from './Buffer';
import { Buffer, DEFAULT_ATTR, CHAR_DATA_CHAR_INDEX } from './Buffer';
import { CircularList } from './common/CircularList';
import { MockTerminal } from './utils/TestUtils.test';
import { BufferLine } from './BufferLine';
import { Terminal } from './Terminal';

const INIT_COLS = 80;
const INIT_ROWS = 24;

class TestTerminal extends Terminal {
writeSync(data: string): void {
this.writeBuffer.push(data);
this._innerWrite();
}
}

describe('Buffer', () => {
let terminal: ITerminal;
let buffer: Buffer;
Expand Down Expand Up @@ -347,4 +355,159 @@ describe('Buffer', () => {
assert.equal(str3, '😁a');
});
});
describe('stringIndexToBufferIndex', function(): void {
let terminal: TestTerminal;
beforeEach(function(): void {
terminal = new TestTerminal({rows: 5, cols: 10});
});
it('multiline ascii', function(): void {
const input = 'This is ASCII text spanning multiple lines.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
for (let i = 0; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([(i / terminal.cols) | 0, i % terminal.cols], bufferIndex);
}
});
it('combining e\u0301 in a sentence', function(): void {
const input = 'Sitting in the cafe\u0301 drinking coffee.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
for (let i = 0; i < 19; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([(i / terminal.cols) | 0, i % terminal.cols], bufferIndex);
}
// string index 18 & 19 point to combining char e\u0301 ---> same buffer Index
assert.deepEqual(
terminal.buffer.stringIndexToBufferIndex(0, 18),
terminal.buffer.stringIndexToBufferIndex(0, 19));
// after the combining char every string index has an offset of -1
for (let i = 19; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i - 1) / terminal.cols) | 0, (i - 1) % terminal.cols], bufferIndex);
}
});
it('multiline combining e\u0301', function(): void {
const input = 'e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
// every buffer cell index contains 2 string indices
for (let i = 0; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i >> 1) / terminal.cols) | 0, (i >> 1) % terminal.cols], bufferIndex);
}
});
it('surrogate char in a sentence', function(): void {
const input = 'The 𝄞 is a clef widely used in modern notation.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
for (let i = 0; i < 5; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([(i / terminal.cols) | 0, i % terminal.cols], bufferIndex);
}
// string index 4 & 5 point to surrogate char 𝄞 ---> same buffer Index
assert.deepEqual(
terminal.buffer.stringIndexToBufferIndex(0, 4),
terminal.buffer.stringIndexToBufferIndex(0, 5));
// after the combining char every string index has an offset of -1
for (let i = 5; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i - 1) / terminal.cols) | 0, (i - 1) % terminal.cols], bufferIndex);
}
});
it('multiline surrogate char', function(): void {
const input = '𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
// every buffer cell index contains 2 string indices
for (let i = 0; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i >> 1) / terminal.cols) | 0, (i >> 1) % terminal.cols], bufferIndex);
}
});
it('surrogate char with combining', function(): void {
// eye of Ra with acute accent - string length of 3
const input = '𓂀\u0301 - the eye hiroglyph with an acute accent.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
// index 0..2 should map to 0
assert.deepEqual([0, 0], terminal.buffer.stringIndexToBufferIndex(0, 1));
assert.deepEqual([0, 0], terminal.buffer.stringIndexToBufferIndex(0, 2));
for (let i = 2; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i - 2) / terminal.cols) | 0, (i - 2) % terminal.cols], bufferIndex);
}
});
it('multiline surrogate with combining', function(): void {
const input = '𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
// every buffer cell index contains 3 string indices
for (let i = 0; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([(((i / 3) | 0) / terminal.cols) | 0, ((i / 3) | 0) % terminal.cols], bufferIndex);
}
});
it('fullwidth chars', function(): void {
const input = 'These 123 are some fat numbers.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
for (let i = 0; i < 6; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([(i / terminal.cols) | 0, i % terminal.cols], bufferIndex);
}
// string index 6, 7, 8 take 2 cells
assert.deepEqual([0, 8], terminal.buffer.stringIndexToBufferIndex(0, 7));
assert.deepEqual([1, 0], terminal.buffer.stringIndexToBufferIndex(0, 8));
// rest of the string has offset of +3
for (let i = 9; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i + 3) / terminal.cols) | 0, (i + 3) % terminal.cols], bufferIndex);
}
});
it('multiline fullwidth chars', function(): void {
const input = '12345678901234567890';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
for (let i = 9; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
assert.deepEqual([((i << 1) / terminal.cols) | 0, (i << 1) % terminal.cols], bufferIndex);
}
});
it('fullwidth combining with emoji - match emoji cell', function(): void {
const input = 'Lots of ¥\u0301 make me 😃.';
terminal.writeSync(input);
const s = terminal.buffer.contents(true).toArray()[0];
assert.equal(input, s);
const stringIndex = s.match(/😃/).index;
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, stringIndex);
assert(terminal.buffer.lines.get(bufferIndex[0]).get(bufferIndex[1])[CHAR_DATA_CHAR_INDEX], '😃');
});
it('multiline fullwidth chars with offset 1 (currently tests for broken behavior)', function(): void {
const input = 'a12345678901234567890';
// the 'a' at the beginning moves all fullwidth chars one to the right
// now the end of the line contains a dangling empty cell since
// the next fullwidth char has to wrap early
// the dangling last cell is wrongly added in the string
// --> fixable after resolving #1685
terminal.writeSync(input);
// TODO: reenable after fix
// const s = terminal.buffer.contents(true).toArray()[0];
// assert.equal(input, s);
for (let i = 10; i < input.length; ++i) {
const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i + 1); // TODO: remove +1 after fix
const j = (i - 0) << 1;
assert.deepEqual([(j / terminal.cols) | 0, j % terminal.cols], bufferIndex);
}
});
});
});
74 changes: 73 additions & 1 deletion src/Buffer.ts
Expand Up @@ -4,7 +4,7 @@
*/

import { CircularList } from './common/CircularList';
import { CharData, ITerminal, IBuffer, IBufferLine } from './Types';
import { CharData, ITerminal, IBuffer, IBufferLine, BufferIndex, IBufferStringIterator } from './Types';
import { EventEmitter } from './common/EventEmitter';
import { IMarker } from 'xterm';
import { BufferLine } from './BufferLine';
Expand Down Expand Up @@ -194,6 +194,39 @@ export class Buffer implements IBuffer {
this.scrollBottom = newRows - 1;
}

/**
* Translates a string index back to a BufferIndex.
* To get the correct buffer position the string must start at `startCol` 0
* (default in translateBufferLineToString).
* The method also works on wrapped line strings given rows were not trimmed.
* The method operates on the CharData width attribute, there are no
* additional content or boundary checks. Therefore the string and the buffer
* should not be altered in between.
* TODO: respect trim flag after fixing #1685
* @param lineIndex line index the string was retrieved from
* @param stringIndex index within the string
* @param startCol column offset the string was retrieved from
*/
public stringIndexToBufferIndex(lineIndex: number, stringIndex: number): BufferIndex {
jerch marked this conversation as resolved.
Show resolved Hide resolved
if (!stringIndex) {
return [lineIndex, 0];
}
while (stringIndex) {
const line = this.lines.get(lineIndex);
if (!line) {
[-1, -1];
}
for (let i = 0; i < line.length; ++i) {
stringIndex -= line.get(i)[CHAR_DATA_CHAR_INDEX].length;
if (stringIndex < 0) {
return [lineIndex, i];
}
}
lineIndex++;
}
return [lineIndex, 0];
}

/**
* Translates a buffer line to a string, with optional start and end columns.
* Wide characters will count as two columns in the resulting string. This
Expand Down Expand Up @@ -340,6 +373,10 @@ export class Buffer implements IBuffer {
// TODO: This could probably be optimized by relying on sort order and trimming the array using .length
this.markers.splice(this.markers.indexOf(marker), 1);
}

public contents(trimRight: boolean, startIndex?: number, endIndex?: number): IBufferStringIterator {
jerch marked this conversation as resolved.
Show resolved Hide resolved
return new BufferStringIterator(this, trimRight, startIndex, endIndex);
}
}

export class Marker extends EventEmitter implements IMarker {
Expand All @@ -366,3 +403,38 @@ export class Marker extends EventEmitter implements IMarker {
super.dispose();
}
}

export class BufferStringIterator implements IBufferStringIterator {
private _start: number;
private _end: number;
private _current: number;

constructor (private _buffer: IBuffer, private _trimRight: boolean, startIndex?: number, endIndex?: number) {
jerch marked this conversation as resolved.
Show resolved Hide resolved
this._start = startIndex || 0;
this._end = endIndex || this._buffer.lines.length;
this._current = this._start;
}

public hasNext(): boolean {
return this._current < this._end;
}

public next(withRanges: boolean = false): string | [{first: number, last: number}, string] {
const range = this._buffer.getWrappedRangeForLine(this._current);
let result = '';
for (let i = range.first; i <= range.last; ++i) {
// TODO: always apply trimRight after fixing #1685
result += this._buffer.translateBufferLineToString(i, (this._trimRight) ? i === range.last : false);
}
this._current = range.last + 1;
return (withRanges) ? [range, result] : result;
}

public toArray(): string[] {
jerch marked this conversation as resolved.
Show resolved Hide resolved
const result = [];
while (this.hasNext()) {
result.push(this.next() as string);
jerch marked this conversation as resolved.
Show resolved Hide resolved
}
return result;
}
}
23 changes: 23 additions & 0 deletions src/CharWidth.ts
Expand Up @@ -169,3 +169,26 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu
return wcwidthHigh(num);
};
})({nul: 0, control: 0}); // configurable options

/**
* Get the terminal cell width for a string.
* @param s
jerch marked this conversation as resolved.
Show resolved Hide resolved
*/
export function stringWidth(s: string): number {
jerch marked this conversation as resolved.
Show resolved Hide resolved
jerch marked this conversation as resolved.
Show resolved Hide resolved
let result = 0;
for (let i = 0; i < s.length; ++i) {
let code = s.charCodeAt(i);
if (0xD800 <= code && code <= 0xDBFF) {
const low = s.charCodeAt(i + 1);
if (isNaN(low)) {
return result;
}
code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
}
if (0xDC00 <= code && code <= 0xDFFF) {
continue;
}
result += wcwidth(code);
}
return result;
}