Skip to content

Commit

Permalink
Merge pull request #1678 from jerch/fix_linkifier
Browse files Browse the repository at this point in the history
Improve unicode string handling in linkifier
  • Loading branch information
jerch committed Sep 14, 2018
2 parents 5c873c1 + 2f66ec4 commit d37a8ce
Show file tree
Hide file tree
Showing 8 changed files with 537 additions and 96 deletions.
171 changes: 169 additions & 2 deletions src/Buffer.test.ts
Expand Up @@ -5,9 +5,9 @@

import { assert } from 'chai';
import { ITerminal } from './Types';
import { Buffer, DEFAULT_ATTR } from './Buffer';
import { Buffer, DEFAULT_ATTR, CHAR_DATA_CHAR_INDEX } from './Buffer';
import { CircularList } from './common/CircularList';
import { MockTerminal } from './utils/TestUtils.test';
import { MockTerminal, TestTerminal } from './utils/TestUtils.test';
import { BufferLine } from './BufferLine';

const INIT_COLS = 80;
Expand Down Expand Up @@ -347,4 +347,171 @@ describe('Buffer', () => {
assert.equal(str3, '😁a');
});
});
describe('stringIndexToBufferIndex', () => {
// A fresh terminal with 10 columns and 5 rows is created for every test; the
// tests below expect inputs longer than one row to continue on the following rows.
let terminal: TestTerminal;

beforeEach(() => {
terminal = new TestTerminal({rows: 5, cols: 10});
});

it('multiline ascii', () => {
  // ASCII only: string index n is expected in cell n, counted row by row
  const text = 'This is ASCII text spanning multiple lines.';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  let idx = 0;
  while (idx < text.length) {
    const expectedCell = [Math.floor(idx / cols), idx % cols];
    assert.deepEqual(expectedCell, terminal.buffer.stringIndexToBufferIndex(0, idx));
    idx++;
  }
});

it('combining e\u0301 in a sentence', () => {
  const text = 'Sitting in the cafe\u0301 drinking coffee.';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  const cellAt = (n: number): number[] => [Math.floor(n / cols), n % cols];
  const lookup = (idx: number): number[] => terminal.buffer.stringIndexToBufferIndex(0, idx);
  // up to and including the base char 'e' (string index 18) string index and cell index agree
  for (let idx = 0; idx <= 18; idx++) {
    assert.deepEqual(cellAt(idx), lookup(idx));
  }
  // base char (18) and combining mark (19) are expected in the same cell
  assert.deepEqual(lookup(18), lookup(19));
  // from the combining mark on every string index is one ahead of its cell index
  for (let idx = 19; idx < text.length; idx++) {
    assert.deepEqual(cellAt(idx - 1), lookup(idx));
  }
});

it('multiline combining e\u0301', () => {
  const text = 'e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  // base char and combining mark share a cell: string indices 2n and 2n + 1 are expected in cell n
  for (let idx = 0; idx < text.length; idx++) {
    const cell = idx >> 1;
    const expectedCell = [Math.floor(cell / cols), cell % cols];
    assert.deepEqual(expectedCell, terminal.buffer.stringIndexToBufferIndex(0, idx));
  }
});

it('surrogate char in a sentence', () => {
  const text = 'The 𝄞 is a clef widely used in modern notation.';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  const cellAt = (n: number): number[] => [Math.floor(n / cols), n % cols];
  const lookup = (idx: number): number[] => terminal.buffer.stringIndexToBufferIndex(0, idx);
  // up to and including the high surrogate (string index 4) string index and cell index agree
  for (let idx = 0; idx <= 4; idx++) {
    assert.deepEqual(cellAt(idx), lookup(idx));
  }
  // both halves of the surrogate pair 𝄞 (string index 4 & 5) are expected in the same cell
  assert.deepEqual(lookup(4), lookup(5));
  // from the low surrogate on every string index is one ahead of its cell index
  for (let idx = 5; idx < text.length; idx++) {
    assert.deepEqual(cellAt(idx - 1), lookup(idx));
  }
});

it('multiline surrogate char', () => {
  const text = '𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  // a surrogate pair shares a cell: string indices 2n and 2n + 1 are expected in cell n
  for (let idx = 0; idx < text.length; idx++) {
    const cell = idx >> 1;
    const expectedCell = [Math.floor(cell / cols), cell % cols];
    assert.deepEqual(expectedCell, terminal.buffer.stringIndexToBufferIndex(0, idx));
  }
});

it('surrogate char with combining', () => {
  // eye of Ra (surrogate pair) followed by an acute accent - 3 string indices for the first cell
  const text = '𓂀\u0301 - the eye hiroglyph with an acute accent.';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  const lookup = (idx: number): number[] => terminal.buffer.stringIndexToBufferIndex(0, idx);
  // low surrogate (1) and combining mark (2) are expected in the first cell
  assert.deepEqual([0, 0], lookup(1));
  assert.deepEqual([0, 0], lookup(2));
  // from the combining mark on every string index is two ahead of its cell index
  for (let idx = 2; idx < text.length; idx++) {
    const cell = idx - 2;
    assert.deepEqual([Math.floor(cell / cols), cell % cols], lookup(idx));
  }
});

it('multiline surrogate with combining', () => {
  const text = '𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301';
  terminal.writeSync(text);
  const content = terminal.buffer.iterator(true).next().content;
  assert.equal(text, content);
  const cols = terminal.cols;
  // surrogate pair plus combining mark share a cell: 3 string indices per cell
  for (let idx = 0; idx < text.length; idx++) {
    const cell = Math.floor(idx / 3);
    const expectedCell = [Math.floor(cell / cols), cell % cols];
    assert.deepEqual(expectedCell, terminal.buffer.stringIndexToBufferIndex(0, idx));
  }
});

it('fullwidth chars', () => {
  // \uFF11\uFF12\uFF13 are the fullwidth digits 1, 2, 3 - written as escapes so they
  // cannot be silently normalized to their ASCII counterparts
  const input = 'These \uFF11\uFF12\uFF13 are some fat numbers.';
  terminal.writeSync(input);
  const s = terminal.buffer.iterator(true).next().content;
  assert.equal(input, s);
  // the ASCII prefix maps 1:1 to cells
  for (let i = 0; i < 6; ++i) {
    const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
    assert.deepEqual([(i / terminal.cols) | 0, i % terminal.cols], bufferIndex);
  }
  // string index 6, 7, 8 take 2 cells each:
  // index 7 is expected at column 8, index 8 at the start of the next row
  assert.deepEqual([0, 8], terminal.buffer.stringIndexToBufferIndex(0, 7));
  assert.deepEqual([1, 0], terminal.buffer.stringIndexToBufferIndex(0, 8));
  // rest of the string has offset of +3 (one extra cell per fullwidth char)
  for (let i = 9; i < input.length; ++i) {
    const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
    assert.deepEqual([((i + 3) / terminal.cols) | 0, (i + 3) % terminal.cols], bufferIndex);
  }
});

it('multiline fullwidth chars', () => {
  // fullwidth digits 1-9 and 0, written as escapes so they cannot be silently
  // normalized to their ASCII counterparts
  const tenFullwidthDigits = '\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\uFF10';
  const input = tenFullwidthDigits + tenFullwidthDigits;
  terminal.writeSync(input);
  const s = terminal.buffer.iterator(true).next().content;
  assert.equal(input, s);
  // every fullwidth char takes 2 cells, string index i is expected at cell index 2 * i
  for (let i = 9; i < input.length; ++i) {
    const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i);
    assert.deepEqual([((i << 1) / terminal.cols) | 0, (i << 1) % terminal.cols], bufferIndex);
  }
});

it('fullwidth combining with emoji - match emoji cell', () => {
  // \uFFE5 is the fullwidth yen sign, followed by a combining acute accent
  const input = 'Lots of \uFFE5\u0301 make me 😃.';
  terminal.writeSync(input);
  const s = terminal.buffer.iterator(true).next().content;
  assert.equal(input, s);
  const stringIndex = s.match(/😃/).index;
  const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, stringIndex);
  // compare the cell content with the emoji; a bare assert(value, message) would only
  // check that the cell content is truthy
  assert.equal(terminal.buffer.lines.get(bufferIndex[0]).get(bufferIndex[1])[CHAR_DATA_CHAR_INDEX], '😃');
});

it('multiline fullwidth chars with offset 1 (currently tests for broken behavior)', () => {
  // fullwidth digits 1-9 and 0, written as escapes so they cannot be silently
  // normalized to their ASCII counterparts
  const tenFullwidthDigits = '\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\uFF10';
  const input = 'a' + tenFullwidthDigits + tenFullwidthDigits;
  // the 'a' at the beginning moves all fullwidth chars one to the right
  // now the end of the line contains a dangling empty cell since
  // the next fullwidth char has to wrap early
  // the dangling last cell is wrongly added in the string
  // --> fixable after resolving #1685
  terminal.writeSync(input);
  // TODO: reenable after fix
  // const s = terminal.buffer.contents(true).toArray()[0];
  // assert.equal(input, s);
  for (let i = 10; i < input.length; ++i) {
    const bufferIndex = terminal.buffer.stringIndexToBufferIndex(0, i + 1); // TODO: remove +1 after fix
    const j = (i - 0) << 1;
    assert.deepEqual([(j / terminal.cols) | 0, j % terminal.cols], bufferIndex);
  }
});
});
});
64 changes: 63 additions & 1 deletion src/Buffer.ts
Expand Up @@ -4,7 +4,7 @@
*/

import { CircularList } from './common/CircularList';
import { CharData, ITerminal, IBuffer, IBufferLine } from './Types';
import { CharData, ITerminal, IBuffer, IBufferLine, BufferIndex, IBufferStringIterator, IBufferStringIteratorResult } from './Types';
import { EventEmitter } from './common/EventEmitter';
import { IMarker } from 'xterm';
import { BufferLine } from './BufferLine';
Expand Down Expand Up @@ -194,6 +194,36 @@ export class Buffer implements IBuffer {
this.scrollBottom = newRows - 1;
}

/**
 * Translates a string index back to a BufferIndex.
 * To get the correct buffer position the string must start at column 0
 * (default in translateBufferLineToString).
 * The method also works on wrapped line strings given rows were not trimmed.
 * The method operates on the CharData string length, there are no
 * additional content or boundary checks. Therefore the string and the buffer
 * should not be altered in between.
 * TODO: respect trim flag after fixing #1685
 * @param lineIndex line index the string was retrieved from
 * @param stringIndex index within the string
 * @returns `[lineIndex, column]` of the cell the string index falls into, or
 * `[-1, -1]` if the buffer has no more lines before the index is reached
 */
public stringIndexToBufferIndex(lineIndex: number, stringIndex: number): BufferIndex {
  while (stringIndex) {
    const line = this.lines.get(lineIndex);
    if (!line) {
      // ran past the available lines - report "not found" instead of
      // falling through and dereferencing the missing line
      return [-1, -1];
    }
    for (let i = 0; i < line.length; ++i) {
      // every cell consumes as many string indices as its char string is long
      stringIndex -= line.get(i)[CHAR_DATA_CHAR_INDEX].length;
      if (stringIndex < 0) {
        return [lineIndex, i];
      }
    }
    lineIndex++;
  }
  // index 0, or the index was used up exactly at the end of a line
  return [lineIndex, 0];
}

/**
* Translates a buffer line to a string, with optional start and end columns.
* Wide characters will count as two columns in the resulting string. This
Expand Down Expand Up @@ -340,6 +370,10 @@ export class Buffer implements IBuffer {
// TODO: This could probably be optimized by relying on sort order and trimming the array using .length
this.markers.splice(this.markers.indexOf(marker), 1);
}

/**
 * Creates a BufferStringIterator over this buffer.
 * All arguments are handed to the BufferStringIterator constructor unchanged.
 * @param trimRight passed on as the iterator's trimRight flag
 * @param startIndex optional line index to start at
 * @param endIndex optional line index to stop at
 */
public iterator(trimRight: boolean, startIndex?: number, endIndex?: number): IBufferStringIterator {
  const stringIterator = new BufferStringIterator(this, trimRight, startIndex, endIndex);
  return stringIterator;
}
}

export class Marker extends EventEmitter implements IMarker {
Expand All @@ -366,3 +400,31 @@ export class Marker extends EventEmitter implements IMarker {
super.dispose();
}
}

/**
 * Iterates over a buffer and yields its content as strings.
 * Every step covers the range reported by `getWrappedRangeForLine` for the
 * current line and concatenates the strings of all lines in that range.
 * Iteration starts at `_startIndex` (default 0) and `hasNext` turns false once
 * the current line index reaches `_endIndex` (default `_buffer.lines.length`).
 */
export class BufferStringIterator implements IBufferStringIterator {
  private _current: number;

  constructor (
    private _buffer: IBuffer,
    private _trimRight: boolean,
    private _startIndex: number = 0,
    private _endIndex: number = _buffer.lines.length
  ) {
    this._current = this._startIndex;
  }

  /**
   * Whether the current line index is still below the end index.
   */
  public hasNext(): boolean {
    return this._endIndex > this._current;
  }

  /**
   * Returns the range of the current line together with its concatenated
   * string content and moves on to the line following that range.
   */
  public next(): IBufferStringIteratorResult {
    const range = this._buffer.getWrappedRangeForLine(this._current);
    let content = '';
    let row = range.first;
    while (row <= range.last) {
      // TODO: always apply trimRight after fixing #1685
      // for now only the last line of the range gets trimmed
      let trimRow = false;
      if (this._trimRight) {
        trimRow = row === range.last;
      }
      content += this._buffer.translateBufferLineToString(row, trimRow);
      ++row;
    }
    this._current = range.last + 1;
    return {range, content};
  }
}
79 changes: 79 additions & 0 deletions src/CharWidth.test.ts
@@ -0,0 +1,79 @@
/**
* Copyright (c) 2017 The xterm.js authors. All rights reserved.
* @license MIT
*/

import { TestTerminal } from './utils/TestUtils.test';
import { assert } from 'chai';
import { getStringCellWidth } from './CharWidth';
import { IBuffer } from './Types';
import { CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CHAR_INDEX } from './Buffer';


describe('getStringCellWidth', function(): void {
  // every test writes its input to a fresh terminal and compares the width
  // calculated from the string with the cell widths stored in the buffer
  let terminal: TestTerminal;

  beforeEach(() => {
    terminal = new TestTerminal({rows: 5, cols: 30});
  });

  /**
   * Sums up the cell widths stored in the buffer lines from `start` (inclusive)
   * to `end` (exclusive). Stops right after the first cell whose char equals
   * `sentinel`, the width of the sentinel cell is included in the sum.
   */
  function sumWidths(buffer: IBuffer, start: number, end: number, sentinel: string): number {
    let result = 0;
    for (let i = start; i < end; ++i) {
      const line = buffer.lines.get(i);
      for (let j = 0; j < line.length; ++j) { // TODO: change to trimBorder with multiline
        const ch = line.get(j);
        result += ch[CHAR_DATA_WIDTH_INDEX];
        // return on sentinel
        if (ch[CHAR_DATA_CHAR_INDEX] === sentinel) {
          return result;
        }
      }
    }
    return result;
  }

  it('ASCII chars', function(): void {
    const input = 'This is just ASCII text.#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  it('combining chars', function(): void {
    const input = 'e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301e\u0301#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  it('surrogate chars', function(): void {
    const input = '𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞𝄞#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  it('surrogate combining chars', function(): void {
    const input = '𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301𓂀\u0301#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  it('fullwidth chars', function(): void {
    // fullwidth digits 1-9 and 0, written as escapes so they cannot be silently
    // normalized to their ASCII counterparts
    const input = '\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\uFF10#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  it('fullwidth chars offset 1', function(): void {
    // same fullwidth digits, shifted one cell to the right by a leading ASCII char
    const input = 'a\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\uFF10#';
    terminal.writeSync(input);
    const s = terminal.buffer.iterator(true).next().content;
    assert.equal(input, s);
    assert.equal(getStringCellWidth(s), sumWidths(terminal.buffer, 0, 1, '#'));
  });
  // TODO: multiline tests once #1685 is resolved
});
22 changes: 22 additions & 0 deletions src/CharWidth.ts
Expand Up @@ -169,3 +169,25 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu
return wcwidthHigh(num);
};
})({nul: 0, control: 0}); // configurable options

/**
 * Get the terminal cell width for a string.
 * The string is walked by UTF-16 code units and `wcwidth` is summed up per
 * code point. A high surrogate is merged with the following code unit into
 * one code point, low surrogates on their own contribute nothing.
 * If the string ends with a high surrogate, the width summed up so far is returned.
 * @param s the string to measure
 */
export function getStringCellWidth(s: string): number {
  const length = s.length;
  let cells = 0;
  for (let pos = 0; pos < length; pos++) {
    let codePoint = s.charCodeAt(pos);
    const isHighSurrogate = codePoint >= 0xD800 && codePoint <= 0xDBFF;
    if (isHighSurrogate) {
      const trail = s.charCodeAt(pos + 1);
      if (isNaN(trail)) {
        // nothing follows the high surrogate
        return cells;
      }
      codePoint = ((codePoint - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000;
    }
    const isLowSurrogate = codePoint >= 0xDC00 && codePoint <= 0xDFFF;
    if (!isLowSurrogate) {
      cells += wcwidth(codePoint);
    }
  }
  return cells;
}

0 comments on commit d37a8ce

Please sign in to comment.