Skip to content

Commit

Permalink
Merge pull request #4288 from jerch/fix_weblinks
Browse files Browse the repository at this point in the history
Fix weblinks
  • Loading branch information
Tyriar committed Dec 19, 2022
2 parents cb4102c + 218cfd2 commit 84b8121
Show file tree
Hide file tree
Showing 9 changed files with 291 additions and 130 deletions.
188 changes: 118 additions & 70 deletions addons/xterm-addon-web-links/src/WebLinkProvider.ts
Expand Up @@ -3,7 +3,7 @@
* @license MIT
*/

import { ILinkProvider, ILink, Terminal, IViewportRange } from 'xterm';
import { ILinkProvider, ILink, Terminal, IViewportRange, IBufferLine } from 'xterm';

export interface ILinkProviderOptions {
hover?(event: MouseEvent, text: string, location: IViewportRange): void;
Expand Down Expand Up @@ -45,61 +45,51 @@ export class LinkComputer {
public static computeLink(y: number, regex: RegExp, terminal: Terminal, activate: (event: MouseEvent, uri: string) => void): ILink[] {
const rex = new RegExp(regex.source, (regex.flags || '') + 'g');

const [line, startLineIndex] = LinkComputer._translateBufferLineToStringWithWrap(y - 1, false, terminal);

// Don't try if the wrapped line if excessively large as the regex matching will block the main
// thread.
if (line.length > 1024) {
return [];
}
const [lines, startLineIndex] = LinkComputer._getWindowedLineStrings(y - 1, terminal);
const line = lines.join('');

let match;
let stringIndex = -1;
const result: ILink[] = [];

while ((match = rex.exec(line)) !== null) {
const text = match[1];
if (!text) {
// something matched but does not comply with the given matchIndex
// since this is most likely a bug the regex itself we simply do nothing here
console.log('match found without corresponding matchIndex');
break;
}

// Get index, match.index is for the outer match which includes negated chars
// therefore we cannot use match.index directly, instead we search the position
// of the match group in text again
// also correct regex and string search offsets for the next loop run
stringIndex = line.indexOf(text, stringIndex + 1);
rex.lastIndex = stringIndex + text.length;
if (stringIndex < 0) {
// invalid stringIndex (should not have happened)
break;
while (match = rex.exec(line)) {
const text = match[0];

// check via URL if the matched text would form a proper url
// NOTE: This outsources the ugly url parsing to the browser.
// To avoid surprising auto expansion from URL we additionally
// check afterwards if the provided string resembles the parsed
// one close enough:
// - decodeURI decode path segement back to byte repr
// to detect unicode auto conversion correctly
// - append / also match domain urls w'o any path notion
try {
const url = new URL(text);
const urlText = decodeURI(url.toString());
if (text !== urlText && text + '/' !== urlText) {
continue;
}
} catch (e) {
continue;
}

let endX = stringIndex + text.length;
let endY = startLineIndex + 1;
// map string positions back to buffer positions
// values are 0-based right side excluding
const [startY, startX] = LinkComputer._mapStrIdx(terminal, startLineIndex, 0, match.index);
const [endY, endX] = LinkComputer._mapStrIdx(terminal, startY, startX, text.length);

while (endX > terminal.cols) {
endX -= terminal.cols;
endY++;
}

let startX = stringIndex + 1;
let startY = startLineIndex + 1;
while (startX > terminal.cols) {
startX -= terminal.cols;
startY++;
if (startY === -1 || startX === -1 || endY === -1 || endX === -1) {
continue;
}

// range expects values 1-based right side including, thus +1 except for endX
const range = {
start: {
x: startX,
y: startY
x: startX + 1,
y: startY + 1
},
end: {
x: endX,
y: endY
y: endY + 1
}
};

Expand All @@ -110,41 +100,99 @@ export class LinkComputer {
}

/**
* Gets the entire line for the buffer line
* @param lineIndex The index of the line being translated.
* @param trimRight Whether to trim whitespace to the right.
* Get wrapped content lines for the current line index.
* The top/bottom line expansion stops at whitespaces or length > 2048.
* Returns an array with line strings and the top line index.
*
* NOTE: We pull line strings with trimRight=true on purpose to make sure
* to correctly match urls with early wrapped wide chars. This corrupts the string index
* for 1:1 backmapping to buffer positions, thus needs an additional correction in _mapStrIdx.
*/
private static _translateBufferLineToStringWithWrap(lineIndex: number, trimRight: boolean, terminal: Terminal): [string, number] {
let lineString = '';
let lineWrapsToNext: boolean;
let prevLinesToWrap: boolean;

do {
const line = terminal.buffer.active.getLine(lineIndex);
if (!line) {
break;
private static _getWindowedLineStrings(lineIndex: number, terminal: Terminal): [string[], number] {
let line: IBufferLine | undefined;
let topIdx = lineIndex;
let bottomIdx = lineIndex;
let length = 0;
let content = '';
const lines: string[] = [];

if ((line = terminal.buffer.active.getLine(lineIndex))) {
const currentContent = line.translateToString(true);

// expand top, stop on whitespaces or length > 2048
if (line.isWrapped && currentContent[0] !== ' ') {
length = 0;
while ((line = terminal.buffer.active.getLine(--topIdx)) && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (!line.isWrapped || content.indexOf(' ') !== -1) {
break;
}
}
lines.reverse();
}

if (line.isWrapped) {
lineIndex--;
// append current line
lines.push(currentContent);

// expand bottom, stop on whitespaces or length > 2048
length = 0;
while ((line = terminal.buffer.active.getLine(++bottomIdx)) && line.isWrapped && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (content.indexOf(' ') !== -1) {
break;
}
}
}
return [lines, topIdx];
}

prevLinesToWrap = line.isWrapped;
} while (prevLinesToWrap);

const startLineIndex = lineIndex;

do {
const nextLine = terminal.buffer.active.getLine(lineIndex + 1);
lineWrapsToNext = nextLine ? nextLine.isWrapped : false;
const line = terminal.buffer.active.getLine(lineIndex);
/**
* Map a string index back to buffer positions.
* Returns buffer position as [lineIndex, columnIndex] 0-based,
* or [-1, -1] in case the lookup ran into a non-existing line.
*/
private static _mapStrIdx(terminal: Terminal, lineIndex: number, rowIndex: number, stringIndex: number): [number, number] {
const buf = terminal.buffer.active;
const cell = buf.getNullCell();
let start = rowIndex;
while (stringIndex) {
const line = buf.getLine(lineIndex);
if (!line) {
break;
return [-1, -1];
}
for (let i = start; i < line.length; ++i) {
line.getCell(i, cell);
const chars = cell.getChars();
const width = cell.getWidth();
if (width) {
stringIndex -= chars.length || 1;

// correct stringIndex for early wrapped wide chars:
// - currently only happens at last cell
// - cells to the right are reset with chars='' and width=1 in InputHandler.print
// - follow-up line must be wrapped and contain wide char at first cell
// --> if all these conditions are met, correct stringIndex by +1
if (i === line.length - 1 && chars === '') {
const line = buf.getLine(lineIndex + 1);
if (line && line.isWrapped) {
line.getCell(0, cell);
if (cell.getWidth() === 2) {
stringIndex += 1;
}
}
}
}
if (stringIndex < 0) {
return [lineIndex, i];
}
}
lineString += line.translateToString(!lineWrapsToNext && trimRight).substring(0, terminal.cols);
lineIndex++;
} while (lineWrapsToNext);

return [lineString, startLineIndex];
start = 0;
}
return [lineIndex, start];
}
}
32 changes: 13 additions & 19 deletions addons/xterm-addon-web-links/src/WebLinksAddon.ts
Expand Up @@ -6,25 +6,19 @@
import { Terminal, ITerminalAddon, IDisposable } from 'xterm';
import { ILinkProviderOptions, WebLinkProvider } from './WebLinkProvider';

const protocolClause = '(https?:\\/\\/)';
const domainCharacterSet = '[\\da-z\\.-]+';
const negatedDomainCharacterSet = '[^\\da-z\\.-]+';
const domainBodyClause = '(' + domainCharacterSet + ')';
const tldClause = '([a-z\\.]{2,18})';
const ipClause = '((\\d{1,3}\\.){3}\\d{1,3})';
const localHostClause = '(localhost)';
const portClause = '(:\\d{1,5})';
const hostClause = '((' + domainBodyClause + '\\.' + tldClause + ')|' + ipClause + '|' + localHostClause + ')' + portClause + '?';
const pathCharacterSet = '(\\/[\\/\\w\\.\\-%~:+@]*)*([^:"\'\\s])';
const pathClause = '(' + pathCharacterSet + ')?';
const queryStringHashFragmentCharacterSet = '[0-9\\w\\[\\]\\(\\)\\/\\?\\!#@$%&\'*+,:;~\\=\\.\\-]*';
const queryStringClause = '(\\?' + queryStringHashFragmentCharacterSet + ')?';
const hashFragmentClause = '(#' + queryStringHashFragmentCharacterSet + ')?';
const negatedPathCharacterSet = '[^\\/\\w\\.\\-%]+';
const bodyClause = hostClause + pathClause + queryStringClause + hashFragmentClause;
const start = '(?:^|' + negatedDomainCharacterSet + ')(';
const end = ')($|' + negatedPathCharacterSet + ')';
const strictUrlRegex = new RegExp(start + protocolClause + bodyClause + end);
// consider everthing starting with http:// or https://
// up to first whitespace, `"` or `'` as url
// NOTE: The repeated end clause is needed to not match a dangling `:`
// resembling the old (...)*([^:"\'\\s]) final path clause
// additionally exclude early + final:
// - unsafe from rfc3986: !*'()
// - unsafe chars from rfc1738: {}|\^~[]` (minus [] as we need them for ipv6 adresses, also allow ~)
// also exclude as finals:
// - final interpunction like ,.!?
// - any sort of brackets <>()[]{} (not spec conform, but often used to enclose urls)
// - unsafe chars from rfc1738: {}|\^~[]`
const strictUrlRegex = /https?:[/]{2}[^\s"'!*(){}|\\\^<>`]*[^\s"':,.!?{}|\\\^~\[\]`()<>]/;


function handleLink(event: MouseEvent, uri: string): void {
const newWindow = window.open();
Expand Down
73 changes: 73 additions & 0 deletions addons/xterm-addon-web-links/test/WebLinksAddon.api.ts
Expand Up @@ -14,6 +14,22 @@ let page: Page;
const width = 800;
const height = 600;


interface ILinkStateData {
uri?: string;
range?: {
start: {
x: number;
y: number;
};
end: {
x: number;
y: number;
};
};
}


describe('WebLinksAddon', () => {
before(async function(): Promise<any> {
browser = await launchBrowser();
Expand All @@ -35,6 +51,43 @@ describe('WebLinksAddon', () => {
it('.io', async function(): Promise<any> {
await testHostName('foo.io');
});

describe('correct buffer offsets & uri', () => {
it('all half width', async () => {
setupCustom();
await writeSync(page, 'aaa http://example.com aaa http://example.com aaa');
await resetAndHover(5, 1);
await evalLinkStateData('http://example.com', { start: { x: 5, y: 1 }, end: { x: 22, y: 1 } });
await resetAndHover(1, 2);
await evalLinkStateData('http://example.com', { start: { x: 28, y: 1 }, end: { x: 5, y: 2 } });
});
it('url after full width', async () => {
setupCustom();
await writeSync(page, '¥¥¥ http://example.com ¥¥¥ http://example.com aaa');
await resetAndHover(8, 1);
await evalLinkStateData('http://example.com', { start: { x: 8, y: 1 }, end: { x: 25, y: 1 } });
await resetAndHover(1, 2);
await evalLinkStateData('http://example.com', { start: { x: 34, y: 1 }, end: { x: 11, y: 2 } });
});
it('full width within url and before', async () => {
setupCustom();
await writeSync(page, '¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥');
await resetAndHover(8, 1);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(1, 2);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(17, 2);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 17, y: 2 }, end: { x: 19, y: 3 } });
});
it('name + password url after full width and combining', async () => {
setupCustom();
await writeSync(page, '¥¥¥cafe\u0301 http://test:password@example.com/some_path');
await resetAndHover(12, 1);
await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
await resetAndHover(13, 2);
await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
});
});
});

async function testHostName(hostname: string): Promise<void> {
Expand Down Expand Up @@ -64,3 +117,23 @@ async function pollForLinkAtCell(col: number, row: number, value: string): Promi
await pollFor(page, `document.querySelectorAll('${rowSelector} > span[style]').length >= ${value.length}`, true, async () => page.hover(`${rowSelector} > :nth-child(${col})`));
assert.equal(await page.evaluate(`Array.prototype.reduce.call(document.querySelectorAll('${rowSelector} > span[style]'), (a, b) => a + b.textContent, '');`), value);
}

async function setupCustom(): Promise<void> {
await openTerminal(page, { cols: 40 });
await page.evaluate(`window._linkStateData = {};
window._linkaddon = new window.WebLinksAddon();
window._linkaddon._options.hover = (event, uri, range) => { window._linkStateData = { uri, range }; };
window.term.loadAddon(window._linkaddon);`);
}

async function resetAndHover(col: number, row: number): Promise<void> {
await page.evaluate(`window._linkStateData = {};`);
const rowSelector = `.xterm-rows > :nth-child(${row})`;
await page.hover(`${rowSelector} > :nth-child(${col})`);
}

async function evalLinkStateData(uri: string, range: any): Promise<void> {
const data: ILinkStateData = await page.evaluate(`window._linkStateData`);
assert.equal(data.uri, uri);
assert.deepEqual(data.range, range);
}
23 changes: 23 additions & 0 deletions demo/client.ts
Expand Up @@ -228,6 +228,7 @@ if (document.location.pathname === '/test') {
document.getElementById('sgr-test').addEventListener('click', sgrTest);
document.getElementById('add-decoration').addEventListener('click', addDecoration);
document.getElementById('add-overview-ruler').addEventListener('click', addOverviewRuler);
document.getElementById('weblinks-test').addEventListener('click', testWeblinks);
addVtButtons();
}

Expand Down Expand Up @@ -1148,3 +1149,25 @@ function addVtButtons(): void {

document.querySelector('#vt-container').appendChild(vtFragment);
}

function testWeblinks(): void {
const linkExamples = `
aaa http://example.com aaa http://example.com aaa
¥¥¥ http://example.com aaa http://example.com aaa
aaa http://example.com ¥¥¥ http://example.com aaa
¥¥¥ http://example.com ¥¥¥ http://example.com aaa
aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa
¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥
aaa http://test:password@example.com/some_path aaa
brackets enclosed:
aaa [http://example.de] aaa
aaa (http://example.de) aaa
aaa <http://example.de> aaa
aaa {http://example.de} aaa
ipv6 https://[::1]/with/some?vars=and&a#hash aaa
stop at final '.': This is a sentence with an url to http://example.com.
stop at final '?': Is this the right url http://example.com/?
stop at final '?': Maybe this one http://example.com/with?arguments=false?
`;
term.write(linkExamples.split('\n').join('\r\n'));
}

0 comments on commit 84b8121

Please sign in to comment.