Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix weblinks #4288

Merged
merged 16 commits into from Dec 19, 2022
188 changes: 118 additions & 70 deletions addons/xterm-addon-web-links/src/WebLinkProvider.ts
Expand Up @@ -3,7 +3,7 @@
* @license MIT
*/

import { ILinkProvider, ILink, Terminal, IViewportRange } from 'xterm';
import { ILinkProvider, ILink, Terminal, IViewportRange, IBufferLine } from 'xterm';

export interface ILinkProviderOptions {
hover?(event: MouseEvent, text: string, location: IViewportRange): void;
Expand Down Expand Up @@ -45,61 +45,51 @@ export class LinkComputer {
public static computeLink(y: number, regex: RegExp, terminal: Terminal, activate: (event: MouseEvent, uri: string) => void): ILink[] {
const rex = new RegExp(regex.source, (regex.flags || '') + 'g');

const [line, startLineIndex] = LinkComputer._translateBufferLineToStringWithWrap(y - 1, false, terminal);

// Don't try if the wrapped line if excessively large as the regex matching will block the main
// thread.
if (line.length > 1024) {
return [];
}
const [lines, startLineIndex] = LinkComputer._getWindowedLineStrings(y - 1, terminal);
const line = lines.join('');

let match;
let stringIndex = -1;
const result: ILink[] = [];

while ((match = rex.exec(line)) !== null) {
const text = match[1];
if (!text) {
// something matched but does not comply with the given matchIndex
// since this is most likely a bug the regex itself we simply do nothing here
console.log('match found without corresponding matchIndex');
break;
}

// Get index, match.index is for the outer match which includes negated chars
// therefore we cannot use match.index directly, instead we search the position
// of the match group in text again
// also correct regex and string search offsets for the next loop run
stringIndex = line.indexOf(text, stringIndex + 1);
rex.lastIndex = stringIndex + text.length;
if (stringIndex < 0) {
// invalid stringIndex (should not have happened)
break;
while (match = rex.exec(line)) {
const text = match[0];

// check via URL if the matched text would form a proper url
// NOTE: This outsources the ugly url parsing to the browser.
// To avoid surprising auto expansion from URL we additionally
// check afterwards if the provided string resembles the parsed
// one close enough:
// - decodeURI decode path segement back to byte repr
// to detect unicode auto conversion correctly
// - append / also match domain urls w'o any path notion
try {
const url = new URL(text);
const urlText = decodeURI(url.toString());
if (text !== urlText && text + '/' !== urlText) {
continue;
}
} catch (e) {
continue;
}

let endX = stringIndex + text.length;
let endY = startLineIndex + 1;
// map string positions back to buffer positions
// values are 0-based right side excluding
const [startY, startX] = LinkComputer._mapStrIdx(terminal, startLineIndex, 0, match.index);
const [endY, endX] = LinkComputer._mapStrIdx(terminal, startY, startX, text.length);

while (endX > terminal.cols) {
endX -= terminal.cols;
endY++;
}

let startX = stringIndex + 1;
let startY = startLineIndex + 1;
while (startX > terminal.cols) {
startX -= terminal.cols;
startY++;
if (startY === -1 || startX === -1 || endY === -1 || endX === -1) {
continue;
}

// range expects values 1-based right side including, thus +1 except for endX
const range = {
start: {
x: startX,
y: startY
x: startX + 1,
y: startY + 1
},
end: {
x: endX,
y: endY
y: endY + 1
}
};

Expand All @@ -110,41 +100,99 @@ export class LinkComputer {
}

/**
* Gets the entire line for the buffer line
* @param lineIndex The index of the line being translated.
* @param trimRight Whether to trim whitespace to the right.
* Get wrapped content lines for the current line index.
* The top/bottom line expansion stops at whitespaces or length > 2048.
* Returns an array with line strings and the top line index.
*
* NOTE: We pull line strings with trimRight=true on purpose to make sure
* to correctly match urls with early wrapped wide chars. This corrupts the string index
* for 1:1 backmapping to buffer positions, thus needs an additional correction in _mapStrIdx.
*/
private static _translateBufferLineToStringWithWrap(lineIndex: number, trimRight: boolean, terminal: Terminal): [string, number] {
let lineString = '';
let lineWrapsToNext: boolean;
let prevLinesToWrap: boolean;

do {
const line = terminal.buffer.active.getLine(lineIndex);
if (!line) {
break;
private static _getWindowedLineStrings(lineIndex: number, terminal: Terminal): [string[], number] {
let line: IBufferLine | undefined;
let topIdx = lineIndex;
let bottomIdx = lineIndex;
let length = 0;
let content = '';
const lines: string[] = [];

if ((line = terminal.buffer.active.getLine(lineIndex))) {
const currentContent = line.translateToString(true);

// expand top, stop on whitespaces or length > 2048
if (line.isWrapped && currentContent[0] !== ' ') {
length = 0;
while ((line = terminal.buffer.active.getLine(--topIdx)) && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (!line.isWrapped || content.indexOf(' ') !== -1) {
break;
}
}
lines.reverse();
}

if (line.isWrapped) {
lineIndex--;
// append current line
lines.push(currentContent);

// expand bottom, stop on whitespaces or length > 2048
length = 0;
while ((line = terminal.buffer.active.getLine(++bottomIdx)) && line.isWrapped && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (content.indexOf(' ') !== -1) {
break;
}
}
}
return [lines, topIdx];
}

prevLinesToWrap = line.isWrapped;
} while (prevLinesToWrap);

const startLineIndex = lineIndex;

do {
const nextLine = terminal.buffer.active.getLine(lineIndex + 1);
lineWrapsToNext = nextLine ? nextLine.isWrapped : false;
const line = terminal.buffer.active.getLine(lineIndex);
/**
* Map a string index back to buffer positions.
* Returns buffer position as [lineIndex, columnIndex] 0-based,
* or [-1, -1] in case the lookup ran into a non-existing line.
*/
private static _mapStrIdx(terminal: Terminal, lineIndex: number, rowIndex: number, stringIndex: number): [number, number] {
const buf = terminal.buffer.active;
const cell = buf.getNullCell();
let start = rowIndex;
while (stringIndex) {
const line = buf.getLine(lineIndex);
if (!line) {
break;
return [-1, -1];
}
for (let i = start; i < line.length; ++i) {
line.getCell(i, cell);
const chars = cell.getChars();
const width = cell.getWidth();
if (width) {
stringIndex -= chars.length || 1;

// correct stringIndex for early wrapped wide chars:
// - currently only happens at last cell
// - cells to the right are reset with chars='' and width=1 in InputHandler.print
// - follow-up line must be wrapped and contain wide char at first cell
// --> if all these conditions are met, correct stringIndex by +1
if (i === line.length - 1 && chars === '') {
const line = buf.getLine(lineIndex + 1);
if (line && line.isWrapped) {
line.getCell(0, cell);
if (cell.getWidth() === 2) {
stringIndex += 1;
}
}
}
}
if (stringIndex < 0) {
return [lineIndex, i];
}
}
lineString += line.translateToString(!lineWrapsToNext && trimRight).substring(0, terminal.cols);
lineIndex++;
} while (lineWrapsToNext);

return [lineString, startLineIndex];
start = 0;
}
return [lineIndex, start];
}
}
32 changes: 13 additions & 19 deletions addons/xterm-addon-web-links/src/WebLinksAddon.ts
Expand Up @@ -6,25 +6,19 @@
import { Terminal, ITerminalAddon, IDisposable } from 'xterm';
import { ILinkProviderOptions, WebLinkProvider } from './WebLinkProvider';

const protocolClause = '(https?:\\/\\/)';
const domainCharacterSet = '[\\da-z\\.-]+';
const negatedDomainCharacterSet = '[^\\da-z\\.-]+';
const domainBodyClause = '(' + domainCharacterSet + ')';
const tldClause = '([a-z\\.]{2,18})';
const ipClause = '((\\d{1,3}\\.){3}\\d{1,3})';
const localHostClause = '(localhost)';
const portClause = '(:\\d{1,5})';
const hostClause = '((' + domainBodyClause + '\\.' + tldClause + ')|' + ipClause + '|' + localHostClause + ')' + portClause + '?';
const pathCharacterSet = '(\\/[\\/\\w\\.\\-%~:+@]*)*([^:"\'\\s])';
const pathClause = '(' + pathCharacterSet + ')?';
const queryStringHashFragmentCharacterSet = '[0-9\\w\\[\\]\\(\\)\\/\\?\\!#@$%&\'*+,:;~\\=\\.\\-]*';
const queryStringClause = '(\\?' + queryStringHashFragmentCharacterSet + ')?';
const hashFragmentClause = '(#' + queryStringHashFragmentCharacterSet + ')?';
const negatedPathCharacterSet = '[^\\/\\w\\.\\-%]+';
const bodyClause = hostClause + pathClause + queryStringClause + hashFragmentClause;
const start = '(?:^|' + negatedDomainCharacterSet + ')(';
const end = ')($|' + negatedPathCharacterSet + ')';
const strictUrlRegex = new RegExp(start + protocolClause + bodyClause + end);
// consider everthing starting with http:// or https://
// up to first whitespace, `"` or `'` as url
// NOTE: The repeated end clause is needed to not match a dangling `:`
// resembling the old (...)*([^:"\'\\s]) final path clause
// additionally exclude early + final:
// - unsafe from rfc3986: !*'()
// - unsafe chars from rfc1738: {}|\^~[]` (minus [] as we need them for ipv6 adresses, also allow ~)
// also exclude as finals:
// - final interpunction like ,.!?
// - any sort of brackets <>()[]{} (not spec conform, but often used to enclose urls)
// - unsafe chars from rfc1738: {}|\^~[]`
const strictUrlRegex = /https?:[/]{2}[^\s"'!*(){}|\\\^<>`]*[^\s"':,.!?{}|\\\^~\[\]`()<>]/;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing people will complain about regressions here, interesting learning about rfc1738 though. Should we be more explicit about the intended scope of the addon in the readme? I've always viewed it as a decent basic URL detector, but it's not meant to be feature complete and handle a bunch of edge cases.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing people will complain about regressions here

Yes thats the downside of turning the matcher logic upside-down, but I did not find another way while allowing these unicode uris to match. Will see if I get less "overmatching" from the vscode-uri package.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how to go about the risk of overmatching. I think the new matcher is way more capable than the old one, so I'd suggest to go with new one and lets see, if ppl stumble into tons of issues?

And if thats a big issue for peeps, the old addon version would still be around and work as before, so we can fix things iteratively?



function handleLink(event: MouseEvent, uri: string): void {
const newWindow = window.open();
Expand Down
73 changes: 73 additions & 0 deletions addons/xterm-addon-web-links/test/WebLinksAddon.api.ts
Expand Up @@ -14,6 +14,22 @@ let page: Page;
const width = 800;
const height = 600;


interface ILinkStateData {
uri?: string;
range?: {
start: {
x: number;
y: number;
};
end: {
x: number;
y: number;
};
};
}


describe('WebLinksAddon', () => {
before(async function(): Promise<any> {
browser = await launchBrowser();
Expand All @@ -35,6 +51,43 @@ describe('WebLinksAddon', () => {
it('.io', async function(): Promise<any> {
await testHostName('foo.io');
});

describe('correct buffer offsets & uri', () => {
it('all half width', async () => {
setupCustom();
await writeSync(page, 'aaa http://example.com aaa http://example.com aaa');
await resetAndHover(5, 1);
await evalLinkStateData('http://example.com', { start: { x: 5, y: 1 }, end: { x: 22, y: 1 } });
await resetAndHover(1, 2);
await evalLinkStateData('http://example.com', { start: { x: 28, y: 1 }, end: { x: 5, y: 2 } });
});
it('url after full width', async () => {
setupCustom();
await writeSync(page, '¥¥¥ http://example.com ¥¥¥ http://example.com aaa');
await resetAndHover(8, 1);
await evalLinkStateData('http://example.com', { start: { x: 8, y: 1 }, end: { x: 25, y: 1 } });
await resetAndHover(1, 2);
await evalLinkStateData('http://example.com', { start: { x: 34, y: 1 }, end: { x: 11, y: 2 } });
});
it('full width within url and before', async () => {
setupCustom();
await writeSync(page, '¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥');
await resetAndHover(8, 1);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(1, 2);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(17, 2);
await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 17, y: 2 }, end: { x: 19, y: 3 } });
});
it('name + password url after full width and combining', async () => {
setupCustom();
await writeSync(page, '¥¥¥cafe\u0301 http://test:password@example.com/some_path');
await resetAndHover(12, 1);
await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
await resetAndHover(13, 2);
await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
});
});
});

async function testHostName(hostname: string): Promise<void> {
Expand Down Expand Up @@ -64,3 +117,23 @@ async function pollForLinkAtCell(col: number, row: number, value: string): Promi
await pollFor(page, `document.querySelectorAll('${rowSelector} > span[style]').length >= ${value.length}`, true, async () => page.hover(`${rowSelector} > :nth-child(${col})`));
assert.equal(await page.evaluate(`Array.prototype.reduce.call(document.querySelectorAll('${rowSelector} > span[style]'), (a, b) => a + b.textContent, '');`), value);
}

async function setupCustom(): Promise<void> {
await openTerminal(page, { cols: 40 });
await page.evaluate(`window._linkStateData = {};
window._linkaddon = new window.WebLinksAddon();
window._linkaddon._options.hover = (event, uri, range) => { window._linkStateData = { uri, range }; };
window.term.loadAddon(window._linkaddon);`);
}

async function resetAndHover(col: number, row: number): Promise<void> {
await page.evaluate(`window._linkStateData = {};`);
const rowSelector = `.xterm-rows > :nth-child(${row})`;
await page.hover(`${rowSelector} > :nth-child(${col})`);
}

async function evalLinkStateData(uri: string, range: any): Promise<void> {
const data: ILinkStateData = await page.evaluate(`window._linkStateData`);
assert.equal(data.uri, uri);
assert.deepEqual(data.range, range);
}
23 changes: 23 additions & 0 deletions demo/client.ts
Expand Up @@ -228,6 +228,7 @@ if (document.location.pathname === '/test') {
document.getElementById('sgr-test').addEventListener('click', sgrTest);
document.getElementById('add-decoration').addEventListener('click', addDecoration);
document.getElementById('add-overview-ruler').addEventListener('click', addOverviewRuler);
document.getElementById('weblinks-test').addEventListener('click', testWeblinks);
addVtButtons();
}

Expand Down Expand Up @@ -1148,3 +1149,25 @@ function addVtButtons(): void {

document.querySelector('#vt-container').appendChild(vtFragment);
}

function testWeblinks(): void {
const linkExamples = `
aaa http://example.com aaa http://example.com aaa
¥¥¥ http://example.com aaa http://example.com aaa
aaa http://example.com ¥¥¥ http://example.com aaa
¥¥¥ http://example.com ¥¥¥ http://example.com aaa
aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa
¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥
aaa http://test:password@example.com/some_path aaa
brackets enclosed:
aaa [http://example.de] aaa
aaa (http://example.de) aaa
aaa <http://example.de> aaa
aaa {http://example.de} aaa
ipv6 https://[::1]/with/some?vars=and&a#hash aaa
stop at final '.': This is a sentence with an url to http://example.com.
stop at final '?': Is this the right url http://example.com/?
stop at final '?': Maybe this one http://example.com/with?arguments=false?
`;
term.write(linkExamples.split('\n').join('\r\n'));
}