Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix weblinks #4288

Merged
merged 16 commits into from Dec 19, 2022
188 changes: 118 additions & 70 deletions addons/xterm-addon-web-links/src/WebLinkProvider.ts
Expand Up @@ -3,7 +3,7 @@
* @license MIT
*/

import { ILinkProvider, ILink, Terminal, IViewportRange } from 'xterm';
import { ILinkProvider, ILink, Terminal, IViewportRange, IBufferLine } from 'xterm';

export interface ILinkProviderOptions {
hover?(event: MouseEvent, text: string, location: IViewportRange): void;
Expand Down Expand Up @@ -45,61 +45,51 @@ export class LinkComputer {
public static computeLink(y: number, regex: RegExp, terminal: Terminal, activate: (event: MouseEvent, uri: string) => void): ILink[] {
const rex = new RegExp(regex.source, (regex.flags || '') + 'g');

const [line, startLineIndex] = LinkComputer._translateBufferLineToStringWithWrap(y - 1, false, terminal);

// Don't try if the wrapped line if excessively large as the regex matching will block the main
// thread.
if (line.length > 1024) {
return [];
}
const [lines, startLineIndex] = LinkComputer._getWindowedLineStrings(y - 1, terminal);
const line = lines.join('');

let match;
let stringIndex = -1;
const result: ILink[] = [];

while ((match = rex.exec(line)) !== null) {
const text = match[1];
if (!text) {
// something matched but does not comply with the given matchIndex
// since this is most likely a bug the regex itself we simply do nothing here
console.log('match found without corresponding matchIndex');
break;
}

// Get index, match.index is for the outer match which includes negated chars
// therefore we cannot use match.index directly, instead we search the position
// of the match group in text again
// also correct regex and string search offsets for the next loop run
stringIndex = line.indexOf(text, stringIndex + 1);
rex.lastIndex = stringIndex + text.length;
if (stringIndex < 0) {
// invalid stringIndex (should not have happened)
break;
while (match = rex.exec(line)) {
const text = match[0];

// check via URL if the matched text would form a proper url
// NOTE: This outsources the ugly url parsing to the browser.
// To avoid surprising auto expansion from URL we additionally
// check afterwards if the provided string resembles the parsed
// one close enough:
// - decodeURI decode path segement back to byte repr
// to detect unicode auto conversion correctly
// - append / also match domain urls w'o any path notion
try {
const url = new URL(text);
const urlText = decodeURI(url.toString());
if (text !== urlText && text + '/' !== urlText) {
continue;
}
} catch (e) {
continue;
}

let endX = stringIndex + text.length;
let endY = startLineIndex + 1;
// map string positions back to buffer positions
// values are 0-based right side excluding
const [startY, startX] = LinkComputer._mapStrIdx(terminal, startLineIndex, 0, match.index);
const [endY, endX] = LinkComputer._mapStrIdx(terminal, startY, startX, text.length);

while (endX > terminal.cols) {
endX -= terminal.cols;
endY++;
}

let startX = stringIndex + 1;
let startY = startLineIndex + 1;
while (startX > terminal.cols) {
startX -= terminal.cols;
startY++;
if (startY === -1 || startX === -1 || endY === -1 || endX === -1) {
continue;
}

// range expects values 1-based right side including, thus +1 except for endX
const range = {
start: {
x: startX,
y: startY
x: startX + 1,
y: startY + 1
},
end: {
x: endX,
y: endY
y: endY + 1
}
};

Expand All @@ -110,41 +100,99 @@ export class LinkComputer {
}

/**
* Gets the entire line for the buffer line
* @param lineIndex The index of the line being translated.
* @param trimRight Whether to trim whitespace to the right.
* Get wrapped content lines for the current line index.
* The top/bottom line expansion stops at whitespaces or length > 2048.
* Returns an array with line strings and the top line index.
*
* NOTE: We pull line strings with trimRight=true on purpose to make sure
* to correctly match urls with early wrapped wide chars. This corrupts the string index
* for 1:1 backmapping to buffer positions, thus needs an additional correction in _mapStrIdx.
*/
private static _translateBufferLineToStringWithWrap(lineIndex: number, trimRight: boolean, terminal: Terminal): [string, number] {
let lineString = '';
let lineWrapsToNext: boolean;
let prevLinesToWrap: boolean;

do {
const line = terminal.buffer.active.getLine(lineIndex);
if (!line) {
break;
private static _getWindowedLineStrings(lineIndex: number, terminal: Terminal): [string[], number] {
let line: IBufferLine | undefined;
let topIdx = lineIndex;
let bottomIdx = lineIndex;
let length = 0;
let content = '';
const lines: string[] = [];

if ((line = terminal.buffer.active.getLine(lineIndex))) {
const currentContent = line.translateToString(true);

// expand top, stop on whitespaces or length > 2048
if (line.isWrapped && currentContent[0] !== ' ') {
length = 0;
while ((line = terminal.buffer.active.getLine(--topIdx)) && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (!line.isWrapped || content.indexOf(' ') !== -1) {
break;
}
}
lines.reverse();
}

if (line.isWrapped) {
lineIndex--;
// append current line
lines.push(currentContent);

// expand bottom, stop on whitespaces or length > 2048
length = 0;
while ((line = terminal.buffer.active.getLine(++bottomIdx)) && line.isWrapped && length < 2048) {
content = line.translateToString(true);
length += content.length;
lines.push(content);
if (content.indexOf(' ') !== -1) {
break;
}
}
}
return [lines, topIdx];
}

prevLinesToWrap = line.isWrapped;
} while (prevLinesToWrap);

const startLineIndex = lineIndex;

do {
const nextLine = terminal.buffer.active.getLine(lineIndex + 1);
lineWrapsToNext = nextLine ? nextLine.isWrapped : false;
const line = terminal.buffer.active.getLine(lineIndex);
/**
* Map a string index back to buffer positions.
* Returns buffer position as [lineIndex, columnIndex] 0-based,
* or [-1, -1] in case the lookup ran into a non-existing line.
*/
private static _mapStrIdx(terminal: Terminal, lineIndex: number, rowIndex: number, stringIndex: number): [number, number] {
const buf = terminal.buffer.active;
const cell = buf.getNullCell();
let start = rowIndex;
while (stringIndex) {
const line = buf.getLine(lineIndex);
if (!line) {
break;
return [-1, -1];
}
for (let i = start; i < line.length; ++i) {
line.getCell(i, cell);
const chars = cell.getChars();
const width = cell.getWidth();
if (width) {
stringIndex -= chars.length || 1;

// correct stringIndex for early wrapped wide chars:
// - currently only happens at last cell
// - cells to the right are reset with chars='' and width=1 in InputHandler.print
// - follow-up line must be wrapped and contain wide char at first cell
// --> if all these conditions are met, correct stringIndex by +1
if (i === line.length - 1 && chars === '') {
const line = buf.getLine(lineIndex + 1);
if (line && line.isWrapped) {
line.getCell(0, cell);
if (cell.getWidth() === 2) {
stringIndex += 1;
}
}
}
}
if (stringIndex < 0) {
return [lineIndex, i];
}
}
lineString += line.translateToString(!lineWrapsToNext && trimRight).substring(0, terminal.cols);
lineIndex++;
} while (lineWrapsToNext);

return [lineString, startLineIndex];
start = 0;
}
return [lineIndex, start];
}
}
32 changes: 13 additions & 19 deletions addons/xterm-addon-web-links/src/WebLinksAddon.ts
Expand Up @@ -6,25 +6,19 @@
import { Terminal, ITerminalAddon, IDisposable } from 'xterm';
import { ILinkProviderOptions, WebLinkProvider } from './WebLinkProvider';

const protocolClause = '(https?:\\/\\/)';
const domainCharacterSet = '[\\da-z\\.-]+';
const negatedDomainCharacterSet = '[^\\da-z\\.-]+';
const domainBodyClause = '(' + domainCharacterSet + ')';
const tldClause = '([a-z\\.]{2,18})';
const ipClause = '((\\d{1,3}\\.){3}\\d{1,3})';
const localHostClause = '(localhost)';
const portClause = '(:\\d{1,5})';
const hostClause = '((' + domainBodyClause + '\\.' + tldClause + ')|' + ipClause + '|' + localHostClause + ')' + portClause + '?';
const pathCharacterSet = '(\\/[\\/\\w\\.\\-%~:+@]*)*([^:"\'\\s])';
const pathClause = '(' + pathCharacterSet + ')?';
const queryStringHashFragmentCharacterSet = '[0-9\\w\\[\\]\\(\\)\\/\\?\\!#@$%&\'*+,:;~\\=\\.\\-]*';
const queryStringClause = '(\\?' + queryStringHashFragmentCharacterSet + ')?';
const hashFragmentClause = '(#' + queryStringHashFragmentCharacterSet + ')?';
const negatedPathCharacterSet = '[^\\/\\w\\.\\-%]+';
const bodyClause = hostClause + pathClause + queryStringClause + hashFragmentClause;
const start = '(?:^|' + negatedDomainCharacterSet + ')(';
const end = ')($|' + negatedPathCharacterSet + ')';
const strictUrlRegex = new RegExp(start + protocolClause + bodyClause + end);
// consider everthing starting with http:// or https://
// up to first whitespace, `"` or `'` as url
// NOTE: The repeated end clause is needed to not match a dangling `:`
// resembling the old (...)*([^:"\'\\s]) final path clause
// additionally exclude early + final:
// - unsafe from rfc3986: !*'()
// - unsafe chars from rfc1738: {}|\^~[]` (minus [] as we need them for ipv6 adresses, also allow ~)
// also exclude as finals:
// - final interpunction like ,.!?
// - any sort of brackets <>()[]{} (not spec conform, but often used to enclose urls)
// - unsafe chars from rfc1738: {}|\^~[]`
const strictUrlRegex = /https?:[/]{2}[^\s"'!*(){}|\\\^<>`]*[^\s"':,.!?{}|\\\^~\[\]`()<>]/;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing people will complain about regressions here, interesting learning about rfc1738 though. Should we be more explicit about the intended scope of the addon in the readme? I've always viewed it as a decent basic URL detector, but it's not meant to be feature complete and handle a bunch of edge cases.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing people will complain about regressions here

Yes thats the downside of turning the matcher logic upside-down, but I did not find another way while allowing these unicode uris to match. Will see if I get less "overmatching" from the vscode-uri package.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how to go about the risk of overmatching. I think the new matcher is way more capable than the old one, so I'd suggest to go with new one and lets see, if ppl stumble into tons of issues?

And if thats a big issue for peeps, the old addon version would still be around and work as before, so we can fix things iteratively?



function handleLink(event: MouseEvent, uri: string): void {
const newWindow = window.open();
Expand Down
57 changes: 57 additions & 0 deletions addons/xterm-addon-web-links/test/WebLinksAddon.api.ts
Expand Up @@ -35,6 +35,43 @@ describe('WebLinksAddon', () => {
it('.io', async function(): Promise<any> {
await testHostName('foo.io');
});

describe('correct buffer offsets & uri', () => {
it('all half width', async () => {
setupCustom();
await writeSync(page, 'aaa http://example.com aaa http://example.com aaa');
await resetAndHover(5, 1);
await evalData('http://example.com', { start: { x: 5, y: 1 }, end: { x: 22, y: 1 } });
await resetAndHover(1, 2);
await evalData('http://example.com', { start: { x: 28, y: 1 }, end: { x: 5, y: 2 } });
});
it('url after full width', async () => {
setupCustom();
await writeSync(page, '¥¥¥ http://example.com ¥¥¥ http://example.com aaa');
await resetAndHover(8, 1);
await evalData('http://example.com', { start: { x: 8, y: 1 }, end: { x: 25, y: 1 } });
await resetAndHover(1, 2);
await evalData('http://example.com', { start: { x: 34, y: 1 }, end: { x: 11, y: 2 } });
});
it('full width within url and before', async () => {
setupCustom();
await writeSync(page, '¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥');
await resetAndHover(8, 1);
await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(1, 2);
await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
await resetAndHover(17, 2);
await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 17, y: 2 }, end: { x: 19, y: 3 } });
});
it('name + password url after full width and combining', async () => {
setupCustom();
await writeSync(page, '¥¥¥cafe\u0301 http://test:password@example.com/some_path');
await resetAndHover(12, 1);
await evalData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
await resetAndHover(13, 2);
await evalData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
});
});
});

async function testHostName(hostname: string): Promise<void> {
Expand Down Expand Up @@ -64,3 +101,23 @@ async function pollForLinkAtCell(col: number, row: number, value: string): Promi
await pollFor(page, `document.querySelectorAll('${rowSelector} > span[style]').length >= ${value.length}`, true, async () => page.hover(`${rowSelector} > :nth-child(${col})`));
assert.equal(await page.evaluate(`Array.prototype.reduce.call(document.querySelectorAll('${rowSelector} > span[style]'), (a, b) => a + b.textContent, '');`), value);
}

async function setupCustom(): Promise<void> {
await openTerminal(page, { cols: 40 });
await page.evaluate(`window._customLinkData = [];
window._linkaddon = new window.WebLinksAddon();
window._linkaddon._options.hover = (event, uri, range) => { window._customLinkData.push([uri, range]); };
window.term.loadAddon(window._linkaddon);`);
}

async function resetAndHover(col: number, row: number): Promise<void> {
await page.evaluate(`window._customLinkData = [];`);
const rowSelector = `.xterm-rows > :nth-child(${row})`;
await page.hover(`${rowSelector} > :nth-child(${col})`);
}

async function evalData(uri: string, range: any): Promise<void> {
const data: any[] = await page.evaluate(`window._customLinkData[0]`);
jerch marked this conversation as resolved.
Show resolved Hide resolved
assert.equal(data[0], uri);
assert.deepEqual(data[1], range);
}
34 changes: 34 additions & 0 deletions bin/test_weblinks.sh
@@ -0,0 +1,34 @@
#!/bin/bash

# all half width
echo "aaa http://example.com aaa http://example.com aaa"
jerch marked this conversation as resolved.
Show resolved Hide resolved

# full width before
echo "¥¥¥ http://example.com aaa http://example.com aaa"

# full width between
echo "aaa http://example.com ¥¥¥ http://example.com aaa"

# full width before and between
echo "¥¥¥ http://example.com ¥¥¥ http://example.com aaa"

# full width within url
echo "aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa"

# full width within and before
echo "¥¥¥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ¥¥¥"

# username + password scheme
echo "http://test:password@example.com/some_path aaa"

# overly long text with urls with final interpunction
echo "Lorem ipsum dolor sit amet, consetetur sadipscing elitr https://ko.wikipedia.org/wiki/위키백과:대문, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat: http://test:password@example.com/some_path."

# bracket enclosed urls
echo "[http://example.de]"
echo "(http://example.de)"
echo "<http://example.de>"
echo "{http://example.de}"

# ipv6 scheme
echo "ipv6 https://[::1]/with/some?vars=and&a#hash"
16 changes: 4 additions & 12 deletions src/browser/Linkifier2.ts
Expand Up @@ -367,18 +367,10 @@ export class Linkifier2 extends Disposable implements ILinkifier2 {
* @param position
*/
private _linkAtPosition(link: ILink, position: IBufferCellPosition): boolean {
const sameLine = link.range.start.y === link.range.end.y;
const wrappedFromLeft = link.range.start.y < position.y;
const wrappedToRight = link.range.end.y > position.y;

// If the start and end have the same y, then the position must be between start and end x
// If not, then handle each case seperately, depending on which way it wraps
return ((sameLine && link.range.start.x <= position.x && link.range.end.x >= position.x) ||
(wrappedFromLeft && link.range.end.x >= position.x) ||
(wrappedToRight && link.range.start.x <= position.x) ||
(wrappedFromLeft && wrappedToRight)) &&
link.range.start.y <= position.y &&
link.range.end.y >= position.y;
const lower = link.range.start.y * this._bufferService.cols + link.range.start.x;
const upper = link.range.end.y * this._bufferService.cols + link.range.end.x;
const current = position.y * this._bufferService.cols + position.x;
return (lower <= current && current <= upper);
}

/**
Expand Down