xtermjs · Tyriar · Dec 19, 2022 · Dec 4, 2022 · Dec 4, 2022 · Dec 5, 2022
diff --git a/addons/xterm-addon-web-links/src/WebLinkProvider.ts b/addons/xterm-addon-web-links/src/WebLinkProvider.ts
@@ -3,7 +3,7 @@
  * @license MIT
  */
 
-import { ILinkProvider, ILink, Terminal, IViewportRange } from 'xterm';
+import { ILinkProvider, ILink, Terminal, IViewportRange, IBufferLine } from 'xterm';
 
 export interface ILinkProviderOptions {
   hover?(event: MouseEvent, text: string, location: IViewportRange): void;
@@ -45,61 +45,51 @@ export class LinkComputer {
   public static computeLink(y: number, regex: RegExp, terminal: Terminal, activate: (event: MouseEvent, uri: string) => void): ILink[] {
     const rex = new RegExp(regex.source, (regex.flags || '') + 'g');
 
-    const [line, startLineIndex] = LinkComputer._translateBufferLineToStringWithWrap(y - 1, false, terminal);
-
-    // Don't try if the wrapped line if excessively large as the regex matching will block the main
-    // thread.
-    if (line.length > 1024) {
-      return [];
-    }
+    const [lines, startLineIndex] = LinkComputer._getWindowedLineStrings(y - 1, terminal);
+    const line = lines.join('');
 
     let match;
-    let stringIndex = -1;
     const result: ILink[] = [];
 
-    while ((match = rex.exec(line)) !== null) {
-      const text = match[1];
-      if (!text) {
-        // something matched but does not comply with the given matchIndex
-        // since this is most likely a bug the regex itself we simply do nothing here
-        console.log('match found without corresponding matchIndex');
-        break;
-      }
-
-      // Get index, match.index is for the outer match which includes negated chars
-      // therefore we cannot use match.index directly, instead we search the position
-      // of the match group in text again
-      // also correct regex and string search offsets for the next loop run
-      stringIndex = line.indexOf(text, stringIndex + 1);
-      rex.lastIndex = stringIndex + text.length;
-      if (stringIndex < 0) {
-        // invalid stringIndex (should not have happened)
-        break;
+    while (match = rex.exec(line)) {
+      const text = match[0];
+
+      // check via URL if the matched text would form a proper url
+      // NOTE: This outsources the ugly url parsing to the browser.
+      // To avoid surprising auto expansion from URL we additionally
+      // check afterwards if the provided string resembles the parsed
+      // one close enough:
+      // - decodeURI  decode path segement back to byte repr
+      //              to detect unicode auto conversion correctly
+      // - append /   also match domain urls w'o any path notion
+      try {
+        const url = new URL(text);
+        const urlText = decodeURI(url.toString());
+        if (text !== urlText && text + '/' !== urlText) {
+          continue;
+        }
+      } catch (e) {
+        continue;
       }
 
-      let endX = stringIndex + text.length;
-      let endY = startLineIndex + 1;
+      // map string positions back to buffer positions
+      // values are 0-based right side excluding
+      const [startY, startX] = LinkComputer._mapStrIdx(terminal, startLineIndex, 0, match.index);
+      const [endY, endX] = LinkComputer._mapStrIdx(terminal, startY, startX, text.length);
 
-      while (endX > terminal.cols) {
-        endX -= terminal.cols;
-        endY++;
-      }
-
-      let startX = stringIndex + 1;
-      let startY = startLineIndex + 1;
-      while (startX > terminal.cols) {
-        startX -= terminal.cols;
-        startY++;
+      if (startY === -1 || startX === -1 || endY === -1 || endX === -1) {
+        continue;
       }
 
+      // range expects values 1-based right side including, thus +1 except for endX
       const range = {
         start: {
-          x: startX,
-          y: startY
+          x: startX + 1,
+          y: startY + 1
         },
         end: {
           x: endX,
-          y: endY
+          y: endY + 1
         }
       };
 
@@ -110,41 +100,99 @@ export class LinkComputer {
   }
 
   /**
-   * Gets the entire line for the buffer line
-   * @param lineIndex The index of the line being translated.
-   * @param trimRight Whether to trim whitespace to the right.
+   * Get wrapped content lines for the current line index.
+   * The top/bottom line expansion stops at whitespaces or length > 2048.
+   * Returns an array with line strings and the top line index.
+   *
+   * NOTE: We pull line strings with trimRight=true on purpose to make sure
+   * to correctly match urls with early wrapped wide chars. This corrupts the string index
+   * for 1:1 backmapping to buffer positions, thus needs an additional correction in _mapStrIdx.
    */
-  private static _translateBufferLineToStringWithWrap(lineIndex: number, trimRight: boolean, terminal: Terminal): [string, number] {
-    let lineString = '';
-    let lineWrapsToNext: boolean;
-    let prevLinesToWrap: boolean;
-
-    do {
-      const line = terminal.buffer.active.getLine(lineIndex);
-      if (!line) {
-        break;
+  private static _getWindowedLineStrings(lineIndex: number, terminal: Terminal): [string[], number] {
+    let line: IBufferLine | undefined;
+    let topIdx = lineIndex;
+    let bottomIdx = lineIndex;
+    let length = 0;
+    let content = '';
+    const lines: string[] = [];
+
+    if ((line = terminal.buffer.active.getLine(lineIndex))) {
+      const currentContent = line.translateToString(true);
+
+      // expand top, stop on whitespaces or length > 2048
+      if (line.isWrapped && currentContent[0] !== ' ') {
+        length = 0;
+        while ((line = terminal.buffer.active.getLine(--topIdx)) && length < 2048) {
+          content = line.translateToString(true);
+          length += content.length;
+          lines.push(content);
+          if (!line.isWrapped || content.indexOf(' ') !== -1) {
+            break;
+          }
+        }
+        lines.reverse();
       }
 
-      if (line.isWrapped) {
-        lineIndex--;
+      // append current line
+      lines.push(currentContent);
+
+      // expand bottom, stop on whitespaces or length > 2048
+      length = 0;
+      while ((line = terminal.buffer.active.getLine(++bottomIdx)) && line.isWrapped && length < 2048) {
+        content = line.translateToString(true);
+        length += content.length;
+        lines.push(content);
+        if (content.indexOf(' ') !== -1) {
+          break;
+        }
       }
+    }
+    return [lines, topIdx];
+  }
 
-      prevLinesToWrap = line.isWrapped;
-    } while (prevLinesToWrap);
-
-    const startLineIndex = lineIndex;
-
-    do {
-      const nextLine = terminal.buffer.active.getLine(lineIndex + 1);
-      lineWrapsToNext = nextLine ? nextLine.isWrapped : false;
-      const line = terminal.buffer.active.getLine(lineIndex);
+  /**
+   * Map a string index back to buffer positions.
+   * Returns buffer position as [lineIndex, columnIndex] 0-based,
+   * or [-1, -1] in case the lookup ran into a non-existing line.
+   */
+  private static _mapStrIdx(terminal: Terminal, lineIndex: number, rowIndex: number, stringIndex: number): [number, number] {
+    const buf = terminal.buffer.active;
+    const cell = buf.getNullCell();
+    let start = rowIndex;
+    while (stringIndex) {
+      const line = buf.getLine(lineIndex);
       if (!line) {
-        break;
+        return [-1, -1];
+      }
+      for (let i = start; i < line.length; ++i) {
+        line.getCell(i, cell);
+        const chars = cell.getChars();
+        const width = cell.getWidth();
+        if (width) {
+          stringIndex -= chars.length || 1;
+
+          // correct stringIndex for early wrapped wide chars:
+          // - currently only happens at last cell
+          // - cells to the right are reset with chars='' and width=1 in InputHandler.print
+          // - follow-up line must be wrapped and contain wide char at first cell
+          // --> if all these conditions are met, correct stringIndex by +1
+          if (i === line.length - 1 && chars === '') {
+            const line = buf.getLine(lineIndex + 1);
+            if (line && line.isWrapped) {
+              line.getCell(0, cell);
+              if (cell.getWidth() === 2) {
+                stringIndex += 1;
+              }
+            }
+          }
+        }
+        if (stringIndex < 0) {
+          return [lineIndex, i];
+        }
       }
-      lineString += line.translateToString(!lineWrapsToNext && trimRight).substring(0, terminal.cols);
       lineIndex++;
-    } while (lineWrapsToNext);
-
-    return [lineString, startLineIndex];
+      start = 0;
+    }
+    return [lineIndex, start];
   }
 }
diff --git a/addons/xterm-addon-web-links/src/WebLinksAddon.ts b/addons/xterm-addon-web-links/src/WebLinksAddon.ts
@@ -6,25 +6,19 @@
 import { Terminal, ITerminalAddon, IDisposable } from 'xterm';
 import { ILinkProviderOptions, WebLinkProvider } from './WebLinkProvider';
 
-const protocolClause = '(https?:\\/\\/)';
-const domainCharacterSet = '[\\da-z\\.-]+';
-const negatedDomainCharacterSet = '[^\\da-z\\.-]+';
-const domainBodyClause = '(' + domainCharacterSet + ')';
-const tldClause = '([a-z\\.]{2,18})';
-const ipClause = '((\\d{1,3}\\.){3}\\d{1,3})';
-const localHostClause = '(localhost)';
-const portClause = '(:\\d{1,5})';
-const hostClause = '((' + domainBodyClause + '\\.' + tldClause + ')|' + ipClause + '|' + localHostClause + ')' + portClause + '?';
-const pathCharacterSet = '(\\/[\\/\\w\\.\\-%~:+@]*)*([^:"\'\\s])';
-const pathClause = '(' + pathCharacterSet + ')?';
-const queryStringHashFragmentCharacterSet = '[0-9\\w\\[\\]\\(\\)\\/\\?\\!#@$%&\'*+,:;~\\=\\.\\-]*';
-const queryStringClause = '(\\?' + queryStringHashFragmentCharacterSet + ')?';
-const hashFragmentClause = '(#' + queryStringHashFragmentCharacterSet + ')?';
-const negatedPathCharacterSet = '[^\\/\\w\\.\\-%]+';
-const bodyClause = hostClause + pathClause + queryStringClause + hashFragmentClause;
-const start = '(?:^|' + negatedDomainCharacterSet + ')(';
-const end = ')($|' + negatedPathCharacterSet + ')';
-const strictUrlRegex = new RegExp(start + protocolClause + bodyClause + end);
+// consider everthing starting with http:// or https://
+// up to first whitespace, `"` or `'` as url
+// NOTE: The repeated end clause is needed to not match a dangling `:`
+// resembling the old (...)*([^:"\'\\s]) final path clause
+// additionally exclude early + final:
+// - unsafe from rfc3986: !*'()
+// - unsafe chars from rfc1738: {}|\^~[]` (minus [] as we need them for ipv6 adresses, also allow ~)
+// also exclude as finals:
+// - final interpunction like ,.!?
+// - any sort of brackets <>()[]{} (not spec conform, but often used to enclose urls)
+// - unsafe chars from rfc1738: {}|\^~[]`
+const strictUrlRegex = /https?:[/]{2}[^\s"'!*(){}|\\\^<>`]*[^\s"':,.!?{}|\\\^~\[\]`()<>]/;
+
 
 function handleLink(event: MouseEvent, uri: string): void {
   const newWindow = window.open();

diff --git a/addons/xterm-addon-web-links/test/WebLinksAddon.api.ts b/addons/xterm-addon-web-links/test/WebLinksAddon.api.ts
@@ -35,6 +35,43 @@ describe('WebLinksAddon', () => {
   it('.io', async function(): Promise<any> {
     await testHostName('foo.io');
   });
+
+  describe('correct buffer offsets & uri', () => {
+    it('all half width', async () => {
+      setupCustom();
+      await writeSync(page, 'aaa http://example.com aaa http://example.com aaa');
+      await resetAndHover(5, 1);
+      await evalData('http://example.com', { start: { x: 5, y: 1 }, end: { x: 22, y: 1 } });
+      await resetAndHover(1, 2);
+      await evalData('http://example.com', { start: { x: 28, y: 1 }, end: { x: 5, y: 2 } });
+    });
+    it('url after full width', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥ http://example.com ￥￥￥ http://example.com aaa');
+      await resetAndHover(8, 1);
+      await evalData('http://example.com', { start: { x: 8, y: 1 }, end: { x: 25, y: 1 } });
+      await resetAndHover(1, 2);
+      await evalData('http://example.com', { start: { x: 34, y: 1 }, end: { x: 11, y: 2 } });
+    });
+    it('full width within url and before', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ￥￥￥');
+      await resetAndHover(8, 1);
+      await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
+      await resetAndHover(1, 2);
+      await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
+      await resetAndHover(17, 2);
+      await evalData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 17, y: 2 }, end: { x: 19, y: 3 } });
+    });
+    it('name + password url after full width and combining', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥cafe\u0301 http://test:password@example.com/some_path');
+      await resetAndHover(12, 1);
+      await evalData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
+      await resetAndHover(13, 2);
+      await evalData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
+    });
+  });
 });
 
 async function testHostName(hostname: string): Promise<void> {
@@ -64,3 +101,23 @@ async function pollForLinkAtCell(col: number, row: number, value: string): Promi
   await pollFor(page, `document.querySelectorAll('${rowSelector} > span[style]').length >= ${value.length}`, true, async () => page.hover(`${rowSelector} > :nth-child(${col})`));
   assert.equal(await page.evaluate(`Array.prototype.reduce.call(document.querySelectorAll('${rowSelector} > span[style]'), (a, b) => a + b.textContent, '');`), value);
 }
+
+async function setupCustom(): Promise<void> {
+  await openTerminal(page, { cols: 40 });
+  await page.evaluate(`window._customLinkData = [];
+window._linkaddon = new window.WebLinksAddon();
+window._linkaddon._options.hover = (event, uri, range) => { window._customLinkData.push([uri, range]); };
+window.term.loadAddon(window._linkaddon);`);
+}
+
+async function resetAndHover(col: number, row: number): Promise<void> {
+  await page.evaluate(`window._customLinkData = [];`);
+  const rowSelector = `.xterm-rows > :nth-child(${row})`;
+  await page.hover(`${rowSelector} > :nth-child(${col})`);
+}
+
+async function evalData(uri: string, range: any): Promise<void> {
+  const data: any[] = await page.evaluate(`window._customLinkData[0]`);
+  assert.equal(data[0], uri);
+  assert.deepEqual(data[1], range);
+}
diff --git a/bin/test_weblinks.sh b/bin/test_weblinks.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# all half width
+echo "aaa http://example.com aaa http://example.com aaa"
+
+# full width before
+echo "￥￥￥ http://example.com aaa http://example.com aaa"
+
+# full width between
+echo "aaa http://example.com ￥￥￥ http://example.com aaa"
+
+# full width before and between
+echo "￥￥￥ http://example.com ￥￥￥ http://example.com aaa"
+
+# full width within url
+echo "aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa"
+
+# full width within and before
+echo "￥￥￥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ￥￥￥"
+
+# username + password scheme
+echo "http://test:password@example.com/some_path aaa"
+
+# overly long text with urls with final interpunction
+echo "Lorem ipsum dolor sit amet, consetetur sadipscing elitr https://ko.wikipedia.org/wiki/위키백과:대문, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat: http://test:password@example.com/some_path."
+
+# bracket enclosed urls
+echo "[http://example.de]"
+echo "(http://example.de)"
+echo "<http://example.de>"
+echo "{http://example.de}"
+
+# ipv6 scheme
+echo "ipv6 https://[::1]/with/some?vars=and&a#hash"
diff --git a/src/browser/Linkifier2.ts b/src/browser/Linkifier2.ts
@@ -367,18 +367,10 @@ export class Linkifier2 extends Disposable implements ILinkifier2 {
    * @param position
    */
   private _linkAtPosition(link: ILink, position: IBufferCellPosition): boolean {
-    const sameLine = link.range.start.y === link.range.end.y;
-    const wrappedFromLeft = link.range.start.y < position.y;
-    const wrappedToRight = link.range.end.y > position.y;
-
-    // If the start and end have the same y, then the position must be between start and end x
-    // If not, then handle each case seperately, depending on which way it wraps
-    return ((sameLine && link.range.start.x <= position.x && link.range.end.x >= position.x) ||
-      (wrappedFromLeft && link.range.end.x >= position.x) ||
-      (wrappedToRight && link.range.start.x <= position.x) ||
-      (wrappedFromLeft && wrappedToRight)) &&
-      link.range.start.y <= position.y &&
-      link.range.end.y >= position.y;
+    const lower = link.range.start.y * this._bufferService.cols + link.range.start.x;
+    const upper = link.range.end.y * this._bufferService.cols + link.range.end.x;
+    const current = position.y * this._bufferService.cols + position.x;
+    return (lower <= current && current <= upper);
   }
 
   /**