Merge pull request #4288 from jerch/fix_weblinks

Fix weblinks
xtermjs · Dec 19, 2022 · 84b8121 · 84b8121
2 parents cb4102c + 218cfd2
commit 84b8121
Show file tree

Hide file tree

Showing 9 changed files with 291 additions and 130 deletions.
diff --git a/addons/xterm-addon-web-links/src/WebLinkProvider.ts b/addons/xterm-addon-web-links/src/WebLinkProvider.ts
@@ -3,7 +3,7 @@
  * @license MIT
  */
 
-import { ILinkProvider, ILink, Terminal, IViewportRange } from 'xterm';
+import { ILinkProvider, ILink, Terminal, IViewportRange, IBufferLine } from 'xterm';
 
 export interface ILinkProviderOptions {
   hover?(event: MouseEvent, text: string, location: IViewportRange): void;
@@ -45,61 +45,51 @@ export class LinkComputer {
   public static computeLink(y: number, regex: RegExp, terminal: Terminal, activate: (event: MouseEvent, uri: string) => void): ILink[] {
     const rex = new RegExp(regex.source, (regex.flags || '') + 'g');
 
-    const [line, startLineIndex] = LinkComputer._translateBufferLineToStringWithWrap(y - 1, false, terminal);
-
-    // Don't try if the wrapped line if excessively large as the regex matching will block the main
-    // thread.
-    if (line.length > 1024) {
-      return [];
-    }
+    const [lines, startLineIndex] = LinkComputer._getWindowedLineStrings(y - 1, terminal);
+    const line = lines.join('');
 
     let match;
-    let stringIndex = -1;
     const result: ILink[] = [];
 
-    while ((match = rex.exec(line)) !== null) {
-      const text = match[1];
-      if (!text) {
-        // something matched but does not comply with the given matchIndex
-        // since this is most likely a bug the regex itself we simply do nothing here
-        console.log('match found without corresponding matchIndex');
-        break;
-      }
-
-      // Get index, match.index is for the outer match which includes negated chars
-      // therefore we cannot use match.index directly, instead we search the position
-      // of the match group in text again
-      // also correct regex and string search offsets for the next loop run
-      stringIndex = line.indexOf(text, stringIndex + 1);
-      rex.lastIndex = stringIndex + text.length;
-      if (stringIndex < 0) {
-        // invalid stringIndex (should not have happened)
-        break;
+    while (match = rex.exec(line)) {
+      const text = match[0];
+
+      // check via URL if the matched text would form a proper url
+      // NOTE: This outsources the ugly url parsing to the browser.
+      // To avoid surprising auto expansion from URL we additionally
+      // check afterwards if the provided string resembles the parsed
+      // one close enough:
+      // - decodeURI  decode path segement back to byte repr
+      //              to detect unicode auto conversion correctly
+      // - append /   also match domain urls w'o any path notion
+      try {
+        const url = new URL(text);
+        const urlText = decodeURI(url.toString());
+        if (text !== urlText && text + '/' !== urlText) {
+          continue;
+        }
+      } catch (e) {
+        continue;
       }
 
-      let endX = stringIndex + text.length;
-      let endY = startLineIndex + 1;
+      // map string positions back to buffer positions
+      // values are 0-based right side excluding
+      const [startY, startX] = LinkComputer._mapStrIdx(terminal, startLineIndex, 0, match.index);
+      const [endY, endX] = LinkComputer._mapStrIdx(terminal, startY, startX, text.length);
 
-      while (endX > terminal.cols) {
-        endX -= terminal.cols;
-        endY++;
-      }
-
-      let startX = stringIndex + 1;
-      let startY = startLineIndex + 1;
-      while (startX > terminal.cols) {
-        startX -= terminal.cols;
-        startY++;
+      if (startY === -1 || startX === -1 || endY === -1 || endX === -1) {
+        continue;
       }
 
+      // range expects values 1-based right side including, thus +1 except for endX
       const range = {
         start: {
-          x: startX,
-          y: startY
+          x: startX + 1,
+          y: startY + 1
         },
         end: {
           x: endX,
-          y: endY
+          y: endY + 1
         }
       };
 
@@ -110,41 +100,99 @@ export class LinkComputer {
   }
 
   /**
-   * Gets the entire line for the buffer line
-   * @param lineIndex The index of the line being translated.
-   * @param trimRight Whether to trim whitespace to the right.
+   * Get wrapped content lines for the current line index.
+   * The top/bottom line expansion stops at whitespaces or length > 2048.
+   * Returns an array with line strings and the top line index.
+   *
+   * NOTE: We pull line strings with trimRight=true on purpose to make sure
+   * to correctly match urls with early wrapped wide chars. This corrupts the string index
+   * for 1:1 backmapping to buffer positions, thus needs an additional correction in _mapStrIdx.
    */
-  private static _translateBufferLineToStringWithWrap(lineIndex: number, trimRight: boolean, terminal: Terminal): [string, number] {
-    let lineString = '';
-    let lineWrapsToNext: boolean;
-    let prevLinesToWrap: boolean;
-
-    do {
-      const line = terminal.buffer.active.getLine(lineIndex);
-      if (!line) {
-        break;
+  private static _getWindowedLineStrings(lineIndex: number, terminal: Terminal): [string[], number] {
+    let line: IBufferLine | undefined;
+    let topIdx = lineIndex;
+    let bottomIdx = lineIndex;
+    let length = 0;
+    let content = '';
+    const lines: string[] = [];
+
+    if ((line = terminal.buffer.active.getLine(lineIndex))) {
+      const currentContent = line.translateToString(true);
+
+      // expand top, stop on whitespaces or length > 2048
+      if (line.isWrapped && currentContent[0] !== ' ') {
+        length = 0;
+        while ((line = terminal.buffer.active.getLine(--topIdx)) && length < 2048) {
+          content = line.translateToString(true);
+          length += content.length;
+          lines.push(content);
+          if (!line.isWrapped || content.indexOf(' ') !== -1) {
+            break;
+          }
+        }
+        lines.reverse();
       }
 
-      if (line.isWrapped) {
-        lineIndex--;
+      // append current line
+      lines.push(currentContent);
+
+      // expand bottom, stop on whitespaces or length > 2048
+      length = 0;
+      while ((line = terminal.buffer.active.getLine(++bottomIdx)) && line.isWrapped && length < 2048) {
+        content = line.translateToString(true);
+        length += content.length;
+        lines.push(content);
+        if (content.indexOf(' ') !== -1) {
+          break;
+        }
       }
+    }
+    return [lines, topIdx];
+  }
 
-      prevLinesToWrap = line.isWrapped;
-    } while (prevLinesToWrap);
-
-    const startLineIndex = lineIndex;
-
-    do {
-      const nextLine = terminal.buffer.active.getLine(lineIndex + 1);
-      lineWrapsToNext = nextLine ? nextLine.isWrapped : false;
-      const line = terminal.buffer.active.getLine(lineIndex);
+  /**
+   * Map a string index back to buffer positions.
+   * Returns buffer position as [lineIndex, columnIndex] 0-based,
+   * or [-1, -1] in case the lookup ran into a non-existing line.
+   */
+  private static _mapStrIdx(terminal: Terminal, lineIndex: number, rowIndex: number, stringIndex: number): [number, number] {
+    const buf = terminal.buffer.active;
+    const cell = buf.getNullCell();
+    let start = rowIndex;
+    while (stringIndex) {
+      const line = buf.getLine(lineIndex);
       if (!line) {
-        break;
+        return [-1, -1];
+      }
+      for (let i = start; i < line.length; ++i) {
+        line.getCell(i, cell);
+        const chars = cell.getChars();
+        const width = cell.getWidth();
+        if (width) {
+          stringIndex -= chars.length || 1;
+
+          // correct stringIndex for early wrapped wide chars:
+          // - currently only happens at last cell
+          // - cells to the right are reset with chars='' and width=1 in InputHandler.print
+          // - follow-up line must be wrapped and contain wide char at first cell
+          // --> if all these conditions are met, correct stringIndex by +1
+          if (i === line.length - 1 && chars === '') {
+            const line = buf.getLine(lineIndex + 1);
+            if (line && line.isWrapped) {
+              line.getCell(0, cell);
+              if (cell.getWidth() === 2) {
+                stringIndex += 1;
+              }
+            }
+          }
+        }
+        if (stringIndex < 0) {
+          return [lineIndex, i];
+        }
       }
-      lineString += line.translateToString(!lineWrapsToNext && trimRight).substring(0, terminal.cols);
       lineIndex++;
-    } while (lineWrapsToNext);
-
-    return [lineString, startLineIndex];
+      start = 0;
+    }
+    return [lineIndex, start];
   }
 }
diff --git a/addons/xterm-addon-web-links/src/WebLinksAddon.ts b/addons/xterm-addon-web-links/src/WebLinksAddon.ts
@@ -6,25 +6,19 @@
 import { Terminal, ITerminalAddon, IDisposable } from 'xterm';
 import { ILinkProviderOptions, WebLinkProvider } from './WebLinkProvider';
 
-const protocolClause = '(https?:\\/\\/)';
-const domainCharacterSet = '[\\da-z\\.-]+';
-const negatedDomainCharacterSet = '[^\\da-z\\.-]+';
-const domainBodyClause = '(' + domainCharacterSet + ')';
-const tldClause = '([a-z\\.]{2,18})';
-const ipClause = '((\\d{1,3}\\.){3}\\d{1,3})';
-const localHostClause = '(localhost)';
-const portClause = '(:\\d{1,5})';
-const hostClause = '((' + domainBodyClause + '\\.' + tldClause + ')|' + ipClause + '|' + localHostClause + ')' + portClause + '?';
-const pathCharacterSet = '(\\/[\\/\\w\\.\\-%~:+@]*)*([^:"\'\\s])';
-const pathClause = '(' + pathCharacterSet + ')?';
-const queryStringHashFragmentCharacterSet = '[0-9\\w\\[\\]\\(\\)\\/\\?\\!#@$%&\'*+,:;~\\=\\.\\-]*';
-const queryStringClause = '(\\?' + queryStringHashFragmentCharacterSet + ')?';
-const hashFragmentClause = '(#' + queryStringHashFragmentCharacterSet + ')?';
-const negatedPathCharacterSet = '[^\\/\\w\\.\\-%]+';
-const bodyClause = hostClause + pathClause + queryStringClause + hashFragmentClause;
-const start = '(?:^|' + negatedDomainCharacterSet + ')(';
-const end = ')($|' + negatedPathCharacterSet + ')';
-const strictUrlRegex = new RegExp(start + protocolClause + bodyClause + end);
+// consider everthing starting with http:// or https://
+// up to first whitespace, `"` or `'` as url
+// NOTE: The repeated end clause is needed to not match a dangling `:`
+// resembling the old (...)*([^:"\'\\s]) final path clause
+// additionally exclude early + final:
+// - unsafe from rfc3986: !*'()
+// - unsafe chars from rfc1738: {}|\^~[]` (minus [] as we need them for ipv6 adresses, also allow ~)
+// also exclude as finals:
+// - final interpunction like ,.!?
+// - any sort of brackets <>()[]{} (not spec conform, but often used to enclose urls)
+// - unsafe chars from rfc1738: {}|\^~[]`
+const strictUrlRegex = /https?:[/]{2}[^\s"'!*(){}|\\\^<>`]*[^\s"':,.!?{}|\\\^~\[\]`()<>]/;
+
 
 function handleLink(event: MouseEvent, uri: string): void {
   const newWindow = window.open();

diff --git a/addons/xterm-addon-web-links/test/WebLinksAddon.api.ts b/addons/xterm-addon-web-links/test/WebLinksAddon.api.ts
@@ -14,6 +14,22 @@ let page: Page;
 const width = 800;
 const height = 600;
 
+
+interface ILinkStateData {
+  uri?: string;
+  range?: {
+    start: {
+      x: number;
+      y: number;
+    };
+    end: {
+      x: number;
+      y: number;
+    };
+  };
+}
+
+
 describe('WebLinksAddon', () => {
   before(async function(): Promise<any> {
     browser = await launchBrowser();
@@ -35,6 +51,43 @@ describe('WebLinksAddon', () => {
   it('.io', async function(): Promise<any> {
     await testHostName('foo.io');
   });
+
+  describe('correct buffer offsets & uri', () => {
+    it('all half width', async () => {
+      setupCustom();
+      await writeSync(page, 'aaa http://example.com aaa http://example.com aaa');
+      await resetAndHover(5, 1);
+      await evalLinkStateData('http://example.com', { start: { x: 5, y: 1 }, end: { x: 22, y: 1 } });
+      await resetAndHover(1, 2);
+      await evalLinkStateData('http://example.com', { start: { x: 28, y: 1 }, end: { x: 5, y: 2 } });
+    });
+    it('url after full width', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥ http://example.com ￥￥￥ http://example.com aaa');
+      await resetAndHover(8, 1);
+      await evalLinkStateData('http://example.com', { start: { x: 8, y: 1 }, end: { x: 25, y: 1 } });
+      await resetAndHover(1, 2);
+      await evalLinkStateData('http://example.com', { start: { x: 34, y: 1 }, end: { x: 11, y: 2 } });
+    });
+    it('full width within url and before', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ￥￥￥');
+      await resetAndHover(8, 1);
+      await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
+      await resetAndHover(1, 2);
+      await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 8, y: 1 }, end: { x: 11, y: 2 } });
+      await resetAndHover(17, 2);
+      await evalLinkStateData('https://ko.wikipedia.org/wiki/위키백과:대문', { start: { x: 17, y: 2 }, end: { x: 19, y: 3 } });
+    });
+    it('name + password url after full width and combining', async () => {
+      setupCustom();
+      await writeSync(page, '￥￥￥cafe\u0301 http://test:password@example.com/some_path');
+      await resetAndHover(12, 1);
+      await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
+      await resetAndHover(13, 2);
+      await evalLinkStateData('http://test:password@example.com/some_path', { start: { x: 12, y: 1 }, end: { x: 13, y: 2 } });
+    });
+  });
 });
 
 async function testHostName(hostname: string): Promise<void> {
@@ -64,3 +117,23 @@ async function pollForLinkAtCell(col: number, row: number, value: string): Promi
   await pollFor(page, `document.querySelectorAll('${rowSelector} > span[style]').length >= ${value.length}`, true, async () => page.hover(`${rowSelector} > :nth-child(${col})`));
   assert.equal(await page.evaluate(`Array.prototype.reduce.call(document.querySelectorAll('${rowSelector} > span[style]'), (a, b) => a + b.textContent, '');`), value);
 }
+
+async function setupCustom(): Promise<void> {
+  await openTerminal(page, { cols: 40 });
+  await page.evaluate(`window._linkStateData = {};
+window._linkaddon = new window.WebLinksAddon();
+window._linkaddon._options.hover = (event, uri, range) => { window._linkStateData = { uri, range }; };
+window.term.loadAddon(window._linkaddon);`);
+}
+
+async function resetAndHover(col: number, row: number): Promise<void> {
+  await page.evaluate(`window._linkStateData = {};`);
+  const rowSelector = `.xterm-rows > :nth-child(${row})`;
+  await page.hover(`${rowSelector} > :nth-child(${col})`);
+}
+
+async function evalLinkStateData(uri: string, range: any): Promise<void> {
+  const data: ILinkStateData = await page.evaluate(`window._linkStateData`);
+  assert.equal(data.uri, uri);
+  assert.deepEqual(data.range, range);
+}
diff --git a/demo/client.ts b/demo/client.ts
@@ -228,6 +228,7 @@ if (document.location.pathname === '/test') {
   document.getElementById('sgr-test').addEventListener('click', sgrTest);
   document.getElementById('add-decoration').addEventListener('click', addDecoration);
   document.getElementById('add-overview-ruler').addEventListener('click', addOverviewRuler);
+  document.getElementById('weblinks-test').addEventListener('click', testWeblinks);
   addVtButtons();
 }
 
@@ -1148,3 +1149,25 @@ function addVtButtons(): void {
 
   document.querySelector('#vt-container').appendChild(vtFragment);
 }
+
+function testWeblinks(): void {
+  const linkExamples = `
+aaa http://example.com aaa http://example.com aaa
+￥￥￥ http://example.com aaa http://example.com aaa
+aaa http://example.com ￥￥￥ http://example.com aaa
+￥￥￥ http://example.com ￥￥￥ http://example.com aaa
+aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 aaa
+￥￥￥ https://ko.wikipedia.org/wiki/위키백과:대문 aaa https://ko.wikipedia.org/wiki/위키백과:대문 ￥￥￥
+aaa http://test:password@example.com/some_path aaa
+brackets enclosed:
+aaa [http://example.de] aaa
+aaa (http://example.de) aaa
+aaa <http://example.de> aaa
+aaa {http://example.de} aaa
+ipv6 https://[::1]/with/some?vars=and&a#hash aaa
+stop at final '.': This is a sentence with an url to http://example.com.
+stop at final '?': Is this the right url http://example.com/?
+stop at final '?': Maybe this one http://example.com/with?arguments=false?
+  `;
+  term.write(linkExamples.split('\n').join('\r\n'));
+}