From 1442089c94e99594ed3dfad238fcc9be5333bba7 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Wed, 23 Oct 2019 21:32:47 -0400 Subject: [PATCH 1/4] (enh) Add TokenTree and HTMLRenderer --- CHANGES.md | 1 + src/highlight.js | 111 ++++++++++++++++++++++++--------------- src/lib/html_renderer.js | 46 ++++++++++++++++ src/lib/token_tree.js | 88 +++++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 43 deletions(-) create mode 100644 src/lib/html_renderer.js create mode 100644 src/lib/token_tree.js diff --git a/CHANGES.md b/CHANGES.md index b9ef99e36d..2dbc7b86fe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ New themes: Core Changes: +- split out parse tree generation and HTML rendering concerns (#2404) [Josh Goebel][] - every language can have a `name` attribute now (#2400) [Josh Goebel][] - improve regular expression detect (less false-positives) (#2380) [Josh Goebel][] - make `noHighlightRe` and `languagePrefixRe` configurable (#2374) [Josh Goebel][] diff --git a/src/highlight.js b/src/highlight.js index 9d46bc2687..f735eb98ba 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -4,6 +4,8 @@ https://highlightjs.org/ */ import deepFreeze from './vendor/deep_freeze'; +import TokenTree from './lib/token_tree'; +import HTMLRenderer from './lib/html_renderer'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; import * as MODES from './lib/modes'; @@ -11,10 +13,12 @@ import { compileLanguage } from './lib/mode_compiler'; const escape = utils.escapeHTML; const inherit = utils.inherit; + const { nodeStream, mergeStreams } = utils; const HLJS = function(hljs) { + // Convenience variables for build-in objects var ArrayProto = []; @@ -107,49 +111,48 @@ const HLJS = function(hljs) { return mode.keywords.hasOwnProperty(match_str) && mode.keywords[match_str]; } - function buildSpan(className, insideSpan, leaveOpen, noPrefix) { - if (!leaveOpen && insideSpan === '') return ''; - if (!className) return insideSpan; - - var classPrefix = noPrefix ? '' : options.classPrefix, - openSpan = ''; - - return openSpan + insideSpan + closeSpan; - } - function processKeywords() { - var keyword_match, last_index, match, result; + var keyword_match, last_index, match, result, buf; - if (!top.keywords) - return escape(mode_buffer); + if (!top.keywords) { + emitter.addText(mode_buffer); + return; + } - result = ''; last_index = 0; top.lexemesRe.lastIndex = 0; match = top.lexemesRe.exec(mode_buffer); + buf = ""; while (match) { - result += escape(mode_buffer.substring(last_index, match.index)); + buf += mode_buffer.substring(last_index, match.index); keyword_match = keywordMatch(top, match); + var kind = null; if (keyword_match) { + emitter.addText(buf); + buf = ""; + relevance += keyword_match[1]; - result += buildSpan(keyword_match[0], escape(match[0])); + kind = keyword_match[0]; + emitter.addKeyword(match[0], kind); } else { - result += escape(match[0]); + buf += match[0]; } last_index = top.lexemesRe.lastIndex; match = top.lexemesRe.exec(mode_buffer); } - return result + escape(mode_buffer.substr(last_index)); + buf += mode_buffer.substr(last_index); + emitter.addText(buf); } function processSubLanguage() { + if (mode_buffer === "") return; + var explicit = typeof top.subLanguage === 'string'; + if (explicit && !languages[top.subLanguage]) { - return escape(mode_buffer); + emitter.addText(mode_buffer); + return; } var result = explicit ? @@ -166,16 +169,18 @@ const HLJS = function(hljs) { if (explicit) { continuations[top.subLanguage] = result.top; } - return buildSpan(result.language, result.value, false, true); + emitter.addSublanguage(result.emitter, result.language) } function processBuffer() { - result += (top.subLanguage != null ? processSubLanguage() : processKeywords()); + (top.subLanguage != null ? processSubLanguage() : processKeywords()); mode_buffer = ''; } function startNewMode(mode) { - result += mode.className? buildSpan(mode.className, '', true): ''; + if (mode.className) { + emitter.openNode(mode.className) + } top = Object.create(mode, {parent: {value: top}}); } @@ -223,7 +228,7 @@ const HLJS = function(hljs) { } do { if (top.className) { - result += spanEndTag; + emitter.closeNode(); } if (!top.skip && !top.subLanguage) { relevance += top.relevance; @@ -239,6 +244,16 @@ const HLJS = function(hljs) { return origin.returnEnd ? 0 : lexeme.length; } + function processContinuations() { + var list = [] + for(var current = top; current !== language; current = current.parent) { + if (current.className) { + list.unshift(current.className) + } + } + list.forEach(item => emitter.openNode(item)) + } + var lastMatch = {}; function processLexeme(text_before_match, match) { @@ -273,7 +288,9 @@ const HLJS = function(hljs) { return doBeginMatch(match); } else if (match.type==="illegal" && !ignore_illegals) { // illegal match, we do not continue processing - throw new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '') + '"'); + var err = new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '') + '"'); + err.mode = top; + throw err; } else if (match.type==="end") { var processed = doEndMatch(match); if (processed != undefined) @@ -305,48 +322,55 @@ const HLJS = function(hljs) { compileLanguage(language); var top = continuation || language; var continuations = {}; // keep continuations for sub-languages - var result = '', current; - for(current = top; current !== language; current = current.parent) { - if (current.className) { - result = buildSpan(current.className, '', true) + result; - } - } + var result; + var emitter = new TokenTree(); + processContinuations(); var mode_buffer = ''; var relevance = 0; + var match, processedCount, index = 0; + try { - var match, count, index = 0; while (true) { top.terminators.lastIndex = index; match = top.terminators.exec(codeToHighlight); if (!match) break; - count = processLexeme(codeToHighlight.substring(index, match.index), match); - index = match.index + count; + let beforeMatch = codeToHighlight.substring(index, match.index); + processedCount = processLexeme(beforeMatch, match); + index = match.index + processedCount; } processLexeme(codeToHighlight.substr(index)); - for(current = top; current.parent; current = current.parent) { // close dangling modes - if (current.className) { - result += spanEndTag; - } - } + emitter.closeAllNodes(); + emitter.finalize(); + result = new HTMLRenderer(emitter, options).value(); + return { relevance: relevance, value: result, - illegal:false, language: languageName, + illegal: false, + emitter: emitter, top: top }; } catch (err) { if (err.message && err.message.includes('Illegal')) { return { illegal: true, + illegalBy: { + msg: err.message, + context: codeToHighlight.slice(index-100,index+100), + mode: err.mode + }, + sofar: result, relevance: 0, - value: escape(codeToHighlight) + value: escape(codeToHighlight), + emitter: emitter, }; } else if (SAFE_MODE) { return { relevance: 0, value: escape(codeToHighlight), + emitter: emitter, language: languageName, top: top, errorRaised: err @@ -372,6 +396,7 @@ const HLJS = function(hljs) { languageSubset = languageSubset || options.languages || Object.keys(languages); var result = { relevance: 0, + emitter: new TokenTree(), value: escape(code) }; var second_best = result; diff --git a/src/lib/html_renderer.js b/src/lib/html_renderer.js new file mode 100644 index 0000000000..2ed5f04946 --- /dev/null +++ b/src/lib/html_renderer.js @@ -0,0 +1,46 @@ +const SPAN_CLOSE = ''; + +import {escapeHTML} from './utils'; + +const emitsWrappingTags = (node) => { + return !!node.kind; +} + +export default class HTMLRenderer { + constructor(tree, options) { + this.buffer = ""; + this.classPrefix = options.classPrefix; + tree.walk(this); + } + + // renderer API + + addText(text) { + this.buffer += escapeHTML(text) + } + + openNode(node) { + if (!emitsWrappingTags(node)) return; + + let className = node.kind; + if (!node.sublanguage) + className = `${this.classPrefix}${className}`; + this.span(className); + } + + closeNode(node) { + if (!emitsWrappingTags(node)) return; + + this.buffer += SPAN_CLOSE; + } + + // helpers + + span(className) { + this.buffer += `` + } + + value() { + return this.buffer; + } +} diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js new file mode 100644 index 0000000000..0d568c904b --- /dev/null +++ b/src/lib/token_tree.js @@ -0,0 +1,88 @@ +export default class TokenTree { + constructor() { + this.rootNode = { children: [] }; + this.stack = [ this.rootNode ]; + } + + get top() { + return this.stack[this.stack.length - 1]; + } + + add(node) { + this.top.children.push(node); + } + + addKeyword(text, kind) { + if (text === "") { return; } + + this.openNode(kind); + this.addText(text); + this.closeNode(); + } + + addText(text) { + if (text === "") { return; } + + this.add(text); + } + + addSublanguage({rootNode}, name) { + let node = rootNode; + node.kind = name; + node.sublanguage = true; + this.add(node); + } + + openNode(kind) { + var node = { kind, children: [] }; + this.add(node); + this.stack.push(node); + } + + closeNode() { + if (this.stack.length > 1) + return this.stack.pop(); + } + + closeAllNodes() { + while (this.closeNode()); + } + + toJSON() { + return JSON.stringify(this.rootNode, null, 4); + } + + finalize() { + return; + } + + walk(builder) { + return TokenTree._walk(builder, this.rootNode); + } + + static _walk(builder, node) { + if (typeof node === "string") { + builder.addText(node); + } else if (node.children) { + builder.openNode(node); + node.children.forEach((child) => this._walk(builder, child)) + builder.closeNode(node); + } + return builder; + } + + static _collapse(node) { + if (!node.children) { + return + } + if (node.children.every(el => typeof el === "string")) { + node.text = node.children.join("") + delete node["children"] + } else { + node.children.forEach((child) => { + if (typeof child === "string") return; + TokenTree._collapse(child) + }) + } + } +} From b5164589cbfe1c1d2247538d3bda51e645814b06 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sun, 16 Feb 2020 02:40:03 -0500 Subject: [PATCH 2/4] (chore) better naming of utility methods --- src/highlight.js | 7 +++---- src/lib/token_tree.js | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index f735eb98ba..04daf8b5a3 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -34,7 +34,6 @@ const HLJS = function(hljs) { // Regular expressions used throughout the highlight.js library. var fixMarkupRe = /((^(<[^>]+>|\t|)+|(?:\n)))/gm; - var spanEndTag = ''; var LANGUAGE_NOT_FOUND = "Could not find the language '{}', did you forget to load/include a language module?"; // Global options used when within external APIs. This is modified when @@ -50,7 +49,7 @@ const HLJS = function(hljs) { /* Utility functions */ - function isNotHighlighted(language) { + function shouldNotHighlight(language) { return options.noHighlightRe.test(language); } @@ -73,7 +72,7 @@ const HLJS = function(hljs) { return classes .split(/\s+/) - .find((_class) => isNotHighlighted(_class) || getLanguage(_class)) + .find((_class) => shouldNotHighlight(_class) || getLanguage(_class)) } /** @@ -462,7 +461,7 @@ const HLJS = function(hljs) { var node, originalStream, result, resultNode, text; var language = blockLanguage(block); - if (isNotHighlighted(language)) + if (shouldNotHighlight(language)) return; fire("before:highlightBlock", diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index 0d568c904b..75babbbf3c 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -34,7 +34,7 @@ export default class TokenTree { } openNode(kind) { - var node = { kind, children: [] }; + let node = { kind, children: [] }; this.add(node); this.stack.push(node); } From 1b820c0a7405474d40d014e94f48314703900bae Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 Feb 2020 16:10:41 -0500 Subject: [PATCH 3/4] (chore) split out emitter from TokenTree --- src/highlight.js | 6 ++-- src/lib/token_tree.js | 77 +++++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index 04daf8b5a3..86910cc75b 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -4,7 +4,7 @@ https://highlightjs.org/ */ import deepFreeze from './vendor/deep_freeze'; -import TokenTree from './lib/token_tree'; +import TokenTreeEmitter from './lib/token_tree'; import HTMLRenderer from './lib/html_renderer'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; @@ -322,7 +322,7 @@ const HLJS = function(hljs) { var top = continuation || language; var continuations = {}; // keep continuations for sub-languages var result; - var emitter = new TokenTree(); + var emitter = new TokenTreeEmitter(); processContinuations(); var mode_buffer = ''; var relevance = 0; @@ -395,7 +395,7 @@ const HLJS = function(hljs) { languageSubset = languageSubset || options.languages || Object.keys(languages); var result = { relevance: 0, - emitter: new TokenTree(), + emitter: new TokenTreeEmitter(), value: escape(code) }; var second_best = result; diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index 75babbbf3c..05c634e669 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -1,4 +1,4 @@ -export default class TokenTree { +class TokenTree { constructor() { this.rootNode = { children: [] }; this.stack = [ this.rootNode ]; @@ -8,31 +8,12 @@ export default class TokenTree { return this.stack[this.stack.length - 1]; } + get root() { return this.rootNode }; + add(node) { this.top.children.push(node); } - addKeyword(text, kind) { - if (text === "") { return; } - - this.openNode(kind); - this.addText(text); - this.closeNode(); - } - - addText(text) { - if (text === "") { return; } - - this.add(text); - } - - addSublanguage({rootNode}, name) { - let node = rootNode; - node.kind = name; - node.sublanguage = true; - this.add(node); - } - openNode(kind) { let node = { kind, children: [] }; this.add(node); @@ -52,12 +33,8 @@ export default class TokenTree { return JSON.stringify(this.rootNode, null, 4); } - finalize() { - return; - } - walk(builder) { - return TokenTree._walk(builder, this.rootNode); + return this.constructor._walk(builder, this.rootNode); } static _walk(builder, node) { @@ -73,7 +50,7 @@ export default class TokenTree { static _collapse(node) { if (!node.children) { - return + return; } if (node.children.every(el => typeof el === "string")) { node.text = node.children.join("") @@ -86,3 +63,47 @@ export default class TokenTree { } } } + +/** + Currently this is all private API, but this is the minimal API necessary + that an Emitter must implement to fully support the parser. + + API: + + - addKeyword(text, kind) + - addText(text) + - addSublanguage(emitter, subLangaugeName) + - finalize() + +*/ +export default class TokenTreeEmitter extends TokenTree { + constructor() { + super(); + } + + addKeyword(text, kind) { + if (text === "") { return; } + + this.openNode(kind); + this.addText(text); + this.closeNode(); + } + + addText(text) { + if (text === "") { return; } + + this.add(text); + } + + addSublanguage(emitter, name) { + let node = emitter.root; + node.kind = name; + node.sublanguage = true; + this.add(node); + } + + finalize() { + return; + } + +} From 5cbac511d025a645bc24dfe65868b9e4502ecaf9 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 Feb 2020 16:34:56 -0500 Subject: [PATCH 4/4] (enh) make emitter a beta config option - make emitter configurable - mark this API as beta/private for now - make rendering HTML a responsibility of the emitter (though it can of course delegate) That wraps emitting/rendering up nicely into a single object. --- src/highlight.js | 12 +++++++----- src/lib/token_tree.js | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index 86910cc75b..e14338e5e6 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -5,7 +5,6 @@ https://highlightjs.org/ import deepFreeze from './vendor/deep_freeze'; import TokenTreeEmitter from './lib/token_tree'; -import HTMLRenderer from './lib/html_renderer'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; import * as MODES from './lib/modes'; @@ -44,7 +43,10 @@ const HLJS = function(hljs) { classPrefix: 'hljs-', tabReplace: null, useBR: false, - languages: undefined + languages: undefined, + // beta configuration options, subject to change, welcome to discuss + // https://github.com/highlightjs/highlight.js/issues/1086 + __emitter: TokenTreeEmitter }; /* Utility functions */ @@ -322,7 +324,7 @@ const HLJS = function(hljs) { var top = continuation || language; var continuations = {}; // keep continuations for sub-languages var result; - var emitter = new TokenTreeEmitter(); + var emitter = new options.__emitter(options); processContinuations(); var mode_buffer = ''; var relevance = 0; @@ -341,7 +343,7 @@ const HLJS = function(hljs) { processLexeme(codeToHighlight.substr(index)); emitter.closeAllNodes(); emitter.finalize(); - result = new HTMLRenderer(emitter, options).value(); + result = emitter.toHTML(); return { relevance: relevance, @@ -395,7 +397,7 @@ const HLJS = function(hljs) { languageSubset = languageSubset || options.languages || Object.keys(languages); var result = { relevance: 0, - emitter: new TokenTreeEmitter(), + emitter: new options.__emitter(options), value: escape(code) }; var second_best = result; diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index 05c634e669..8e5df9c313 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -1,3 +1,5 @@ +import HTMLRenderer from './html_renderer'; + class TokenTree { constructor() { this.rootNode = { children: [] }; @@ -68,17 +70,22 @@ class TokenTree { Currently this is all private API, but this is the minimal API necessary that an Emitter must implement to fully support the parser. - API: + Minimal interface: - addKeyword(text, kind) - addText(text) - addSublanguage(emitter, subLangaugeName) - finalize() + - openNode(kind) + - closeNode() + - closeAllNodes() + - toHTML() */ export default class TokenTreeEmitter extends TokenTree { - constructor() { + constructor(options) { super(); + this.options = options; } addKeyword(text, kind) { @@ -102,6 +109,11 @@ export default class TokenTreeEmitter extends TokenTree { this.add(node); } + toHTML() { + let renderer = new HTMLRenderer(this, this.options); + return renderer.value(); + } + finalize() { return; }