diff --git a/CHANGES.md b/CHANGES.md
index b9ef99e36d..2dbc7b86fe 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -11,6 +11,7 @@ New themes:
Core Changes:
+- split out parse tree generation and HTML rendering concerns (#2404) [Josh Goebel][]
- every language can have a `name` attribute now (#2400) [Josh Goebel][]
- improve regular expression detect (less false-positives) (#2380) [Josh Goebel][]
- make `noHighlightRe` and `languagePrefixRe` configurable (#2374) [Josh Goebel][]
diff --git a/src/highlight.js b/src/highlight.js
index 9d46bc2687..e14338e5e6 100644
--- a/src/highlight.js
+++ b/src/highlight.js
@@ -4,6 +4,7 @@ https://highlightjs.org/
*/
import deepFreeze from './vendor/deep_freeze';
+import TokenTreeEmitter from './lib/token_tree';
import * as regex from './lib/regex';
import * as utils from './lib/utils';
import * as MODES from './lib/modes';
@@ -11,10 +12,12 @@ import { compileLanguage } from './lib/mode_compiler';
const escape = utils.escapeHTML;
const inherit = utils.inherit;
+
const { nodeStream, mergeStreams } = utils;
const HLJS = function(hljs) {
+
// Convenience variables for build-in objects
var ArrayProto = [];
@@ -30,7 +33,6 @@ const HLJS = function(hljs) {
// Regular expressions used throughout the highlight.js library.
var fixMarkupRe = /((^(<[^>]+>|\t|)+|(?:\n)))/gm;
- var spanEndTag = '';
var LANGUAGE_NOT_FOUND = "Could not find the language '{}', did you forget to load/include a language module?";
// Global options used when within external APIs. This is modified when
@@ -41,12 +43,15 @@ const HLJS = function(hljs) {
classPrefix: 'hljs-',
tabReplace: null,
useBR: false,
- languages: undefined
+ languages: undefined,
+ // beta configuration options, subject to change, welcome to discuss
+ // https://github.com/highlightjs/highlight.js/issues/1086
+ __emitter: TokenTreeEmitter
};
/* Utility functions */
- function isNotHighlighted(language) {
+ function shouldNotHighlight(language) {
return options.noHighlightRe.test(language);
}
@@ -69,7 +74,7 @@ const HLJS = function(hljs) {
return classes
.split(/\s+/)
- .find((_class) => isNotHighlighted(_class) || getLanguage(_class))
+ .find((_class) => shouldNotHighlight(_class) || getLanguage(_class))
}
/**
@@ -107,49 +112,48 @@ const HLJS = function(hljs) {
return mode.keywords.hasOwnProperty(match_str) && mode.keywords[match_str];
}
- function buildSpan(className, insideSpan, leaveOpen, noPrefix) {
- if (!leaveOpen && insideSpan === '') return '';
- if (!className) return insideSpan;
-
- var classPrefix = noPrefix ? '' : options.classPrefix,
- openSpan = '';
-
- return openSpan + insideSpan + closeSpan;
- }
-
function processKeywords() {
- var keyword_match, last_index, match, result;
+ var keyword_match, last_index, match, buf; // buf: plain text gathered since the last emitted keyword
- if (!top.keywords)
- return escape(mode_buffer);
+ if (!top.keywords) { // mode defines no keywords: the whole buffer is plain text
+ emitter.addText(mode_buffer);
+ return;
+ }
- result = '';
last_index = 0;
top.lexemesRe.lastIndex = 0;
match = top.lexemesRe.exec(mode_buffer);
+ buf = ""; // walk mode_buffer lexeme by lexeme: keywords become nodes, everything else is batched as text
while (match) {
- result += escape(mode_buffer.substring(last_index, match.index));
+ buf += mode_buffer.substring(last_index, match.index); // text skipped between lexemes
keyword_match = keywordMatch(top, match);
+ var kind = null;
if (keyword_match) {
+ emitter.addText(buf); // flush pending text so it precedes the keyword node
+ buf = "";
+ // keyword_match is used as a pair: [0] is the node kind, [1] is added to relevance
relevance += keyword_match[1];
- result += buildSpan(keyword_match[0], escape(match[0]));
+ kind = keyword_match[0];
+ emitter.addKeyword(match[0], kind);
} else {
- result += escape(match[0]);
+ buf += match[0]; // not a keyword: keep batching as plain text
}
last_index = top.lexemesRe.lastIndex;
match = top.lexemesRe.exec(mode_buffer);
}
- return result + escape(mode_buffer.substr(last_index));
+ buf += mode_buffer.substr(last_index); // tail after the last lexeme
+ emitter.addText(buf);
}
function processSubLanguage() {
+ if (mode_buffer === "") return;
+
var explicit = typeof top.subLanguage === 'string';
+
if (explicit && !languages[top.subLanguage]) {
- return escape(mode_buffer);
+ emitter.addText(mode_buffer);
+ return;
}
var result = explicit ?
@@ -166,16 +170,18 @@ const HLJS = function(hljs) {
if (explicit) {
continuations[top.subLanguage] = result.top;
}
- return buildSpan(result.language, result.value, false, true);
+ emitter.addSublanguage(result.emitter, result.language)
}
function processBuffer() {
- result += (top.subLanguage != null ? processSubLanguage() : processKeywords());
+ if (top.subLanguage != null) { processSubLanguage(); } else { processKeywords(); } // each one writes to the emitter itself
mode_buffer = '';
}
function startNewMode(mode) {
- result += mode.className? buildSpan(mode.className, '', true): '';
+ // a mode without a className still becomes `top`, but opens no emitter node
+ var scopeName = mode.className;
+ if (scopeName) { emitter.openNode(scopeName); }
top = Object.create(mode, {parent: {value: top}});
}
@@ -223,7 +229,7 @@ const HLJS = function(hljs) {
}
do {
if (top.className) {
- result += spanEndTag;
+ emitter.closeNode();
}
if (!top.skip && !top.subLanguage) {
relevance += top.relevance;
@@ -239,6 +245,16 @@ const HLJS = function(hljs) {
return origin.returnEnd ? 0 : lexeme.length;
}
+ // Opens an emitter node for every classed mode between `top` and the language
+ // root. The parent chain is walked innermost first, so names are prepended.
+ function processContinuations() {
+ var classNames = [];
+ for (var mode = top; mode !== language; mode = mode.parent) {
+ if (mode.className) { classNames.unshift(mode.className); }
+ }
+ classNames.forEach((name) => { emitter.openNode(name); });
+ }
+
var lastMatch = {};
function processLexeme(text_before_match, match) {
@@ -273,7 +289,9 @@ const HLJS = function(hljs) {
return doBeginMatch(match);
} else if (match.type==="illegal" && !ignore_illegals) {
// illegal match, we do not continue processing
- throw new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '') + '"');
+ var err = new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '') + '"');
+ err.mode = top;
+ throw err;
} else if (match.type==="end") {
var processed = doEndMatch(match);
if (processed != undefined)
@@ -305,48 +323,55 @@ const HLJS = function(hljs) {
compileLanguage(language);
var top = continuation || language;
var continuations = {}; // keep continuations for sub-languages
- var result = '', current;
- for(current = top; current !== language; current = current.parent) {
- if (current.className) {
- result = buildSpan(current.className, '', true) + result;
- }
- }
+ var result;
+ var emitter = new options.__emitter(options);
+ processContinuations();
var mode_buffer = '';
var relevance = 0;
+ var match, processedCount, index = 0;
+
try {
- var match, count, index = 0;
while (true) {
top.terminators.lastIndex = index;
match = top.terminators.exec(codeToHighlight);
if (!match)
break;
- count = processLexeme(codeToHighlight.substring(index, match.index), match);
- index = match.index + count;
+ let beforeMatch = codeToHighlight.substring(index, match.index);
+ processedCount = processLexeme(beforeMatch, match);
+ index = match.index + processedCount;
}
processLexeme(codeToHighlight.substr(index));
- for(current = top; current.parent; current = current.parent) { // close dangling modes
- if (current.className) {
- result += spanEndTag;
- }
- }
+ emitter.closeAllNodes();
+ emitter.finalize();
+ result = emitter.toHTML();
+
return {
relevance: relevance,
value: result,
- illegal:false,
language: languageName,
+ illegal: false,
+ emitter: emitter,
top: top
};
} catch (err) {
if (err.message && err.message.includes('Illegal')) {
return {
illegal: true,
+ illegalBy: {
+ msg: err.message,
+ context: codeToHighlight.slice(index-100,index+100),
+ mode: err.mode
+ },
+ sofar: result,
relevance: 0,
- value: escape(codeToHighlight)
+ value: escape(codeToHighlight),
+ emitter: emitter,
};
} else if (SAFE_MODE) {
return {
relevance: 0,
value: escape(codeToHighlight),
+ emitter: emitter,
language: languageName,
top: top,
errorRaised: err
@@ -372,6 +397,7 @@ const HLJS = function(hljs) {
languageSubset = languageSubset || options.languages || Object.keys(languages);
var result = {
relevance: 0,
+ emitter: new options.__emitter(options),
value: escape(code)
};
var second_best = result;
@@ -437,7 +463,7 @@ const HLJS = function(hljs) {
var node, originalStream, result, resultNode, text;
var language = blockLanguage(block);
- if (isNotHighlighted(language))
+ if (shouldNotHighlight(language))
return;
fire("before:highlightBlock",
diff --git a/src/lib/html_renderer.js b/src/lib/html_renderer.js
new file mode 100644
index 0000000000..2ed5f04946
--- /dev/null
+++ b/src/lib/html_renderer.js
@@ -0,0 +1,46 @@
+const SPAN_CLOSE = '</span>'; // closes the tag written by HTMLRenderer#span
+
+import {escapeHTML} from './utils';
+// A node is wrapped in a <span> only when it carries a `kind`.
+const emitsWrappingTags = (node) => {
+  return !!node.kind;
+};
+/** Renders a token tree to an HTML string by passing itself to tree.walk(). */
+export default class HTMLRenderer {
+  constructor(tree, options) {
+    this.buffer = "";
+    this.classPrefix = options.classPrefix;
+    tree.walk(this); // the whole tree is rendered here, at construction time
+  }
+
+  // renderer API (callbacks invoked during tree.walk)
+
+  addText(text) {
+    this.buffer += escapeHTML(text);
+  }
+
+  openNode(node) {
+    if (!emitsWrappingTags(node)) return;
+    // nodes flagged `sublanguage` use their kind verbatim; all others get classPrefix
+    let className = node.kind;
+    if (!node.sublanguage)
+      className = `${this.classPrefix}${className}`;
+    this.span(className);
+  }
+
+  closeNode(node) {
+    if (!emitsWrappingTags(node)) return;
+    // same guard as openNode, so every closing tag pairs with an opening one
+    this.buffer += SPAN_CLOSE;
+  }
+
+  // helpers
+
+  span(className) {
+    this.buffer += `<span class="${className}">`;
+  }
+
+  value() {
+    return this.buffer;
+  }
+}
diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js
new file mode 100644
index 0000000000..8e5df9c313
--- /dev/null
+++ b/src/lib/token_tree.js
@@ -0,0 +1,121 @@
+import HTMLRenderer from './html_renderer';
+
+class TokenTree {
+  constructor() {
+    const rootNode = { children: [] };
+    this.rootNode = rootNode;
+    this.stack = [rootNode]; // open nodes, outermost first; closeNode never pops the root
+  }
+
+  get top() { // innermost node that is still open
+    const { stack } = this;
+    return stack[stack.length - 1];
+  }
+
+  get root() { return this.rootNode; }
+
+  add(node) { // node: a text string or a node object; goes under the innermost open node
+    this.top.children.push(node);
+  }
+
+  openNode(kind) {
+    const opened = { kind, children: [] };
+    this.add(opened);
+    this.stack.push(opened); // stays open until closeNode()
+  }
+
+  /** Closes and returns the innermost open node; yields undefined once only the root is left. */
+  closeNode() {
+    return this.stack.length > 1 ? this.stack.pop() : undefined;
+  }
+
+  closeAllNodes() {
+    while (this.closeNode()) { /* pop until only the root remains */ }
+  }
+
+  toJSON() {
+    return JSON.stringify(this.rootNode, null, 4);
+  }
+
+  walk(builder) { // depth first over the whole tree; returns the builder
+    return this.constructor._walk(builder, this.rootNode);
+  }
+
+  static _walk(builder, node) {
+    if (typeof node === "string") {
+      builder.addText(node);
+      return builder;
+    }
+    if (!node.children) return builder; // nothing to descend into
+    builder.openNode(node);
+    node.children.forEach((child) => { this._walk(builder, child); });
+    builder.closeNode(node);
+    return builder;
+  }
+
+  /** Recursively replaces `children` with a joined `text` on nodes holding only strings. */
+  static _collapse(node) {
+    const kids = node.children;
+    if (!kids) return;
+    if (!kids.every((kid) => typeof kid === "string")) {
+      kids.forEach((kid) => { if (typeof kid !== "string") TokenTree._collapse(kid); });
+      return;
+    }
+    node.text = kids.join("");
+    delete node.children;
+  }
+}
+
+/**
+ Currently this is all private API, but this is the minimal API necessary
+ that an Emitter must implement to fully support the parser.
+
+ Minimal interface:
+
+ - addKeyword(text, kind)
+ - addText(text)
+ - addSublanguage(emitter, subLanguageName)
+ - finalize()
+ - openNode(kind)
+ - closeNode()
+ - closeAllNodes()
+ - toHTML()
+
+*/
+export default class TokenTreeEmitter extends TokenTree {
+  constructor(options) {
+    super();
+    this.options = options; // handed to the HTML renderer in toHTML()
+  }
+
+  /** Emits `text` wrapped in its own node of the given kind; empty text emits nothing. */
+  addKeyword(text, kind) {
+    if (text !== "") {
+      this.openNode(kind);
+      this.addText(text);
+      this.closeNode();
+    }
+  }
+
+  /** Appends plain text to the innermost open node; empty text is skipped. */
+  addText(text) {
+    if (text !== "") this.add(text);
+  }
+
+  /** Grafts another emitter's tree in as a child node, tagging its root with `name`. */
+  addSublanguage(emitter, name) {
+    const subRoot = emitter.root; // mutated in place below, not copied
+    subRoot.kind = name;
+    subRoot.sublanguage = true;
+    this.add(subRoot);
+  }
+
+  toHTML() {
+    return new HTMLRenderer(this, this.options).value();
+  }
+
+  finalize() {
+    // nothing to finish for the tree emitter; part of the emitter interface
+  }
+
+}