diff --git a/CHANGES.md b/CHANGES.md index b9ef99e36d..2dbc7b86fe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ New themes: Core Changes: +- split out parse tree generation and HTML rendering concerns (#2404) [Josh Goebel][] - every language can have a `name` attribute now (#2400) [Josh Goebel][] - improve regular expression detect (less false-positives) (#2380) [Josh Goebel][] - make `noHighlightRe` and `languagePrefixRe` configurable (#2374) [Josh Goebel][] diff --git a/src/highlight.js b/src/highlight.js index 9d46bc2687..e14338e5e6 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -4,6 +4,7 @@ https://highlightjs.org/ */ import deepFreeze from './vendor/deep_freeze'; +import TokenTreeEmitter from './lib/token_tree'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; import * as MODES from './lib/modes'; @@ -11,10 +12,12 @@ import { compileLanguage } from './lib/mode_compiler'; const escape = utils.escapeHTML; const inherit = utils.inherit; + const { nodeStream, mergeStreams } = utils; const HLJS = function(hljs) { + // Convenience variables for build-in objects var ArrayProto = []; @@ -30,7 +33,6 @@ const HLJS = function(hljs) { // Regular expressions used throughout the highlight.js library. var fixMarkupRe = /((^(<[^>]+>|\t|)+|(?:\n)))/gm; - var spanEndTag = ''; var LANGUAGE_NOT_FOUND = "Could not find the language '{}', did you forget to load/include a language module?"; // Global options used when within external APIs. 
/**
 * Tells whether a class name marks a block that must be left un-highlighted.
 *
 * @param {string} language - candidate class name / language token
 * @returns {boolean} true when the token matches `options.noHighlightRe`
 */
function shouldNotHighlight(language) {
  const { noHighlightRe } = options;
  return noHighlightRe.test(language);
}
/**
 * Splits the pending `mode_buffer` into plain text and keywords for the
 * current mode (`top`) and forwards the pieces to `emitter`.
 *
 * - When the mode defines no keywords the whole buffer is emitted as text.
 * - Otherwise every lexeme found by `top.lexemesRe` is looked up with
 *   `keywordMatch`; a hit is a `[kind, relevance]` pair: the relevance is
 *   added to the running `relevance` total and the lexeme is emitted through
 *   `emitter.addKeyword(text, kind)`. Everything between keywords is
 *   accumulated and emitted through `emitter.addText`.
 *
 * Reads/writes the enclosing-scope variables `top`, `mode_buffer`,
 * `emitter` and `relevance`; returns nothing.
 */
function processKeywords() {
  if (!top.keywords) {
    emitter.addText(mode_buffer);
    return;
  }

  let lastIndex = 0;
  let buf = "";

  // lexemesRe is reused across calls, so always restart from the beginning
  top.lexemesRe.lastIndex = 0;
  let match = top.lexemesRe.exec(mode_buffer);

  while (match) {
    // text between the previous lexeme and this one
    buf += mode_buffer.substring(lastIndex, match.index);

    const keywordInfo = keywordMatch(top, match);
    if (keywordInfo) {
      // flush the plain text collected so far before emitting the keyword
      emitter.addText(buf);
      buf = "";

      relevance += keywordInfo[1];
      emitter.addKeyword(match[0], keywordInfo[0]);
    } else {
      buf += match[0];
    }

    lastIndex = top.lexemesRe.lastIndex;
    match = top.lexemesRe.exec(mode_buffer);
  }

  // trailing text after the last lexeme
  buf += mode_buffer.substring(lastIndex);
  emitter.addText(buf);
}
/**
 * Re-opens, on the emitter, every named mode that is still active when
 * highlighting resumes from a continuation.
 *
 * Walks the `parent` chain from the current mode (`top`) up to, but not
 * including, the root `language` mode, and calls `emitter.openNode` for each
 * mode that has a `className`, outermost mode first.
 *
 * Reads the enclosing-scope variables `top`, `language` and `emitter`;
 * returns nothing.
 */
function processContinuations() {
  const classNames = [];

  for (let current = top; current !== language; current = current.parent) {
    if (current.className) {
      classNames.push(current.className);
    }
  }

  // collected innermost-first; nodes must be opened outermost-first
  classNames.reverse().forEach((className) => emitter.openNode(className));
}
// Closing tag appended for every node that opened a wrapping <span>.
const SPAN_CLOSE = '</span>';

/**
 * Only nodes carrying a `kind` are rendered with a wrapping tag; nodes
 * without one (such as a tree root) contribute just their children.
 */
const emitsWrappingTags = (node) => {
  return !!node.kind;
};

/**
 * Renders a token tree to an HTML string.
 *
 * The constructor immediately asks the tree to walk itself with this object
 * as the visitor; the tree is expected to call `addText`, `openNode` and
 * `closeNode` in document order. The finished markup is read via `value()`.
 */
export default class HTMLRenderer {
  /**
   * @param {{walk: function(HTMLRenderer): *}} tree - tree to render
   * @param {{classPrefix: string}} options - `classPrefix` is prepended to
   *   the class of every node that is not a sublanguage root
   */
  constructor(tree, options) {
    this.buffer = "";
    this.classPrefix = options.classPrefix;
    tree.walk(this);
  }

  // renderer API

  /** Appends HTML-escaped text. */
  addText(text) {
    this.buffer += escapeHTML(text);
  }

  /** Opens a `<span>` for `node` when it has a `kind`. */
  openNode(node) {
    if (!emitsWrappingTags(node)) return;

    let className = node.kind;
    // sublanguage roots use the bare language name as their class
    if (!node.sublanguage) {
      className = `${this.classPrefix}${className}`;
    }
    this.span(className);
  }

  /** Closes the `<span>` opened for `node`, if one was opened. */
  closeNode(node) {
    if (!emitsWrappingTags(node)) return;

    this.buffer += SPAN_CLOSE;
  }

  // helpers

  /** Appends an opening span tag carrying `className`. */
  span(className) {
    this.buffer += `<span class="${className}">`;
  }

  /** @returns {string} the HTML accumulated so far */
  value() {
    return this.buffer;
  }
}
/**
  Currently this is all private API, but this is the minimal API necessary
  that an Emitter must implement to fully support the parser.

  Minimal interface:

  - addKeyword(text, kind)
  - addText(text)
  - addSublanguage(emitter, subLanguageName)
  - finalize()
  - openNode(kind)
  - closeNode()
  - closeAllNodes()
  - toHTML()

*/
export default class TokenTreeEmitter extends TokenTree {
  /**
   * @param {object} options - kept as-is and handed to the HTML renderer
   */
  constructor(options) {
    super();
    this.options = options;
  }

  /**
   * Adds `text` wrapped in its own node of the given `kind`.
   * Empty text adds nothing.
   */
  addKeyword(text, kind) {
    if (text === "") { return; }

    this.openNode(kind);
    this.addText(text);
    this.closeNode();
  }

  /** Adds a plain text child to the currently open node; ignores "". */
  addText(text) {
    if (text === "") { return; }

    this.add(text);
  }

  /**
   * Embeds the tree built by another emitter as a child of the currently
   * open node, tagged with the sublanguage `name`.
   *
   * The other emitter's root is shallow-copied rather than tagged in place,
   * so that emitter can still be rendered on its own afterwards without
   * gaining an extra wrapper. Its child list is shared, not cloned.
   */
  addSublanguage(emitter, name) {
    const node = Object.assign({}, emitter.root, {
      kind: name,
      sublanguage: true,
    });
    this.add(node);
  }

  /** @returns {string} HTML rendering of the whole tree */
  toHTML() {
    const renderer = new HTMLRenderer(this, this.options);
    return renderer.value();
  }

  /** Hook called once parsing is complete; nothing to do for this emitter. */
  finalize() {
    return;
  }
}