From 3d64fd97ad229a3159fdd3e3aff01af88838ac0a Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 1 May 2021 21:56:30 -0400 Subject: [PATCH] beginScope and endScope (#3159) * (chore) import types, plus typos * enh(parser) beginScope, endScope - also adds multi-match support to endScope * changelog --- .eslintrc.js | 6 ++- CHANGES.md | 2 + docs/mode-reference.rst | 37 ++++++++++++++- src/highlight.js | 74 ++++++++++++++++++++++-------- src/lib/compiler_extensions.js | 11 +++-- src/lib/ext/multi_class.js | 83 +++++++++++++++++++++++++++++----- src/lib/mode_compiler.js | 13 ++++-- src/lib/modes.js | 3 ++ src/lib/response.js | 3 ++ src/lib/token_tree.js | 1 + test/parser/beginEndScope.js | 72 +++++++++++++++++++++++++++++ types/index.d.ts | 35 ++++++++------ 12 files changed, 285 insertions(+), 55 deletions(-) create mode 100644 test/parser/beginEndScope.js diff --git a/.eslintrc.js b/.eslintrc.js index ba8f67d028..3458c627fc 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -43,7 +43,11 @@ module.exports = { overrides: [ { files: ["types/*.ts", "src/*.ts"], - parser: '@typescript-eslint/parser' + parser: '@typescript-eslint/parser', + rules: { + "import/no-duplicates": "off", + "import/extensions": "off" + } }, { files: ["src/**/*.js"], diff --git a/CHANGES.md b/CHANGES.md index b66b51fb8d..2b1cfe0095 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -43,6 +43,8 @@ Language Grammars: Parser: +- enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][] +- enh(parser) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][] - enh(parser) `highlightElement` now always tags blocks with a consistent `language-[name]` class [Josh Goebel][] - subLanguage `span` tags now also always have the `language-` prefix added - enh(parser) support multi-class matchers (#3081) [Josh Goebel][] diff --git a/docs/mode-reference.rst b/docs/mode-reference.rst index d8222b2d43..cfe2e9be26 100644 --- a/docs/mode-reference.rst +++
b/docs/mode-reference.rst @@ -177,8 +177,25 @@ begin Regular expression starting a mode. For example a single quote for strings or two forward slashes for C-style comments. If absent, ``begin`` defaults to a regexp that matches anything, so the mode starts immediately. +This may also be an array. See beginScope. -You can also pass an array when you need to individually highlight portions of the match using different scopes: +beginScope +^^^^^^^^^^ + +- **type**: scope +- **type**: numeric index of scopes (when ``begin`` is an array) + +This can be used to apply a scope to just the begin match portion. + +:: + + { + begin: /def/, + beginScope: "keyword" + } + +You can also use ``beginScope`` to individually highlight portions of the match +with different scopes by passing an array to ``begin``. :: @@ -188,7 +205,7 @@ You can also pass an array when you need to individually highlight portions of t /\s+/, hljs.IDENT_RE ], - scope: { + beginScope: { 1: "keyword", 3: "title" }, @@ -206,6 +223,22 @@ capture groups of their own yet.* If your regexes uses groups at all, they For more info see issue `#3095 `_. +endScope +^^^^^^^^ + +- **type**: scope +- **type**: numeric index of scopes (when ``end`` is an array) + +This has the same behavior as ``beginScope`` but applies to the content of the +``end`` match. 
+ +:: + + { + begin: /FIRST/, + end: /LAST/, + endScope: "built_in" + } match diff --git a/src/highlight.js b/src/highlight.js index f848731131..305b7eca9a 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -14,6 +14,27 @@ import { compileLanguage } from './lib/mode_compiler.js'; import * as packageJSON from '../package.json'; import * as logger from "./lib/logger.js"; +/** +@typedef {import('highlight.js').Mode} Mode +@typedef {import('highlight.js').CompiledMode} CompiledMode +@typedef {import('highlight.js').Language} Language +@typedef {import('highlight.js').HLJSApi} HLJSApi +@typedef {import('highlight.js').HLJSPlugin} HLJSPlugin +@typedef {import('highlight.js').PluginEvent} PluginEvent +@typedef {import('highlight.js').HLJSOptions} HLJSOptions +@typedef {import('highlight.js').LanguageFn} LanguageFn +@typedef {import('highlight.js').HighlightedHTMLElement} HighlightedHTMLElement +@typedef {import('highlight.js').BeforeHighlightContext} BeforeHighlightContext +@typedef {import('highlight.js/private').MatchType} MatchType +@typedef {import('highlight.js/private').KeywordData} KeywordData +@typedef {import('highlight.js/private').EnhancedMatch} EnhancedMatch +@typedef {import('highlight.js/private').AnnotatedError} AnnotatedError +@typedef {import('highlight.js').AutoHighlightResult} AutoHighlightResult +@typedef {import('highlight.js').HighlightOptions} HighlightOptions +@typedef {import('highlight.js').HighlightResult} HighlightResult +*/ + + const escape = utils.escapeHTML; const inherit = utils.inherit; const NO_MATCH = Symbol("nomatch"); @@ -95,7 +116,7 @@ const HLJS = function(hljs) { * NEW API * highlight(code, {lang, ignoreIllegals}) * - * @param {string} codeOrlanguageName - the language to use for highlighting + * @param {string} codeOrLanguageName - the language to use for highlighting * @param {string | HighlightOptions} optionsOrCode - the code to highlight * @param {boolean} [ignoreIllegals] - whether to ignore illegal matches, default is to 
bail * @param {CompiledMode} [continuation] - current continuation mode, if any @@ -108,11 +129,11 @@ const HLJS = function(hljs) { * @property {CompiledMode} top - top of the current mode stack * @property {boolean} illegal - indicates whether any illegal matches were found */ - function highlight(codeOrlanguageName, optionsOrCode, ignoreIllegals, continuation) { + function highlight(codeOrLanguageName, optionsOrCode, ignoreIllegals, continuation) { let code = ""; let languageName = ""; if (typeof optionsOrCode === "object") { - code = codeOrlanguageName; + code = codeOrLanguageName; ignoreIllegals = optionsOrCode.ignoreIllegals; languageName = optionsOrCode.language; // continuation not supported at all via the new API @@ -122,7 +143,7 @@ const HLJS = function(hljs) { // old API logger.deprecated("10.7.0", "highlight(lang, code, ...args) has been deprecated."); logger.deprecated("10.7.0", "Please use highlight(code, options) instead.\nhttps://github.com/highlightjs/highlight.js/issues/2277"); - languageName = codeOrlanguageName; + languageName = codeOrLanguageName; code = optionsOrCode; } @@ -246,18 +267,19 @@ const HLJS = function(hljs) { * @param {CompiledMode} mode * @param {RegExpMatchArray} match */ - function emitMultiClass(mode, match) { + function emitMultiClass(scope, match) { let i = 1; // eslint-disable-next-line no-undefined while (match[i] !== undefined) { - if (!mode._emit[i]) { i++; continue; } - const klass = language.classNameAliases[mode.scope[i]] || mode.scope[i]; + if (!scope._emit[i]) { i++; continue; } + const klass = language.classNameAliases[scope[i]] || scope[i]; const text = match[i]; if (klass) { emitter.addKeyword(text, klass); } else { modeBuffer = text; processKeywords(); + modeBuffer = ""; } i++; } @@ -268,13 +290,21 @@ const HLJS = function(hljs) { * @param {RegExpMatchArray} match */ function startNewMode(mode, match) { - if (mode.isMultiClass) { - // at this point modeBuffer should just be the match - emitMultiClass(mode, match); 
- modeBuffer = ""; - } else if (mode.scope) { + if (mode.scope && typeof mode.scope === "string") { emitter.openNode(language.classNameAliases[mode.scope] || mode.scope); } + if (mode.beginScope) { + // beginScope just wraps the begin match itself in a scope + if (mode.beginScope._wrap) { + emitter.addKeyword(modeBuffer, language.classNameAliases[mode.beginScope._wrap] || mode.beginScope._wrap); + modeBuffer = ""; + } else if (mode.beginScope._multi) { + // at this point modeBuffer should just be the match + emitMultiClass(mode.beginScope, match); + modeBuffer = ""; + } + } + top = Object.create(mode, { parent: { value: top } }); return top; } @@ -316,7 +346,7 @@ const HLJS = function(hljs) { */ function doIgnore(lexeme) { if (top.matcher.regexIndex === 0) { - // no more regexs to potentially match here, so we move the cursor forward one + // no more regexes to potentially match here, so we move the cursor forward one // space modeBuffer += lexeme[0]; return 1; @@ -375,7 +405,13 @@ const HLJS = function(hljs) { if (!endMode) { return NO_MATCH; } const origin = top; - if (origin.skip) { + if (top.endScope && top.endScope._wrap) { + processBuffer(); + emitter.addKeyword(lexeme, top.endScope._wrap); + } else if (top.endScope && top.endScope._multi) { + processBuffer(); + emitMultiClass(top.endScope, match); + } else if (origin.skip) { modeBuffer += lexeme; } else { if (!(origin.returnEnd || origin.excludeEnd)) { @@ -417,7 +453,7 @@ const HLJS = function(hljs) { /** * Process an individual match * - * @param {string} textBeforeMatch - text preceeding the match (since the last match) + * @param {string} textBeforeMatch - text preceding the match (since the last match) * @param {EnhancedMatch} [match] - the match itself */ function processLexeme(textBeforeMatch, match) { @@ -499,7 +535,7 @@ const HLJS = function(hljs) { throw new Error('Unknown language: "' + languageName + '"'); } - const md = compileLanguage(language, { plugins }); + const md = 
compileLanguage(language); let result = ''; /** @type {CompiledMode} */ let top = continuation || md; @@ -701,12 +737,12 @@ const HLJS = function(hljs) { language: result.language, // TODO: remove with version 11.0 re: result.relevance, - relavance: result.relevance + relevance: result.relevance }; if (result.secondBest) { element.secondBest = { language: result.secondBest.language, - relavance: result.secondBest.relevance + relevance: result.secondBest.relevance }; } } @@ -928,7 +964,7 @@ const HLJS = function(hljs) { } } - // merge all the modes/regexs into our main object + // merge all the modes/regexes into our main object Object.assign(hljs, MODES); return hljs; diff --git a/src/lib/compiler_extensions.js b/src/lib/compiler_extensions.js index fddebae451..61b6965058 100644 --- a/src/lib/compiler_extensions.js +++ b/src/lib/compiler_extensions.js @@ -1,5 +1,10 @@ import * as regex from './regex.js'; +/** +@typedef {import('highlight.js').CallbackResponse} CallbackResponse +@typedef {import('highlight.js').CompilerExt} CompilerExt +*/ + // Grammar extensions / plugins // See: https://github.com/highlightjs/highlight.js/issues/2833 @@ -24,7 +29,7 @@ import * as regex from './regex.js'; * @param {RegExpMatchArray} match * @param {CallbackResponse} response */ -function skipIfhasPrecedingDot(match, response) { +function skipIfHasPrecedingDot(match, response) { const before = match.input[match.index - 1]; if (before === ".") { response.ignoreMatch(); @@ -35,7 +40,7 @@ function skipIfhasPrecedingDot(match, response) { * * @type {CompilerExt} */ -export function scopeClassName(mode, parent) { +export function scopeClassName(mode, _parent) { // eslint-disable-next-line no-undefined if (mode.className !== undefined) { mode.scope = mode.className; @@ -57,7 +62,7 @@ export function beginKeywords(mode, parent) { // doesn't allow spaces in keywords anyways and we still check for the boundary // first mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + 
')(?!\\.)(?=\\b|\\s)'; - mode.__beforeBegin = skipIfhasPrecedingDot; + mode.__beforeBegin = skipIfHasPrecedingDot; mode.keywords = mode.keywords || mode.beginKeywords; delete mode.beginKeywords; diff --git a/src/lib/ext/multi_class.js b/src/lib/ext/multi_class.js index 4ae373931d..4f840e51f3 100644 --- a/src/lib/ext/multi_class.js +++ b/src/lib/ext/multi_class.js @@ -2,6 +2,10 @@ import * as logger from "../../lib/logger.js"; import * as regex from "../regex.js"; +/** +@typedef {import('highlight.js').CompiledMode} CompiledMode +*/ + const MultiClassError = new Error(); /** @@ -30,13 +34,14 @@ const MultiClassError = new Error(); * * @param {CompiledMode} mode * @param {Array} regexes + * @param {{key: "beginScope"|"endScope"}} opts */ -function remapScopeNames(mode, regexes) { +function remapScopeNames(mode, regexes, { key }) { let offset = 0; - const scopeNames = mode.scope; + const scopeNames = mode[key]; /** @type Record */ const emit = {}; - /** @type Record */ + /** @type Record */ const positions = {}; for (let i = 1; i <= regexes.length; i++) { @@ -46,28 +51,82 @@ function remapScopeNames(mode, regexes) { } // we use _emit to keep track of which match groups are "top-level" to avoid double // output from inside match groups - mode._emit = emit; - mode.scope = positions; + mode[key] = positions; + mode[key]._emit = emit; + mode[key]._multi = true; } /** * @param {CompiledMode} mode */ -export function MultiClass(mode) { +function beginMultiClass(mode) { if (!Array.isArray(mode.begin)) return; if (mode.skip || mode.excludeBegin || mode.returnBegin) { - logger.error("skip, excludeBegin, returnBegin not compatible with multi-class"); + logger.error("skip, excludeBegin, returnBegin not compatible with beginScope: {}"); throw MultiClassError; } - if (typeof mode.scope !== "object" || mode.scope == null) { - logger.error("scope/className must be object"); + if (typeof mode.beginScope !== "object" || mode.beginScope === null) { + logger.error("beginScope must be 
object"); throw MultiClassError; } - const matchers = mode.begin; - remapScopeNames(mode, matchers); + remapScopeNames(mode, mode.begin, {key: "beginScope"}); mode.begin = regex._rewriteBackreferences(mode.begin, { joinWith: "" }); - mode.isMultiClass = true; +} + +/** + * @param {CompiledMode} mode + */ +function endMultiClass(mode) { + if (!Array.isArray(mode.end)) return; + + if (mode.skip || mode.excludeEnd || mode.returnEnd) { + logger.error("skip, excludeEnd, returnEnd not compatible with endScope: {}"); + throw MultiClassError; + } + + if (typeof mode.endScope !== "object" || mode.endScope === null) { + logger.error("endScope must be object"); + throw MultiClassError; + } + + remapScopeNames(mode, mode.end, {key: "endScope"}); + mode.end = regex._rewriteBackreferences(mode.end, { joinWith: "" }); +} + +/** + * this exists only to allow `scope: {}` to be used beside `match:` + * Otherwise `beginScope` would be necessary and that would look weird + + { + match: [ /def/, /\w+/ ] + scope: { 1: "keyword" , 2: "title" } + } + + * @param {CompiledMode} mode + */ +function scopeSugar(mode) { + if (mode.scope && typeof mode.scope === "object" && mode.scope !== null) { + mode.beginScope = mode.scope; + delete mode.scope; + } +} + +/** + * @param {CompiledMode} mode + */ +export function MultiClass(mode) { + scopeSugar(mode) + + if (typeof mode.beginScope === "string") { + mode.beginScope = { _wrap: mode.beginScope }; + } + if (typeof mode.endScope === "string") { + mode.endScope = { _wrap: mode.endScope }; + } + + beginMultiClass(mode) + endMultiClass(mode) } diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 18c59648c3..3ba6159bb8 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -5,6 +5,14 @@ import { beforeMatchExt } from "./exts/before_match.js"; import { compileKeywords } from "./compile_keywords.js"; import { MultiClass } from "./ext/multi_class.js"; +/** +@typedef {import('highlight.js').Mode} Mode +@typedef
{import('highlight.js').CompiledMode} CompiledMode +@typedef {import('highlight.js').Language} Language +@typedef {import('highlight.js').HLJSPlugin} HLJSPlugin +@typedef {import('highlight.js').CompiledLanguage} CompiledLanguage +*/ + // compilation /** @@ -13,12 +21,11 @@ import { MultiClass } from "./ext/multi_class.js"; * Given the raw result of a language definition (Language), compiles this so * that it is ready for highlighting code. * @param {Language} language - * @param {{plugins: HLJSPlugin[]}} opts * @returns {CompiledLanguage} */ -export function compileLanguage(language, { plugins }) { +export function compileLanguage(language) { /** - * Builds a regex with the case sensativility of the current language + * Builds a regex with the case sensitivity of the current language * * @param {RegExp | string} value * @param {boolean} [global] diff --git a/src/lib/modes.js b/src/lib/modes.js index 2a86c54a03..5bbff22328 100644 --- a/src/lib/modes.js +++ b/src/lib/modes.js @@ -1,6 +1,9 @@ import { inherit } from './utils.js'; import * as regex from './regex.js'; +/** @typedef {import('highlight.js').Mode} Mode */ +/** @typedef {import('highlight.js').ModeCallback} ModeCallback */ + // Common regexps export const MATCH_NOTHING_RE = /\b\B/; export const IDENT_RE = '[a-zA-Z]\\w*'; diff --git a/src/lib/response.js b/src/lib/response.js index 5e2826c883..4444cda8aa 100644 --- a/src/lib/response.js +++ b/src/lib/response.js @@ -1,4 +1,7 @@ +/** @typedef {import('highlight.js').CallbackResponse} CallbackResponse */ +/** @typedef {import('highlight.js').CompiledMode} CompiledMode */ /** @implements CallbackResponse */ + export default class Response { /** * @param {CompiledMode} mode diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index b748d6b3df..cd388c60af 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -2,6 +2,7 @@ import HTMLRenderer from './html_renderer.js'; /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} | 
string} Node */ /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} } DataNode */ +/** @typedef {import('highlight.js').Emitter} Emitter */ /** */ class TokenTree { diff --git a/test/parser/beginEndScope.js b/test/parser/beginEndScope.js new file mode 100644 index 0000000000..e2cf9af7b8 --- /dev/null +++ b/test/parser/beginEndScope.js @@ -0,0 +1,72 @@ +'use strict'; + +const hljs = require('../../build'); +hljs.debugMode(); + +describe('beginScope and endScope', () => { + before(() => { + const grammar = function() { + return { + contains: [ + { + begin: /xyz/, + end: /123/, + scope: "string", + beginScope: "red", + endScope: "green" + }, + { + begin: /123/, + end: [ /a/,/((b))/,/c/,/d/ ], + endScope: { 1: "apple", 2: "boy", 4: "delta" } + }, + { + begin: /dumb/, + end: /luck/, + beginScope: "red", + endScope: "green" + }, + { + begin: /abc/, + beginScope: "letters", + contains: [ + { match: /def/, scope: "more" } + ] + } + ] + } + }; + hljs.registerLanguage("test", grammar); + }); + after(() => { + hljs.unregisterLanguage("test"); + }); + it('should support multi-class', () => { + const code = "123 abcd"; + const result = hljs.highlight(code, { language: 'test' }); + + result.value.should.equal(`123 abcd`); + }) + it('should support an outer scope wrapper', () => { + const code = "xyz me 123"; + const result = hljs.highlight(code, { language: 'test' }); + + result.value.should.equal( + `` + + `xyz me 123` + + ``); + }) + it('should support textual beginScope & endScope pair', () => { + const code = "dumb really luck"; + const result = hljs.highlight(code, { language: 'test' }); + + result.value.should.equal(`dumb really luck`); + }); + it('should support textual beginScope', () => { + const code = "abcdef"; + const result = hljs.highlight(code, { language: 'test' }); + + result.value.should.equal(`abcdef`); + }); + +}); diff --git a/types/index.d.ts b/types/index.d.ts index 5283491532..0680fc251e 100644 --- a/types/index.d.ts +++ 
b/types/index.d.ts @@ -3,8 +3,20 @@ // For TS consumers who use Node and don't have dom in their tsconfig lib, import the necessary types here. /// +declare module 'highlight.js/private' { + import { CompiledMode, Mode, Language } from "highlight.js"; + + type MatchType = "begin" | "end" | "illegal" + type EnhancedMatch = RegExpMatchArray & {rule: CompiledMode, type: MatchType} + type AnnotatedError = Error & {mode?: Mode | Language, languageName?: string, badRule?: Mode} + + type KeywordData = [string, number]; + type KeywordDict = Record +} declare module 'highlight.js' { + import { KeywordDict} from "highlight.js/private"; + export type HLJSApi = PublicApi & ModesAPI export interface VuePlugin { @@ -12,7 +24,7 @@ declare module 'highlight.js' { } interface PublicApi { - highlight: (codeOrlanguageName: string, optionsOrCode: string | HighlightOptions, ignoreIllegals?: boolean, continuation?: Mode) => HighlightResult + highlight: (codeOrLanguageName: string, optionsOrCode: string | HighlightOptions, ignoreIllegals?: boolean, continuation?: Mode) => HighlightResult highlightAuto: (code: string, languageSubset?: string[]) => AutoHighlightResult highlightBlock: (element: HTMLElement) => void highlightElement: (element: HTMLElement) => void @@ -161,16 +173,7 @@ declare module 'highlight.js' { addSublanguage(emitter: Emitter, subLanguageName: string): void } - /************ - PRIVATE API - ************/ - - /* for jsdoc annotations in the JS source files */ - - type AnnotatedError = Error & {mode?: Mode | Language, languageName?: string, badRule?: Mode} - type HighlightedHTMLElement = HTMLElement & {result?: object, secondBest?: object, parentNode: HTMLElement} - type EnhancedMatch = RegExpMatchArray & {rule: CompiledMode, type: MatchType} - type MatchType = "begin" | "end" | "illegal" + export type HighlightedHTMLElement = HTMLElement & {result?: object, secondBest?: object, parentNode: HTMLElement} /* modes */ @@ -179,15 +182,12 @@ declare module 'highlight.js' { 
"on:begin"?: ModeCallback } - interface CompiledLanguage extends LanguageDetail, CompiledMode { + export interface CompiledLanguage extends LanguageDetail, CompiledMode { isCompiled: true contains: CompiledMode[] keywords: Record } - type KeywordData = [string, number]; - type KeywordDict = Record - export type CompiledMode = Omit & { contains: CompiledMode[] @@ -203,6 +203,8 @@ declare module 'highlight.js' { isMultiClass?: boolean starts?: CompiledMode parent?: CompiledMode + beginScope?: Record & {_emit?: Record, _multi?: boolean, _wrap?: string} + endScope?: Record & {_emit?: Record, _multi?: boolean, _wrap?: string} } interface ModeDetails { @@ -210,6 +212,9 @@ declare module 'highlight.js' { match?: RegExp | string end?: RegExp | string className?: string + _emit?: Record + scope?: string | Record + beginScope?: string | Record contains?: ("self" | Mode)[] endsParent?: boolean endsWithParent?: boolean