Skip to content

Commit

Permalink
split code into modules and simplify (highlightjs#2402)
Browse files Browse the repository at this point in the history
* remove dead code
* pull out deep freeze
* move out escapeRe
* move out reStr to source
* move out countMatchGroups
* move reTest to regex.startsWith
* move escape to escapeHTML
* move inherit to utils
* move MODES out
* simplify defining API
* simplify, use `find`
* remove useless constant
* use rollup to build the UMD browser build also
* fix markdown to be strict safe
* no more need for worker stub
* move nodestream to utils
* Cleaner code with ES2015 tricks
* reduce use of lodash
* we do not need arrays here
* simplify
* remove dead config
  • Loading branch information
joshgoebel authored and taufik-nurrohman committed Feb 18, 2020
1 parent 6870e0c commit cab7d36
Show file tree
Hide file tree
Showing 22 changed files with 441 additions and 606 deletions.
506 changes: 63 additions & 443 deletions src/highlight.js

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions src/languages/markdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ Category: common, markup
*/

export default function(hljs) {
INLINE_HTML = {
const INLINE_HTML = {
begin: '<', end: '>',
subLanguage: 'xml',
relevance: 0
};
HORIZONTAL_RULE = {
const HORIZONTAL_RULE = {
begin: '^[-\\*]{3,}', end: '$'
};
CODE = {
const CODE = {
className: 'code',
variants: [
// TODO: fix to allow these to work with sublanguage also
Expand All @@ -36,13 +36,13 @@ export default function(hljs) {
}
]
};
LIST = {
const LIST = {
className: 'bullet',
begin: '^[ \t]*([*+-]|(\\d+\\.))(?=\\s+)',
end: '\\s+',
excludeEnd: true
};
LINK_REFERENCE = {
const LINK_REFERENCE = {
begin: /^\[[^\n]+\]:/,
returnBegin: true,
contains: [
Expand All @@ -58,7 +58,7 @@ export default function(hljs) {
}
]
};
LINK = {
const LINK = {
begin: '\\[.+?\\][\\(\\[].*?[\\)\\]]',
returnBegin: true,
contains: [
Expand All @@ -82,15 +82,15 @@ export default function(hljs) {
],
relevance: 10
};
BOLD = {
const BOLD = {
className: 'strong',
contains: [],
variants: [
{begin: /_{2}/, end: /_{2}/ },
{begin: /\*{2}/, end: /\*{2}/ }
]
};
ITALIC = {
const ITALIC = {
className: 'emphasis',
contains: [],
variants: [
Expand All @@ -101,7 +101,7 @@ export default function(hljs) {
BOLD.contains.push(ITALIC);
ITALIC.contains.push(BOLD);

CONTAINABLE = [
var CONTAINABLE = [
INLINE_HTML,
LINK
];
Expand All @@ -111,7 +111,7 @@ export default function(hljs) {

CONTAINABLE = CONTAINABLE.concat(BOLD,ITALIC);

HEADER = {
const HEADER = {
className: 'section',
variants: [
{
Expand All @@ -129,7 +129,7 @@ export default function(hljs) {
]
};

BLOCKQUOTE = {
const BLOCKQUOTE = {
className: 'quote',
begin: '^>\\s+',
contains: CONTAINABLE,
Expand Down
114 changes: 114 additions & 0 deletions src/lib/modes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import {inherit} from './utils';

// Common regexps
// These are regex *source strings* (not RegExp objects), which lets other
// definitions concatenate them into larger patterns.

// An ASCII letter followed by any number of word characters.
export const IDENT_RE = '[a-zA-Z]\\w*';
// Same as IDENT_RE, except that a leading underscore is accepted as well.
export const UNDERSCORE_IDENT_RE = '[a-zA-Z_]\\w*';
// Digits with an optional fractional part, starting at a word boundary.
export const NUMBER_RE = '\\b\\d+(\\.\\d+)?';
// Optional leading minus, then either a hex literal or a decimal/float with
// an optional exponent.
export const C_NUMBER_RE = '(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
export const BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
// Alternation of operator and punctuation tokens.
// NOTE(review): presumably the tokens after which a regex literal may begin
// (going by the name) - confirm against the language definitions that use it.
export const RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';

// Common modes

/** A backslash followed by any single character, newlines included. */
export const BACKSLASH_ESCAPE = {
  begin: '\\\\[\\s\\S]',
  relevance: 0
};

/**
 * Single-quoted string. A newline inside it is flagged as illegal, and
 * backslash escapes are recognised within it.
 */
export const APOS_STRING_MODE = {
  className: 'string',
  begin: '\'',
  end: '\'',
  illegal: '\\n',
  contains: [BACKSLASH_ESCAPE]
};

/**
 * Double-quoted string. Same shape as APOS_STRING_MODE, with `"` as the
 * delimiter.
 */
export const QUOTE_STRING_MODE = {
  className: 'string',
  begin: '"',
  end: '"',
  illegal: '\\n',
  contains: [BACKSLASH_ESCAPE]
};

/** Whole-word match for a fixed list of common English words. */
export const PHRASAL_WORDS_MODE = {
  begin: /\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/
};
/**
 * Creates a comment mode delimited by `begin` and `end`.
 *
 * The base definition (className 'comment', the two delimiters and an empty
 * `contains` list) is combined with `inherits` through `inherit`; afterwards
 * PHRASAL_WORDS_MODE and a doctag mode are appended to the resulting
 * `contains` list.
 *
 * @param {string|RegExp} begin - pattern that opens the comment
 * @param {string|RegExp} end - pattern that closes the comment
 * @param {Object} [inherits] - extra properties handed to `inherit`
 * @returns {Object} the assembled comment mode
 */
export const COMMENT = function (begin, end, inherits) {
  // markers such as "TODO:" or "FIXME:" inside a comment
  const doctagMode = {
    className: 'doctag',
    begin: '(?:TODO|FIXME|NOTE|BUG|XXX):',
    relevance: 0
  };
  const baseMode = { className: 'comment', begin, end, contains: [] };
  const commentMode = inherit(baseMode, inherits || {});
  commentMode.contains.push(PHRASAL_WORDS_MODE, doctagMode);
  return commentMode;
};
// Ready-made comment modes, each built by a COMMENT(begin, end) call that runs
// when this module is loaded.
// Begins at `//` and ends at the `$` anchor.
export const C_LINE_COMMENT_MODE = COMMENT('//', '$');
// Begins at `/*` and ends at `*/`.
export const C_BLOCK_COMMENT_MODE = COMMENT('/\\*', '\\*/');
// Begins at `#` and ends at the `$` anchor.
export const HASH_COMMENT_MODE = COMMENT('#', '$');
// Number literal matching NUMBER_RE.
export const NUMBER_MODE = {
className: 'number',
begin: NUMBER_RE,
relevance: 0
};
// Number literal matching C_NUMBER_RE (hex, decimal and float forms).
export const C_NUMBER_MODE = {
className: 'number',
begin: C_NUMBER_RE,
relevance: 0
};
// Number literal matching BINARY_NUMBER_RE (`0b` followed by binary digits).
export const BINARY_NUMBER_MODE = {
className: 'number',
begin: BINARY_NUMBER_RE,
relevance: 0
};
/**
 * CSS numeric value: a NUMBER_RE number optionally followed by one unit
 * suffix taken from the alternation assembled below.
 */
export const CSS_NUMBER_MODE = {
  className: 'number',
  begin: NUMBER_RE + '(' + [
    '%|em|ex|ch|rem', // percentage and font-relative lengths
    'vw|vh|vmin|vmax', // viewport-relative lengths
    'cm|mm|in|pt|pc|px', // absolute lengths
    'deg|grad|rad|turn', // angles
    's|ms', // times
    'Hz|kHz', // frequencies
    'dpi|dpcm|dppx' // resolutions
  ].join('|') + ')?',
  relevance: 0
};
// Regular-expression literal: `/.../` followed by any of the flag letters
// `g`, `i`, `m`, `u`, `y`.
export const REGEXP_MODE = {
// this outer rule makes sure we actually have a WHOLE regex and not simply
// an expression such as:
//
// 3 / something
//
// (which will then blow up when regex's `illegal` sees the newline)
// The outer `begin` is a zero-width lookahead: it requires a second `/` on
// the same line but consumes nothing itself.
begin: /(?=\/[^\/\n]*\/)/,
contains: [{
className: 'regexp',
begin: /\//, end: /\/[gimuy]*/,
// a newline inside the literal is treated as illegal
illegal: /\n/,
contains: [
BACKSLASH_ESCAPE,
{
// bracketed character class, handled as its own sub-mode
// (presumably so that a `/` inside `[...]` does not end the regexp -
// confirm against the mode compiler)
begin: /\[/, end: /\]/,
relevance: 0,
contains: [BACKSLASH_ESCAPE]
}
]
}]
};
// Title whose name matches IDENT_RE (must start with a letter).
export const TITLE_MODE = {
className: 'title',
begin: IDENT_RE,
relevance: 0
};
// Title whose name matches UNDERSCORE_IDENT_RE (may start with an underscore).
export const UNDERSCORE_TITLE_MODE = {
className: 'title',
begin: UNDERSCORE_IDENT_RE,
relevance: 0
};
export const METHOD_GUARD = {
// excludes method names from keyword processing
// matches a dot, optional whitespace, then an identifier (e.g. `.foo`);
// it carries no className
begin: '\\.\\s*' + UNDERSCORE_IDENT_RE,
relevance: 0
};
65 changes: 65 additions & 0 deletions src/lib/regex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
 * Builds a RegExp that matches `value` literally.
 *
 * Every regex metacharacter in `value` is prefixed with a backslash, and the
 * result is compiled with the multiline (`m`) flag.
 *
 * @param {string} value - text to match verbatim
 * @returns {RegExp} multiline regex matching `value` as plain text
 */
export function escape(value) {
  const metaCharacters = /[-\/\\^$*+?.()|[\]{}]/g;
  const literalPattern = value.replace(metaCharacters, '\\$&');
  return new RegExp(literalPattern, 'm');
}

/**
 * Returns the pattern text of `re`.
 *
 * @param {RegExp|string} re - a regex, or a string that already is a pattern
 * @returns {string} `re.source` when it is present and truthy; otherwise `re`
 *   itself is handed back untouched (this covers strings and falsy values)
 */
export function source(re) {
  // nothing to unwrap for null, undefined or the empty string
  if (!re) {
    return re;
  }
  // a regex exposes its pattern via `.source`; a string has no such property
  const pattern = re.source;
  return pattern ? pattern : re;
}

/**
 * Counts the capturing groups in a pattern.
 *
 * An empty alternative is appended so the pattern is guaranteed to match the
 * empty string; the match array then has one slot for the whole match plus
 * one slot per capturing group.
 *
 * @param {RegExp|string} re - pattern to inspect (stringified via toString())
 * @returns {number} number of capturing groups
 */
export function countMatchGroups(re) {
  const alwaysMatching = new RegExp(`${re.toString()}|`);
  const emptyMatch = alwaysMatching.exec('');
  // slot 0 is the overall match, the rest are groups
  return emptyMatch.length - 1;
}

/**
 * Tells whether `re` matches `lexeme` starting at its very first character.
 *
 * @param {RegExp} re - regex to run; a falsy value is returned as-is
 * @param {string} lexeme - text to test
 * @returns {boolean|null|*} `true`/`false` when the regex matched somewhere
 *   (true only if the match sits at index 0), `null` when it did not match
 *   at all, or the falsy `re` itself when no regex was supplied
 */
export function startsWith(re, lexeme) {
  if (!re) {
    return re;
  }
  const found = re.exec(lexeme);
  if (!found) {
    return found;
  }
  return found.index === 0;
}

// join logically computes regexps.join(separator), but fixes the
// backreferences so they continue to match.
// it also places each individual regular expression into its own
// match group, keeping track of the sequencing of those match groups
// is currently an exercise for the caller. :-)
//
// regexps:   array of RegExp objects and/or pattern source strings
// separator: string placed between the wrapped patterns (e.g. '|')
// returns:   the combined pattern source string
export function join(regexps, separator) {
  // backreferenceRe matches an open parenthesis or backreference. To avoid
  // an incorrect parse, it additionally matches the following:
  // - [...] elements, where the meaning of parentheses and escapes change
  // - other escape sequences, so we do not misparse escape sequences as
  //   interesting elements
  // - non-matching or lookahead parentheses, which do not capture. These
  //   follow the '(' with a '?'.
  const backreferenceRe = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;
  // Applied to the text right after a '(?': a '<' that does not start a
  // lookbehind ('<=' or '<!') opens a NAMED capturing group. Such a group
  // consumes a group number like a plain '(' does, so it has to be counted or
  // every later backreference would be shifted by too little.
  const namedGroupRe = /^<[^=!]/;
  let numCaptures = 0;
  let ret = '';
  for (let i = 0; i < regexps.length; i++) {
    // the wrapper group added around each pattern takes a group number too
    numCaptures += 1;
    const offset = numCaptures;
    let re = source(regexps[i]);
    if (i > 0) {
      ret += separator;
    }
    ret += '(';
    while (re.length > 0) {
      const match = backreferenceRe.exec(re);
      if (match === null) {
        // nothing interesting left: copy the remainder verbatim
        ret += re;
        break;
      }
      const token = match[0];
      ret += re.substring(0, match.index);
      re = re.substring(match.index + token.length);
      if (token[0] === '\\' && match[1]) {
        // Adjust the backreference.
        ret += '\\' + String(Number(match[1]) + offset);
      } else {
        ret += token;
        if (token === '(' || (token === '(?' && namedGroupRe.test(re))) {
          numCaptures++;
        }
      }
    }
    ret += ')';
  }
  return ret;
}

0 comments on commit cab7d36

Please sign in to comment.