Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(chore) Clean up all regexes to be UTF-8 compliant/ready #2759

Merged
merged 24 commits on Nov 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/language-guide.rst
Expand Up @@ -130,7 +130,7 @@ This is commonly used to define nested modes:

{
className: 'object',
begin: '{', end: '}',
begin: /\{/, end: /\}/,
contains: [hljs.QUOTE_STRING_MODE, 'self']
}

Expand Down
2 changes: 1 addition & 1 deletion docs/mode-reference.rst
Expand Up @@ -158,7 +158,7 @@ This is when ``endsWithParent`` comes into play:
::

{
className: 'rules', begin: '{', end: '}',
className: 'rules', begin: /\{/, end: /\}/,
contains: [
{className: 'rule', /* ... */ end: ';', endsWithParent: true}
]
Expand Down
4 changes: 2 additions & 2 deletions src/languages/actionscript.js
Expand Up @@ -34,12 +34,12 @@ export default function(hljs) {
hljs.C_NUMBER_MODE,
{
className: 'class',
beginKeywords: 'package', end: '{',
beginKeywords: 'package', end: /\{/,
contains: [hljs.TITLE_MODE]
},
{
className: 'class',
beginKeywords: 'class interface', end: '{', excludeEnd: true,
beginKeywords: 'class interface', end: /\{/, excludeEnd: true,
contains: [
{
beginKeywords: 'extends implements'
Expand Down
2 changes: 1 addition & 1 deletion src/languages/ada.js
Expand Up @@ -37,7 +37,7 @@ export default function(hljs) {
var ID_REGEX = '[A-Za-z](_?[A-Za-z0-9.])*';

// bad chars, only allowed in literals
var BAD_CHARS = `[]{}%#'"`;
var BAD_CHARS = `[]\\{\\}%#'"`;

// Ada doesn't have block comments, only line comments
var COMMENTS = hljs.COMMENT('--', '$');
Expand Down
6 changes: 3 additions & 3 deletions src/languages/angelscript.js
Expand Up @@ -37,7 +37,7 @@ export default function(hljs) {
'abstract|0 try catch protected explicit property',

// avoid close detection with C# and JS
illegal: '(^using\\s+[A-Za-z0-9_\\.]+;$|\\bfunction\s*[^\\(])',
illegal: '(^using\\s+[A-Za-z0-9_\\.]+;$|\\bfunction\\s*[^\\(])',

contains: [
{ // 'strings'
Expand Down Expand Up @@ -71,7 +71,7 @@ export default function(hljs) {
},

{ // interface or namespace declaration
beginKeywords: 'interface namespace', end: '{',
beginKeywords: 'interface namespace', end: /\{/,
illegal: '[;.\\-]',
contains: [
{ // interface or namespace name
Expand All @@ -82,7 +82,7 @@ export default function(hljs) {
},

{ // class declaration
beginKeywords: 'class', end: '{',
beginKeywords: 'class', end: /\{/,
illegal: '[;.\\-]',
contains: [
{ // class name
Expand Down
2 changes: 1 addition & 1 deletion src/languages/asciidoc.js
Expand Up @@ -96,7 +96,7 @@ export default function(hljs) {
// lists (can only capture indicators)
{
className: 'bullet',
begin: '^(\\*+|\\-+|\\.+|[^\\n]+?::)\\s+'
begin: '^(\\*+|-+|\\.+|[^\\n]+?::)\\s+'
},
// admonition
{
Expand Down
2 changes: 1 addition & 1 deletion src/languages/awk.js
Expand Up @@ -11,7 +11,7 @@ export default function(hljs) {
className: 'variable',
variants: [
{begin: /\$[\w\d#@][\w\d_]*/},
{begin: /\$\{(.*?)}/}
{begin: /\$\{(.*?)\}/}
]
};
var KEYWORDS = 'BEGIN END if else while do for in break continue delete next nextfile function func exit|10';
Expand Down
8 changes: 4 additions & 4 deletions src/languages/axapta.js
Expand Up @@ -75,15 +75,15 @@ export default function(hljs) {
'firstonly100',
'firstonly1000',
'flush',
'for',
'for',
'forceliterals',
'forcenestedloop',
'forceplaceholders',
'forceselectorder',
'forupdate',
'from',
'generateonly',
'group',
'group',
'hint',
'if',
'implements',
Expand Down Expand Up @@ -119,7 +119,7 @@ export default function(hljs) {
'select',
'server',
'setting',
'static',
'static',
'sum',
'super',
'switch',
Expand Down Expand Up @@ -160,7 +160,7 @@ export default function(hljs) {
},
{
className: 'class',
beginKeywords: 'class interface', end: '{', excludeEnd: true,
beginKeywords: 'class interface', end: /\{/, excludeEnd: true,
illegal: ':',
contains: [
{beginKeywords: 'extends implements'},
Expand Down
8 changes: 4 additions & 4 deletions src/languages/basic.js
Expand Up @@ -13,7 +13,7 @@ export default function(hljs) {
illegal: '^\.',
// Support explicitly typed variables that end with $%! or #.
keywords: {
$pattern: '[a-zA-Z][a-zA-Z0-9_\$\%\!\#]*',
$pattern: '[a-zA-Z][a-zA-Z0-9_$%!#]*',
keyword:
'ABS ASC AND ATN AUTO|0 BEEP BLOAD|10 BSAVE|10 CALL CALLS CDBL CHAIN CHDIR CHR$|10 CINT CIRCLE ' +
'CLEAR CLOSE CLS COLOR COM COMMON CONT COS CSNG CSRLIN CVD CVI CVS DATA DATE$ ' +
Expand All @@ -35,7 +35,7 @@ export default function(hljs) {
{
// Match line numbers
className: 'symbol',
begin: '^[0-9]+\ ',
begin: '^[0-9]+ ',
relevance: 10
},
{
Expand All @@ -47,12 +47,12 @@ export default function(hljs) {
{
// Match hexadecimal numbers (&Hxxxx)
className: 'number',
begin: '(\&[hH][0-9a-fA-F]{1,4})'
begin: '(&[hH][0-9a-fA-F]{1,4})'
},
{
// Match octal numbers (&Oxxxxxx)
className: 'number',
begin: '(\&[oO][0-7]{1,6})'
begin: '(&[oO][0-7]{1,6})'
}
]
};
Expand Down
4 changes: 2 additions & 2 deletions src/languages/brainfuck.js
Expand Up @@ -8,7 +8,7 @@ Website: https://esolangs.org/wiki/Brainfuck
export default function(hljs) {
var LITERAL = {
className: 'literal',
begin: '[\\+\\-]',
begin: /[+-]/,
relevance: 0
};
return {
Expand All @@ -35,7 +35,7 @@ export default function(hljs) {
},
{
// this mode works as the only relevance counter
begin: /(?:\+\+|\-\-)/,
begin: /(?:\+\+|--)/,
contains: [LITERAL]
},
LITERAL
Expand Down
2 changes: 1 addition & 1 deletion src/languages/ceylon.js
Expand Up @@ -67,7 +67,7 @@ export default function(hljs) {
{
// compiler annotation
className: 'meta',
begin: '@[a-z]\\w*(?:\\:\"[^\"]*\")?'
begin: '@[a-z]\\w*(?::"[^"]*")?'
}
].concat(EXPRESSIONS)
};
Expand Down
2 changes: 1 addition & 1 deletion src/languages/clojure.js
Expand Up @@ -96,7 +96,7 @@ export default function(hljs) {
var GLOBAL = {
beginKeywords: globals,
lexemes: SYMBOL_RE,
end: '(\\[|\\#|\\d|"|:|\\{|\\)|\\(|$)',
end: '(\\[|#|\\d|"|:|\\{|\\)|\\(|$)',
contains: [
{
className: 'title',
Expand Down
2 changes: 1 addition & 1 deletion src/languages/cmake.js
Expand Up @@ -51,7 +51,7 @@ export default function(hljs) {
contains: [
{
className: 'variable',
begin: '\\${', end: '}'
begin: /\$\{/, end: /\}/
},
hljs.HASH_COMMENT_MODE,
hljs.QUOTE_STRING_MODE,
Expand Down
2 changes: 1 addition & 1 deletion src/languages/coffeescript.js
Expand Up @@ -51,7 +51,7 @@ export default function(hljs) {
var JS_IDENT_RE = '[A-Za-z$_][0-9A-Za-z$_]*';
var SUBST = {
className: 'subst',
begin: /#\{/, end: /}/,
begin: /#\{/, end: /\}/,
keywords: KEYWORDS
};
var EXPRESSIONS = [
Expand Down
2 changes: 1 addition & 1 deletion src/languages/crmsh.js
Expand Up @@ -89,7 +89,7 @@ export default function(hljs) {
},
{
className: 'attr',
begin: /([A-Za-z\$_\#][\w_-]+)=/,
begin: /([A-Za-z$_#][\w_-]+)=/,
relevance: 0
},
{
Expand Down
16 changes: 8 additions & 8 deletions src/languages/crystal.js
Expand Up @@ -9,8 +9,8 @@ export default function(hljs) {
var INT_SUFFIX = '(_*[ui](8|16|32|64|128))?';
var FLOAT_SUFFIX = '(_*f(32|64))?';
var CRYSTAL_IDENT_RE = '[a-zA-Z_]\\w*[!?=]?';
var CRYSTAL_METHOD_RE = '[a-zA-Z_]\\w*[!?=]?|[-+~]\\@|<<|>>|[=!]~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~|]|//|//=|&[-+*]=?|&\\*\\*|\\[\\][=?]?';
var CRYSTAL_PATH_RE = '[A-Za-z_]\\w*(::\\w+)*(\\?|\\!)?';
var CRYSTAL_METHOD_RE = '[a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|[=!]~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~|]|//|//=|&[-+*]=?|&\\*\\*|\\[\\][=?]?';
var CRYSTAL_PATH_RE = '[A-Za-z_]\\w*(::\\w+)*(\\?|!)?';
var CRYSTAL_KEYWORDS = {
$pattern: CRYSTAL_IDENT_RE,
keyword:
Expand All @@ -22,7 +22,7 @@ export default function(hljs) {
};
var SUBST = {
className: 'subst',
begin: '#{', end: '}',
begin: /#\{/, end: /\}/,
keywords: CRYSTAL_KEYWORDS
};
var EXPANSION = {
Expand All @@ -49,7 +49,7 @@ export default function(hljs) {
{begin: /`/, end: /`/},
{begin: '%[Qwi]?\\(', end: '\\)', contains: recursiveParen('\\(', '\\)')},
{begin: '%[Qwi]?\\[', end: '\\]', contains: recursiveParen('\\[', '\\]')},
{begin: '%[Qwi]?{', end: '}', contains: recursiveParen('{', '}')},
{begin: '%[Qwi]?\\{', end: /\}/, contains: recursiveParen(/\{/, /\}/)},
{begin: '%[Qwi]?<', end: '>', contains: recursiveParen('<', '>')},
{begin: '%[Qwi]?\\|', end: '\\|'},
{begin: /<<-\w+$/, end: /^\s*\w+$/},
Expand All @@ -61,15 +61,15 @@ export default function(hljs) {
variants: [
{begin: '%q\\(', end: '\\)', contains: recursiveParen('\\(', '\\)')},
{begin: '%q\\[', end: '\\]', contains: recursiveParen('\\[', '\\]')},
{begin: '%q{', end: '}', contains: recursiveParen('{', '}')},
{begin: '%q\\{', end: /\}/, contains: recursiveParen(/\{/, /\}/)},
{begin: '%q<', end: '>', contains: recursiveParen('<', '>')},
{begin: '%q\\|', end: '\\|'},
{begin: /<<-'\w+'$/, end: /^\s*\w+$/},
],
relevance: 0,
};
var REGEXP = {
begin: '(?!%})(' + hljs.RE_STARTERS_RE + '|\\n|\\b(case|if|select|unless|until|when|while)\\b)\\s*',
begin: '(?!%\\})(' + hljs.RE_STARTERS_RE + '|\\n|\\b(case|if|select|unless|until|when|while)\\b)\\s*',
keywords: 'case if select unless until when while',
contains: [
{
Expand All @@ -89,7 +89,7 @@ export default function(hljs) {
variants: [
{begin: '%r\\(', end: '\\)', contains: recursiveParen('\\(', '\\)')},
{begin: '%r\\[', end: '\\]', contains: recursiveParen('\\[', '\\]')},
{begin: '%r{', end: '}', contains: recursiveParen('{', '}')},
{begin: '%r\\{', end: /\}/, contains: recursiveParen(/\{/, /\}/)},
{begin: '%r<', end: '>', contains: recursiveParen('<', '>')},
{begin: '%r\\|', end: '\\|'},
],
Expand Down Expand Up @@ -162,7 +162,7 @@ export default function(hljs) {
},
{
className: 'symbol',
begin: hljs.UNDERSCORE_IDENT_RE + '(\\!|\\?)?:',
begin: hljs.UNDERSCORE_IDENT_RE + '(!|\\?)?:',
relevance: 0
},
{
Expand Down
12 changes: 6 additions & 6 deletions src/languages/csharp.js
Expand Up @@ -170,24 +170,24 @@ export default function(hljs) {
var VERBATIM_STRING_NO_LF = hljs.inherit(VERBATIM_STRING, {illegal: /\n/});
var SUBST = {
className: 'subst',
begin: '{', end: '}',
begin: /\{/, end: /\}/,
keywords: KEYWORDS
};
var SUBST_NO_LF = hljs.inherit(SUBST, {illegal: /\n/});
var INTERPOLATED_STRING = {
className: 'string',
begin: /\$"/, end: '"',
illegal: /\n/,
contains: [{begin: '{{'}, {begin: '}}'}, hljs.BACKSLASH_ESCAPE, SUBST_NO_LF]
contains: [{begin: /\{\{/}, {begin: /\}\}/}, hljs.BACKSLASH_ESCAPE, SUBST_NO_LF]
};
var INTERPOLATED_VERBATIM_STRING = {
className: 'string',
begin: /\$@"/, end: '"',
contains: [{begin: '{{'}, {begin: '}}'}, {begin: '""'}, SUBST]
contains: [{begin: /\{\{/}, {begin: /\}\}/}, {begin: '""'}, SUBST]
};
var INTERPOLATED_VERBATIM_STRING_NO_LF = hljs.inherit(INTERPOLATED_VERBATIM_STRING, {
illegal: /\n/,
contains: [{begin: '{{'}, {begin: '}}'}, {begin: '""'}, SUBST_NO_LF]
contains: [{begin: /\{\{/}, {begin: /\}\}/}, {begin: '""'}, SUBST_NO_LF]
});
SUBST.contains = [
INTERPOLATED_VERBATIM_STRING,
Expand Down Expand Up @@ -319,14 +319,14 @@ export default function(hljs) {
},
{
className: 'function',
begin: '(' + TYPE_IDENT_RE + '\\s+)+' + hljs.IDENT_RE + '\\s*(\\<.+\\>)?\\s*\\(', returnBegin: true,
begin: '(' + TYPE_IDENT_RE + '\\s+)+' + hljs.IDENT_RE + '\\s*(<.+>)?\\s*\\(', returnBegin: true,
end: /\s*[{;=]/, excludeEnd: true,
keywords: KEYWORDS,
contains: [
// prevents these from being highlighted `title`
{ beginKeywords: FUNCTION_MODIFIERS.join(" ")},
{
begin: hljs.IDENT_RE + '\\s*(\\<.+\\>)?\\s*\\(', returnBegin: true,
begin: hljs.IDENT_RE + '\\s*(<.+>)?\\s*\\(', returnBegin: true,
contains: [
hljs.TITLE_MODE,
GENERIC_MODIFIER
Expand Down
8 changes: 4 additions & 4 deletions src/languages/css.js
Expand Up @@ -46,10 +46,10 @@ export default function(hljs) {
var AT_IDENTIFIER = '@[a-z-]+' // @font-face
var AT_MODIFIERS = "and or not only"
var MEDIA_TYPES = "all print screen speech"
var AT_PROPERTY_RE = /@\-?\w[\w]*(\-\w+)*/ // @-webkit-keyframes
var AT_PROPERTY_RE = /@-?\w[\w]*(-\w+)*/ // @-webkit-keyframes
var IDENT_RE = '[a-zA-Z-][a-zA-Z0-9_-]*';
var RULE = {
begin: /(?:[A-Z\_\.\-]+|--[a-zA-Z0-9_-]+)\s*:/, returnBegin: true, end: ';', endsWithParent: true,
begin: /(?:[A-Z_.-]+|--[a-zA-Z0-9_-]+)\s*:/, returnBegin: true, end: ';', endsWithParent: true,
contains: [
ATTRIBUTE
]
Expand Down Expand Up @@ -78,7 +78,7 @@ export default function(hljs) {
},
{
className: 'selector-pseudo',
begin: /:(:)?[a-zA-Z0-9\_\-\+\(\)"'.]+/
begin: /:(:)?[a-zA-Z0-9_+()"'.-]+/
},
// matching these here allows us to treat them more like regular CSS
// rules so everything between the {} gets regular rule highlighting,
Expand Down Expand Up @@ -121,7 +121,7 @@ export default function(hljs) {
relevance: 0
},
{
begin: '{', end: '}',
begin: /\{/, end: /\}/,
illegal: /\S/,
contains: [
hljs.C_BLOCK_COMMENT_MODE,
Expand Down
6 changes: 3 additions & 3 deletions src/languages/dart.js
Expand Up @@ -18,8 +18,8 @@ export default function(hljs) {
const BRACED_SUBST = {
className: 'subst',
variants: [{
begin: '\\${',
end: '}'
begin: /\$\{/,
end: /\}/
}],
keywords: 'true false null this is new super',
};
Expand Down Expand Up @@ -155,7 +155,7 @@ export default function(hljs) {
{
className: 'class',
beginKeywords: 'class interface',
end: '{',
end: /\{/,
excludeEnd: true,
contains: [{
beginKeywords: 'extends implements'
Expand Down