Skip to content

Commit

Permalink
fix(elixir) fix regular expression detection (#3207)
Browse files Browse the repository at this point in the history
* clean up grammar a bit, add more regex sigil examples
* add regex sigils that classify as `regex`
* add char.escape
* remove dead code
* regex modifiers
* properly escape the end of sigils
  • Loading branch information
joshgoebel committed Jun 2, 2021
1 parent 5d51ee4 commit 3e87daa
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 112 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
Expand Up @@ -3,9 +3,10 @@
Grammars:

- enh(clojure) added `edn` alias (#3213) [Stel Abrego][]
- enh(elixir) much improved regular expression sigil support (#3207) [Josh Goebel][]

[Stel Abrego]: https://github.com/stelcodes

[Josh Goebel]: https://github.com/joshgoebel

## Version 11.0.0

Expand Down
239 changes: 130 additions & 109 deletions src/languages/elixir.js
Expand Up @@ -6,115 +6,167 @@ Category: functional
Website: https://elixir-lang.org
*/

import * as regex from '../lib/regex.js';

/** @type LanguageFn */
export default function(hljs) {
const ELIXIR_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_.]*(!|\\?)?';
const ELIXIR_METHOD_RE = '[a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?';
const ELIXIR_KEYWORDS = {
const KEYWORDS = [
"alias",
"alias",
"and",
"begin",
"break",
"case",
"cond",
"defined",
"do",
"end",
"ensure",
"false",
"fn",
"for",
"import",
"in",
"include",
"module",
"next",
"nil",
"not",
"or",
"quote",
"redo",
"require",
"retry",
"return",
"self",
"then",
"true",
"unless",
"until",
"use",
"when",
"while",
"with|0"
];
const KWS = {
$pattern: ELIXIR_IDENT_RE,
keyword: 'and false then defined module in return redo retry end for true self when ' +
'next until do begin unless nil break not case cond alias while ensure or ' +
'include use alias fn quote require import with|0'
keyword: KEYWORDS
};
const SUBST = {
className: 'subst',
begin: /#\{/,
end: /\}/,
keywords: ELIXIR_KEYWORDS
keywords: KWS
};
const NUMBER = {
className: 'number',
begin: '(\\b0o[0-7_]+)|(\\b0b[01_]+)|(\\b0x[0-9a-fA-F_]+)|(-?\\b[1-9][0-9_]*(\\.[0-9_]+([eE][-+]?[0-9]+)?)?)',
relevance: 0
};
// TODO: could be tightened
// https://elixir-lang.readthedocs.io/en/latest/intro/18.html
// but you also need to include closing delimiters in the escape list per
// individual sigil mode from what I can tell,
// i.e. \} might or might not be an escape depending on the sigil used
const ESCAPES_RE = /\\[\s\S]/;
// const ESCAPES_RE = /\\["'\\abdefnrstv0]/;
const BACKSLASH_ESCAPE = {
match: ESCAPES_RE,
scope: "char.escape",
relevance: 0
};
const SIGIL_DELIMITERS = '[/|([{<"\']';
const SIGIL_DELIMITER_MODES = [
{
begin: /"/,
end: /"/
},
{
begin: /'/,
end: /'/
},
{
begin: /\//,
end: /\//
},
{
begin: /\|/,
end: /\|/
},
{
begin: /\(/,
end: /\)/
},
{
begin: /\[/,
end: /\]/
},
{
begin: /\{/,
end: /\}/
},
{
begin: /</,
end: />/
}
];
const escapeSigilEnd = (end) => {
return {
scope: "char.escape",
begin: regex.concat(/\\/, end),
relevance: 0
};
};
const LOWERCASE_SIGIL = {
className: 'string',
begin: '~[a-z]' + '(?=' + SIGIL_DELIMITERS + ')',
contains: [
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x,
{
endsParent: true,
contains: [
{
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
],
variants: [
{
begin: /"/,
end: /"/
},
{
begin: /'/,
end: /'/
},
{
begin: /\//,
end: /\//
},
{
begin: /\|/,
end: /\|/
},
{
begin: /\(/,
end: /\)/
},
{
begin: /\[/,
end: /\]/
},
{
begin: /\{/,
end: /\}/
},
{
begin: /</,
end: />/
}
]
}
escapeSigilEnd(x.end),
BACKSLASH_ESCAPE,
SUBST
]
}
]
))
};

const UPCASE_SIGIL = {
className: 'string',
begin: '~[A-Z]' + '(?=' + SIGIL_DELIMITERS + ')',
contains: [
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x,
{
begin: /"/,
end: /"/
},
{
begin: /'/,
end: /'/
},
{
begin: /\//,
end: /\//
},
{
begin: /\|/,
end: /\|/
},
{
begin: /\(/,
end: /\)/
},
{
begin: /\[/,
end: /\]/
},
contains: [ escapeSigilEnd(x.end) ]
}
))
};

const REGEX_SIGIL = {
className: 'regex',
variants: [
{
begin: /\{/,
end: /\}/
begin: '~r' + '(?=' + SIGIL_DELIMITERS + ')',
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x,
{
end: regex.concat(x.end, /[uismxfU]{0,7}/),
contains: [
escapeSigilEnd(x.end),
BACKSLASH_ESCAPE,
SUBST
]
}
))
},
{
begin: /</,
end: />/
begin: '~R' + '(?=' + SIGIL_DELIMITERS + ')',
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x,
{
end: regex.concat(x.end, /[uismxfU]{0,7}/),
contains: [ escapeSigilEnd(x.end) ]
})
)
}
]
};
Expand Down Expand Up @@ -182,6 +234,7 @@ export default function(hljs) {
});
const ELIXIR_DEFAULT_CONTAINS = [
STRING,
REGEX_SIGIL,
UPCASE_SIGIL,
LOWERCASE_SIGIL,
hljs.HASH_COMMENT_MODE,
Expand Down Expand Up @@ -213,45 +266,13 @@ export default function(hljs) {
},
{
begin: '->'
},
{ // regexp container
begin: '(' + hljs.RE_STARTERS_RE + ')\\s*',
contains: [
hljs.HASH_COMMENT_MODE,
{
// to prevent false regex triggers for the division function:
// /:
begin: /\/: (?=\d+\s*[,\]])/,
relevance: 0,
contains: [NUMBER]
},
{
className: 'regexp',
illegal: '\\n',
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
],
variants: [
{
begin: '/',
end: '/[a-z]*'
},
{
begin: '%r\\[',
end: '\\][a-z]*'
}
]
}
],
relevance: 0
}
];
SUBST.contains = ELIXIR_DEFAULT_CONTAINS;

return {
name: 'Elixir',
keywords: ELIXIR_KEYWORDS,
keywords: KWS,
contains: ELIXIR_DEFAULT_CONTAINS
};
}
30 changes: 28 additions & 2 deletions test/markup/elixir/sigils.expect.txt
@@ -1,9 +1,12 @@
<span class="hljs-string">~R&#x27;this + i\s &quot;a&quot; regex too&#x27;</span>
<span class="hljs-regex">~R&#x27;this + i\s &quot;a&quot; regex too&#x27;</span>
<span class="hljs-string">~w(hello <span class="hljs-subst">#{ [<span class="hljs-string">&quot;has&quot;</span> &lt;&gt; <span class="hljs-string">&quot;123&quot;</span>, <span class="hljs-string">&#x27;\c\d&#x27;</span>, <span class="hljs-string">&quot;\123 interpol&quot;</span> | []] }</span> world)</span>s
<span class="hljs-string">~W(hello #{no &quot;123&quot; \c\d \123 interpol} world)</span>s
<span class="hljs-string">~s{Escapes terminators \{ and \}, but no {balancing}</span> <span class="hljs-comment"># outside of sigil here }</span>
<span class="hljs-string">~s{Escapes terminators <span class="hljs-char escape_">\{</span> and <span class="hljs-char escape_">\}</span>, but no {balancing}</span> <span class="hljs-comment"># outside of sigil here }</span>
<span class="hljs-string">~S&quot;No escapes \s\t\n and no #{interpolation}&quot;</span>

<span class="hljs-string">~S(No escapes \&quot; \&#x27; \\ \a \b \d \e \f \n \r \s \t \v \0)</span>
<span class="hljs-string">~s(Plenty of escapes <span class="hljs-char escape_">\&quot;</span> <span class="hljs-char escape_">\&#x27;</span> <span class="hljs-char escape_">\\</span> <span class="hljs-char escape_">\a</span> <span class="hljs-char escape_">\b</span> <span class="hljs-char escape_">\d</span> <span class="hljs-char escape_">\e</span> <span class="hljs-char escape_">\f</span> <span class="hljs-char escape_">\n</span> <span class="hljs-char escape_">\r</span> <span class="hljs-char escape_">\s</span> <span class="hljs-char escape_">\t</span> <span class="hljs-char escape_">\v</span> <span class="hljs-char escape_">\0</span>)</span>

<span class="hljs-string">~S/hello/</span>
<span class="hljs-string">~S|hello|</span>
<span class="hljs-string">~S&quot;hello&quot;</span>
Expand All @@ -21,3 +24,26 @@
<span class="hljs-string">~s[hello <span class="hljs-subst">#{name}</span>]</span>
<span class="hljs-string">~s{hello <span class="hljs-subst">#{name}</span>}</span>
<span class="hljs-string">~s&lt;hello <span class="hljs-subst">#{name}</span>&gt;</span>

<span class="hljs-regex">~r/hello/</span>
<span class="hljs-regex">~r|hello|u</span>
<span class="hljs-regex">~r&quot;hello&quot;i</span>
<span class="hljs-regex">~r&#x27;hello&#x27;m</span>
<span class="hljs-regex">~r(hello)x</span>
<span class="hljs-regex">~r[hello]f</span>
<span class="hljs-regex">~r{hello}U</span>
<span class="hljs-regex">~r&lt;hello&gt;</span>

<span class="hljs-regex">~r&lt;regex here&gt;uismxfU</span>
<span class="hljs-regex">~r/regex here/uismxfU</span>
<span class="hljs-regex">~R&lt;regex here&gt;uismxfU</span>
<span class="hljs-regex">~R/regex here/uismxfU</span>

<span class="hljs-regex">~r|foo<span class="hljs-char escape_">\|</span>bar|</span>
<span class="hljs-regex">~R|foo<span class="hljs-char escape_">\|</span>bar|</span>

<span class="hljs-regex">~r(hello( there<span class="hljs-char escape_">\)</span>*!)u</span>
<span class="hljs-regex">~R(hello( there<span class="hljs-char escape_">\)</span>*!)u</span>

<span class="hljs-string">~s|foo<span class="hljs-char escape_">\|</span>bar|</span>
<span class="hljs-string">~S|foo<span class="hljs-char escape_">\|</span>bar|</span>
26 changes: 26 additions & 0 deletions test/markup/elixir/sigils.txt
Expand Up @@ -4,6 +4,9 @@
~s{Escapes terminators \{ and \}, but no {balancing} # outside of sigil here }
~S"No escapes \s\t\n and no #{interpolation}"

~S(No escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0)
~s(Plenty of escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0)

~S/hello/
~S|hello|
~S"hello"
Expand All @@ -21,3 +24,26 @@
~s[hello #{name}]
~s{hello #{name}}
~s<hello #{name}>

~r/hello/
~r|hello|u
~r"hello"i
~r'hello'm
~r(hello)x
~r[hello]f
~r{hello}U
~r<hello>

~r<regex here>uismxfU
~r/regex here/uismxfU
~R<regex here>uismxfU
~R/regex here/uismxfU

~r|foo\|bar|
~R|foo\|bar|

~r(hello( there\)*!)u
~R(hello( there\)*!)u

~s|foo\|bar|
~S|foo\|bar|

0 comments on commit 3e87daa

Please sign in to comment.