Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enh(llvm) Improve grammar and align assembly like grammars #2830

Merged
merged 11 commits into from Nov 13, 2020
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -30,6 +30,7 @@ Language Improvements:
- enh(php) highlight variables (#2785) [Taufik Nurrohman][]
- fix(python) Handle comments on decorators (#2804) [Jonathan Sharpe][]
- enh(diff) improve highlighting of diff for git patches [Florian Bezdeka][]
- fix(llvm) lots of small improvements and fixes (#2830) [Josh Goebel][]
- enh(mathematica) Rework entire implementation [Patrick Scheibe][]
- Correct matching of the many variations of Mathematica's numbers
- Matching of named-characters aka special symbols like `\[Gamma]`
Expand Down
105 changes: 69 additions & 36 deletions src/languages/llvm.js
Expand Up @@ -4,12 +4,67 @@ Author: Michael Rodler <contact@f0rki.at>
Description: language used as intermediate representation in the LLVM compiler framework
Website: https://llvm.org/docs/LangRef.html
Category: assembler
Audit: 2020
*/

import * as regex from '../lib/regex.js';

/** @type LanguageFn */
export default function(hljs) {
var identifier = '([-a-zA-Z$._][\\w\\-$.]*)';
// Identifier body without its leading sigil: the first character is a letter,
// `-`, `$`, `.` or `_`; the rest is any run of word characters, `$`, `.` or `-`.
// The pattern is wrapped in a capture group and is combined with a sigil
// (`%`, `@`, `!`) via regex.concat further down in this function.
const IDENT_RE = /([-a-zA-Z$._][\w$.-]*)/;
// Integer types written as `i` plus a bit width (`i1`, `i32`, `i64`, ...).
// The lookahead requires the digits to end at whitespace or a word boundary,
// so something like `i32x` is not treated as a type.
const TYPE = {
  begin: /\bi\d+(?=\s|\b)/,
  className: 'type'
};
// The `=` sign. It carries relevance 0, which in highlight.js means a match
// adds nothing to the language auto-detection score.
const OPERATOR = {
  begin: /=/,
  className: 'operator',
  relevance: 0
};
// The `,` separator. It carries relevance 0, which in highlight.js means a
// match adds nothing to the language auto-detection score.
const PUNCTUATION = {
  begin: /,/,
  className: 'punctuation',
  relevance: 0
};
const NUMBER = {
className: 'number',
variants: [
{ begin: /0[xX][a-fA-F0-9]+/ },
{ begin: /-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?/ }
],
relevance: 0
};
// Basic-block labels: an optionally indented name followed directly by `:`
// at the start of a line (`entry:`, `for.body:`, `bb1:`, `5:`).
// The character set follows the LLVM assembly lexer's label rule
// `[-a-zA-Z$._0-9]+:`, so labels containing digits, dots, upper-case
// letters, `$`, `_` or `-` are recognised, not only all-lower-case words.
const LABEL = {
  className: 'symbol',
  variants: [
    { begin: /^\s*[-a-zA-Z$._0-9]+:/ } // labels
  ],
  relevance: 0
};
// `%`-prefixed names (`%tmp`), `%`-prefixed numbers (`%0`) and `#`-prefixed
// numbers (`#0` — presumably attribute-group references; confirm against the
// LLVM language reference). Each pattern becomes one `{ begin }` variant, in
// this order.
const VARIABLE = {
  className: 'variable',
  variants: [
    regex.concat(/%/, IDENT_RE),
    /%\d+/,
    /#\d+/
  ].map((begin) => ({ begin }))
};
// `@`- and `!`-prefixed identifiers (e.g. `@mul_add`, `@0`, `!foo`, `!0`),
// all highlighted with the `title` class. Each pattern becomes one
// `{ begin }` variant, in this order.
const FUNCTION = {
  className: 'title',
  variants: [
    regex.concat(/@/, IDENT_RE),
    /@\d+/,
    regex.concat(/!/, IDENT_RE),
    regex.concat(/!\d+/, IDENT_RE),
    // https://llvm.org/docs/LangRef.html#namedmetadatastructure
    // a purely numeric metadata id (such as `!0`) is written the same way
    /!\d+/
  ].map((begin) => ({ begin }))
};

return {
name: 'LLVM IR',
// TODO: split into different categories of keywords
keywords:
'begin end true false declare define global ' +
'constant private linker_private internal ' +
Expand Down Expand Up @@ -49,48 +104,26 @@ export default function(hljs) {
'extractvalue insertvalue atomicrmw cmpxchg fence ' +
'argmemonly double',
contains: [
{
className: 'keyword',
begin: 'i\\d+'
},
hljs.COMMENT(
';', '\\n', {relevance: 0}
),
// Double quote string
TYPE,
// this matches "empty comments"...
// ...because it's far more likely this is a statement terminator in
// another language than an actual comment
hljs.COMMENT(/;\s*$/, null, { relevance: 0 }),
hljs.COMMENT(/;/, /$/),
hljs.QUOTE_STRING_MODE,
{
className: 'string',
variants: [
// Double-quoted string
{ begin: '"', end: '[^\\\\]"' },
],
relevance: 0
},
{
className: 'title',
variants: [
{ begin: '@' + identifier },
{ begin: '@\\d+' },
{ begin: '!' + identifier },
{ begin: '!\\d+' + identifier }
{ begin: /"/, end: /[^\\]"/ },
]
},
{
className: 'symbol',
variants: [
{ begin: '%' + identifier },
{ begin: '%\\d+' },
{ begin: '#\\d+' },
]
},
{
className: 'number',
variants: [
{ begin: '0[xX][a-fA-F0-9]+' },
{ begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' }
],
relevance: 0
},
FUNCTION,
PUNCTUATION,
OPERATOR,
VARIABLE,
LABEL,
NUMBER
]
};
}
19 changes: 19 additions & 0 deletions test/markup/llvm/simple.expect.txt
@@ -0,0 +1,19 @@
<span class="hljs-comment">;; foooo</span>
<span class="hljs-keyword">define</span> <span class="hljs-type">i32</span> <span class="hljs-title">@mul_add</span>(<span class="hljs-type">i32</span> <span class="hljs-variable">%x</span><span class="hljs-punctuation">,</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%y</span><span class="hljs-punctuation">,</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%z</span>) {
<span class="hljs-symbol"> entry:</span>
<span class="hljs-variable">%tmp</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">mul</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%x</span><span class="hljs-punctuation">,</span> <span class="hljs-variable">%y</span>
<span class="hljs-variable">%tmp2</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">add</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp</span><span class="hljs-punctuation">,</span> <span class="hljs-variable">%z</span>
<span class="hljs-variable">%tmp3</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">add</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0</span>
<span class="hljs-keyword">ret</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp3</span>
}

<span class="hljs-comment">; Named metadata</span>
<span class="hljs-title">!0</span> <span class="hljs-operator">=</span> !{<span class="hljs-type">i32</span> <span class="hljs-number">42</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">null</span><span class="hljs-punctuation">,</span> !<span class="hljs-string">&quot;string&quot;</span>}
<span class="hljs-title">!foo</span> <span class="hljs-operator">=</span> !{<span class="hljs-title">!0</span>}

<span class="hljs-comment">; Some unnamed metadata nodes, which are referenced by the named metadata.</span>
<span class="hljs-title">!0</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;zero&quot;</span>}
<span class="hljs-title">!1</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;one&quot;</span>}
<span class="hljs-title">!2</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;two&quot;</span>}
<span class="hljs-comment">; A named metadata.</span>
<span class="hljs-title">!name</span> <span class="hljs-operator">=</span> !{<span class="hljs-title">!0</span><span class="hljs-punctuation">,</span> <span class="hljs-title">!1</span><span class="hljs-punctuation">,</span> <span class="hljs-title">!2</span>}
19 changes: 19 additions & 0 deletions test/markup/llvm/simple.txt
@@ -0,0 +1,19 @@
;; foooo
define i32 @mul_add(i32 %x, i32 %y, i32 %z) {
entry:
%tmp = mul i32 %x, %y
%tmp2 = add i32 %tmp, %z
%tmp3 = add i32 %tmp, 0
ret i32 %tmp3
}

; Named metadata
!0 = !{i32 42, null, !"string"}
!foo = !{!0}

; Some unnamed metadata nodes, which are referenced by the named metadata.
!0 = !{!"zero"}
!1 = !{!"one"}
!2 = !{!"two"}
; A named metadata.
!name = !{!0, !1, !2}