Skip to content

Commit

Permalink
enh(llvm) Improve grammar and align assembly like grammars (#2830)
Browse files Browse the repository at this point in the history
-  Variables are now highlighted as `variable`
-  `i8`, `i32`, etc now highlighted as `type` 
-  Labels are now highlighted as `symbol` (matching our other grammars)
-  Better comment detection
-  Restore relevance to strings
-  Prevent false positives on `;` line endings as "comments"
-  Add very minimal `operator` and `punctuation` support
  • Loading branch information
joshgoebel committed Nov 13, 2020
1 parent 9ba5a5d commit 7816b2f
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -30,6 +30,7 @@ Language Improvements:
- enh(php) highlight variables (#2785) [Taufik Nurrohman][]
- fix(python) Handle comments on decorators (#2804) [Jonathan Sharpe][]
- enh(diff) improve highlighting of diff for git patches [Florian Bezdeka][]
- fix(llvm) lots of small improvements and fixes (#2830) [Josh Goebel][]
- enh(mathematica) Rework entire implementation [Patrick Scheibe][]
- Correct matching of the many variations of Mathematica's numbers
- Matching of named-characters aka special symbols like `\[Gamma]`
Expand Down
105 changes: 69 additions & 36 deletions src/languages/llvm.js
Expand Up @@ -4,12 +4,67 @@ Author: Michael Rodler <contact@f0rki.at>
Description: language used as intermediate representation in the LLVM compiler framework
Website: https://llvm.org/docs/LangRef.html
Category: assembler
Audit: 2020
*/

import * as regex from '../lib/regex.js';

/** @type LanguageFn */
export default function(hljs) {
var identifier = '([-a-zA-Z$._][\\w\\-$.]*)';
// Name portion of an identifier, wrapped in a capture group: the first
// character is a letter, `-`, `$`, `.` or `_`; any number of word characters,
// `$`, `.` or `-` may follow. The sigil-prefixed modes below combine it with
// `%`, `@` and `!` through regex.concat.
const IDENT_RE = /([-a-zA-Z$._][\w$.-]*)/;
// Type names written as the letter `i` followed by digits (`i8`, `i32`, ...),
// highlighted as `type`.
const TYPE = {
  // Must start on a word boundary; the lookahead requires whitespace or a
  // word boundary immediately after the digits.
  begin: /\bi\d+(?=\s|\b)/,
  className: 'type'
};
// Minimal operator support: only the `=` sign is recognised.
const OPERATOR = {
  begin: /=/,
  className: 'operator',
  // Contributes nothing to the relevance score.
  relevance: 0
};
// Minimal punctuation support: only the comma is recognised.
const PUNCTUATION = {
  begin: /,/,
  className: 'punctuation',
  // Contributes nothing to the relevance score.
  relevance: 0
};
// Numeric literals, highlighted as `number` with zero relevance.
const NUMBER = {
  relevance: 0,
  className: 'number',
  variants: [
    // Hexadecimal: `0x` / `0X` followed by one or more hex digits.
    {
      begin: /0[xX][a-fA-F0-9]+/
    },
    // Decimal: optional leading minus, optional fraction, optional exponent
    // (the exponent itself may carry a sign and a fraction).
    {
      begin: /-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?/
    }
  ]
};
// Labels: optional leading whitespace, a name, then a colon, anchored at the
// start of a line. Highlighted as `symbol` with zero relevance.
const LABEL = {
  className: 'symbol',
  variants: [
    // The name uses the same character set as the file's other identifiers:
    // it starts with a letter, `-`, `$`, `.` or `_` and may continue with
    // word characters, `$`, `.` or `-`. This covers labels such as
    // `for.body:`, `bb1:` and `if.then:`, which a lowercase-letters-only
    // pattern does not match.
    { begin: /^\s*[-a-zA-Z$._][\w$.-]*:/ }
  ],
  relevance: 0
};
// Tokens highlighted as `variable`. Each pattern below becomes one
// `{ begin: <pattern> }` variant, in the order listed.
const VARIABLE = {
  variants: [
    // `%` followed by an identifier name
    regex.concat(/%/, IDENT_RE),
    // `%` followed by digits
    /%\d+/,
    // `#` followed by digits
    /#\d+/
  ].map((begin) => ({ begin })),
  className: 'variable'
};
// Tokens highlighted as `title`: `@`-prefixed and `!`-prefixed names.
// Each pattern below becomes one `{ begin: <pattern> }` variant, in the
// order listed.
const FUNCTION = {
  variants: [
    // `@` followed by an identifier name
    regex.concat(/@/, IDENT_RE),
    // `@` followed by digits
    /@\d+/,
    // `!` followed by an identifier name
    regex.concat(/!/, IDENT_RE),
    // `!`, digits, then an identifier name
    regex.concat(/!\d+/, IDENT_RE),
    // https://llvm.org/docs/LangRef.html#namedmetadatastructure
    // `!` followed by digits only (a single digit is enough)
    /!\d+/
  ].map((begin) => ({ begin })),
  className: 'title'
};

return {
name: 'LLVM IR',
// TODO: split into different categories of keywords
keywords:
'begin end true false declare define global ' +
'constant private linker_private internal ' +
Expand Down Expand Up @@ -49,48 +104,26 @@ export default function(hljs) {
'extractvalue insertvalue atomicrmw cmpxchg fence ' +
'argmemonly double',
contains: [
{
className: 'keyword',
begin: 'i\\d+'
},
hljs.COMMENT(
';', '\\n', {relevance: 0}
),
// Double quote string
TYPE,
// this matches "empty comments"...
// ...because it's far more likely this is a statement terminator in
// another language than an actual comment
hljs.COMMENT(/;\s*$/, null, { relevance: 0 }),
hljs.COMMENT(/;/, /$/),
hljs.QUOTE_STRING_MODE,
{
className: 'string',
variants: [
// Double-quoted string
{ begin: '"', end: '[^\\\\]"' },
],
relevance: 0
},
{
className: 'title',
variants: [
{ begin: '@' + identifier },
{ begin: '@\\d+' },
{ begin: '!' + identifier },
{ begin: '!\\d+' + identifier }
{ begin: /"/, end: /[^\\]"/ },
]
},
{
className: 'symbol',
variants: [
{ begin: '%' + identifier },
{ begin: '%\\d+' },
{ begin: '#\\d+' },
]
},
{
className: 'number',
variants: [
{ begin: '0[xX][a-fA-F0-9]+' },
{ begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' }
],
relevance: 0
},
FUNCTION,
PUNCTUATION,
OPERATOR,
VARIABLE,
LABEL,
NUMBER
]
};
}
19 changes: 19 additions & 0 deletions test/markup/llvm/simple.expect.txt
@@ -0,0 +1,19 @@
<span class="hljs-comment">;; foooo</span>
<span class="hljs-keyword">define</span> <span class="hljs-type">i32</span> <span class="hljs-title">@mul_add</span>(<span class="hljs-type">i32</span> <span class="hljs-variable">%x</span><span class="hljs-punctuation">,</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%y</span><span class="hljs-punctuation">,</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%z</span>) {
<span class="hljs-symbol"> entry:</span>
<span class="hljs-variable">%tmp</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">mul</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%x</span><span class="hljs-punctuation">,</span> <span class="hljs-variable">%y</span>
<span class="hljs-variable">%tmp2</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">add</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp</span><span class="hljs-punctuation">,</span> <span class="hljs-variable">%z</span>
<span class="hljs-variable">%tmp3</span> <span class="hljs-operator">=</span> <span class="hljs-keyword">add</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0</span>
<span class="hljs-keyword">ret</span> <span class="hljs-type">i32</span> <span class="hljs-variable">%tmp3</span>
}

<span class="hljs-comment">; Named metadata</span>
<span class="hljs-title">!0</span> <span class="hljs-operator">=</span> !{<span class="hljs-type">i32</span> <span class="hljs-number">42</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">null</span><span class="hljs-punctuation">,</span> !<span class="hljs-string">&quot;string&quot;</span>}
<span class="hljs-title">!foo</span> <span class="hljs-operator">=</span> !{<span class="hljs-title">!0</span>}

<span class="hljs-comment">; Some unnamed metadata nodes, which are referenced by the named metadata.</span>
<span class="hljs-title">!0</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;zero&quot;</span>}
<span class="hljs-title">!1</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;one&quot;</span>}
<span class="hljs-title">!2</span> <span class="hljs-operator">=</span> !{!<span class="hljs-string">&quot;two&quot;</span>}
<span class="hljs-comment">; A named metadata.</span>
<span class="hljs-title">!name</span> <span class="hljs-operator">=</span> !{<span class="hljs-title">!0</span><span class="hljs-punctuation">,</span> <span class="hljs-title">!1</span><span class="hljs-punctuation">,</span> <span class="hljs-title">!2</span>}
19 changes: 19 additions & 0 deletions test/markup/llvm/simple.txt
@@ -0,0 +1,19 @@
;; foooo
define i32 @mul_add(i32 %x, i32 %y, i32 %z) {
entry:
%tmp = mul i32 %x, %y
%tmp2 = add i32 %tmp, %z
%tmp3 = add i32 %tmp, 0
ret i32 %tmp3
}

; Named metadata
!0 = !{i32 42, null, !"string"}
!foo = !{!0}

; Some unnamed metadata nodes, which are referenced by the named metadata.
!0 = !{!"zero"}
!1 = !{!"one"}
!2 = !{!"two"}
; A named metadata.
!name = !{!0, !1, !2}

0 comments on commit 7816b2f

Please sign in to comment.