diff --git a/CHANGES.md b/CHANGES.md index a10d111c42..d37e741c0c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -30,6 +30,7 @@ Language Improvements: - enh(php) highlight variables (#2785) [Taufik Nurrohman][] - fix(python) Handle comments on decorators (#2804) [Jonathan Sharpe][] - enh(diff) improve highlighting of diff for git patches [Florian Bezdeka][] +- fix(llvm) lots of small improvements and fixes (#2830) [Josh Goebel][] - enh(mathematica) Rework entire implementation [Patrick Scheibe][] - Correct matching of the many variations of Mathematica's numbers - Matching of named-characters aka special symbols like `\[Gamma]` diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 21711e88a7..335d728f81 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -4,12 +4,67 @@ Author: Michael Rodler Description: language used as intermediate representation in the LLVM compiler framework Website: https://llvm.org/docs/LangRef.html Category: assembler +Audit: 2020 */ +import * as regex from '../lib/regex.js'; + +/** @type LanguageFn */ export default function(hljs) { - var identifier = '([-a-zA-Z$._][\\w\\-$.]*)'; + const IDENT_RE = /([-a-zA-Z$._][\w$.-]*)/; + const TYPE = { + className: 'type', + begin: /\bi\d+(?=\s|\b)/ + }; + const OPERATOR = { + className: 'operator', + relevance: 0, + begin: /=/ + }; + const PUNCTUATION = { + className: 'punctuation', + relevance: 0, + begin: /,/ + }; + const NUMBER = { + className: 'number', + variants: [ + { begin: /0[xX][a-fA-F0-9]+/ }, + { begin: /-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?/ } + ], + relevance: 0 + }; + const LABEL = { + className: 'symbol', + variants: [ + { begin: /^\s*[a-z]+:/ }, // labels + ], + relevance: 0 + }; + const VARIABLE = { + className: 'variable', + variants: [ + { begin: regex.concat(/%/, IDENT_RE) }, + { begin: /%\d+/ }, + { begin: /#\d+/ }, + ] + }; + const FUNCTION = { + className: 'title', + variants: [ + { begin: regex.concat(/@/, IDENT_RE) }, + { begin: /@\d+/ }, + { begin: regex.concat(/!/, IDENT_RE) }, + { begin: regex.concat(/!\d+/, IDENT_RE) }, + // https://llvm.org/docs/LangRef.html#namedmetadatastructure + // obviously a single digit can also be used in this fashion + { begin: /!\d+/ } + ] + }; + return { name: 'LLVM IR', + // TODO: split into different categories of keywords keywords: 'begin end true false declare define global ' + 'constant private linker_private internal ' + @@ -49,48 +104,26 @@ export default function(hljs) { 'extractvalue insertvalue atomicrmw cmpxchg fence ' + 'argmemonly double', contains: [ - { - className: 'keyword', - begin: 'i\\d+' - }, - hljs.COMMENT( - ';', '\\n', {relevance: 0} - ), - // Double quote string + TYPE, + // this matches "empty comments"... + // ...because it's far more likely this is a statement terminator in + // another language than an actual comment + hljs.COMMENT(/;\s*$/, null, { relevance: 0 }), + hljs.COMMENT(/;/, /$/), hljs.QUOTE_STRING_MODE, { className: 'string', variants: [ // Double-quoted string - { begin: '"', end: '[^\\\\]"' }, - ], - relevance: 0 - }, - { - className: 'title', - variants: [ - { begin: '@' + identifier }, - { begin: '@\\d+' }, - { begin: '!' + identifier }, - { begin: '!\\d+' + identifier } + { begin: /"/, end: /[^\\]"/ }, ] }, - { - className: 'symbol', - variants: [ - { begin: '%' + identifier }, - { begin: '%\\d+' }, - { begin: '#\\d+' }, - ] - }, - { - className: 'number', - variants: [ - { begin: '0[xX][a-fA-F0-9]+' }, - { begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' } - ], - relevance: 0 - }, + FUNCTION, + PUNCTUATION, + OPERATOR, + VARIABLE, + LABEL, + NUMBER ] }; } diff --git a/test/markup/llvm/simple.expect.txt b/test/markup/llvm/simple.expect.txt new file mode 100644 index 0000000000..71bc91b959 --- /dev/null +++ b/test/markup/llvm/simple.expect.txt @@ -0,0 +1,19 @@ +;; foooo +define i32 @mul_add(i32 %x, i32 %y, i32 %z) { + entry: + %tmp = mul i32 %x, %y + %tmp2 = add i32 %tmp, %z + %tmp3 = add i32 %tmp, 0 + ret i32 %tmp3 +} + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} + +; Some unnamed metadata nodes, which are referenced by the named metadata. +!0 = !{!"zero"} +!1 = !{!"one"} +!2 = !{!"two"} +; A named metadata. +!name = !{!0, !1, !2} diff --git a/test/markup/llvm/simple.txt b/test/markup/llvm/simple.txt new file mode 100644 index 0000000000..4e757192b7 --- /dev/null +++ b/test/markup/llvm/simple.txt @@ -0,0 +1,19 @@ +;; foooo +define i32 @mul_add(i32 %x, i32 %y, i32 %z) { + entry: + %tmp = mul i32 %x, %y + %tmp2 = add i32 %tmp, %z + %tmp3 = add i32 %tmp, 0 + ret i32 %tmp3 +} + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} + +; Some unnamed metadata nodes, which are referenced by the named metadata. +!0 = !{!"zero"} +!1 = !{!"one"} +!2 = !{!"two"} +; A named metadata. +!name = !{!0, !1, !2}