From 93ed0e5a93579eb0ca5c24cb96351d7e5758edbd Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 13:23:26 -0500 Subject: [PATCH 01/10] enh(llvm) fixes types, variables, and labels --- src/languages/llvm.js | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 21711e88a7..126f01b129 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -50,11 +50,11 @@ export default function(hljs) { 'argmemonly double', contains: [ { - className: 'keyword', - begin: 'i\\d+' + className: 'type', + begin: /\bi\d+(?=\s|\b)/ }, hljs.COMMENT( - ';', '\\n', {relevance: 0} + ';', /$/, {relevance: 0} ), // Double quote string hljs.QUOTE_STRING_MODE, @@ -76,13 +76,30 @@ export default function(hljs) { ] }, { - className: 'symbol', + className: 'puncutation', + begin: /,/ + }, + { + className: 'operator', + variants: [ + { begin: /=/ } + ] + }, + { + className: 'variable', variants: [ { begin: '%' + identifier }, { begin: '%\\d+' }, { begin: '#\\d+' }, ] }, + { + className: 'symbol', + variants: [ + {begin: '^\\s*[a-z]+:'}, // labels + ], + relevance: 0 + }, { className: 'number', variants: [ From 980c87636678d6721fd5e50c6eb9db53d38da546 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 13:26:41 -0500 Subject: [PATCH 02/10] puncutation no relevance, strings get relevance --- src/languages/llvm.js | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 126f01b129..4682c093a7 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -63,8 +63,7 @@ export default function(hljs) { variants: [ // Double-quoted string { begin: '"', end: '[^\\\\]"' }, - ], - relevance: 0 + ] }, { className: 'title', @@ -77,13 +76,13 @@ export default function(hljs) { }, { className: 'puncutation', + relevance: 0, begin: /,/ }, { className: 'operator', - variants: [ - { begin: /=/ } - ] + relevance: 0, + begin: /=/ }, { className: 'variable', @@ -96,7 +95,7 @@ export default function(hljs) { { className: 'symbol', variants: [ - {begin: '^\\s*[a-z]+:'}, // labels + { begin: /^\s*[a-z]+:/ }, // labels ], relevance: 0 }, From 6bda921e307f0cfea706c54410d87980f951d21c Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 13:33:49 -0500 Subject: [PATCH 03/10] handle comments that are not comments --- src/languages/llvm.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 4682c093a7..79fd72a347 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -53,10 +53,13 @@ export default function(hljs) { className: 'type', begin: /\bi\d+(?=\s|\b)/ }, - hljs.COMMENT( - ';', /$/, {relevance: 0} + hljs.COMMENT(/;\s*$/, + // this matches "empty comments"... + // ...because it's far more likely this is a statement terminator in + // another language than an actual comment + { relevance: 0 } ), - // Double quote string + hljs.COMMENT(/;/, /$/), hljs.QUOTE_STRING_MODE, { className: 'string', From b021332c880375a387dfa5cb33a5e5e837f10487 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 13:34:59 -0500 Subject: [PATCH 04/10] typo --- src/languages/llvm.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 79fd72a347..a4fb75812f 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -78,7 +78,7 @@ export default function(hljs) { ] }, { - className: 'puncutation', + className: 'punctuation', relevance: 0, begin: /,/ }, From dc0208529e768c9a7c93631b81c35ca8812b5db4 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 13:43:00 -0500 Subject: [PATCH 05/10] cleanup grammar --- src/languages/llvm.js | 102 ++++++++++++++++------------- test/markup/llvm/simple.expect.txt | 8 +++ test/markup/llvm/simple.txt | 8 +++ 3 files changed, 71 insertions(+), 47 deletions(-) create mode 100644 test/markup/llvm/simple.expect.txt create mode 100644 test/markup/llvm/simple.txt diff --git a/src/languages/llvm.js b/src/languages/llvm.js index a4fb75812f..372a37ac67 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -7,7 +7,54 @@ Category: assembler */ export default function(hljs) { - var identifier = '([-a-zA-Z$._][\\w\\-$.]*)'; + const IDENT_RE = '([-a-zA-Z$._][\\w\\-$.]*)'; + const TYPE = { + className: 'type', + begin: /\bi\d+(?=\s|\b)/ + }; + const OPERATOR = { + className: 'operator', + relevance: 0, + begin: /=/ + }; + const PUNCTUATION = { + className: 'punctuation', + relevance: 0, + begin: /,/ + }; + const NUMBER = { + className: 'number', + variants: [ + { begin: '0[xX][a-fA-F0-9]+' }, + { begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' } + ], + relevance: 0 + }; + const LABEL = { + className: 'symbol', + variants: [ + { begin: /^\s*[a-z]+:/ }, // labels + ], + relevance: 0 + }; + const VARIABLE = { + className: 'variable', + variants: [ + { begin: '%' + IDENT_RE }, + { begin: '%\\d+' }, + { begin: '#\\d+' }, + ] + }; + const FUNCTION = { + className: 'title', + variants: [ + { begin: '@' + IDENT_RE }, + { begin: '@\\d+' }, + { begin: '!' + IDENT_RE }, + { begin: '!\\d+' + IDENT_RE } + ] + }; + return { name: 'LLVM IR', keywords: @@ -49,10 +96,7 @@ export default function(hljs) { 'extractvalue insertvalue atomicrmw cmpxchg fence ' + 'argmemonly double', contains: [ - { - className: 'type', - begin: /\bi\d+(?=\s|\b)/ - }, + TYPE, hljs.COMMENT(/;\s*$/, // this matches "empty comments"... // ...because it's far more likely this is a statement terminator in @@ -68,48 +112,12 @@ export default function(hljs) { { begin: '"', end: '[^\\\\]"' }, ] }, - { - className: 'title', - variants: [ - { begin: '@' + identifier }, - { begin: '@\\d+' }, - { begin: '!' + identifier }, - { begin: '!\\d+' + identifier } - ] - }, - { - className: 'punctuation', - relevance: 0, - begin: /,/ - }, - { - className: 'operator', - relevance: 0, - begin: /=/ - }, - { - className: 'variable', - variants: [ - { begin: '%' + identifier }, - { begin: '%\\d+' }, - { begin: '#\\d+' }, - ] - }, - { - className: 'symbol', - variants: [ - { begin: /^\s*[a-z]+:/ }, // labels - ], - relevance: 0 - }, - { - className: 'number', - variants: [ - { begin: '0[xX][a-fA-F0-9]+' }, - { begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' } - ], - relevance: 0 - }, + FUNCTION, + PUNCTUATION, + OPERATOR, + VARIABLE, + LABEL, + NUMBER ] }; } diff --git a/test/markup/llvm/simple.expect.txt b/test/markup/llvm/simple.expect.txt new file mode 100644 index 0000000000..8f992fbf2f --- /dev/null +++ b/test/markup/llvm/simple.expect.txt @@ -0,0 +1,8 @@ +;; foooo +define i32 @mul_add(i32 %x, i32 %y, i32 %z) { + entry: + %tmp = mul i32 %x, %y + %tmp2 = add i32 %tmp, %z + %tmp3 = add i32 %tmp, 0 + ret i32 %tmp3 +} diff --git a/test/markup/llvm/simple.txt b/test/markup/llvm/simple.txt new file mode 100644 index 0000000000..8ce3effef6 --- /dev/null +++ b/test/markup/llvm/simple.txt @@ -0,0 +1,8 @@ +;; foooo +define i32 @mul_add(i32 %x, i32 %y, i32 %z) { + entry: + %tmp = mul i32 %x, %y + %tmp2 = add i32 %tmp, %z + %tmp3 = add i32 %tmp, 0 + ret i32 %tmp3 +} From d985a5f344cfcc1e1bf67d404b12e21e0e0b0052 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 13 Nov 2020 09:37:15 -0500 Subject: [PATCH 06/10] fix comments --- src/languages/llvm.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 372a37ac67..465400281a 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -97,12 +97,7 @@ export default function(hljs) { 'argmemonly double', contains: [ TYPE, - hljs.COMMENT(/;\s*$/, - // this matches "empty comments"... - // ...because it's far more likely this is a statement terminator in - // another language than an actual comment - { relevance: 0 } - ), + hljs.COMMENT(/;\s*$/, null, { relevance: 0 }), hljs.COMMENT(/;/, /$/), hljs.QUOTE_STRING_MODE, { From 75ae5a1a4809712c9cf99e7054989ab793c5c497 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 13 Nov 2020 09:45:42 -0500 Subject: [PATCH 07/10] allow single digit named meta-data --- src/languages/llvm.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 465400281a..287e01ba5c 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -51,7 +51,10 @@ export default function(hljs) { { begin: '@' + IDENT_RE }, { begin: '@\\d+' }, { begin: '!' + IDENT_RE }, - { begin: '!\\d+' + IDENT_RE } + { begin: '!\\d+' + IDENT_RE }, + // https://llvm.org/docs/LangRef.html#namedmetadatastructure + // obviously a single digit can also be used in this fashion + { begin: '!\\d+' } ] }; @@ -97,6 +100,9 @@ export default function(hljs) { 'argmemonly double', contains: [ TYPE, + // this matches "empty comments"... + // ...because it's far more likely this is a statement terminator in + // another language than an actual comment hljs.COMMENT(/;\s*$/, null, { relevance: 0 }), hljs.COMMENT(/;/, /$/), hljs.QUOTE_STRING_MODE, From 6150d8ff3978128bfb1234aba93699e8004766c8 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 13 Nov 2020 09:46:54 -0500 Subject: [PATCH 08/10] add additional tests --- test/markup/llvm/simple.expect.txt | 11 +++++++++++ test/markup/llvm/simple.txt | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/test/markup/llvm/simple.expect.txt b/test/markup/llvm/simple.expect.txt index 8f992fbf2f..71bc91b959 100644 --- a/test/markup/llvm/simple.expect.txt +++ b/test/markup/llvm/simple.expect.txt @@ -6,3 +6,14 @@ %tmp3 = add i32 %tmp, 0 ret i32 %tmp3 } + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} + +; Some unnamed metadata nodes, which are referenced by the named metadata. +!0 = !{!"zero"} +!1 = !{!"one"} +!2 = !{!"two"} +; A named metadata. +!name = !{!0, !1, !2} diff --git a/test/markup/llvm/simple.txt b/test/markup/llvm/simple.txt index 8ce3effef6..4e757192b7 100644 --- a/test/markup/llvm/simple.txt +++ b/test/markup/llvm/simple.txt @@ -6,3 +6,14 @@ define i32 @mul_add(i32 %x, i32 %y, i32 %z) { %tmp3 = add i32 %tmp, 0 ret i32 %tmp3 } + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} + +; Some unnamed metadata nodes, which are referenced by the named metadata. +!0 = !{!"zero"} +!1 = !{!"one"} +!2 = !{!"two"} +; A named metadata. +!name = !{!0, !1, !2} From e5847192a5c852c027217d80dabd6c2d73fb3a9f Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 13 Nov 2020 09:51:50 -0500 Subject: [PATCH 09/10] chore(audit) llvm --- src/languages/llvm.js | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/languages/llvm.js b/src/languages/llvm.js index 287e01ba5c..335d728f81 100644 --- a/src/languages/llvm.js +++ b/src/languages/llvm.js @@ -4,10 +4,14 @@ Author: Michael Rodler Description: language used as intermediate representation in the LLVM compiler framework Website: https://llvm.org/docs/LangRef.html Category: assembler +Audit: 2020 */ +import * as regex from '../lib/regex.js'; + +/** @type LanguageFn */ export default function(hljs) { - const IDENT_RE = '([-a-zA-Z$._][\\w\\-$.]*)'; + const IDENT_RE = /([-a-zA-Z$._][\w$.-]*)/; const TYPE = { className: 'type', begin: /\bi\d+(?=\s|\b)/ @@ -25,8 +29,8 @@ export default function(hljs) { const NUMBER = { className: 'number', variants: [ - { begin: '0[xX][a-fA-F0-9]+' }, - { begin: '-?\\d+(?:[.]\\d+)?(?:[eE][-+]?\\d+(?:[.]\\d+)?)?' } + { begin: /0[xX][a-fA-F0-9]+/ }, + { begin: /-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?/ } ], relevance: 0 }; @@ -40,26 +44,27 @@ export default function(hljs) { const VARIABLE = { className: 'variable', variants: [ - { begin: '%' + IDENT_RE }, - { begin: '%\\d+' }, - { begin: '#\\d+' }, + { begin: regex.concat(/%/, IDENT_RE) }, + { begin: /%\d+/ }, + { begin: /#\d+/ }, ] }; const FUNCTION = { className: 'title', variants: [ - { begin: '@' + IDENT_RE }, - { begin: '@\\d+' }, - { begin: '!' + IDENT_RE }, - { begin: '!\\d+' + IDENT_RE }, + { begin: regex.concat(/@/, IDENT_RE) }, + { begin: /@\d+/ }, + { begin: regex.concat(/!/, IDENT_RE) }, + { begin: regex.concat(/!\d+/, IDENT_RE) }, // https://llvm.org/docs/LangRef.html#namedmetadatastructure // obviously a single digit can also be used in this fashion - { begin: '!\\d+' } + { begin: /!\d+/ } ] }; return { name: 'LLVM IR', + // TODO: split into different categories of keywords keywords: 'begin end true false declare define global ' + 'constant private linker_private internal ' + @@ -110,7 +115,7 @@ export default function(hljs) { className: 'string', variants: [ // Double-quoted string - { begin: '"', end: '[^\\\\]"' }, + { begin: /"/, end: /[^\\]"/ }, ] }, FUNCTION, From 68325914e4912894141842091d8010c626e4fd8e Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 13 Nov 2020 12:06:23 -0500 Subject: [PATCH 10/10] add changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 1ee8915e53..b14bbcaac6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,6 +22,7 @@ Language Improvements: - enh(php) highlight variables (#2785) [Taufik Nurrohman][] - fix(python) Handle comments on decorators (#2804) [Jonathan Sharpe][] - enh(diff) improve highlighting of diff for git patches [Florian Bezdeka][] +- fix(llvm) lots of small improvements and fixes (#2830) [Josh Goebel][] Dev Improvements: