Skip to content

Commit

Permalink
enh(r) Add operators and punctuation (#3195)
Browse files Browse the repository at this point in the history
This change adds highlighting for operators and punctuation, and fixes
the issues described in #3194.

* Give R a relevance boost from arrow-assign
* Make `<-` less of a signal boost for R
* Rebalance relevance of common syntactic constructs
* Fix Vala having too much relevance for `^#` (meta/comment)
  • Loading branch information
klmr committed May 19, 2021
1 parent 75fd067 commit 15ed6a4
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 86 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -46,6 +46,8 @@ Language Grammars:

Parser:

- enh(vala) improve language detection for Vala (#3195) [Konrad Rudolph][]
- enh(r) add support for operators, fix number highlighting bug (#3194, #3195) [Konrad Rudolph][]
- enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][]
- enh(parser) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][]
- enh(parser) `highlightElement` now always tags blocks with a consistent `language-[name]` class [Josh Goebel][]
Expand Down
113 changes: 84 additions & 29 deletions src/languages/r.js
Expand Up @@ -18,13 +18,27 @@ export default function(hljs) {
// handled in a separate mode. See `test/markup/r/names.txt` for examples.
// FIXME: Support Unicode identifiers.
const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/;
// Numeric literal forms recognised by this grammar: hexadecimal with a
// fraction (binary exponent required), hexadecimal without a fraction, and
// decimal. The fractional hexadecimal form accepts only an `i` suffix; the
// other two accept either `L` or `i`.
// NOTE(review): the fractional hexadecimal pattern is listed first, presumably
// so it wins over the plain hexadecimal one — confirm `regex.either` preserves
// alternative order.
const NUMBER_TYPES_RE = regex.either(
// Special case: only hexadecimal binary powers can contain fractions
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
// Hexadecimal numbers without fraction and optional binary power
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
// Decimal numbers
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
);
// Operator tokens. Regex alternation takes the first alternative that matches,
// so every multi-character operator must be listed before the single-character
// class at the end; otherwise an operator such as `**` would be consumed one
// character at a time and never matched as a single token.
const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
// Punctuation tokens: parentheses, braces, the double opening bracket `[[`,
// single square brackets, backslash and comma.
// NOTE(review): `[[` is listed ahead of the single-bracket class, presumably so
// it is matched as one token — confirm `regex.either` preserves alternative
// order.
const PUNCTUATION_RE = regex.either(
/[()]/,
/[{}]/,
/\[\[/,
/[[\]]/,
/\\/,
/,/
);

return {
name: 'R',

// only in Haskell, not R
illegal: /->/,
keywords: {
$pattern: IDENT_RE,
keyword:
Expand Down Expand Up @@ -56,6 +70,7 @@ export default function(hljs) {
'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
'trigamma trunc unclass untracemem UseMethod xtfrm',
},

contains: [
// Roxygen comments
hljs.COMMENT(
Expand All @@ -69,7 +84,7 @@ export default function(hljs) {
// preventing highlighting. This code is example R code, so nested
// doctags shouldn’t be treated as such. See
// `test/markup/r/roxygen.txt` for an example.
className: 'doctag',
scope: 'doctag',
begin: '@examples',
starts: {
contains: [
Expand All @@ -89,12 +104,12 @@ export default function(hljs) {
{
// Handle `@param` to highlight the parameter name following
// after.
className: 'doctag',
scope: 'doctag',
begin: '@param',
end: /$/,
contains: [
{
className: 'variable',
scope: 'variable',
variants: [
{ begin: IDENT_RE },
{ begin: /`(?:\\.|[^`\\])+`/ }
Expand All @@ -104,11 +119,11 @@ export default function(hljs) {
]
},
{
className: 'doctag',
scope: 'doctag',
begin: /@[a-zA-Z]+/
},
{
className: 'keyword',
scope: 'keyword',
begin: /\\[a-zA-Z]+/,
}
]
Expand All @@ -118,7 +133,7 @@ export default function(hljs) {
hljs.HASH_COMMENT_MODE,

{
className: 'string',
scope: 'string',
contains: [hljs.BACKSLASH_ESCAPE],
variants: [
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
Expand All @@ -131,48 +146,88 @@ export default function(hljs) {
{begin: "'", end: "'", relevance: 0}
],
},

// Matching numbers immediately following punctuation and operators is
// tricky since we need to look at the character ahead of a number to
// ensure the number is not part of an identifier, and we cannot use
// negative look-behind assertions. So instead we explicitly handle all
// possible combinations of (operator|punctuation), number.
// TODO: replace with negative look-behind when available
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
{
relevance: 0,
className: {
2: "number"
},
variants: [
// TODO: replace with negative look-behind when available
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
{
// Special case: only hexadecimal binary powers can contain fractions.
scope: {
1: 'operator',
2: 'number'
},
match: [
OPERATORS_RE,
NUMBER_TYPES_RE
]
},
{
scope: {
1: 'operator',
2: 'number'
},
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/
/%[^%]*%/,
NUMBER_TYPES_RE
]
},
{
scope: {
1: 'punctuation',
2: 'number'
},
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/
PUNCTUATION_RE,
NUMBER_TYPES_RE
]
},
{
scope: { 2: 'number' },
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
/[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
NUMBER_TYPES_RE
]
}
]
},

// Operators/punctuation when they're not directly followed by numbers
{
// Relevance boost for the most common assignment form.
scope: { 3: 'operator' },
match: [
IDENT_RE,
/\s+/,
/<-/,
/\s+/
]
},

{
// infix operator
begin: '%',
end: '%'
scope: 'operator',
relevance: 0,
variants: [
{ match: OPERATORS_RE },
{ match: /%[^%]*%/ }
]
},
// relevance boost for assignment

{
begin: regex.concat(SIMPLE_IDENT, "\\s+<-\\s+")
scope: 'punctuation',
relevance: 0,
match: PUNCTUATION_RE
},

{
// escaped identifier
// Escaped identifier
begin: '`',
end: '`',
contains: [
Expand Down
1 change: 0 additions & 1 deletion src/languages/vala.js
Expand Up @@ -52,7 +52,6 @@ export default function(hljs) {
className: 'meta',
begin: '^#',
end: '$',
relevance: 2
}
]
};
Expand Down
12 changes: 6 additions & 6 deletions test/markup/r/names.expect.txt
@@ -1,26 +1,26 @@
<span class="hljs-comment"># Valid names</span>

a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, <span class="hljs-built_in">c</span>, <span class="hljs-built_in">T</span>, <span class="hljs-built_in">F</span>, ._1
a1_foo<span class="hljs-punctuation">,</span> A1_FOO<span class="hljs-punctuation">,</span> .foo_<span class="hljs-punctuation">,</span> ._foo<span class="hljs-punctuation">,</span> Bar.42<span class="hljs-punctuation">,</span> foo..1<span class="hljs-punctuation">,</span> .<span class="hljs-punctuation">,</span> ._<span class="hljs-punctuation">,</span> ..<span class="hljs-punctuation">,</span> ...<span class="hljs-punctuation">,</span> ..1<span class="hljs-punctuation">,</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">,</span> ._1

<span class="hljs-comment"># Reserved Words</span>

<span class="hljs-literal">NA</span>, <span class="hljs-literal">NA_integer_</span>, <span class="hljs-literal">NA_real_</span>, <span class="hljs-literal">NA_character_</span>, <span class="hljs-literal">NA_complex_</span>, <span class="hljs-literal">NULL</span>, <span class="hljs-literal">NaN</span>, <span class="hljs-literal">Inf</span>
<span class="hljs-literal">NA</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_integer_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_real_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_character_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_complex_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NULL</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NaN</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">Inf</span>

<span class="hljs-comment"># Keywords</span>

<span class="hljs-keyword">function</span>, <span class="hljs-keyword">while</span>, <span class="hljs-keyword">repeat</span>, <span class="hljs-keyword">for</span>, <span class="hljs-keyword">if</span>, <span class="hljs-keyword">in</span>, <span class="hljs-keyword">else</span>, <span class="hljs-keyword">next</span>, <span class="hljs-keyword">break</span>
<span class="hljs-keyword">function</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">while</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">repeat</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">for</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">if</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">in</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">else</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">next</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">break</span>

<span class="hljs-comment"># Not reserved</span>

NULLa, NULL1, NULL., `NULL`, <span class="hljs-string">&#x27;NULL&#x27;</span>, NA_foo_, na_real_, Function, for.
NULLa<span class="hljs-punctuation">,</span> NULL1<span class="hljs-punctuation">,</span> NULL.<span class="hljs-punctuation">,</span> `NULL`<span class="hljs-punctuation">,</span> <span class="hljs-string">&#x27;NULL&#x27;</span><span class="hljs-punctuation">,</span> NA_foo_<span class="hljs-punctuation">,</span> na_real_<span class="hljs-punctuation">,</span> Function<span class="hljs-punctuation">,</span> for.

<span class="hljs-comment"># Primitive built-ins</span>

<span class="hljs-built_in">return</span>, <span class="hljs-built_in">switch</span>, <span class="hljs-built_in">sum</span>
<span class="hljs-built_in">return</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">switch</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">sum</span>

<span class="hljs-comment"># Non-primitive base functions</span>

stop, try
stop<span class="hljs-punctuation">,</span> try

<span class="hljs-comment"># Quoted identifiers</span>

Expand Down
10 changes: 6 additions & 4 deletions test/markup/r/numbers.expect.txt
@@ -1,18 +1,20 @@
<span class="hljs-number">1</span> <span class="hljs-comment"># Regression caused numbers at beginning not to be highlighted.</span>

<span class="hljs-comment"># Numbers</span>

<span class="hljs-number">0</span>, <span class="hljs-number">01</span>, <span class="hljs-number">08</span>, <span class="hljs-number">123456</span>, <span class="hljs-number">1256.701</span>, <span class="hljs-number">123e3</span>, <span class="hljs-number">123E+3</span>, <span class="hljs-number">1.23e-3</span>, <span class="hljs-number">1.23E3</span>, <span class="hljs-number">.25</span>, <span class="hljs-number">2.</span>
<span class="hljs-number">0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">01</span><span class="hljs-punctuation">,</span> <span class="hljs-number">08</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123456</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1256.701</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123e3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123E+3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23e-3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23E3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.25</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2.</span>

<span class="hljs-comment"># Integers</span>

<span class="hljs-number">123L</span>, -<span class="hljs-number">50L</span>
<span class="hljs-number">123L</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">50L</span>

<span class="hljs-comment"># Imaginary numbers</span>

<span class="hljs-number">123i</span>, -<span class="hljs-number">123i</span>, <span class="hljs-number">1.2e-3i</span>, <span class="hljs-number">1.i</span>, <span class="hljs-number">.0i</span>
<span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.2e-3i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.0i</span>

<span class="hljs-comment"># Hex numbers</span>

<span class="hljs-number">0x0</span>, <span class="hljs-number">0xabcdefABCDEF01234</span>, <span class="hljs-number">0xabcp123</span>, <span class="hljs-number">0xabcP-123</span>, <span class="hljs-number">0x1.2p2</span>, <span class="hljs-number">0xa.bp-3i</span>
<span class="hljs-number">0x0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcdefABCDEF01234</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcp123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcP-123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0x1.2p2</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xa.bp-3i</span>

<span class="hljs-comment"># Invalid/not literals (for reference)</span>

Expand Down
2 changes: 2 additions & 0 deletions test/markup/r/numbers.txt
@@ -1,3 +1,5 @@
1 # Regression caused numbers at beginning not to be highlighted.

# Numbers

0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2.
Expand Down

0 comments on commit 15ed6a4

Please sign in to comment.