Skip to content

Commit

Permalink
fix(python) Fix recognition of numeric literals followed by keywords without whitespace (#3367)
Browse files (browse the repository at this point in the history)

Fixes #2985.

* enh(python) Add tests for keyword after numeric literal
* fix(python) Fix recognition of numeric literals followed by keywords without whitespace
  • Loading branch information
gibson042 committed Oct 22, 2021
1 parent 257cfee commit 7028658
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 9 deletions.
8 changes: 8 additions & 0 deletions CHANGES.md
@@ -1,3 +1,11 @@
## Version 11.3.2 (most likely)

Grammars:

- fix(python) Fix recognition of numeric literals followed by keywords without whitespace (#2985) [Richard Gibson][]

[Richard Gibson]: https://github.com/gibson042

## Version 11.3.1

Build:
Expand Down
18 changes: 12 additions & 6 deletions src/languages/python.js
Expand Up @@ -255,6 +255,12 @@ export default function(hljs) {
// https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
const digitpart = '[0-9](_?[0-9])*';
const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
// Whitespace after a number (or any lexical token) is needed only if its absence
// would change the tokenization
// https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
// We deviate slightly, requiring a word boundary or a keyword
// to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
const NUMBER = {
className: 'number',
relevance: 0,
Expand All @@ -270,7 +276,7 @@ export default function(hljs) {
// because both MUST contain a decimal point and so cannot be confused with
// the interior part of an identifier
{
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
},
{
begin: `(${pointfloat})[jJ]?`
Expand All @@ -283,22 +289,22 @@ export default function(hljs) {
// decinteger is optionally imaginary
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
},
{
begin: '\\b0[bB](_?[01])+[lL]?\\b'
begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
},

// imagnumber (digitpart-based)
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: `\\b(${digitpart})[jJ]\\b`
begin: `\\b(${digitpart})[jJ](?=${lookahead})`
}
]
};
Expand Down
3 changes: 1 addition & 2 deletions test/markup/python/keywords.expect.txt
Expand Up @@ -13,5 +13,4 @@ x = Shorty()

<span class="hljs-built_in">exec</span>(<span class="hljs-number">123</span>)

<span class="hljs-comment"># note, numbers still aren&#x27;t highlighted fully</span>
<span class="hljs-built_in">print</span>(1<span class="hljs-keyword">if</span> <span class="hljs-number">0</span>==0<span class="hljs-keyword">else</span><span class="hljs-string">&quot;b&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-number">1</span><span class="hljs-keyword">if</span> <span class="hljs-number">0</span>==<span class="hljs-number">0</span><span class="hljs-keyword">else</span><span class="hljs-string">&quot;b&quot;</span>)
1 change: 0 additions & 1 deletion test/markup/python/keywords.txt
Expand Up @@ -13,5 +13,4 @@ for _ in sys.path:

exec(123)

# note, numbers still aren't highlighted fully
print(1if 0==0else"b")
18 changes: 18 additions & 0 deletions test/markup/python/numbers.expect.txt
Expand Up @@ -30,6 +30,24 @@

<span class="hljs-comment"># expressions containing numeric literals</span>
<span class="hljs-number">0.</span>.__str__, <span class="hljs-number">1e1</span>.__str__, fn(<span class="hljs-number">.5</span>)
<span class="hljs-number">0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0b0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0b0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0b_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0b_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0o0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0o0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0o_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0o_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0x0a</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0x0el</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0x_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0x_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0e+0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.e-0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0e-0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.e+0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0e+0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.e-0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0e-0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.e+0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">009j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_9j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>

<span class="hljs-comment"># expressions not containing numeric literals</span>
x0.j
Expand Down
18 changes: 18 additions & 0 deletions test/markup/python/numbers.txt
Expand Up @@ -30,6 +30,24 @@

# expressions containing numeric literals
0..__str__, 1e1.__str__, fn(.5)
0is 0, 0lis 0
0_0_0is 0, 0_0_0lis 0
0b0is 0, 0b0lis 0
0b_0_0is 0, 0b_0_0lis 0
0o0is 0, 0o0lis 0
0o_0_0is 0, 0o_0_0lis 0
0x0ais 0, 0x0elis 0
0x_0_0is 0, 0x_0_0lis 0
.0is 0, 0.is 0
.0_0_0is 0, 0_0_0.is 0
.0e+0is 0, 0.e-0is 0
.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
.0jis 0, 0.jis 0
.0_0_0jis 0, 0_0_0.jis 0
.0e+0jis 0, 0.e-0jis 0
.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
0jis 0, 009jis 0
0_0_0jis 0, 0_0_9jis 0

# expressions not containing numeric literals
x0.j
Expand Down

0 comments on commit 7028658

Please sign in to comment.