Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(python) Fix recognition of numeric literals followed by keywords without whitespace #3367

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES.md
@@ -1,3 +1,11 @@
## Version 11.3.2 (most likely)

Grammars:

- fix(python) Fix recognition of numeric literals followed by keywords without whitespace (#2985) [Richard Gibson][]
joshgoebel marked this conversation as resolved.
Show resolved Hide resolved

[Richard Gibson]: https://github.com/gibson042

## Version 11.3.1

Build:
Expand Down
18 changes: 12 additions & 6 deletions src/languages/python.js
Expand Up @@ -255,6 +255,12 @@ export default function(hljs) {
// Building blocks for the Python numeric-literal patterns, kept as regex *source strings*
// so they can be interpolated into the larger `begin` patterns.
// https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
// digitpart: one digit followed by any number of further digits, each optionally
// preceded by a single underscore (e.g., `0`, `12`, `1_000`)
const digitpart = '[0-9](_?[0-9])*';
// pointfloat: either an optional word-bounded digitpart, a dot, and a digitpart
// (e.g., `.5`, `1.5`), or a word-bounded digitpart followed by a bare dot (e.g., `1.`)
const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
// Whitespace after a number (or any lexical token) is needed only if its absence
// would change the tokenization
// https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
// We deviate slightly, requiring a word boundary or a keyword
// to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
// The alternation is deliberately left unparenthesized, so it must be embedded in a
// group such as `(?=...)` rather than concatenated directly into a larger pattern.
// NOTE(review): RESERVED_WORDS is defined outside this excerpt — presumably an array
// of plain keyword strings containing no regex metacharacters; confirm.
const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well if that isn't rather clever. :)

const NUMBER = {
className: 'number',
relevance: 0,
Expand All @@ -270,7 +276,7 @@ export default function(hljs) {
// because both MUST contain a decimal point and so cannot be confused with
// the interior part of an identifier
{
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
},
{
begin: `(${pointfloat})[jJ]?`
Expand All @@ -283,22 +289,22 @@ export default function(hljs) {
// decinteger is optionally imaginary
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
},
{
begin: '\\b0[bB](_?[01])+[lL]?\\b'
begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
},

// imagnumber (digitpart-based)
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: `\\b(${digitpart})[jJ]\\b`
begin: `\\b(${digitpart})[jJ](?=${lookahead})`
}
]
};
Expand Down
3 changes: 1 addition & 2 deletions test/markup/python/keywords.expect.txt
Expand Up @@ -13,5 +13,4 @@ x = Shorty()

<span class="hljs-built_in">exec</span>(<span class="hljs-number">123</span>)

<span class="hljs-comment"># note, numbers still aren&#x27;t highlighted fully</span>
<span class="hljs-built_in">print</span>(1<span class="hljs-keyword">if</span> <span class="hljs-number">0</span>==0<span class="hljs-keyword">else</span><span class="hljs-string">&quot;b&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-number">1</span><span class="hljs-keyword">if</span> <span class="hljs-number">0</span>==<span class="hljs-number">0</span><span class="hljs-keyword">else</span><span class="hljs-string">&quot;b&quot;</span>)
1 change: 0 additions & 1 deletion test/markup/python/keywords.txt
Expand Up @@ -13,5 +13,4 @@ for _ in sys.path:

exec(123)

# note, numbers still aren't highlighted fully
print(1if 0==0else"b")
18 changes: 18 additions & 0 deletions test/markup/python/numbers.expect.txt
Expand Up @@ -30,6 +30,24 @@

<span class="hljs-comment"># expressions containing numeric literals</span>
<span class="hljs-number">0.</span>.__str__, <span class="hljs-number">1e1</span>.__str__, fn(<span class="hljs-number">.5</span>)
<span class="hljs-number">0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0b0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0b0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0b_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0b_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0o0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0o0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0o_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0o_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0x0a</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0x0el</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0x_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0x_0_0l</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0e+0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.e-0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0e-0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.e+0_0_0</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0e+0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0.e-0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">.0_0_0e-0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_0.e+0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">009j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>
<span class="hljs-number">0_0_0j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>, <span class="hljs-number">0_0_9j</span><span class="hljs-keyword">is</span> <span class="hljs-number">0</span>

<span class="hljs-comment"># expressions not containing numeric literals</span>
x0.j
Expand Down
18 changes: 18 additions & 0 deletions test/markup/python/numbers.txt
Expand Up @@ -30,6 +30,24 @@

# expressions containing numeric literals
0..__str__, 1e1.__str__, fn(.5)
0is 0, 0lis 0
0_0_0is 0, 0_0_0lis 0
0b0is 0, 0b0lis 0
0b_0_0is 0, 0b_0_0lis 0
0o0is 0, 0o0lis 0
0o_0_0is 0, 0o_0_0lis 0
0x0ais 0, 0x0elis 0
0x_0_0is 0, 0x_0_0lis 0
.0is 0, 0.is 0
.0_0_0is 0, 0_0_0.is 0
.0e+0is 0, 0.e-0is 0
.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
.0jis 0, 0.jis 0
.0_0_0jis 0, 0_0_0.jis 0
.0e+0jis 0, 0.e-0jis 0
.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
0jis 0, 009jis 0
0_0_0jis 0, 0_0_9jis 0

# expressions not containing numeric literals
x0.j
Expand Down