diff --git a/CHANGES.md b/CHANGES.md
index da390042c7..ef24a6123c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,11 @@
+## Version 11.3.2 (most likely)
+
+Grammars:
+
+- fix(python) Fix recognition of numeric literals followed by keywords without whitespace (#2985) [Richard Gibson][]
+
+[Richard Gibson]: https://github.com/gibson042
+
## Version 11.3.1
Build:
diff --git a/src/languages/python.js b/src/languages/python.js
index ce603a86d4..46ed88a6f0 100644
--- a/src/languages/python.js
+++ b/src/languages/python.js
@@ -255,6 +255,12 @@ export default function(hljs) {
// https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
const digitpart = '[0-9](_?[0-9])*';
const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
+ // Whitespace after a number (or any lexical token) is needed only if its absence
+ // would change the tokenization
+ // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
+ // We deviate slightly: a number must be followed by a word boundary or a keyword,
+ // so that we never highlight a mere *prefix* of a longer token (e.g., `0` in `0x41`, `08`, or `0__1`)
+ const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
const NUMBER = {
className: 'number',
relevance: 0,
@@ -270,7 +276,7 @@ export default function(hljs) {
// because both MUST contain a decimal point and so cannot be confused with
// the interior part of an identifier
{
- begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
+ begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
},
{
begin: `(${pointfloat})[jJ]?`
@@ -283,22 +289,22 @@ export default function(hljs) {
// decinteger is optionally imaginary
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
- begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
+ begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
},
{
- begin: '\\b0[bB](_?[01])+[lL]?\\b'
+ begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
},
{
- begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
+ begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
},
{
- begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
+ begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
},
// imagnumber (digitpart-based)
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
- begin: `\\b(${digitpart})[jJ]\\b`
+ begin: `\\b(${digitpart})[jJ](?=${lookahead})`
}
]
};
diff --git a/test/markup/python/keywords.expect.txt b/test/markup/python/keywords.expect.txt
index 501f670464..f2227af2f5 100644
--- a/test/markup/python/keywords.expect.txt
+++ b/test/markup/python/keywords.expect.txt
@@ -13,5 +13,4 @@ x = Shorty()
exec(123)
-
-print(1if 0==0else"b")
+print(1if 0==0else"b")
diff --git a/test/markup/python/keywords.txt b/test/markup/python/keywords.txt
index 0b7835526d..30e3d0a180 100644
--- a/test/markup/python/keywords.txt
+++ b/test/markup/python/keywords.txt
@@ -13,5 +13,4 @@ for _ in sys.path:
exec(123)
-# note, numbers still aren't highlighted fully
print(1if 0==0else"b")
diff --git a/test/markup/python/numbers.expect.txt b/test/markup/python/numbers.expect.txt
index aed662eaec..725eb354f7 100644
--- a/test/markup/python/numbers.expect.txt
+++ b/test/markup/python/numbers.expect.txt
@@ -30,6 +30,24 @@
0..__str__, 1e1.__str__, fn(.5)
+0is 0, 0lis 0
+0_0_0is 0, 0_0_0lis 0
+0b0is 0, 0b0lis 0
+0b_0_0is 0, 0b_0_0lis 0
+0o0is 0, 0o0lis 0
+0o_0_0is 0, 0o_0_0lis 0
+0x0ais 0, 0x0elis 0
+0x_0_0is 0, 0x_0_0lis 0
+.0is 0, 0.is 0
+.0_0_0is 0, 0_0_0.is 0
+.0e+0is 0, 0.e-0is 0
+.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
+.0jis 0, 0.jis 0
+.0_0_0jis 0, 0_0_0.jis 0
+.0e+0jis 0, 0.e-0jis 0
+.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
+0jis 0, 009jis 0
+0_0_0jis 0, 0_0_9jis 0
x0.j
diff --git a/test/markup/python/numbers.txt b/test/markup/python/numbers.txt
index 8933000a0f..0511ce4337 100644
--- a/test/markup/python/numbers.txt
+++ b/test/markup/python/numbers.txt
@@ -30,6 +30,24 @@
# expressions containing numeric literals
0..__str__, 1e1.__str__, fn(.5)
+0is 0, 0lis 0
+0_0_0is 0, 0_0_0lis 0
+0b0is 0, 0b0lis 0
+0b_0_0is 0, 0b_0_0lis 0
+0o0is 0, 0o0lis 0
+0o_0_0is 0, 0o_0_0lis 0
+0x0ais 0, 0x0elis 0
+0x_0_0is 0, 0x_0_0lis 0
+.0is 0, 0.is 0
+.0_0_0is 0, 0_0_0.is 0
+.0e+0is 0, 0.e-0is 0
+.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
+.0jis 0, 0.jis 0
+.0_0_0jis 0, 0_0_0.jis 0
+.0e+0jis 0, 0.e-0jis 0
+.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
+0jis 0, 009jis 0
+0_0_0jis 0, 0_0_9jis 0
# expressions not containing numeric literals
x0.j