diff --git a/CHANGES.md b/CHANGES.md
index da390042c7..ef24a6123c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,11 @@
+## Version 11.3.2 (most likely)
+
+Grammars:
+
+- fix(python) Fix recognition of numeric literals followed by keywords without whitespace (#2985) [Richard Gibson][]
+
+[Richard Gibson]: https://github.com/gibson042
+
 ## Version 11.3.1
 
 Build:
diff --git a/src/languages/python.js b/src/languages/python.js
index ce603a86d4..46ed88a6f0 100644
--- a/src/languages/python.js
+++ b/src/languages/python.js
@@ -255,6 +255,12 @@ export default function(hljs) {
   // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
   const digitpart = '[0-9](_?[0-9])*';
   const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
+  // Whitespace after a number (or any lexical token) is needed only if its absence
+  // would change the tokenization
+  // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
+  // We deviate slightly, requiring a word boundary or a keyword
+  // to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
+  const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
   const NUMBER = {
     className: 'number',
     relevance: 0,
@@ -270,7 +276,7 @@ export default function(hljs) {
       // because both MUST contain a decimal point and so cannot be confused with
       // the interior part of an identifier
       {
-        begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
+        begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
       },
       {
         begin: `(${pointfloat})[jJ]?`
@@ -283,22 +289,22 @@ export default function(hljs) {
       // decinteger is optionally imaginary
       // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
       {
-        begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
+        begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
       },
       {
-        begin: '\\b0[bB](_?[01])+[lL]?\\b'
+        begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
       },
       {
-        begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
+        begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
       },
       {
-        begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
+        begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
       },
 
       // imagnumber (digitpart-based)
       // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
       {
-        begin: `\\b(${digitpart})[jJ]\\b`
+        begin: `\\b(${digitpart})[jJ](?=${lookahead})`
       }
     ]
   };
diff --git a/test/markup/python/keywords.expect.txt b/test/markup/python/keywords.expect.txt
index 501f670464..f2227af2f5 100644
--- a/test/markup/python/keywords.expect.txt
+++ b/test/markup/python/keywords.expect.txt
@@ -13,5 +13,4 @@ x = Shorty()
 
 exec(123)
 
-# note, numbers still aren't highlighted fully
-print(1if 0==0else"b")
+print(1if 0==0else"b")
diff --git a/test/markup/python/keywords.txt b/test/markup/python/keywords.txt
index 0b7835526d..30e3d0a180 100644
--- a/test/markup/python/keywords.txt
+++ b/test/markup/python/keywords.txt
@@ -13,5 +13,4 @@ for _ in sys.path:
 
 exec(123)
 
-# note, numbers still aren't highlighted fully
 print(1if 0==0else"b")
diff --git a/test/markup/python/numbers.expect.txt b/test/markup/python/numbers.expect.txt
index aed662eaec..725eb354f7 100644
--- a/test/markup/python/numbers.expect.txt
+++ b/test/markup/python/numbers.expect.txt
@@ -30,6 +30,24 @@
 
 # expressions containing numeric literals
 0..__str__, 1e1.__str__, fn(.5)
+0is 0, 0lis 0
+0_0_0is 0, 0_0_0lis 0
+0b0is 0, 0b0lis 0
+0b_0_0is 0, 0b_0_0lis 0
+0o0is 0, 0o0lis 0
+0o_0_0is 0, 0o_0_0lis 0
+0x0ais 0, 0x0elis 0
+0x_0_0is 0, 0x_0_0lis 0
+.0is 0, 0.is 0
+.0_0_0is 0, 0_0_0.is 0
+.0e+0is 0, 0.e-0is 0
+.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
+.0jis 0, 0.jis 0
+.0_0_0jis 0, 0_0_0.jis 0
+.0e+0jis 0, 0.e-0jis 0
+.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
+0jis 0, 009jis 0
+0_0_0jis 0, 0_0_9jis 0
 
 # expressions not containing numeric literals
 x0.j
diff --git a/test/markup/python/numbers.txt b/test/markup/python/numbers.txt
index 8933000a0f..0511ce4337 100644
--- a/test/markup/python/numbers.txt
+++ b/test/markup/python/numbers.txt
@@ -30,6 +30,24 @@
 
 # expressions containing numeric literals
 0..__str__, 1e1.__str__, fn(.5)
+0is 0, 0lis 0
+0_0_0is 0, 0_0_0lis 0
+0b0is 0, 0b0lis 0
+0b_0_0is 0, 0b_0_0lis 0
+0o0is 0, 0o0lis 0
+0o_0_0is 0, 0o_0_0lis 0
+0x0ais 0, 0x0elis 0
+0x_0_0is 0, 0x_0_0lis 0
+.0is 0, 0.is 0
+.0_0_0is 0, 0_0_0.is 0
+.0e+0is 0, 0.e-0is 0
+.0_0_0e-0_0_0is 0, 0_0_0.e+0_0_0is 0
+.0jis 0, 0.jis 0
+.0_0_0jis 0, 0_0_0.jis 0
+.0e+0jis 0, 0.e-0jis 0
+.0_0_0e-0_0_0jis 0, 0_0_0.e+0_0_0jis 0
+0jis 0, 009jis 0
+0_0_0jis 0, 0_0_9jis 0
 
 # expressions not containing numeric literals
 x0.j