diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 98d53c5ce6..e27e0ddb4f 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -620,7 +620,7 @@ class MySqlLexer(RegexLexer): (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats - (r'[0-9]+', Number.Integer), + (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name # Date literals (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", @@ -673,7 +673,7 @@ class MySqlLexer(RegexLexer): # numeric literals have already been handled above. # ('[0-9a-z$_\u0080-\uffff]+', Name), - (r'`', Name, 'schema-object-name'), + (r'`', Name.Quoted, 'schema-object-name'), # Punctuation (r'[(),.;]', Punctuation), @@ -737,15 +737,15 @@ class MySqlLexer(RegexLexer): # Schema object name substates # ---------------------------- # - # Backtick-quoted schema object names support escape characters. - # It may be desirable to tokenize escape sequences differently, - # but currently Pygments does not have an obvious token type for - # this unique situation (for example, "Name.Escape"). + # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but + # formatters will style them as "Name" by default but add + # additional styles based on the token name. This gives users + # flexibility to add custom styles as desired. # 'schema-object-name': [ - (r'[^`\\]+', Name), - (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type. 
- (r'`', Name, '#pop'), + (r'[^`]+', Name.Quoted), + (r'``', Name.Quoted.Escape), + (r'`', Name.Quoted, '#pop'), ], } diff --git a/tests/examplefiles/mysql.txt b/tests/examplefiles/mysql.txt index 4927abd833..c00b0b9269 100644 --- a/tests/examplefiles/mysql.txt +++ b/tests/examplefiles/mysql.txt @@ -107,7 +107,7 @@ CREATE TABLE basic ( SELECT e1.`apple` AS a, `example2`.b FROM example1 AS e1 JOIN example2 e2 -ON `example1`.`id` = e2.id; +ON `example1`.`a``b` = e2.`123`; -- Operators diff --git a/tests/test_mysql.py b/tests/test_mysql.py index 9b5e2b8cf8..207ec822c4 100644 --- a/tests/test_mysql.py +++ b/tests/test_mysql.py @@ -28,9 +28,18 @@ def lexer(): yield MySqlLexer() -@pytest.mark.parametrize('text', ('123',)) -def test_integer_literals(lexer, text): - assert list(lexer.get_tokens(text))[0] == (Number.Integer, text) +@pytest.mark.parametrize('text', ('1', '22', '22 333', '22 a', '22+', '22)', '22\n333', '22\r\n333')) +def test_integer_literals_positive_match(lexer, text): + """Validate that integer literals are tokenized as integers.""" + token = list(lexer.get_tokens(text))[0] + assert token[0] == Number.Integer + assert token[1] in {'1', '22'} + + +@pytest.mark.parametrize('text', ('1a', '1A', '1.', '1ひ', '1$', '1_', '1\u0080', '1\uffff')) +def test_integer_literals_negative_match(lexer, text): + """Validate that non-integer texts are not matched as integers.""" + assert list(lexer.get_tokens(text))[0][0] != Number.Integer @pytest.mark.parametrize( @@ -215,18 +224,40 @@ def test_functions(lexer, text): @pytest.mark.parametrize( 'text', ( - 'abc_$123', '上市年限', 'ひらがな', - '`a`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`', - '````', r'`\``', r'`\\`', - '`-- `', '`/*`', '`#`', + 'abc_$123', '上市年限', 'ひらがな', '123_$abc', '123ひらがな', ), ) -def test_schema_object_names(lexer, text): +def test_schema_object_names_unquoted(lexer, text): tokens = list(lexer.get_tokens(text))[:-1] assert all(token[0] == Name for token in tokens) assert ''.join(token[1] for token in 
tokens) == text +@pytest.mark.parametrize( + 'text', + ( + '`a`', '`1`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`', + '`-- `', '`/*`', '`#`', + ), +) +def test_schema_object_names_quoted(lexer, text): + tokens = list(lexer.get_tokens(text))[:-1] + assert tokens[0] == (Name.Quoted, '`') + assert tokens[1] == (Name.Quoted, text[1:-1]) + assert tokens[2] == (Name.Quoted, '`') + assert ''.join(token[1] for token in tokens) == text + + +@pytest.mark.parametrize('text', ('````', )) +def test_schema_object_names_quoted_escaped(lexer, text): + """Test quoted schema object names with escape sequences.""" + tokens = list(lexer.get_tokens(text))[:-1] + assert tokens[0] == (Name.Quoted, '`') + assert tokens[1] == (Name.Quoted.Escape, text[1:-1]) + assert tokens[2] == (Name.Quoted, '`') + assert ''.join(token[1] for token in tokens) == text + + @pytest.mark.parametrize( 'text', ('+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~'),