Fix a catastrophic backtracking bug in JavaLexer #1594

Merged: 2 commits, Nov 9, 2020

pygments/lexers/jvm.py: 9 changes (8 additions & 1 deletion)
@@ -65,7 +65,7 @@ class JavaLexer(RegexLexer):
              'var'),
             (r'(import(?:\s+static)?)(\s+)', bygroups(Keyword.Namespace, Text),
              'import'),
-            (r'"(\\\\|\\"|[^"])*"', String),
+            (r'"', String, 'string'),
             (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
             (r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation,
                                                      Name.Attribute)),
@@ -96,6 +96,13 @@ class JavaLexer(RegexLexer):
         'import': [
             (r'[\w.]+\*?', Name.Namespace, '#pop')
         ],
+        'string': [
+            (r'[^\\"]+', String),
+            (r'\\\\', String),  # Escaped backslash
+            (r'\\"', String),  # Escaped quote
+            (r'\\', String),  # Bare backslash
+            (r'"', String, '#pop'),  # Closing quote
+        ],
     }


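Background on the deleted rule: in the pattern "(\\\\|\\"|[^"])*" a single backslash can be consumed either by the \\\\ alternative (as half of an escaped pair) or by [^"], so on a string with no closing quote the regex engine retries every possible way of splitting a run of backslashes before giving up. Below is a minimal sketch of the failure mode using Python's re module directly, outside the lexer; the variable name, input sizes, and timings are only illustrative.

import re
import time

# The rule this PR deletes: a backslash can match either the `\\\\`
# alternative or `[^"]`, which is what makes the backtracking explode.
OLD_STRING_RULE = re.compile(r'"(\\\\|\\"|[^"])*"')

for n in (16, 20, 24, 28):
    text = '"' + '\\' * n          # unterminated literal: n backslashes, no closing quote
    start = time.time()
    OLD_STRING_RULE.match(text)    # returns None, but only after exponential backtracking
    print(f'{n} backslashes: {time.time() - start:.4f}s')

The time grows by a roughly constant factor for every few extra backslashes; the 100-backslash input used in the new regression test would effectively never finish against the old pattern, whereas the replacement 'string' state consumes each character exactly once.
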
tests/test_java.py: 25 changes (24 additions & 1 deletion)
@@ -7,9 +7,11 @@
     :license: BSD, see LICENSE for details.
 """

+import time
+
 import pytest

-from pygments.token import Text, Name, Punctuation, Keyword, Number
+from pygments.token import Keyword, Name, Number, Punctuation, String, Text
 from pygments.lexers import JavaLexer


@@ -76,3 +78,24 @@ def test_numeric_literals(lexer):
         (Text, '\n')
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
+
+
+@pytest.mark.parametrize(
+    'text',
+    (
+        '""', '"abc"', '"ひらがな"', '"123"',
+        '"\\\\"', '"\\t"', '"\\""',
+    ),
+)
+def test_string_literals_positive_match(lexer, text):
+    """Test positive matches for string literals."""
+    tokens = list(lexer.get_tokens_unprocessed(text))
+    assert all([token is String for _, token, _ in tokens])
+    assert ''.join([value for _, _, value in tokens]) == text
+
+
+def test_string_literals_backtracking(lexer):
+    """Test catastrophic backtracking for string literals."""
+    start_time = time.time()
+    list(lexer.get_tokens_unprocessed('"' + '\\' * 100))
+    assert time.time() - start_time < 1, 'possible backtracking bug'
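
For a quick look at what the new 'string' state emits, the sketch below lexes one literal and prints its tokens (it assumes a Pygments build that includes this patch; the sample literal is arbitrary). The literal now comes out as several consecutive String tokens rather than a single match, which is why test_string_literals_positive_match joins the token values back together before comparing.

from pygments.lexers import JavaLexer
from pygments.token import String

lexer = JavaLexer()
code = r'"a\"b\\c"'  # contains an escaped quote and an escaped backslash

for index, token, value in lexer.get_tokens_unprocessed(code):
    print(index, token, repr(value))
    assert token is String  # every piece of the literal is still a String token

The printed pieces are the opening quote, 'a', the escaped quote, 'b', the escaped backslash, 'c', and the closing quote; joined in order they reproduce the original literal.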