Skip to content

Commit

Permalink
Fix a catastrophic backtracking bug in JavaLexer (#1594)
Browse files Browse the repository at this point in the history
* JavaLexer: Demonstrate a catastrophic backtracking bug

* JavaLexer: Fix a catastrophic backtracking bug

Closes #1586
  • Loading branch information
kurtmckee committed Nov 9, 2020
1 parent afa9e3b commit fb40b71
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
9 changes: 8 additions & 1 deletion pygments/lexers/jvm.py
Expand Up @@ -65,7 +65,7 @@ class JavaLexer(RegexLexer):
'var'),
(r'(import(?:\s+static)?)(\s+)', bygroups(Keyword.Namespace, Text),
'import'),
(r'"(\\\\|\\"|[^"])*"', String),
(r'"', String, 'string'),
(r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
(r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation,
Name.Attribute)),
Expand Down Expand Up @@ -96,6 +96,13 @@ class JavaLexer(RegexLexer):
'import': [
(r'[\w.]+\*?', Name.Namespace, '#pop')
],
'string': [
(r'[^\\"]+', String),
(r'\\\\', String), # Escaped backslash
(r'\\"', String), # Escaped quote
(r'\\', String), # Bare backslash
(r'"', String, '#pop'), # Closing quote
],
}


Expand Down
25 changes: 24 additions & 1 deletion tests/test_java.py
Expand Up @@ -7,9 +7,11 @@
:license: BSD, see LICENSE for details.
"""

import time

import pytest

from pygments.token import Text, Name, Punctuation, Keyword, Number
from pygments.token import Keyword, Name, Number, Punctuation, String, Text
from pygments.lexers import JavaLexer


Expand Down Expand Up @@ -76,3 +78,24 @@ def test_numeric_literals(lexer):
(Text, '\n')
]
assert list(lexer.get_tokens(fragment)) == tokens


@pytest.mark.parametrize(
    'text',
    (
        # Every case must be its own tuple element: adjacent string
        # literals without a separating comma are silently concatenated
        # by Python into a single test input.
        '""', '"abc"', '"ひらがな"', '"123"',
        '"\\\\"', '"\\t"', '"\\""',
    ),
)
def test_string_literals_positive_match(lexer, text):
    """Check that a complete string literal lexes purely as ``String``.

    Each parametrized ``text`` is a whole literal including its quotes.
    Every token produced by ``lexer.get_tokens_unprocessed`` must be the
    ``String`` token type, and the token values must concatenate back to
    the original input unchanged.
    """
    tokens = list(lexer.get_tokens_unprocessed(text))
    assert all(token is String for _, token, _ in tokens)
    assert ''.join(value for _, _, value in tokens) == text


def test_string_literals_backtracking(lexer):
    """Guard against catastrophic backtracking in string-literal lexing.

    The input is an opening quote followed by 100 backslashes and no
    closing quote. Lexing it must complete in under one second.
    """
    payload = '"' + '\\' * 100
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    started = time.perf_counter()
    list(lexer.get_tokens_unprocessed(payload))
    elapsed = time.perf_counter() - started
    assert elapsed < 1, 'possible backtracking bug'

0 comments on commit fb40b71

Please sign in to comment.