From d06e8a4a87116601ca05f14f0e66a02eb2ffbd30 Mon Sep 17 00:00:00 2001
From: Kurt McKee
Date: Sun, 8 Nov 2020 20:23:33 -0600
Subject: [PATCH 1/2] JavaLexer: Demonstrate a catastrophic backtracking bug

---
 tests/test_java.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/tests/test_java.py b/tests/test_java.py
index 3baec0adc3..f7b16bd7ea 100644
--- a/tests/test_java.py
+++ b/tests/test_java.py
@@ -7,9 +7,11 @@
     :license: BSD, see LICENSE for details.
 """
 
+import time
+
 import pytest
 
-from pygments.token import Text, Name, Punctuation, Keyword, Number
+from pygments.token import Keyword, Name, Number, Punctuation, String, Text
 from pygments.lexers import JavaLexer
 
 
@@ -76,3 +78,24 @@ def test_numeric_literals(lexer):
         (Text, '\n')
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
+
+
+@pytest.mark.parametrize(
+    'text',
+    (
+        '""', '"abc"', '"ひらがな"', '"123"',
+        '"\\\\"', '"\\t"', '"\\""',
+    ),
+)
+def test_string_literals_positive_match(lexer, text):
+    """Test that complete string literals lex entirely as String tokens."""
+    tokens = list(lexer.get_tokens_unprocessed(text))
+    assert all(token is String for _, token, _ in tokens)
+    assert ''.join([value for _, _, value in tokens]) == text
+
+
+def test_string_literals_backtracking(lexer):
+    """Test that an unterminated run of backslashes lexes in bounded time."""
+    start_time = time.perf_counter()
+    list(lexer.get_tokens_unprocessed('"' + '\\' * 100))
+    assert time.perf_counter() - start_time < 1, 'possible backtracking bug'

From 8a505c805ba0ac170e6d690f5c4b479f51c2bc4b Mon Sep 17 00:00:00 2001
From: Kurt McKee
Date: Sun, 8 Nov 2020 20:24:51 -0600
Subject: [PATCH 2/2] JavaLexer: Fix a catastrophic backtracking bug

Closes #1586
---
 pygments/lexers/jvm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index ee0bc7af95..2736329d94 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -65,7 +65,7 @@ class JavaLexer(RegexLexer):
              'var'),
             (r'(import(?:\s+static)?)(\s+)', bygroups(Keyword.Namespace, Text),
              'import'),
-            (r'"(\\\\|\\"|[^"])*"', String),
+            (r'"', String, 'string'),
             (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
             (r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation,
                                                      Name.Attribute)),
@@ -96,6 +96,13 @@ class JavaLexer(RegexLexer):
         'import': [
             (r'[\w.]+\*?', Name.Namespace, '#pop')
         ],
+        'string': [
+            (r'[^\\"]+', String),
+            (r'\\\\', String),  # Escaped backslash
+            (r'\\"', String),  # Escaped quote
+            (r'\\', String),  # Bare backslash
+            (r'"', String, '#pop'),  # Closing quote
+        ],
     }
 
 