From b9247e939573bf2caa8d455f35b224a9e3cb9124 Mon Sep 17 00:00:00 2001 From: jmzambon Date: Sat, 17 Sep 2022 22:31:16 +0200 Subject: [PATCH 1/5] Fix #2224: Issues with .properties format using whitespace delimited key Added: - support for space delimiter in every case, including multiline values - check for odd number of backslash escapes - "!" as comment start - support for escape of spaces and separators Dropped: - undocumented ";" and "//" comment start --- pygments/lexers/configs.py | 26 ++++-- tests/examplefiles/properties/java.properties | 38 +++++--- .../properties/java.properties.output | 91 ++++++++++++++----- 3 files changed, 110 insertions(+), 45 deletions(-) diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 4e0e7f1282..8e05cbd57d 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -128,14 +128,24 @@ class PropertiesLexer(RegexLexer): tokens = { 'root': [ - (r'^(\w+)([ \t])(\w+\s*)$', bygroups(Name.Attribute, Whitespace, String)), - (r'^\w+(\\[ \t]\w*)*$', Name.Attribute), - (r'(^ *)([#!].*)', bygroups(Whitespace, Comment)), - # More controversial comments - (r'(^ *)((?:;|//).*)', bygroups(Whitespace, Comment)), - (r'(.*?)([ \t]*)([=:])([ \t]*)(.*(?:(?<=\\)\n.*)*)', - bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)), - (r'\s', Whitespace), + (r'\s+', Whitespace), + (r'[!#].*|/{2}.*', Comment.Single), + # search for first separator + (r'.+?[^\\](?:\\{2})*?(?=[ \f\t=:])', Name.Attribute, "separator"), + # empty key + (r'(.+?)', Name.Attribute), + ], + 'separator': [ + (r'([ \f\t]*)([=:]*)([ \f\t]*)(.*(? Date: Sun, 18 Sep 2022 18:54:19 +0200 Subject: [PATCH 2/5] Add some comments. 
--- pygments/lexers/configs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 8e05cbd57d..c2cb37e4ce 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -136,6 +136,7 @@ class PropertiesLexer(RegexLexer): (r'(.+?)', Name.Attribute), ], 'separator': [ + # search for line continuation escape (r'([ \f\t]*)([=:]*)([ \f\t]*)(.*(? Date: Sun, 18 Sep 2022 21:53:41 +0200 Subject: [PATCH 3/5] Fix empty key regex and update test snippets. --- pygments/lexers/configs.py | 2 +- tests/snippets/properties/test_comments.txt | 4 ++-- .../snippets/properties/test_leading_whitespace_comments.txt | 2 +- tests/snippets/properties/test_space_delimited_kv_pair.txt | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index c2cb37e4ce..a29fc512a9 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -133,7 +133,7 @@ class PropertiesLexer(RegexLexer): # search for first separator (r'.+?[^\\](?:\\{2})*?(?=[ \f\t=:])', Name.Attribute, "separator"), # empty key - (r'(.+?)', Name.Attribute), + (r'.+?$', Name.Attribute), ], 'separator': [ # search for line continuation escape diff --git a/tests/snippets/properties/test_comments.txt b/tests/snippets/properties/test_comments.txt index b6bc8fb9e6..9bc6586078 100644 --- a/tests/snippets/properties/test_comments.txt +++ b/tests/snippets/properties/test_comments.txt @@ -5,8 +5,8 @@ # also a comment ---tokens--- -'! a comment' Comment +'! 
a comment' Comment.Single '\n' Text.Whitespace -'# also a comment' Comment +'# also a comment' Comment.Single '\n' Text.Whitespace diff --git a/tests/snippets/properties/test_leading_whitespace_comments.txt b/tests/snippets/properties/test_leading_whitespace_comments.txt index be77792b33..3a36afc9df 100644 --- a/tests/snippets/properties/test_leading_whitespace_comments.txt +++ b/tests/snippets/properties/test_leading_whitespace_comments.txt @@ -2,5 +2,5 @@ # comment ---tokens--- -'# comment' Comment +'# comment' Comment.Single '\n' Text.Whitespace diff --git a/tests/snippets/properties/test_space_delimited_kv_pair.txt b/tests/snippets/properties/test_space_delimited_kv_pair.txt index fe12d48efa..98961e4256 100644 --- a/tests/snippets/properties/test_space_delimited_kv_pair.txt +++ b/tests/snippets/properties/test_space_delimited_kv_pair.txt @@ -4,4 +4,5 @@ key value ---tokens--- 'key' Name.Attribute ' ' Text.Whitespace -'value\n' Literal.String +'value' Literal.String +'\n' Text.Whitespace From fe33b8afb49662c789d05dce96610d273e8f0e29 Mon Sep 17 00:00:00 2001 From: jmzambon Date: Sun, 18 Sep 2022 21:56:58 +0200 Subject: [PATCH 4/5] Update example file java.properties. --- tests/examplefiles/properties/java.properties.output | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/examplefiles/properties/java.properties.output b/tests/examplefiles/properties/java.properties.output index 0b8b4f2c67..0c1fdeebf7 100644 --- a/tests/examplefiles/properties/java.properties.output +++ b/tests/examplefiles/properties/java.properties.output @@ -70,10 +70,7 @@ '=' Operator '\n' Text.Whitespace -'k' Name.Attribute -'e' Name.Attribute -'y' Name.Attribute -'2' Name.Attribute +'key2' Name.Attribute '\n' Text.Whitespace 'key3' Name.Attribute From 9fc9bb473f87750da1cb51f1c5a587c61609c485 Mon Sep 17 00:00:00 2001 From: jmzambon Date: Sun, 25 Sep 2022 22:47:57 +0200 Subject: [PATCH 5/5] Fix regex error on line 134. 
See Jean-Abou-Samra's comment here: https://github.com/pygments/pygments/pull/2241#discussion_r979296491. --- pygments/lexers/configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index a29fc512a9..6f61d98e41 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -131,7 +131,7 @@ class PropertiesLexer(RegexLexer): (r'\s+', Whitespace), (r'[!#].*|/{2}.*', Comment.Single), # search for first separator - (r'.+?[^\\](?:\\{2})*?(?=[ \f\t=:])', Name.Attribute, "separator"), + (r'([^\\\n]|\\.)*?(?=[ \f\t=:])', Name.Attribute, "separator"), # empty key (r'.+?$', Name.Attribute), ],