From 34d5fe41a9fa05f6f2fd306b728483c880222ae9 Mon Sep 17 00:00:00 2001 From: Bertrand Bonnefoy-Claudet Date: Wed, 9 Sep 2020 11:00:13 +0200 Subject: [PATCH] Fix parsing of unquoted values with two spaces If a value is unquoted and has two or more adjacent spaces (like in `a=b c`), the parser would detect an error. This commit fixes that. Tabs and other whitespace characters are now also considered like space characters in this case and I added relevant test cases. --- CHANGELOG.md | 5 ++++- src/dotenv/parser.py | 12 +++--------- tests/test_parser.py | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34cdb324..b5305f19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,8 +19,10 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fixed -- Fix potentially empty expanded value for duplicate key (#260 by [@bbc]). +- Fix potentially empty expanded value for duplicate key (#260 by [@bbc2]). - Fix import error on Python 3.5.0 and 3.5.1 (#267 by [@gongqingkui]). +- Fix parsing of unquoted values containing several adjacent space or tab characters + (#277 by [@bbc2], review by [@x-yuri]). ## [0.14.0] - 2020-07-03 @@ -226,6 +228,7 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). [@theskumar]: https://github.com/theskumar [@ulyssessouza]: https://github.com/ulyssessouza [@venthur]: https://github.com/venthur +[@x-yuri]: https://github.com/x-yuri [@yannham]: https://github.com/yannham [Unreleased]: https://github.com/theskumar/python-dotenv/compare/v0.14.0...HEAD diff --git a/src/dotenv/parser.py b/src/dotenv/parser.py index 4eba0ac4..5cb1cdfa 100644 --- a/src/dotenv/parser.py +++ b/src/dotenv/parser.py @@ -24,7 +24,7 @@ def make_regex(string, extra_flags=0): _equal_sign = make_regex(r"(=[^\S\r\n]*)") _single_quoted_value = make_regex(r"'((?:\\'|[^'])*)'") _double_quoted_value = make_regex(r'"((?:\\"|[^"])*)"') -_unquoted_value_part = make_regex(r"([^ \r\n]*)") +_unquoted_value = make_regex(r"([^\r\n]*)") _comment = make_regex(r"(?:[^\S\r\n]*#[^\r\n]*)?") _end_of_line = make_regex(r"[^\S\r\n]*(?:\r\n|\n|\r|$)") _rest_of_line = make_regex(r"[^\r\n]*(?:\r|\n|\r\n)?") @@ -167,14 +167,8 @@ def parse_key(reader): def parse_unquoted_value(reader): # type: (Reader) -> Text - value = u"" - while True: - (part,) = reader.read_regex(_unquoted_value_part) - value += part - after = reader.peek(2) - if len(after) < 2 or after[0] in u"\r\n" or after[1] in u" #\r\n": - return value - value += reader.read(2) + (part,) = reader.read_regex(_unquoted_value) + return re.sub(r"\s+#.*", "", part).rstrip() def parse_value(reader): diff --git a/tests/test_parser.py b/tests/test_parser.py index f8075138..48cecdce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -19,20 +19,40 @@ (u"# a=b", [Binding(key=None, value=None, original=Original(string=u"# a=b", line=1), error=False)]), (u"a=b#c", [Binding(key=u"a", value=u"b#c", original=Original(string=u"a=b#c", line=1), error=False)]), ( - u'a=b # comment', - [Binding(key=u"a", value=u"b", original=Original(string=u"a=b # comment", line=1), error=False)], + u'a=b #c', + [Binding(key=u"a", value=u"b", original=Original(string=u"a=b #c", line=1), error=False)], ), ( - u"a=b space ", - [Binding(key=u"a", value=u"b space", original=Original(string=u"a=b space ", line=1), error=False)], + u'a=b\t#c', + [Binding(key=u"a", value=u"b", original=Original(string=u"a=b\t#c", line=1), error=False)], ), ( - u"a='b space '", - [Binding(key=u"a", value=u"b space ", original=Original(string=u"a='b space '", line=1), error=False)], + u"a=b c", + [Binding(key=u"a", value=u"b c", original=Original(string=u"a=b c", line=1), error=False)], ), ( - u'a="b space "', - [Binding(key=u"a", value=u"b space ", original=Original(string=u'a="b space "', line=1), error=False)], + u"a=b\tc", + [Binding(key=u"a", value=u"b\tc", original=Original(string=u"a=b\tc", line=1), error=False)], + ), + ( + u"a=b c", + [Binding(key=u"a", value=u"b c", original=Original(string=u"a=b c", line=1), error=False)], + ), + ( + u"a=b\u00a0 c", + [Binding(key=u"a", value=u"b\u00a0 c", original=Original(string=u"a=b\u00a0 c", line=1), error=False)], + ), + ( + u"a=b c ", + [Binding(key=u"a", value=u"b c", original=Original(string=u"a=b c ", line=1), error=False)], + ), + ( + u"a='b c '", + [Binding(key=u"a", value=u"b c ", original=Original(string=u"a='b c '", line=1), error=False)], + ), + ( + u'a="b c "', + [Binding(key=u"a", value=u"b c ", original=Original(string=u'a="b c "', line=1), error=False)], ), ( u"export export_a=1",