From c4a531bd13da939c3f961c6025c9c6dc82cf9179 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Fri, 2 Sep 2022 15:00:57 +0100 Subject: [PATCH 1/2] Fix existing percent encoding --- changes/4458-samuelcolvin.md | 1 + pydantic/networks.py | 19 +++++++++++--- tests/test_networks.py | 49 ++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 changes/4458-samuelcolvin.md diff --git a/changes/4458-samuelcolvin.md b/changes/4458-samuelcolvin.md new file mode 100644 index 0000000000..6f3357a9de --- /dev/null +++ b/changes/4458-samuelcolvin.md @@ -0,0 +1 @@ +Fix percent encoding of URLs with which are already percent encoded. diff --git a/pydantic/networks.py b/pydantic/networks.py index 725e4f8a27..a5364d28f5 100644 --- a/pydantic/networks.py +++ b/pydantic/networks.py @@ -1,4 +1,5 @@ import re +from functools import partial from ipaddress import ( IPv4Address, IPv4Interface, @@ -250,7 +251,7 @@ def build( if port and ('port' not in cls.hidden_parts or cls.get_default_parts(parts).get('port') != port): url += ':' + port if path: - url += '/'.join(map(cls.quote, path.split('/'))) + url += cls.quote(path) if query: queries = query.split('&') url += '?' + '&'.join(map(lambda s: '='.join(map(cls.quote, s.split('='))), queries)) @@ -395,8 +396,20 @@ def apply_default_parts(cls, parts: 'Parts') -> 'Parts': return parts @classmethod - def quote(cls, string: str, safe: str = '') -> str: - return quote_plus(string, safe) if cls.quote_plus else quote(string, safe) + def quote(cls, string: str) -> str: + if cls.quote_plus: + quote_func = partial(quote_plus, safe='+/') + else: + quote_func = partial(quote, safe='/') + + last_end = 0 + quoted = '' + for match in re.finditer(r'(.*?)(%[\dA-F]{2})', string, flags=re.S | re.I): + raw, percent = match.groups() + quoted += quote_func(raw) + percent.upper() + last_end = match.end() + + return quoted + quote_func(string[last_end:]) def __repr__(self) -> str: extra = ', '.join(f'{n}={getattr(self, n)!r}' for n in self.__slots__ if getattr(self, n) is not None) diff --git a/tests/test_networks.py b/tests/test_networks.py index 95af277b35..7a221346f7 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -682,7 +682,12 @@ class Model2(BaseModel): (dict(scheme='http', user='foo@bar', host='example.net'), 'http://foo%40bar@example.net'), (dict(scheme='http', user='foo', password='a b', host='example.net'), 'http://foo:a%20b@example.net'), (dict(scheme='http', host='example.net', query='q=foo bar'), 'http://example.net?q=foo%20bar'), + (dict(scheme='http', host='example.net', query='q=foo/bar'), 'http://example.net?q=foo/bar'), (dict(scheme='http', host='example.net', path="/m&m's"), 'http://example.net/m%26m%27s'), + (dict(scheme='http', host='example.net', path='/m%26m%27s'), 'http://example.net/m%26m%27s'), + (dict(scheme='http', host='example.net', path='/m%m'), 'http://example.net/m%25m'), + (dict(scheme='http', host='example.net', path='/m%25m'), 'http://example.net/m%25m'), + (dict(scheme='http', host='example.net', path='/m+m'), 'http://example.net/m%2Bm'), ], ) def test_build_url(kwargs, expected): @@ -698,12 +703,56 @@ def test_build_url(kwargs, expected): 'https://my+name:a+password@example.com', ), (dict(scheme='https', host='example.com', path='/this is a path'), 'https://example.com/this+is+a+path'), + (dict(scheme='https', host='example.com', path='/this+is+a+path'), 'https://example.com/this+is+a+path'), + (dict(scheme='https', host='example.com', path='/this+is/a+path'), 'https://example.com/this+is/a+path'), ], ) def test_build_url_quote_plus(kwargs, expected): assert stricturl(quote_plus=True).build(**kwargs) == expected +@pytest.mark.parametrize( + 'quote_plus,before,after', + [ + (False, ' ', '%20'), + (True, ' ', '+'), + (False, '+', '%2B'), + (True, '+', '+'), + (True, 'foo bar', 'foo+bar'), + (False, 'foo bar', 'foo%20bar'), + (True, 'foo+bar', 'foo+bar'), + (False, 'foo+bar', 'foo%2Bbar'), + (True, 'foo%20bar', 'foo%20bar'), + (False, 'foo%20bar', 'foo%20bar'), + (True, 'f\noo%20bar', 'f%0Aoo%20bar'), + (False, 'f\noo%20bar', 'f%0Aoo%20bar'), + (True, 'f\noo', 'f%0Aoo'), + (False, 'f\noo', 'f%0Aoo'), + (False, '#', '%23'), + (False, 'xx#', 'xx%23'), + (False, '#xx', '%23xx'), + (False, '%23', '%23'), + (False, 'xx%23', 'xx%23'), + (False, '%23xx', '%23xx'), + (False, 'ñ', '%C3%B1'), + (True, 'ñ', '%C3%B1'), + (False, '%C3%B1', '%C3%B1'), + (True, '%C3%B1', '%C3%B1'), + (True, '\x00', '%00'), + (False, '\x00', '%00'), + (True, '%00', '%00'), + (False, '%00', '%00'), + (True, '%B1', '%B1'), + (False, '%B1', '%B1'), + (True, '%b1', '%B1'), + (False, '%b1', '%B1'), + ], +) +def test_url_quoting(quote_plus: bool, before: str, after: str): + u = stricturl(quote_plus=quote_plus) + assert u.quote(before) == after + + @pytest.mark.parametrize( 'kwargs,expected', [ From 748e2219a4c7a56f40df10b53f937c53eb52511c Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Fri, 2 Sep 2022 15:50:13 +0100 Subject: [PATCH 2/2] stop escaping + --- pydantic/networks.py | 11 ++++------- tests/test_networks.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/pydantic/networks.py b/pydantic/networks.py index a5364d28f5..0d1748cae0 100644 --- a/pydantic/networks.py +++ b/pydantic/networks.py @@ -1,5 +1,4 @@ import re -from functools import partial from ipaddress import ( IPv4Address, IPv4Interface, @@ -397,19 +396,17 @@ def apply_default_parts(cls, parts: 'Parts') -> 'Parts': @classmethod def quote(cls, string: str) -> str: - if cls.quote_plus: - quote_func = partial(quote_plus, safe='+/') - else: - quote_func = partial(quote, safe='/') + quote_func = quote_plus if cls.quote_plus else quote + safe = '+/' last_end = 0 quoted = '' for match in re.finditer(r'(.*?)(%[\dA-F]{2})', string, flags=re.S | re.I): raw, percent = match.groups() - quoted += quote_func(raw) + percent.upper() + quoted += quote_func(raw, safe) + percent.upper() last_end = match.end() - return quoted + quote_func(string[last_end:]) + return quoted + quote_func(string[last_end:], safe) def __repr__(self) -> str: extra = ', '.join(f'{n}={getattr(self, n)!r}' for n in self.__slots__ if getattr(self, n) is not None) diff --git a/tests/test_networks.py b/tests/test_networks.py index 7a221346f7..3633ea778d 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -74,22 +74,40 @@ 'http://twitter.com/@handle/', 'http://11.11.11.11.example.com/action', 'http://abc.11.11.11.11.example.com/action', - 'http://example#', - 'http://example/#', 'http://example/#fragment', - 'http://example/?#', 'http://example.org/path#', 'http://example.org/path#fragment', 'http://example.org/path?query#', 'http://example.org/path?query#fragment', 'file://localhost/foo/bar', + 'http://localhost/path?a=b/c', + 'http://localhost/path?a=b+c', + 'http://localhost/path?a=b%22c', ], ) -def test_any_url_success(value): +def test_any_url_success_unchanged(value): class Model(BaseModel): v: AnyUrl - assert Model(v=value).v, value + assert str(Model(v=value).v) == value + + +@pytest.mark.parametrize( + 'before,after', + [ + ('http://example#', 'http://example'), + ('http://example/#', 'http://example/'), + ('http://example/?#', 'http://example/'), + ('http://localhost/path?a=b"c', 'http://localhost/path?a=b%22c'), + ('http://localhost/path?a=b%c', 'http://localhost/path?a=b%25c'), + # ('http://example.com/path?a=b"c', 'http://localhost/path?a=b%22c'), + ], +) +def test_any_url_success_changed(before, after): + class Model(BaseModel): + v: AnyUrl + + assert Model(v=before).v == after @pytest.mark.parametrize( @@ -687,7 +705,7 @@ class Model2(BaseModel): (dict(scheme='http', host='example.net', path='/m%26m%27s'), 'http://example.net/m%26m%27s'), (dict(scheme='http', host='example.net', path='/m%m'), 'http://example.net/m%25m'), (dict(scheme='http', host='example.net', path='/m%25m'), 'http://example.net/m%25m'), - (dict(scheme='http', host='example.net', path='/m+m'), 'http://example.net/m%2Bm'), + (dict(scheme='http', host='example.net', path='/m+m'), 'http://example.net/m+m'), ], ) def test_build_url(kwargs, expected): @@ -716,12 +734,12 @@ def test_build_url_quote_plus(kwargs, expected): [ (False, ' ', '%20'), (True, ' ', '+'), - (False, '+', '%2B'), + (False, '+', '+'), (True, '+', '+'), (True, 'foo bar', 'foo+bar'), (False, 'foo bar', 'foo%20bar'), (True, 'foo+bar', 'foo+bar'), - (False, 'foo+bar', 'foo%2Bbar'), + (False, 'foo+bar', 'foo+bar'), (True, 'foo%20bar', 'foo%20bar'), (False, 'foo%20bar', 'foo%20bar'), (True, 'f\noo%20bar', 'f%0Aoo%20bar'), @@ -742,6 +760,8 @@ def test_build_url_quote_plus(kwargs, expected): (False, '\x00', '%00'), (True, '%00', '%00'), (False, '%00', '%00'), + (False, '%0', '%250'), + (False, '%', '%25'), (True, '%B1', '%B1'), (False, '%B1', '%B1'), (True, '%b1', '%B1'),