From 9901f1b985164ab30e4167cc90af8ed388dd6b9d Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Sun, 10 Jul 2022 09:43:42 +0200 Subject: [PATCH 1/9] Fix `AnyUrl.build` doesn't do percent encoding (#3061) please review --- changes/3061-FaresAhmedb.md | 1 + pydantic/networks.py | 16 +++++++++++----- pydantic/utils.py | 26 ++++++++++++++++++++++++++ tests/test_networks.py | 6 +++++- tests/test_utils.py | 13 +++++++++++++ 5 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 changes/3061-FaresAhmedb.md diff --git a/changes/3061-FaresAhmedb.md b/changes/3061-FaresAhmedb.md new file mode 100644 index 0000000000..5be2fc1b01 --- /dev/null +++ b/changes/3061-FaresAhmedb.md @@ -0,0 +1 @@ +Fix `AnyUrl.build` doesn't do percent encoding diff --git a/pydantic/networks.py b/pydantic/networks.py index 0626d2c355..f0b986aced 100644 --- a/pydantic/networks.py +++ b/pydantic/networks.py @@ -1,4 +1,5 @@ import re +from functools import partial from ipaddress import ( IPv4Address, IPv4Interface, @@ -26,7 +27,7 @@ ) from . import errors -from .utils import Representation, update_not_none +from .utils import Representation, percent_encode, update_not_none from .validators import constr_length_validator, str_validator if TYPE_CHECKING: @@ -153,6 +154,7 @@ def __init__( path: Optional[str] = None, query: Optional[str] = None, fragment: Optional[str] = None, + plus: bool = False, ) -> None: str.__init__(url) self.scheme = scheme @@ -178,6 +180,7 @@ def build( path: Optional[str] = None, query: Optional[str] = None, fragment: Optional[str] = None, + plus: bool = False, **_kwargs: str, ) -> str: parts = Parts( @@ -192,20 +195,23 @@ def build( **_kwargs, # type: ignore[misc] ) + percent_encode_plus = partial(percent_encode, plus=plus) + url = scheme + '://' if user: - url += user + url += percent_encode_plus(user) if password: - url += ':' + password + url += ':' + percent_encode_plus(password) if user or password: url += '@' url += host if port and ('port' not in cls.hidden_parts or cls.get_default_parts(parts).get('port') != port): url += ':' + port if path: - url += path + url += '/'.join(map(percent_encode_plus, path.split('/'))) if query: - url += '?' + query + queries = query.split('&') + url += '?' + '&'.join(map(lambda s: '='.join(map(percent_encode_plus, s.split('='))), queries)) if fragment: url += '#' + fragment return url diff --git a/pydantic/utils.py b/pydantic/utils.py index f9c4ec1602..351a270842 100644 --- a/pydantic/utils.py +++ b/pydantic/utils.py @@ -787,3 +787,29 @@ def __setitem__(self, __key: Any, __value: Any) -> None: def __class_getitem__(cls, *args: Any) -> Any: # to avoid errors with 3.7 pass + + +URI_RESERVED_CHARACTERS = frozenset(":/?#[]@!$&'()*+,;=% ") + + +def percent_encode(string: str, *, plus: bool = False) -> str: + """ + Percent encode the URI reserved characters in a string + """ + if not string: + return string + + string = string.replace('%', f'%{ord("%"):02X}') + + if plus: + string = string.replace('+', f'%{ord("+"):02X}') + string = string.replace(' ', '+') + characters = URI_RESERVED_CHARACTERS.symmetric_difference('+% ') + else: + characters = URI_RESERVED_CHARACTERS.symmetric_difference('%') + + for c in characters: + if c in string: + string = string.replace(c, f'%{ord(c):02X}') + + return string diff --git a/tests/test_networks.py b/tests/test_networks.py index 4d768b20fa..8fe2c316f1 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -370,7 +370,7 @@ class Model(BaseModel): @pytest.mark.parametrize( 'value', - ['file:///foo/bar', 'file://localhost/foo/bar' 'file:////localhost/foo/bar'], + ['file:///foo/bar', 'file://localhost/foo/bar', 'file:////localhost/foo/bar'], ) def test_file_url_success(value): class Model(BaseModel): @@ -559,6 +559,10 @@ class Model2(BaseModel): (dict(scheme='ws', user='foo', password='x', host='example.net'), 'ws://foo:x@example.net'), (dict(scheme='ws', host='example.net', query='a=b', fragment='c=d'), 'ws://example.net?a=b#c=d'), (dict(scheme='http', host='example.net', port='1234'), 'http://example.net:1234'), + (dict(scheme='http', user='foo@bar', host='example.net'), 'http://foo%40bar@example.net'), + (dict(scheme='http', user='foo', password='a b', host='example.net'), 'http://foo:a%20b@example.net'), + (dict(scheme='http', host='example.net', query='q=foo bar', plus=True), 'http://example.net?q=foo+bar'), + (dict(scheme='http', host='example.net', path="/m&m's"), 'http://example.net/m%26m%27s'), ], ) def test_build_url(kwargs, expected): diff --git a/tests/test_utils.py b/tests/test_utils.py index d7eaa74c69..a3e0c7d51f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -36,6 +36,7 @@ import_string, lenient_issubclass, path_type, + percent_encode, smart_deepcopy, truncate, unique_list, @@ -566,3 +567,15 @@ def test_limited_dict(): assert len(d) == 10 d[13] = '13' assert len(d) == 9 + + +@pytest.mark.parametrize( + 'input_value,output,plus', + [ + ('Pa##w0rd?', 'Pa%23%23w0rd%3F', False), + ('A query', 'A%20query', False), + ('A query', 'A+query', True), + ], +) +def test_percent_encode(input_value, output, plus): + assert percent_encode(input_value, plus=plus) == output From 49c19c1771bb9e66cc305f202c1d45eeca362237 Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Sat, 13 Aug 2022 07:36:27 +0200 Subject: [PATCH 2/9] Use `urllib.parse` instead of custom implementation for percent encoding --- pydantic/networks.py | 14 +++++++------- pydantic/utils.py | 26 -------------------------- tests/test_utils.py | 13 ------------- 3 files changed, 7 insertions(+), 46 deletions(-) diff --git a/pydantic/networks.py b/pydantic/networks.py index f0b986aced..b9e9873f16 100644 --- a/pydantic/networks.py +++ b/pydantic/networks.py @@ -1,5 +1,4 @@ import re -from functools import partial from ipaddress import ( IPv4Address, IPv4Interface, @@ -25,9 +24,10 @@ cast, no_type_check, ) +from urllib.parse import quote, quote_plus from . import errors -from .utils import Representation, percent_encode, update_not_none +from .utils import Representation, update_not_none from .validators import constr_length_validator, str_validator if TYPE_CHECKING: @@ -195,23 +195,23 @@ def build( **_kwargs, # type: ignore[misc] ) - percent_encode_plus = partial(percent_encode, plus=plus) + do_quote = quote_plus if plus else quote url = scheme + '://' if user: - url += percent_encode_plus(user) + url += do_quote(user) if password: - url += ':' + percent_encode_plus(password) + url += ':' + do_quote(password) if user or password: url += '@' url += host if port and ('port' not in cls.hidden_parts or cls.get_default_parts(parts).get('port') != port): url += ':' + port if path: - url += '/'.join(map(percent_encode_plus, path.split('/'))) + url += '/'.join(map(do_quote, path.split('/'))) if query: queries = query.split('&') - url += '?' + '&'.join(map(lambda s: '='.join(map(percent_encode_plus, s.split('='))), queries)) + url += '?' + '&'.join(map(lambda s: '='.join(map(do_quote, s.split('='))), queries)) if fragment: url += '#' + fragment return url diff --git a/pydantic/utils.py b/pydantic/utils.py index 351a270842..f9c4ec1602 100644 --- a/pydantic/utils.py +++ b/pydantic/utils.py @@ -787,29 +787,3 @@ def __setitem__(self, __key: Any, __value: Any) -> None: def __class_getitem__(cls, *args: Any) -> Any: # to avoid errors with 3.7 pass - - -URI_RESERVED_CHARACTERS = frozenset(":/?#[]@!$&'()*+,;=% ") - - -def percent_encode(string: str, *, plus: bool = False) -> str: - """ - Percent encode the URI reserved characters in a string - """ - if not string: - return string - - string = string.replace('%', f'%{ord("%"):02X}') - - if plus: - string = string.replace('+', f'%{ord("+"):02X}') - string = string.replace(' ', '+') - characters = URI_RESERVED_CHARACTERS.symmetric_difference('+% ') - else: - characters = URI_RESERVED_CHARACTERS.symmetric_difference('%') - - for c in characters: - if c in string: - string = string.replace(c, f'%{ord(c):02X}') - - return string diff --git a/tests/test_utils.py b/tests/test_utils.py index a3e0c7d51f..d7eaa74c69 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -36,7 +36,6 @@ import_string, lenient_issubclass, path_type, - percent_encode, smart_deepcopy, truncate, unique_list, @@ -567,15 +566,3 @@ def test_limited_dict(): assert len(d) == 10 d[13] = '13' assert len(d) == 9 - - -@pytest.mark.parametrize( - 'input_value,output,plus', - [ - ('Pa##w0rd?', 'Pa%23%23w0rd%3F', False), - ('A query', 'A%20query', False), - ('A query', 'A+query', True), - ], -) -def test_percent_encode(input_value, output, plus): - assert percent_encode(input_value, plus=plus) == output From 5c1d6a7abf953199aded81a3cb14e4f1dce0ce21 Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Mon, 15 Aug 2022 12:35:18 +0200 Subject: [PATCH 3/9] Make `quote_plus` a `stricturl` option --- pydantic/networks.py | 19 +++++++++++-------- tests/test_networks.py | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pydantic/networks.py b/pydantic/networks.py index b9e9873f16..244b9c227c 100644 --- a/pydantic/networks.py +++ b/pydantic/networks.py @@ -133,6 +133,7 @@ class AnyUrl(str): user_required: bool = False host_required: bool = True hidden_parts: Set[str] = set() + quote_plus: bool = False __slots__ = ('scheme', 'user', 'password', 'host', 'tld', 'host_type', 'port', 'path', 'query', 'fragment') @@ -154,7 +155,6 @@ def __init__( path: Optional[str] = None, query: Optional[str] = None, fragment: Optional[str] = None, - plus: bool = False, ) -> None: str.__init__(url) self.scheme = scheme @@ -180,7 +180,6 @@ def build( path: Optional[str] = None, query: Optional[str] = None, fragment: Optional[str] = None, - plus: bool = False, **_kwargs: str, ) -> str: parts = Parts( @@ -195,23 +194,21 @@ def build( **_kwargs, # type: ignore[misc] ) - do_quote = quote_plus if plus else quote - url = scheme + '://' if user: - url += do_quote(user) + url += cls.quote(user) if password: - url += ':' + do_quote(password) + url += ':' + cls.quote(password) if user or password: url += '@' url += host if port and ('port' not in cls.hidden_parts or cls.get_default_parts(parts).get('port') != port): url += ':' + port if path: - url += '/'.join(map(do_quote, path.split('/'))) + url += '/'.join(map(cls.quote, path.split('/'))) if query: queries = query.split('&') - url += '?' + '&'.join(map(lambda s: '='.join(map(do_quote, s.split('='))), queries)) + url += '?' + '&'.join(map(lambda s: '='.join(map(cls.quote, s.split('='))), queries)) if fragment: url += '#' + fragment return url @@ -336,6 +333,10 @@ def apply_default_parts(cls, parts: 'Parts') -> 'Parts': parts[key] = value # type: ignore[literal-required] return parts + @classmethod + def quote(cls, string: str, safe: str = '') -> str: + return quote_plus(string, safe) if cls.quote_plus else quote(string, safe) + def __repr__(self) -> str: extra = ', '.join(f'{n}={getattr(self, n)!r}' for n in self.__slots__ if getattr(self, n) is not None) return f'{self.__class__.__name__}({super().__repr__()}, {extra})' @@ -412,6 +413,7 @@ def stricturl( tld_required: bool = True, host_required: bool = True, allowed_schemes: Optional[Collection[str]] = None, + quote_plus: bool = False, ) -> Type[AnyUrl]: # use kwargs then define conf in a dict to aid with IDE type hinting namespace = dict( @@ -421,6 +423,7 @@ def stricturl( tld_required=tld_required, host_required=host_required, allowed_schemes=allowed_schemes, + quote_plus=quote_plus, ) return type('UrlValue', (AnyUrl,), namespace) diff --git a/tests/test_networks.py b/tests/test_networks.py index 8fe2c316f1..848a958825 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -561,7 +561,7 @@ class Model2(BaseModel): (dict(scheme='http', host='example.net', port='1234'), 'http://example.net:1234'), (dict(scheme='http', user='foo@bar', host='example.net'), 'http://foo%40bar@example.net'), (dict(scheme='http', user='foo', password='a b', host='example.net'), 'http://foo:a%20b@example.net'), - (dict(scheme='http', host='example.net', query='q=foo bar', plus=True), 'http://example.net?q=foo+bar'), + (dict(scheme='http', host='example.net', query='q=foo bar'), 'http://example.net?q=foo%20bar'), (dict(scheme='http', host='example.net', path="/m&m's"), 'http://example.net/m%26m%27s'), ], ) @@ -569,6 +569,21 @@ def test_build_url(kwargs, expected): assert AnyUrl(None, **kwargs) == expected +@pytest.mark.parametrize( + 'kwargs,expected', + [ + (dict(scheme='https', host='example.com', query='query=my query'), 'https://example.com?query=my+query'), + ( + dict(scheme='https', host='example.com', user='my name', password='a password'), + 'https://my+name:a+password@example.com', + ), + (dict(scheme='https', host='example.com', path='/this is a path'), 'https://example.com/this+is+a+path'), + ], +) +def test_build_url_quote_plus(kwargs, expected): + assert stricturl(quote_plus=True).build(**kwargs) == expected + + @pytest.mark.parametrize( 'kwargs,expected', [ From bc84da90cc38bbf04f5e8889901db870c85870eb Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Mon, 15 Aug 2022 12:37:20 +0200 Subject: [PATCH 4/9] Add docs for `stricturl(quote_plus=True)` --- docs/examples/types_url_building.py | 8 ++++++++ docs/usage/types.md | 15 +++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 docs/examples/types_url_building.py diff --git a/docs/examples/types_url_building.py b/docs/examples/types_url_building.py new file mode 100644 index 0000000000..40c41e8b59 --- /dev/null +++ b/docs/examples/types_url_building.py @@ -0,0 +1,8 @@ +from pydantic import AnyUrl, stricturl + +url = AnyUrl.build(scheme='https', host='example.com', query='query=my query') +print(url) + +my_url_builder = stricturl(quote_plus=True) +url = my_url_builder.build(scheme='https', host='example.com', query='query=my query') +print(url) diff --git a/docs/usage/types.md b/docs/usage/types.md index 0651bead5b..6e90349b2a 100644 --- a/docs/usage/types.md +++ b/docs/usage/types.md @@ -652,6 +652,7 @@ For URI/URL validation the following types are available: - `tld_required: bool = True` - `host_required: bool = True` - `allowed_schemes: Optional[Set[str]] = None` + - `quote_plus: bool = False` The above types (which all inherit from `AnyUrl`) will attempt to give descriptive errors when invalid URLs are provided: @@ -723,6 +724,20 @@ _(This script is complete, it should run "as is")_ Also, Chrome, Firefox, and Safari all currently accept `http://exam_ple.com` as a URL, so we're in good (or at least big) company. +#### Building URLs + +You can build URLs from seperate [URL Properties](#url-properties) using the `build` method in [Pydantic URL types](#urls) or any type that inherits from them; see [Custom Data Types](#custom-Data-Types). + +By default *pydantic* percent encodes the following URL properties: `user`, `password`, `path`, `query` as per [RFC 3986](https://www.ietf.org/rfc/rfc3986.txt) without replacing spaces with `+` but this can be changed using the `stricturl` method: + +!!! note + Percent encoding was added in V2.0 + +```py +{!.tmp_examples/types_url_building.py!} +``` +_(This script is complete, it should run "as is")_ + ### Color Type You can use the `Color` data type for storing colors as per From 6a5f01be094c2e35dfaad7a2230b7ae4eb5eade3 Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Mon, 15 Aug 2022 12:39:45 +0200 Subject: [PATCH 5/9] Better changes message for #3061 Co-authored-by: Samuel Colvin --- changes/3061-FaresAhmedb.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/3061-FaresAhmedb.md b/changes/3061-FaresAhmedb.md index 5be2fc1b01..622dc0e7b3 100644 --- a/changes/3061-FaresAhmedb.md +++ b/changes/3061-FaresAhmedb.md @@ -1 +1 @@ -Fix `AnyUrl.build` doesn't do percent encoding +Add percent encoding in `AnyUrl` and descendent types From 31f81bd3bfbf06ddad5f22814babb7ea1afd3bd8 Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Wed, 17 Aug 2022 04:13:34 +0200 Subject: [PATCH 6/9] Fix precent encoding version note to V1.10 Co-authored-by: Samuel Colvin --- docs/usage/types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/types.md b/docs/usage/types.md index 6e90349b2a..b0299648b0 100644 --- a/docs/usage/types.md +++ b/docs/usage/types.md @@ -731,7 +731,7 @@ You can build URLs from seperate [URL Properties](#url-properties) using the `bu By default *pydantic* percent encodes the following URL properties: `user`, `password`, `path`, `query` as per [RFC 3986](https://www.ietf.org/rfc/rfc3986.txt) without replacing spaces with `+` but this can be changed using the `stricturl` method: !!! note - Percent encoding was added in V2.0 + Percent encoding was added in V1.10 ```py {!.tmp_examples/types_url_building.py!} From f733f30a2e56f752f581280f89a884d3dbe5738b Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Wed, 17 Aug 2022 04:15:59 +0200 Subject: [PATCH 7/9] Fix `Building URLs` snippit markdown Co-authored-by: Samuel Colvin --- docs/usage/types.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/usage/types.md b/docs/usage/types.md index b0299648b0..e8d267b61b 100644 --- a/docs/usage/types.md +++ b/docs/usage/types.md @@ -733,10 +733,7 @@ By default *pydantic* percent encodes the following URL properties: `user`, `pas !!! note Percent encoding was added in V1.10 -```py {!.tmp_examples/types_url_building.py!} -``` -_(This script is complete, it should run "as is")_ ### Color Type From fd122d14db228c62f95e8bd3e1056b92a335f68f Mon Sep 17 00:00:00 2001 From: Fares Ahmed Date: Wed, 17 Aug 2022 22:49:27 +0200 Subject: [PATCH 8/9] Fix formatting for `docs/examples/types_url_building.py` --- docs/examples/types_url_building.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/examples/types_url_building.py b/docs/examples/types_url_building.py index 40c41e8b59..7d8d67b2ba 100644 --- a/docs/examples/types_url_building.py +++ b/docs/examples/types_url_building.py @@ -4,5 +4,7 @@ print(url) my_url_builder = stricturl(quote_plus=True) -url = my_url_builder.build(scheme='https', host='example.com', query='query=my query') +url = my_url_builder.build( + scheme='http', host='example.com', query='query=my query' +) print(url) From 5be8047c62ce96c6590b7ca40d8380c0edf90076 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Fri, 19 Aug 2022 10:00:59 +0100 Subject: [PATCH 9/9] fix docs --- docs/usage/types.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/usage/types.md b/docs/usage/types.md index 8771efb009..d063186cbc 100644 --- a/docs/usage/types.md +++ b/docs/usage/types.md @@ -681,14 +681,17 @@ If further validation is required, these properties can be used by validators to #### Building URLs -You can build URLs from seperate [URL Properties](#url-properties) using the `build` method in [Pydantic URL types](#urls) or any type that inherits from them; see [Custom Data Types](#custom-Data-Types). +You can build URLs from separate [URL Properties](#url-properties) using the `build` method in +[Pydantic URL types](#urls) or any type that inherits from them. -By default *pydantic* percent encodes the following URL properties: `user`, `password`, `path`, `query` as per [RFC 3986](https://www.ietf.org/rfc/rfc3986.txt) without replacing spaces with `+` but this can be changed using the `stricturl` method: +By default, *pydantic* percent encodes the following URL properties: `user`, `password`, `path`, `query` +as per [RFC 3986](https://www.ietf.org/rfc/rfc3986.txt) without replacing spaces with `+` but this can +be changed using the `stricturl` method: !!! note Percent encoding was added in V1.10 -{!.tmp_examples/types_url_building.py!} +{!.tmp_examples/types_url_building.md!} ### Color Type