From 5665d6a4b64137c88f138ab1be3ec6824288f06f Mon Sep 17 00:00:00 2001
From: Seth Michael Larson
Date: Mon, 4 Nov 2019 10:34:41 -0600
Subject: [PATCH] Don't encode fragment into target (#1732)

* Don't encode fragment into target

* Add entry in CHANGES.rst
---
 CHANGES.rst                               |  8 ++++++++
 dummyserver/handlers.py                   |  4 ++++
 src/urllib3/util/url.py                   |  7 ++-----
 test/with_dummyserver/test_poolmanager.py | 19 +++++++++++++++++++
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 9f585302a1..c8f288e197 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,14 @@
 Changes
 =======
 
+master (dev)
+------------
+
+* Fix issue where URL fragment was sent within the request target. (Pull #1732)
+
+* Fix issue where an empty query section in a URL would fail to parse. (Pull #1732)
+
+
 1.25.6 (2019-09-24)
 -------------------
 
diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py
index 8089432af0..b751d83fcc 100644
--- a/dummyserver/handlers.py
+++ b/dummyserver/handlers.py
@@ -229,6 +229,10 @@ def echo(self, request):
 
         return Response(request.body)
 
+    def echo_uri(self, request):
+        "Echo back the requested URI"
+        return Response(request.uri)
+
     def encodingrequest(self, request):
         "Check for UA accepting gzip/deflate encoding"
         data = b"hello, world!"
diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
index 9675f74217..f7568e9d78 100644
--- a/src/urllib3/util/url.py
+++ b/src/urllib3/util/url.py
@@ -55,7 +55,7 @@
 ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
 IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
 REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
-TARGET_RE = re.compile(r"^(/[^?]*)(?:\?([^#]+))?(?:#(.*))?$")
+TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
 
 IPV4_RE = re.compile("^" + IPV4_PAT + "$")
 IPV6_RE = re.compile("^" + IPV6_PAT + "$")
@@ -325,14 +325,11 @@ def _encode_target(target):
     if not target.startswith("/"):
         return target
 
-    path, query, fragment = TARGET_RE.match(target).groups()
+    path, query = TARGET_RE.match(target).groups()
     target = _encode_invalid_chars(path, PATH_CHARS)
     query = _encode_invalid_chars(query, QUERY_CHARS)
-    fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)
     if query is not None:
         target += "?" + query
-    if fragment is not None:
-        target += "#" + target
     return target
 
 
diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py
index cbd6dd7dc0..4a47f62740 100644
--- a/test/with_dummyserver/test_poolmanager.py
+++ b/test/with_dummyserver/test_poolmanager.py
@@ -312,6 +312,25 @@ def test_http_with_ca_cert_dir(self):
             r = http.request("GET", "http://%s:%s/" % (self.host, self.port))
             assert r.status == 200
 
+    @pytest.mark.parametrize(
+        ["target", "expected_target"],
+        [
+            ("/echo_uri?q=1#fragment", b"/echo_uri?q=1"),
+            ("/echo_uri?#", b"/echo_uri?"),
+            ("/echo_uri#?", b"/echo_uri"),
+            ("/echo_uri#?#", b"/echo_uri"),
+            ("/echo_uri??#", b"/echo_uri??"),
+            ("/echo_uri?%3f#", b"/echo_uri?%3F"),
+            ("/echo_uri?%3F#", b"/echo_uri?%3F"),
+            ("/echo_uri?[]", b"/echo_uri?%5B%5D"),
+        ],
+    )
+    def test_encode_http_target(self, target, expected_target):
+        with PoolManager() as http:
+            url = "http://%s:%d%s" % (self.host, self.port, target)
+            r = http.request("GET", url)
+            assert r.data == expected_target
+
 
 @pytest.mark.skipif(not HAS_IPV6, reason="IPv6 is not supported on this system")
 class TestIPv6PoolManager(IPv6HTTPDummyServerTestCase):