diff --git a/src/treq/client.py b/src/treq/client.py
index ba4f0a1c..7b17f402 100644
--- a/src/treq/client.py
+++ b/src/treq/client.py
@@ -157,9 +157,11 @@ def request(self, method, url, **kwargs):
         elif isinstance(url, EncodedURL):
             parsed_url = DecodedURL(url)
         elif isinstance(url, six.text_type):
-            parsed_url = DecodedURL.from_text(url)
+            # We use hyperlink in lazy mode so that users can pass arbitrary
+            # bytes in the path and querystring.
+            parsed_url = DecodedURL.from_text(url, lazy=True)
         else:
-            parsed_url = DecodedURL.from_text(url.decode('ascii'))
+            parsed_url = DecodedURL.from_text(url.decode('ascii'), lazy=True)
 
         # Join parameters provided in the URL
         # and the ones passed as argument.
diff --git a/src/treq/test/test_client.py b/src/treq/test/test_client.py
index a4dc9577..1236fced 100644
--- a/src/treq/test/test_client.py
+++ b/src/treq/test/test_client.py
@@ -73,6 +73,32 @@ def test_request_uri_encodedurl(self):
             None,
         )
 
+    def test_request_uri_bytes_pass(self):
+        """
+        The URL parameter may contain path segments or querystring parameters
+        that are not valid UTF-8. These pass through.
+        """
+        # This URL is http://example.com/hello?who=you, but "hello", "who", and
+        # "you" are encoded as UTF-16. The particulars of the encoding aren't
+        # important; what matters is that those segments can't be decoded by
+        # Hyperlink's UTF-8 default.
+        self.client.request(
+            'GET',
+            (
+                'http://example.com/%FF%FEh%00e%00l%00l%00o%00'
+                '?%FF%FEw%00h%00o%00=%FF%FEy%00o%00u%00'
+            ),
+        )
+        self.agent.request.assert_called_once_with(
+            b'GET',
+            (
+                b'http://example.com/%FF%FEh%00e%00l%00l%00o%00'
+                b'?%FF%FEw%00h%00o%00=%FF%FEy%00o%00u%00'
+            ),
+            Headers({b'accept-encoding': [b'gzip']}),
+            None,
+        )
+
     def test_request_uri_idn_params(self):
         """
         A URL that contains non-ASCII characters can be augmented with