From 00093d628412112fe7fa08d8ce5dc66d3ba29fe5 Mon Sep 17 00:00:00 2001 From: Fabian Date: Sat, 3 Nov 2018 20:27:23 +0100 Subject: [PATCH 01/48] Update _appengine_environ.py Fixed Bug https://github.com/urllib3/urllib3/issues/1470 --- src/urllib3/contrib/_appengine_environ.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index f3e00942cb..69291c0c23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -17,12 +17,12 @@ def is_appengine_sandbox(): def is_local_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Development/' in os.environ['SERVER_SOFTWARE']) + os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) def is_prod_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and not is_prod_appengine_mvms()) From 6b17c92677a750f046f2a650ba419397b511c75d Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 6 Nov 2018 08:25:55 +0100 Subject: [PATCH 02/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 69291c0c23..936fb0f2df 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -16,14 +16,12 @@ def is_appengine_sandbox(): def is_local_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) + return 'SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development') def is_prod_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and - not is_prod_appengine_mvms()) + return not is_local_appengine() def is_prod_appengine_mvms(): From 271705433f060003fadd9e78fc18d0e11948b6f0 Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:10:25 +0100 Subject: [PATCH 03/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 936fb0f2df..1fc7296da2 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,11 +14,9 @@ def is_appengine(): def is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() - def is_local_appengine(): - return 'SERVER_SOFTWARE' not in os.environ or - os.environ['SERVER_SOFTWARE'].startswith('Development') - + return ('SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): return not is_local_appengine() From a9f55e114a45bbb6f3fa178b6cfad963319661b3 Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:11:23 +0100 Subject: [PATCH 04/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 1fc7296da2..3e2da32b76 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,10 +14,12 @@ def is_appengine(): def 
is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() + def is_local_appengine(): return ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) + def is_prod_appengine(): return not is_local_appengine() From 0c6d7ad2fc5c8707722eee6bdc397f5e137c59b5 Mon Sep 17 00:00:00 2001 From: Fabian Date: Sun, 25 Nov 2018 12:43:39 +0100 Subject: [PATCH 05/48] Use APPENGINE_RUNTIME in is_appengine --- src/urllib3/contrib/_appengine_environ.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 3e2da32b76..9838335df9 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,9 +6,7 @@ def is_appengine(): - return (is_local_appengine() or - is_prod_appengine() or - is_prod_appengine_mvms()) + return 'APPENGINE_RUNTIME' in os.environ def is_appengine_sandbox(): From 00b0be004afc4f2fdc0e903d3b84989e38b0b7e8 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 26 Nov 2018 19:19:25 +0100 Subject: [PATCH 06/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 9838335df9..6d69ccdd23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,12 +14,13 @@ def is_appengine_sandbox(): def is_local_appengine(): - return ('SERVER_SOFTWARE' not in os.environ or + return is_appengine() and + ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): - return not is_local_appengine() + return is_appengine() and not is_local_appengine() def is_prod_appengine_mvms(): From 5521a8ed152d3bd87d27b5218d35d1423b7d515a Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Tue, 27 Nov 2018 09:18:32 +0100 Subject: [PATCH 07/48] fixed syntax error --- src/urllib3/contrib/_appengine_environ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 6d69ccdd23..7bdf8770dc 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,7 +14,7 @@ def is_appengine_sandbox(): def is_local_appengine(): - return is_appengine() and + return is_appengine() and \ ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) From edfd3450c6ab8a9d6e1ecc441c665b0afd4084ba Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Sat, 23 Mar 2019 07:24:17 +0400 Subject: [PATCH 08/48] Improve CI stability for timeouts and branch coverage (#1554) --- src/urllib3/connection.py | 9 ++++---- src/urllib3/connectionpool.py | 6 ++++-- src/urllib3/response.py | 5 +++-- src/urllib3/util/timeout.py | 3 ++- test/with_dummyserver/test_connectionpool.py | 2 ++ test/with_dummyserver/test_https.py | 2 +- test/with_dummyserver/test_socketlevel.py | 22 ++++++++++---------- 7 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index 3d2439646c..f816ee807d 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -19,10 +19,11 @@ class BaseSSLError(BaseException): pass -try: # Python 3: - # Not a no-op, we're adding this to the namespace so it can be imported. 
+try: + # Python 3: not a no-op, we're adding this to the namespace so it can be imported. ConnectionError = ConnectionError -except NameError: # Python 2: +except NameError: + # Python 2 class ConnectionError(Exception): pass @@ -101,7 +102,7 @@ class HTTPConnection(_HTTPConnection, object): is_verified = False def __init__(self, *args, **kw): - if six.PY3: # Python 3 + if six.PY3: kw.pop('strict', None) # Pre-set source_address. diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index ecc081a163..57502c3345 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -373,9 +373,11 @@ def _make_request(self, conn, method, url, timeout=_Default, chunked=False, # Receive the response from the server try: - try: # Python 2.7, use buffering of HTTP responses + try: + # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 3 + except TypeError: + # Python 3 try: httplib_response = conn.getresponse() except Exception as e: diff --git a/src/urllib3/response.py b/src/urllib3/response.py index f7c9df5dde..7629cbb10f 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -538,9 +538,10 @@ def from_httplib(ResponseCls, r, **response_kw): headers = r.msg if not isinstance(headers, HTTPHeaderDict): - if PY3: # Python 3 + if PY3: headers = HTTPHeaderDict(headers.items()) - else: # Python 2 + else: + # Python 2.7 headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute diff --git a/src/urllib3/util/timeout.py b/src/urllib3/util/timeout.py index cec817e6ef..a4d004a848 100644 --- a/src/urllib3/util/timeout.py +++ b/src/urllib3/util/timeout.py @@ -131,7 +131,8 @@ def _validate_timeout(cls, value, name): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than or equal to 0." % (name, value)) - except TypeError: # Python 3 + except TypeError: + # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int, float or None." 
% (name, value)) diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 4c5cad2b20..d58b0c4483 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -214,6 +214,8 @@ def test_total_timeout(self): self.assertRaises(ReadTimeoutError, pool.request, 'GET', '/') def test_create_connection_timeout(self): + self.start_basic_handler(block_send=Event(), num=0) # needed for self.port + timeout = Timeout(connect=SHORT_TIMEOUT, total=LONG_TIMEOUT) pool = HTTPConnectionPool(TARPIT_HOST, self.port, timeout=timeout, retries=False) self.addCleanup(pool.close) diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index eafd40b0b6..4b6d21db40 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -507,7 +507,7 @@ def test_https_timeout(self): self.addCleanup(https_pool.close) self.assertRaises(ConnectTimeoutError, https_pool.request, 'GET', '/') - timeout = Timeout(read=0.001) + timeout = Timeout(read=0.01) https_pool = HTTPSConnectionPool(self.host, self.port, timeout=timeout, retries=False, cert_reqs='CERT_REQUIRED') diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py index 9cdad4bdc5..2700a4083f 100644 --- a/test/with_dummyserver/test_socketlevel.py +++ b/test/with_dummyserver/test_socketlevel.py @@ -371,7 +371,7 @@ def socket_handler(listener): self._start_server(socket_handler) http = HTTPConnectionPool(self.host, self.port, - timeout=0.001, + timeout=0.01, retries=False, maxsize=3, block=True) @@ -394,7 +394,7 @@ def socket_handler(listener): sock.close() self._start_server(socket_handler) - pool = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=True) + pool = HTTPConnectionPool(self.host, self.port, timeout=0.01, retries=True) self.addCleanup(pool.close) try: @@ -415,7 +415,7 @@ def socket_handler(listener): sock.close() self._start_server(socket_handler) - pool = HTTPSConnectionPool(self.host, self.port, timeout=0.001, retries=False) + pool = HTTPSConnectionPool(self.host, self.port, timeout=0.01, retries=False) self.addCleanup(pool.close) try: self.assertRaises(ReadTimeoutError, pool.request, 'GET', '/') @@ -454,7 +454,7 @@ def socket_handler(listener): try: self._start_server(socket_handler) - t = Timeout(connect=0.001, read=0.001) + t = Timeout(connect=0.001, read=0.01) pool = HTTPConnectionPool(self.host, self.port, timeout=t) self.addCleanup(pool.close) @@ -487,7 +487,7 @@ def socket_handler(listener): self.addCleanup(pool.close) response = pool.urlopen('GET', '/', retries=0, preload_content=False, - timeout=Timeout(connect=1, read=0.001)) + timeout=Timeout(connect=1, read=0.01)) try: self.assertRaises(ReadTimeoutError, response.read) finally: @@ -517,7 +517,7 @@ def socket_handler(listener): try: self.assertRaises(ReadTimeoutError, pool.urlopen, 'GET', '/', retries=False, - timeout=Timeout(connect=1, read=0.001)) + timeout=Timeout(connect=1, read=0.01)) finally: timed_out.set() @@ -614,7 +614,7 @@ def socket_handler(listener): with HTTPConnectionPool(self.host, self.port) as pool: poolsize = pool.pool.qsize() response = pool.urlopen('GET', '/', retries=0, preload_content=False, - timeout=Timeout(connect=1, read=0.001)) + timeout=Timeout(connect=1, read=0.01)) try: self.assertRaises(ReadTimeoutError, response.read) self.assertEqual(poolsize, pool.pool.qsize()) @@ -712,7 +712,7 @@ def socket_handler(listener): # Second should succeed. 
response = pool.urlopen('GET', '/', retries=0, preload_content=False, - timeout=Timeout(connect=1, read=0.1)) + timeout=Timeout(connect=1, read=1)) self.assertEqual(len(response.read()), 8) def test_closing_response_actually_closes_connection(self): @@ -803,7 +803,7 @@ def socket_handler(listener): # save it. response = pool.urlopen('GET', '/', retries=1, release_conn=False, preload_content=False, - timeout=Timeout(connect=1, read=0.001)) + timeout=Timeout(connect=1, read=0.01)) # The connection should still be on the response object, and none # should be in the pool. We opened two though. @@ -1093,7 +1093,7 @@ def socket_handler(listener): self.addCleanup(pool.close) response = pool.urlopen('GET', '/', retries=0, preload_content=False, - timeout=Timeout(connect=1, read=0.001)) + timeout=Timeout(connect=1, read=0.01)) try: self.assertRaises(ReadTimeoutError, response.read) finally: @@ -1127,7 +1127,7 @@ def request(): assert_fingerprint=fingerprint) try: response = pool.urlopen('GET', '/', preload_content=False, - timeout=Timeout(connect=1, read=0.001), + timeout=Timeout(connect=1, read=0.01), retries=0) response.read() finally: From 46331f94275a4c3b4c71a358a495b1caeaececa0 Mon Sep 17 00:00:00 2001 From: Robb Date: Fri, 22 Mar 2019 23:52:12 -0400 Subject: [PATCH 09/48] Encode field names using HTML5 by default instead of RFC 2231 (#1492) --- CHANGES.rst | 2 + dummyserver/handlers.py | 60 +++-------------- src/urllib3/fields.py | 140 +++++++++++++++++++++++++++++++++------- test/test_fields.py | 47 +++++++++++--- tox.ini | 2 +- 5 files changed, 169 insertions(+), 82 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 29c2676ffc..013d119b93 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,6 +30,8 @@ dev (master) * Add support for IPv6 addresses in subjectAltName section of certificates. (Issue #1269) +* Switched the default multipart header encoder from RFC 2231 to HTML 5 working draft. (Issue #303, PR #1492) + * ... [Short description of non-trivial change.] (Issue #) diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index f570d8819f..146241dc8d 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -17,6 +17,7 @@ from urllib3.packages.six.moves.http_client import responses from urllib3.packages.six.moves.urllib.parse import urlsplit +from urllib3.packages.six import binary_type log = logging.getLogger(__name__) @@ -157,10 +158,15 @@ def upload(self, request): return Response("Wrong size: %d != %d" % (size, len(data)), status='400 Bad Request') - if filename != file_['filename']: - return Response("Wrong filename: %s != %s" % - (filename, file_.filename), - status='400 Bad Request') + got_filename = file_['filename'] + if(isinstance(got_filename, binary_type)): + got_filename = got_filename.decode('utf-8') + + # Tornado can leave the trailing \n in place on the filename. + if filename != got_filename: + return Response( + u"Wrong filename: %s != %s" % (filename, file_.filename), + status='400 Bad Request') return Response() @@ -304,49 +310,3 @@ def redirect_after(self, request): def shutdown(self, request): sys.exit() - - -# RFC2231-aware replacement of internal tornado function -def _parse_header(line): - r"""Parse a Content-type like header. - - Return the main content-type and a dictionary of options. 
- - >>> d = _parse_header("CD: fd; foo=\"bar\"; file*=utf-8''T%C3%A4st")[1] - >>> d['file'] == 'T\u00e4st' - True - >>> d['foo'] - 'bar' - """ - import tornado.httputil - import email.utils - from urllib3.packages import six - if not six.PY3: - line = line.encode('utf-8') - parts = tornado.httputil._parseparam(';' + line) - key = next(parts) - # decode_params treats first argument special, but we already stripped key - params = [('Dummy', 'value')] - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i + 1:].strip() - params.append((name, value)) - params = email.utils.decode_params(params) - params.pop(0) # get rid of the dummy again - pdict = {} - for name, value in params: - value = email.utils.collapse_rfc2231_value(value) - if len(value) >= 2 and value[0] == '"' and value[-1] == '"': - value = value[1:-1] - pdict[name] = value - return key, pdict - - -# TODO: make the following conditional as soon as we know a version -# which does not require this fix. -# See https://github.com/facebook/tornado/issues/868 -if True: - import tornado.httputil - tornado.httputil._parse_header = _parse_header diff --git a/src/urllib3/fields.py b/src/urllib3/fields.py index 37fe64a3e8..6a9a5a7f56 100644 --- a/src/urllib3/fields.py +++ b/src/urllib3/fields.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import email.utils import mimetypes +import re from .packages import six @@ -19,57 +20,147 @@ def guess_content_type(filename, default='application/octet-stream'): return default -def format_header_param(name, value): +def format_header_param_rfc2231(name, value): """ - Helper function to format and quote a single header parameter. + Helper function to format and quote a single header parameter using the + strategy defined in RFC 2231. Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows RFC 2231, as - suggested by RFC 2388 Section 4.4. + non-ASCII values, like file names. This follows RFC 2388 Section 4.4. :param name: The name of the parameter, a string expected to be ASCII only. :param value: - The value of the parameter, provided as a unicode string. + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + An RFC-2231-formatted unicode string. """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + if not any(ch in value for ch in '"\\\r\n'): - result = '%s="%s"' % (name, value) + result = u'%s="%s"' % (name, value) try: result.encode('ascii') except (UnicodeEncodeError, UnicodeDecodeError): pass else: return result - if not six.PY3 and isinstance(value, six.text_type): # Python 2: + + if not six.PY3: # Python 2: value = value.encode('utf-8') + + # encode_rfc2231 accepts an encoded string and returns an ascii-encoded + # string in Python 2 but accepts and returns unicode strings in Python 3 value = email.utils.encode_rfc2231(value, 'utf-8') value = '%s*=%s' % (name, value) + + if not six.PY3: # Python 2: + value = value.decode('utf-8') + return value +_HTML5_REPLACEMENTS = { + u"\u0022": u"%22", + # Replace "\" with "\\". + u"\u005C": u"\u005C\u005C", + u"\u005C": u"\u005C\u005C", +} + +# All control characters from 0x00 to 0x1F *except* 0x1B. 
+_HTML5_REPLACEMENTS.update({ + six.unichr(cc): u"%{:02X}".format(cc) + for cc + in range(0x00, 0x1F+1) + if cc not in (0x1B,) +}) + + +def _replace_multiple(value, needles_and_replacements): + + def replacer(match): + return needles_and_replacements[match.group(0)] + + pattern = re.compile( + r"|".join([ + re.escape(needle) for needle in needles_and_replacements.keys() + ]) + ) + + result = pattern.sub(replacer, value) + + return result + + +def format_header_param_html5(name, value): + """ + Helper function to format and quote a single header parameter using the + HTML5 strategy. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows the `HTML5 Working Draft + Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. + + .. _HTML5 Working Draft Section 4.10.22.7: + https://w3c.github.io/html/sec-forms.html#multipart-form-data + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + A unicode string, stripped of troublesome characters. + """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + + value = _replace_multiple(value, _HTML5_REPLACEMENTS) + + return u'%s="%s"' % (name, value) + + +# For backwards-compatibility. +format_header_param = format_header_param_html5 + + class RequestField(object): """ A data container for request body parameters. :param name: - The name of this request field. + The name of this request field. Must be unicode. :param data: The data/value body. :param filename: - An optional filename of the request field. + An optional filename of the request field. Must be unicode. :param headers: An optional dict-like object of headers to initially use for the field. + :param header_formatter: + An optional callable that is used to encode and format the headers. By + default, this is :func:`format_header_param_html5`. """ - def __init__(self, name, data, filename=None, headers=None): + def __init__( + self, + name, + data, + filename=None, + headers=None, + header_formatter=format_header_param_html5): self._name = name self._filename = filename self.data = data self.headers = {} if headers: self.headers = dict(headers) + self.header_formatter = header_formatter @classmethod - def from_tuples(cls, fieldname, value): + def from_tuples( + cls, + fieldname, + value, + header_formatter=format_header_param_html5): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. @@ -97,21 +188,24 @@ def from_tuples(cls, fieldname, value): content_type = None data = value - request_param = cls(fieldname, data, filename=filename) + request_param = cls( + fieldname, data, filename=filename, header_formatter=header_formatter) request_param.make_multipart(content_type=content_type) return request_param def _render_part(self, name, value): """ - Overridable helper function to format a single header parameter. + Overridable helper function to format a single header parameter. By + default, this calls ``self.header_formatter``. :param name: The name of the parameter, a string expected to be ASCII only. :param value: The value of the parameter, provided as a unicode string. 
""" - return format_header_param(name, value) + + return self.header_formatter(name, value) def _render_parts(self, header_parts): """ @@ -133,7 +227,7 @@ def _render_parts(self, header_parts): if value is not None: parts.append(self._render_part(name, value)) - return '; '.join(parts) + return u'; '.join(parts) def render_headers(self): """ @@ -144,15 +238,15 @@ def render_headers(self): sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] for sort_key in sort_keys: if self.headers.get(sort_key, False): - lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + lines.append(u'%s: %s' % (sort_key, self.headers[sort_key])) for header_name, header_value in self.headers.items(): if header_name not in sort_keys: if header_value: - lines.append('%s: %s' % (header_name, header_value)) + lines.append(u'%s: %s' % (header_name, header_value)) - lines.append('\r\n') - return '\r\n'.join(lines) + lines.append(u'\r\n') + return u'\r\n'.join(lines) def make_multipart(self, content_disposition=None, content_type=None, content_location=None): @@ -168,10 +262,10 @@ def make_multipart(self, content_disposition=None, content_type=None, The 'Content-Location' of the request body. """ - self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join([ - '', self._render_parts( - (('name', self._name), ('filename', self._filename)) + self.headers['Content-Disposition'] = content_disposition or u'form-data' + self.headers['Content-Disposition'] += u'; '.join([ + u'', self._render_parts( + ((u'name', self._name), (u'filename', self._filename)) ) ]) self.headers['Content-Type'] = content_type diff --git a/test/test_fields.py b/test/test_fields.py index e944ec43d5..72e70b8e75 100644 --- a/test/test_fields.py +++ b/test/test_fields.py @@ -1,8 +1,7 @@ import pytest -from urllib3.fields import guess_content_type, RequestField +from urllib3.fields import format_header_param_rfc2231, guess_content_type, RequestField from urllib3.packages.six import u -from . 
import onlyPy2 class TestRequestField(object): @@ -53,13 +52,45 @@ def test_render_parts(self): parts = field._render_parts([('name', 'value'), ('filename', 'value')]) assert parts == 'name="value"; filename="value"' - def test_render_part(self): - field = RequestField('somename', 'data') + def test_render_part_rfc2231_unicode(self): + field = RequestField('somename', 'data', header_formatter=format_header_param_rfc2231) param = field._render_part('filename', u('n\u00e4me')) assert param == "filename*=utf-8''n%C3%A4me" - @onlyPy2 - def test_render_unicode_bytes_py2(self): + def test_render_part_rfc2231_ascii(self): + field = RequestField('somename', 'data', header_formatter=format_header_param_rfc2231) + param = field._render_part('filename', b'name') + assert param == 'filename="name"' + + def test_render_part_html5_unicode(self): field = RequestField('somename', 'data') - param = field._render_part('filename', 'n\xc3\xa4me') - assert param == "filename*=utf-8''n%C3%A4me" + param = field._render_part('filename', u('n\u00e4me')) + assert param == u('filename="n\u00e4me"') + + def test_render_part_html5_ascii(self): + field = RequestField('somename', 'data') + param = field._render_part('filename', b'name') + assert param == 'filename="name"' + + def test_render_part_html5_unicode_escape(self): + field = RequestField('somename', 'data') + param = field._render_part('filename', u('hello\\world\u0022')) + assert param == u('filename="hello\\\\world%22"') + + def test_render_part_html5_unicode_with_control_character(self): + field = RequestField('somename', 'data') + param = field._render_part('filename', u('hello\x1A\x1B\x1C')) + assert param == u('filename="hello%1A\x1B%1C"') + + def test_from_tuples_rfc2231(self): + field = RequestField.from_tuples( + u('fieldname'), + (u('filen\u00e4me'), 'data'), + header_formatter=format_header_param_rfc2231) + cd = field.headers['Content-Disposition'] + assert (cd == u("form-data; name=\"fieldname\"; filename*=utf-8''filen%C3%A4me")) + + def test_from_tuples_html5(self): + field = RequestField.from_tuples(u('fieldname'), (u('filen\u00e4me'), 'data')) + cd = field.headers['Content-Disposition'] + assert (cd == u('form-data; name="fieldname"; filename="filen\u00e4me"')) diff --git a/tox.ini b/tox.ini index 8764a349e3..f2e46a0b50 100644 --- a/tox.ini +++ b/tox.ini @@ -41,7 +41,7 @@ setenv = passenv = TRAVIS TRAVIS_INFRA [testenv:flake8-py3] -basepython = python3.4 +basepython = python3 deps= flake8 commands= From c427daf83f1adc71c2a3ced9c0a53ccacb814abd Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Wed, 27 Mar 2019 17:42:42 +0400 Subject: [PATCH 10/48] Remove mercurial files (#1558) --- .hgignore | 7 ------- .hgtags | 4 ---- 2 files changed, 11 deletions(-) delete mode 100644 .hgignore delete mode 100644 .hgtags diff --git a/.hgignore b/.hgignore deleted file mode 100644 index 59206f9e0f..0000000000 --- a/.hgignore +++ /dev/null @@ -1,7 +0,0 @@ -syntax: glob -.* -*.pyc -*.egg-info -*.log -dist -build diff --git a/.hgtags b/.hgtags deleted file mode 100644 index 6ea9c1ecfe..0000000000 --- a/.hgtags +++ /dev/null @@ -1,4 +0,0 @@ -c72fc50cfc59ab5cc8331aa591f63aaf859d5250 0.3 -b90dbbc39fd0f16e38f49b1cba0605738a135e95 0.3.1 -db5e569cf68c737e35a26cc246c07620aed9564f 0.4 -38339cf1816ea99f1d54b3c1ec68c79384d529d7 0.4.1 From 1ebcbc2fe6743b3fff9985dc33abe7da2aa93b5d Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 19 Apr 2019 09:52:09 -0500 Subject: [PATCH 11/48] Apply changes from 1.24.2 release to master (#1566) --- .travis.yml | 14 
+++++++++----- CHANGES.rst | 15 +++++++++++---- docs/requirements.txt | 1 - src/urllib3/util/ssl_.py | 3 ++- test/test_ssl.py | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1e76821edb..0710c1c573 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,13 +31,13 @@ env: - PYPI_USERNAME=urllib3 # PYPI_PASSWORD is set in Travis control panel. - matrix: - - TOXENV=flake8-py3 - - TOXENV=gae - - TOXENV=docs - matrix: include: + - python: 3.6 + env: TOXENV=flake8-py3 + - python: 3.6 + env: TOXENV=docs + - python: 2.7 env: TOXENV=py27 - python: 2.7 @@ -63,6 +63,10 @@ matrix: - python: pypy-5.4 env: TOXENV=pypy + + - python: 2.7 + env: TOXENV=gae + - language: generic os: osx env: TOXENV=py27 diff --git a/CHANGES.rst b/CHANGES.rst index 013d119b93..c7a996d5f9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,8 +8,6 @@ dev (master) * Upgraded ``urllib3.utils.parse_url()`` to be RFC 3986 compliant. (Pull #1487) -* Remove Authorization header regardless of case when redirecting to cross-site. (Issue #1510) - * Added support for ``key_password`` for ``HTTPSConnectionPool`` to use encrypted ``key_file`` without creating your own ``SSLContext`` object. (Pull #1489) @@ -28,13 +26,22 @@ dev (master) * Drop ciphers using DSS key exchange from default TLS cipher suites. Improve default ciphers when using SecureTransport. (Pull #1496) -* Add support for IPv6 addresses in subjectAltName section of certificates. (Issue #1269) - * Switched the default multipart header encoder from RFC 2231 to HTML 5 working draft. (Issue #303, PR #1492) * ... [Short description of non-trivial change.] (Issue #) +1.24.2 (2019-04-17) +------------------- + +* Don't load system certificates by default when any other ``ca_certs``, ``ca_certs_dir`` or + ``ssl_context`` parameters are specified. + +* Remove Authorization header regardless of case when redirecting to cross-site. (Issue #1510) + +* Add support for IPv6 addresses in subjectAltName section of certificates. 
(Issue #1269) + + 1.24.1 (2018-11-02) ------------------- diff --git a/docs/requirements.txt b/docs/requirements.txt index 95449be35d..f769722022 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,4 @@ -r ../dev-requirements.txt -ndg-httpsclient sphinx alabaster requests>=2,<2.16 diff --git a/src/urllib3/util/ssl_.py b/src/urllib3/util/ssl_.py index 0327a923ad..fdf7d1e07d 100644 --- a/src/urllib3/util/ssl_.py +++ b/src/urllib3/util/ssl_.py @@ -329,7 +329,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, if e.errno == errno.ENOENT: raise SSLError(e) raise - elif getattr(context, 'load_default_certs', None) is not None: + + elif ssl_context is None and hasattr(context, 'load_default_certs'): # try to load OS default certs; works well on Windows (require Python3.4+) context.load_default_certs() diff --git a/test/test_ssl.py b/test/test_ssl.py index 47359717d2..6a46b4f3ea 100644 --- a/test/test_ssl.py +++ b/test/test_ssl.py @@ -88,3 +88,40 @@ def test_create_urllib3_context_set_ciphers(monkeypatch, ciphers, expected_ciphe assert context.set_ciphers.call_count == 1 assert context.set_ciphers.call_args == mock.call(expected_ciphers) + + +def test_wrap_socket_given_context_no_load_default_certs(): + context = mock.create_autospec(ssl_.SSLContext) + context.load_default_certs = mock.Mock() + + sock = mock.Mock() + ssl_.ssl_wrap_socket(sock, ssl_context=context) + + context.load_default_certs.assert_not_called() + + +def test_wrap_socket_given_ca_certs_no_load_default_certs(monkeypatch): + context = mock.create_autospec(ssl_.SSLContext) + context.load_default_certs = mock.Mock() + context.options = 0 + + monkeypatch.setattr(ssl_, "SSLContext", lambda *_, **__: context) + + sock = mock.Mock() + ssl_.ssl_wrap_socket(sock, ca_certs="/tmp/fake-file") + + context.load_default_certs.assert_not_called() + context.load_verify_locations.assert_called_with("/tmp/fake-file", None) + + +def test_wrap_socket_default_loads_default_certs(monkeypatch): + context = mock.create_autospec(ssl_.SSLContext) + context.load_default_certs = mock.Mock() + context.options = 0 + + monkeypatch.setattr(ssl_, "SSLContext", lambda *_, **__: context) + + sock = mock.Mock() + ssl_.ssl_wrap_socket(sock) + + context.load_default_certs.assert_called_with() From 1d3e60e86fce8938845fdc052f47ed9ef3da8859 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 19 Apr 2019 15:31:15 -0500 Subject: [PATCH 12/48] Update urllib3.contrib.socks documentation (#1567) Closes #1565. --- src/urllib3/contrib/socks.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/urllib3/contrib/socks.py b/src/urllib3/contrib/socks.py index 532d99cea4..636d261fb0 100644 --- a/src/urllib3/contrib/socks.py +++ b/src/urllib3/contrib/socks.py @@ -1,25 +1,38 @@ # -*- coding: utf-8 -*- """ This module contains provisional support for SOCKS proxies from within -urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and +urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and SOCKS5. To enable its functionality, either install PySocks or install this module with the ``socks`` extra. The SOCKS implementation supports the full range of urllib3 features. 
It also supports the following SOCKS features: -- SOCKS4 -- SOCKS4a -- SOCKS5 +- SOCKS4A (``proxy_url='socks4a://...``) +- SOCKS4 (``proxy_url='socks4://...``) +- SOCKS5 with remote DNS (``proxy_url='socks5h://...``) +- SOCKS5 with local DNS (``proxy_url='socks5://...``) - Usernames and passwords for the SOCKS proxy -Known Limitations: + .. note:: + It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in + your ``proxy_url`` to ensure that DNS resolution is done from the remote + server instead of client-side when connecting to a domain name. + +SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 +supports IPv4, IPv6, and domain names. + +When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` +will be sent as the ``userid`` section of the SOCKS request:: + + proxy_url="socks4a://@proxy-host" + +When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion +of the ``proxy_url`` will be sent as the username/password to authenticate +with the proxy:: + + proxy_url="socks5h://:@proxy-host" -- Currently PySocks does not support contacting remote websites via literal - IPv6 addresses. Any such connection attempt will fail. You must use a domain - name. -- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any - such connection attempt will fail. """ from __future__ import absolute_import From 5d523706c7b03f947dc50a7e783758a2bfff0532 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Sat, 20 Apr 2019 20:43:17 -0500 Subject: [PATCH 13/48] Use rfc3986.validator.Validator for parse_url (#1531) --- src/urllib3/connectionpool.py | 18 +- src/urllib3/packages/rfc3986/__init__.py | 6 +- src/urllib3/packages/rfc3986/_mixin.py | 353 ++++++++++++++++++ src/urllib3/packages/rfc3986/abnf_regexp.py | 95 ++++- src/urllib3/packages/rfc3986/api.py | 15 + src/urllib3/packages/rfc3986/exceptions.py | 9 +- src/urllib3/packages/rfc3986/iri.py | 143 ++++++++ src/urllib3/packages/rfc3986/misc.py | 46 ++- src/urllib3/packages/rfc3986/normalizers.py | 17 +- src/urllib3/packages/rfc3986/uri.py | 373 +------------------- src/urllib3/packages/rfc3986/validators.py | 24 +- src/urllib3/util/url.py | 84 +++-- test/test_util.py | 71 +++- 13 files changed, 841 insertions(+), 413 deletions(-) create mode 100644 src/urllib3/packages/rfc3986/_mixin.py create mode 100644 src/urllib3/packages/rfc3986/iri.py diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index 57502c3345..157568a395 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -26,6 +26,7 @@ from .packages.ssl_match_hostname import CertificateError from .packages import six from .packages.six.moves import queue +from .packages.rfc3986.normalizers import normalize_host from .connection import ( port_by_scheme, DummyConnection, @@ -65,7 +66,7 @@ def __init__(self, host, port=None): if not host: raise LocationValueError("No host specified.") - self.host = _ipv6_host(host, self.scheme) + self.host = _normalize_host(host, scheme=self.scheme) self._proxy_host = host.lower() self.port = port @@ -434,8 +435,8 @@ def is_same_host(self, url): # TODO: Add optional support for socket.gethostbyname checking. 
scheme, host, port = get_host(url) - - host = _ipv6_host(host, self.scheme) + if host is not None: + host = _normalize_host(host, scheme=scheme) # Use explicit default port for comparison when none is given if self.port and not port: @@ -878,9 +879,9 @@ def connection_from_url(url, **kw): return HTTPConnectionPool(host, port=port, **kw) -def _ipv6_host(host, scheme): +def _normalize_host(host, scheme): """ - Process IPv6 address literals + Normalize hosts for comparisons and use with sockets. """ # httplib doesn't like it when we include brackets in IPv6 addresses @@ -889,11 +890,8 @@ def _ipv6_host(host, scheme): # Instead, we need to make sure we never pass ``None`` as the port. # However, for backward compatibility reasons we can't actually # *assert* that. See http://bugs.python.org/issue28539 - # - # Also if an IPv6 address literal has a zone identifier, the - # percent sign might be URIencoded, convert it back into ASCII if host.startswith('[') and host.endswith(']'): - host = host.replace('%25', '%').strip('[]') + host = host.strip('[]') if scheme in NORMALIZABLE_SCHEMES: - host = host.lower() + host = normalize_host(host) return host diff --git a/src/urllib3/packages/rfc3986/__init__.py b/src/urllib3/packages/rfc3986/__init__.py index 9719d6f7ec..13a786dfb0 100644 --- a/src/urllib3/packages/rfc3986/__init__.py +++ b/src/urllib3/packages/rfc3986/__init__.py @@ -22,6 +22,8 @@ :license: Apache v2.0, see LICENSE for details """ +from .api import iri_reference +from .api import IRIReference from .api import is_valid_uri from .api import normalize_uri from .api import uri_reference @@ -34,14 +36,16 @@ __author_email__ = 'graffatcolmingov@gmail.com' __license__ = 'Apache v2.0' __copyright__ = 'Copyright 2014 Rackspace' -__version__ = '1.2.0' +__version__ = '1.3.0' __all__ = ( 'ParseResult', 'URIReference', + 'IRIReference', 'is_valid_uri', 'normalize_uri', 'uri_reference', + 'iri_reference', 'urlparse', '__title__', '__author__', diff --git a/src/urllib3/packages/rfc3986/_mixin.py b/src/urllib3/packages/rfc3986/_mixin.py new file mode 100644 index 0000000000..543925cdbc --- /dev/null +++ b/src/urllib3/packages/rfc3986/_mixin.py @@ -0,0 +1,353 @@ +"""Module containing the implementation of the URIMixin class.""" +import warnings + +from . import exceptions as exc +from . import misc +from . import normalizers +from . import validators + + +class URIMixin(object): + """Mixin with all shared methods for URIs and IRIs.""" + + __hash__ = tuple.__hash__ + + def authority_info(self): + """Return a dictionary with the ``userinfo``, ``host``, and ``port``. + + If the authority is not valid, it will raise a + :class:`~rfc3986.exceptions.InvalidAuthority` Exception. + + :returns: + ``{'userinfo': 'username:password', 'host': 'www.example.com', + 'port': '80'}`` + :rtype: dict + :raises rfc3986.exceptions.InvalidAuthority: + If the authority is not ``None`` and can not be parsed. + """ + if not self.authority: + return {'userinfo': None, 'host': None, 'port': None} + + match = self._match_subauthority() + + if match is None: + # In this case, we have an authority that was parsed from the URI + # Reference, but it cannot be further parsed by our + # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid + # authority. 
+ raise exc.InvalidAuthority(self.authority.encode(self.encoding)) + + # We had a match, now let's ensure that it is actually a valid host + # address if it is IPv4 + matches = match.groupdict() + host = matches.get('host') + + if (host and misc.IPv4_MATCHER.match(host) and not + validators.valid_ipv4_host_address(host)): + # If we have a host, it appears to be IPv4 and it does not have + # valid bytes, it is an InvalidAuthority. + raise exc.InvalidAuthority(self.authority.encode(self.encoding)) + + return matches + + def _match_subauthority(self): + return misc.SUBAUTHORITY_MATCHER.match(self.authority) + + @property + def host(self): + """If present, a string representing the host.""" + try: + authority = self.authority_info() + except exc.InvalidAuthority: + return None + return authority['host'] + + @property + def port(self): + """If present, the port extracted from the authority.""" + try: + authority = self.authority_info() + except exc.InvalidAuthority: + return None + return authority['port'] + + @property + def userinfo(self): + """If present, the userinfo extracted from the authority.""" + try: + authority = self.authority_info() + except exc.InvalidAuthority: + return None + return authority['userinfo'] + + def is_absolute(self): + """Determine if this URI Reference is an absolute URI. + + See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. + + :returns: ``True`` if it is an absolute URI, ``False`` otherwise. + :rtype: bool + """ + return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) + + def is_valid(self, **kwargs): + """Determine if the URI is valid. + + .. deprecated:: 1.1.0 + + Use the :class:`~rfc3986.validators.Validator` object instead. + + :param bool require_scheme: Set to ``True`` if you wish to require the + presence of the scheme component. + :param bool require_authority: Set to ``True`` if you wish to require + the presence of the authority component. + :param bool require_path: Set to ``True`` if you wish to require the + presence of the path component. + :param bool require_query: Set to ``True`` if you wish to require the + presence of the query component. + :param bool require_fragment: Set to ``True`` if you wish to require + the presence of the fragment component. + :returns: ``True`` if the URI is valid. ``False`` otherwise. + :rtype: bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. " + "This method will be eventually removed.", + DeprecationWarning) + validators = [ + (self.scheme_is_valid, kwargs.get('require_scheme', False)), + (self.authority_is_valid, kwargs.get('require_authority', False)), + (self.path_is_valid, kwargs.get('require_path', False)), + (self.query_is_valid, kwargs.get('require_query', False)), + (self.fragment_is_valid, kwargs.get('require_fragment', False)), + ] + return all(v(r) for v, r in validators) + + def authority_is_valid(self, require=False): + """Determine if the authority component is valid. + + .. deprecated:: 1.1.0 + + Use the :class:`~rfc3986.validators.Validator` object instead. + + :param bool require: + Set to ``True`` to require the presence of this component. + :returns: + ``True`` if the authority is valid. ``False`` otherwise. + :rtype: + bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. 
" + "This method will be eventually removed.", + DeprecationWarning) + try: + self.authority_info() + except exc.InvalidAuthority: + return False + + return validators.authority_is_valid( + self.authority, + host=self.host, + require=require, + ) + + def scheme_is_valid(self, require=False): + """Determine if the scheme component is valid. + + .. deprecated:: 1.1.0 + + Use the :class:`~rfc3986.validators.Validator` object instead. + + :param str require: Set to ``True`` to require the presence of this + component. + :returns: ``True`` if the scheme is valid. ``False`` otherwise. + :rtype: bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. " + "This method will be eventually removed.", + DeprecationWarning) + return validators.scheme_is_valid(self.scheme, require) + + def path_is_valid(self, require=False): + """Determine if the path component is valid. + + .. deprecated:: 1.1.0 + + Use the :class:`~rfc3986.validators.Validator` object instead. + + :param str require: Set to ``True`` to require the presence of this + component. + :returns: ``True`` if the path is valid. ``False`` otherwise. + :rtype: bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. " + "This method will be eventually removed.", + DeprecationWarning) + return validators.path_is_valid(self.path, require) + + def query_is_valid(self, require=False): + """Determine if the query component is valid. + + .. deprecated:: 1.1.0 + + Use the :class:`~rfc3986.validators.Validator` object instead. + + :param str require: Set to ``True`` to require the presence of this + component. + :returns: ``True`` if the query is valid. ``False`` otherwise. + :rtype: bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. " + "This method will be eventually removed.", + DeprecationWarning) + return validators.query_is_valid(self.query, require) + + def fragment_is_valid(self, require=False): + """Determine if the fragment component is valid. + + .. deprecated:: 1.1.0 + + Use the Validator object instead. + + :param str require: Set to ``True`` to require the presence of this + component. + :returns: ``True`` if the fragment is valid. ``False`` otherwise. + :rtype: bool + """ + warnings.warn("Please use rfc3986.validators.Validator instead. " + "This method will be eventually removed.", + DeprecationWarning) + return validators.fragment_is_valid(self.fragment, require) + + def normalized_equality(self, other_ref): + """Compare this URIReference to another URIReference. + + :param URIReference other_ref: (required), The reference with which + we're comparing. + :returns: ``True`` if the references are equal, ``False`` otherwise. + :rtype: bool + """ + return tuple(self.normalize()) == tuple(other_ref.normalize()) + + def resolve_with(self, base_uri, strict=False): + """Use an absolute URI Reference to resolve this relative reference. + + Assuming this is a relative reference that you would like to resolve, + use the provided base URI to resolve it. + + See http://tools.ietf.org/html/rfc3986#section-5 for more information. + + :param base_uri: Either a string or URIReference. It must be an + absolute URI or it will raise an exception. + :returns: A new URIReference which is the result of resolving this + reference using ``base_uri``. + :rtype: :class:`URIReference` + :raises rfc3986.exceptions.ResolutionError: + If the ``base_uri`` is not an absolute URI. 
+ """ + if not isinstance(base_uri, URIMixin): + base_uri = type(self).from_string(base_uri) + + if not base_uri.is_absolute(): + raise exc.ResolutionError(base_uri) + + # This is optional per + # http://tools.ietf.org/html/rfc3986#section-5.2.1 + base_uri = base_uri.normalize() + + # The reference we're resolving + resolving = self + + if not strict and resolving.scheme == base_uri.scheme: + resolving = resolving.copy_with(scheme=None) + + # http://tools.ietf.org/html/rfc3986#page-32 + if resolving.scheme is not None: + target = resolving.copy_with( + path=normalizers.normalize_path(resolving.path) + ) + else: + if resolving.authority is not None: + target = resolving.copy_with( + scheme=base_uri.scheme, + path=normalizers.normalize_path(resolving.path) + ) + else: + if resolving.path is None: + if resolving.query is not None: + query = resolving.query + else: + query = base_uri.query + target = resolving.copy_with( + scheme=base_uri.scheme, + authority=base_uri.authority, + path=base_uri.path, + query=query + ) + else: + if resolving.path.startswith('/'): + path = normalizers.normalize_path(resolving.path) + else: + path = normalizers.normalize_path( + misc.merge_paths(base_uri, resolving.path) + ) + target = resolving.copy_with( + scheme=base_uri.scheme, + authority=base_uri.authority, + path=path, + query=resolving.query + ) + return target + + def unsplit(self): + """Create a URI string from the components. + + :returns: The URI Reference reconstituted as a string. + :rtype: str + """ + # See http://tools.ietf.org/html/rfc3986#section-5.3 + result_list = [] + if self.scheme: + result_list.extend([self.scheme, ':']) + if self.authority: + result_list.extend(['//', self.authority]) + if self.path: + result_list.append(self.path) + if self.query is not None: + result_list.extend(['?', self.query]) + if self.fragment is not None: + result_list.extend(['#', self.fragment]) + return ''.join(result_list) + + def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting): + """Create a copy of this reference with the new components. + + :param str scheme: + (optional) The scheme to use for the new reference. + :param str authority: + (optional) The authority to use for the new reference. + :param str path: + (optional) The path to use for the new reference. + :param str query: + (optional) The query to use for the new reference. + :param str fragment: + (optional) The fragment to use for the new reference. + :returns: + New URIReference with provided components. + :rtype: + URIReference + """ + attributes = { + 'scheme': scheme, + 'authority': authority, + 'path': path, + 'query': query, + 'fragment': fragment, + } + for key, value in list(attributes.items()): + if value is misc.UseExisting: + del attributes[key] + uri = self._replace(**attributes) + uri.encoding = self.encoding + return uri diff --git a/src/urllib3/packages/rfc3986/abnf_regexp.py b/src/urllib3/packages/rfc3986/abnf_regexp.py index 5b6da1771b..24c9c3d00a 100644 --- a/src/urllib3/packages/rfc3986/abnf_regexp.py +++ b/src/urllib3/packages/rfc3986/abnf_regexp.py @@ -13,6 +13,8 @@ # limitations under the License. 
"""Module for the regular expressions crafted from ABNF.""" +import sys + # https://tools.ietf.org/html/rfc3986#page-13 GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@" GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) @@ -25,7 +27,7 @@ ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' DIGIT = '0123456789' # https://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-' +UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r'._!-' UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET) # We need to escape the '-' in this case: @@ -75,7 +77,7 @@ '%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + UNRESERVED_RE ) # The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -IPv4_RE = '([0-9]{1,3}.){3}[0-9]{1,3}' +IPv4_RE = r'([0-9]{1,3}\.){3}[0-9]{1,3}' # Hexadecimal characters used in each piece of an IPv6 address HEXDIG_RE = '[0-9A-Fa-f]{1,4}' # Least-significant 32 bits of an IPv6 address @@ -111,18 +113,18 @@ *variations ) -IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % ( +IPv_FUTURE_RE = r'v[0-9A-Fa-f]+\.[%s]+' % ( UNRESERVED_RE + SUB_DELIMITERS_RE + ':' ) - # RFC 6874 Zone ID ABNF ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+' -IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID -IP_LITERAL_RE = r'\[({0}|(?:{1})|{2})\]'.format( - IPv6_RE, - IPv6_ADDRZ_RE, +IPv6_ADDRZ_RFC4007_RE = IPv6_RE + '(?:(?:%25|%)' + ZONE_ID + ')?' +IPv6_ADDRZ_RE = IPv6_RE + '(?:%25' + ZONE_ID + ')?' + +IP_LITERAL_RE = r'\[({0}|{1})\]'.format( + IPv6_ADDRZ_RFC4007_RE, IPv_FUTURE_RE, ) @@ -186,3 +188,80 @@ PATH_ROOTLESS, PATH_EMPTY, ) + +# ############### +# IRIs / RFC 3987 +# ############### + +# Only wide-unicode gets the high-ranges of UCSCHAR +if sys.maxunicode > 0xFFFF: # pragma: no cover + IPRIVATE = u'\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD' + UCSCHAR_RE = ( + u'\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF' + u'\U00010000-\U0001FFFD\U00020000-\U0002FFFD' + u'\U00030000-\U0003FFFD\U00040000-\U0004FFFD' + u'\U00050000-\U0005FFFD\U00060000-\U0006FFFD' + u'\U00070000-\U0007FFFD\U00080000-\U0008FFFD' + u'\U00090000-\U0009FFFD\U000A0000-\U000AFFFD' + u'\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD' + u'\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD' + ) +else: # pragma: no cover + IPRIVATE = u'\uE000-\uF8FF' + UCSCHAR_RE = ( + u'\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF' + ) + +IUNRESERVED_RE = u'A-Za-z0-9\\._~\\-' + UCSCHAR_RE +IPCHAR = u'([' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':@]|%s)' % PCT_ENCODED + +isegments = { + 'isegment': IPCHAR + u'*', + # Non-zero length segment + 'isegment-nz': IPCHAR + u'+', + # Non-zero length segment without ":" + 'isegment-nz-nc': IPCHAR.replace(':', '') + u'+' +} + +IPATH_ROOTLESS = u'%(isegment-nz)s(/%(isegment)s)*' % isegments +IPATH_NOSCHEME = u'%(isegment-nz-nc)s(/%(isegment)s)*' % isegments +IPATH_ABSOLUTE = u'/(?:%s)?' 
% IPATH_ROOTLESS +IPATH_ABEMPTY = u'(?:/%(isegment)s)*' % isegments +IPATH_RE = u'^(?:%s|%s|%s|%s|%s)$' % ( + IPATH_ABEMPTY, IPATH_ABSOLUTE, IPATH_NOSCHEME, IPATH_ROOTLESS, PATH_EMPTY +) + +IREGULAR_NAME_RE = IREG_NAME = u'(?:{0}|[{1}])*'.format( + u'%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + IUNRESERVED_RE +) + +IHOST_RE = IHOST_PATTERN = u'({0}|{1}|{2})'.format( + IREG_NAME, + IPv4_RE, + IP_LITERAL_RE, +) + +IUSERINFO_RE = u'^(?:[' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':]|%s)+' % ( + PCT_ENCODED +) + +IFRAGMENT_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE + + u']|%s)*$' % PCT_ENCODED) +IQUERY_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE + + IPRIVATE + u']|%s)*$' % PCT_ENCODED) + +IRELATIVE_PART_RE = u'(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + IPATH_ABEMPTY, + IPATH_ABSOLUTE, + IPATH_NOSCHEME, + PATH_EMPTY, +) + +IHIER_PART_RE = u'(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + IPATH_ABEMPTY, + IPATH_ABSOLUTE, + IPATH_ROOTLESS, + PATH_EMPTY, +) diff --git a/src/urllib3/packages/rfc3986/api.py b/src/urllib3/packages/rfc3986/api.py index 17f4daf927..ddc4a1cd28 100644 --- a/src/urllib3/packages/rfc3986/api.py +++ b/src/urllib3/packages/rfc3986/api.py @@ -19,6 +19,7 @@ and classes of rfc3986. """ +from .iri import IRIReference from .parseresult import ParseResult from .uri import URIReference @@ -37,6 +38,20 @@ def uri_reference(uri, encoding='utf-8'): return URIReference.from_string(uri, encoding) +def iri_reference(iri, encoding='utf-8'): + """Parse a IRI string into an IRIReference. + + This is a convenience function. You could achieve the same end by using + ``IRIReference.from_string(iri)``. + + :param str iri: The IRI which needs to be parsed into a reference. + :param str encoding: The encoding of the string provided + :returns: A parsed IRI + :rtype: :class:`IRIReference` + """ + return IRIReference.from_string(iri, encoding) + + def is_valid_uri(uri, encoding='utf-8', **kwargs): """Determine if the URI given is valid. diff --git a/src/urllib3/packages/rfc3986/exceptions.py b/src/urllib3/packages/rfc3986/exceptions.py index e0886a5ff0..da8ca7cb1f 100644 --- a/src/urllib3/packages/rfc3986/exceptions.py +++ b/src/urllib3/packages/rfc3986/exceptions.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- """Exceptions module for rfc3986.""" +from . import compat + class RFC3986Exception(Exception): """Base class for all rfc3986 exception classes.""" @@ -14,7 +16,8 @@ class InvalidAuthority(RFC3986Exception): def __init__(self, authority): """Initialize the exception with the invalid authority.""" super(InvalidAuthority, self).__init__( - "The authority ({0}) is not valid.".format(authority)) + u"The authority ({0}) is not valid.".format( + compat.to_str(authority))) class InvalidPort(RFC3986Exception): @@ -109,3 +112,7 @@ def __init__(self, uri, *component_names): uri, self.components, ) + + +class MissingDependencyError(RFC3986Exception): + """Exception raised when an IRI is encoded without the 'idna' module.""" diff --git a/src/urllib3/packages/rfc3986/iri.py b/src/urllib3/packages/rfc3986/iri.py new file mode 100644 index 0000000000..2c708d853a --- /dev/null +++ b/src/urllib3/packages/rfc3986/iri.py @@ -0,0 +1,143 @@ +"""Module containing the implementation of the IRIReference class.""" +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Copyright (c) 2015 Ian Stapleton Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +from . import compat +from . import exceptions +from . import misc +from . import normalizers +from . import uri + + +try: + import idna +except ImportError: # pragma: no cover + idna = None + + +class IRIReference(namedtuple('IRIReference', misc.URI_COMPONENTS), + uri.URIMixin): + """Immutable object representing a parsed IRI Reference. + + Can be encoded into an URIReference object via the procedure + specified in RFC 3987 Section 3.1 + + .. note:: + The IRI submodule is a new interface and may possibly change in + the future. Check for changes to the interface when upgrading. + """ + + slots = () + + def __new__(cls, scheme, authority, path, query, fragment, + encoding='utf-8'): + """Create a new IRIReference.""" + ref = super(IRIReference, cls).__new__( + cls, + scheme or None, + authority or None, + path or None, + query, + fragment) + ref.encoding = encoding + return ref + + def __eq__(self, other): + """Compare this reference to another.""" + other_ref = other + if isinstance(other, tuple): + other_ref = self.__class__(*other) + elif not isinstance(other, IRIReference): + try: + other_ref = self.__class__.from_string(other) + except TypeError: + raise TypeError( + 'Unable to compare {0}() to {1}()'.format( + type(self).__name__, type(other).__name__)) + + # See http://tools.ietf.org/html/rfc3986#section-6.2 + return tuple(self) == tuple(other_ref) + + def _match_subauthority(self): + return misc.ISUBAUTHORITY_MATCHER.match(self.authority) + + @classmethod + def from_string(cls, iri_string, encoding='utf-8'): + """Parse a IRI reference from the given unicode IRI string. + + :param str iri_string: Unicode IRI to be parsed into a reference. + :param str encoding: The encoding of the string provided + :returns: :class:`IRIReference` or subclass thereof + """ + iri_string = compat.to_str(iri_string, encoding) + + split_iri = misc.IRI_MATCHER.match(iri_string).groupdict() + return cls( + split_iri['scheme'], split_iri['authority'], + normalizers.encode_component(split_iri['path'], encoding), + normalizers.encode_component(split_iri['query'], encoding), + normalizers.encode_component(split_iri['fragment'], encoding), + encoding, + ) + + def encode(self, idna_encoder=None): + """Encode an IRIReference into a URIReference instance. + + If the ``idna`` module is installed or the ``rfc3986[idna]`` + extra is used then unicode characters in the IRI host + component will be encoded with IDNA2008. + + :param idna_encoder: + Function that encodes each part of the host component + If not given will raise an exception if the IRI + contains a host component. 
+ :rtype: uri.URIReference
+ :returns: A URI reference
+ """
+ authority = self.authority
+ if authority:
+ if idna_encoder is None:
+ if idna is None: # pragma: no cover
+ raise exceptions.MissingDependencyError(
+ "Could not import the 'idna' module "
+ "and the IRI hostname requires encoding"
+ )
+ else:
+ def idna_encoder(x):
+ try:
+ return idna.encode(x, strict=True, std3_rules=True).lower()
+ except idna.IDNAError:
+ raise exceptions.InvalidAuthority(self.authority)
+
+ authority = ""
+ if self.host:
+ authority = ".".join([compat.to_str(idna_encoder(part))
+ for part in self.host.split(".")])
+
+ if self.userinfo is not None:
+ authority = (normalizers.encode_component(
+ self.userinfo, self.encoding) + '@' + authority)
+
+ if self.port is not None:
+ authority += ":" + str(self.port)
+
+ return uri.URIReference(self.scheme,
+ authority,
+ path=self.path,
+ query=self.query,
+ fragment=self.fragment,
+ encoding=self.encoding)
diff --git a/src/urllib3/packages/rfc3986/misc.py b/src/urllib3/packages/rfc3986/misc.py
index 697039a98c..00f9f3b94d 100644
--- a/src/urllib3/packages/rfc3986/misc.py
+++ b/src/urllib3/packages/rfc3986/misc.py
@@ -58,7 +58,14 @@
 abnf_regexp.PORT_RE))
+HOST_MATCHER = re.compile('^' + abnf_regexp.HOST_RE + '$')
 IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
+IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
+
+# Used by host validator
+IPv6_NO_RFC4007_MATCHER = re.compile(r'^\[%s\]$' % (
+ abnf_regexp.IPv6_ADDRZ_RE
+))
 # Matcher used to validate path components
 PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
@@ -76,7 +83,8 @@
 SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
 RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
- abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE,
+ abnf_regexp.RELATIVE_PART_RE,
+ abnf_regexp.QUERY_RE,
 abnf_regexp.FRAGMENT_RE,
 ))
@@ -87,6 +95,42 @@
 abnf_regexp.QUERY_RE[1:-1],
 ))
+# ###############
+# IRIs / RFC 3987
+# ###############
+
+IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)
+
+ISUBAUTHORITY_MATCHER = re.compile((
+ u'^(?:(?P<userinfo>{0})@)?'
# iuserinfo
+ u'(?P<host>{1})' # ihost
+ u':?(?P<port>{2})?$' # port
+ ).format(abnf_regexp.IUSERINFO_RE,
+ abnf_regexp.IHOST_RE,
+ abnf_regexp.PORT_RE), re.UNICODE)
+
+
+IHOST_MATCHER = re.compile('^' + abnf_regexp.IHOST_RE + '$', re.UNICODE)
+
+IPATH_MATCHER = re.compile(abnf_regexp.IPATH_RE, re.UNICODE)
+
+IQUERY_MATCHER = re.compile(abnf_regexp.IQUERY_RE, re.UNICODE)
+
+IFRAGMENT_MATCHER = re.compile(abnf_regexp.IFRAGMENT_RE, re.UNICODE)
+
+
+RELATIVE_IRI_MATCHER = re.compile(u'^%s(?:\\?%s)?(?:%s)?$' % (
+ abnf_regexp.IRELATIVE_PART_RE,
+ abnf_regexp.IQUERY_RE,
+ abnf_regexp.IFRAGMENT_RE
+), re.UNICODE)
+
+ABSOLUTE_IRI_MATCHER = re.compile(u'^%s:%s(?:\\?%s)?$' % (
+ abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
+ abnf_regexp.IHIER_PART_RE,
+ abnf_regexp.IQUERY_RE[1:-1]
+), re.UNICODE)
+
 # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
 def merge_paths(base_uri, relative_path):
diff --git a/src/urllib3/packages/rfc3986/normalizers.py b/src/urllib3/packages/rfc3986/normalizers.py
index ea6c6e18aa..2eb1bb36f7 100644
--- a/src/urllib3/packages/rfc3986/normalizers.py
+++ b/src/urllib3/packages/rfc3986/normalizers.py
@@ -49,6 +49,21 @@ def normalize_password(password):
 def normalize_host(host):
 """Normalize a host string."""
+ if misc.IPv6_MATCHER.match(host):
+ percent = host.find('%')
+ if percent != -1:
+ percent_25 = host.find('%25')
+
+ # Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25'
+ # from RFC 6874. If the host is '[<IPv6 addr>%25]' then we
+ # assume RFC 4007 and normalize to '[<IPv6 addr>%2525]'
+ if percent_25 == -1 or percent < percent_25 or \
+ (percent == percent_25 and percent_25 == len(host) - 4):
+ host = host.replace('%', '%25', 1)
+
+ # Don't normalize the casing of the Zone ID
+ return host[:percent].lower() + host[percent:]
+
 return host.lower()
@@ -147,6 +162,6 @@ def encode_component(uri_component, encoding):
 or (byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED)):
 encoded_uri.extend(byte)
 continue
- encoded_uri.extend('%{0:02x}'.format(byte_ord).encode())
+ encoded_uri.extend('%{0:02x}'.format(byte_ord).encode().upper())
 return encoded_uri.decode(encoding)
diff --git a/src/urllib3/packages/rfc3986/uri.py b/src/urllib3/packages/rfc3986/uri.py
index 244fff5565..d1d71505e2 100644
--- a/src/urllib3/packages/rfc3986/uri.py
+++ b/src/urllib3/packages/rfc3986/uri.py
@@ -15,16 +15,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import namedtuple
-import warnings
 from . import compat
-from . import exceptions as exc
 from . import misc
 from . import normalizers
-from . import validators
+from ._mixin import URIMixin
-class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)):
+class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS), URIMixin):
 """Immutable object representing a parsed URI Reference.
 .. note::
@@ -116,228 +114,6 @@ def __eq__(self, other):
 naive_equality = tuple(self) == tuple(other_ref)
 return naive_equality or self.normalized_equality(other_ref)
- @classmethod
- def from_string(cls, uri_string, encoding='utf-8'):
- """Parse a URI reference from the given unicode URI string.
-
- :param str uri_string: Unicode URI to be parsed into a reference.
- :param str encoding: The encoding of the string provided - :returns: :class:`URIReference` or subclass thereof - """ - uri_string = compat.to_str(uri_string, encoding) - - split_uri = misc.URI_MATCHER.match(uri_string).groupdict() - return cls( - split_uri['scheme'], split_uri['authority'], - normalizers.encode_component(split_uri['path'], encoding), - normalizers.encode_component(split_uri['query'], encoding), - normalizers.encode_component(split_uri['fragment'], encoding), - encoding, - ) - - def authority_info(self): - """Return a dictionary with the ``userinfo``, ``host``, and ``port``. - - If the authority is not valid, it will raise a - :class:`~rfc3986.exceptions.InvalidAuthority` Exception. - - :returns: - ``{'userinfo': 'username:password', 'host': 'www.example.com', - 'port': '80'}`` - :rtype: dict - :raises rfc3986.exceptions.InvalidAuthority: - If the authority is not ``None`` and can not be parsed. - """ - if not self.authority: - return {'userinfo': None, 'host': None, 'port': None} - - match = misc.SUBAUTHORITY_MATCHER.match(self.authority) - - if match is None: - # In this case, we have an authority that was parsed from the URI - # Reference, but it cannot be further parsed by our - # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid - # authority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - # We had a match, now let's ensure that it is actually a valid host - # address if it is IPv4 - matches = match.groupdict() - host = matches.get('host') - - if (host and misc.IPv4_MATCHER.match(host) and not - validators.valid_ipv4_host_address(host)): - # If we have a host, it appears to be IPv4 and it does not have - # valid bytes, it is an InvalidAuthority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - return matches - - @property - def host(self): - """If present, a string representing the host.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority['host'] - - @property - def port(self): - """If present, the port extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority['port'] - - @property - def userinfo(self): - """If present, the userinfo extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority['userinfo'] - - def is_absolute(self): - """Determine if this URI Reference is an absolute URI. - - See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. - - :returns: ``True`` if it is an absolute URI, ``False`` otherwise. - :rtype: bool - """ - return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) - - def is_valid(self, **kwargs): - """Determine if the URI is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require - the presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require - the presence of the fragment component. - :returns: ``True`` if the URI is valid. ``False`` otherwise. 
- :rtype: bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - validators = [ - (self.scheme_is_valid, kwargs.get('require_scheme', False)), - (self.authority_is_valid, kwargs.get('require_authority', False)), - (self.path_is_valid, kwargs.get('require_path', False)), - (self.query_is_valid, kwargs.get('require_query', False)), - (self.fragment_is_valid, kwargs.get('require_fragment', False)), - ] - return all(v(r) for v, r in validators) - - def authority_is_valid(self, require=False): - """Determine if the authority component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require: - Set to ``True`` to require the presence of this component. - :returns: - ``True`` if the authority is valid. ``False`` otherwise. - :rtype: - bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - try: - self.authority_info() - except exc.InvalidAuthority: - return False - - return validators.authority_is_valid( - self.authority, - host=self.host, - require=require, - ) - - def scheme_is_valid(self, require=False): - """Determine if the scheme component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - return validators.scheme_is_valid(self.scheme, require) - - def path_is_valid(self, require=False): - """Determine if the path component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the path is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - return validators.path_is_valid(self.path, require) - - def query_is_valid(self, require=False): - """Determine if the query component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the query is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - return validators.query_is_valid(self.query, require) - - def fragment_is_valid(self, require=False): - """Determine if the fragment component is valid. - - .. deprecated:: 1.1.0 - - Use the Validator object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn("Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning) - return validators.fragment_is_valid(self.fragment, require) - def normalize(self): """Normalize this reference as described in Section 6.2.2. 
@@ -357,136 +133,21 @@ def normalize(self): normalizers.normalize_fragment(self.fragment), self.encoding) - def normalized_equality(self, other_ref): - """Compare this URIReference to another URIReference. + @classmethod + def from_string(cls, uri_string, encoding='utf-8'): + """Parse a URI reference from the given unicode URI string. - :param URIReference other_ref: (required), The reference with which - we're comparing. - :returns: ``True`` if the references are equal, ``False`` otherwise. - :rtype: bool + :param str uri_string: Unicode URI to be parsed into a reference. + :param str encoding: The encoding of the string provided + :returns: :class:`URIReference` or subclass thereof """ - return tuple(self.normalize()) == tuple(other_ref.normalize()) - - def resolve_with(self, base_uri, strict=False): - """Use an absolute URI Reference to resolve this relative reference. - - Assuming this is a relative reference that you would like to resolve, - use the provided base URI to resolve it. - - See http://tools.ietf.org/html/rfc3986#section-5 for more information. + uri_string = compat.to_str(uri_string, encoding) - :param base_uri: Either a string or URIReference. It must be an - absolute URI or it will raise an exception. - :returns: A new URIReference which is the result of resolving this - reference using ``base_uri``. - :rtype: :class:`URIReference` - :raises rfc3986.exceptions.ResolutionError: - If the ``base_uri`` is not an absolute URI. - """ - if not isinstance(base_uri, URIReference): - base_uri = URIReference.from_string(base_uri) - - if not base_uri.is_absolute(): - raise exc.ResolutionError(base_uri) - - # This is optional per - # http://tools.ietf.org/html/rfc3986#section-5.2.1 - base_uri = base_uri.normalize() - - # The reference we're resolving - resolving = self - - if not strict and resolving.scheme == base_uri.scheme: - resolving = resolving.copy_with(scheme=None) - - # http://tools.ietf.org/html/rfc3986#page-32 - if resolving.scheme is not None: - target = resolving.copy_with( - path=normalizers.normalize_path(resolving.path) - ) - else: - if resolving.authority is not None: - target = resolving.copy_with( - scheme=base_uri.scheme, - path=normalizers.normalize_path(resolving.path) - ) - else: - if resolving.path is None: - if resolving.query is not None: - query = resolving.query - else: - query = base_uri.query - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=base_uri.path, - query=query - ) - else: - if resolving.path.startswith('/'): - path = normalizers.normalize_path(resolving.path) - else: - path = normalizers.normalize_path( - misc.merge_paths(base_uri, resolving.path) - ) - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=path, - query=resolving.query - ) - return target - - def unsplit(self): - """Create a URI string from the components. - - :returns: The URI Reference reconstituted as a string. 
- :rtype: str - """ - # See http://tools.ietf.org/html/rfc3986#section-5.3 - result_list = [] - if self.scheme: - result_list.extend([self.scheme, ':']) - if self.authority: - result_list.extend(['//', self.authority]) - if self.path: - result_list.append(self.path) - if self.query is not None: - result_list.extend(['?', self.query]) - if self.fragment is not None: - result_list.extend(['#', self.fragment]) - return ''.join(result_list) - - def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting, - path=misc.UseExisting, query=misc.UseExisting, - fragment=misc.UseExisting): - """Create a copy of this reference with the new components. - - :param str scheme: - (optional) The scheme to use for the new reference. - :param str authority: - (optional) The authority to use for the new reference. - :param str path: - (optional) The path to use for the new reference. - :param str query: - (optional) The query to use for the new reference. - :param str fragment: - (optional) The fragment to use for the new reference. - :returns: - New URIReference with provided components. - :rtype: - URIReference - """ - attributes = { - 'scheme': scheme, - 'authority': authority, - 'path': path, - 'query': query, - 'fragment': fragment, - } - for key, value in list(attributes.items()): - if value is misc.UseExisting: - del attributes[key] - uri = self._replace(**attributes) - uri.encoding = self.encoding - return uri + split_uri = misc.URI_MATCHER.match(uri_string).groupdict() + return cls( + split_uri['scheme'], split_uri['authority'], + normalizers.encode_component(split_uri['path'], encoding), + normalizers.encode_component(split_uri['query'], encoding), + normalizers.encode_component(split_uri['fragment'], encoding), + encoding, + ) diff --git a/src/urllib3/packages/rfc3986/validators.py b/src/urllib3/packages/rfc3986/validators.py index c781325e03..7fc97215b1 100644 --- a/src/urllib3/packages/rfc3986/validators.py +++ b/src/urllib3/packages/rfc3986/validators.py @@ -304,8 +304,28 @@ def authority_is_valid(authority, host=None, require=False): bool """ validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require) + if validated and host is not None: + return host_is_valid(host, require) + return validated + + +def host_is_valid(host, require=False): + """Determine if the host string is valid. + + :param str host: + The host to validate. + :param bool require: + (optional) Specify if host must not be None. + :returns: + ``True`` if valid, ``False`` otherwise + :rtype: + bool + """ + validated = is_valid(host, misc.HOST_MATCHER, require) if validated and host is not None and misc.IPv4_MATCHER.match(host): return valid_ipv4_host_address(host) + elif validated and host is not None and misc.IPv6_MATCHER.match(host): + return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None return validated @@ -395,7 +415,9 @@ def subauthority_component_is_valid(uri, component): # If we can parse the authority into sub-components and we're not # validating the port, we can assume it's valid. 
- if component != 'port': + if component == 'host': + return host_is_valid(subauthority_dict['host']) + elif component != 'port': return True try: diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index e12278b53d..0127e2fe5c 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -4,7 +4,8 @@ from ..exceptions import LocationParseError from ..packages import six, rfc3986 -from ..packages.rfc3986.exceptions import RFC3986Exception +from ..packages.rfc3986.exceptions import RFC3986Exception, ValidationError +from ..packages.rfc3986.validators import Validator url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -14,12 +15,12 @@ NORMALIZABLE_SCHEMES = ('http', 'https', None) # Regex for detecting URLs with schemes. RFC 3986 Section 3.1 -SCHEME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9+\-.]*://") +SCHEME_REGEX = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+\-]*:|/)") class Url(namedtuple('Url', url_attrs)): """ - Datastructure for representing an HTTP URL. Used as a return value for + Data structure for representing an HTTP URL. Used as a return value for :func:`parse_url`. Both the scheme and host are normalized as they are both case-insensitive according to RFC 3986. """ @@ -29,10 +30,8 @@ def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): if path and not path.startswith('/'): path = '/' + path - if scheme: + if scheme is not None: scheme = scheme.lower() - if host and scheme in NORMALIZABLE_SCHEMES: - host = host.lower() return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) @@ -78,23 +77,23 @@ def url(self): 'http://username:password@host.com:80/path?query#fragment' """ scheme, auth, host, port, path, query, fragment = self - url = '' + url = u'' # We use "is not None" we want things to happen with empty strings (or 0 port) if scheme is not None: - url += scheme + '://' + url += scheme + u'://' if auth is not None: - url += auth + '@' + url += auth + u'@' if host is not None: url += host if port is not None: - url += ':' + str(port) + url += u':' + str(port) if path is not None: url += path if query is not None: - url += '?' + query + url += u'?' + query if fragment is not None: - url += '#' + fragment + url += u'#' + fragment return url @@ -104,7 +103,7 @@ def __str__(self): def split_first(s, delims): """ - Deprecated. No longer used by parse_url(). + .. deprecated:: 1.25 Given a string and an iterable of delimiters, split on the first found delimiter. Return two split parts and the matched delimiter. 
@@ -161,6 +160,8 @@ def parse_url(url): return Url() is_string = not isinstance(url, six.binary_type) + if not is_string: + url = url.decode("utf-8") # RFC 3986 doesn't like URLs that have a host but don't start # with a scheme and we support URLs like that so we need to @@ -171,22 +172,53 @@ def parse_url(url): url = "//" + url try: - parse_result = rfc3986.urlparse(url, encoding="utf-8") + iri_ref = rfc3986.IRIReference.from_string(url, encoding="utf-8") except (ValueError, RFC3986Exception): + six.raise_from(LocationParseError(url), None) + + def idna_encode(name): + if name and any([ord(x) > 128 for x in name]): + try: + import idna + except ImportError: + raise LocationParseError("Unable to parse URL without the 'idna' module") + try: + return idna.encode(name, strict=True, std3_rules=True).lower() + except idna.IDNAError: + raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) + return name + + has_authority = iri_ref.authority is not None + uri_ref = iri_ref.encode(idna_encoder=idna_encode) + + # rfc3986 strips the authority if it's invalid + if has_authority and uri_ref.authority is None: raise LocationParseError(url) - # RFC 3986 doesn't assert ports must be non-negative. - if parse_result.port and parse_result.port < 0: - raise LocationParseError(url) + # Only normalize schemes we understand to not break http+unix + # or other schemes that don't follow RFC 3986. + if uri_ref.scheme is None or uri_ref.scheme.lower() in NORMALIZABLE_SCHEMES: + uri_ref = uri_ref.normalize() + + # Validate all URIReference components and ensure that all + # components that were set before are still set after + # normalization has completed. + validator = Validator() + try: + validator.check_validity_of( + *validator.COMPONENT_NAMES + ).validate(uri_ref) + except ValidationError: + six.raise_from(LocationParseError(url), None) # For the sake of backwards compatibility we put empty # string values for path if there are any defined values # beyond the path in the URL. # TODO: Remove this when we break backwards compatibility. 
- path = parse_result.path + path = uri_ref.path if not path: - if (parse_result.query is not None - or parse_result.fragment is not None): + if (uri_ref.query is not None + or uri_ref.fragment is not None): path = "" else: path = None @@ -201,13 +233,13 @@ def to_input_type(x): return x return Url( - scheme=to_input_type(parse_result.scheme), - auth=to_input_type(parse_result.userinfo), - host=to_input_type(parse_result.hostname), - port=parse_result.port, + scheme=to_input_type(uri_ref.scheme), + auth=to_input_type(uri_ref.userinfo), + host=to_input_type(uri_ref.host), + port=int(uri_ref.port) if uri_ref.port is not None else None, path=to_input_type(path), - query=to_input_type(parse_result.query), - fragment=to_input_type(parse_result.fragment) + query=to_input_type(uri_ref.query), + fragment=to_input_type(uri_ref.fragment) ) diff --git a/test/test_util.py b/test/test_util.py index ac527355a1..b8ab2e6862 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -131,12 +131,24 @@ def test_invalid_host(self, location): with pytest.raises(LocationParseError): get_host(location) + @pytest.mark.parametrize('url', [ + 'http://user\\@google.com', + 'http://google\\.com', + 'user\\@google.com', + 'http://google.com#fragment#', + 'http://user@user@google.com/', + ]) + def test_invalid_url(self, url): + with pytest.raises(LocationParseError): + parse_url(url) + @pytest.mark.parametrize('url, expected_normalized_url', [ ('HTTP://GOOGLE.COM/MAIL/', 'http://google.com/MAIL/'), ('HTTP://JeremyCline:Hunter2@Example.com:8080/', 'http://JeremyCline:Hunter2@example.com:8080/'), ('HTTPS://Example.Com/?Key=Value', 'https://example.com/?Key=Value'), ('Https://Example.Com/#Fragment', 'https://example.com/#Fragment'), + ('[::Ff%etH0%Ff]/%ab%Af', '[::ff%25etH0%Ff]/%AB%AF'), ]) def test_parse_url_normalization(self, url, expected_normalized_url): """Assert parse_url normalizes the scheme/host, and only the scheme/host""" @@ -155,8 +167,7 @@ def test_parse_url_normalization(self, url, expected_normalized_url): # Path/query/fragment ('', Url()), ('/', Url(path='/')), - ('/abc/../def', Url(path="/abc/../def")), - ('#?/!google.com/?foo#bar', Url(path='', fragment='?/!google.com/?foo#bar')), + ('#?/!google.com/?foo', Url(path='', fragment='?/!google.com/?foo')), ('/foo', Url(path='/foo')), ('/foo?bar=baz', Url(path='/foo', query='bar=baz')), ('/foo?bar=baz#banana?apple/orange', Url(path='/foo', @@ -173,10 +184,10 @@ def test_parse_url_normalization(self, url, expected_normalized_url): # Auth ('http://foo:bar@localhost/', Url('http', auth='foo:bar', host='localhost', path='/')), ('http://foo@localhost/', Url('http', auth='foo', host='localhost', path='/')), - ('http://foo:bar@baz@localhost/', Url('http', - auth='foo:bar@baz', - host='localhost', - path='/')), + ('http://foo:bar@localhost/', Url('http', + auth='foo:bar', + host='localhost', + path='/')), # Unicode type (Python 2.x) (u'http://foo:bar@localhost/', Url(u'http', @@ -194,6 +205,9 @@ def test_parse_url_normalization(self, url, expected_normalized_url): ('?', Url(path='', query='')), ('#', Url(path='', fragment='')), + # Path normalization + ('/abc/../def', Url(path="/def")), + # Empty Port ('http://google.com:', Url('http', host='google.com')), ('http://google.com:/', Url('http', host='google.com', path='/')), @@ -211,6 +225,23 @@ def test_parse_url(self, url, expected_url): def test_unparse_url(self, url, expected_url): assert url == expected_url.url + @pytest.mark.parametrize( + ['url', 'expected_url'], + [ + # RFC 3986 5.2.4 + ('/abc/../def', 
Url(path="/def")), + ('/..', Url(path="/")), + ('/./abc/./def/', Url(path='/abc/def/')), + ('/.', Url(path='/')), + ('/./', Url(path='/')), + ('/abc/./.././d/././e/.././f/./../../ghi', Url(path='/ghi')) + ] + ) + def test_parse_and_normalize_url_paths(self, url, expected_url): + actual_url = parse_url(url) + assert actual_url == expected_url + assert actual_url.url == expected_url.url + def test_parse_url_invalid_IPv6(self): with pytest.raises(LocationParseError): parse_url('[::1') @@ -260,12 +291,36 @@ def test_netloc(self, url, expected_netloc): # CVE-2016-5699 ("http://127.0.0.1%0d%0aConnection%3a%20keep-alive", - Url("http", host="127.0.0.1%0d%0aConnection%3a%20keep-alive")), + Url("http", host="127.0.0.1%0d%0aconnection%3a%20keep-alive")), # NodeJS unicode -> double dot (u"http://google.com/\uff2e\uff2e/abc", Url("http", host="google.com", - path='/%ef%bc%ae%ef%bc%ae/abc')) + path='/%EF%BC%AE%EF%BC%AE/abc')), + + # Scheme without :// + ("javascript:a='@google.com:12345/';alert(0)", + Url(scheme="javascript", + path="a='@google.com:12345/';alert(0)")), + + ("//google.com/a/b/c", Url(host="google.com", path="/a/b/c")), + + # International URLs + (u'http://ヒ:キ@ヒ.abc.ニ/ヒ?キ#ワ', Url(u'http', + host=u'xn--pdk.abc.xn--idk', + auth=u'%E3%83%92:%E3%82%AD', + path=u'/%E3%83%92', + query=u'%E3%82%AD', + fragment=u'%E3%83%AF')), + + # Injected headers (CVE-2016-5699, CVE-2019-9740, CVE-2019-9947) + ("10.251.0.83:7777?a=1 HTTP/1.1\r\nX-injected: header", + Url(host='10.251.0.83', port=7777, path='', + query='a=1%20HTTP/1.1%0D%0AX-injected:%20header')), + + ("http://127.0.0.1:6379?\r\nSET test failure12\r\n:8080/test/?test=a", + Url(scheme='http', host='127.0.0.1', port=6379, path='', + query='%0D%0ASET%20test%20failure12%0D%0A:8080/test/?test=a')), ] @pytest.mark.parametrize("url, expected_url", url_vulnerabilities) From bbeeae20cc9af5f9c833da8d35a95a42fad8393e Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Sun, 21 Apr 2019 19:56:42 -0500 Subject: [PATCH 14/48] Apply lowercasing before IDNA-encoding (#1569) --- src/urllib3/util/url.py | 2 +- test/test_util.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index 0127e2fe5c..de3c4686ec 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -183,7 +183,7 @@ def idna_encode(name): except ImportError: raise LocationParseError("Unable to parse URL without the 'idna' module") try: - return idna.encode(name, strict=True, std3_rules=True).lower() + return idna.encode(name.lower(), strict=True, std3_rules=True) except idna.IDNAError: raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) return name diff --git a/test/test_util.py b/test/test_util.py index b8ab2e6862..c86170cfad 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -211,6 +211,10 @@ def test_parse_url_normalization(self, url, expected_normalized_url): # Empty Port ('http://google.com:', Url('http', host='google.com')), ('http://google.com:/', Url('http', host='google.com', path='/')), + + # Uppercase IRI + (u'http://Königsgäßchen.de/straße', + Url('http', host='xn--knigsgchen-b4a3dun.de', path='/stra%C3%9Fe')) ] @pytest.mark.parametrize( From c3157af5e3b272d3fbffef23ee6215a229f8c61f Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 22 Apr 2019 08:52:22 -0500 Subject: [PATCH 15/48] Release 1.25 (#1568) --- CHANGES.rst | 20 +++++++++----------- _travis/downstream/requests-requirements.txt | 9 +++++++++ _travis/downstream/requests.sh | 9 +++------ 
src/urllib3/__init__.py | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 _travis/downstream/requests-requirements.txt diff --git a/CHANGES.rst b/CHANGES.rst index c7a996d5f9..382955068b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,34 +1,32 @@ Changes ======= -dev (master) ------------- +1.25 (2019-04-22) +----------------- -* Implemented a more efficient ``HTTPResponse.__iter__()`` method. (Issue #1483) +* Require and validate certificates by default when using HTTPS (Pull #1507) * Upgraded ``urllib3.utils.parse_url()`` to be RFC 3986 compliant. (Pull #1487) * Added support for ``key_password`` for ``HTTPSConnectionPool`` to use encrypted ``key_file`` without creating your own ``SSLContext`` object. (Pull #1489) +* Add TLSv1.3 support to CPython, pyOpenSSL, and SecureTransport ``SSLContext`` + implementations. (Pull #1496) + +* Switched the default multipart header encoder from RFC 2231 to HTML 5 working draft. (Issue #303, PR #1492) + * Fixed issue where OpenSSL would block if an encrypted client private key was given and no password was given. Instead an ``SSLError`` is raised. (Pull #1489) -* Require and validate certificates by default when using HTTPS (Pull #1507) - * Added support for Brotli content encoding. It is enabled automatically if ``brotlipy`` package is installed which can be requested with ``urllib3[brotli]`` extra. (Pull #1532) -* Add TLSv1.3 support to CPython, pyOpenSSL, and SecureTransport ``SSLContext`` - implementations. (Pull #1496) - * Drop ciphers using DSS key exchange from default TLS cipher suites. Improve default ciphers when using SecureTransport. (Pull #1496) -* Switched the default multipart header encoder from RFC 2231 to HTML 5 working draft. (Issue #303, PR #1492) - -* ... [Short description of non-trivial change.] (Issue #) +* Implemented a more efficient ``HTTPResponse.__iter__()`` method. (Issue #1483) 1.24.2 (2019-04-17) diff --git a/_travis/downstream/requests-requirements.txt b/_travis/downstream/requests-requirements.txt new file mode 100644 index 0000000000..82436b1649 --- /dev/null +++ b/_travis/downstream/requests-requirements.txt @@ -0,0 +1,9 @@ +pytest-mock +pysocks +httpbin + +# kennethreitz/requests#5049 +pytest<4.1 + +# kennethreitz/requests#5004 +pytest-httpbin==0.3.0 diff --git a/_travis/downstream/requests.sh b/_travis/downstream/requests.sh index 3c5f4551db..29ffde3e5f 100755 --- a/_travis/downstream/requests.sh +++ b/_travis/downstream/requests.sh @@ -7,15 +7,12 @@ case "${1}" in git clone --depth 1 https://github.com/kennethreitz/requests cd requests git rev-parse HEAD - python -m pip install --upgrade pipenv - pipenv install --dev --skip-lock - - # See: kennethreitz/requests/5004 - python -m pip install pytest-httpbin==0.3.0 + python -m pip install -r ${TRAVIS_BUILD_DIR}/_travis/downstream/requests-requirements.txt + python -m pip install . 
;; run) cd requests - pipenv run py.test -n 8 --boxed + pytest tests/ ;; *) exit 1 diff --git a/src/urllib3/__init__.py b/src/urllib3/__init__.py index f010fc4ed4..c0e750d361 100644 --- a/src/urllib3/__init__.py +++ b/src/urllib3/__init__.py @@ -26,7 +26,7 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = 'dev' +__version__ = '1.25' __all__ = ( 'HTTPConnectionPool', From 0144514d1b8f0d4980bd95c59b6af2cd1874482c Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 22 Apr 2019 11:04:15 -0500 Subject: [PATCH 16/48] Restore dev version (#1570) --- CHANGES.rst | 6 ++++++ src/urllib3/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 382955068b..cee9719b55 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes ======= +dev (master) +------------ + +* ... [Short description of non-trivial change.] (Issue #) + + 1.25 (2019-04-22) ----------------- diff --git a/src/urllib3/__init__.py b/src/urllib3/__init__.py index c0e750d361..f010fc4ed4 100644 --- a/src/urllib3/__init__.py +++ b/src/urllib3/__init__.py @@ -26,7 +26,7 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.25' +__version__ = 'dev' __all__ = ( 'HTTPConnectionPool', From ecb1c7a2d5862c3728f01bca9d21809e953654fa Mon Sep 17 00:00:00 2001 From: Elvis Pranskevichus Date: Mon, 22 Apr 2019 13:09:07 -0400 Subject: [PATCH 17/48] Add compatibility with Google's 'Brotli' package (#1572) --- src/urllib3/response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/urllib3/response.py b/src/urllib3/response.py index 7629cbb10f..b0a80a7378 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -341,7 +341,7 @@ def _init_decoder(self): DECODER_ERROR_CLASSES = (IOError, zlib.error) if brotli is not None: - DECODER_ERROR_CLASSES += (brotli.Error,) + DECODER_ERROR_CLASSES += (brotli.error,) def _decode(self, data, decode_content, flush_decoder): """ From 1e468883a03491c806fb1a55ace17da82582c17a Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 22 Apr 2019 22:01:58 -0500 Subject: [PATCH 18/48] Add integration tests for Google's Brotli package (#1573) --- .travis.yml | 10 ++++++++++ CHANGES.rst | 2 ++ _travis/downstream/google-brotli.sh | 17 +++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100755 _travis/downstream/google-brotli.sh diff --git a/.travis.yml b/.travis.yml index 0710c1c573..664699c017 100644 --- a/.travis.yml +++ b/.travis.yml @@ -103,6 +103,16 @@ matrix: sudo: required stage: integration + - python: 2.7 + env: DOWNSTREAM=google-brotli + stage: integration + + - python: 3.7 + env: DOWNSTREAM=google-brotli + dist: xenial + sudo: required + stage: integration + allow_failures: - python: pypy-5.4 diff --git a/CHANGES.rst b/CHANGES.rst index cee9719b55..b35388524f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,8 @@ Changes dev (master) ------------ +* Add support for Google's ``Brotli`` package. (Pull #1752) + * ... [Short description of non-trivial change.] (Issue #) diff --git a/_travis/downstream/google-brotli.sh b/_travis/downstream/google-brotli.sh new file mode 100755 index 0000000000..535a4a927a --- /dev/null +++ b/_travis/downstream/google-brotli.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -exo pipefail + +case "${1}" in + install) + # Because Google's 'Brotli' package shares an importable name with + # 'brotlipy' we need to make sure both implementations don't break. 
+ python -m pip install Brotli + ;; + run) + pytest tests/ + ;; + *) + exit 1 + ;; +esac From d7ece604359520f1c80c0b28ce711244640ab907 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 23 Apr 2019 07:54:14 -0500 Subject: [PATCH 19/48] Change deploy stage to have it's own job and not run unit tests (#1574) --- .travis.yml | 53 ++++++++++---------------------------- _travis/upload_coverage.sh | 8 +++--- 2 files changed, 18 insertions(+), 43 deletions(-) diff --git a/.travis.yml b/.travis.yml index 664699c017..ef4673670a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python sudo: false stage: test +dist: xenial before_install: - env @@ -50,16 +51,10 @@ matrix: env: TOXENV=py36 - python: 3.7 env: TOXENV=py37 - dist: xenial - sudo: required - python: 3.7 env: TOXENV=py37-nobrotli - dist: xenial - sudo: required - python: 3.8-dev env: TOXENV=py38 - dist: xenial - sudo: required - python: pypy-5.4 env: TOXENV=pypy @@ -89,8 +84,6 @@ matrix: - python: 3.7 env: DOWNSTREAM=requests - dist: xenial - sudo: required stage: integration - python: 2.7 @@ -99,8 +92,6 @@ matrix: - python: 3.7 env: DOWNSTREAM=botocore - dist: xenial - sudo: required stage: integration - python: 2.7 @@ -109,42 +100,24 @@ matrix: - python: 3.7 env: DOWNSTREAM=google-brotli - dist: xenial - sudo: required stage: integration + - python: 3.7 + stage: deploy + script: + - ./_travis/deploy.sh + allow_failures: - python: pypy-5.4 stages: - - test + - name: test + if: tag IS blank # Run integration tests for release candidates - name: integration - if: type = pull_request AND head_branch =~ ^release-[\d.]+$ - -deploy: - - provider: script - script: bash _travis/deploy.sh - skip_cleanup: true - on: - branch: master - repo: urllib3/urllib3 - tags: true - python: 3.7 - - - provider: releases - api_key: - secure: ... 
# GitHub access token - name: "$TRAVIS_TAG" - body: "Release $TRAVIS_TAG" - draft: true - skip_cleanup: true - file_glob: true - file: dist/* - overwrite: true - on: - branch: master - repo: urllib3/urllib3 - tags: true - python: 3.7 + if: type = pull_request AND head_branch =~ ^release-[\d.]+$ AND tag IS blank + + # Deploy on any tags + - name: deploy + if: branch = master AND tag IS present diff --git a/_travis/upload_coverage.sh b/_travis/upload_coverage.sh index 812e9e0a57..fa4816d675 100755 --- a/_travis/upload_coverage.sh +++ b/_travis/upload_coverage.sh @@ -2,6 +2,8 @@ set -exo pipefail -source .tox/${TOXENV}/bin/activate -pip install codecov -codecov --env TRAVIS_OS_NAME,TOXENV +if [[ -e .coverage ]]; then + source .tox/${TOXENV}/bin/activate + pip install codecov + codecov --env TRAVIS_OS_NAME,TOXENV +fi From ff8f7219d98a4f939260020244d0769bef81b305 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 23 Apr 2019 21:59:06 -0500 Subject: [PATCH 20/48] Upgrade rfc3986 to v1.3.1 (#1578) --- src/urllib3/packages/rfc3986/__init__.py | 2 +- src/urllib3/packages/rfc3986/iri.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/urllib3/packages/rfc3986/__init__.py b/src/urllib3/packages/rfc3986/__init__.py index 13a786dfb0..9d3c3bc92b 100644 --- a/src/urllib3/packages/rfc3986/__init__.py +++ b/src/urllib3/packages/rfc3986/__init__.py @@ -36,7 +36,7 @@ __author_email__ = 'graffatcolmingov@gmail.com' __license__ = 'Apache v2.0' __copyright__ = 'Copyright 2014 Rackspace' -__version__ = '1.3.0' +__version__ = '1.3.1' __all__ = ( 'ParseResult', diff --git a/src/urllib3/packages/rfc3986/iri.py b/src/urllib3/packages/rfc3986/iri.py index 2c708d853a..9c01fe1cd0 100644 --- a/src/urllib3/packages/rfc3986/iri.py +++ b/src/urllib3/packages/rfc3986/iri.py @@ -94,7 +94,7 @@ def from_string(cls, iri_string, encoding='utf-8'): encoding, ) - def encode(self, idna_encoder=None): + def encode(self, idna_encoder=None): # noqa: C901 """Encode an IRIReference into a URIReference instance. 
If the ``idna`` module is installed or the ``rfc3986[idna]`` @@ -116,12 +116,16 @@ def encode(self, idna_encoder=None): "Could not import the 'idna' module " "and the IRI hostname requires encoding" ) - else: - def idna_encoder(x): + + def idna_encoder(name): + if any(ord(c) > 128 for c in name): try: - return idna.encode(x, strict=True, std3_rules=True).lower() + return idna.encode(name.lower(), + strict=True, + std3_rules=True) except idna.IDNAError: raise exceptions.InvalidAuthority(self.authority) + return name authority = "" if self.host: From 64e413f1b2fef86a150ae747f00aab0e2be8e59c Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 24 Apr 2019 07:18:44 -0500 Subject: [PATCH 21/48] Add first-class support for Brotli package (#1579) --- .travis.yml | 12 ++++-------- CHANGES.rst | 2 +- _travis/downstream/google-brotli.sh | 17 ----------------- src/urllib3/response.py | 15 +++++++++++---- tox.ini | 14 +++++++++++++- 5 files changed, 29 insertions(+), 31 deletions(-) delete mode 100755 _travis/downstream/google-brotli.sh diff --git a/.travis.yml b/.travis.yml index ef4673670a..74d7dd3bfc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,6 +43,8 @@ matrix: env: TOXENV=py27 - python: 2.7 env: TOXENV=py27-nobrotli + - python: 2.7 + env: TOXENV=py27-google-brotli - python: 3.4 env: TOXENV=py34 - python: 3.5 @@ -53,6 +55,8 @@ matrix: env: TOXENV=py37 - python: 3.7 env: TOXENV=py37-nobrotli + - python: 3.7 + env: TOXENV=py37-google-brotli - python: 3.8-dev env: TOXENV=py38 @@ -94,14 +98,6 @@ matrix: env: DOWNSTREAM=botocore stage: integration - - python: 2.7 - env: DOWNSTREAM=google-brotli - stage: integration - - - python: 3.7 - env: DOWNSTREAM=google-brotli - stage: integration - - python: 3.7 stage: deploy script: diff --git a/CHANGES.rst b/CHANGES.rst index b35388524f..a1a9f7898b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes dev (master) ------------ -* Add support for Google's ``Brotli`` package. (Pull #1752) +* Add support for Google's ``Brotli`` package. (Pull #1572, Pull #1579) * ... [Short description of non-trivial change.] (Issue #) diff --git a/_travis/downstream/google-brotli.sh b/_travis/downstream/google-brotli.sh deleted file mode 100755 index 535a4a927a..0000000000 --- a/_travis/downstream/google-brotli.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -exo pipefail - -case "${1}" in - install) - # Because Google's 'Brotli' package shares an importable name with - # 'brotlipy' we need to make sure both implementations don't break. - python -m pip install Brotli - ;; - run) - pytest tests/ - ;; - *) - exit 1 - ;; -esac diff --git a/src/urllib3/response.py b/src/urllib3/response.py index b0a80a7378..4f857932c5 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -97,14 +97,21 @@ def decompress(self, data): if brotli is not None: class BrotliDecoder(object): + # Supports both 'brotlipy' and 'Brotli' packages + # since they share an import name. 
The top branches + # are for 'brotlipy' and bottom branches for 'Brotli' def __init__(self): self._obj = brotli.Decompressor() - def __getattr__(self, name): - return getattr(self._obj, name) - def decompress(self, data): - return self._obj.decompress(data) + if hasattr(self._obj, 'decompress'): + return self._obj.decompress(data) + return self._obj.process(data) + + def flush(self): + if hasattr(self._obj, 'flush'): + return self._obj.flush() + return b'' class MultiDecoder(object): diff --git a/tox.ini b/tox.ini index f2e46a0b50..841e528985 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = flake8-py3, py27, py34, py35, py36, py37, py38, pypy, py{27,37}-nobrotli +envlist = flake8-py3, py27, py34, py35, py36, py37, py38, pypy, py{27,37}-nobrotli, py{27,37}-google-brotli [testenv] deps= -r{toxinidir}/dev-requirements.txt @@ -21,6 +21,18 @@ setenv = PYTHONWARNINGS=always::DeprecationWarning passenv = CFLAGS LDFLAGS TRAVIS APPVEYOR CRYPTOGRAPHY_OSX_NO_LINK_FLAGS TRAVIS_INFRA +[testenv:py37-google-brotli] +extras = socks,secure +deps = + {[testenv]deps} + Brotli + +[testenv:py27-google-brotli] +extras = socks,secure +deps = + {[testenv]deps} + Brotli + [testenv:py27-nobrotli] extras = socks,secure From 85e7798378fe1eda5c7425c0141c045974bd976f Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 24 Apr 2019 10:16:43 -0500 Subject: [PATCH 22/48] Release 1.25.1 (#1581) --- CHANGES.rst | 6 +++--- src/urllib3/__init__.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a1a9f7898b..29d08f8635 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,12 +1,12 @@ Changes ======= -dev (master) ------------- +1.25.1 (2019-04-24) +------------------- * Add support for Google's ``Brotli`` package. (Pull #1572, Pull #1579) -* ... [Short description of non-trivial change.] (Issue #) +* Upgrade bundled rfc3986 to v1.3.1 (Pull #1578) 1.25 (2019-04-22) diff --git a/src/urllib3/__init__.py b/src/urllib3/__init__.py index f010fc4ed4..c816272a2a 100644 --- a/src/urllib3/__init__.py +++ b/src/urllib3/__init__.py @@ -26,7 +26,7 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = 'dev' +__version__ = '1.25.1' __all__ = ( 'HTTPConnectionPool', From 2001292974c0c98e1f06a51494babd5c601285d8 Mon Sep 17 00:00:00 2001 From: Thea Flowers Date: Wed, 24 Apr 2019 10:32:45 -0700 Subject: [PATCH 23/48] Switch to Nox (#1580) * Fix appveyor config escaping (I think, ugh yaml) * Attempt to fix appveyor run script and help mac find nox * Really, AppVeyor? * Another attempt to get mac and appveyor working. * Comb in brotli changes from #1579 * Comb in pypy changes from #1576 --- .travis.yml | 56 +++++++++++++++++---------------- _travis/install.sh | 60 +++++++++++++++++++++++++++--------- _travis/run.sh | 12 +++++--- appveyor.yml | 38 +++++------------------ noxfile.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 71 ------------------------------------------ 6 files changed, 168 insertions(+), 146 deletions(-) create mode 100644 noxfile.py delete mode 100644 tox.ini diff --git a/.travis.yml b/.travis.yml index 74d7dd3bfc..27e45410f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,6 @@ notifications: env: global: - - GAE_SDK_PATH=${HOME}/.cache/google_appengine - PYTHONWARNINGS=always::DeprecationWarning - PYPI_USERNAME=urllib3 @@ -34,54 +33,56 @@ env: matrix: include: + # Lint & documentation. 
- python: 3.6 - env: TOXENV=flake8-py3 + env: NOX_SESSION=lint - python: 3.6 - env: TOXENV=docs + env: NOX_SESSION=docs + # Unit tests - python: 2.7 - env: TOXENV=py27 - - python: 2.7 - env: TOXENV=py27-nobrotli - - python: 2.7 - env: TOXENV=py27-google-brotli + env: NOX_SESSION=test-2.7 - python: 3.4 - env: TOXENV=py34 + env: NOX_SESSION=test-3.4 - python: 3.5 - env: TOXENV=py35 + env: NOX_SESSION=test-3.5 - python: 3.6 - env: TOXENV=py36 - - python: 3.7 - env: TOXENV=py37 - - python: 3.7 - env: TOXENV=py37-nobrotli + env: NOX_SESSION=test-3.6 - python: 3.7 - env: TOXENV=py37-google-brotli + env: NOX_SESSION=test-3.7 - python: 3.8-dev - env: TOXENV=py38 - - - python: pypy-5.4 - env: TOXENV=pypy + env: NOX_SESSION=test-3.8 + - python: pypy2.7-6.0 + env: NOX_SESSION=test-pypy + - python: pypy3.5-6.0 + env: NOX_SESSION=test-pypy + # Extras + - python: 2.7 + env: NOX_SESSION=app_engine GAE_SDK_PATH=${HOME}/.cache/google_appengine - python: 2.7 - env: TOXENV=gae + env: NOX_SESSION=google_brotli-2 + - python: 3.7 + env: NOX_SESSION=google_brotli-3 + # OS X unit tests. - language: generic os: osx - env: TOXENV=py27 + env: NOX_SESSION=test-2.7 - language: generic os: osx - env: TOXENV=py34 + env: NOX_SESSION=test-3.4 - language: generic os: osx - env: TOXENV=py35 + env: NOX_SESSION=test-3.5 - language: generic os: osx - env: TOXENV=py36 + env: NOX_SESSION=test-3.6 - language: generic os: osx - env: TOXENV=py37 + env: NOX_SESSION=test-3.7 + # Downstream integration tests. - python: 2.7 env: DOWNSTREAM=requests stage: integration @@ -104,7 +105,8 @@ matrix: - ./_travis/deploy.sh allow_failures: - - python: pypy-5.4 + - python: pypy3.5-6.0 + - python: pypy2.7-6.0 stages: - name: test diff --git a/_travis/install.sh b/_travis/install.sh index 6bdfa58f69..558e4eddf7 100755 --- a/_travis/install.sh +++ b/_travis/install.sh @@ -2,32 +2,64 @@ set -exo pipefail -if [[ "$(uname -s)" == 'Darwin' ]]; then - case "${TOXENV}" in - py27) MACPYTHON=2.7.15 ;; - py34) MACPYTHON=3.4.4 ;; - py35) MACPYTHON=3.5.4 ;; - py36) MACPYTHON=3.6.7 ;; - py37) MACPYTHON=3.7.1 ;; - esac - MINOR=$(echo $MACPYTHON | cut -d. -f1,2) +install_mac_python() { + local FULL=$1 + local MINOR=$(echo $FULL | cut -d. -f1,2) + local PYTHON_EXE=/Library/Frameworks/Python.framework/Versions/${MINOR}/bin/python${MINOR} + + # Already installed. + if [[ -f "${PYTHON_EXE}" ]]; then + return 0; + fi - curl -Lo macpython.pkg https://www.python.org/ftp/python/${MACPYTHON}/python-${MACPYTHON}-macosx10.6.pkg + curl -Lo macpython.pkg https://www.python.org/ftp/python/${FULL}/python-${FULL}-macosx10.6.pkg sudo installer -pkg macpython.pkg -target / - ls /Library/Frameworks/Python.framework/Versions/$MINOR/bin/ - PYTHON_EXE=/Library/Frameworks/Python.framework/Versions/$MINOR/bin/python$MINOR + # The pip in older MacPython releases doesn't support a new enough TLS curl https://bootstrap.pypa.io/get-pip.py | sudo $PYTHON_EXE $PYTHON_EXE -m pip install virtualenv +} + + +if [[ "$(uname -s)" == 'Darwin' ]]; then + # Mac OS setup. + case "${NOX_SESSION}" in + test-2.7) MACPYTHON=2.7.15 ;; + test-3.4) MACPYTHON=3.4.4 ;; + test-3.5) MACPYTHON=3.5.4 ;; + test-3.6) MACPYTHON=3.6.7 ;; + test-3.7) MACPYTHON=3.7.1 ;; + esac + + # Install additional versions as needed. 
+ install_mac_python $MACPYTHON + + # Always install 3.6 for Nox + install_mac_python "3.6.7" # Enable TLS 1.3 on macOS sudo defaults write /Library/Preferences/com.apple.networkd tcp_connect_enable_tls13 1 + + # Install Nox + python3.6 -m pip install nox + else - python -m pip install virtualenv + # Linux Setup + # Even when testing on Python 2, we need Python 3 for Nox. This detects if + # we're in one of the Travis Python 2 sessions and sets up the Python 3 install + # for Nox. + if ! python3 -m pip --version; then + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py + sudo python3 get-pip.py + sudo python3 -m pip install nox + else + # We're not in "dual Python" mode, so we can just install Nox normally. + python3 -m pip install nox + fi fi -if [[ "${TOXENV}" == "gae" ]]; then +if [[ "${NOX_SESSION}" == "app_engine" ]]; then python -m pip install gcp-devrel-py-tools gcp-devrel-py-tools download-appengine-sdk "$(dirname ${GAE_SDK_PATH})" fi diff --git a/_travis/run.sh b/_travis/run.sh index 0c35e2e48a..841e26f6b5 100755 --- a/_travis/run.sh +++ b/_travis/run.sh @@ -2,13 +2,17 @@ set -exo pipefail -if [[ "$(uname -s)" == "Darwin" && "$TOXENV" == "py27" ]]; then +if [[ "$(uname -s)" == "Darwin" && "$NOX_SESSION" == "tests-2.7" ]]; then export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin":$PATH fi -if [ -n "${TOXENV}" ]; then - python -m pip install tox - tox +if [ -n "${NOX_SESSION}" ]; then + if [[ "$(uname -s)" == 'Darwin' ]]; then + # Explicitly use Python 3.6 on MacOS, otherwise it won't find Nox properly. + python3.6 -m nox -s "${NOX_SESSION}" + else + nox -s "${NOX_SESSION}" + fi else downstream_script="${TRAVIS_BUILD_DIR}/_travis/downstream/${DOWNSTREAM}.sh" if [ ! -x "$downstream_script" ]; then diff --git a/appveyor.yml b/appveyor.yml index 25dfbcc3b9..04df64ded9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,44 +10,27 @@ environment: - PYTHON: "C:\\Python27-x64" PYTHON_VERSION: "2.7.x" PYTHON_ARCH: "64" - TOXENV: "py27" - TOXPY27: "%PYTHON%\\python.exe" - - - PYTHON: "C:\\Python27-x64" - PYTHON_VERSION: "2.7.x" - PYTHON_ARCH: "64" - TOXENV: "py27-nobrotli" - TOXPY27: "%PYTHON%\\python.exe" + NOX_SESSION: "test-2.7" - PYTHON: "C:\\Python34-x64" PYTHON_VERSION: "3.4.x" PYTHON_ARCH: "64" - TOXENV: "py34" - TOXPY34: "%PYTHON%\\python.exe" + NOX_SESSION: "test-3.4" - PYTHON: "C:\\Python35-x64" PYTHON_VERSION: "3.5.x" PYTHON_ARCH: "64" - TOXENV: "py35" - TOXPY35: "%PYTHON%\\python.exe" + NOX_SESSION: "test-3.5" - PYTHON: "C:\\Python36-x64" PYTHON_VERSION: "3.6.x" PYTHON_ARCH: "64" - TOXENV: "py36" - TOXPY36: "%PYTHON%\\python.exe" - - - PYTHON: "C:\\Python37-x64" - PYTHON_VERSION: "3.7.x" - PYTHON_ARCH: "64" - TOXENV: "py37" - TOXPY37: "%PYTHON%\\python.exe" + NOX_SESSION: "test-3.6" - PYTHON: "C:\\Python37-x64" PYTHON_VERSION: "3.7.x" PYTHON_ARCH: "64" - TOXENV: "py37-nobrotli" - TOXPY37: "%PYTHON%\\python.exe" + NOX_SESSION: "test-3.7" cache: - C:\Users\appveyor\AppData\Local\pip\Cache @@ -62,19 +45,14 @@ install: # the parent CMD process). - SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH% - # Check that we have the expected version and architecture for Python - - python --version - - python -c "import struct; print(struct.calcsize('P') * 8)" - # Upgrade to the latest version of pip to avoid it displaying warnings # about it being out of date. 
- - python -m pip install --upgrade pip wheel - - pip install tox virtualenv + - C:\Python36-x64\python.exe -m pip install --upgrade pip wheel + - C:\Python36-x64\python.exe -m pip install nox test_script: - - tox + - C:\Python36-x64\python.exe -m nox -s "%NOX_SESSION%" on_success: - - .tox/%TOXENV%/Scripts/activate.bat - pip install codecov - codecov --env PLATFORM,TOXENV diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000000..136a110a7b --- /dev/null +++ b/noxfile.py @@ -0,0 +1,77 @@ +import os +import shutil + +import nox + + +def tests_impl(session, extras="socks,secure,brotli"): + # Install deps and the package itself. + session.install("-r", "dev-requirements.txt") + session.install(".[{extras}]".format(extras=extras)) + + # Show the pip version. + session.run("pip", "--version") + # Print the Python version and bytesize. + session.run("python", "--version") + session.run("python", "-c", "import struct; print(struct.calcsize('P') * 8)") + # Print OpenSSL information. + session.run("python", "-m", "OpenSSL.debug") + + # Inspired from https://github.com/pyca/cryptography + # We use parallel mode and then combine here so that coverage.py will take + # the paths like .tox/pyXY/lib/pythonX.Y/site-packages/urllib3/__init__.py + # and collapse them into src/urllib3/__init__.py. + + session.run( + "coverage", "run", "--parallel-mode", "-m", + "pytest", "-r", "sx", "test", + *session.posargs, + env={ + "PYTHONWARNINGS": "always::DeprecationWarning" + }) + session.run("coverage", "combine") + session.run("coverage", "report", "-m") + + +@nox.session(python=["2.7", "3.4", "3.5", "3.6", "3.7", "3.8", "pypy"]) +def test(session): + tests_impl(session) + + +@nox.session(python=["2", "3"]) +def google_brotli(session): + # https://pypi.org/project/Brotli/ is the Google version of brotli, so + # install it separately and don't install our brotli extra (which installs + # brotlipy). + session.install("brotli") + tests_impl(session, extras="socks,secure") + + +@nox.session(python="2.7") +def app_engine(session): + session.install("-r", "dev-requirements.txt") + session.install(".") + session.run( + "coverage", "run", "--parallel-mode", "-m", + "pytest", "-r", "sx", "test/appengine", + *session.posargs) + session.run("coverage", "combine") + session.run("coverage", "report", "-m") + + +@nox.session +def lint(session): + session.install("flake8") + session.run("flake8", "--version") + session.run("flake8", "setup.py", "docs", "dummyserver", "src", "test") + + +@nox.session +def docs(session): + session.install("-r", "docs/requirements.txt") + session.install(".[socks,secure,brotli]") + + session.chdir("docs") + if os.path.exists("_build"): + shutil.rmtree("_build") + session.run("sphinx-build", "-W", ".", "_build/html") diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 841e528985..0000000000 --- a/tox.ini +++ /dev/null @@ -1,71 +0,0 @@ -[tox] -envlist = flake8-py3, py27, py34, py35, py36, py37, py38, pypy, py{27,37}-nobrotli, py{27,37}-google-brotli - -[testenv] -deps= -r{toxinidir}/dev-requirements.txt -extras = socks,secure,brotli -commands = - # Print out the python version and bitness - pip --version - python --version - python -c "import struct; print(struct.calcsize('P') * 8)" - python -m OpenSSL.debug - # Inspired from https://github.com/pyca/cryptography - # We use parallel mode and then combine here so that coverage.py will take - # the paths like .tox/pyXY/lib/pythonX.Y/site-packages/urllib3/__init__.py - # and collapse them into src/urllib3/__init__.py. 
- coverage run --parallel-mode -m pytest -r sx test {posargs} - coverage combine - coverage report -m -setenv = - PYTHONWARNINGS=always::DeprecationWarning -passenv = CFLAGS LDFLAGS TRAVIS APPVEYOR CRYPTOGRAPHY_OSX_NO_LINK_FLAGS TRAVIS_INFRA - -[testenv:py37-google-brotli] -extras = socks,secure -deps = - {[testenv]deps} - Brotli - -[testenv:py27-google-brotli] -extras = socks,secure -deps = - {[testenv]deps} - Brotli - -[testenv:py27-nobrotli] -extras = socks,secure - -[testenv:py37-nobrotli] -extras = socks,secure - -[testenv:gae] -basepython = python2.7 -deps= - {[testenv]deps} -commands= - coverage run --parallel-mode -m pytest -r sx test/appengine {posargs} - coverage combine - coverage report -m -setenv = - GAE_SDK_PATH={env:GAE_SDK_PATH:} - {[testenv]setenv} -passenv = TRAVIS TRAVIS_INFRA - -[testenv:flake8-py3] -basepython = python3 -deps= - flake8 -commands= - flake8 --version - flake8 setup.py docs dummyserver src test - -[testenv:docs] -deps= - -r{toxinidir}/docs/requirements.txt -commands= - rm -rf {toxinidir}/docs/_build - make -C {toxinidir}/docs html -whitelist_externals= - make - rm From dae790cc1f41a9a8014ccfe0dad06b94794c46be Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 25 Apr 2019 06:54:33 -0500 Subject: [PATCH 24/48] Don't require branch = master for tag pushes (#1584) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 27e45410f6..c803f4886f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -118,4 +118,4 @@ stages: # Deploy on any tags - name: deploy - if: branch = master AND tag IS present + if: tag IS present AND tag =~ /^(\d+\.\d+(?:.\d+)?)$/ AND repo = urllib3/urllib3 From a0d2bfd6098fe996fca74b80cbe473236187157c Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 25 Apr 2019 14:56:17 -0500 Subject: [PATCH 25/48] is_ipaddress shouldn't detect IPvFuture addresses (#1583) --- CHANGES.rst | 6 ++++++ src/urllib3/util/ssl_.py | 8 ++++---- test/test_ssl.py | 16 ++++++++++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 29d08f8635..54c669e6e4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes ======= +dev (master) +------------ + +* Change ``is_ipaddress`` to not detect IPvFuture addresses. (Pull #1583) + + 1.25.1 (2019-04-24) ------------------- diff --git a/src/urllib3/util/ssl_.py b/src/urllib3/util/ssl_.py index fdf7d1e07d..f271ce9301 100644 --- a/src/urllib3/util/ssl_.py +++ b/src/urllib3/util/ssl_.py @@ -44,11 +44,10 @@ def _const_compare_digest_backport(a, b): # Borrow rfc3986's regular expressions for IPv4 # and IPv6 addresses for use in is_ipaddress() _IP_ADDRESS_REGEX = re.compile( - r'^(?:%s|%s|%s|%s)$' % ( + r'^(?:%s|%s|%s)$' % ( abnf_regexp.IPv4_RE, abnf_regexp.IPv6_RE, - abnf_regexp.IPv6_ADDRZ_RE, - abnf_regexp.IPv_FUTURE_RE + abnf_regexp.IPv6_ADDRZ_RFC4007_RE ) ) @@ -370,7 +369,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, def is_ipaddress(hostname): - """Detects whether the hostname given is an IP address. + """Detects whether the hostname given is an IPv4 or IPv6 address. + Also detects IPv6 addresses with Zone IDs. :param str hostname: Hostname to examine. :return: True if the hostname is an IP address, False otherwise. 
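Note on the regex change above: dropping IPv_FUTURE_RE and switching to IPv6_ADDRZ_RFC4007_RE means bracket-less IPv6 literals with zone IDs are still classified as IP addresses, while hostnames that merely fit the IPvFuture grammar are not. A minimal sketch of the resulting behaviour, assuming the module is imported as urllib3.util.ssl_ and reusing inputs from the test changes that follow:

    from urllib3.util import ssl_

    # An IPv6 literal with an RFC 4007 zone ID is still detected as an IP address.
    assert ssl_.is_ipaddress('FE80::8939:7684:D84b:a5A4%251')

    # A DNS name that happens to match the IPvFuture grammar ("v" 1*HEXDIG "." ...)
    # is no longer misclassified as an IP address.
    assert not ssl_.is_ipaddress('v2.sg.media-imdb.com')
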
diff --git a/test/test_ssl.py b/test/test_ssl.py index 6a46b4f3ea..8cc15f2a22 100644 --- a/test/test_ssl.py +++ b/test/test_ssl.py @@ -5,11 +5,21 @@ @pytest.mark.parametrize('addr', [ + # IPv6 '::1', '::', + 'FE80::8939:7684:D84b:a5A4%251', + + # IPv4 '127.0.0.1', '8.8.8.8', - b'127.0.0.1' + b'127.0.0.1', + + # IPv6 w/ Zone IDs + 'FE80::8939:7684:D84b:a5A4%251', + b'FE80::8939:7684:D84b:a5A4%251', + 'FE80::8939:7684:D84b:a5A4%19', + b'FE80::8939:7684:D84b:a5A4%19' ]) def test_is_ipaddress_true(addr): assert ssl_.is_ipaddress(addr) @@ -17,7 +27,9 @@ def test_is_ipaddress_true(addr): @pytest.mark.parametrize('addr', [ 'www.python.org', - b'www.python.org' + b'www.python.org', + 'v2.sg.media-imdb.com', + b'v2.sg.media-imdb.com' ]) def test_is_ipaddress_false(addr): assert not ssl_.is_ipaddress(addr) From 35b4caa9b53d651235bce78d2b934ff26b12ba5c Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Thu, 25 Apr 2019 22:59:20 +0200 Subject: [PATCH 26/48] use import for is_appengine check --- src/urllib3/contrib/_appengine_environ.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 7bdf8770dc..a03cf22f8d 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,7 +6,11 @@ def is_appengine(): - return 'APPENGINE_RUNTIME' in os.environ + try: + from google.appengine.tools.devappserver2.python import instance_factory + return True + except ImportError: + return False def is_appengine_sandbox(): From 685914d722ad91bc1170456c3624928e9bb35663 Mon Sep 17 00:00:00 2001 From: Fabian Date: Sat, 3 Nov 2018 20:27:23 +0100 Subject: [PATCH 27/48] Update _appengine_environ.py Fixed Bug https://github.com/urllib3/urllib3/issues/1470 --- src/urllib3/contrib/_appengine_environ.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index f3e00942cb..69291c0c23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -17,12 +17,12 @@ def is_appengine_sandbox(): def is_local_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Development/' in os.environ['SERVER_SOFTWARE']) + os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) def is_prod_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and not is_prod_appengine_mvms()) From eb94d9f572cd522eacdeed94c586f1b3962616e8 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 6 Nov 2018 08:25:55 +0100 Subject: [PATCH 28/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 69291c0c23..936fb0f2df 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -16,14 +16,12 @@ def is_appengine_sandbox(): def is_local_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) + return 'SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development') def is_prod_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') 
and - not is_prod_appengine_mvms()) + return not is_local_appengine() def is_prod_appengine_mvms(): From 0e92902eb552793562eeb221d24c383b215d17db Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:10:25 +0100 Subject: [PATCH 29/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 936fb0f2df..1fc7296da2 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,11 +14,9 @@ def is_appengine(): def is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() - def is_local_appengine(): - return 'SERVER_SOFTWARE' not in os.environ or - os.environ['SERVER_SOFTWARE'].startswith('Development') - + return ('SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): return not is_local_appengine() From 35ed29f6c5e77cda6df81ea353c97be24a4ff196 Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:11:23 +0100 Subject: [PATCH 30/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 1fc7296da2..3e2da32b76 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,10 +14,12 @@ def is_appengine(): def is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() + def is_local_appengine(): return ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) + def is_prod_appengine(): return not is_local_appengine() From 875f07e3ed2496ca9bdfa0b53ee99b7a2232ae46 Mon Sep 17 00:00:00 2001 From: Fabian Date: Sun, 25 Nov 2018 12:43:39 +0100 Subject: [PATCH 31/48] Use APPENGINE_RUNTIME in is_appengine --- src/urllib3/contrib/_appengine_environ.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 3e2da32b76..9838335df9 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,9 +6,7 @@ def is_appengine(): - return (is_local_appengine() or - is_prod_appengine() or - is_prod_appengine_mvms()) + return 'APPENGINE_RUNTIME' in os.environ def is_appengine_sandbox(): From 678f64e669abd50e93e5da0a4699bb5dd46cf195 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 26 Nov 2018 19:19:25 +0100 Subject: [PATCH 32/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 9838335df9..6d69ccdd23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,12 +14,13 @@ def is_appengine_sandbox(): def is_local_appengine(): - return ('SERVER_SOFTWARE' not in os.environ or + return is_appengine() and + ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): - return not is_local_appengine() + return is_appengine() and not is_local_appengine() def is_prod_appengine_mvms(): From 53a62f1a759d62448e4790caeaa3059131af3029 Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Tue, 27 Nov 
2018 09:18:32 +0100 Subject: [PATCH 33/48] fixed syntax error --- src/urllib3/contrib/_appengine_environ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 6d69ccdd23..7bdf8770dc 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,7 +14,7 @@ def is_appengine_sandbox(): def is_local_appengine(): - return is_appengine() and + return is_appengine() and \ ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) From 6daeb6c3da9a63f9a37fb8c946162bfa7ef494f1 Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Thu, 25 Apr 2019 22:59:20 +0200 Subject: [PATCH 34/48] use import for is_appengine check --- src/urllib3/contrib/_appengine_environ.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 7bdf8770dc..a03cf22f8d 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,7 +6,11 @@ def is_appengine(): - return 'APPENGINE_RUNTIME' in os.environ + try: + from google.appengine.tools.devappserver2.python import instance_factory + return True + except ImportError: + return False def is_appengine_sandbox(): From a74c9cfbaed9f811e7563cfc3dce894928e0221a Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Sun, 28 Apr 2019 14:19:42 -0500 Subject: [PATCH 35/48] Percent-encode invalid characters with request target (#1586) --- CHANGES.rst | 3 +++ src/urllib3/util/url.py | 58 ++++++++++++++++++++++++++++++++++------- test/test_util.py | 27 +++++++++++++++++-- 3 files changed, 76 insertions(+), 12 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 54c669e6e4..cf22853337 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,9 @@ dev (master) * Change ``is_ipaddress`` to not detect IPvFuture addresses. (Pull #1583) +* Change ``parse_url`` to percent-encode invalid characters within the + path, query, and target components. (Pull #1586) + 1.25.1 (2019-04-24) ------------------- diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index de3c4686ec..0bc6ced756 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -6,6 +6,7 @@ from ..packages import six, rfc3986 from ..packages.rfc3986.exceptions import RFC3986Exception, ValidationError from ..packages.rfc3986.validators import Validator +from ..packages.rfc3986 import abnf_regexp, normalizers, compat, misc url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -17,6 +18,9 @@ # Regex for detecting URLs with schemes. RFC 3986 Section 3.1 SCHEME_REGEX = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+\-]*:|/)") +PATH_CHARS = abnf_regexp.UNRESERVED_CHARS_SET | abnf_regexp.SUB_DELIMITERS_SET | {':', '@', '/'} +QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {'?'} + class Url(namedtuple('Url', url_attrs)): """ @@ -136,6 +140,37 @@ def split_first(s, delims): return s[:min_idx], s[min_idx + 1:], min_delim +def _encode_invalid_chars(component, allowed_chars, encoding='utf-8'): + """Percent-encodes a URI component without reapplying + onto an already percent-encoded component. Based on + rfc3986.normalizers.encode_component() + """ + if component is None: + return component + + # Try to see if the component we're encoding is already percent-encoded + # so we can skip all '%' characters but still encode all others. 
+ percent_encodings = len(normalizers.PERCENT_MATCHER.findall( + compat.to_str(component, encoding))) + + uri_bytes = component.encode('utf-8', 'surrogatepass') + is_percent_encoded = percent_encodings == uri_bytes.count(b'%') + + encoded_component = bytearray() + + for i in range(0, len(uri_bytes)): + # Will return a single character bytestring on both Python 2 & 3 + byte = uri_bytes[i:i+1] + byte_ord = ord(byte) + if ((is_percent_encoded and byte == b'%') + or (byte_ord < 128 and byte.decode() in allowed_chars)): + encoded_component.extend(byte) + continue + encoded_component.extend('%{0:02x}'.format(byte_ord).encode().upper()) + + return encoded_component.decode(encoding) + + def parse_url(url): """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is @@ -160,8 +195,6 @@ def parse_url(url): return Url() is_string = not isinstance(url, six.binary_type) - if not is_string: - url = url.decode("utf-8") # RFC 3986 doesn't like URLs that have a host but don't start # with a scheme and we support URLs like that so we need to @@ -171,11 +204,6 @@ def parse_url(url): if not SCHEME_REGEX.search(url): url = "//" + url - try: - iri_ref = rfc3986.IRIReference.from_string(url, encoding="utf-8") - except (ValueError, RFC3986Exception): - six.raise_from(LocationParseError(url), None) - def idna_encode(name): if name and any([ord(x) > 128 for x in name]): try: @@ -188,8 +216,18 @@ def idna_encode(name): raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) return name - has_authority = iri_ref.authority is not None - uri_ref = iri_ref.encode(idna_encoder=idna_encode) + try: + split_iri = misc.IRI_MATCHER.match(compat.to_str(url)).groupdict() + iri_ref = rfc3986.IRIReference( + split_iri['scheme'], split_iri['authority'], + _encode_invalid_chars(split_iri['path'], PATH_CHARS), + _encode_invalid_chars(split_iri['query'], QUERY_CHARS), + _encode_invalid_chars(split_iri['fragment'], FRAGMENT_CHARS) + ) + has_authority = iri_ref.authority is not None + uri_ref = iri_ref.encode(idna_encoder=idna_encode) + except (ValueError, RFC3986Exception): + return six.raise_from(LocationParseError(url), None) # rfc3986 strips the authority if it's invalid if has_authority and uri_ref.authority is None: @@ -209,7 +247,7 @@ def idna_encode(name): *validator.COMPONENT_NAMES ).validate(uri_ref) except ValidationError: - six.raise_from(LocationParseError(url), None) + return six.raise_from(LocationParseError(url), None) # For the sake of backwards compatibility we put empty # string values for path if there are any defined values diff --git a/test/test_util.py b/test/test_util.py index c86170cfad..cc237b0e4a 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -135,8 +135,15 @@ def test_invalid_host(self, location): 'http://user\\@google.com', 'http://google\\.com', 'user\\@google.com', - 'http://google.com#fragment#', 'http://user@user@google.com/', + + # Invalid IDNA labels + u'http://\uD7FF.com', + u'http://❤️', + + # Unicode surrogates + u'http://\uD800.com', + u'http://\uDC00.com', ]) def test_invalid_url(self, url): with pytest.raises(LocationParseError): @@ -149,6 +156,15 @@ def test_invalid_url(self, url): ('HTTPS://Example.Com/?Key=Value', 'https://example.com/?Key=Value'), ('Https://Example.Com/#Fragment', 'https://example.com/#Fragment'), ('[::Ff%etH0%Ff]/%ab%Af', '[::ff%25etH0%Ff]/%AB%AF'), + + # Invalid characters for the query/fragment getting encoded + ('http://google.com/p[]?parameter[]=\"hello\"#fragment#', + 
'http://google.com/p%5B%5D?parameter%5B%5D=%22hello%22#fragment%23'), + + # Percent encoding isn't applied twice despite '%' being invalid + # but the percent encoding is still normalized. + ('http://google.com/p%5B%5d?parameter%5b%5D=%22hello%22#fragment%23', + 'http://google.com/p%5B%5D?parameter%5B%5D=%22hello%22#fragment%23') ]) def test_parse_url_normalization(self, url, expected_normalized_url): """Assert parse_url normalizes the scheme/host, and only the scheme/host""" @@ -214,7 +230,14 @@ def test_parse_url_normalization(self, url, expected_normalized_url): # Uppercase IRI (u'http://Königsgäßchen.de/straße', - Url('http', host='xn--knigsgchen-b4a3dun.de', path='/stra%C3%9Fe')) + Url('http', host='xn--knigsgchen-b4a3dun.de', path='/stra%C3%9Fe')), + + # Unicode Surrogates + (u'http://google.com/\uD800', Url('http', host='google.com', path='%ED%A0%80')), + (u'http://google.com?q=\uDC00', + Url('http', host='google.com', path='', query='q=%ED%B0%80')), + (u'http://google.com#\uDC00', + Url('http', host='google.com', path='', fragment='%ED%B0%80')), ] @pytest.mark.parametrize( From 49eea8082ab34094d0c53f1d26e5c588d5372d74 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 29 Apr 2019 08:05:23 -0500 Subject: [PATCH 36/48] Release 1.25.2 (#1588) --- CHANGES.rst | 4 ++-- src/urllib3/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cf22853337..19e75f432a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,8 @@ Changes ======= -dev (master) ------------- +1.25.2 (2019-04-28) +------------------- * Change ``is_ipaddress`` to not detect IPvFuture addresses. (Pull #1583) diff --git a/src/urllib3/__init__.py b/src/urllib3/__init__.py index c816272a2a..eb9158867a 100644 --- a/src/urllib3/__init__.py +++ b/src/urllib3/__init__.py @@ -26,7 +26,7 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.25.1' +__version__ = '1.25.2' __all__ = ( 'HTTPConnectionPool', From fb706cbca25d4b96c81262098654b07b3b66c14f Mon Sep 17 00:00:00 2001 From: Fabian Date: Sat, 3 Nov 2018 20:27:23 +0100 Subject: [PATCH 37/48] Update _appengine_environ.py Fixed Bug https://github.com/urllib3/urllib3/issues/1470 --- src/urllib3/contrib/_appengine_environ.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index f3e00942cb..69291c0c23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -17,12 +17,12 @@ def is_appengine_sandbox(): def is_local_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Development/' in os.environ['SERVER_SOFTWARE']) + os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) def is_prod_appengine(): return ('APPENGINE_RUNTIME' in os.environ and - 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and not is_prod_appengine_mvms()) From 602fe61bfceaef2a2497ddc72fbf90b08ccdc306 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 6 Nov 2018 08:25:55 +0100 Subject: [PATCH 38/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 69291c0c23..936fb0f2df 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py 
@@ -16,14 +16,12 @@ def is_appengine_sandbox(): def is_local_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) + return 'SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development') def is_prod_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and - not is_prod_appengine_mvms()) + return not is_local_appengine() def is_prod_appengine_mvms(): From 7dc72c50e845210a8c7223e11007f90d80ca5f1e Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:10:25 +0100 Subject: [PATCH 39/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 936fb0f2df..1fc7296da2 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,11 +14,9 @@ def is_appengine(): def is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() - def is_local_appengine(): - return 'SERVER_SOFTWARE' not in os.environ or - os.environ['SERVER_SOFTWARE'].startswith('Development') - + return ('SERVER_SOFTWARE' not in os.environ or + os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): return not is_local_appengine() From febec12f5d91c9e7e75e22ae1e2fa8ed4b4686a8 Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:11:23 +0100 Subject: [PATCH 40/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 1fc7296da2..3e2da32b76 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,10 +14,12 @@ def is_appengine(): def is_appengine_sandbox(): return is_appengine() and not is_prod_appengine_mvms() + def is_local_appengine(): return ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) + def is_prod_appengine(): return not is_local_appengine() From 3bc64d744cf0000fe0c86f31f09be7514cc82eaf Mon Sep 17 00:00:00 2001 From: Fabian Date: Sun, 25 Nov 2018 12:43:39 +0100 Subject: [PATCH 41/48] Use APPENGINE_RUNTIME in is_appengine --- src/urllib3/contrib/_appengine_environ.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 3e2da32b76..9838335df9 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,9 +6,7 @@ def is_appengine(): - return (is_local_appengine() or - is_prod_appengine() or - is_prod_appengine_mvms()) + return 'APPENGINE_RUNTIME' in os.environ def is_appengine_sandbox(): From 5f4361e1d5dc2182a115d93aac1e572de6d93d3a Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 26 Nov 2018 19:19:25 +0100 Subject: [PATCH 42/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 9838335df9..6d69ccdd23 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,12 +14,13 @@ def is_appengine_sandbox(): def 
is_local_appengine(): - return ('SERVER_SOFTWARE' not in os.environ or + return is_appengine() and + ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) def is_prod_appengine(): - return not is_local_appengine() + return is_appengine() and not is_local_appengine() def is_prod_appengine_mvms(): From ef821ef9d6b535232144486c58dc4a8dfaad9180 Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Tue, 27 Nov 2018 09:18:32 +0100 Subject: [PATCH 43/48] fixed syntax error --- src/urllib3/contrib/_appengine_environ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 6d69ccdd23..7bdf8770dc 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -14,7 +14,7 @@ def is_appengine_sandbox(): def is_local_appengine(): - return is_appengine() and + return is_appengine() and \ ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) From 3328ebd8bf19a9cf74ee77fa5b02e9f9831666be Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Thu, 25 Apr 2019 22:59:20 +0200 Subject: [PATCH 44/48] use import for is_appengine check --- src/urllib3/contrib/_appengine_environ.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 7bdf8770dc..a03cf22f8d 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -6,7 +6,11 @@ def is_appengine(): - return 'APPENGINE_RUNTIME' in os.environ + try: + from google.appengine.tools.devappserver2.python import instance_factory + return True + except ImportError: + return False def is_appengine_sandbox(): From 48abb93704ae9aa73270e5417c0c693b7f7bd9ba Mon Sep 17 00:00:00 2001 From: Fabian Date: Sat, 3 Nov 2018 20:27:23 +0100 Subject: [PATCH 45/48] Update _appengine_environ.py Fixed Bug https://github.com/urllib3/urllib3/issues/1470 --- src/urllib3/contrib/_appengine_environ.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index a03cf22f8d..8d9805969d 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -21,9 +21,14 @@ def is_local_appengine(): return is_appengine() and \ ('SERVER_SOFTWARE' not in os.environ or os.environ['SERVER_SOFTWARE'].startswith('Development')) + return ('APPENGINE_RUNTIME' in os.environ and + os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) def is_prod_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and + not is_prod_appengine_mvms()) return is_appengine() and not is_local_appengine() From 806f17243a68fee2cb6a78b600f916e9d5cbb501 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 6 Nov 2018 08:25:55 +0100 Subject: [PATCH 46/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 8d9805969d..7359ede548 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -20,16 +20,14 @@ def is_appengine_sandbox(): def is_local_appengine(): return is_appengine() and \ ('SERVER_SOFTWARE' not in os.environ or - 
os.environ['SERVER_SOFTWARE'].startswith('Development')) - return ('APPENGINE_RUNTIME' in os.environ and os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) def is_prod_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and + return is_appengine() and \ + ('APPENGINE_RUNTIME' in os.environ and not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and not is_prod_appengine_mvms()) - return is_appengine() and not is_local_appengine() def is_prod_appengine_mvms(): From 7b40462c102fd795b8ea3fc10961d0cf200e94ed Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 7 Nov 2018 08:11:23 +0100 Subject: [PATCH 47/48] Update _appengine_environ.py --- src/urllib3/contrib/_appengine_environ.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index 7359ede548..c4039e6b45 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -23,6 +23,7 @@ def is_local_appengine(): os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) + def is_prod_appengine(): return is_appengine() and \ ('APPENGINE_RUNTIME' in os.environ and From 7d66edc6ff07ea37916a57df76175ce5f2ad80ab Mon Sep 17 00:00:00 2001 From: Fabian Witt Date: Thu, 2 May 2019 15:55:59 +0200 Subject: [PATCH 48/48] rebase --- src/urllib3/contrib/_appengine_environ.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/urllib3/contrib/_appengine_environ.py b/src/urllib3/contrib/_appengine_environ.py index c4039e6b45..7359ede548 100644 --- a/src/urllib3/contrib/_appengine_environ.py +++ b/src/urllib3/contrib/_appengine_environ.py @@ -23,7 +23,6 @@ def is_local_appengine(): os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) - def is_prod_appengine(): return is_appengine() and \ ('APPENGINE_RUNTIME' in os.environ and
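
Net effect of patches 44-48 on src/urllib3/contrib/_appengine_environ.py: patch 44 switches is_appengine() to an import probe, patches 45-46 fold the APPENGINE_RUNTIME / SERVER_SOFTWARE checks into is_local_appengine() and is_prod_appengine(), and patches 47-48 merely add and then remove a blank line. A sketch of the three helpers as they stand after the final patch, reconstructed from the hunks above (is_appengine_sandbox() and is_prod_appengine_mvms() are untouched and omitted here):

    import os


    def is_appengine():
        # Probe for the dev_appserver / App Engine SDK instead of relying on
        # the APPENGINE_RUNTIME environment variable.
        try:
            from google.appengine.tools.devappserver2.python import instance_factory
            return True
        except ImportError:
            return False


    def is_local_appengine():
        return is_appengine() and \
            ('SERVER_SOFTWARE' not in os.environ or
             os.environ.get('SERVER_SOFTWARE', '').startswith('Development'))


    def is_prod_appengine():
        return is_appengine() and \
            ('APPENGINE_RUNTIME' in os.environ and
             not os.environ.get('SERVER_SOFTWARE', '').startswith('Development') and
             not is_prod_appengine_mvms())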