From 2436dd46681fba903c6e002752400d5dc42dfed8 Mon Sep 17 00:00:00 2001 From: Jorge Lopez Silva Date: Wed, 25 Mar 2020 14:26:21 -0700 Subject: [PATCH] Integrate SSLTransport into urllib3. For connections that will attempt to use an HTTPS proxy with an HTTPS destination, we'll use the TLS in TLS support provided by SSL Transport. HTTPS proxy and HTTP destinations will continue using a single TLS session as expected. HTTPS proxies and HTTPS destinations can continue to be insecure if indicated by the allow_insecure_proxies parameter. --- src/urllib3/connection.py | 64 +++++++++++++- src/urllib3/connectionpool.py | 11 ++- src/urllib3/poolmanager.py | 86 +++++++++++-------- src/urllib3/util/connection.py | 23 +++++ src/urllib3/util/ssl_.py | 18 +++- test/test_proxymanager.py | 12 --- test/test_ssl.py | 2 +- test/test_util.py | 38 +++++++- .../test_proxy_poolmanager.py | 49 ++++++++--- 9 files changed, 237 insertions(+), 66 deletions(-) diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index ce94b25640..13a19afb75 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -10,9 +10,10 @@ from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection from .packages.six.moves.http_client import HTTPException # noqa: F401 +hasSSL = False try: # Compiled with SSL? import ssl - + hasSSL = True BaseSSLError = ssl.SSLError except (ImportError, AttributeError): # Platform-specific: No SSL. ssl = None @@ -111,6 +112,11 @@ def __init__(self, *args, **kw): #: The socket options provided by the user. If no options are #: provided, we use the default options. self.socket_options = kw.pop("socket_options", self.default_socket_options) + + # Proxy options provided by the user. + self.proxy = kw.pop("proxy", None) + self.proxy_config = kw.pop("proxy_config", None) + _HTTPConnection.__init__(self, *args, **kw) @property @@ -309,8 +315,13 @@ def connect(self): # Add certificate verification conn = self._new_conn() hostname = self.host + tls_in_tls = False if self._is_using_tunnel(): + if self._connection_requires_tls_in_tls(): + conn = self._connect_tls_proxy(hostname, conn) + tls_in_tls = True + self.sock = conn # Calls self._set_hostport(), so self.host is @@ -370,6 +381,7 @@ def connect(self): ca_cert_data=self.ca_cert_data, server_hostname=server_hostname, ssl_context=context, + tls_in_tls=tls_in_tls, ) if self.assert_fingerprint: @@ -402,6 +414,56 @@ def connect(self): or self.assert_fingerprint is not None ) + def _connection_requires_tls_in_tls(self): + """ + Indicates if the current connection requires two TLS connections, one to + the proxy and one to the destination server. + """ + # Not supported in < py3 or ssl-less environments. + if not six.PY3 or not hasSSL: + return False + + if not self.proxy or not self.proxy_config: + return False + + if self.proxy_config.destination_scheme == "http": + return False + + return ( + self.proxy.scheme == "https" and not self.proxy_config.allow_insecure_proxy + ) + + def _connect_tls_proxy(self, hostname, conn): + """ + Establish a TLS connection to the proxy using the provided SSL context. + """ + proxy_config = self.proxy_config + ssl_context = proxy_config.ssl_context + if not ssl_context: + ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(self.ssl_version), + cert_reqs=resolve_cert_reqs(self.cert_reqs), + ) + if ( + not self.ca_certs + and not self.ca_cert_dir + and not self.ca_cert_data + and hasattr(ssl_context, "load_default_certs") + ): + ssl_context.load_default_certs() + + return ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + key_password=self.key_password, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + ca_cert_data=self.ca_cert_data, + server_hostname=hostname, + ssl_context=ssl_context, + ) + def _match_hostname(cert, asserted_hostname): try: diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index a1bb399dcf..ab30a7e9f8 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -38,7 +38,7 @@ from .request import RequestMethods from .response import HTTPResponse -from .util.connection import is_connection_dropped +from .util.connection import is_connection_dropped, connection_requires_http_tunnel from .util.request import set_file_position from .util.response import assert_header_parsing from .util.retry import Retry @@ -181,6 +181,7 @@ def __init__( retries=None, _proxy=None, _proxy_headers=None, + _proxy_config=None, **conn_kw ): ConnectionPool.__init__(self, host, port) @@ -202,6 +203,7 @@ def __init__( self.proxy = _proxy self.proxy_headers = _proxy_headers or {} + self.proxy_config = _proxy_config # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): @@ -218,6 +220,9 @@ def __init__( # list. self.conn_kw.setdefault("socket_options", []) + self.conn_kw["proxy"] = self.proxy + self.conn_kw["proxy_config"] = self.proxy_config + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -637,7 +642,7 @@ def urlopen( # Merge the proxy headers. Only done when not using HTTP CONNECT. We # have to copy the headers dict so we can safely change it without those # changes being reflected in anyone else's copy. - if self.scheme == "http" or (self.proxy and self.proxy.scheme == "https"): + if not connection_requires_http_tunnel(self.proxy, self.proxy_config): headers = headers.copy() headers.update(self.proxy_headers) @@ -952,7 +957,7 @@ def _prepare_proxy(self, conn): improperly set Host: header to proxy's IP:port. """ - if self.proxy.scheme != "https": + if connection_requires_http_tunnel(self.proxy, self.proxy_config): conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) conn.connect() diff --git a/src/urllib3/poolmanager.py b/src/urllib3/poolmanager.py index d081d753b6..b419baf7d4 100644 --- a/src/urllib3/poolmanager.py +++ b/src/urllib3/poolmanager.py @@ -2,13 +2,11 @@ import collections import functools import logging -import warnings from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme from .exceptions import ( - HTTPWarning, LocationValueError, MaxRetryError, ProxySchemeUnknown, @@ -19,17 +17,13 @@ from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry +from .util.connection import connection_requires_http_tunnel +from .packages.six import PY3 __all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] -class InvalidProxyConfigurationWarning(HTTPWarning): - """Raised when a user has an HTTPS proxy without enabling HTTPS proxies.""" - - pass - - log = logging.getLogger(__name__) SSL_KEYWORDS = ( @@ -66,6 +60,7 @@ class InvalidProxyConfigurationWarning(HTTPWarning): "key_headers", # dict "key__proxy", # parsed proxy url "key__proxy_headers", # dict + "key__proxy_config", # class "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples "key__socks_options", # dict "key_assert_hostname", # bool or string @@ -76,6 +71,18 @@ class InvalidProxyConfigurationWarning(HTTPWarning): #: The namedtuple class used to construct keys for the connection pool. #: All custom key schemes should include the fields in this key at a minimum. PoolKey = collections.namedtuple("PoolKey", _key_fields) +_proxy_config_fields = ("ssl_context", "allow_insecure_proxy", "destination_scheme") + + +class ProxyConfig: + """ + Configuration class for proxies. + """ + + def __init__(self, ssl_context, allow_insecure_proxy, destination_scheme=None): + self.ssl_context = ssl_context + self.allow_insecure_proxy = allow_insecure_proxy + self.destination_scheme = destination_scheme def _default_key_normalizer(key_class, request_context): @@ -168,6 +175,7 @@ class PoolManager(RequestMethods): """ proxy = None + proxy_config = None def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) @@ -322,14 +330,26 @@ def _merge_pool_kwargs(self, override): def _proxy_requires_url_absolute_form(self, parsed_url): """ Indicates if the proxy requires the complete destination URL in the - request. - - Normally this is only needed when not using an HTTP CONNECT tunnel. + request. Normally this is only needed when not using an HTTP CONNECT + tunnel. """ if self.proxy is None: return False - return parsed_url.scheme == "http" or self.proxy.scheme == "https" + return not connection_requires_http_tunnel(self.proxy, self.proxy_config) + + def _validate_proxy_scheme_url_selection(self, url_scheme): + if ( + not PY3 + and url_scheme == "https" + and self.proxy is not None + and self.proxy.scheme == "https" + and not self.proxy_config.allow_insecure_proxy + ): + + raise ProxySchemeUnsupported( + "Contacting HTTPS destinations through HTTPS proxies is not supported on py2." + ) def urlopen(self, method, url, redirect=True, **kw): """ @@ -341,6 +361,8 @@ def urlopen(self, method, url, redirect=True, **kw): :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ u = parse_url(url) + self._validate_proxy_scheme_url_selection(u.scheme) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) kw["assert_same_host"] = False @@ -408,6 +430,10 @@ class ProxyManager(PoolManager): HTTPS/CONNECT case they are sent only once. Could be used for proxy authentication. + :param proxy_ssl_context: + The proxy SSL context is used to establish the TLS connection to the + proxy when using HTTPS proxies. + :param _allow_https_proxy_to_see_traffic: Allows forwarding of HTTPS requests to HTTPS proxies. The proxy will have visibility of all the traffic sent. ONLY USE IF YOU KNOW WHAT @@ -433,6 +459,7 @@ def __init__( num_pools=10, headers=None, proxy_headers=None, + proxy_ssl_context=None, _allow_https_proxy_to_see_traffic=False, **connection_pool_kw ): @@ -454,15 +481,21 @@ def __init__( self.proxy = proxy self.proxy_headers = proxy_headers or {} + self.proxy_ssl_context = proxy_ssl_context + self.proxy_config = ProxyConfig( + proxy_ssl_context, _allow_https_proxy_to_see_traffic + ) connection_pool_kw["_proxy"] = self.proxy connection_pool_kw["_proxy_headers"] = self.proxy_headers - - self.allow_insecure_proxy = _allow_https_proxy_to_see_traffic + connection_pool_kw["_proxy_config"] = self.proxy_config super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): + # Original destination is needed to identify if we need TLS within TLS. + self.proxy_config.destination_scheme = scheme + if scheme == "https": return super(ProxyManager, self).connection_from_host( host, port, scheme, pool_kwargs=pool_kwargs @@ -487,31 +520,14 @@ def _set_proxy_headers(self, url, headers=None): headers_.update(headers) return headers_ - def _validate_proxy_scheme_url_selection(self, url_scheme): - if ( - url_scheme == "https" - and self.proxy.scheme == "https" - and not self.allow_insecure_proxy - ): - warnings.warn( - "Your proxy configuration specified an HTTPS scheme for the proxy. " - "Are you sure you want to use HTTPS to contact the proxy? " - "This most likely indicates an error in your configuration." - "If you are sure you want use HTTPS to contact the proxy, enable " - "the _allow_https_proxy_to_see_traffic.", - InvalidProxyConfigurationWarning, - ) - - raise ProxySchemeUnsupported( - "Contacting HTTPS destinations through HTTPS proxies is not supported." - ) - def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." u = parse_url(url) - self._validate_proxy_scheme_url_selection(u.scheme) - if u.scheme == "http" or self.proxy.scheme == "https": + # Original destination is needed to identify if we need TLS within TLS. + self.proxy_config.destination_scheme = u.scheme + + if not connection_requires_http_tunnel(self.proxy, self.proxy_config): # For connections using HTTP CONNECT, httplib sets the necessary # headers on the CONNECT to the proxy. For HTTP or when talking # HTTPS to the proxy, we'll definitely need to set 'Host' at the diff --git a/src/urllib3/util/connection.py b/src/urllib3/util/connection.py index 86f0a3b00e..0b9f1e5206 100644 --- a/src/urllib3/util/connection.py +++ b/src/urllib3/util/connection.py @@ -26,6 +26,29 @@ def is_connection_dropped(conn): # Platform-specific return False +def connection_requires_http_tunnel(proxy_url, proxy_config): + """ + Returns True if the connection requires an HTTP CONNECT through the proxy. + + :param destination_url: + :URL URL of the destination. + :param proxy_url: + :URL URL of the proxy. + :param proxy_config: + :class:`PoolManager.ProxyConfig` proxy configuration + """ + if proxy_url is None or proxy_config is None: + return False + + if proxy_config.destination_scheme == "http": + return False + + if proxy_url.scheme == "http": + return True + + return not proxy_config.allow_insecure_proxy + + # This function is copied from socket.py in the Python 2.7 standard # library test suite. Added to its signature is only `socket_options`. # One additional modification is that we avoid binding to IPv6 servers diff --git a/src/urllib3/util/ssl_.py b/src/urllib3/util/ssl_.py index f7e2b70558..75adadf306 100644 --- a/src/urllib3/util/ssl_.py +++ b/src/urllib3/util/ssl_.py @@ -13,6 +13,7 @@ SSLContext = None +SSLTransport = None HAS_SNI = False IS_PYOPENSSL = False IS_SECURETRANSPORT = False @@ -40,6 +41,7 @@ def _const_compare_digest_backport(a, b): import ssl from ssl import wrap_socket, CERT_REQUIRED from ssl import HAS_SNI # Has SNI? + from ..contrib.ssl import SSLTransport except ImportError: pass @@ -309,6 +311,7 @@ def ssl_wrap_socket( ca_cert_dir=None, key_password=None, ca_cert_data=None, + tls_in_tls=False, ): """ All arguments except for server_hostname, ssl_context, and ca_cert_dir have @@ -330,6 +333,8 @@ def ssl_wrap_socket( :param ca_cert_data: Optional string containing CA certificates in PEM format suitable for passing as the cadata parameter to SSLContext.load_verify_locations() + :param tls_in_tls: + Use SSLTransport of attempting to wrap the existing socket. """ context = ssl_context if context is None: @@ -374,7 +379,7 @@ def ssl_wrap_socket( server_hostname is not None and not is_ipaddress(server_hostname) ) or IS_SECURETRANSPORT: if HAS_SNI and server_hostname is not None: - return context.wrap_socket(sock, server_hostname=server_hostname) + return _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname) warnings.warn( "An HTTPS request has been made, but the SNI (Server Name " @@ -387,7 +392,7 @@ def ssl_wrap_socket( SNIMissingWarning, ) - return context.wrap_socket(sock) + return _ssl_wrap_socket_impl(sock, context, tls_in_tls) def is_ipaddress(hostname): @@ -412,3 +417,12 @@ def _is_key_file_encrypted(key_file): return True return False + + +def _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname=None): + wrapped_sock = None + if tls_in_tls and SSLTransport: + wrapped_sock = SSLTransport(sock, context, server_hostname) + else: + wrapped_sock = context.wrap_socket(sock, server_hostname=server_hostname) + return wrapped_sock diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py index fb947dae38..80b0c31f15 100644 --- a/test/test_proxymanager.py +++ b/test/test_proxymanager.py @@ -1,7 +1,6 @@ import pytest from urllib3.poolmanager import ProxyManager -from urllib3.util.url import parse_url class TestProxyManager(object): @@ -46,14 +45,3 @@ def test_invalid_scheme(self): ProxyManager("invalid://host/p") with pytest.raises(ValueError): ProxyManager("invalid://host/p") - - def test_proxy_tunnel(self): - http_url = parse_url("http://example.com") - https_url = parse_url("https://example.com") - with ProxyManager("http://proxy:8080") as p: - assert p._proxy_requires_url_absolute_form(http_url) - assert p._proxy_requires_url_absolute_form(https_url) is False - - with ProxyManager("https://proxy:8080") as p: - assert p._proxy_requires_url_absolute_form(http_url) - assert p._proxy_requires_url_absolute_form(https_url) diff --git a/test/test_ssl.py b/test/test_ssl.py index f755938062..4026e83a5b 100644 --- a/test/test_ssl.py +++ b/test/test_ssl.py @@ -63,7 +63,7 @@ def test_context_sni_with_ip_address(monkeypatch, has_sni, server_hostname, uses if uses_sni: context.wrap_socket.assert_called_with(sock, server_hostname=server_hostname) else: - context.wrap_socket.assert_called_with(sock) + context.wrap_socket.assert_called_with(sock, server_hostname=None) @pytest.mark.parametrize( diff --git a/test/test_util.py b/test/test_util.py index 97546e77d3..c4ad86c0a5 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -28,10 +28,16 @@ SNIMissingWarning, UnrewindableBodyError, ) -from urllib3.util.connection import allowed_gai_family, _has_ipv6 +from urllib3.util.connection import ( + allowed_gai_family, + _has_ipv6, + connection_requires_http_tunnel, +) from urllib3.util import is_fp_closed, ssl_ from urllib3.packages import six +from urllib3.poolmanager import ProxyConfig + from . import clear_warnings from test import onlyPy3, onlyPy2, onlyBrotlipy, notBrotlipy @@ -769,7 +775,9 @@ def test_ssl_wrap_socket_with_no_sni_warns(self): sock=socket, server_hostname="www.google.com", ) - mock_context.wrap_socket.assert_called_once_with(socket) + mock_context.wrap_socket.assert_called_once_with( + socket, server_hostname=None + ) assert warn.call_count >= 1 warnings = [call[0][1] for call in warn.call_args_list] assert SNIMissingWarning in warnings @@ -826,3 +834,29 @@ def test_ip_family_ipv6_disabled(self): def test_assert_header_parsing_throws_typeerror_with_non_headers(self, headers): with pytest.raises(TypeError): assert_header_parsing(headers) + + def test_connection_requires_http_tunnel_no_proxy(self): + assert not connection_requires_http_tunnel(None, None) + + def test_connection_requires_http_tunnel_http_proxy(self): + proxy = parse_url("http://proxy:8080") + proxy_config = ProxyConfig( + ssl_context=None, allow_insecure_proxy=False, destination_scheme="http" + ) + assert not connection_requires_http_tunnel(proxy, proxy_config) + + proxy_config.destination_scheme = "https" + assert connection_requires_http_tunnel(proxy, proxy_config) + + def test_connection_requires_http_tunnel_https_proxy(self): + proxy = parse_url("https://proxy:8443") + proxy_config = ProxyConfig( + ssl_context=None, allow_insecure_proxy=False, destination_scheme="http" + ) + assert not connection_requires_http_tunnel(proxy, proxy_config) + + proxy_config.destination_scheme = "https" + assert connection_requires_http_tunnel(proxy, proxy_config) + + proxy_config.allow_insecure_proxy = True + assert not connection_requires_http_tunnel(proxy, proxy_config) diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index feface09e2..fe9fdd928b 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -23,7 +23,7 @@ ) from urllib3.connectionpool import connection_from_url, VerifiedHTTPSConnection -from test import SHORT_TIMEOUT, LONG_TIMEOUT +from test import SHORT_TIMEOUT, LONG_TIMEOUT, onlyPy3, onlyPy2 # Retry failed tests pytestmark = pytest.mark.flaky @@ -38,7 +38,7 @@ def setup_class(cls): cls.https_url = "https://%s:%d" % (cls.https_host, cls.https_port) cls.https_url_alt = "https://%s:%d" % (cls.https_host_alt, cls.https_port) cls.proxy_url = "http://%s:%d" % (cls.proxy_host, cls.proxy_port) - cls.https_proxy_url = "https://%s:%d" % (cls.proxy_host, cls.https_proxy_port,) + cls.https_proxy_url = "https://%s:%d" % (cls.proxy_host, cls.https_proxy_port) # Generate another CA to test verification failure cls.certs_dir = tempfile.mkdtemp() @@ -60,7 +60,17 @@ def test_basic_proxy(self): r = http.request("GET", "%s/" % self.https_url) assert r.status == 200 + @onlyPy3 def test_https_proxy(self): + with proxy_from_url(self.https_proxy_url, ca_certs=DEFAULT_CA) as https: + r = https.request("GET", "%s/" % self.http_url) + assert r.status == 200 + + r = https.request("GET", "%s/" % self.https_url) + assert r.status == 200 + + @onlyPy2 + def test_https_proxy_not_supported(self): with proxy_from_url(self.https_proxy_url, ca_certs=DEFAULT_CA) as https: r = https.request("GET", "%s/" % self.http_url) assert r.status == 200 @@ -68,13 +78,16 @@ def test_https_proxy(self): with pytest.raises(ProxySchemeUnsupported): https.request("GET", "%s/" % self.https_url) + def test_https_proxy_insecure(self): with proxy_from_url( self.https_proxy_url, ca_certs=DEFAULT_CA, _allow_https_proxy_to_see_traffic=True, ) as https: r = https.request("GET", "%s/" % self.http_url) - https.request("GET", "%s/" % self.https_url) + assert r.status == 200 + + r = https.request("GET", "%s/" % self.https_url) assert r.status == 200 def test_nagle_proxy(self): @@ -299,6 +312,7 @@ def test_headers(self): self.https_port, ) + @onlyPy3 def test_https_headers(self): with proxy_from_url( self.https_proxy_url, @@ -325,19 +339,34 @@ def test_https_headers(self): self.http_port, ) - with pytest.raises(ProxySchemeUnsupported): - http.request_encode_url("GET", "%s/headers" % self.https_url) - - r = http.request_encode_url( - "GET", "%s/headers" % self.http_url, headers={"Baz": "quux"} + r = http.request_encode_body( + "GET", "%s/headers" % self.https_url, headers={"Baz": "quux"} ) returned_headers = json.loads(r.data.decode()) assert returned_headers.get("Foo") is None assert returned_headers.get("Baz") == "quux" + assert returned_headers.get("Hickory") is None + assert returned_headers.get("Host") == "%s:%s" % ( + self.https_host, + self.https_port, + ) + + def test_https_headers_insecure(self): + with proxy_from_url( + self.https_proxy_url, + headers={"Foo": "bar"}, + proxy_headers={"Hickory": "dickory"}, + ca_certs=DEFAULT_CA, + _allow_https_proxy_to_see_traffic=True, + ) as http: + + r = http.request_encode_url("GET", "%s/headers" % self.https_url) + returned_headers = json.loads(r.data.decode()) + assert returned_headers.get("Foo") == "bar" assert returned_headers.get("Hickory") == "dickory" assert returned_headers.get("Host") == "%s:%s" % ( - self.http_host, - self.http_port, + self.https_host, + self.https_port, ) def test_headerdict(self):