diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst index 1442975f48..84c0eef1f5 100644 --- a/docs/advanced-usage.rst +++ b/docs/advanced-usage.rst @@ -122,10 +122,24 @@ HTTP proxy:: The usage of :class:`~poolmanager.ProxyManager` is the same as :class:`~poolmanager.PoolManager`. -You can use :class:`~contrib.socks.SOCKSProxyManager` to connect to SOCKS4 or -SOCKS5 proxies. In order to use SOCKS proxies you will need to install -`PySocks `_ or install urllib3 with the -``socks`` extra:: +You can connect to a proxy using HTTP, HTTPS or SOCKS. urllib3's behavior will +be different depending on the type of proxy you selected and the destination +you're contacting. + +When contacting a HTTP website through a HTTP or HTTPS proxy, the request will +be forwarded with the `absolute URI +`_. + +When contacting a HTTPS website through a HTTP proxy, a TCP tunnel will be +established with a HTTP CONNECT. Afterward a TLS connection will be established +with the destination and your request will be sent. + +Contacting HTTPS websites through HTTPS proxies is currently not supported. + +For SOCKS, you can use :class:`~contrib.socks.SOCKSProxyManager` to connect to +SOCKS4 or SOCKS5 proxies. In order to use SOCKS proxies you will need to +install `PySocks `_ or install urllib3 with +the ``socks`` extra:: pip install urllib3[socks] diff --git a/dummyserver/proxy.py b/dummyserver/proxy.py index c4f0b824f7..42f293104d 100755 --- a/dummyserver/proxy.py +++ b/dummyserver/proxy.py @@ -34,6 +34,7 @@ import tornado.iostream import tornado.web import tornado.httpclient +import ssl __all__ = ["ProxyHandler", "run_proxy"] @@ -66,6 +67,12 @@ def handle_response(response): self.write(response.body) self.finish() + upstream_ca_certs = self.application.settings.get("upstream_ca_certs", None) + ssl_options = None + + if upstream_ca_certs: + ssl_options = ssl.create_default_context(cafile=upstream_ca_certs) + req = tornado.httpclient.HTTPRequest( url=self.request.uri, method=self.request.method, @@ -73,6 +80,7 @@ def handle_response(response): headers=self.request.headers, follow_redirects=False, allow_nonstandard_methods=True, + ssl_options=ssl_options, ) client = tornado.httpclient.AsyncHTTPClient() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 412f5dc7f7..a55a8c18f3 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -180,6 +180,14 @@ def setup_class(cls): app, cls.io_loop, None, "http", cls.proxy_host ) + upstream_ca_certs = cls.https_certs.get("ca_certs", None) + app = web.Application( + [(r".*", ProxyHandler)], upstream_ca_certs=upstream_ca_certs + ) + cls.https_proxy_server, cls.https_proxy_port = run_tornado_app( + app, cls.io_loop, cls.https_certs, "https", cls.proxy_host + ) + cls.server_thread = run_loop_in_thread(cls.io_loop) @classmethod @@ -187,6 +195,7 @@ def teardown_class(cls): cls.io_loop.add_callback(cls.http_server.stop) cls.io_loop.add_callback(cls.https_server.stop) cls.io_loop.add_callback(cls.proxy_server.stop) + cls.io_loop.add_callback(cls.https_proxy_server.stop) cls.io_loop.add_callback(cls.io_loop.stop) cls.server_thread.join() diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index 80f638cf64..91b07e9610 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -111,7 +111,6 @@ def __init__(self, *args, **kw): #: The socket options provided by the user. If no options are #: provided, we use the default options. self.socket_options = kw.pop("socket_options", self.default_socket_options) - _HTTPConnection.__init__(self, *args, **kw) @property @@ -174,10 +173,13 @@ def _new_conn(self): return conn + def _is_using_tunnel(self): + # Google App Engine's httplib does not define _tunnel_host + return getattr(self, "_tunnel_host", None) + def _prepare_conn(self, conn): self.sock = conn - # Google App Engine's httplib does not define _tunnel_host - if getattr(self, "_tunnel_host", None): + if self._is_using_tunnel(): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() # Mark this connection as not reusable @@ -309,9 +311,9 @@ def connect(self): conn = self._new_conn() hostname = self.host - # Google App Engine's httplib does not define _tunnel_host - if getattr(self, "_tunnel_host", None): + if self._is_using_tunnel(): self.sock = conn + # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index 174fe6c2e1..492590fb9e 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -634,10 +634,10 @@ def urlopen( # [1] release_this_conn = release_conn - # Merge the proxy headers. Only do this in HTTP. We have to copy the - # headers dict so we can safely change it without those changes being - # reflected in anyone else's copy. - if self.scheme == "http": + # Merge the proxy headers. Only done when not using HTTP CONNECT. We + # have to copy the headers dict so we can safely change it without those + # changes being reflected in anyone else's copy. + if self.scheme == "http" or (self.proxy and self.proxy.scheme == "https"): headers = headers.copy() headers.update(self.proxy_headers) @@ -925,10 +925,15 @@ def _prepare_conn(self, conn): def _prepare_proxy(self, conn): """ - Establish tunnel connection early, because otherwise httplib - would improperly set Host: header to proxy's IP:port. + Establishes a tunnel connection through HTTP CONNECT. + + Tunnel connection is established early because otherwise httplib would + improperly set Host: header to proxy's IP:port. """ - conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) + + if self.proxy.scheme != "https": + conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) + conn.connect() def _new_conn(self): diff --git a/src/urllib3/exceptions.py b/src/urllib3/exceptions.py index 3799f8ef9d..8fbf123bf3 100644 --- a/src/urllib3/exceptions.py +++ b/src/urllib3/exceptions.py @@ -259,6 +259,11 @@ def __init__(self, scheme): super(ProxySchemeUnknown, self).__init__(message) +class ProxySchemeUnsupported(ValueError): + "Fetching HTTPS resources through HTTPS proxies is unsupported" + pass + + class HeaderParsingError(HTTPError): "Raised by assert_header_parsing, but we convert it to a log.warning statement." diff --git a/src/urllib3/poolmanager.py b/src/urllib3/poolmanager.py index e2bd3bd8db..db7ce8c39a 100644 --- a/src/urllib3/poolmanager.py +++ b/src/urllib3/poolmanager.py @@ -8,10 +8,11 @@ from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme from .exceptions import ( + HTTPWarning, LocationValueError, MaxRetryError, ProxySchemeUnknown, - InvalidProxyConfigurationWarning, + ProxySchemeUnsupported, ) from .packages import six from .packages.six.moves.urllib.parse import urljoin @@ -23,6 +24,12 @@ __all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] +class InvalidProxyConfigurationWarning(HTTPWarning): + """Raised when a user has an HTTPS proxy without enabling HTTPS proxies.""" + + pass + + log = logging.getLogger(__name__) SSL_KEYWORDS = ( @@ -312,6 +319,18 @@ def _merge_pool_kwargs(self, override): base_pool_kwargs[key] = value return base_pool_kwargs + def _proxy_requires_url_absolute_form(self, parsed_url): + """ + Indicates if the proxy requires the complete destination URL in the + request. + + Normally this is only needed when not using an HTTP CONNECT tunnel. + """ + if self.proxy is None: + return False + + return parsed_url.scheme == "http" or self.proxy.scheme == "https" + def urlopen(self, method, url, redirect=True, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` @@ -330,7 +349,7 @@ def urlopen(self, method, url, redirect=True, **kw): if "headers" not in kw: kw["headers"] = self.headers.copy() - if self.proxy is not None and u.scheme == "http": + if self._proxy_requires_url_absolute_form(u): response = conn.urlopen(method, url, **kw) else: response = conn.urlopen(method, u.request_uri, **kw) @@ -392,6 +411,12 @@ class ProxyManager(PoolManager): HTTPS/CONNECT case they are sent only once. Could be used for proxy authentication. + :param _allow_https_proxy_to_see_traffic: + Allows forwarding of HTTPS requests to HTTPS proxies. The proxy will + have visibility of all the traffic sent. ONLY USE IF YOU KNOW WHAT + YOU'RE DOING. This flag might be removed at any time in any future + update. + Example: >>> proxy = urllib3.ProxyManager('http://localhost:3128/') >>> r1 = proxy.request('GET', 'http://google.com/') @@ -411,6 +436,7 @@ def __init__( num_pools=10, headers=None, proxy_headers=None, + _allow_https_proxy_to_see_traffic=False, **connection_pool_kw ): @@ -421,19 +447,22 @@ def __init__( proxy_url.port, ) proxy = parse_url(proxy_url) - if not proxy.port: - port = port_by_scheme.get(proxy.scheme, 80) - proxy = proxy._replace(port=port) if proxy.scheme not in ("http", "https"): raise ProxySchemeUnknown(proxy.scheme) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy self.proxy_headers = proxy_headers or {} connection_pool_kw["_proxy"] = self.proxy connection_pool_kw["_proxy_headers"] = self.proxy_headers + self.allow_insecure_proxy = _allow_https_proxy_to_see_traffic + super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): @@ -462,15 +491,22 @@ def _set_proxy_headers(self, url, headers=None): return headers_ def _validate_proxy_scheme_url_selection(self, url_scheme): - if url_scheme == "https" and self.proxy.scheme == "https": + if ( + url_scheme == "https" + and self.proxy.scheme == "https" + and not self.allow_insecure_proxy + ): warnings.warn( "Your proxy configuration specified an HTTPS scheme for the proxy. " "Are you sure you want to use HTTPS to contact the proxy? " - "This most likely indicates an error in your configuration. " - "Read this issue for more info: " - "https://github.com/urllib3/urllib3/issues/1850", + "This most likely indicates an error in your configuration." + "If you are sure you want use HTTPS to contact the proxy, enable " + "the _allow_https_proxy_to_see_traffic.", InvalidProxyConfigurationWarning, - stacklevel=3, + ) + + raise ProxySchemeUnsupported( + "Contacting HTTPS destinations through HTTPS proxies is not supported." ) def urlopen(self, method, url, redirect=True, **kw): @@ -478,10 +514,11 @@ def urlopen(self, method, url, redirect=True, **kw): u = parse_url(url) self._validate_proxy_scheme_url_selection(u.scheme) - if u.scheme == "http": - # For proxied HTTPS requests, httplib sets the necessary headers - # on the CONNECT to the proxy. For HTTP, we'll definitely - # need to set 'Host' at the very least. + if u.scheme == "http" or self.proxy.scheme == "https": + # For connections using HTTP CONNECT, httplib sets the necessary + # headers on the CONNECT to the proxy. For HTTP or when talking + # HTTPS to the proxy, we'll definitely need to set 'Host' at the + # very least. headers = kw.get("headers", self.headers) kw["headers"] = self._set_proxy_headers(url, headers) diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py index 0fcbe85eb4..0e1b13af33 100644 --- a/test/test_proxymanager.py +++ b/test/test_proxymanager.py @@ -8,12 +8,15 @@ ProxyError, NewConnectionError, ) +from urllib3.util.url import parse_url class TestProxyManager(object): - def test_proxy_headers(self): + @pytest.mark.parametrize("proxy_scheme", ["http", "https"]) + def test_proxy_headers(self, proxy_scheme): url = "http://pypi.org/project/urllib3/" - with ProxyManager("http://something:1234") as p: + proxy_url = "{}://something:1234".format(proxy_scheme) + with ProxyManager(proxy_url) as p: # Verify default headers default_headers = {"Accept": "*/*", "Host": "pypi.org"} headers = p._set_proxy_headers(url) diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index 8993ed1d06..acdb0729bb 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -20,6 +20,7 @@ ProxyError, ConnectTimeoutError, InvalidProxyConfigurationWarning, + ProxySchemeUnsupported, ) from urllib3.connectionpool import connection_from_url, VerifiedHTTPSConnection @@ -38,6 +39,7 @@ def setup_class(cls): cls.https_url = "https://%s:%d" % (cls.https_host, cls.https_port) cls.https_url_alt = "https://%s:%d" % (cls.https_host_alt, cls.https_port) cls.proxy_url = "http://%s:%d" % (cls.proxy_host, cls.proxy_port) + cls.https_proxy_url = "https://%s:%d" % (cls.proxy_host, cls.https_proxy_port,) # This URL is used only to test that a warning is # raised due to an improper config. urllib3 doesn't @@ -80,6 +82,24 @@ def test_https_proxy_warning(self): "https://github.com/urllib3/urllib3/issues/1850" ) + def test_https_proxy(self): + with proxy_from_url(self.https_proxy_url, ca_certs=DEFAULT_CA) as https: + r = https.request("GET", "%s/" % self.http_url) + assert r.status == 200 + + with pytest.raises(ProxySchemeUnsupported): + https.request("GET", "%s/" % self.https_url) + + with proxy_from_url( + self.https_proxy_url, + ca_certs=DEFAULT_CA, + _allow_https_proxy_to_see_traffic=True, + ) as https: + r = https.request("GET", "%s/" % self.http_url) + https.request("GET", "%s/" % self.https_url) + assert r.status == 200 +>>>>>>> 8c7a43b4... Add support for HTTPS connections to proxies. (#1679) + def test_nagle_proxy(self): """ Test that proxy connections do not have TCP_NODELAY turned on """ with ProxyManager(self.proxy_url) as http: @@ -302,6 +322,47 @@ def test_headers(self): self.https_port, ) + def test_https_headers(self): + with proxy_from_url( + self.https_proxy_url, + headers={"Foo": "bar"}, + proxy_headers={"Hickory": "dickory"}, + ca_certs=DEFAULT_CA, + ) as http: + + r = http.request_encode_url("GET", "%s/headers" % self.http_url) + returned_headers = json.loads(r.data.decode()) + assert returned_headers.get("Foo") == "bar" + assert returned_headers.get("Hickory") == "dickory" + assert returned_headers.get("Host") == "%s:%s" % ( + self.http_host, + self.http_port, + ) + + r = http.request_encode_url("GET", "%s/headers" % self.http_url_alt) + returned_headers = json.loads(r.data.decode()) + assert returned_headers.get("Foo") == "bar" + assert returned_headers.get("Hickory") == "dickory" + assert returned_headers.get("Host") == "%s:%s" % ( + self.http_host_alt, + self.http_port, + ) + + with pytest.raises(ProxySchemeUnsupported): + http.request_encode_url("GET", "%s/headers" % self.https_url) + + r = http.request_encode_url( + "GET", "%s/headers" % self.http_url, headers={"Baz": "quux"} + ) + returned_headers = json.loads(r.data.decode()) + assert returned_headers.get("Foo") is None + assert returned_headers.get("Baz") == "quux" + assert returned_headers.get("Hickory") == "dickory" + assert returned_headers.get("Host") == "%s:%s" % ( + self.http_host, + self.http_port, + ) + def test_headerdict(self): default_headers = HTTPHeaderDict(a="b") proxy_headers = HTTPHeaderDict()