diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst
index 1442975f48..84c0eef1f5 100644
--- a/docs/advanced-usage.rst
+++ b/docs/advanced-usage.rst
@@ -122,10 +122,24 @@ HTTP proxy::
The usage of :class:`~poolmanager.ProxyManager` is the same as
:class:`~poolmanager.PoolManager`.
-You can use :class:`~contrib.socks.SOCKSProxyManager` to connect to SOCKS4 or
-SOCKS5 proxies. In order to use SOCKS proxies you will need to install
-`PySocks `_ or install urllib3 with the
-``socks`` extra::
+You can connect to a proxy using HTTP, HTTPS or SOCKS. urllib3's behavior will
+be different depending on the type of proxy you selected and the destination
+you're contacting.
+
+When contacting an HTTP website through an HTTP or HTTPS proxy, the request will
+be forwarded with the `absolute URI
+<https://tools.ietf.org/html/rfc7230#section-5.3.2>`_.
+
+When contacting an HTTPS website through an HTTP proxy, a TCP tunnel will be
+established with an HTTP CONNECT. Afterward a TLS connection will be established
+with the destination and your request will be sent.
+
+Contacting HTTPS websites through HTTPS proxies is currently not supported.
+
+For SOCKS, you can use :class:`~contrib.socks.SOCKSProxyManager` to connect to
+SOCKS4 or SOCKS5 proxies. In order to use SOCKS proxies you will need to
+install `PySocks <https://pypi.org/project/PySocks/>`_ or install urllib3 with
+the ``socks`` extra::
pip install urllib3[socks]
diff --git a/dummyserver/proxy.py b/dummyserver/proxy.py
index c4f0b824f7..42f293104d 100755
--- a/dummyserver/proxy.py
+++ b/dummyserver/proxy.py
@@ -34,6 +34,7 @@
import tornado.iostream
import tornado.web
import tornado.httpclient
+import ssl
__all__ = ["ProxyHandler", "run_proxy"]
@@ -66,6 +67,12 @@ def handle_response(response):
self.write(response.body)
self.finish()
+ upstream_ca_certs = self.application.settings.get("upstream_ca_certs", None)
+ ssl_options = None
+
+ if upstream_ca_certs:
+ ssl_options = ssl.create_default_context(cafile=upstream_ca_certs)
+
req = tornado.httpclient.HTTPRequest(
url=self.request.uri,
method=self.request.method,
@@ -73,6 +80,7 @@ def handle_response(response):
headers=self.request.headers,
follow_redirects=False,
allow_nonstandard_methods=True,
+ ssl_options=ssl_options,
)
client = tornado.httpclient.AsyncHTTPClient()
diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py
index 412f5dc7f7..a55a8c18f3 100644
--- a/dummyserver/testcase.py
+++ b/dummyserver/testcase.py
@@ -180,6 +180,14 @@ def setup_class(cls):
app, cls.io_loop, None, "http", cls.proxy_host
)
+ upstream_ca_certs = cls.https_certs.get("ca_certs", None)
+ app = web.Application(
+ [(r".*", ProxyHandler)], upstream_ca_certs=upstream_ca_certs
+ )
+ cls.https_proxy_server, cls.https_proxy_port = run_tornado_app(
+ app, cls.io_loop, cls.https_certs, "https", cls.proxy_host
+ )
+
cls.server_thread = run_loop_in_thread(cls.io_loop)
@classmethod
@@ -187,6 +195,7 @@ def teardown_class(cls):
cls.io_loop.add_callback(cls.http_server.stop)
cls.io_loop.add_callback(cls.https_server.stop)
cls.io_loop.add_callback(cls.proxy_server.stop)
+ cls.io_loop.add_callback(cls.https_proxy_server.stop)
cls.io_loop.add_callback(cls.io_loop.stop)
cls.server_thread.join()
diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py
index 80f638cf64..91b07e9610 100644
--- a/src/urllib3/connection.py
+++ b/src/urllib3/connection.py
@@ -111,7 +111,6 @@ def __init__(self, *args, **kw):
#: The socket options provided by the user. If no options are
#: provided, we use the default options.
self.socket_options = kw.pop("socket_options", self.default_socket_options)
-
_HTTPConnection.__init__(self, *args, **kw)
@property
@@ -174,10 +173,13 @@ def _new_conn(self):
return conn
+ def _is_using_tunnel(self):
+ # Google App Engine's httplib does not define _tunnel_host
+ return getattr(self, "_tunnel_host", None)
+
def _prepare_conn(self, conn):
self.sock = conn
- # Google App Engine's httplib does not define _tunnel_host
- if getattr(self, "_tunnel_host", None):
+ if self._is_using_tunnel():
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
@@ -309,9 +311,9 @@ def connect(self):
conn = self._new_conn()
hostname = self.host
- # Google App Engine's httplib does not define _tunnel_host
- if getattr(self, "_tunnel_host", None):
+ if self._is_using_tunnel():
self.sock = conn
+
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py
index 174fe6c2e1..492590fb9e 100644
--- a/src/urllib3/connectionpool.py
+++ b/src/urllib3/connectionpool.py
@@ -634,10 +634,10 @@ def urlopen(
# [1]
release_this_conn = release_conn
- # Merge the proxy headers. Only do this in HTTP. We have to copy the
- # headers dict so we can safely change it without those changes being
- # reflected in anyone else's copy.
- if self.scheme == "http":
+ # Merge the proxy headers. Only done when not using HTTP CONNECT. We
+ # have to copy the headers dict so we can safely change it without those
+ # changes being reflected in anyone else's copy.
+ if self.scheme == "http" or (self.proxy and self.proxy.scheme == "https"):
headers = headers.copy()
headers.update(self.proxy_headers)
@@ -925,10 +925,15 @@ def _prepare_conn(self, conn):
def _prepare_proxy(self, conn):
"""
- Establish tunnel connection early, because otherwise httplib
- would improperly set Host: header to proxy's IP:port.
+ Establishes a tunnel connection through HTTP CONNECT.
+
+ Tunnel connection is established early because otherwise httplib would
+ improperly set Host: header to proxy's IP:port.
"""
- conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)
+
+ if self.proxy.scheme != "https":
+ conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)
+
conn.connect()
def _new_conn(self):
diff --git a/src/urllib3/exceptions.py b/src/urllib3/exceptions.py
index 3799f8ef9d..8fbf123bf3 100644
--- a/src/urllib3/exceptions.py
+++ b/src/urllib3/exceptions.py
@@ -259,6 +259,11 @@ def __init__(self, scheme):
super(ProxySchemeUnknown, self).__init__(message)
+class ProxySchemeUnsupported(ValueError):
+ "Fetching HTTPS resources through HTTPS proxies is unsupported"
+ pass
+
+
class HeaderParsingError(HTTPError):
"Raised by assert_header_parsing, but we convert it to a log.warning statement."
diff --git a/src/urllib3/poolmanager.py b/src/urllib3/poolmanager.py
index e2bd3bd8db..db7ce8c39a 100644
--- a/src/urllib3/poolmanager.py
+++ b/src/urllib3/poolmanager.py
@@ -8,10 +8,11 @@
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import (
+ HTTPWarning,
LocationValueError,
MaxRetryError,
ProxySchemeUnknown,
- InvalidProxyConfigurationWarning,
+ ProxySchemeUnsupported,
)
from .packages import six
from .packages.six.moves.urllib.parse import urljoin
@@ -23,6 +24,12 @@
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
+class InvalidProxyConfigurationWarning(HTTPWarning):
+ """Raised when a user has an HTTPS proxy without enabling HTTPS proxies."""
+
+ pass
+
+
log = logging.getLogger(__name__)
SSL_KEYWORDS = (
@@ -312,6 +319,18 @@ def _merge_pool_kwargs(self, override):
base_pool_kwargs[key] = value
return base_pool_kwargs
+ def _proxy_requires_url_absolute_form(self, parsed_url):
+ """
+ Indicates if the proxy requires the complete destination URL in the
+ request.
+
+ Normally this is only needed when not using an HTTP CONNECT tunnel.
+ """
+ if self.proxy is None:
+ return False
+
+ return parsed_url.scheme == "http" or self.proxy.scheme == "https"
+
def urlopen(self, method, url, redirect=True, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
@@ -330,7 +349,7 @@ def urlopen(self, method, url, redirect=True, **kw):
if "headers" not in kw:
kw["headers"] = self.headers.copy()
- if self.proxy is not None and u.scheme == "http":
+ if self._proxy_requires_url_absolute_form(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
@@ -392,6 +411,12 @@ class ProxyManager(PoolManager):
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
+ :param _allow_https_proxy_to_see_traffic:
+ Allows forwarding of HTTPS requests to HTTPS proxies. The proxy will
+ have visibility of all the traffic sent. ONLY USE IF YOU KNOW WHAT
+ YOU'RE DOING. This flag might be removed at any time in any future
+ update.
+
Example:
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
>>> r1 = proxy.request('GET', 'http://google.com/')
@@ -411,6 +436,7 @@ def __init__(
num_pools=10,
headers=None,
proxy_headers=None,
+ _allow_https_proxy_to_see_traffic=False,
**connection_pool_kw
):
@@ -421,19 +447,22 @@ def __init__(
proxy_url.port,
)
proxy = parse_url(proxy_url)
- if not proxy.port:
- port = port_by_scheme.get(proxy.scheme, 80)
- proxy = proxy._replace(port=port)
if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)
+ if not proxy.port:
+ port = port_by_scheme.get(proxy.scheme, 80)
+ proxy = proxy._replace(port=port)
+
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
connection_pool_kw["_proxy"] = self.proxy
connection_pool_kw["_proxy_headers"] = self.proxy_headers
+ self.allow_insecure_proxy = _allow_https_proxy_to_see_traffic
+
super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)
def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
@@ -462,15 +491,22 @@ def _set_proxy_headers(self, url, headers=None):
return headers_
def _validate_proxy_scheme_url_selection(self, url_scheme):
- if url_scheme == "https" and self.proxy.scheme == "https":
+ if (
+ url_scheme == "https"
+ and self.proxy.scheme == "https"
+ and not self.allow_insecure_proxy
+ ):
warnings.warn(
"Your proxy configuration specified an HTTPS scheme for the proxy. "
"Are you sure you want to use HTTPS to contact the proxy? "
- "This most likely indicates an error in your configuration. "
- "Read this issue for more info: "
- "https://github.com/urllib3/urllib3/issues/1850",
+ "This most likely indicates an error in your configuration. "
+ "If you are sure you want to use HTTPS to contact the proxy, enable "
+ "the _allow_https_proxy_to_see_traffic flag.",
InvalidProxyConfigurationWarning,
- stacklevel=3,
+ )
+
+ raise ProxySchemeUnsupported(
+ "Contacting HTTPS destinations through HTTPS proxies is not supported."
)
def urlopen(self, method, url, redirect=True, **kw):
@@ -478,10 +514,11 @@ def urlopen(self, method, url, redirect=True, **kw):
u = parse_url(url)
self._validate_proxy_scheme_url_selection(u.scheme)
- if u.scheme == "http":
- # For proxied HTTPS requests, httplib sets the necessary headers
- # on the CONNECT to the proxy. For HTTP, we'll definitely
- # need to set 'Host' at the very least.
+ if u.scheme == "http" or self.proxy.scheme == "https":
+ # For connections using HTTP CONNECT, httplib sets the necessary
+ # headers on the CONNECT to the proxy. For HTTP or when talking
+ # HTTPS to the proxy, we'll definitely need to set 'Host' at the
+ # very least.
headers = kw.get("headers", self.headers)
kw["headers"] = self._set_proxy_headers(url, headers)
diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py
index 0fcbe85eb4..0e1b13af33 100644
--- a/test/test_proxymanager.py
+++ b/test/test_proxymanager.py
@@ -8,12 +8,15 @@
ProxyError,
NewConnectionError,
)
+from urllib3.util.url import parse_url
class TestProxyManager(object):
- def test_proxy_headers(self):
+ @pytest.mark.parametrize("proxy_scheme", ["http", "https"])
+ def test_proxy_headers(self, proxy_scheme):
url = "http://pypi.org/project/urllib3/"
- with ProxyManager("http://something:1234") as p:
+ proxy_url = "{}://something:1234".format(proxy_scheme)
+ with ProxyManager(proxy_url) as p:
# Verify default headers
default_headers = {"Accept": "*/*", "Host": "pypi.org"}
headers = p._set_proxy_headers(url)
diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py
index 8993ed1d06..acdb0729bb 100644
--- a/test/with_dummyserver/test_proxy_poolmanager.py
+++ b/test/with_dummyserver/test_proxy_poolmanager.py
@@ -20,6 +20,7 @@
ProxyError,
ConnectTimeoutError,
InvalidProxyConfigurationWarning,
+ ProxySchemeUnsupported,
)
from urllib3.connectionpool import connection_from_url, VerifiedHTTPSConnection
@@ -38,6 +39,7 @@ def setup_class(cls):
cls.https_url = "https://%s:%d" % (cls.https_host, cls.https_port)
cls.https_url_alt = "https://%s:%d" % (cls.https_host_alt, cls.https_port)
cls.proxy_url = "http://%s:%d" % (cls.proxy_host, cls.proxy_port)
+ cls.https_proxy_url = "https://%s:%d" % (cls.proxy_host, cls.https_proxy_port,)
# This URL is used only to test that a warning is
# raised due to an improper config. urllib3 doesn't
@@ -80,6 +82,24 @@ def test_https_proxy_warning(self):
"https://github.com/urllib3/urllib3/issues/1850"
)
+ def test_https_proxy(self):
+ with proxy_from_url(self.https_proxy_url, ca_certs=DEFAULT_CA) as https:
+ r = https.request("GET", "%s/" % self.http_url)
+ assert r.status == 200
+
+ with pytest.raises(ProxySchemeUnsupported):
+ https.request("GET", "%s/" % self.https_url)
+
+ with proxy_from_url(
+ self.https_proxy_url,
+ ca_certs=DEFAULT_CA,
+ _allow_https_proxy_to_see_traffic=True,
+ ) as https:
+ r = https.request("GET", "%s/" % self.http_url)
+ https.request("GET", "%s/" % self.https_url)
+ assert r.status == 200
+
+
def test_nagle_proxy(self):
""" Test that proxy connections do not have TCP_NODELAY turned on """
with ProxyManager(self.proxy_url) as http:
@@ -302,6 +322,47 @@ def test_headers(self):
self.https_port,
)
+ def test_https_headers(self):
+ with proxy_from_url(
+ self.https_proxy_url,
+ headers={"Foo": "bar"},
+ proxy_headers={"Hickory": "dickory"},
+ ca_certs=DEFAULT_CA,
+ ) as http:
+
+ r = http.request_encode_url("GET", "%s/headers" % self.http_url)
+ returned_headers = json.loads(r.data.decode())
+ assert returned_headers.get("Foo") == "bar"
+ assert returned_headers.get("Hickory") == "dickory"
+ assert returned_headers.get("Host") == "%s:%s" % (
+ self.http_host,
+ self.http_port,
+ )
+
+ r = http.request_encode_url("GET", "%s/headers" % self.http_url_alt)
+ returned_headers = json.loads(r.data.decode())
+ assert returned_headers.get("Foo") == "bar"
+ assert returned_headers.get("Hickory") == "dickory"
+ assert returned_headers.get("Host") == "%s:%s" % (
+ self.http_host_alt,
+ self.http_port,
+ )
+
+ with pytest.raises(ProxySchemeUnsupported):
+ http.request_encode_url("GET", "%s/headers" % self.https_url)
+
+ r = http.request_encode_url(
+ "GET", "%s/headers" % self.http_url, headers={"Baz": "quux"}
+ )
+ returned_headers = json.loads(r.data.decode())
+ assert returned_headers.get("Foo") is None
+ assert returned_headers.get("Baz") == "quux"
+ assert returned_headers.get("Hickory") == "dickory"
+ assert returned_headers.get("Host") == "%s:%s" % (
+ self.http_host,
+ self.http_port,
+ )
+
def test_headerdict(self):
default_headers = HTTPHeaderDict(a="b")
proxy_headers = HTTPHeaderDict()