Skip to content

Commit

Permalink
Add support for HTTPS connections to proxies. (urllib3#1679)
Browse files Browse the repository at this point in the history
* Add support to talk HTTPS to proxies.

Currently there's no way to validate the identity of the proxy you might be
connecting to. Proxies supporting HTTPS endpoints are becoming more common
and we need to extend the support for them.

When an HTTPS proxy is provided, instead of doing the HTTP CONNECT,
we'll forward any requests directly to the proxy and ultimately to the
destination.

* Fix proxy_headers missing on HTTPS proxy connections.

* blackfmt missing files.

* Prevent usage of HTTPS proxies when fetching HTTPS resources.

- Will be supported by default when we can do TLS within TLS.

* Update proxy documentation with more information.

* Renamed flag for HTTPS websites through HTTPS proxies.
* Added myself to contributors.

* Documentation and contributors fixes.

* Removed mention that TLS in TLS is being developed as requested.
* Space in between my name and the github page.

* Add flag to enable HTTPS proxy support.

Now that we're adding support for HTTPS proxies we want to avoid
a breaking change with clients that had an improper proxy configuration.

For now, we're adding a warning and defaulting to the previous behavior.
In the future we'll change the behavior to enable HTTPS proxies by
default.

* Remove guard flag, error out on HTTPS/HTTPS.

As requested in the last revision for the PR:

- Removed the _enable_https_proxies flag. Instead the feature will be
  enabled and will error out on invalid configurations. (HTTPS + HTTPS)
- Other comments: rename a method, parentheses to clarify order of
  operations.
  • Loading branch information
jalopezsilva committed Aug 11, 2020
1 parent 3c3fb02 commit bf37ba1
Show file tree
Hide file tree
Showing 9 changed files with 176 additions and 32 deletions.
22 changes: 18 additions & 4 deletions docs/advanced-usage.rst
Expand Up @@ -122,10 +122,24 @@ HTTP proxy::
The usage of :class:`~poolmanager.ProxyManager` is the same as
:class:`~poolmanager.PoolManager`.

You can use :class:`~contrib.socks.SOCKSProxyManager` to connect to SOCKS4 or
SOCKS5 proxies. In order to use SOCKS proxies you will need to install
`PySocks <https://pypi.org/project/PySocks/>`_ or install urllib3 with the
``socks`` extra::
You can connect to a proxy using HTTP, HTTPS or SOCKS. urllib3's behavior will
be different depending on the type of proxy you selected and the destination
you're contacting.

When contacting an HTTP website through an HTTP or HTTPS proxy, the request will
be forwarded with the `absolute URI
<https://tools.ietf.org/html/rfc7230#section-5.3.2>`_.

When contacting an HTTPS website through an HTTP proxy, a TCP tunnel will be
established with an HTTP CONNECT. Afterward a TLS connection will be established
with the destination and your request will be sent.

Contacting HTTPS websites through HTTPS proxies is currently not supported.

For SOCKS, you can use :class:`~contrib.socks.SOCKSProxyManager` to connect to
SOCKS4 or SOCKS5 proxies. In order to use SOCKS proxies you will need to
install `PySocks <https://pypi.org/project/PySocks/>`_ or install urllib3 with
the ``socks`` extra::

pip install urllib3[socks]

Expand Down
8 changes: 8 additions & 0 deletions dummyserver/proxy.py
Expand Up @@ -34,6 +34,7 @@
import tornado.iostream
import tornado.web
import tornado.httpclient
import ssl

__all__ = ["ProxyHandler", "run_proxy"]

Expand Down Expand Up @@ -66,13 +67,20 @@ def handle_response(response):
self.write(response.body)
self.finish()

upstream_ca_certs = self.application.settings.get("upstream_ca_certs", None)
ssl_options = None

if upstream_ca_certs:
ssl_options = ssl.create_default_context(cafile=upstream_ca_certs)

req = tornado.httpclient.HTTPRequest(
url=self.request.uri,
method=self.request.method,
body=self.request.body,
headers=self.request.headers,
follow_redirects=False,
allow_nonstandard_methods=True,
ssl_options=ssl_options,
)

client = tornado.httpclient.AsyncHTTPClient()
Expand Down
9 changes: 9 additions & 0 deletions dummyserver/testcase.py
Expand Up @@ -180,13 +180,22 @@ def setup_class(cls):
app, cls.io_loop, None, "http", cls.proxy_host
)

upstream_ca_certs = cls.https_certs.get("ca_certs", None)
app = web.Application(
[(r".*", ProxyHandler)], upstream_ca_certs=upstream_ca_certs
)
cls.https_proxy_server, cls.https_proxy_port = run_tornado_app(
app, cls.io_loop, cls.https_certs, "https", cls.proxy_host
)

cls.server_thread = run_loop_in_thread(cls.io_loop)

@classmethod
def teardown_class(cls):
cls.io_loop.add_callback(cls.http_server.stop)
cls.io_loop.add_callback(cls.https_server.stop)
cls.io_loop.add_callback(cls.proxy_server.stop)
cls.io_loop.add_callback(cls.https_proxy_server.stop)
cls.io_loop.add_callback(cls.io_loop.stop)
cls.server_thread.join()

Expand Down
12 changes: 7 additions & 5 deletions src/urllib3/connection.py
Expand Up @@ -111,7 +111,6 @@ def __init__(self, *args, **kw):
#: The socket options provided by the user. If no options are
#: provided, we use the default options.
self.socket_options = kw.pop("socket_options", self.default_socket_options)

_HTTPConnection.__init__(self, *args, **kw)

@property
Expand Down Expand Up @@ -174,10 +173,13 @@ def _new_conn(self):

return conn

def _is_using_tunnel(self):
    """
    Tell whether a tunnel host has been configured on this connection.

    :return: Whatever ``self._tunnel_host`` holds, or ``None`` when the
        attribute does not exist. Callers use the result for its
        truthiness only.
    """
    # Google App Engine's httplib does not define _tunnel_host, so the
    # attribute is looked up defensively instead of being read directly.
    return getattr(self, "_tunnel_host", None)

def _prepare_conn(self, conn):
self.sock = conn
# Google App Engine's httplib does not define _tunnel_host
if getattr(self, "_tunnel_host", None):
if self._is_using_tunnel():
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
Expand Down Expand Up @@ -309,9 +311,9 @@ def connect(self):
conn = self._new_conn()
hostname = self.host

# Google App Engine's httplib does not define _tunnel_host
if getattr(self, "_tunnel_host", None):
if self._is_using_tunnel():
self.sock = conn

# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
Expand Down
19 changes: 12 additions & 7 deletions src/urllib3/connectionpool.py
Expand Up @@ -634,10 +634,10 @@ def urlopen(
# [1] <https://github.com/urllib3/urllib3/issues/651>
release_this_conn = release_conn

# Merge the proxy headers. Only do this in HTTP. We have to copy the
# headers dict so we can safely change it without those changes being
# reflected in anyone else's copy.
if self.scheme == "http":
# Merge the proxy headers. Only done when not using HTTP CONNECT. We
# have to copy the headers dict so we can safely change it without those
# changes being reflected in anyone else's copy.
if self.scheme == "http" or (self.proxy and self.proxy.scheme == "https"):
headers = headers.copy()
headers.update(self.proxy_headers)

Expand Down Expand Up @@ -925,10 +925,15 @@ def _prepare_conn(self, conn):

def _prepare_proxy(self, conn):
"""
Establish tunnel connection early, because otherwise httplib
would improperly set Host: header to proxy's IP:port.
Establishes a tunnel connection through HTTP CONNECT.
Tunnel connection is established early because otherwise httplib would
improperly set Host: header to proxy's IP:port.
"""
conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

if self.proxy.scheme != "https":
conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

conn.connect()

def _new_conn(self):
Expand Down
5 changes: 5 additions & 0 deletions src/urllib3/exceptions.py
Expand Up @@ -259,6 +259,11 @@ def __init__(self, scheme):
super(ProxySchemeUnknown, self).__init__(message)


class ProxySchemeUnsupported(ValueError):
    """Fetching HTTPS resources through HTTPS proxies is unsupported."""


class HeaderParsingError(HTTPError):
"Raised by assert_header_parsing, but we convert it to a log.warning statement."

Expand Down
65 changes: 51 additions & 14 deletions src/urllib3/poolmanager.py
Expand Up @@ -8,10 +8,11 @@
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import (
HTTPWarning,
LocationValueError,
MaxRetryError,
ProxySchemeUnknown,
InvalidProxyConfigurationWarning,
ProxySchemeUnsupported,
)
from .packages import six
from .packages.six.moves.urllib.parse import urljoin
Expand All @@ -23,6 +24,12 @@
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


class InvalidProxyConfigurationWarning(HTTPWarning):
    """Raised when a user has an HTTPS proxy without enabling HTTPS proxies."""


log = logging.getLogger(__name__)

SSL_KEYWORDS = (
Expand Down Expand Up @@ -312,6 +319,18 @@ def _merge_pool_kwargs(self, override):
base_pool_kwargs[key] = value
return base_pool_kwargs

def _proxy_requires_url_absolute_form(self, parsed_url):
    """
    Indicates if the proxy requires the complete destination URL in the
    request.

    Normally this is only needed when not using an HTTP CONNECT tunnel.

    :param parsed_url:
        Parsed destination URL; only its ``scheme`` attribute is read.

    :return:
        ``False`` when no proxy is configured (``self.proxy`` is ``None``).
        Otherwise ``True`` if the destination scheme is ``"http"`` or the
        proxy scheme is ``"https"``.
    """
    if self.proxy is None:
        return False

    return parsed_url.scheme == "http" or self.proxy.scheme == "https"

def urlopen(self, method, url, redirect=True, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
Expand All @@ -330,7 +349,7 @@ def urlopen(self, method, url, redirect=True, **kw):
if "headers" not in kw:
kw["headers"] = self.headers.copy()

if self.proxy is not None and u.scheme == "http":
if self._proxy_requires_url_absolute_form(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
Expand Down Expand Up @@ -392,6 +411,12 @@ class ProxyManager(PoolManager):
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
:param _allow_https_proxy_to_see_traffic:
Allows forwarding of HTTPS requests to HTTPS proxies. The proxy will
have visibility of all the traffic sent. ONLY USE IF YOU KNOW WHAT
YOU'RE DOING. This flag might be removed at any time in any future
update.
Example:
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
>>> r1 = proxy.request('GET', 'http://google.com/')
Expand All @@ -411,6 +436,7 @@ def __init__(
num_pools=10,
headers=None,
proxy_headers=None,
_allow_https_proxy_to_see_traffic=False,
**connection_pool_kw
):

Expand All @@ -421,19 +447,22 @@ def __init__(
proxy_url.port,
)
proxy = parse_url(proxy_url)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)

if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)

if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)

self.proxy = proxy
self.proxy_headers = proxy_headers or {}

connection_pool_kw["_proxy"] = self.proxy
connection_pool_kw["_proxy_headers"] = self.proxy_headers

self.allow_insecure_proxy = _allow_https_proxy_to_see_traffic

super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
Expand Down Expand Up @@ -462,26 +491,34 @@ def _set_proxy_headers(self, url, headers=None):
return headers_

def _validate_proxy_scheme_url_selection(self, url_scheme):
if url_scheme == "https" and self.proxy.scheme == "https":
if (
url_scheme == "https"
and self.proxy.scheme == "https"
and not self.allow_insecure_proxy
):
warnings.warn(
"Your proxy configuration specified an HTTPS scheme for the proxy. "
"Are you sure you want to use HTTPS to contact the proxy? "
"This most likely indicates an error in your configuration. "
"Read this issue for more info: "
"https://github.com/urllib3/urllib3/issues/1850",
"This most likely indicates an error in your configuration."
"If you are sure you want use HTTPS to contact the proxy, enable "
"the _allow_https_proxy_to_see_traffic.",
InvalidProxyConfigurationWarning,
stacklevel=3,
)

raise ProxySchemeUnsupported(
"Contacting HTTPS destinations through HTTPS proxies is not supported."
)

def urlopen(self, method, url, redirect=True, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
u = parse_url(url)
self._validate_proxy_scheme_url_selection(u.scheme)

if u.scheme == "http":
# For proxied HTTPS requests, httplib sets the necessary headers
# on the CONNECT to the proxy. For HTTP, we'll definitely
# need to set 'Host' at the very least.
if u.scheme == "http" or self.proxy.scheme == "https":
# For connections using HTTP CONNECT, httplib sets the necessary
# headers on the CONNECT to the proxy. For HTTP or when talking
# HTTPS to the proxy, we'll definitely need to set 'Host' at the
# very least.
headers = kw.get("headers", self.headers)
kw["headers"] = self._set_proxy_headers(url, headers)

Expand Down
7 changes: 5 additions & 2 deletions test/test_proxymanager.py
Expand Up @@ -8,12 +8,15 @@
ProxyError,
NewConnectionError,
)
from urllib3.util.url import parse_url


class TestProxyManager(object):
def test_proxy_headers(self):
@pytest.mark.parametrize("proxy_scheme", ["http", "https"])
def test_proxy_headers(self, proxy_scheme):
url = "http://pypi.org/project/urllib3/"
with ProxyManager("http://something:1234") as p:
proxy_url = "{}://something:1234".format(proxy_scheme)
with ProxyManager(proxy_url) as p:
# Verify default headers
default_headers = {"Accept": "*/*", "Host": "pypi.org"}
headers = p._set_proxy_headers(url)
Expand Down

0 comments on commit bf37ba1

Please sign in to comment.