Skip to content

Commit

Permalink
Add support to talk HTTPS to proxies.
Browse files Browse the repository at this point in the history
Currently there's no way to validate the identity of the proxy you might be
connecting to. Proxies supporting HTTPS endpoints are becoming more common
and we need to extend the support for them.

When an HTTPS proxy is provided, instead of doing the HTTP CONNECT,
we'll forward any requests directly to the proxy and ultimately to the
destination.
  • Loading branch information
jalopezsilva committed Oct 3, 2019
1 parent 08cf7fd commit ffed605
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 8 deletions.
8 changes: 8 additions & 0 deletions dummyserver/proxy.py
Expand Up @@ -33,6 +33,7 @@
import tornado.iostream
import tornado.web
import tornado.httpclient
import ssl

__all__ = ["ProxyHandler", "run_proxy"]

Expand Down Expand Up @@ -65,13 +66,20 @@ def handle_response(response):
self.write(response.body)
self.finish()

upstream_ca_certs = self.application.settings.get("upstream_ca_certs", None)
ssl_options = None

if upstream_ca_certs:
ssl_options = ssl.create_default_context(cafile=upstream_ca_certs)

req = tornado.httpclient.HTTPRequest(
url=self.request.uri,
method=self.request.method,
body=self.request.body,
headers=self.request.headers,
follow_redirects=False,
allow_nonstandard_methods=True,
ssl_options=ssl_options,
)

client = tornado.httpclient.AsyncHTTPClient()
Expand Down
9 changes: 9 additions & 0 deletions dummyserver/testcase.py
Expand Up @@ -180,13 +180,22 @@ def setup_class(cls):
app, cls.io_loop, None, "http", cls.proxy_host
)

upstream_ca_certs = cls.https_certs.get("ca_certs", None)
app = web.Application(
[(r".*", ProxyHandler)], upstream_ca_certs=upstream_ca_certs
)
cls.https_proxy_server, cls.https_proxy_port = run_tornado_app(
app, cls.io_loop, cls.https_certs, "https", cls.proxy_host
)

cls.server_thread = run_loop_in_thread(cls.io_loop)

@classmethod
def teardown_class(cls):
    """
    Schedule shutdown of every test server on the IO loop, then stop the
    loop itself and wait for the loop thread to finish.
    """
    # Stop callbacks are queued in the same order the servers are listed
    # here; the loop's own stop is queued last so it follows them.
    server_attrs = (
        "http_server",
        "https_server",
        "proxy_server",
        "https_proxy_server",
    )
    for attr in server_attrs:
        cls.io_loop.add_callback(getattr(cls, attr).stop)

    cls.io_loop.add_callback(cls.io_loop.stop)
    cls.server_thread.join()

Expand Down
14 changes: 10 additions & 4 deletions src/urllib3/connection.py
Expand Up @@ -109,6 +109,9 @@ def __init__(self, *args, **kw):
#: provided, we use the default options.
self.socket_options = kw.pop("socket_options", self.default_socket_options)

# Protocol used to talk to the proxy.
self.proxy_scheme = kw.pop("proxy_scheme", None)

_HTTPConnection.__init__(self, *args, **kw)

@property
Expand Down Expand Up @@ -171,10 +174,13 @@ def _new_conn(self):

return conn

def _is_using_tunnel(self):
# Google App Engine's httplib does not define _tunnel_host
return getattr(self, "_tunnel_host", None)

def _prepare_conn(self, conn):
self.sock = conn
# Google App Engine's httplib does not define _tunnel_host
if getattr(self, "_tunnel_host", None):
if self._is_using_tunnel():
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
Expand Down Expand Up @@ -334,9 +340,9 @@ def connect(self):
conn = self._new_conn()
hostname = self.host

# Google App Engine's httplib does not define _tunnel_host
if getattr(self, "_tunnel_host", None):
if self._is_using_tunnel():
self.sock = conn

# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
Expand Down
13 changes: 10 additions & 3 deletions src/urllib3/connectionpool.py
Expand Up @@ -212,6 +212,8 @@ def __init__(
# We cannot know if the user has added default socket options, so we cannot replace the
# list.
self.conn_kw.setdefault("socket_options", [])
# Capture the proxy scheme to properly establish the connection.
self.conn_kw["proxy_scheme"] = self.proxy.scheme

def _new_conn(self):
"""
Expand Down Expand Up @@ -938,10 +940,15 @@ def _prepare_conn(self, conn):

def _prepare_proxy(self, conn):
"""
Establish tunnel connection early, because otherwise httplib
would improperly set Host: header to proxy's IP:port.
Establishes a tunnel connection through HTTP CONNECT.
Tunnel connection is established early because otherwise httplib would
improperly set Host: header to proxy's IP:port.
"""
conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

if self.proxy.scheme != "https":
conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

conn.connect()

def _new_conn(self):
Expand Down
14 changes: 13 additions & 1 deletion src/urllib3/poolmanager.py
Expand Up @@ -306,6 +306,18 @@ def _merge_pool_kwargs(self, override):
base_pool_kwargs[key] = value
return base_pool_kwargs

def _proxy_requires_complete_url(self, parsed_url):
"""
Indicates if the proxy requires the complete destination URL in the
request.
Normally this is only needed when not using an HTTP CONNECT tunnel.
"""
if self.proxy is None:
return False

return parsed_url.scheme == "http" or self.proxy.scheme == "https"

def urlopen(self, method, url, redirect=True, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
Expand All @@ -324,7 +336,7 @@ def urlopen(self, method, url, redirect=True, **kw):
if "headers" not in kw:
kw["headers"] = self.headers.copy()

if self.proxy is not None and u.scheme == "http":
if self._proxy_requires_complete_url(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
Expand Down
12 changes: 12 additions & 0 deletions test/test_proxymanager.py
@@ -1,6 +1,7 @@
import pytest

from urllib3.poolmanager import ProxyManager
from urllib3.util.url import parse_url


class TestProxyManager(object):
Expand Down Expand Up @@ -43,3 +44,14 @@ def test_invalid_scheme(self):
ProxyManager("invalid://host/p")
with pytest.raises(ValueError):
ProxyManager("invalid://host/p")

def test_proxy_tunnel(self):
    """
    Check when the complete destination URL is required, for both an HTTP
    and an HTTPS proxy, against HTTP and HTTPS destinations.
    """
    plain_target = parse_url("http://example.com")
    secure_target = parse_url("https://example.com")

    # HTTP proxy: only the plain HTTP destination needs the complete URL.
    with ProxyManager("http://proxy:8080") as manager:
        assert manager._proxy_requires_complete_url(plain_target)
        assert manager._proxy_requires_complete_url(secure_target) is False

    # HTTPS proxy: both destinations need the complete URL.
    with ProxyManager("https://proxy:8080") as manager:
        assert manager._proxy_requires_complete_url(plain_target)
        assert manager._proxy_requires_complete_url(secure_target)
9 changes: 9 additions & 0 deletions test/with_dummyserver/test_proxy_poolmanager.py
Expand Up @@ -22,6 +22,7 @@ def setup_class(cls):
cls.https_url = "https://%s:%d" % (cls.https_host, cls.https_port)
cls.https_url_alt = "https://%s:%d" % (cls.https_host_alt, cls.https_port)
cls.proxy_url = "http://%s:%d" % (cls.proxy_host, cls.proxy_port)
cls.https_proxy_url = "https://%s:%d" % (cls.proxy_host, cls.https_proxy_port)

def test_basic_proxy(self):
with proxy_from_url(self.proxy_url, ca_certs=DEFAULT_CA) as http:
Expand All @@ -31,6 +32,14 @@ def test_basic_proxy(self):
r = http.request("GET", "%s/" % self.https_url)
assert r.status == 200

def test_https_proxy(self):
    """
    Request both the HTTP and the HTTPS dummy server through the HTTPS
    proxy and expect a 200 response from each.
    """
    with proxy_from_url(self.https_proxy_url, ca_certs=DEFAULT_CA) as https:
        for target in (self.http_url, self.https_url):
            r = https.request("GET", "%s/" % target)
            assert r.status == 200

def test_nagle_proxy(self):
""" Test that proxy connections do not have TCP_NODELAY turned on """
with ProxyManager(self.proxy_url) as http:
Expand Down

0 comments on commit ffed605

Please sign in to comment.