From 8dc9b6bd59114439128d23a60609bee09daf508c Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 19 Jan 2022 14:58:19 +0000 Subject: [PATCH] SOCKS proxy support (#2034) --- README.md | 6 +- docs/advanced.md | 38 +++++-------- docs/index.md | 6 +- httpx/_config.py | 41 +++++++++----- httpx/_transports/default.py | 107 +++++++++++++++++++++++++---------- requirements.txt | 2 +- setup.py | 1 + tests/client/test_proxies.py | 14 +++++ tests/test_config.py | 33 +++++------ 9 files changed, 157 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index cc819e5a32..2375c365f1 100644 --- a/README.md +++ b/README.md @@ -129,12 +129,16 @@ The HTTPX project relies on these excellent libraries: * `httpcore` - The underlying transport implementation for `httpx`. * `h11` - HTTP/1.1 support. - * `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)* * `certifi` - SSL certificates. * `charset_normalizer` - Charset auto-detection. * `rfc3986` - URL parsing & normalization. * `idna` - Internationalized domain name support. * `sniffio` - Async library autodetection. + +As well as these optional installs: + +* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)* +* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)* * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)* * `click` - Command line client support. *(Optional, with `httpx[cli]`)* * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)* diff --git a/docs/advanced.md b/docs/advanced.md index ad4b6f1706..58eb29dab9 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -387,8 +387,6 @@ client = httpx.Client(trust_env=False) HTTPX supports setting up [HTTP proxies](https://en.wikipedia.org/wiki/Proxy_server#Web_proxy_servers) via the `proxies` parameter to be passed on client initialization or top-level API functions like `httpx.get(..., proxies=...)`. -_Note: SOCKS proxies are not supported yet._ -
Diagram of how a proxy works (source: Wikipedia). The left hand side "Internet" blob may be your HTTPX client requesting example.com through a proxy.
@@ -565,44 +563,34 @@ See documentation on [`HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`](environment_vari In general, the flow for making an HTTP request through a proxy is as follows: 1. The client connects to the proxy (initial connection request). -1. The proxy somehow transfers data to the server on your behalf. +2. The proxy transfers data to the server on your behalf. How exactly step 2/ is performed depends on which of two proxying mechanisms is used: * **Forwarding**: the proxy makes the request for you, and sends back the response it obtained from the server. -* **Tunneling**: the proxy establishes a TCP connection to the server on your behalf, and the client reuses this connection to send the request and receive the response. This is known as an [HTTP Tunnel](https://en.wikipedia.org/wiki/HTTP_tunnel). This mechanism is how you can access websites that use HTTPS from an HTTP proxy (the client "upgrades" the connection to HTTPS by performing the TLS handshake with the server over the TCP connection provided by the proxy). +* **Tunnelling**: the proxy establishes a TCP connection to the server on your behalf, and the client reuses this connection to send the request and receive the response. This is known as an [HTTP Tunnel](https://en.wikipedia.org/wiki/HTTP_tunnel). This mechanism is how you can access websites that use HTTPS from an HTTP proxy (the client "upgrades" the connection to HTTPS by performing the TLS handshake with the server over the TCP connection provided by the proxy). -#### Default behavior +### Troubleshooting proxies -Given the technical definitions above, by default (and regardless of whether you're using an HTTP or HTTPS proxy), HTTPX will: +If you encounter issues when setting up proxies, please refer to our [Troubleshooting guide](troubleshooting.md#proxies). -* Use forwarding for HTTP requests. -* Use tunneling for HTTPS requests. +## SOCKS -This ensures that you can make HTTP and HTTPS requests in all cases (i.e. regardless of which type of proxy you're using). +In addition to HTTP proxies, `httpcore` also supports proxies using the SOCKS protocol. +This is an optional feature that requires an additional third-party library be installed before use. -#### Forcing the proxy mechanism +You can install SOCKS support using `pip`: -In most cases, the default behavior should work just fine as well as provide enough security. +```shell +$ pip install httpx[socks] +``` -But if you know what you're doing and you want to force which mechanism to use, you can do so by passing an `httpx.Proxy()` instance, setting the `mode` to either `FORWARD_ONLY` or `TUNNEL_ONLY`. For example... +You can now configure a client to make requests via a proxy using the SOCKS protocol: ```python -# Route all requests through an HTTPS proxy, using tunneling only. -proxies = httpx.Proxy( - url="https://localhost:8030", - mode="TUNNEL_ONLY", -) - -with httpx.Client(proxies=proxies) as client: - # This HTTP request will be tunneled instead of forwarded. - r = client.get("http://example.com") +httpx.Client(proxies='socks5://user:pass@host:port') ``` -### Troubleshooting proxies - -If you encounter issues when setting up proxies, please refer to our [Troubleshooting guide](troubleshooting.md#proxies). - ## Timeout Configuration HTTPX is careful to enforce timeouts everywhere by default. diff --git a/docs/index.md b/docs/index.md index 448247c0d2..bf0eecffff 100644 --- a/docs/index.md +++ b/docs/index.md @@ -112,12 +112,16 @@ The HTTPX project relies on these excellent libraries: * `httpcore` - The underlying transport implementation for `httpx`. * `h11` - HTTP/1.1 support. - * `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)* * `certifi` - SSL certificates. * `charset_normalizer` - Charset auto-detection. * `rfc3986` - URL parsing & normalization. * `idna` - Internationalized domain name support. * `sniffio` - Async library autodetection. + +As well as these optional installs: + +* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)* +* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)* * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)* * `click` - Command line client support. *(Optional, with `httpx[cli]`)* * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)* diff --git a/httpx/_config.py b/httpx/_config.py index 927a67c2b1..9cf6d16777 100644 --- a/httpx/_config.py +++ b/httpx/_config.py @@ -1,7 +1,6 @@ import os import ssl import typing -from base64 import b64encode from pathlib import Path import certifi @@ -316,32 +315,46 @@ def __repr__(self) -> str: class Proxy: - def __init__(self, url: URLTypes, *, headers: HeaderTypes = None): + def __init__( + self, + url: URLTypes, + *, + auth: typing.Tuple[str, str] = None, + headers: HeaderTypes = None, + ): url = URL(url) headers = Headers(headers) - if url.scheme not in ("http", "https"): + if url.scheme not in ("http", "https", "socks5"): raise ValueError(f"Unknown scheme for proxy URL {url!r}") if url.username or url.password: - headers.setdefault( - "Proxy-Authorization", - self._build_auth_header(url.username, url.password), - ) - # Remove userinfo from the URL authority, e.g.: - # 'username:password@proxy_host:proxy_port' -> 'proxy_host:proxy_port' + # Remove any auth credentials from the URL. + auth = (url.username, url.password) url = url.copy_with(username=None, password=None) self.url = url + self.auth = auth self.headers = headers - def _build_auth_header(self, username: str, password: str) -> str: - userpass = (username.encode("utf-8"), password.encode("utf-8")) - token = b64encode(b":".join(userpass)).decode() - return f"Basic {token}" + @property + def raw_auth(self) -> typing.Optional[typing.Tuple[bytes, bytes]]: + # The proxy authentication as raw bytes. + return ( + None + if self.auth is None + else (self.auth[0].encode("utf-8"), self.auth[1].encode("utf-8")) + ) def __repr__(self) -> str: - return f"Proxy(url={str(self.url)!r}, headers={dict(self.headers)!r})" + # The authentication is represented with the password component masked. + auth = (self.auth[0], "********") if self.auth else None + + # Build a nice concise representation. + url_str = f"{str(self.url)!r}" + auth_str = f", auth={auth!r}" if auth else "" + headers_str = f", headers={dict(self.headers)!r}" if self.headers else "" + return f"Proxy({url_str}{auth_str}{headers_str})" DEFAULT_TIMEOUT_CONFIG = Timeout(timeout=5.0) diff --git a/httpx/_transports/default.py b/httpx/_transports/default.py index bfb0333d4e..0995c7fa00 100644 --- a/httpx/_transports/default.py +++ b/httpx/_transports/default.py @@ -137,37 +137,51 @@ def __init__( local_address=local_address, retries=retries, ) - else: + elif proxy.url.scheme in ("http", "https"): + self._pool = httpcore.HTTPProxy( + proxy_url=httpcore.URL( + scheme=proxy.url.raw_scheme, + host=proxy.url.raw_host, + port=proxy.url.port, + target=proxy.url.raw_path, + ), + proxy_auth=proxy.raw_auth, + proxy_headers=proxy.headers.raw, + ssl_context=ssl_context, + max_connections=limits.max_connections, + max_keepalive_connections=limits.max_keepalive_connections, + keepalive_expiry=limits.keepalive_expiry, + http1=http1, + http2=http2, + ) + elif proxy.url.scheme == "socks5": try: - self._pool = httpcore.HTTPProxy( - proxy_url=httpcore.URL( - scheme=proxy.url.raw_scheme, - host=proxy.url.raw_host, - port=proxy.url.port, - target=proxy.url.raw_path, - ), - proxy_headers=proxy.headers.raw, - ssl_context=ssl_context, - max_connections=limits.max_connections, - max_keepalive_connections=limits.max_keepalive_connections, - keepalive_expiry=limits.keepalive_expiry, - http1=http1, - http2=http2, - ) - except TypeError: # pragma: nocover - self._pool = httpcore.HTTPProxy( - proxy_url=httpcore.URL( - scheme=proxy.url.raw_scheme, - host=proxy.url.raw_host, - port=proxy.url.port, - target=proxy.url.raw_path, - ), - proxy_headers=proxy.headers.raw, - ssl_context=ssl_context, - max_connections=limits.max_connections, - max_keepalive_connections=limits.max_keepalive_connections, - keepalive_expiry=limits.keepalive_expiry, - ) + import socksio # noqa + except ImportError: # pragma: nocover + raise ImportError( + "Using SOCKS proxy, but the 'socksio' package is not installed. " + "Make sure to install httpx using `pip install httpx[socks]`." + ) from None + + self._pool = httpcore.SOCKSProxy( + proxy_url=httpcore.URL( + scheme=proxy.url.raw_scheme, + host=proxy.url.raw_host, + port=proxy.url.port, + target=proxy.url.raw_path, + ), + proxy_auth=proxy.raw_auth, + ssl_context=ssl_context, + max_connections=limits.max_connections, + max_keepalive_connections=limits.max_keepalive_connections, + keepalive_expiry=limits.keepalive_expiry, + http1=http1, + http2=http2, + ) + else: # pragma: nocover + raise ValueError( + f"Proxy protocol must be either 'http', 'https', or 'socks5', but got {proxy.url.scheme!r}." + ) def __enter__(self: T) -> T: # Use generics for subclass support. self._pool.__enter__() @@ -258,7 +272,7 @@ def __init__( local_address=local_address, retries=retries, ) - else: + elif proxy.url.scheme in ("http", "https"): self._pool = httpcore.AsyncHTTPProxy( proxy_url=httpcore.URL( scheme=proxy.url.raw_scheme, @@ -266,11 +280,42 @@ def __init__( port=proxy.url.port, target=proxy.url.raw_path, ), + proxy_auth=proxy.raw_auth, proxy_headers=proxy.headers.raw, ssl_context=ssl_context, max_connections=limits.max_connections, max_keepalive_connections=limits.max_keepalive_connections, keepalive_expiry=limits.keepalive_expiry, + http1=http1, + http2=http2, + ) + elif proxy.url.scheme == "socks5": + try: + import socksio # noqa + except ImportError: # pragma: nocover + raise ImportError( + "Using SOCKS proxy, but the 'socksio' package is not installed. " + "Make sure to install httpx using `pip install httpx[socks]`." + ) from None + + self._pool = httpcore.AsyncSOCKSProxy( + proxy_url=httpcore.URL( + scheme=proxy.url.raw_scheme, + host=proxy.url.raw_host, + port=proxy.url.port, + target=proxy.url.raw_path, + ), + proxy_auth=proxy.raw_auth, + ssl_context=ssl_context, + max_connections=limits.max_connections, + max_keepalive_connections=limits.max_keepalive_connections, + keepalive_expiry=limits.keepalive_expiry, + http1=http1, + http2=http2, + ) + else: # pragma: nocover + raise ValueError( + f"Proxy protocol must be either 'http', 'https', or 'socks5', but got {proxy.url.scheme!r}." ) async def __aenter__(self: A) -> A: # Use generics for subclass support. diff --git a/requirements.txt b/requirements.txt index bdd34bfa07..9d1a88640a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # On the other hand, we're not pinning package dependencies, because our tests # needs to pass with the latest version of the packages. # Reference: https://github.com/encode/httpx/pull/1721#discussion_r661241588 --e .[cli,http2,brotli] +-e .[brotli,cli,http2,socks] charset-normalizer==2.0.6 diff --git a/setup.py b/setup.py index e72c5be2ec..ba2360c060 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ def get_packages(package): ], extras_require={ "http2": "h2>=3,<5", + "socks": "socksio==1.*", "brotli": [ "brotli; platform_python_implementation == 'CPython'", "brotlicffi; platform_python_implementation != 'CPython'" diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index 2d9c15884c..2e88f644bb 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -47,6 +47,20 @@ def test_proxies_parameter(proxies, expected_proxies): assert len(expected_proxies) == len(client._mounts) +def test_socks_proxy(): + url = httpx.URL("http://www.example.com") + + client = httpx.Client(proxies="socks5://localhost/") + transport = client._transport_for_url(url) + assert isinstance(transport, httpx.HTTPTransport) + assert isinstance(transport._pool, httpcore.SOCKSProxy) + + async_client = httpx.AsyncClient(proxies="socks5://localhost/") + async_transport = async_client._transport_for_url(url) + assert isinstance(async_transport, httpx.AsyncHTTPTransport) + assert isinstance(async_transport._pool, httpcore.AsyncSOCKSProxy) + + PROXY_URL = "http://[::1]" diff --git a/tests/test_config.py b/tests/test_config.py index 3a17cf5976..56d354197d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -199,25 +199,22 @@ def test_ssl_config_support_for_keylog_file(tmpdir, monkeypatch): # pragma: noc assert context.keylog_filename is None # type: ignore -@pytest.mark.parametrize( - "url,expected_url,expected_headers", - [ - ("https://example.com", "https://example.com", {}), - ( - "https://user:pass@example.com", - "https://example.com", - {"proxy-authorization": "Basic dXNlcjpwYXNz"}, - ), - ], -) -def test_proxy_from_url(url, expected_url, expected_headers): - proxy = httpx.Proxy(url) +def test_proxy_from_url(): + proxy = httpx.Proxy("https://example.com") - assert str(proxy.url) == expected_url - assert dict(proxy.headers) == expected_headers - assert repr(proxy) == "Proxy(url='{}', headers={})".format( - expected_url, str(expected_headers) - ) + assert str(proxy.url) == "https://example.com" + assert proxy.auth is None + assert proxy.headers == {} + assert repr(proxy) == "Proxy('https://example.com')" + + +def test_proxy_with_auth_from_url(): + proxy = httpx.Proxy("https://username:password@example.com") + + assert str(proxy.url) == "https://example.com" + assert proxy.auth == ("username", "password") + assert proxy.headers == {} + assert repr(proxy) == "Proxy('https://example.com', auth=('username', '********'))" def test_invalid_proxy_scheme():