Add SKIP_HEADER for skipping automatically added headers #2018

Merged
merged 5 commits on Oct 27, 2020
Changes from 4 commits
2 changes: 1 addition & 1 deletion src/urllib3/_collections.py
@@ -155,7 +155,7 @@ def __setitem__(self, key, val):

def __getitem__(self, key):
val = self._container[key.lower()]
return ", ".join([six.ensure_str(v, "ascii") for v in val[1:]])
return ", ".join(val[1:])

def __delitem__(self, key):
del self._container[key.lower()]
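As background for the `__getitem__` line changed above: `HTTPHeaderDict` stores every value added under a case-insensitive key and joins them on lookup. A minimal sketch of that behaviour, assuming a urllib3 install that includes this class (the header name and values are arbitrary examples):

```python
from urllib3._collections import HTTPHeaderDict

h = HTTPHeaderDict()
h.add("Set-Cookie", "a=1")
h.add("Set-Cookie", "b=2")

# Lookup is case-insensitive and joins all stored values with ", ".
assert h["set-cookie"] == "a=1, b=2"
```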
31 changes: 18 additions & 13 deletions src/urllib3/connection.py
@@ -43,7 +43,6 @@ class BrokenPipeError(Exception):
pass


from ._collections import HTTPHeaderDict
from ._version import __version__
from .exceptions import (
ConnectTimeoutError,
@@ -52,7 +51,7 @@ class BrokenPipeError(Exception):
SystemTimeWarning,
)
from .packages.ssl_match_hostname import CertificateError, match_hostname
from .util import SUPPRESS_USER_AGENT, connection
from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
from .util.ssl_ import (
assert_fingerprint,
create_urllib3_context,
@@ -213,29 +212,35 @@ def putrequest(self, method, url, *args, **kwargs):

return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

def putheader(self, header, *values):
""""""
if SKIP_HEADER not in values:
_HTTPConnection.putheader(self, header, *values)
elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
raise ValueError(
"urllib3.util.SKIP_HEADER only supports 'Accept-Encoding', 'Host', and 'User-Agent'"
)

def request(self, method, url, body=None, headers=None):
headers = HTTPHeaderDict(headers if headers is not None else {})
if "user-agent" not in headers:
if headers is None:
headers = {"User-Agent": _get_default_user_agent()}
elif "user-agent" not in (k.lower() for k in headers):
headers["User-Agent"] = _get_default_user_agent()
elif headers["user-agent"] == SUPPRESS_USER_AGENT:
del headers["user-agent"]
super(HTTPConnection, self).request(method, url, body=body, headers=headers)

def request_chunked(self, method, url, body=None, headers=None):
"""
Alternative to the common request method, which sends the
body with chunked encoding and not as one block
"""
headers = HTTPHeaderDict(headers if headers is not None else {})
skip_accept_encoding = "accept-encoding" in headers
skip_host = "host" in headers
header_keys = set([k.lower() for k in headers or ()])
skip_accept_encoding = "accept-encoding" in header_keys
skip_host = "host" in header_keys
self.putrequest(
method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
)
if "user-agent" not in headers:
headers["User-Agent"] = _get_default_user_agent()
elif headers["user-agent"] == SUPPRESS_USER_AGENT:
del headers["user-agent"]
if "user-agent" not in header_keys:
self.putheader("User-Agent", _get_default_user_agent())
for header, value in headers.items():
self.putheader(header, value)
if "transfer-encoding" not in headers:
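The new `putheader` override is the enforcement point for `SKIP_HEADER`: a skippable header is silently dropped, anything else raises. A small sketch of the intended behaviour, assuming a urllib3 version that includes this change; the host and port are placeholders and no connection is actually opened:

```python
from urllib3.connection import HTTPConnection
from urllib3.util import SKIP_HEADER

conn = HTTPConnection("localhost", 80)  # placeholder host/port, never connected

# Skipping a supported header is a silent no-op: nothing is queued for sending.
conn.putheader("User-Agent", SKIP_HEADER)

# Skipping an unsupported header fails loudly instead of being ignored.
try:
    conn.putheader("Content-Length", SKIP_HEADER)
except ValueError as exc:
    print(exc)  # lists the three supported header names
```

Raising for names outside `SKIPPABLE_HEADERS`, rather than skipping them, means a typo can never silently drop a header the caller meant to send.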
5 changes: 3 additions & 2 deletions src/urllib3/util/__init__.py
@@ -2,7 +2,7 @@

# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import SUPPRESS_USER_AGENT, make_headers
from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers
from .response import is_fp_closed
from .retry import Retry
from .ssl_ import (
@@ -44,5 +44,6 @@
"ssl_wrap_socket",
"wait_for_read",
"wait_for_write",
"SUPPRESS_USER_AGENT",
"SKIP_HEADER",
"SKIPPABLE_HEADERS",
)
11 changes: 7 additions & 4 deletions src/urllib3/util/request.py
@@ -5,10 +5,13 @@
from ..exceptions import UnrewindableBodyError
from ..packages.six import b, integer_types

# Use an invalid User-Agent to represent suppressing of default user agent.
# See https://tools.ietf.org/html/rfc7231#section-5.5.3 and
# https://tools.ietf.org/html/rfc7230#section-3.2.6
SUPPRESS_USER_AGENT = "@@@INVALID_USER_AGENT@@@"
# Pass as a value within ``headers`` to skip
# emitting some HTTP headers that are added automatically.
# The only headers that are supported are ``Accept-Encoding``,
# ``Host``, and ``User-Agent``.
SKIP_HEADER = "@@@SKIP_HEADER@@@"
SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"])

ACCEPT_ENCODING = "gzip,deflate"
try:
import brotli as _unused_module_brotli # noqa: F401
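At the user level, `SKIP_HEADER` is passed as a header value so urllib3 omits a header it would otherwise add automatically. A hedged usage sketch; the httpbin.org endpoint is only an example of a service that echoes request headers back as JSON, and it requires network access:

```python
import json

import urllib3
from urllib3.util import SKIP_HEADER

http = urllib3.PoolManager()
r = http.request(
    "GET",
    "https://httpbin.org/headers",
    headers={"User-Agent": SKIP_HEADER, "Accept-Encoding": SKIP_HEADER},
)

# The echo service never saw either header because neither was emitted.
sent = json.loads(r.data.decode("utf-8"))["headers"]
assert "User-Agent" not in sent
assert "Accept-Encoding" not in sent
```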
4 changes: 2 additions & 2 deletions test/with_dummyserver/test_chunked_transfer.py
@@ -8,7 +8,7 @@
consume_socket,
)
from urllib3 import HTTPConnectionPool
from urllib3.util import SUPPRESS_USER_AGENT
from urllib3.util import SKIP_HEADER
from urllib3.util.retry import Retry

# Retry failed tests
@@ -123,7 +123,7 @@ def test_remove_user_agent_header(self):
"GET",
"/",
chunks,
headers={"User-Agent": SUPPRESS_USER_AGENT},
headers={"User-Agent": SKIP_HEADER},
chunked=True,
)

100 changes: 94 additions & 6 deletions test/with_dummyserver/test_connectionpool.py
@@ -1,15 +1,18 @@
# -*- coding: utf-8 -*-

import io
import json
import logging
import socket
import sys
import time
import warnings
from test import LONG_TIMEOUT, SHORT_TIMEOUT
from test import LONG_TIMEOUT, SHORT_TIMEOUT, onlyPy2
from threading import Event

import mock
import pytest
import six

from dummyserver.server import HAS_IPV6_AND_DNS, NoIPv6Warning
from dummyserver.testcase import HTTPDummyServerTestCase, SocketDummyServerTestCase
@@ -26,7 +29,7 @@
)
from urllib3.packages.six import b, u
from urllib3.packages.six.moves.urllib.parse import urlencode
from urllib3.util import SUPPRESS_USER_AGENT
from urllib3.util import SKIP_HEADER, SKIPPABLE_HEADERS
from urllib3.util.retry import RequestHistory, Retry
from urllib3.util.timeout import Timeout

@@ -830,28 +833,80 @@ def test_no_user_agent_header(self):
custom_ua = "I'm not a web scraper, what are you talking about?"
with HTTPConnectionPool(self.host, self.port) as pool:
# Suppress user agent in the request headers.
no_ua_headers = {"User-Agent": SUPPRESS_USER_AGENT}
no_ua_headers = {"User-Agent": SKIP_HEADER}
r = pool.request("GET", "/headers", headers=no_ua_headers)
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER

# Suppress user agent in the pool headers.
pool.headers = no_ua_headers
r = pool.request("GET", "/headers")
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER

# Request headers override pool headers.
pool_headers = {"User-Agent": custom_ua}
pool.headers = pool_headers
r = pool.request("GET", "/headers", headers=no_ua_headers)
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER
assert pool_headers.get("User-Agent") == custom_ua

@pytest.mark.parametrize(
"accept_encoding", ["Accept-Encoding", "accept-encoding", None]
)
@pytest.mark.parametrize("host", ["Host", "host", None])
@pytest.mark.parametrize("user_agent", ["User-Agent", "user-agent", None])
@pytest.mark.parametrize("chunked", [True, False])
def test_skip_header(self, accept_encoding, host, user_agent, chunked):
headers = {}

if accept_encoding is not None:
headers[accept_encoding] = SKIP_HEADER
if host is not None:
headers[host] = SKIP_HEADER
if user_agent is not None:
headers[user_agent] = SKIP_HEADER

with HTTPConnectionPool(self.host, self.port) as pool:
r = pool.request("GET", "/headers", headers=headers, chunked=chunked)
request_headers = json.loads(r.data.decode("utf8"))

if accept_encoding is None:
assert "Accept-Encoding" in request_headers
else:
assert accept_encoding not in request_headers
if host is None:
assert "Host" in request_headers
else:
assert host not in request_headers
if user_agent is None:
assert "User-Agent" in request_headers
else:
assert user_agent not in request_headers

@pytest.mark.parametrize("header", ["Content-Length", "content-length"])
@pytest.mark.parametrize("chunked", [True, False])
def test_skip_header_non_supported(self, header, chunked):
with HTTPConnectionPool(self.host, self.port) as pool:
with pytest.raises(ValueError) as e:
pool.request(
"GET", "/headers", headers={header: SKIP_HEADER}, chunked=chunked
)
assert (
str(e.value)
== "urllib3.util.SKIP_HEADER only supports 'Accept-Encoding', 'Host', and 'User-Agent'"
)

# Ensure that the error message stays up to date with 'SKIPPABLE_HEADERS'
assert all(
("'" + header.title() + "'") in str(e.value)
for header in SKIPPABLE_HEADERS
)
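The comment in the test above notes that the hard-coded error string must stay in sync with `SKIPPABLE_HEADERS`. One possible way to avoid that duplication, shown purely as a sketch and not something this PR does, is to derive the message from the frozenset itself:

```python
from urllib3.util import SKIPPABLE_HEADERS

titles = sorted(h.title() for h in SKIPPABLE_HEADERS)
message = "urllib3.util.SKIP_HEADER only supports %s, and %s" % (
    ", ".join("'%s'" % t for t in titles[:-1]),
    "'%s'" % titles[-1],
)

# Matches the literal string raised by putheader() in connection.py above.
assert message == (
    "urllib3.util.SKIP_HEADER only supports "
    "'Accept-Encoding', 'Host', and 'User-Agent'"
)
```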

def test_bytes_header(self):
with HTTPConnectionPool(self.host, self.port) as pool:
headers = {"User-Agent": b"test header"}
@@ -860,6 +915,39 @@ def test_bytes_header(self):
assert "User-Agent" in request_headers
assert request_headers["User-Agent"] == "test header"

@pytest.mark.parametrize(
"user_agent", [u"Schönefeld/1.18.0", u"Schönefeld/1.18.0".encode("iso-8859-1")]
)
def test_user_agent_non_ascii_user_agent(self, user_agent):
if six.PY2 and not isinstance(user_agent, str):
pytest.skip(
"Python 2 raises UnicodeEncodeError when passed a unicode header"
)

with HTTPConnectionPool(self.host, self.port, retries=False) as pool:
r = pool.urlopen(
"GET",
"/headers",
headers={"User-Agent": user_agent},
)
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" in request_headers
assert request_headers["User-Agent"] == u"Schönefeld/1.18.0"

@onlyPy2
def test_user_agent_non_ascii_fails_on_python_2(self):
with HTTPConnectionPool(self.host, self.port, retries=False) as pool:
with pytest.raises(UnicodeEncodeError) as e:
pool.urlopen(
"GET",
"/headers",
headers={"User-Agent": u"Schönefeld/1.18.0"},
)
assert str(e.value) == (
"'ascii' codec can't encode character u'\\xf6' in "
"position 3: ordinal not in range(128)"
)


class TestRetry(HTTPDummyServerTestCase):
def test_max_retry(self):