Skip to content

Commit

Permalink
IMPORTANT cache invalidation change, fix 307 keep method, add 308 red…
Browse files Browse the repository at this point in the history
…irect

- invalidate cache after request per RFC2616 Section 13.10
  before: `method not in ["GET", "HEAD"]`
  now: `method in h.invalidate_methods`, default: DELETE,POST,PUT
- 308 may be cached not by default as per RFC7538, but only with
  relevant headers
- Http new attributes: safe_methods, invalidate_methods are public API,
  type is anything with `contains` operator, tuple is recommended.
  Defaults are in upper case module level constants.
  As of 2019-12-19, SAFE_METHODS only include GET,HEAD to minimise
  problems from changing much behavior at once.
  Please, expect default SAFE_METHODS to become GET,HEAD,OPTIONS,TRACE as per RFC7231.

#151
  • Loading branch information
temoto committed Dec 19, 2019
1 parent d12344a commit d6113dd
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 32 deletions.
48 changes: 30 additions & 18 deletions python2/httplib2/__init__.py
Expand Up @@ -291,6 +291,13 @@ class NotRunningAppEngineEnvironment(HttpLib2Error):
"upgrade",
]

# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD") # TODO add "OPTIONS", "TRACE"

# https://tools.ietf.org/html/rfc2616#section-13.10
# Some HTTP methods MUST cause a cache to invalidate an entity.
INVALIDATE_METHODS = ("DELETE", "POST", "PUT")


def _get_end2end_headers(response):
hopbyhop = list(HOP_BY_HOP)
Expand Down Expand Up @@ -1175,9 +1182,9 @@ def connect(self):

host = self.host
port = self.port

socket_err = None

for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
Expand Down Expand Up @@ -1353,9 +1360,9 @@ def connect(self):

host = self.host
port = self.port

socket_err = None

address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
for family, socktype, proto, canonname, sockaddr in address_info:
try:
Expand Down Expand Up @@ -1665,6 +1672,9 @@ def __init__(
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT", "PATCH"]

self.invalidate_methods = list(INVALIDATE_METHODS)
self.safe_methods = list(SAFE_METHODS)

# If 'follow_redirects' is True, and this is set to True then
# all redirecs are followed, including unsafe ones.
self.follow_all_redirects = False
Expand Down Expand Up @@ -1858,10 +1868,10 @@ def _request(

if (
self.follow_all_redirects
or (method in ["GET", "HEAD"])
or response.status == 303
or method in self.safe_methods
or response.status in (303, 308)
):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308):
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
Expand All @@ -1881,7 +1891,7 @@ def _request(
response["location"] = urlparse.urljoin(
absolute_uri, location
)
if response.status == 301 and method in ["GET", "HEAD"]:
if response.status == 308 or (response.status == 301 and method in self.safe_methods):
response["-x-permanent-redirect-url"] = response["location"]
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1918,7 +1928,7 @@ def _request(
response,
content,
)
elif response.status in [200, 203] and method in ["GET", "HEAD"]:
elif response.status in [200, 203] and method in self.safe_methods:
# Don't cache 206's since we aren't going to handle byte range requests
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -2018,6 +2028,7 @@ def request(
headers["accept-encoding"] = "gzip, deflate"

info = email.Message.Message()
cachekey = None
cached_value = None
if self.cache:
cachekey = defrag_uri.encode("utf-8")
Expand All @@ -2038,8 +2049,6 @@ def request(
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None

if (
method in self.optimistic_concurrency_methods
Expand All @@ -2051,13 +2060,13 @@ def request(
# http://www.w3.org/1999/04/Editing/
headers["if-match"] = info["etag"]

if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
# RFC 2616 Section 13.10
if self.cache and cachekey and method in self.invalidate_methods:
self.cache.delete(cachekey)

# Check the vary header in the cache to see if this request
# matches what varies in the cache.
if method in ["GET", "HEAD"] and "vary" in info:
if method in self.safe_methods and "vary" in info:
vary = info["vary"]
vary_headers = vary.lower().replace(" ", "").split(",")
for header in vary_headers:
Expand All @@ -2068,11 +2077,14 @@ def request(
break

if (
cached_value
and method in ["GET", "HEAD"]
and self.cache
self.cache
and cached_value
and (method in self.safe_methods or info["status"] == "308")
and "range" not in headers
):
redirect_method = method
if info["status"] not in ("307", "308"):
redirect_method = "GET"
if "-x-permanent-redirect-url" in info:
# Should cached permanent redirects be counted in our redirection count? For now, yes.
if redirections <= 0:
Expand All @@ -2083,7 +2095,7 @@ def request(
)
(response, new_content) = self.request(
info["-x-permanent-redirect-url"],
method="GET",
method=redirect_method,
headers=headers,
redirections=redirections - 1,
)
Expand Down
41 changes: 27 additions & 14 deletions python3/httplib2/__init__.py
Expand Up @@ -161,6 +161,14 @@ class ProxiesUnavailableError(HttpLib2Error):
"upgrade",
]

# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD") # TODO add "OPTIONS", "TRACE"

# https://tools.ietf.org/html/rfc2616#section-13.10
# Some HTTP methods MUST cause a cache to invalidate an entity.
INVALIDATE_METHODS = ("DELETE", "POST", "PUT")


from httplib2 import certs
CA_CERTS = certs.where()

Expand Down Expand Up @@ -1471,6 +1479,9 @@ def __init__(
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT", "PATCH"]

self.invalidate_methods = list(INVALIDATE_METHODS)
self.safe_methods = list(SAFE_METHODS)

# If 'follow_redirects' is True, and this is set to True then
# all redirecs are followed, including unsafe ones.
self.follow_all_redirects = False
Expand Down Expand Up @@ -1663,10 +1674,10 @@ def _request(

if (
self.follow_all_redirects
or (method in ["GET", "HEAD"])
or response.status == 303
or method in self.safe_methods
or response.status in (303, 308)
):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308):
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
Expand All @@ -1686,7 +1697,7 @@ def _request(
response["location"] = urllib.parse.urljoin(
absolute_uri, location
)
if response.status == 301 and method in ["GET", "HEAD"]:
if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
response["-x-permanent-redirect-url"] = response["location"]
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1723,7 +1734,7 @@ def _request(
response,
content,
)
elif response.status in [200, 203] and method in ["GET", "HEAD"]:
elif response.status in [200, 203] and method in self.safe_methods:
# Don't cache 206's since we aren't going to handle byte range requests
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1822,6 +1833,7 @@ def request(
headers["accept-encoding"] = "gzip, deflate"

info = email.message.Message()
cachekey = None
cached_value = None
if self.cache:
cachekey = defrag_uri
Expand All @@ -1839,8 +1851,6 @@ def request(
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None

if (
method in self.optimistic_concurrency_methods
Expand All @@ -1852,13 +1862,13 @@ def request(
# http://www.w3.org/1999/04/Editing/
headers["if-match"] = info["etag"]

if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
# RFC 2616 Section 13.10
if self.cache and cachekey and method in self.invalidate_methods:
self.cache.delete(cachekey)

# Check the vary header in the cache to see if this request
# matches what varies in the cache.
if method in ["GET", "HEAD"] and "vary" in info:
if method in self.safe_methods and "vary" in info:
vary = info["vary"]
vary_headers = vary.lower().replace(" ", "").split(",")
for header in vary_headers:
Expand All @@ -1869,11 +1879,14 @@ def request(
break

if (
cached_value
and method in ["GET", "HEAD"]
and self.cache
self.cache
and cached_value
and (method in self.safe_methods or info["status"] == "308")
and "range" not in headers
):
redirect_method = method
if info["status"] not in ("307", "308"):
redirect_method = "GET"
if "-x-permanent-redirect-url" in info:
# Should cached permanent redirects be counted in our redirection count? For now, yes.
if redirections <= 0:
Expand All @@ -1884,7 +1897,7 @@ def request(
)
(response, new_content) = self.request(
info["-x-permanent-redirect-url"],
method="GET",
method=redirect_method,
headers=headers,
redirections=redirections - 1,
)
Expand Down
2 changes: 2 additions & 0 deletions tests/__init__.py
Expand Up @@ -406,10 +406,12 @@ def server_request_socket_handler(sock, tick):
request = HttpRequest.from_buffered(buf)
if request is None:
break
# print("--- debug request\n" + request.raw.decode("ascii", "replace"))
i += 1
request.client_sock = sock
request.number = i
response = request_handler(request=request)
# print("--- debug response\n" + response.decode("ascii", "replace"))
sock.sendall(response)
request.client_sock = None
if not tick(request):
Expand Down
51 changes: 51 additions & 0 deletions tests/test_http.py
Expand Up @@ -622,6 +622,57 @@ def test_get_307():
assert content == b"final content\n"


def test_post_307():
# 307: follow with same method
http = httplib2.Http(cache=tests.get_cache_path(), timeout=1)
http.follow_all_redirects=True
r307 = tests.http_response_bytes(status=307, headers={"location": "/final"})
r200 = tests.http_response_bytes(status=200, body=b"final content\n")

with tests.server_list_http([r307, r200, r307, r200]) as uri:
response, content = http.request(uri, "POST")
assert response.previous.status == 307
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content == b"final content\n"

response, content = http.request(uri, "POST")
assert response.previous.status == 307
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content == b"final content\n"


def test_change_308():
# 308: follow with same method, cache redirect
http = httplib2.Http(cache=tests.get_cache_path(), timeout=1)
routes = {
"/final": tests.make_http_reflect(),
"": tests.http_response_bytes(
status="308 Permanent Redirect",
add_date=True,
headers={"cache-control": "max-age=300", "location": "/final"},
),
}

with tests.server_route(routes, request_count=3) as uri:
response, content = http.request(uri, "CHANGE", body=b"hello308")
assert response.previous.status == 308
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content.startswith(b"CHANGE /final HTTP")

response, content = http.request(uri, "CHANGE")
assert response.previous.status == 308
assert response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content.startswith(b"CHANGE /final HTTP")


def test_get_410():
# Test that we pass 410's through
http = httplib2.Http()
Expand Down

0 comments on commit d6113dd

Please sign in to comment.