Skip to content

Commit

Permalink
Implement ETag support (aio-libs#4594)
Browse files Browse the repository at this point in the history
This change adds an `etag` property to the response object and
`if_match`, `if_none_match` properties to the request object.
Also, it implements ETag support in static routes and fixes a
few bugs found along the way.

Refs:
* https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
* https://tools.ietf.org/html/rfc7232#section-2.3
* https://tools.ietf.org/html/rfc7232#section-6

PR aio-libs#5298 by @greshilov
Resolves aio-libs#4594

Co-Authored-By: Serhiy Storchaka <storchaka@gmail.com>
Co-Authored-By: Andrew Svetlov <andrew.svetlov@gmail.com>
  • Loading branch information
3 people committed Mar 23, 2021
1 parent 8090594 commit 089949d
Show file tree
Hide file tree
Showing 13 changed files with 562 additions and 177 deletions.
1 change: 1 addition & 0 deletions CHANGES/4594.feature
@@ -0,0 +1 @@
FileResponse now supports ETag.
3 changes: 2 additions & 1 deletion aiohttp/__init__.py
Expand Up @@ -38,7 +38,7 @@
)
from .cookiejar import CookieJar as CookieJar, DummyCookieJar as DummyCookieJar
from .formdata import FormData as FormData
from .helpers import BasicAuth as BasicAuth, ChainMapProxy as ChainMapProxy
from .helpers import BasicAuth, ChainMapProxy, ETag
from .http import (
HttpVersion as HttpVersion,
HttpVersion10 as HttpVersion10,
Expand Down Expand Up @@ -146,6 +146,7 @@
# helpers
"BasicAuth",
"ChainMapProxy",
"ETag",
# http
"HttpVersion",
"HttpVersion10",
Expand Down
25 changes: 24 additions & 1 deletion aiohttp/helpers.py
Expand Up @@ -52,7 +52,7 @@
from .log import client_logger, internal_logger
from .typedefs import PathLike, Protocol # noqa

__all__ = ("BasicAuth", "ChainMapProxy")
__all__ = ("BasicAuth", "ChainMapProxy", "ETag")

PY_36 = sys.version_info >= (3, 6)
PY_37 = sys.version_info >= (3, 7)
Expand Down Expand Up @@ -776,3 +776,26 @@ def __bool__(self) -> bool:
def __repr__(self) -> str:
    """Return ``ChainMapProxy(...)`` with the repr of every wrapped map."""
    content = ", ".join(repr(mapping) for mapping in self._maps)
    return f"ChainMapProxy({content})"


# Entity-tag grammar, https://tools.ietf.org/html/rfc7232#section-2.3
#
#   entity-tag = [ weak ] opaque-tag
#   weak       = %x57.2F            ; "W/", case-sensitive
#   opaque-tag = DQUOTE *etagc DQUOTE
#   etagc      = %x21 / %x23-7E / obs-text
#   obs-text   = %x80-FF
#
# The character class is spelled with explicit code points so that the upper
# bound of the visible-ASCII range is unmistakably 0x7E ("~"); a literal
# "#-}" range stops one character short and rejects tags containing "~".
# NOTE: the "+" quantifier means an empty opaque-tag ('""') is not accepted.
_ETAGC = r"[!\x23-\x7e\x80-\xff]+"
# Matches a bare (unquoted) etag value.
_ETAGC_RE = re.compile(_ETAGC)
# Group 1: optional weak prefix "W/"; group 2: the tag text without quotes.
_QUOTED_ETAG = fr'(W/)?"({_ETAGC})"'
QUOTED_ETAG_RE = re.compile(_QUOTED_ETAG)
# Tokenizer for comma-separated etag lists (If-Match / If-None-Match).
# Group 1: whole quoted etag, group 2: weak prefix, group 3: tag text.
# Group 4 captures any single character that is not part of a well-formed
# list item, letting the caller detect malformed input.
LIST_QUOTED_ETAG_RE = re.compile(fr"({_QUOTED_ETAG})(?:\s*,\s*|$)|(.)")

# Wildcard value that stands for "any etag".
ETAG_ANY = "*"


@attr.s(frozen=True, slots=True, auto_attribs=True)
class ETag:
    """Immutable entity tag: the tag text plus a weak/strong flag."""

    # Tag text as it appears between the double quotes of the header.
    value: str
    # True when the tag carries the ``W/`` (weak validator) prefix.
    is_weak: bool = False


def validate_etag_value(value: str) -> None:
    """Check that *value* is usable as an unquoted etag.

    The wildcard ``ETAG_ANY`` is always accepted; any other value must
    consist solely of characters allowed by ``_ETAGC_RE``.

    Raises:
        ValueError: if *value* contains a character that is not allowed
            in an etag (for example a double quote).
    """
    if value == ETAG_ANY:
        return
    if _ETAGC_RE.fullmatch(value) is None:
        raise ValueError(
            f"Value {value!r} is not a valid etag. Maybe it contains '\"'?"
        )
66 changes: 55 additions & 11 deletions aiohttp/web_fileresponse.py
Expand Up @@ -9,14 +9,17 @@
Any,
Awaitable,
Callable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
)

from . import hdrs
from .abc import AbstractStreamWriter
from .helpers import ETAG_ANY, ETag
from .typedefs import Final, LooseHeaders
from .web_exceptions import (
HTTPNotModified,
Expand Down Expand Up @@ -100,6 +103,30 @@ async def _sendfile(
await super().write_eof()
return writer

@staticmethod
def _strong_etag_match(etag_value: str, etags: Tuple[ETag, ...]) -> bool:
    """Tell whether *etag_value* is matched by the request's *etags*.

    A lone wildcard (``ETAG_ANY``) matches everything.  Otherwise only
    entries that are not weak are compared, and their value must equal
    *etag_value* exactly.
    """
    is_wildcard = len(etags) == 1 and etags[0].value == ETAG_ANY
    if is_wildcard:
        return True
    for candidate in etags:
        # Weak entries never take part in this comparison.
        if not candidate.is_weak and candidate.value == etag_value:
            return True
    return False

async def _not_modified(
    self, request: "BaseRequest", etag_value: str, last_modified: float
) -> Optional[AbstractStreamWriter]:
    """Turn this response into a 304 Not Modified and prepare it.

    The validators (*etag_value* and *last_modified*) are still attached
    to the response before it is handed to the parent ``prepare()``.
    """
    self.set_status(HTTPNotModified.status_code)
    # Delete any Content-Length headers provided by user. HTTP 304
    # should always have empty response body
    self._length_check = False
    self.etag = etag_value  # type: ignore[assignment]
    self.last_modified = last_modified  # type: ignore[assignment]
    writer = await super().prepare(request)
    return writer

async def _precondition_failed(
    self, request: "BaseRequest"
) -> Optional[AbstractStreamWriter]:
    """Turn this response into a 412 Precondition Failed and prepare it.

    The content length is forced to zero before delegating to the
    parent ``prepare()``.
    """
    self.set_status(HTTPPreconditionFailed.status_code)
    self.content_length = 0
    writer = await super().prepare(request)
    return writer

async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]:
filepath = self._path

Expand All @@ -112,20 +139,35 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter
gzip = True

loop = asyncio.get_event_loop()
st = await loop.run_in_executor(None, filepath.stat)
st: os.stat_result = await loop.run_in_executor(None, filepath.stat)

modsince = request.if_modified_since
if modsince is not None and st.st_mtime <= modsince.timestamp():
self.set_status(HTTPNotModified.status_code)
self._length_check = False
# Delete any Content-Length headers provided by user. HTTP 304
# should always have empty response body
return await super().prepare(request)
etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}"
last_modified = st.st_mtime

# https://tools.ietf.org/html/rfc7232#section-6
ifmatch = request.if_match
if ifmatch is not None and not self._strong_etag_match(etag_value, ifmatch):
return await self._precondition_failed(request)

unmodsince = request.if_unmodified_since
if unmodsince is not None and st.st_mtime > unmodsince.timestamp():
self.set_status(HTTPPreconditionFailed.status_code)
return await super().prepare(request)
if (
unmodsince is not None
and ifmatch is None
and st.st_mtime > unmodsince.timestamp()
):
return await self._precondition_failed(request)

ifnonematch = request.if_none_match
if ifnonematch is not None and self._strong_etag_match(etag_value, ifnonematch):
return await self._not_modified(request, etag_value, last_modified)

modsince = request.if_modified_since
if (
modsince is not None
and ifnonematch is None
and st.st_mtime <= modsince.timestamp()
):
return await self._not_modified(request, etag_value, last_modified)

if hdrs.CONTENT_TYPE not in self.headers:
ct, encoding = mimetypes.guess_type(str(filepath))
Expand Down Expand Up @@ -216,6 +258,8 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter
self.headers[hdrs.CONTENT_ENCODING] = encoding
if gzip:
self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING

self.etag = etag_value # type: ignore[assignment]
self.last_modified = st.st_mtime # type: ignore[assignment]
self.content_length = count

Expand Down
57 changes: 56 additions & 1 deletion aiohttp/web_request.py
Expand Up @@ -31,7 +31,16 @@

from . import hdrs
from .abc import AbstractStreamWriter
from .helpers import DEBUG, ChainMapProxy, HeadersMixin, reify, sentinel
from .helpers import (
DEBUG,
ETAG_ANY,
LIST_QUOTED_ETAG_RE,
ChainMapProxy,
ETag,
HeadersMixin,
reify,
sentinel,
)
from .http_parser import RawRequestMessage
from .http_writer import HttpVersion
from .multipart import BodyPartReader, MultipartReader
Expand Down Expand Up @@ -495,6 +504,52 @@ def if_unmodified_since(self) -> Optional[datetime.datetime]:
"""
return self._http_date(self.headers.get(hdrs.IF_UNMODIFIED_SINCE))

@staticmethod
def _etag_values(etag_header: str) -> Iterator[ETag]:
    """Yield the `ETag` objects found in a raw header value.

    A header that is exactly the wildcard yields one strong
    ``ETAG_ANY`` entry.  Otherwise the header is scanned as a
    comma-separated list of quoted etags; scanning stops at the first
    piece of text that is not a well-formed list item, so only the
    valid leading entries are produced.
    """
    if etag_header == ETAG_ANY:
        yield ETag(is_weak=False, value=ETAG_ANY)
        return

    for found in LIST_QUOTED_ETAG_RE.finditer(etag_header):
        weak_prefix, tag, garbage = found.group(2, 3, 4)
        # Anything captured by the 4th group is a stray character,
        # which makes the rest of the header invalid.
        if garbage:
            return
        yield ETag(is_weak=bool(weak_prefix), value=tag)

@classmethod
def _if_match_or_none_impl(
    cls, header_value: Optional[str]
) -> Optional[Tuple[ETag, ...]]:
    """Parse an If-Match / If-None-Match header value.

    Returns ``None`` for a missing or empty header, otherwise a tuple
    of the `ETag` objects produced by ``_etag_values``.
    """
    if header_value:
        return tuple(cls._etag_values(header_value))
    return None

@reify
def if_match(self) -> Optional[Tuple[ETag, ...]]:
    """The value of If-Match HTTP header, or None.

    The header is exposed as a `tuple` of `ETag` objects; ``None``
    means the header is absent or empty.
    """
    raw_header = self.headers.get(hdrs.IF_MATCH)
    return self._if_match_or_none_impl(raw_header)

@reify
def if_none_match(self) -> Optional[Tuple[ETag, ...]]:
    """The value of If-None-Match HTTP header, or None.

    The header is exposed as a `tuple` of `ETag` objects; ``None``
    means the header is absent or empty.
    """
    raw_header = self.headers.get(hdrs.IF_NONE_MATCH)
    return self._if_match_or_none_impl(raw_header)

@reify
def if_range(self) -> Optional[datetime.datetime]:
"""The value of If-Range HTTP header, or None.
Expand Down
50 changes: 48 additions & 2 deletions aiohttp/web_response.py
Expand Up @@ -27,7 +27,16 @@

from . import hdrs, payload
from .abc import AbstractStreamWriter
from .helpers import PY_38, HeadersMixin, rfc822_formatted_time, sentinel
from .helpers import (
ETAG_ANY,
PY_38,
QUOTED_ETAG_RE,
ETag,
HeadersMixin,
rfc822_formatted_time,
sentinel,
validate_etag_value,
)
from .http import RESPONSES, SERVER_SOFTWARE, HttpVersion10, HttpVersion11
from .payload import Payload
from .typedefs import JSONEncoder, LooseHeaders
Expand Down Expand Up @@ -341,6 +350,43 @@ def last_modified(
elif isinstance(value, str):
self._headers[hdrs.LAST_MODIFIED] = value

@property
def etag(self) -> Optional[ETag]:
    """The response's ETag header parsed into an `ETag`, or None.

    ``None`` is returned when the header is missing, empty, or does not
    have the quoted (optionally ``W/``-prefixed) form.  A header equal
    to the wildcard is returned as ``ETag(value=ETAG_ANY)``.
    """
    header = self._headers.get(hdrs.ETAG)
    if not header:
        return None
    if header == ETAG_ANY:
        return ETag(value=ETAG_ANY)

    parsed = QUOTED_ETAG_RE.fullmatch(header)
    if parsed is None:
        return None

    weak_prefix, tag = parsed.group(1, 2)
    return ETag(is_weak=bool(weak_prefix), value=tag)

@etag.setter
def etag(self, value: Optional[Union[ETag, str]]) -> None:
    """Set, replace or remove the response's ETag header.

    * ``None`` removes the header.
    * The wildcard (as a string or as ``ETag.value``) is stored as is.
    * A plain string is validated and stored as a strong, quoted etag.
    * An `ETag` is validated and stored quoted, with a ``W/`` prefix
      when it is weak.

    Raises:
        ValueError: if *value* has an unsupported type, or if the etag
            text fails ``validate_etag_value``.
    """
    if value is None:
        self._headers.pop(hdrs.ETAG, None)
        return

    if isinstance(value, str):
        if value == ETAG_ANY:
            header_value = ETAG_ANY
        else:
            validate_etag_value(value)
            header_value = f'"{value}"'
    elif isinstance(value, ETag) and (
        value.value == ETAG_ANY or isinstance(value.value, str)
    ):
        if value.value == ETAG_ANY:
            header_value = ETAG_ANY
        else:
            validate_etag_value(value.value)
            if value.is_weak:
                header_value = f'W/"{value.value}"'
            else:
                header_value = f'"{value.value}"'
    else:
        raise ValueError(
            f"Unsupported etag type: {type(value)}. "
            f"etag must be str, ETag or None"
        )

    self._headers[hdrs.ETAG] = header_value

def _generate_content_type_header(
self, CONTENT_TYPE: istr = hdrs.CONTENT_TYPE
) -> None:
Expand Down Expand Up @@ -435,7 +481,7 @@ async def _prepare_headers(self) -> None:
elif version >= HttpVersion11 and self.status in (100, 101, 102, 103, 204):
del headers[hdrs.CONTENT_LENGTH]

if self.status != 204:
if self.status not in (204, 304):
headers.setdefault(hdrs.CONTENT_TYPE, "application/octet-stream")
headers.setdefault(hdrs.DATE, rfc822_formatted_time())
headers.setdefault(hdrs.SERVER, SERVER_SOFTWARE)
Expand Down
17 changes: 17 additions & 0 deletions docs/client_reference.rst
Expand Up @@ -1688,6 +1688,23 @@ ClientTimeout

.. versionadded:: 3.3

ETag
^^^^

.. class:: ETag(value, is_weak=False)

Represents an `ETag` identifier.

.. attribute:: value

Value of corresponding etag without quotes.

.. attribute:: is_weak

Flag indicating that the etag is weak (has a `W/` prefix).

.. versionadded:: 3.8

RequestInfo
^^^^^^^^^^^

Expand Down
2 changes: 2 additions & 0 deletions docs/spelling_wordlist.txt
Expand Up @@ -28,6 +28,7 @@ Dict
Discord
Django
Dup
ETag
Facebook
HTTPException
HttpProcessingError
Expand Down Expand Up @@ -153,6 +154,7 @@ env
environ
eof
epoll
etag
facto
fallback
fallbacks
Expand Down
34 changes: 34 additions & 0 deletions docs/web_reference.rst
Expand Up @@ -336,6 +336,26 @@ and :ref:`aiohttp-web-signals` handlers.

.. versionadded:: 3.1

.. attribute:: if_match

Read-only property that returns :class:`ETag` objects specified
in the *If-Match* header.

Returns :class:`tuple` of :class:`ETag` or ``None`` if
*If-Match* header is absent.

.. versionadded:: 3.8

.. attribute:: if_none_match

Read-only property that returns :class:`ETag` objects specified
in the *If-None-Match* header.

Returns :class:`tuple` of :class:`ETag` or ``None`` if
*If-None-Match* header is absent.

.. versionadded:: 3.8

.. attribute:: if_range

Read-only property that returns the date specified in the
Expand Down Expand Up @@ -782,6 +802,20 @@ StreamResponse
as an :class:`int` or a :class:`float` object, and the
value ``None`` to unset the header.

.. attribute:: etag

*ETag* header for outgoing response.

This property accepts raw :class:`str` values, :class:`ETag`
objects and the value ``None`` to unset the header.

In case of :class:`str` input, etag is considered as strong by default.

**Do not** use double quotes ``"`` in the etag value,
they will be added automatically.

.. versionadded:: 3.8

.. comethod:: prepare(request)

:param aiohttp.web.Request request: HTTP request object, that the
Expand Down

0 comments on commit 089949d

Please sign in to comment.