Skip to content

Commit

Permalink
feat(parse_header): provide our own implementation of parse_header() (
Browse files Browse the repository at this point in the history
#2217)

* feat(parse_header): provide our own implementation of `parse_header()`

* docs(newsfragments): add a newsfragment
+ address 1 review comment

* test(test_mediatypes.py): add tests for multiple parameters
  • Loading branch information
vytas7 committed Apr 3, 2024
1 parent a78cfb3 commit 7ec1d31
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.rst
Expand Up @@ -1027,7 +1027,7 @@ See also: `CONTRIBUTING.md <https://github.com/falconry/falcon/blob/master/CONTR
Legal
-----

Copyright 2013-2023 by Individual and corporate contributors as
Copyright 2013-2024 by Individual and corporate contributors as
noted in the individual source files.

Licensed under the Apache License, Version 2.0 (the "License"); you may
Expand Down
4 changes: 4 additions & 0 deletions docs/_newsfragments/2066.newandimproved.rst
@@ -0,0 +1,4 @@
In Python 3.13, the ``cgi`` module is removed entirely from the stdlib,
including its ``parse_header()`` function. Falcon addresses the issue by shipping
its own implementation; :func:`falcon.parse_header` can also be used in your projects
affected by the removal.
5 changes: 5 additions & 0 deletions docs/api/util.rst
Expand Up @@ -34,6 +34,11 @@ HTTP Status
.. autofunction:: falcon.code_to_http_status
.. autofunction:: falcon.get_http_status

Media types
-----------

.. autofunction:: falcon.parse_header

Async
-----

Expand Down
3 changes: 1 addition & 2 deletions docs/user/recipes/pretty-json.rst
Expand Up @@ -52,7 +52,6 @@ implemented with a :ref:`custom media handler <custom-media-handler-type>`:

.. code:: python
import cgi
import json
import falcon
Expand All @@ -66,7 +65,7 @@ implemented with a :ref:`custom media handler <custom-media-handler-type>`:
return json.loads(data.decode())
def serialize(self, media, content_type):
_, params = cgi.parse_header(content_type)
_, params = falcon.parse_header(content_type)
indent = params.get('indent')
if indent is not None:
try:
Expand Down
1 change: 1 addition & 0 deletions falcon/__init__.py
Expand Up @@ -77,6 +77,7 @@
from falcon.util import IS_64_BITS
from falcon.util import is_python_func
from falcon.util import misc
from falcon.util import parse_header
from falcon.util import reader
from falcon.util import runs_sync
from falcon.util import secure_filename
Expand Down
5 changes: 2 additions & 3 deletions falcon/asgi/multipart.py
Expand Up @@ -14,11 +14,10 @@

"""ASGI multipart form media handler components."""

import cgi

from falcon.asgi.reader import BufferedReader
from falcon.errors import DelimiterError
from falcon.media import multipart
from falcon.util.mediatypes import parse_header

_ALLOWED_CONTENT_HEADERS = multipart._ALLOWED_CONTENT_HEADERS
_CRLF = multipart._CRLF
Expand Down Expand Up @@ -54,7 +53,7 @@ async def get_media(self):
return self._media

async def get_text(self):
content_type, options = cgi.parse_header(self.content_type)
content_type, options = parse_header(self.content_type)
if content_type != 'text/plain':
return None

Expand Down
10 changes: 5 additions & 5 deletions falcon/media/multipart.py
Expand Up @@ -14,7 +14,6 @@

"""Multipart form media handler."""

import cgi
import re
from urllib.parse import unquote_to_bytes

Expand All @@ -24,6 +23,7 @@
from falcon.stream import BoundedStream
from falcon.util import BufferedReader
from falcon.util import misc
from falcon.util.mediatypes import parse_header


# TODO(vytas):
Expand Down Expand Up @@ -249,7 +249,7 @@ def get_text(self):
str: The part decoded as a text string provided the part is
encoded as ``text/plain``, ``None`` otherwise.
"""
content_type, options = cgi.parse_header(self.content_type)
content_type, options = parse_header(self.content_type)
if content_type != 'text/plain':
return None

Expand All @@ -275,7 +275,7 @@ def filename(self):

if self._content_disposition is None:
value = self._headers.get(b'content-disposition', b'')
self._content_disposition = cgi.parse_header(value.decode())
self._content_disposition = parse_header(value.decode())

_, params = self._content_disposition

Expand Down Expand Up @@ -311,7 +311,7 @@ def name(self):

if self._content_disposition is None:
value = self._headers.get(b'content-disposition', b'')
self._content_disposition = cgi.parse_header(value.decode())
self._content_disposition = parse_header(value.decode())

_, params = self._content_disposition
self._name = params.get('name')
Expand Down Expand Up @@ -493,7 +493,7 @@ def __init__(self, parse_options=None):
def _deserialize_form(
self, stream, content_type, content_length, form_cls=MultipartForm
):
_, options = cgi.parse_header(content_type)
_, options = parse_header(content_type)
try:
boundary = options['boundary']
except KeyError:
Expand Down
4 changes: 2 additions & 2 deletions falcon/testing/helpers.py
Expand Up @@ -23,7 +23,6 @@
"""

import asyncio
import cgi
from collections import defaultdict
from collections import deque
import contextlib
Expand Down Expand Up @@ -51,6 +50,7 @@
from falcon.constants import SINGLETON_HEADERS
import falcon.request
from falcon.util import uri
from falcon.util.mediatypes import parse_header

# NOTE(kgriffs): Changed in 3.0 from 'curl/7.24.0 (x86_64-apple-darwin12.0)'
DEFAULT_UA = 'falcon-client/' + falcon.__version__
Expand Down Expand Up @@ -802,7 +802,7 @@ def get_encoding_from_headers(headers):
if not content_type:
return None

content_type, params = cgi.parse_header(content_type)
content_type, params = parse_header(content_type)

if 'charset' in params:
return params['charset'].strip('\'"')
Expand Down
1 change: 1 addition & 0 deletions falcon/util/__init__.py
Expand Up @@ -29,6 +29,7 @@
from falcon.util.deprecation import deprecated
from falcon.util.deprecation import deprecated_args
from falcon.util.deprecation import DeprecatedWarning
from falcon.util.mediatypes import parse_header
from falcon.util.misc import code_to_http_status
from falcon.util.misc import dt_to_http
from falcon.util.misc import get_argnames
Expand Down
89 changes: 89 additions & 0 deletions falcon/util/mediatypes.py
@@ -0,0 +1,89 @@
# Copyright 2023-2024 by Vytautas Liuolia.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Media (aka MIME) type parsing and matching utilities."""

import typing


def _parse_param_old_stdlib(s): # type: ignore
while s[:1] == ';':
s = s[1:]
end = s.find(';')
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]


def _parse_header_old_stdlib(line):  # type: ignore
    """Parse a Content-type like header, honoring quoted parameter values.

    Return the main content-type and a dictionary of options.

    Args:
        line: A header value to parse.

    Returns:
        tuple: (the main content-type, dictionary of options). Option names
        are stripped and lower-cased. A value enclosed in double quotes has
        the quotes removed and its backslash escapes unescaped.

    Note:
        This method has been copied (almost) verbatim from CPython 3.8 stdlib.
        It is slated for removal from the stdlib in 3.13.
    """
    segments = _parse_param_old_stdlib(';' + line)
    # The first segment is the bare content-type; the rest are parameters.
    media_type = next(segments)
    options = {}
    for segment in segments:
        name, equals, value = segment.partition('=')
        if not equals:
            # Parameters without an ``=`` are dropped.
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        options[name.strip().lower()] = value
    return media_type, options


def parse_header(line: str) -> typing.Tuple[str, dict]:
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Option names are stripped and lower-cased; the main content-type is only
    stripped. Parameters that lack an ``=`` sign are ignored.

    Args:
        line: A header value to parse.

    Returns:
        tuple: (the main content-type, dictionary of options).

    Note:
        This function replaces an equivalent function previously available in
        the stdlib as ``cgi.parse_header()``.
        It was removed from the stdlib in Python 3.13.
    """
    if '"' in line or '\\' in line:
        # Quoting or escaping is present: defer to the stdlib-derived parser.
        return _parse_header_old_stdlib(line)

    # Fast path: without quotes or backslashes, plain splitting is enough.
    media_type, semicolon, tail = line.partition(';')
    media_type = media_type.strip()
    if not semicolon:
        return (media_type, {})

    fragments = (item.partition('=') for item in tail.split(';'))
    options = {
        name.strip().lower(): value.strip()
        for name, equals, value in fragments
        if equals
    }
    return (media_type, options)


__all__ = ['parse_header']
4 changes: 2 additions & 2 deletions falcon/vendor/mimeparse/mimeparse.py
@@ -1,4 +1,4 @@
import cgi
from falcon.util.mediatypes import parse_header

__version__ = '1.6.0'
__author__ = 'Joe Gregorio'
Expand All @@ -23,7 +23,7 @@ def parse_mime_type(mime_type):
:rtype: (str,str,dict)
"""
full_type, params = cgi.parse_header(mime_type)
full_type, params = parse_header(mime_type)
# Java URLConnection class sends an Accept header that includes a
# single '*'. Turn it into a legal wildcard.
if full_type == '*':
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Expand Up @@ -94,7 +94,6 @@ filterwarnings = [
"ignore:Using or importing the ABCs:DeprecationWarning",
"ignore:cannot collect test class 'TestClient':pytest.PytestCollectionWarning",
"ignore:inspect.getargspec\\(\\) is deprecated:DeprecationWarning",
"ignore:.cgi. is deprecated and slated for removal:DeprecationWarning",
"ignore:path is deprecated\\. Use files\\(\\) instead:DeprecationWarning",
"ignore:This process \\(.+\\) is multi-threaded",
]
Expand Down
41 changes: 41 additions & 0 deletions tests/test_mediatypes.py
@@ -0,0 +1,41 @@
import pytest

from falcon.util import mediatypes


# Pairs of (raw header value, expected ``parse_header()`` result).
_PARSE_HEADER_CASES = [
    ('', ('', {})),
    ('strange', ('strange', {})),
    ('text/plain', ('text/plain', {})),
    ('text/plain ', ('text/plain', {})),
    (' text/plain', ('text/plain', {})),
    (' text/plain ', ('text/plain', {})),
    (' text/plain ', ('text/plain', {})),
    (
        'falcon/peregrine; key1; key2=value; key3',
        ('falcon/peregrine', {'key2': 'value'}),
    ),
    (
        'audio/pcm;rate=48000;encoding=float;bits=32',
        ('audio/pcm', {'bits': '32', 'encoding': 'float', 'rate': '48000'}),
    ),
    (
        'falcon/*; genus=falco; family=falconidae; class=aves; ',
        ('falcon/*', {'class': 'aves', 'family': 'falconidae', 'genus': 'falco'}),
    ),
    ('"falcon/peregrine" ; key="value"', ('"falcon/peregrine"', {'key': 'value'})),
    ('falcon/peregrine; empty=""', ('falcon/peregrine', {'empty': ''})),
    ('falcon/peregrine; quote="', ('falcon/peregrine', {'quote': '"'})),
    ('text/plain; charset=utf-8', ('text/plain', {'charset': 'utf-8'})),
    ('stuff/strange; missing-value; missing-another', ('stuff/strange', {})),
    ('stuff/strange; missing-value\\missing-another', ('stuff/strange', {})),
    (
        'application/falcon; P1 = "key; value"; P2="\\""',
        ('application/falcon', {'p1': 'key; value', 'p2': '"'}),
    ),
]


@pytest.mark.parametrize('value,expected', _PARSE_HEADER_CASES)
def test_parse_header(value, expected):
    """Check that each header value parses to its expected result."""
    parsed = mediatypes.parse_header(value)
    assert parsed == expected

0 comments on commit 7ec1d31

Please sign in to comment.