From 431173d18d403895534af05a990a57f3468a16d9 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 6 Nov 2023 21:28:53 -0800 Subject: [PATCH 1/8] Eliminate use of cgi.parse_multipart() --- setup.py | 1 + src/treq/test/test_multipart.py | 35 +++++++++++++++------------------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 52c26c80..9fbc6d41 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ "pyflakes", "httpbin==0.7.0", "werkzeug==2.0.3", + "multipart", ], "docs": [ "sphinx<7.0.0", # Removal of 'style' key breaks RTD. diff --git a/src/treq/test/test_multipart.py b/src/treq/test/test_multipart.py index 5cbaade5..4d44f11c 100644 --- a/src/treq/test/test_multipart.py +++ b/src/treq/test/test_multipart.py @@ -1,12 +1,11 @@ # Copyright (c) Twisted Matrix Laboratories. # See LICENSE for details. -import cgi -import sys from typing import cast, AnyStr from io import BytesIO +from multipart import MultipartParser from twisted.trial import unittest from zope.interface.verify import verifyObject @@ -588,9 +587,10 @@ def test_newLinesInParams(self): --heyDavid-- """.encode("utf-8")), output) - def test_worksWithCgi(self): + def test_worksWithMultipart(self): """ - Make sure the stuff we generated actually parsed by python cgi + Make sure the stuff we generated can actually be parsed by the + `multipart` module. """ output = self.getOutput( MultiPartProducer([ @@ -612,23 +612,20 @@ def test_worksWithCgi(self): ) ) - form = cgi.parse_multipart(BytesIO(output), { - "boundary": b"heyDavid", - "CONTENT-LENGTH": str(len(output)), - }) + form = MultipartParser( + stream=BytesIO(output), + boundary=b"heyDavid", + content_length=len(output), + ) - # Since Python 3.7, the value for a non-file field is now a list - # of strings, not bytes. 
- if sys.version_info >= (3, 7): - self.assertEqual(set(['just a string\r\n', 'another string']), - set(form['cfield'])) - else: - self.assertEqual(set([b'just a string\r\n', b'another string']), - set(form['cfield'])) + self.assertEqual( + [b'just a string\r\n', b'another string'], + [f.raw for f in form.get_all('cfield')], + ) - self.assertEqual(set([b'my lovely bytes2']), set(form['efield'])) - self.assertEqual(set([b'my lovely bytes219']), set(form['xfield'])) - self.assertEqual(set([b'my lovely bytes22']), set(form['afield'])) + self.assertEqual(b'my lovely bytes2', form.get('efield').raw) + self.assertEqual(b'my lovely bytes219', form.get('xfield').raw) + self.assertEqual(b'my lovely bytes22', form.get('afield').raw) class LengthConsumerTestCase(unittest.TestCase): From c1c1dc4a355ac009d8ff7341f9c6d2157b8e1165 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 1 Jan 2024 19:29:57 -0800 Subject: [PATCH 2/8] Vendor cgi.parse_header() --- src/treq/_cgi.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/treq/_cgi.py diff --git a/src/treq/_cgi.py b/src/treq/_cgi.py new file mode 100644 index 00000000..9efbe652 --- /dev/null +++ b/src/treq/_cgi.py @@ -0,0 +1,96 @@ +# flake8: noqa: E501 +# +# The contents of this file were vendored from cpython.git Lib/cgi.py +# commit 60edc70a9374f1cc6ecff5974e438d58fec29985 [1]. +# +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +# All Rights Reserved +# +# Subject to these license terms (from cpython.git LICENSE line 73) [2]: +# +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +# -------------------------------------------- +# +# 1. 
This LICENSE AGREEMENT is between the Python Software Foundation +# ("PSF"), and the Individual or Organization ("Licensee") accessing and +# otherwise using this software ("Python") in source or binary form and +# its associated documentation. +# +# 2. Subject to the terms and conditions of this License Agreement, PSF hereby +# grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +# analyze, test, perform and/or display publicly, prepare derivative works, +# distribute, and otherwise use Python alone or in any derivative version, +# provided, however, that PSF's License Agreement and PSF's notice of copyright, +# i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +# All Rights Reserved" are retained in Python alone or in any derivative version +# prepared by Licensee. +# +# 3. In the event Licensee prepares a derivative work that is based on +# or incorporates Python or any part thereof, and wants to make +# the derivative work available to others as provided herein, then +# Licensee hereby agrees to include in any such work a brief summary of +# the changes made to Python. +# +# 4. PSF is making Python available to Licensee on an "AS IS" +# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +# INFRINGE ANY THIRD PARTY RIGHTS. +# +# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. +# +# 6. 
This License Agreement will automatically terminate upon a material +# breach of its terms and conditions. +# +# 7. Nothing in this License Agreement shall be deemed to create any +# relationship of agency, partnership, or joint venture between PSF and +# Licensee. This License Agreement does not grant permission to use PSF +# trademarks or trade name in a trademark sense to endorse or promote +# products or services of Licensee, or any third party. +# +# 8. By copying, installing or otherwise using Python, Licensee +# agrees to be bound by the terms and conditions of this License +# Agreement. +# +# [1]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/Lib/cgi.py +# [2]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/LICENSE#L73 + + +def _parseparam(s): + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] + + +def parse_header(line): + """Parse a Content-type like header. + + Return the main content-type and a dictionary of options. 
+ + """ + parts = _parseparam(';' + line) + key = parts.__next__() + pdict = {} + for p in parts: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i+1:].strip() + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + value = value.replace('\\\\', '\\').replace('\\"', '"') + pdict[name] = value + return key, pdict From b4fc499c4a161fdbfe6e0f7d512462f90b1435cc Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 6 Nov 2023 19:03:20 -0800 Subject: [PATCH 3/8] Eliminate use of cgi.parse_header() --- src/treq/content.py | 27 ++++++++++++++---- src/treq/test/test_content.py | 54 +++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/treq/content.py b/src/treq/content.py index 27dba507..06118f14 100644 --- a/src/treq/content.py +++ b/src/treq/content.py @@ -1,6 +1,5 @@ -import cgi import json -from typing import Any, Callable, List, Optional, cast +from typing import Any, Callable, Final, List, Optional, cast from twisted.internet.defer import Deferred, succeed from twisted.internet.protocol import Protocol, connectionDone @@ -10,6 +9,19 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IResponse +from treq import _cgi + + +"""Characters that are valid in a charset name per RFC 2978. + +See https://www.rfc-editor.org/errata/eid5433 +""" +_MIME_CHARSET_CHARS: Final[str] = ( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" # ALPHA + "0123456789" # DIGIT + "!#$%&+-^_`~" # symbols +) + def _encoding_from_headers(headers: Headers) -> Optional[str]: content_types = headers.getRawHeaders("content-type") @@ -18,14 +30,17 @@ def _encoding_from_headers(headers: Headers) -> Optional[str]: # This seems to be the choice browsers make when encountering multiple # content-type headers. 
- content_type, params = cgi.parse_header(content_types[-1]) + media_type, params = _cgi.parse_header(content_types[-1]) charset = params.get("charset") if charset: - return charset.strip("'\"") + charset = charset.strip("'\"").lower() + if any(c not in _MIME_CHARSET_CHARS for c in charset): + return None + return charset - if content_type == "application/json": - return "UTF-8" + if media_type == "application/json": + return "utf-8" return None diff --git a/src/treq/test/test_content.py b/src/treq/test/test_content.py index 0d83ddfe..60835814 100644 --- a/src/treq/test/test_content.py +++ b/src/treq/test/test_content.py @@ -1,3 +1,4 @@ +import unittest from unittest import mock from twisted.python.failure import Failure @@ -11,6 +12,7 @@ from twisted.web.server import NOT_DONE_YET from treq import collect, content, json_content, text_content +from treq.content import _encoding_from_headers from treq.client import _BufferedResponse from treq.testing import StubTreq @@ -267,6 +269,54 @@ def error(data): # being closed. stub.flush() self.assertEqual(len(resource.request_finishes), 1) - self.assertIsInstance( - resource.request_finishes[0].value, ConnectionDone + self.assertIsInstance(resource.request_finishes[0].value, ConnectionDone) + + +class EncodingFromHeadersTests(unittest.TestCase): + def _encodingFromContentType(self, content_type: str) -> str | None: + """ + Invoke `_encoding_from_headers()` for a header value. + + :param content_type: A Content-Type header value. + :returns: The result of `_encoding_from_headers()` + """ + h = Headers({"Content-Type": [content_type]}) + return _encoding_from_headers(h) + + def test_rfcExamples(self): + """ + The examples from RFC 9110 § 8.3.1 are normalized to + canonical (lowercase) form.
+ """ + for example in [ + "text/html;charset=utf-8", + 'Text/HTML;Charset="utf-8"', + 'text/html; charset="utf-8"', + "text/html;charset=UTF-8", + ]: + self.assertEqual("utf-8", self._encodingFromContentType(example)) + + def test_multipleParams(self): + """The charset parameter is extracted even if mixed with other params.""" + for example in [ + "a/b;c=d;charSet=ascii", + "a/b;c=d;charset=ascii; e=f", + "a/b;c=d; charsEt=ascii;e=f", + "a/b;c=d; charset=ascii; e=f", + ]: + self.assertEqual("ascii", self._encodingFromContentType(example)) + + def test_quotedString(self): + """Any quotes that surround the value of the charset param are removed.""" + self.assertEqual( + "ascii", self._encodingFromContentType("foo/bar; charset='ASCII'") ) + self.assertEqual( + "shift_jis", self._encodingFromContentType('a/b; charset="Shift_JIS"') + ) + + def test_noCharset(self): + """None is returned when no valid charset parameter is found.""" + self.assertIsNone(self._encodingFromContentType("application/octet-stream")) + self.assertIsNone(self._encodingFromContentType("text/plain;charset=")) + self.assertIsNone(self._encodingFromContentType("text/plain;charset=🙃")) From cd4c7f873517814f30ae1950b3cf60d0b2fd8c3d Mon Sep 17 00:00:00 2001 From: Tom Most Date: Tue, 7 Nov 2023 21:42:33 -0800 Subject: [PATCH 4/8] Add change fragment --- changelog.d/355.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/355.bugfix.rst diff --git a/changelog.d/355.bugfix.rst b/changelog.d/355.bugfix.rst new file mode 100644 index 00000000..8e427ee0 --- /dev/null +++ b/changelog.d/355.bugfix.rst @@ -0,0 +1 @@ +:mod:`treq.content.text_content()` no longer generates deprecation warnings due to use of the ``cgi`` module.
From 5cc43b879bc1c820470f68d92c7d26e0b9929e0e Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 1 Jan 2024 20:53:41 -0800 Subject: [PATCH 5/8] Reject empty quoted charset --- src/treq/content.py | 10 +++++++--- src/treq/test/test_content.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/treq/content.py b/src/treq/content.py index 06118f14..416c37da 100644 --- a/src/treq/content.py +++ b/src/treq/content.py @@ -1,5 +1,7 @@ import json -from typing import Any, Callable, Final, List, Optional, cast +from typing import ( + Any, Callable, Final, FrozenSet, List, Optional, cast +) from twisted.internet.defer import Deferred, succeed from twisted.internet.protocol import Protocol, connectionDone @@ -16,7 +18,7 @@ See https://www.rfc-editor.org/errata/eid5433 """ -_MIME_CHARSET_CHARS: Final[str] = ( +_MIME_CHARSET_CHARS: Final[FrozenSet[str]] = frozenset( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" # ALPHA "0123456789" # DIGIT "!#$%&+-^_`~" # symbols @@ -35,7 +37,9 @@ def _encoding_from_headers(headers: Headers) -> Optional[str]: charset = params.get("charset") if charset: charset = charset.strip("'\"").lower() - if any(c not in _MIME_CHARSET_CHARS for c in charset): + if not charset: + return None + if not set(charset).issubset(_MIME_CHARSET_CHARS): return None return charset diff --git a/src/treq/test/test_content.py b/src/treq/test/test_content.py index 60835814..162ba558 100644 --- a/src/treq/test/test_content.py +++ b/src/treq/test/test_content.py @@ -317,6 +317,11 @@ def test_quotedString(self): def test_noCharset(self): """None is returned when no valid charset parameter is found.""" - self.assertIsNone(self._encodingFromContentType("application/octet-stream")) - self.assertIsNone(self._encodingFromContentType("text/plain;charset=")) - self.assertIsNone(self._encodingFromContentType("text/plain;charset=🙃")) + for example in [ + "application/octet-stream", + "text/plain;charset=", + "text/plain;charset=''", +
"text/plain;charset=\"'\"", + "text/plain;charset=🙃", + ]: + self.assertIsNone(self._encodingFromContentType(example)) From 852e34386b2d36146463c5558396fff0b20cd464 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 1 Jan 2024 21:04:40 -0800 Subject: [PATCH 6/8] Make MyPy happy --- src/treq/_cgi.py | 6 ++++-- src/treq/test/test_content.py | 3 ++- src/treq/test/test_multipart.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/treq/_cgi.py b/src/treq/_cgi.py index 9efbe652..797f147a 100644 --- a/src/treq/_cgi.py +++ b/src/treq/_cgi.py @@ -61,8 +61,10 @@ # [1]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/Lib/cgi.py # [2]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/LICENSE#L73 +from typing import Dict, Iterator, Tuple - -def _parseparam(s): + +def _parseparam(s: str) -> Iterator[str]: while s[:1] == ';': s = s[1:] end = s.find(';') @@ -75,7 +77,7 @@ def _parseparam(s): s = s[end:] -def parse_header(line): +def parse_header(line: str) -> Tuple[str, Dict[str, str]]: """Parse a Content-type like header. Return the main content-type and a dictionary of options. diff --git a/src/treq/test/test_content.py b/src/treq/test/test_content.py index 162ba558..8158e007 100644 --- a/src/treq/test/test_content.py +++ b/src/treq/test/test_content.py @@ -1,5 +1,6 @@ import unittest from unittest import mock +from typing import Optional from twisted.python.failure import Failure @@ -273,7 +274,7 @@ def error(data): class EncodingFromHeadersTests(unittest.TestCase): - def _encodingFromContentType(self, content_type: str) -> str | None: + def _encodingFromContentType(self, content_type: str) -> Optional[str]: """ Invoke `_encoding_from_headers()` for a header value.
diff --git a/src/treq/test/test_multipart.py b/src/treq/test/test_multipart.py index 4d44f11c..999f1afd 100644 --- a/src/treq/test/test_multipart.py +++ b/src/treq/test/test_multipart.py @@ -5,7 +5,7 @@ from io import BytesIO -from multipart import MultipartParser +from multipart import MultipartParser # type: ignore from twisted.trial import unittest from zope.interface.verify import verifyObject From e03348f59160295bcf896006f69c3ff57bb2b318 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 1 Jan 2024 21:12:33 -0800 Subject: [PATCH 7/8] Fix Python 3.7 compat --- src/treq/content.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/treq/content.py b/src/treq/content.py index 416c37da..0a0ed22e 100644 --- a/src/treq/content.py +++ b/src/treq/content.py @@ -1,7 +1,5 @@ import json -from typing import ( - Any, Callable, Final, FrozenSet, List, Optional, cast -) +from typing import Any, Callable, FrozenSet, List, Optional, cast from twisted.internet.defer import Deferred, succeed from twisted.internet.protocol import Protocol, connectionDone @@ -18,7 +16,7 @@ See https://www.rfc-editor.org/errata/eid5433 """ -_MIME_CHARSET_CHARS: Final[FrozenSet[str]] = frozenset( +_MIME_CHARSET_CHARS: FrozenSet[str] = frozenset( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" # ALPHA "0123456789" # DIGIT "!#$%&+-^_`~" # symbols From 0a3d17b852aba99be48e19a85002091d3e34cbf3 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Mon, 1 Jan 2024 21:50:44 -0800 Subject: [PATCH 8/8] Avoid vendoring anything --- setup.py | 2 +- src/treq/_cgi.py | 98 --------------------------------------------- src/treq/content.py | 6 +-- 3 files changed, 4 insertions(+), 102 deletions(-) delete mode 100644 src/treq/_cgi.py diff --git a/setup.py b/setup.py index 9fbc6d41..36046d44 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "Twisted[tls] >= 22.10.0", # For #11635 "attrs", "typing_extensions >= 3.10.0", + "multipart", ], extras_require={ "dev": [ @@ -42,7 +43,6 @@ 
"pyflakes", "httpbin==0.7.0", "werkzeug==2.0.3", - "multipart", ], "docs": [ "sphinx<7.0.0", # Removal of 'style' key breaks RTD. diff --git a/src/treq/_cgi.py b/src/treq/_cgi.py deleted file mode 100644 index 797f147a..00000000 --- a/src/treq/_cgi.py +++ /dev/null @@ -1,98 +0,0 @@ -# flake8: noqa: E501 -# -# The contents of this file were vendored from cpython.git Lib/cgi.py -# commit 60edc70a9374f1cc6ecff5974e438d58fec29985 [1]. -# -# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; -# All Rights Reserved -# -# Subject to these license terms (from cpython.git LICENSE line 73) [2]: -# -# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -# -------------------------------------------- -# -# 1. This LICENSE AGREEMENT is between the Python Software Foundation -# ("PSF"), and the Individual or Organization ("Licensee") accessing and -# otherwise using this software ("Python") in source or binary form and -# its associated documentation. -# -# 2. Subject to the terms and conditions of this License Agreement, PSF hereby -# grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, -# analyze, test, perform and/or display publicly, prepare derivative works, -# distribute, and otherwise use Python alone or in any derivative version, -# provided, however, that PSF's License Agreement and PSF's notice of copyright, -# i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; -# All Rights Reserved" are retained in Python alone or in any derivative version -# prepared by Licensee. -# -# 3. 
In the event Licensee prepares a derivative work that is based on -# or incorporates Python or any part thereof, and wants to make -# the derivative work available to others as provided herein, then -# Licensee hereby agrees to include in any such work a brief summary of -# the changes made to Python. -# -# 4. PSF is making Python available to Licensee on an "AS IS" -# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -# INFRINGE ANY THIRD PARTY RIGHTS. -# -# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. -# -# 6. This License Agreement will automatically terminate upon a material -# breach of its terms and conditions. -# -# 7. Nothing in this License Agreement shall be deemed to create any -# relationship of agency, partnership, or joint venture between PSF and -# Licensee. This License Agreement does not grant permission to use PSF -# trademarks or trade name in a trademark sense to endorse or promote -# products or services of Licensee, or any third party. -# -# 8. By copying, installing or otherwise using Python, Licensee -# agrees to be bound by the terms and conditions of this License -# Agreement. 
-# -# [1]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/Lib/cgi.py -# [2]: https://github.com/python/cpython/blob/60edc70a9374f1cc6ecff5974e438d58fec29985/LICENSE#L73 - -from typing import Dict, Iterator, Tuple - - -def _parseparam(s: str) -> Iterator[str]: - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - - -def parse_header(line: str) -> Tuple[str, Dict[str, str]]: - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. - - """ - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict diff --git a/src/treq/content.py b/src/treq/content.py index 0a0ed22e..e3f4aaad 100644 --- a/src/treq/content.py +++ b/src/treq/content.py @@ -1,6 +1,7 @@ import json from typing import Any, Callable, FrozenSet, List, Optional, cast +import multipart # type: ignore from twisted.internet.defer import Deferred, succeed from twisted.internet.protocol import Protocol, connectionDone from twisted.python.failure import Failure @@ -9,8 +10,6 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IResponse -from treq import _cgi - """Characters that are valid in a charset name per RFC 2978. @@ -30,10 +29,11 @@ def _encoding_from_headers(headers: Headers) -> Optional[str]: # This seems to be the choice browsers make when encountering multiple # content-type headers. 
- media_type, params = _cgi.parse_header(content_types[-1]) + media_type, params = multipart.parse_options_header(content_types[-1]) charset = params.get("charset") if charset: + assert isinstance(charset, str) # for MyPy charset = charset.strip("'\"").lower() if not charset: return None