From ddb65b489d01d696afa1695b75fdd5df3e4ffdf8 Mon Sep 17 00:00:00 2001
From: Bert JW Regeer <bertjw@regeer.org>
Date: Thu, 2 Jan 2020 12:16:00 -0800
Subject: [PATCH 1/3] Remove accidental stripping of non-printable characters

Continuation/follow-up for:
https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4
which showcased the initial problem with the way that waitress was
dealing with whitespace in headers.

Additional testing by ZeddYu Lu also led to other potential issues with
non-printable ascii characters that are stripped by default by Python's
string.strip() function
---
 waitress/parser.py            | 17 ++++++++++++++---
 waitress/tests/test_parser.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/waitress/parser.py b/waitress/parser.py
index 8b07dd6a..fef8a3da 100644
--- a/waitress/parser.py
+++ b/waitress/parser.py
@@ -213,13 +213,15 @@ def parse_header(self, header_plus):
             if not header:
                 raise ParsingError("Invalid header")
 
-            key, value = header.group('name', 'value')
+            key, value = header.group("name", "value")
 
             if b"_" in key:
                 # TODO(xistence): Should we drop this request instead?
                 continue
 
-            value = value.strip()
+            # Only strip off whitespace that is considered valid whitespace by
+            # RFC7230, don't strip the rest
+            value = value.strip(b" \t")
             key1 = tostr(key.upper().replace(b"-", b"_"))
             # If a header already exists, we append subsequent values
             # seperated by a comma. Applications already need to handle
@@ -257,7 +259,16 @@ def parse_header(self, header_plus):
             # here
             te = headers.pop("TRANSFER_ENCODING", "")
 
-            encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding]
+            # NB: We can not just call bare strip() here because it will also
+            # remove other non-printable characters that we explicitly do not
+            # want removed so that if someone attempts to smuggle a request
+            # with these characters we don't fall prey to it.
+            #
+            # For example \x85 is stripped by default, but it is not considered
+            # valid whitespace to be stripped by RFC7230.
+            encodings = [
+                encoding.strip(" \t").lower() for encoding in te.split(",") if encoding
+            ]
 
             for encoding in encodings:
                 # Out of the transfer-codings listed in
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
index 8d42600b..5373fd52 100644
--- a/waitress/tests/test_parser.py
+++ b/waitress/tests/test_parser.py
@@ -236,6 +236,35 @@ def test_parse_header_transfer_encoding_invalid_multiple(self):
         else:  # pragma: nocover
             self.assertTrue(False)
 
+    def test_parse_header_transfer_encoding_invalid_whitespace(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_transfer_encoding_invalid_unicode(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        # This is the binary encoding for the UTF-8 character
+        # https://www.compart.com/en/unicode/U+212A "unicode character "K""
+        # which if waitress were to accidentally do the wrong thing get
+        # lowercased to just the ascii "k" due to unicode collisions during
+        # transformation
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
     def test_parse_header_11_expect_continue(self):
         data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n"
         self.parser.parse_header(data)

From 0bf98dadd8cae23830cb365cc6cb9cedd7f98db0 Mon Sep 17 00:00:00 2001
From: Bert JW Regeer <bertjw@regeer.org>
Date: Thu, 2 Jan 2020 12:36:06 -0800
Subject: [PATCH 2/3] Update RFC7230 regex with errata for header field-content

---
 waitress/rfc7230.py           | 10 +++++++++-
 waitress/tests/test_parser.py |  9 +++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py
index a9f047c8..97a90a41 100644
--- a/waitress/rfc7230.py
+++ b/waitress/rfc7230.py
@@ -33,8 +33,14 @@
 # field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
 # field-vchar    = VCHAR / obs-text
 
+# Errata from: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+# changes field-content to:
+#
+# field-content  = field-vchar [ 1*( SP / HTAB / field-vchar )
+#                  field-vchar ]
+
 FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
-FIELD_CONTENT = FIELD_VCHAR + "(" + RWS + FIELD_VCHAR + "){0,}"
+FIELD_CONTENT = FIELD_VCHAR + "([ \t" + VCHAR + OBS_TEXT + "]+" + FIELD_VCHAR + "){,1}"
 FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
 
 HEADER_FIELD = re.compile(
@@ -42,3 +48,5 @@
         "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
     )
 )
+
+OWS_STRIP = re.compile(OWS + "(?P<value>.*?)" + OWS)
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
index 5373fd52..71703e26 100644
--- a/waitress/tests/test_parser.py
+++ b/waitress/tests/test_parser.py
@@ -423,6 +423,15 @@ def test_parse_header_multiple_values_header_folded_multiple(self):
         self.assertIn("FOO", self.parser.headers)
         self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
 
+    def test_parse_header_multiple_values_extra_space(self):
+        # Tests errata from: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: abrowser/0.001 (C O M M E N T)\r\n"
+        self.parser.parse_header(data)
+
+        self.assertIn("FOO", self.parser.headers)
+        self.assertEqual(self.parser.headers["FOO"], "abrowser/0.001 (C O M M E N T)")
 
 
 class Test_split_uri(unittest.TestCase):

From 3a54e2993db7182feaea4602aac7c1fa8f2ca08b Mon Sep 17 00:00:00 2001
From: Bert JW Regeer <bertjw@regeer.org>
Date: Thu, 2 Jan 2020 12:42:59 -0800
Subject: [PATCH 3/3] Add CHANGES and bump version to 1.4.2

---
 CHANGES.txt | 31 ++++++++++++++++++++++++++++++-
 setup.py    |  2 +-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 71d61bd7..c64f6837 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,30 @@
+1.4.2 (2020-01-??)
+------------------
+
+Security Fixes
+~~~~~~~~~~~~~~
+
+- This is a follow-up to the fix introduced in 1.4.1 to tighten up the way
+  Waitress strips whitespace from header values. This makes sure Waitress won't
+  accidentally treat non-printable characters as whitespace and lead to a
+  potental HTTP request smuggling/splitting security issue.
+
+  Thanks to ZeddYu Lu for the extra test cases.
+
+  Please see the security advisory for more information:
+  https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4
+
+  CVE-ID: CVE-2019-16789
+
+Bugfixes
+~~~~~~~~
+
+- Updated the regex used to validate header-field content to match the errata
+  that was published for RFC7230.
+
+  See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+
+
 1.4.1 (2019-12-24)
 ------------------
 
@@ -12,6 +39,8 @@ Security Fixes
   Please see the security advisory for more information:
   https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4
 
+  CVE-ID: CVE-2019-16789
+
 1.4.0 (2019-12-20)
 ------------------
 
@@ -80,7 +109,7 @@ Security Fixes
   ``Transfer-Encoding: chunked`` instead of ``Transfer-Encoding: identity,
   chunked``.
 
-  PLease see the security advisory for more information: 
+  Please see the security advisory for more information:
   https://github.com/Pylons/waitress/security/advisories/GHSA-g2xc-35jw-c63p
 
   CVE-ID: CVE-2019-16786
diff --git a/setup.py b/setup.py
index 15e11d5b..c32af931 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
 
 setup(
     name="waitress",
-    version="1.4.1",
+    version="1.4.2",
     author="Zope Foundation and Contributors",
     author_email="zope-dev@zope.org",
     maintainer="Pylons Project",