Skip to content

Commit

Permalink
Prevent CRLF injections described in CVE-2019-12387
Browse files Browse the repository at this point in the history
Author: markrwilliams

Reviewers: glyph

Fixes: ticket:9647

Twisted's HTTP client APIs were vulnerable to maliciously constructed
HTTP methods, hosts, and/or paths, URI components such as paths and
query parameters.  These vulnerabilities were beyond the header name
and value injection vulnerabilities addressed in:

https://twistedmatrix.com/trac/ticket/9420
#999

The following client APIs will raise a ValueError if given a method,
host, or URI that includes newlines or other disallowed characters:

- twisted.web.client.Agent.request
- twisted.web.client.ProxyAgent.request
- twisted.web.client.Request.__init__
- twisted.web.client.Request.writeTo

ProxyAgent is patched separately from Agent because unlike other
agents (e.g. CookieAgent) it is not implemented as an Agent wrapper.

Request.__init__ checks its method and URI so that errors occur closer
to their originating input.  Request.method and Request.uri are both
public APIs, however, so Request.writeTo (via Request._writeHeaders)
also checks the validity of both before writing anything to the wire.

Additionally, the following deprecated client APIs have also been
patched:

- twisted.web.client.HTTPPageGetter.__init__
- twisted.web.client.HTTPPageDownloader.__init__
- twisted.web.client.HTTPClientFactory.__init__
- twisted.web.client.HTTPClientFactory.setURL
- twisted.web.client.HTTPDownloader.__init__
- twisted.web.client.HTTPDownloader.setURL
- twisted.web.client.getPage
- twisted.web.client.downloadPage

These have been patched prior to their removal so that they won't be
vulnerable in the last Twisted release that includes them.  They
represent a best effort, because testing every combination of these
public APIs would require more code than deprecated APIs warrant.

In all cases URI components, including hostnames, are restricted to
the characters allowed in path components.  This mirrors the CPython
patch (for bpo-30458) that addresses equivalent vulnerabilities:

python/cpython@bb8071a

HTTP methods, however, are checked against the set of characters
described in RFC-7230.
  • Loading branch information
markrwilliams committed Jun 5, 2019
1 parent d0bcf0b commit 6c61fc4
Show file tree
Hide file tree
Showing 6 changed files with 725 additions and 11 deletions.
85 changes: 81 additions & 4 deletions src/twisted/web/_newclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from __future__ import division, absolute_import
__metaclass__ = type

import re

from zope.interface import implementer

from twisted.python.compat import networkString
Expand Down Expand Up @@ -579,6 +581,74 @@ def connectionLost(self, reason):



_VALID_METHOD = re.compile(
br"\A[%s]+\Z" % (
bytes().join(
(
b"!", b"#", b"$", b"%", b"&", b"'", b"*",
b"+", b"-", b".", b"^", b"_", b"`", b"|", b"~",
b"\x30-\x39",
b"\x41-\x5a",
b"\x61-\x7A",
),
),
),
)



def _ensureValidMethod(method):
"""
An HTTP method is an HTTP token, which consists of any visible
ASCII character that is not a delimiter (i.e. one of
C{"(),/:;<=>?@[\\]{}}.)
@param method: the method to check
@type method: L{bytes}
@return: the method if it is valid
@rtype: L{bytes}
@raise ValueError: if the method is not valid
@see: U{https://tools.ietf.org/html/rfc7230#section-3.1.1},
U{https://tools.ietf.org/html/rfc7230#section-3.2.6},
U{https://tools.ietf.org/html/rfc5234#appendix-B.1}
"""
if _VALID_METHOD.match(method):
return method
raise ValueError("Invalid method {!r}".format(method))



_VALID_URI = re.compile(br'\A[\x21-\x7e]+\Z')



def _ensureValidURI(uri):
"""
A valid URI cannot contain control characters (i.e., characters
between 0-32, inclusive and 127) or non-ASCII characters (i.e.,
characters with values between 128-255, inclusive).
@param uri: the URI to check
@type uri: L{bytes}
@return: the URI if it is valid
@rtype: L{bytes}
@raise ValueError: if the URI is not valid
@see: U{https://tools.ietf.org/html/rfc3986#section-3.3},
U{https://tools.ietf.org/html/rfc3986#appendix-A},
U{https://tools.ietf.org/html/rfc5234#appendix-B.1}
"""
if _VALID_URI.match(uri):
return uri
raise ValueError("Invalid URI {!r}".format(uri))



@implementer(IClientRequest)
class Request:
"""
Expand Down Expand Up @@ -618,8 +688,8 @@ def __init__(self, method, uri, headers, bodyProducer, persistent=False):
connection, defaults to C{False}.
@type persistent: L{bool}
"""
self.method = method
self.uri = uri
self.method = _ensureValidMethod(method)
self.uri = _ensureValidURI(uri)
self.headers = headers
self.bodyProducer = bodyProducer
self.persistent = persistent
Expand Down Expand Up @@ -664,8 +734,15 @@ def _writeHeaders(self, transport, TEorCL):
# method would probably be good. It would be nice if this method
# weren't limited to issuing HTTP/1.1 requests.
requestLines = []
requestLines.append(b' '.join([self.method, self.uri,
b'HTTP/1.1\r\n']))
requestLines.append(
b' '.join(
[
_ensureValidMethod(self.method),
_ensureValidURI(self.uri),
b'HTTP/1.1\r\n',
]
),
)
if not self.persistent:
requestLines.append(b'Connection: close\r\n')
if TEorCL is not None:
Expand Down
22 changes: 17 additions & 5 deletions src/twisted/web/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ def urlunparse(parts):
from twisted.web.http_headers import Headers
from twisted.logger import Logger

from twisted.web._newclient import _ensureValidURI, _ensureValidMethod



class PartialDownloadError(error.Error):
"""
Expand Down Expand Up @@ -78,11 +81,13 @@ class HTTPPageGetter(http.HTTPClient):

_completelyDone = True

_specialHeaders = set((b'host', b'user-agent', b'cookie', b'content-length'))
_specialHeaders = set(
(b'host', b'user-agent', b'cookie', b'content-length'),
)

def connectionMade(self):
method = getattr(self.factory, 'method', b'GET')
self.sendCommand(method, self.factory.path)
method = _ensureValidMethod(getattr(self.factory, 'method', b'GET'))
self.sendCommand(method, _ensureValidURI(self.factory.path))
if self.factory.scheme == b'http' and self.factory.port != 80:
host = self.factory.host + b':' + intToBytes(self.factory.port)
elif self.factory.scheme == b'https' and self.factory.port != 443:
Expand Down Expand Up @@ -362,7 +367,7 @@ def __init__(self, url, method=b'GET', postdata=None, headers=None,
# just in case a broken http/1.1 decides to keep connection alive
self.headers.setdefault(b"connection", b"close")
self.postdata = postdata
self.method = method
self.method = _ensureValidMethod(method)

self.setURL(url)

Expand All @@ -389,6 +394,7 @@ def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, self.url)

def setURL(self, url):
_ensureValidURI(url.strip())
self.url = url
uri = URI.fromBytes(url)
if uri.scheme and uri.host:
Expand Down Expand Up @@ -733,7 +739,7 @@ def _makeGetterFactory(url, factoryFactory, contextFactory=None,
@return: The factory created by C{factoryFactory}
"""
uri = URI.fromBytes(url)
uri = URI.fromBytes(_ensureValidURI(url.strip()))
factory = factoryFactory(url, *args, **kwargs)
if uri.scheme == b'https':
from twisted.internet import ssl
Expand Down Expand Up @@ -1493,6 +1499,9 @@ def _requestWithEndpoint(self, key, endpoint, method, parsedURI,
if not isinstance(method, bytes):
raise TypeError('method={!r} is {}, but must be bytes'.format(
method, type(method)))

method = _ensureValidMethod(method)

# Create minimal headers, if necessary:
if headers is None:
headers = Headers()
Expand Down Expand Up @@ -1717,6 +1726,7 @@ def request(self, method, uri, headers=None, bodyProducer=None):
@see: L{twisted.web.iweb.IAgent.request}
"""
uri = _ensureValidURI(uri.strip())
parsedURI = URI.fromBytes(uri)
try:
endpoint = self._getEndpoint(parsedURI)
Expand Down Expand Up @@ -1750,6 +1760,8 @@ def request(self, method, uri, headers=None, bodyProducer=None):
"""
Issue a new request via the configured proxy.
"""
uri = _ensureValidURI(uri.strip())

# Cache *all* connections under the same key, since we are only
# connecting to a single destination, the proxy:
key = ("http-proxy", self._proxyEndpoint)
Expand Down
1 change: 1 addition & 0 deletions src/twisted/web/newsfragments/9647.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
All HTTP clients in twisted.web.client now raise a ValueError when called with a method and/or URL that contain invalid characters. This mitigates CVE-2019-12387. Thanks to Alex Brasetvik for reporting this vulnerability.
168 changes: 168 additions & 0 deletions src/twisted/web/test/injectionhelpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Helpers for URI and method injection tests.
@see: U{CVE-2019-12387}
"""

import string


UNPRINTABLE_ASCII = (
frozenset(range(0, 128)) -
frozenset(bytearray(string.printable, 'ascii'))
)

NONASCII = frozenset(range(128, 256))



class MethodInjectionTestsMixin(object):
"""
A mixin that runs HTTP method injection tests. Define
L{MethodInjectionTestsMixin.attemptRequestWithMaliciousMethod} in
a L{twisted.trial.unittest.SynchronousTestCase} subclass to test
how HTTP client code behaves when presented with malicious HTTP
methods.
@see: U{CVE-2019-12387}
"""

def attemptRequestWithMaliciousMethod(self, method):
"""
Attempt to send a request with the given method. This should
synchronously raise a L{ValueError} if either is invalid.
@param method: the method (e.g. C{GET\x00})
@param uri: the URI
@type method:
"""
raise NotImplementedError()


def test_methodWithCLRFRejected(self):
"""
Issuing a request with a method that contains a carriage
return and line feed fails with a L{ValueError}.
"""
with self.assertRaises(ValueError) as cm:
method = b"GET\r\nX-Injected-Header: value"
self.attemptRequestWithMaliciousMethod(method)
self.assertRegex(str(cm.exception), "^Invalid method")


def test_methodWithUnprintableASCIIRejected(self):
"""
Issuing a request with a method that contains unprintable
ASCII characters fails with a L{ValueError}.
"""
for c in UNPRINTABLE_ASCII:
method = b"GET%s" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousMethod(method)
self.assertRegex(str(cm.exception), "^Invalid method")


def test_methodWithNonASCIIRejected(self):
"""
Issuing a request with a method that contains non-ASCII
characters fails with a L{ValueError}.
"""
for c in NONASCII:
method = b"GET%s" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousMethod(method)
self.assertRegex(str(cm.exception), "^Invalid method")



class URIInjectionTestsMixin(object):
"""
A mixin that runs HTTP URI injection tests. Define
L{MethodInjectionTestsMixin.attemptRequestWithMaliciousURI} in a
L{twisted.trial.unittest.SynchronousTestCase} subclass to test how
HTTP client code behaves when presented with malicious HTTP
URIs.
"""

def attemptRequestWithMaliciousURI(self, method):
"""
Attempt to send a request with the given URI. This should
synchronously raise a L{ValueError} if either is invalid.
@param uri: the URI.
@type method:
"""
raise NotImplementedError()


def test_hostWithCRLFRejected(self):
"""
Issuing a request with a URI whose host contains a carriage
return and line feed fails with a L{ValueError}.
"""
with self.assertRaises(ValueError) as cm:
uri = b"http://twisted\r\n.invalid/path"
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")


def test_hostWithWithUnprintableASCIIRejected(self):
"""
Issuing a request with a URI whose host contains unprintable
ASCII characters fails with a L{ValueError}.
"""
for c in UNPRINTABLE_ASCII:
uri = b"http://twisted%s.invalid/OK" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")


def test_hostWithNonASCIIRejected(self):
"""
Issuing a request with a URI whose host contains non-ASCII
characters fails with a L{ValueError}.
"""
for c in NONASCII:
uri = b"http://twisted%s.invalid/OK" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")


def test_pathWithCRLFRejected(self):
"""
Issuing a request with a URI whose path contains a carriage
return and line feed fails with a L{ValueError}.
"""
with self.assertRaises(ValueError) as cm:
uri = b"http://twisted.invalid/\r\npath"
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")


def test_pathWithWithUnprintableASCIIRejected(self):
"""
Issuing a request with a URI whose path contains unprintable
ASCII characters fails with a L{ValueError}.
"""
for c in UNPRINTABLE_ASCII:
uri = b"http://twisted.invalid/OK%s" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")


def test_pathWithNonASCIIRejected(self):
"""
Issuing a request with a URI whose path contains non-ASCII
characters fails with a L{ValueError}.
"""
for c in NONASCII:
uri = b"http://twisted.invalid/OK%s" % (bytearray([c]),)
with self.assertRaises(ValueError) as cm:
self.attemptRequestWithMaliciousURI(uri)
self.assertRegex(str(cm.exception), "^Invalid URI")

0 comments on commit 6c61fc4

Please sign in to comment.