Skip to content

Commit

Permalink
Merge pull request #520 from clue-labs/psr7-parser
Browse files Browse the repository at this point in the history
Refactor HTTP message parsing to build on top of new PSR-7 implementation
  • Loading branch information
WyriHaximus committed Mar 25, 2024
2 parents 5873b89 + c0e1f4d commit 58f9049
Show file tree
Hide file tree
Showing 7 changed files with 416 additions and 133 deletions.
8 changes: 8 additions & 0 deletions src/Io/AbstractMessage.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
*/
abstract class AbstractMessage implements MessageInterface
{
/**
* [Internal] Regex used to match all header fields of an HTTP message (request or response) into an array, thanks to @kelunik for checking the HTTP specs and coming up with this regex
*
* @internal
* @var string
*/
const REGEX_HEADERS = '/^([^()<>@,;:\\\"\/\[\]?={}\x01-\x20\x7F]++):[\x20\x09]*+((?:[\x20\x09]*+[\x21-\x7E\x80-\xFF]++)*+)[\x20\x09]*+[\r]?+\n/m';

/** @var array<string,string[]> */
private $headers = array();

Expand Down
14 changes: 10 additions & 4 deletions src/Io/ClientRequestStream.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
use React\Http\Message\Response;
use React\Socket\ConnectionInterface;
use React\Stream\WritableStreamInterface;
use RingCentral\Psr7 as gPsr;

/**
* @event response
Expand Down Expand Up @@ -152,10 +151,17 @@ public function handleData($data)
$this->buffer .= $data;

// buffer until double CRLF (or double LF for compatibility with legacy servers)
if (false !== strpos($this->buffer, "\r\n\r\n") || false !== strpos($this->buffer, "\n\n")) {
$eom = \strpos($this->buffer, "\r\n\r\n");
$eomLegacy = \strpos($this->buffer, "\n\n");
if ($eom !== false || $eomLegacy !== false) {
try {
$response = gPsr\parse_response($this->buffer);
$bodyChunk = (string) $response->getBody();
if ($eom !== false && ($eomLegacy === false || $eom < $eomLegacy)) {
$response = Response::parseMessage(\substr($this->buffer, 0, $eom + 2));
$bodyChunk = (string) \substr($this->buffer, $eom + 4);
} else {
$response = Response::parseMessage(\substr($this->buffer, 0, $eomLegacy + 1));
$bodyChunk = (string) \substr($this->buffer, $eomLegacy + 2);
}
} catch (\InvalidArgumentException $exception) {
$this->closeError($exception);
return;
Expand Down
130 changes: 1 addition & 129 deletions src/Io/RequestHeaderParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -128,39 +128,6 @@ public function handle(ConnectionInterface $conn)
*/
public function parseRequest($headers, ConnectionInterface $connection)
{
// additional, stricter safe-guard for request line
// because request parser doesn't properly cope with invalid ones
$start = array();
if (!\preg_match('#^(?<method>[^ ]+) (?<target>[^ ]+) HTTP/(?<version>\d\.\d)#m', $headers, $start)) {
throw new \InvalidArgumentException('Unable to parse invalid request-line');
}

// only support HTTP/1.1 and HTTP/1.0 requests
if ($start['version'] !== '1.1' && $start['version'] !== '1.0') {
throw new \InvalidArgumentException('Received request with invalid protocol version', Response::STATUS_VERSION_NOT_SUPPORTED);
}

// match all request header fields into array, thanks to @kelunik for checking the HTTP specs and coming up with this regex
$matches = array();
$n = \preg_match_all('/^([^()<>@,;:\\\"\/\[\]?={}\x01-\x20\x7F]++):[\x20\x09]*+((?:[\x20\x09]*+[\x21-\x7E\x80-\xFF]++)*+)[\x20\x09]*+[\r]?+\n/m', $headers, $matches, \PREG_SET_ORDER);

// check number of valid header fields matches number of lines + request line
if (\substr_count($headers, "\n") !== $n + 1) {
throw new \InvalidArgumentException('Unable to parse invalid request header fields');
}

// format all header fields into associative array
$host = null;
$fields = array();
foreach ($matches as $match) {
$fields[$match[1]][] = $match[2];

// match `Host` request header
if ($host === null && \strtolower($match[1]) === 'host') {
$host = $match[2];
}
}

// reuse same connection params for all server params for this connection
$cid = \PHP_VERSION_ID < 70200 ? \spl_object_hash($connection) : \spl_object_id($connection);
if (isset($this->connectionParams[$cid])) {
Expand Down Expand Up @@ -207,101 +174,6 @@ public function parseRequest($headers, ConnectionInterface $connection)
$serverParams['REQUEST_TIME'] = (int) ($now = $this->clock->now());
$serverParams['REQUEST_TIME_FLOAT'] = $now;

// scheme is `http` unless TLS is used
$scheme = isset($serverParams['HTTPS']) ? 'https://' : 'http://';

// default host if unset comes from local socket address or defaults to localhost
$hasHost = $host !== null;
if ($host === null) {
$host = isset($serverParams['SERVER_ADDR'], $serverParams['SERVER_PORT']) ? $serverParams['SERVER_ADDR'] . ':' . $serverParams['SERVER_PORT'] : '127.0.0.1';
}

if ($start['method'] === 'OPTIONS' && $start['target'] === '*') {
// support asterisk-form for `OPTIONS *` request line only
$uri = $scheme . $host;
} elseif ($start['method'] === 'CONNECT') {
$parts = \parse_url('tcp://' . $start['target']);

// check this is a valid authority-form request-target (host:port)
if (!isset($parts['scheme'], $parts['host'], $parts['port']) || \count($parts) !== 3) {
throw new \InvalidArgumentException('CONNECT method MUST use authority-form request target');
}
$uri = $scheme . $start['target'];
} else {
// support absolute-form or origin-form for proxy requests
if ($start['target'][0] === '/') {
$uri = $scheme . $host . $start['target'];
} else {
// ensure absolute-form request-target contains a valid URI
$parts = \parse_url($start['target']);

// make sure value contains valid host component (IP or hostname), but no fragment
if (!isset($parts['scheme'], $parts['host']) || $parts['scheme'] !== 'http' || isset($parts['fragment'])) {
throw new \InvalidArgumentException('Invalid absolute-form request-target');
}

$uri = $start['target'];
}
}

$request = new ServerRequest(
$start['method'],
$uri,
$fields,
'',
$start['version'],
$serverParams
);

// only assign request target if it is not in origin-form (happy path for most normal requests)
if ($start['target'][0] !== '/') {
$request = $request->withRequestTarget($start['target']);
}

if ($hasHost) {
// Optional Host request header value MUST be valid (host and optional port)
$parts = \parse_url('http://' . $request->getHeaderLine('Host'));

// make sure value contains valid host component (IP or hostname)
if (!$parts || !isset($parts['scheme'], $parts['host'])) {
$parts = false;
}

// make sure value does not contain any other URI component
if (\is_array($parts)) {
unset($parts['scheme'], $parts['host'], $parts['port']);
}
if ($parts === false || $parts) {
throw new \InvalidArgumentException('Invalid Host header value');
}
} elseif (!$hasHost && $start['version'] === '1.1' && $start['method'] !== 'CONNECT') {
// require Host request header for HTTP/1.1 (except for CONNECT method)
throw new \InvalidArgumentException('Missing required Host request header');
} elseif (!$hasHost) {
// remove default Host request header for HTTP/1.0 when not explicitly given
$request = $request->withoutHeader('Host');
}

// ensure message boundaries are valid according to Content-Length and Transfer-Encoding request headers
if ($request->hasHeader('Transfer-Encoding')) {
if (\strtolower($request->getHeaderLine('Transfer-Encoding')) !== 'chunked') {
throw new \InvalidArgumentException('Only chunked-encoding is allowed for Transfer-Encoding', Response::STATUS_NOT_IMPLEMENTED);
}

// Transfer-Encoding: chunked and Content-Length header MUST NOT be used at the same time
// as per https://tools.ietf.org/html/rfc7230#section-3.3.3
if ($request->hasHeader('Content-Length')) {
throw new \InvalidArgumentException('Using both `Transfer-Encoding: chunked` and `Content-Length` is not allowed', Response::STATUS_BAD_REQUEST);
}
} elseif ($request->hasHeader('Content-Length')) {
$string = $request->getHeaderLine('Content-Length');

if ((string)(int)$string !== $string) {
// Content-Length value is not an integer or not a single integer
throw new \InvalidArgumentException('The value of `Content-Length` is not valid', Response::STATUS_BAD_REQUEST);
}
}

return $request;
return ServerRequest::parseMessage($headers, $serverParams);
}
}
42 changes: 42 additions & 0 deletions src/Message/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,46 @@ private static function getReasonPhraseForStatusCode($code)

return isset(self::$phrasesMap[$code]) ? self::$phrasesMap[$code] : '';
}

/**
 * [Internal] Parse incoming HTTP protocol message
 *
 * The given message must start with the status-line, followed by zero or
 * more header fields. Every line must be terminated by LF or CRLF and the
 * message must not contain anything else (such as the empty line that ends
 * the header section), because the number of line breaks is checked against
 * the number of valid header fields plus the status-line.
 *
 * @internal
 * @param string $message status-line and header fields of the HTTP response
 * @return self
 * @throws \InvalidArgumentException if given $message is not a valid HTTP response message
 */
public static function parseMessage($message)
{
    // parse status-line like "HTTP/1.1 200 OK"
    // The pattern deliberately does not use the `m` modifier: the status-line
    // has to be the very first line, so `^` must not match at the start of any
    // later line (otherwise header fields preceding the status-line would pass).
    $start = array();
    if (!\preg_match('#^HTTP/(?<version>\d\.\d) (?<status>\d{3})(?: (?<reason>[^\r\n]*+))?[\r]?+\n#', $message, $start)) {
        throw new \InvalidArgumentException('Unable to parse invalid status-line');
    }

    // only support HTTP/1.1 and HTTP/1.0 responses
    if ($start['version'] !== '1.1' && $start['version'] !== '1.0') {
        throw new \InvalidArgumentException('Received response with invalid protocol version');
    }

    // check number of valid header fields matches number of lines + status line
    $matches = array();
    $n = \preg_match_all(self::REGEX_HEADERS, $message, $matches, \PREG_SET_ORDER);
    if (\substr_count($message, "\n") !== $n + 1) {
        throw new \InvalidArgumentException('Unable to parse invalid response header fields');
    }

    // format all header fields into associative array (field name => list of values),
    // repeated field names append to the same list in order of appearance
    $headers = array();
    foreach ($matches as $match) {
        $headers[$match[1]][] = $match[2];
    }

    // the reason phrase is optional, its capture group may be missing from $start
    // when the status-line ends right after the status code
    return new self(
        (int) $start['status'],
        $headers,
        '',
        $start['version'],
        isset($start['reason']) ? $start['reason'] : ''
    );
}
}
139 changes: 139 additions & 0 deletions src/Message/ServerRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,143 @@ private function parseCookie($cookie)

return $result;
}

/**
 * [Internal] Parse incoming HTTP protocol message
 *
 * The given message must start with the request-line, followed by zero or
 * more header fields. Every line must be terminated by LF or CRLF and the
 * message must not contain anything else (such as the empty line that ends
 * the header section), because the number of line breaks is checked against
 * the number of valid header fields plus the request-line.
 *
 * The given server params are passed on to the new request object. The keys
 * `HTTPS`, `SERVER_ADDR` and `SERVER_PORT` are additionally read here to
 * build the request URI when the request-target is not in absolute-form.
 *
 * @internal
 * @param string $message request-line and header fields of the HTTP request
 * @param array<string,string|int|float> $serverParams
 * @return self
 * @throws \InvalidArgumentException if given $message is not a valid HTTP request message,
 *     some errors use one of the `Response::STATUS_*` constants as exception code
 */
public static function parseMessage($message, array $serverParams)
{
    // parse request line like "GET /path HTTP/1.1"
    // The request-line has to be the very first line and has to end right after
    // the protocol version. Method and target must not span multiple lines, so
    // the pattern is not multi-line and excludes CR/LF from both tokens.
    $start = array();
    if (!\preg_match('#^(?<method>[^ \r\n]+) (?<target>[^ \r\n]+) HTTP/(?<version>\d\.\d)[\r]?+\n#', $message, $start)) {
        throw new \InvalidArgumentException('Unable to parse invalid request-line');
    }

    // only support HTTP/1.1 and HTTP/1.0 requests
    if ($start['version'] !== '1.1' && $start['version'] !== '1.0') {
        throw new \InvalidArgumentException('Received request with invalid protocol version', Response::STATUS_VERSION_NOT_SUPPORTED);
    }

    // check number of valid header fields matches number of lines + request line
    $matches = array();
    $n = \preg_match_all(self::REGEX_HEADERS, $message, $matches, \PREG_SET_ORDER);
    if (\substr_count($message, "\n") !== $n + 1) {
        throw new \InvalidArgumentException('Unable to parse invalid request header fields');
    }

    // format all header fields into associative array (field name => list of values)
    $host = null;
    $headers = array();
    foreach ($matches as $match) {
        $headers[$match[1]][] = $match[2];

        // match `Host` request header (first occurrence wins, field name is case-insensitive)
        if ($host === null && \strtolower($match[1]) === 'host') {
            $host = $match[2];
        }
    }

    // scheme is `http` unless TLS is used
    $scheme = isset($serverParams['HTTPS']) ? 'https://' : 'http://';

    // default host if unset comes from local socket address or defaults to localhost
    $hasHost = $host !== null;
    if ($host === null) {
        $host = isset($serverParams['SERVER_ADDR'], $serverParams['SERVER_PORT']) ? $serverParams['SERVER_ADDR'] . ':' . $serverParams['SERVER_PORT'] : '127.0.0.1';
    }

    if ($start['method'] === 'OPTIONS' && $start['target'] === '*') {
        // support asterisk-form for `OPTIONS *` request line only
        $uri = $scheme . $host;
    } elseif ($start['method'] === 'CONNECT') {
        $parts = \parse_url('tcp://' . $start['target']);

        // check this is a valid authority-form request-target (host:port)
        // exactly scheme, host and port may be present, any other URI component is rejected
        if (!isset($parts['scheme'], $parts['host'], $parts['port']) || \count($parts) !== 3) {
            throw new \InvalidArgumentException('CONNECT method MUST use authority-form request target');
        }
        $uri = $scheme . $start['target'];
    } else {
        // support absolute-form or origin-form for proxy requests
        if ($start['target'][0] === '/') {
            $uri = $scheme . $host . $start['target'];
        } else {
            // ensure absolute-form request-target contains a valid URI
            $parts = \parse_url($start['target']);

            // make sure value contains valid host component (IP or hostname), but no fragment
            if (!isset($parts['scheme'], $parts['host']) || $parts['scheme'] !== 'http' || isset($parts['fragment'])) {
                throw new \InvalidArgumentException('Invalid absolute-form request-target');
            }

            $uri = $start['target'];
        }
    }

    $request = new self(
        $start['method'],
        $uri,
        $headers,
        '',
        $start['version'],
        $serverParams
    );

    // only assign request target if it is not in origin-form (happy path for most normal requests)
    if ($start['target'][0] !== '/') {
        $request = $request->withRequestTarget($start['target']);
    }

    if ($hasHost) {
        // Optional Host request header value MUST be valid (host and optional port)
        $parts = \parse_url('http://' . $request->getHeaderLine('Host'));

        // make sure value contains valid host component (IP or hostname)
        if (!$parts || !isset($parts['scheme'], $parts['host'])) {
            $parts = false;
        }

        // make sure value does not contain any other URI component
        // (anything left after removing the allowed components is an error)
        if (\is_array($parts)) {
            unset($parts['scheme'], $parts['host'], $parts['port']);
        }
        if ($parts === false || $parts) {
            throw new \InvalidArgumentException('Invalid Host header value');
        }
    } elseif (!$hasHost && $start['version'] === '1.1' && $start['method'] !== 'CONNECT') {
        // require Host request header for HTTP/1.1 (except for CONNECT method)
        throw new \InvalidArgumentException('Missing required Host request header');
    } elseif (!$hasHost) {
        // remove default Host request header for HTTP/1.0 when not explicitly given
        $request = $request->withoutHeader('Host');
    }

    // ensure message boundaries are valid according to Content-Length and Transfer-Encoding request headers
    if ($request->hasHeader('Transfer-Encoding')) {
        if (\strtolower($request->getHeaderLine('Transfer-Encoding')) !== 'chunked') {
            throw new \InvalidArgumentException('Only chunked-encoding is allowed for Transfer-Encoding', Response::STATUS_NOT_IMPLEMENTED);
        }

        // Transfer-Encoding: chunked and Content-Length header MUST NOT be used at the same time
        // as per https://tools.ietf.org/html/rfc7230#section-3.3.3
        if ($request->hasHeader('Content-Length')) {
            throw new \InvalidArgumentException('Using both `Transfer-Encoding: chunked` and `Content-Length` is not allowed', Response::STATUS_BAD_REQUEST);
        }
    } elseif ($request->hasHeader('Content-Length')) {
        $string = $request->getHeaderLine('Content-Length');

        // Content-Length must be a single integer in canonical decimal form and
        // must not be negative: the round-trip cast alone would accept "-1"
        if ((string)(int)$string !== $string || (int)$string < 0) {
            // Content-Length value is not an integer or not a single integer
            throw new \InvalidArgumentException('The value of `Content-Length` is not valid', Response::STATUS_BAD_REQUEST);
        }
    }

    return $request;
}
}

0 comments on commit 58f9049

Please sign in to comment.