diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 56c74fda455..3ec0c69a35f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,9 +69,41 @@ jobs: run: | LC_ALL=C sort -c CONTRIBUTORS.txt + gen_llhttp: + name: Generate llhttp sources + needs: lint + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: true + - name: Cache llhttp generated files + uses: actions/cache@v2 + id: cache + with: + key: llhttp-${{ hashFiles('vendor/llhttp/package.json', 'vendor/llhttp/src/**/*') }} + path: vendor/llhttp/build + - name: Setup NodeJS + if: steps.cache.outputs.cache-hit != 'true' + uses: actions/setup-node@v2 + with: + node-version: '14' + - name: Generate llhttp sources + if: steps.cache.outputs.cache-hit != 'true' + run: | + make generate-llhttp + - name: Upload llhttp generated files + uses: actions/upload-artifact@v2 + with: + name: llhttp + path: vendor/llhttp/build + if-no-files-found: error + test: name: Test - needs: lint + needs: gen_llhttp strategy: matrix: pyver: [3.7, 3.8, 3.9, '3.10'] @@ -120,6 +152,12 @@ jobs: if: ${{ matrix.no-extensions == '' }} run: | make cythonize + - name: Restore llhttp generated files + if: ${{ matrix.no-extensions == '' }} + uses: actions/download-artifact@v2 + with: + name: llhttp + path: vendor/llhttp/build/ - name: Run unittests env: COLOR: 'yes' diff --git a/.gitignore b/.gitignore index 8556509c6f7..7d38dd91998 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ .idea .install-cython .install-deps +.llhttp-gen .installed.cfg .mypy_cache .noseids diff --git a/.gitmodules b/.gitmodules index c58945aa9fc..5f9f397b8e2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "vendor/http-parser"] - path = vendor/http-parser - url = git://github.com/nodejs/http-parser.git - branch = 54f55a2 +[submodule "vendor/llhttp"] + path = vendor/llhttp + url = https://github.com/nodejs/llhttp.git + branch = v3.0.0 diff --git a/CHANGES/3561.feature b/CHANGES/3561.feature new file mode 100644 index 00000000000..5fd4523ad36 --- /dev/null +++ b/CHANGES/3561.feature @@ -0,0 +1 @@ +Switch from ``http-parser`` to ``llhttp`` diff --git a/Makefile b/Makefile index 81b8a0325c3..c64173402b0 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,15 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c cython -3 -o $@ $< -I aiohttp +vendor/llhttp/node_modules: vendor/llhttp/package.json + cd vendor/llhttp; npm install + +.llhttp-gen: vendor/llhttp/node_modules + $(MAKE) -C vendor/llhttp generate + @touch .llhttp-gen + +.PHONY: generate-llhttp +generate-llhttp: .llhttp-gen .PHONY: cythonize cythonize: .install-cython $(PYXS:.pyx=.c) @@ -81,7 +90,7 @@ fmt format: mypy: mypy -.develop: .install-deps $(call to-hash,$(PYS) $(CYS) $(CS)) +.develop: .install-deps generate-llhttp $(call to-hash,$(PYS) $(CYS) $(CS)) pip install -e . @touch .develop @@ -137,6 +146,9 @@ clean: @rm -rf aiohttp.egg-info @rm -f .install-deps @rm -f .install-cython + @rm -rf vendor/llhttp/node_modules + @rm -f .llhttp-gen + @$(MAKE) -C vendor/llhttp clean .PHONY: doc doc: diff --git a/aiohttp/_cparser.pxd b/aiohttp/_cparser.pxd index 0f9fc009236..165dd61d8a9 100644 --- a/aiohttp/_cparser.pxd +++ b/aiohttp/_cparser.pxd @@ -1,140 +1,190 @@ -from libc.stdint cimport uint16_t, uint32_t, uint64_t - - -cdef extern from "../vendor/http-parser/http_parser.h": - ctypedef int (*http_data_cb) (http_parser*, - const char *at, - size_t length) except -1 - - ctypedef int (*http_cb) (http_parser*) except -1 - - struct http_parser: - unsigned int type - unsigned int flags - unsigned int state - unsigned int header_state - unsigned int index - - uint32_t nread +from libc.stdint cimport ( + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + + +cdef extern from "../vendor/llhttp/build/llhttp.h": + + struct llhttp__internal_s: + int32_t _index + void* _span_pos0 + void* _span_cb0 + int32_t error + const char* reason + const char* error_pos + void* data + void* _current uint64_t content_length - - unsigned short http_major - unsigned short http_minor - unsigned int status_code - unsigned int method - unsigned int http_errno - - unsigned int upgrade - - void *data - - struct http_parser_settings: - http_cb on_message_begin - http_data_cb on_url - http_data_cb on_status - http_data_cb on_header_field - http_data_cb on_header_value - http_cb on_headers_complete - http_data_cb on_body - http_cb on_message_complete - http_cb on_chunk_header - http_cb on_chunk_complete - - enum http_parser_type: - HTTP_REQUEST, - HTTP_RESPONSE, - HTTP_BOTH - - enum http_errno: + uint8_t type + uint8_t method + uint8_t http_major + uint8_t http_minor + uint8_t header_state + uint8_t lenient_flags + uint8_t upgrade + uint8_t finish + uint16_t flags + uint16_t status_code + void* settings + + ctypedef llhttp__internal_s llhttp__internal_t + ctypedef llhttp__internal_t llhttp_t + + ctypedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length) except -1 + ctypedef int (*llhttp_cb)(llhttp_t*) except -1 + + struct llhttp_settings_s: + llhttp_cb on_message_begin + llhttp_data_cb on_url + llhttp_data_cb on_status + llhttp_data_cb on_header_field + llhttp_data_cb on_header_value + llhttp_cb on_headers_complete + llhttp_data_cb on_body + llhttp_cb on_message_complete + llhttp_cb on_chunk_header + llhttp_cb on_chunk_complete + + llhttp_cb on_url_complete + llhttp_cb on_status_complete + llhttp_cb on_header_field_complete + llhttp_cb on_header_value_complete + + ctypedef llhttp_settings_s llhttp_settings_t + + enum llhttp_errno: HPE_OK, - HPE_CB_message_begin, - HPE_CB_url, - HPE_CB_header_field, - HPE_CB_header_value, - HPE_CB_headers_complete, - HPE_CB_body, - HPE_CB_message_complete, - HPE_CB_status, - HPE_CB_chunk_header, - HPE_CB_chunk_complete, - HPE_INVALID_EOF_STATE, - HPE_HEADER_OVERFLOW, + HPE_INTERNAL, + HPE_STRICT, + HPE_LF_EXPECTED, + HPE_UNEXPECTED_CONTENT_LENGTH, HPE_CLOSED_CONNECTION, - HPE_INVALID_VERSION, - HPE_INVALID_STATUS, HPE_INVALID_METHOD, HPE_INVALID_URL, - HPE_INVALID_HOST, - HPE_INVALID_PORT, - HPE_INVALID_PATH, - HPE_INVALID_QUERY_STRING, - HPE_INVALID_FRAGMENT, - HPE_LF_EXPECTED, + HPE_INVALID_CONSTANT, + HPE_INVALID_VERSION, HPE_INVALID_HEADER_TOKEN, HPE_INVALID_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, - HPE_INVALID_CONSTANT, - HPE_INVALID_INTERNAL_STATE, - HPE_STRICT, + HPE_INVALID_STATUS, + HPE_INVALID_EOF_STATE, + HPE_INVALID_TRANSFER_ENCODING, + HPE_CB_MESSAGE_BEGIN, + HPE_CB_HEADERS_COMPLETE, + HPE_CB_MESSAGE_COMPLETE, + HPE_CB_CHUNK_HEADER, + HPE_CB_CHUNK_COMPLETE, HPE_PAUSED, - HPE_UNKNOWN + HPE_PAUSED_UPGRADE, + HPE_USER - enum flags: - F_CHUNKED, + ctypedef llhttp_errno llhttp_errno_t + + enum llhttp_flags: F_CONNECTION_KEEP_ALIVE, F_CONNECTION_CLOSE, F_CONNECTION_UPGRADE, - F_TRAILING, + F_CHUNKED, F_UPGRADE, + F_CONTENT_LENGTH, F_SKIPBODY, - F_CONTENTLENGTH - - enum http_method: - DELETE, GET, HEAD, POST, PUT, CONNECT, OPTIONS, TRACE, COPY, - LOCK, MKCOL, MOVE, PROPFIND, PROPPATCH, SEARCH, UNLOCK, BIND, - REBIND, UNBIND, ACL, REPORT, MKACTIVITY, CHECKOUT, MERGE, - MSEARCH, NOTIFY, SUBSCRIBE, UNSUBSCRIBE, PATCH, PURGE, MKCALENDAR, - LINK, UNLINK - - void http_parser_init(http_parser *parser, http_parser_type type) - - size_t http_parser_execute(http_parser *parser, - const http_parser_settings *settings, - const char *data, - size_t len) - - int http_should_keep_alive(const http_parser *parser) - - void http_parser_settings_init(http_parser_settings *settings) - - const char *http_errno_name(http_errno err) - const char *http_errno_description(http_errno err) - const char *http_method_str(http_method m) - - # URL Parser - - enum http_parser_url_fields: - UF_SCHEMA = 0, - UF_HOST = 1, - UF_PORT = 2, - UF_PATH = 3, - UF_QUERY = 4, - UF_FRAGMENT = 5, - UF_USERINFO = 6, - UF_MAX = 7 - - struct http_parser_url_field_data: - uint16_t off - uint16_t len + F_TRAILING, + F_TRANSFER_ENCODING - struct http_parser_url: - uint16_t field_set - uint16_t port - http_parser_url_field_data[UF_MAX] field_data + enum llhttp_lenient_flags: + LENIENT_HEADERS, + LENIENT_CHUNKED_LENGTH - void http_parser_url_init(http_parser_url *u) + enum llhttp_type: + HTTP_REQUEST, + HTTP_RESPONSE, + HTTP_BOTH - int http_parser_parse_url(const char *buf, - size_t buflen, - int is_connect, - http_parser_url *u) + enum llhttp_finish_t: + HTTP_FINISH_SAFE, + HTTP_FINISH_SAFE_WITH_CB, + HTTP_FINISH_UNSAFE + + enum llhttp_method: + HTTP_DELETE, + HTTP_GET, + HTTP_HEAD, + HTTP_POST, + HTTP_PUT, + HTTP_CONNECT, + HTTP_OPTIONS, + HTTP_TRACE, + HTTP_COPY, + HTTP_LOCK, + HTTP_MKCOL, + HTTP_MOVE, + HTTP_PROPFIND, + HTTP_PROPPATCH, + HTTP_SEARCH, + HTTP_UNLOCK, + HTTP_BIND, + HTTP_REBIND, + HTTP_UNBIND, + HTTP_ACL, + HTTP_REPORT, + HTTP_MKACTIVITY, + HTTP_CHECKOUT, + HTTP_MERGE, + HTTP_MSEARCH, + HTTP_NOTIFY, + HTTP_SUBSCRIBE, + HTTP_UNSUBSCRIBE, + HTTP_PATCH, + HTTP_PURGE, + HTTP_MKCALENDAR, + HTTP_LINK, + HTTP_UNLINK, + HTTP_SOURCE, + HTTP_PRI, + HTTP_DESCRIBE, + HTTP_ANNOUNCE, + HTTP_SETUP, + HTTP_PLAY, + HTTP_PAUSE, + HTTP_TEARDOWN, + HTTP_GET_PARAMETER, + HTTP_SET_PARAMETER, + HTTP_REDIRECT, + HTTP_RECORD, + HTTP_FLUSH + + ctypedef llhttp_method llhttp_method_t; + + void llhttp_settings_init(llhttp_settings_t* settings) + void llhttp_init(llhttp_t* parser, llhttp_type type, + const llhttp_settings_t* settings) + + llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) + llhttp_errno_t llhttp_finish(llhttp_t* parser) + + int llhttp_message_needs_eof(const llhttp_t* parser) + + int llhttp_should_keep_alive(const llhttp_t* parser) + + void llhttp_pause(llhttp_t* parser) + void llhttp_resume(llhttp_t* parser) + + void llhttp_resume_after_upgrade(llhttp_t* parser) + + llhttp_errno_t llhttp_get_errno(const llhttp_t* parser) + const char* llhttp_get_error_reason(const llhttp_t* parser) + void llhttp_set_error_reason(llhttp_t* parser, const char* reason) + const char* llhttp_get_error_pos(const llhttp_t* parser) + const char* llhttp_errno_name(llhttp_errno_t err) + + const char* llhttp_method_name(llhttp_method_t method) + + void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) + void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled) diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index 2a6d1ffa6f5..b76d723fb2e 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -80,13 +80,13 @@ cdef inline object extend(object buf, const char* at, size_t length): memcpy(ptr + s, at, length) -DEF METHODS_COUNT = 34; +DEF METHODS_COUNT = 46; cdef list _http_method = [] for i in range(METHODS_COUNT): _http_method.append( - cparser.http_method_str( i).decode('ascii')) + cparser.llhttp_method_name( i).decode('ascii')) cdef inline str http_method_str(int i): @@ -272,8 +272,8 @@ cdef _new_response_message(object version, cdef class HttpParser: cdef: - cparser.http_parser* _cparser - cparser.http_parser_settings* _csettings + cparser.llhttp_t* _cparser + cparser.llhttp_settings_t* _csettings bytearray _raw_name bytearray _raw_value @@ -310,13 +310,13 @@ cdef class HttpParser: Py_buffer py_buf def __cinit__(self): - self._cparser = \ - PyMem_Malloc(sizeof(cparser.http_parser)) + self._cparser = \ + PyMem_Malloc(sizeof(cparser.llhttp_t)) if self._cparser is NULL: raise MemoryError() - self._csettings = \ - PyMem_Malloc(sizeof(cparser.http_parser_settings)) + self._csettings = \ + PyMem_Malloc(sizeof(cparser.llhttp_settings_t)) if self._csettings is NULL: raise MemoryError() @@ -324,19 +324,20 @@ cdef class HttpParser: PyMem_Free(self._cparser) PyMem_Free(self._csettings) - cdef _init(self, cparser.http_parser_type mode, - object protocol, object loop, int limit, - object timer=None, - size_t max_line_size=8190, size_t max_headers=32768, - size_t max_field_size=8190, payload_exception=None, - bint response_with_body=True, bint read_until_eof=False, - bint auto_decompress=True): - cparser.http_parser_init(self._cparser, mode) + cdef _init( + self, cparser.llhttp_type mode, + object protocol, object loop, int limit, + object timer=None, + size_t max_line_size=8190, size_t max_headers=32768, + size_t max_field_size=8190, payload_exception=None, + bint response_with_body=True, bint read_until_eof=False, + bint auto_decompress=True, + ): + cparser.llhttp_settings_init(self._csettings) + cparser.llhttp_init(self._cparser, mode, self._csettings) self._cparser.data = self self._cparser.content_length = 0 - cparser.http_parser_settings_init(self._csettings) - self._protocol = protocol self._loop = loop self._timer = timer @@ -417,7 +418,7 @@ cdef class HttpParser: self._process_header() method = http_method_str(self._cparser.method) - should_close = not cparser.http_should_keep_alive(self._cparser) + should_close = not cparser.llhttp_should_keep_alive(self._cparser) upgrade = self._cparser.upgrade chunked = self._cparser.flags & cparser.F_CHUNKED @@ -450,11 +451,12 @@ cdef class HttpParser: headers, raw_headers, should_close, encoding, upgrade, chunked) - if (ULLONG_MAX > self._cparser.content_length > 0 or chunked or - self._cparser.method == 5 or # CONNECT: 5 - (self._cparser.status_code >= 199 and - self._cparser.content_length == ULLONG_MAX and - self._read_until_eof) + if ( + ULLONG_MAX > self._cparser.content_length > 0 or chunked or + self._cparser.method == 5 or # CONNECT: 5 + (self._cparser.status_code >= 199 and + self._cparser.content_length == 0 and + self._read_until_eof) ): payload = StreamReader( self._protocol, timer=self._timer, loop=self._loop, @@ -485,7 +487,7 @@ cdef class HttpParser: pass cdef inline http_version(self): - cdef cparser.http_parser* parser = self._cparser + cdef cparser.llhttp_t* parser = self._cparser if parser.http_major == 1: if parser.http_minor == 0: @@ -504,12 +506,11 @@ cdef class HttpParser: if self._cparser.flags & cparser.F_CHUNKED: raise TransferEncodingError( "Not enough data for satisfy transfer length header.") - elif self._cparser.flags & cparser.F_CONTENTLENGTH: + elif self._cparser.flags & cparser.F_CONTENT_LENGTH: raise ContentLengthError( "Not enough data for satisfy content length header.") - elif self._cparser.http_errno != cparser.HPE_OK: - desc = cparser.http_errno_description( - self._cparser.http_errno) + elif cparser.llhttp_get_errno(self._cparser) != cparser.HPE_OK: + desc = cparser.llhttp_get_error_reason(self._cparser) raise PayloadEncodingError(desc.decode('latin-1')) else: self._payload.feed_eof() @@ -522,26 +523,30 @@ cdef class HttpParser: cdef: size_t data_len size_t nb + cdef cparser.llhttp_errno_t errno PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE) data_len = self.py_buf.len - nb = cparser.http_parser_execute( + errno = cparser.llhttp_execute( self._cparser, - self._csettings, self.py_buf.buf, data_len) + if errno is cparser.HPE_PAUSED_UPGRADE: + cparser.llhttp_resume_after_upgrade(self._cparser) + + nb = cparser.llhttp_get_error_pos(self._cparser) - self.py_buf.buf + PyBuffer_Release(&self.py_buf) - if (self._cparser.http_errno != cparser.HPE_OK): + if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE): if self._payload_error == 0: if self._last_error is not None: ex = self._last_error self._last_error = None else: - ex = parser_error_from_errno( - self._cparser.http_errno) + ex = parser_error_from_errno(self._cparser) self._payload = None raise ex @@ -562,41 +567,65 @@ cdef class HttpParser: cdef class HttpRequestParser(HttpParser): - def __init__(self, protocol, loop, int limit, timer=None, - size_t max_line_size=8190, size_t max_headers=32768, - size_t max_field_size=8190, payload_exception=None, - bint response_with_body=True, bint read_until_eof=False, - bint auto_decompress=True, + def __init__( + self, protocol, loop, int limit, timer=None, + size_t max_line_size=8190, size_t max_headers=32768, + size_t max_field_size=8190, payload_exception=None, + bint response_with_body=True, bint read_until_eof=False, + bint auto_decompress=True, ): - self._init(cparser.HTTP_REQUEST, protocol, loop, limit, timer, - max_line_size, max_headers, max_field_size, - payload_exception, response_with_body, read_until_eof, - auto_decompress) + self._init(cparser.HTTP_REQUEST, protocol, loop, limit, timer, + max_line_size, max_headers, max_field_size, + payload_exception, response_with_body, read_until_eof, + auto_decompress) cdef object _on_status_complete(self): - cdef Py_buffer py_buf - if not self._buf: - return - self._path = self._buf.decode('utf-8', 'surrogateescape') - if self._cparser.method == 5: # CONNECT - self._url = URL(self._path) - else: - PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE) - try: - self._url = _parse_url(py_buf.buf, - py_buf.len) - finally: - PyBuffer_Release(&py_buf) - PyByteArray_Resize(self._buf, 0) + cdef int idx1, idx2 + if not self._buf: + return + self._path = self._buf.decode('utf-8', 'surrogateescape') + try: + idx3 = len(self._path) + idx1 = self._path.find("?") + if idx1 == -1: + query = "" + idx2 = self._path.find("#") + if idx2 == -1: + path = self._path + fragment = "" + else: + path = self._path[0: idx2] + fragment = self._path[idx2+1:] + + else: + path = self._path[0:idx1] + idx1 += 1 + idx2 = self._path.find("#", idx1+1) + if idx2 == -1: + query = self._path[idx1:] + fragment = "" + else: + query = self._path[idx1: idx2] + fragment = self._path[idx2+1:] + + self._url = URL.build( + path=path, + query_string=query, + fragment=fragment, + encoded=True, + ) + finally: + PyByteArray_Resize(self._buf, 0) cdef class HttpResponseParser(HttpParser): - def __init__(self, protocol, loop, int limit, timer=None, - size_t max_line_size=8190, size_t max_headers=32768, - size_t max_field_size=8190, payload_exception=None, - bint response_with_body=True, bint read_until_eof=False, - bint auto_decompress=True + def __init__( + self, protocol, loop, int limit, timer=None, + size_t max_line_size=8190, size_t max_headers=32768, + size_t max_field_size=8190, payload_exception=None, + bint response_with_body=True, bint read_until_eof=False, + bint auto_decompress=True ): self._init(cparser.HTTP_RESPONSE, protocol, loop, limit, timer, max_line_size, max_headers, max_field_size, @@ -610,7 +639,7 @@ cdef class HttpResponseParser(HttpParser): else: self._reason = self._reason or '' -cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: +cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data pyparser._started = True @@ -622,7 +651,7 @@ cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_url(cparser.http_parser* parser, +cdef int cb_on_url(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data try: @@ -637,7 +666,7 @@ cdef int cb_on_url(cparser.http_parser* parser, return 0 -cdef int cb_on_status(cparser.http_parser* parser, +cdef int cb_on_status(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef str reason @@ -653,7 +682,7 @@ cdef int cb_on_status(cparser.http_parser* parser, return 0 -cdef int cb_on_header_field(cparser.http_parser* parser, +cdef int cb_on_header_field(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -671,7 +700,7 @@ cdef int cb_on_header_field(cparser.http_parser* parser, return 0 -cdef int cb_on_header_value(cparser.http_parser* parser, +cdef int cb_on_header_value(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -688,7 +717,7 @@ cdef int cb_on_header_value(cparser.http_parser* parser, return 0 -cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_status_complete() @@ -703,7 +732,7 @@ cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_body(cparser.http_parser* parser, +cdef int cb_on_body(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef bytes body = at[:length] @@ -720,7 +749,7 @@ cdef int cb_on_body(cparser.http_parser* parser, return 0 -cdef int cb_on_message_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._started = False @@ -732,7 +761,7 @@ cdef int cb_on_message_complete(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1: +cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_header() @@ -743,7 +772,7 @@ cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_complete() @@ -754,19 +783,21 @@ cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1: return 0 -cdef parser_error_from_errno(cparser.http_errno errno): - cdef bytes desc = cparser.http_errno_description(errno) - - if errno in (cparser.HPE_CB_message_begin, - cparser.HPE_CB_url, - cparser.HPE_CB_header_field, - cparser.HPE_CB_header_value, - cparser.HPE_CB_headers_complete, - cparser.HPE_CB_body, - cparser.HPE_CB_message_complete, - cparser.HPE_CB_status, - cparser.HPE_CB_chunk_header, - cparser.HPE_CB_chunk_complete): +cdef parser_error_from_errno(cparser.llhttp_t* parser): + cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser) + cdef bytes desc = cparser.llhttp_get_error_reason(parser) + + if errno in (cparser.HPE_CB_MESSAGE_BEGIN, + cparser.HPE_CB_HEADERS_COMPLETE, + cparser.HPE_CB_MESSAGE_COMPLETE, + cparser.HPE_CB_CHUNK_HEADER, + cparser.HPE_CB_CHUNK_COMPLETE, + cparser.HPE_INVALID_CONSTANT, + cparser.HPE_INVALID_HEADER_TOKEN, + cparser.HPE_INVALID_CONTENT_LENGTH, + cparser.HPE_INVALID_CHUNK_SIZE, + cparser.HPE_INVALID_EOF_STATE, + cparser.HPE_INVALID_TRANSFER_ENCODING): cls = BadHttpMessage elif errno == cparser.HPE_INVALID_STATUS: @@ -775,6 +806,9 @@ cdef parser_error_from_errno(cparser.http_errno errno): elif errno == cparser.HPE_INVALID_METHOD: cls = BadStatusLine + elif errno == cparser.HPE_INVALID_VERSION: + cls = BadStatusLine + elif errno == cparser.HPE_INVALID_URL: cls = InvalidURLError @@ -782,96 +816,3 @@ cdef parser_error_from_errno(cparser.http_errno errno): cls = BadHttpMessage return cls(desc.decode('latin-1')) - - -def parse_url(url): - cdef: - Py_buffer py_buf - char* buf_data - - PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) - try: - buf_data = py_buf.buf - return _parse_url(buf_data, py_buf.len) - finally: - PyBuffer_Release(&py_buf) - - -cdef _parse_url(char* buf_data, size_t length): - cdef: - cparser.http_parser_url* parsed - int res - str schema = None - str host = None - object port = None - str path = None - str query = None - str fragment = None - str user = None - str password = None - str userinfo = None - object result = None - int off - int ln - - parsed = \ - PyMem_Malloc(sizeof(cparser.http_parser_url)) - if parsed is NULL: - raise MemoryError() - cparser.http_parser_url_init(parsed) - try: - res = cparser.http_parser_parse_url(buf_data, length, 0, parsed) - - if res == 0: - if parsed.field_set & (1 << cparser.UF_SCHEMA): - off = parsed.field_data[cparser.UF_SCHEMA].off - ln = parsed.field_data[cparser.UF_SCHEMA].len - schema = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - schema = '' - - if parsed.field_set & (1 << cparser.UF_HOST): - off = parsed.field_data[cparser.UF_HOST].off - ln = parsed.field_data[cparser.UF_HOST].len - host = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - host = '' - - if parsed.field_set & (1 << cparser.UF_PORT): - port = parsed.port - - if parsed.field_set & (1 << cparser.UF_PATH): - off = parsed.field_data[cparser.UF_PATH].off - ln = parsed.field_data[cparser.UF_PATH].len - path = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - path = '' - - if parsed.field_set & (1 << cparser.UF_QUERY): - off = parsed.field_data[cparser.UF_QUERY].off - ln = parsed.field_data[cparser.UF_QUERY].len - query = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - query = '' - - if parsed.field_set & (1 << cparser.UF_FRAGMENT): - off = parsed.field_data[cparser.UF_FRAGMENT].off - ln = parsed.field_data[cparser.UF_FRAGMENT].len - fragment = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - fragment = '' - - if parsed.field_set & (1 << cparser.UF_USERINFO): - off = parsed.field_data[cparser.UF_USERINFO].off - ln = parsed.field_data[cparser.UF_USERINFO].len - userinfo = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - - user, sep, password = userinfo.partition(':') - - return URL_build(scheme=schema, - user=user, password=password, host=host, port=port, - path=path, query_string=query, fragment=fragment, encoded=True) - else: - raise InvalidURLError("invalid url {!r}".format(buf_data)) - finally: - PyMem_Free(parsed) diff --git a/setup.py b/setup.py index a73d331ea07..ef236cb275a 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ NO_EXTENSIONS = True -if IS_GIT_REPO and not (HERE / "vendor/http-parser/README.md").exists(): +if IS_GIT_REPO and not (HERE / "vendor/llhttp/README.md").exists(): print("Install submodules when building from git clone", file=sys.stderr) print("Hint:", file=sys.stderr) print(" git submodule update --init", file=sys.stderr) @@ -33,10 +33,13 @@ "aiohttp._http_parser", [ "aiohttp/_http_parser.c", - "vendor/http-parser/http_parser.c", "aiohttp/_find_header.c", + "vendor/llhttp/build/c/llhttp.c", + "vendor/llhttp/src/native/api.c", + "vendor/llhttp/src/native/http.c", ], - define_macros=[("HTTP_PARSER_STRICT", 0)], + define_macros=[("LLHTTP_STRICT_MODE", 0)], + include_dirs=["vendor/llhttp/build"], ), Extension("aiohttp._helpers", ["aiohttp/_helpers.c"]), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 80913ae4360..2874b1428f4 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -2,7 +2,7 @@ # Tests for aiohttp/protocol.py import asyncio -from typing import Any +from typing import Any, List from unittest import mock from urllib.parse import quote @@ -13,6 +13,7 @@ import aiohttp from aiohttp import http_exceptions, streams from aiohttp.http_parser import ( + NO_EXTENSIONS, DeflateBuffer, HttpPayloadParser, HttpRequestParserPy, @@ -42,7 +43,14 @@ def protocol(): return mock.Mock() -@pytest.fixture(params=REQUEST_PARSERS) +def _gen_ids(parsers: List[Any]) -> List[str]: + return [ + "py-parser" if parser.__module__ == "aiohttp.http_parser" else "c-parser" + for parser in parsers + ] + + +@pytest.fixture(params=REQUEST_PARSERS, ids=_gen_ids(REQUEST_PARSERS)) def parser(loop: Any, protocol: Any, request: Any): # Parser implementations return request.param( @@ -55,13 +63,13 @@ def parser(loop: Any, protocol: Any, request: Any): ) -@pytest.fixture(params=REQUEST_PARSERS) +@pytest.fixture(params=REQUEST_PARSERS, ids=_gen_ids(REQUEST_PARSERS)) def request_cls(request: Any): # Request Parser class return request.param -@pytest.fixture(params=RESPONSE_PARSERS) +@pytest.fixture(params=RESPONSE_PARSERS, ids=_gen_ids(RESPONSE_PARSERS)) def response(loop: Any, protocol: Any, request: Any): # Parser implementations return request.param( @@ -74,7 +82,7 @@ def response(loop: Any, protocol: Any, request: Any): ) -@pytest.fixture(params=RESPONSE_PARSERS) +@pytest.fixture(params=RESPONSE_PARSERS, ids=_gen_ids(RESPONSE_PARSERS)) def response_cls(request: Any): # Parser implementations return request.param @@ -85,6 +93,14 @@ def stream(): return mock.Mock() +@pytest.mark.skipif(NO_EXTENSIONS, reason="Extentions available but not imported") +def test_c_parser_loaded(): + assert "HttpRequestParserC" in dir(aiohttp.http_parser) + assert "HttpResponseParserC" in dir(aiohttp.http_parser) + assert "RawRequestMessageC" in dir(aiohttp.http_parser) + assert "RawResponseMessageC" in dir(aiohttp.http_parser) + + def test_parse_headers(parser: Any) -> None: text = b"""GET /test HTTP/1.1\r test: line\r diff --git a/vendor/http-parser b/vendor/http-parser deleted file mode 160000 index 2343fd6b521..00000000000 --- a/vendor/http-parser +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2343fd6b5214b2ded2cdcf76de2bf60903bb90cd diff --git a/vendor/llhttp b/vendor/llhttp new file mode 160000 index 00000000000..69d6db20085 --- /dev/null +++ b/vendor/llhttp @@ -0,0 +1 @@ +Subproject commit 69d6db2008508489d19267a0dcab30602b16fc5b