From bca49a0cb92d64ce467401c48008a271daa38faa Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 18:00:51 +0100 Subject: [PATCH 1/8] Improve error messages from C parser --- .gitmodules | 2 +- aiohttp/_http_parser.pyx | 14 +++++++++++--- aiohttp/http_exceptions.py | 4 +++- tests/test_http_parser.py | 18 ++++++++++++++++++ vendor/README | 19 +++++++++++++++++++ vendor/llhttp | 2 +- 6 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 vendor/README diff --git a/.gitmodules b/.gitmodules index 1e901ef79f2..6edb2eea5b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "vendor/llhttp"] path = vendor/llhttp url = https://github.com/nodejs/llhttp.git - branch = v8.1.1 + branch = v8.x diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index bebd9894374..eb3b8811f1b 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -15,6 +15,7 @@ from cpython cimport ( from cpython.mem cimport PyMem_Free, PyMem_Malloc from libc.limits cimport ULLONG_MAX from libc.string cimport memcpy +from textwrap import indent from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy from yarl import URL as _URL @@ -546,7 +547,13 @@ cdef class HttpParser: ex = self._last_error self._last_error = None else: - ex = parser_error_from_errno(self._cparser) + after = cparser.llhttp_get_error_pos(self._cparser) + before = data[:after - self.py_buf.buf] + after_b = after.split(b"\n", 1)[0] + before = before.rsplit(b"\n", 1)[-1] + data = before + after_b + pointer = " " * (len(repr(before))-1) + "^" + ex = parser_error_from_errno(self._cparser, data, pointer) self._payload = None raise ex @@ -797,9 +804,10 @@ cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: return 0 -cdef parser_error_from_errno(cparser.llhttp_t* parser): +cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer): cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser) cdef bytes desc = cparser.llhttp_get_error_reason(parser) + cdef bytes after = cparser.llhttp_get_error_pos(parser) if errno in (cparser.HPE_CB_MESSAGE_BEGIN, cparser.HPE_CB_HEADERS_COMPLETE, @@ -829,4 +837,4 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser): else: cls = BadHttpMessage - return cls(desc.decode('latin-1')) + return cls("{}:\n\n {!r}\n {}".format(desc.decode("latin-1"), data, pointer)) diff --git a/aiohttp/http_exceptions.py b/aiohttp/http_exceptions.py index d49c59e08f1..8ad932a62a8 100644 --- a/aiohttp/http_exceptions.py +++ b/aiohttp/http_exceptions.py @@ -1,6 +1,7 @@ """Low-level http related exceptions.""" +from textwrap import indent from typing import Optional, Union from .typedefs import _CIMultiDict @@ -35,7 +36,8 @@ def __init__( self.message = message def __str__(self) -> str: - return f"{self.code}, message={self.message!r}" + msg = indent(self.message, " ") + return f"{self.code}, message:\n{msg}" def __repr__(self) -> str: return f"<{self.__class__.__name__}: {self}>" diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index e4fcbff0e62..415051cca45 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -2,6 +2,7 @@ # Tests for aiohttp/protocol.py import asyncio +import re from typing import Any, List from unittest import mock from urllib.parse import quote @@ -116,6 +117,23 @@ def test_parse_headers(parser: Any) -> None: assert msg.compression is None assert not msg.upgrade +@pytest.mark.skipif(NO_EXTENSIONS, reason="Only tests C parser.") +def test_invalid_character(loop: Any, protocol: Any, request: Any) -> None: + parser = HttpRequestParserC( + protocol, + loop, + 2**16, + max_line_size=8190, + max_field_size=8190, + ) + text = b"POST / HTTP/1.1\r\nHost: localhost:8080\r\nSet-Cookie: abc\x01def\r\n\r\n" + error_detail = re.escape(r""": + + b'Set-Cookie: abc\x01def\r' + ^""") + with pytest.raises(http_exceptions.BadHttpMessage, match=error_detail): + parser.feed_data(text) + def test_parse_headers_longline(parser: Any) -> None: invalid_unicode_byte = b"\xd9" diff --git a/vendor/README b/vendor/README new file mode 100644 index 00000000000..9e9a53701b4 --- /dev/null +++ b/vendor/README @@ -0,0 +1,19 @@ +LLHTTP +====== + +To build the llhttp parser, first get/update the submodule: + + git submodule update --init --recursive + +Then build llhttp: + + cd vendor/llhttp/ + npm install + make + +Then build our parser: + + cd - + make cythonize + +Then you can build or install it with ``python -m build`` or ''pip install .'' diff --git a/vendor/llhttp b/vendor/llhttp index 69d6db20085..7e18596bae8 160000 --- a/vendor/llhttp +++ b/vendor/llhttp @@ -1 +1 @@ -Subproject commit 69d6db2008508489d19267a0dcab30602b16fc5b +Subproject commit 7e18596bae8f63692ded9d3250d5d984fe90dcfb From 5d9c40e68b9112c52cc3a74c321c676e1ff6b44f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jul 2023 17:03:18 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_http_parser.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 415051cca45..683177aecf3 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -117,6 +117,7 @@ def test_parse_headers(parser: Any) -> None: assert msg.compression is None assert not msg.upgrade + @pytest.mark.skipif(NO_EXTENSIONS, reason="Only tests C parser.") def test_invalid_character(loop: Any, protocol: Any, request: Any) -> None: parser = HttpRequestParserC( @@ -127,10 +128,12 @@ def test_invalid_character(loop: Any, protocol: Any, request: Any) -> None: max_field_size=8190, ) text = b"POST / HTTP/1.1\r\nHost: localhost:8080\r\nSet-Cookie: abc\x01def\r\n\r\n" - error_detail = re.escape(r""": + error_detail = re.escape( + r""": b'Set-Cookie: abc\x01def\r' - ^""") + ^""" + ) with pytest.raises(http_exceptions.BadHttpMessage, match=error_detail): parser.feed_data(text) From 6f515cf69b1940d12f75aa4613bde7d66ca1005c Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 18:10:29 +0100 Subject: [PATCH 3/8] Create 7366.feature --- CHANGES/7366.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/7366.feature diff --git a/CHANGES/7366.feature b/CHANGES/7366.feature new file mode 100644 index 00000000000..8e38f70f898 --- /dev/null +++ b/CHANGES/7366.feature @@ -0,0 +1 @@ +Added information to C parser exceptions to show which character caused the error. -- by :user:`Dreamsorcerer` From 017bcc11f3deaedf26bccfa6c9d7468856da51c5 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 18:11:48 +0100 Subject: [PATCH 4/8] Update _http_parser.pyx --- aiohttp/_http_parser.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index eb3b8811f1b..b19275c25e5 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -807,7 +807,6 @@ cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer): cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser) cdef bytes desc = cparser.llhttp_get_error_reason(parser) - cdef bytes after = cparser.llhttp_get_error_pos(parser) if errno in (cparser.HPE_CB_MESSAGE_BEGIN, cparser.HPE_CB_HEADERS_COMPLETE, From 9da4f0597b73c6c60a9b0a1e0dd73f01cb25f170 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 18:12:28 +0100 Subject: [PATCH 5/8] Update _http_parser.pyx --- aiohttp/_http_parser.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index b19275c25e5..4f39dd0c978 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -15,7 +15,6 @@ from cpython cimport ( from cpython.mem cimport PyMem_Free, PyMem_Malloc from libc.limits cimport ULLONG_MAX from libc.string cimport memcpy -from textwrap import indent from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy from yarl import URL as _URL From f2ea9959415eb3e9a57f90198a8ec7bd6ecd5b92 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 18:15:44 +0100 Subject: [PATCH 6/8] Update README --- vendor/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/README b/vendor/README index 9e9a53701b4..35c65fc5932 100644 --- a/vendor/README +++ b/vendor/README @@ -16,4 +16,4 @@ Then build our parser: cd - make cythonize -Then you can build or install it with ``python -m build`` or ''pip install .'' +Then you can build or install it with ``python -m build`` or ``pip install .`` From 95ff729fc2b5f34e6b97f2fddee6df8e64ee1474 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sat, 15 Jul 2023 19:07:37 +0100 Subject: [PATCH 7/8] Update tests --- aiohttp/http_exceptions.py | 2 +- tests/test_http_exceptions.py | 22 ++++++++++------------ tests/test_http_parser.py | 11 ++++++----- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/aiohttp/http_exceptions.py b/aiohttp/http_exceptions.py index 8ad932a62a8..728824f856f 100644 --- a/aiohttp/http_exceptions.py +++ b/aiohttp/http_exceptions.py @@ -40,7 +40,7 @@ def __str__(self) -> str: return f"{self.code}, message:\n{msg}" def __repr__(self) -> str: - return f"<{self.__class__.__name__}: {self}>" + return f"<{self.__class__.__name__}: {self.code}, message={self.message!r}>" class BadHttpMessage(HttpProcessingError): diff --git a/tests/test_http_exceptions.py b/tests/test_http_exceptions.py index 27aab67089f..28fdcbe0c69 100644 --- a/tests/test_http_exceptions.py +++ b/tests/test_http_exceptions.py @@ -32,13 +32,13 @@ def test_str(self) -> None: err = http_exceptions.HttpProcessingError( code=500, message="Internal error", headers={} ) - assert str(err) == "500, message='Internal error'" + assert str(err) == "500, message:\n Internal error" def test_repr(self) -> None: err = http_exceptions.HttpProcessingError( code=500, message="Internal error", headers={} ) - assert repr(err) == ("") + assert repr(err) == ("") class TestBadHttpMessage: @@ -61,7 +61,7 @@ def test_pickle(self) -> None: def test_str(self) -> None: err = http_exceptions.BadHttpMessage(message="Bad HTTP message", headers={}) - assert str(err) == "400, message='Bad HTTP message'" + assert str(err) == "400, message:\n Bad HTTP message" def test_repr(self) -> None: err = http_exceptions.BadHttpMessage(message="Bad HTTP message", headers={}) @@ -88,9 +88,8 @@ def test_pickle(self) -> None: def test_str(self) -> None: err = http_exceptions.LineTooLong(line="spam", limit="10", actual_size="12") - assert str(err) == ( - "400, message='Got more than 10 bytes (12) " "when reading spam.'" - ) + expected = "400, message:\n Got more than 10 bytes (12) when reading spam." + assert str(err) == expected def test_repr(self) -> None: err = http_exceptions.LineTooLong(line="spam", limit="10", actual_size="12") @@ -120,25 +119,24 @@ def test_pickle(self) -> None: def test_str(self) -> None: err = http_exceptions.InvalidHeader(hdr="X-Spam") - assert str(err) == "400, message='Invalid HTTP Header: X-Spam'" + assert str(err) == "400, message:\n Invalid HTTP Header: X-Spam" def test_repr(self) -> None: err = http_exceptions.InvalidHeader(hdr="X-Spam") - assert repr(err) == ( - "" - ) + expected = "" + assert repr(err) == expected class TestBadStatusLine: def test_ctor(self) -> None: err = http_exceptions.BadStatusLine("Test") assert err.line == "Test" - assert str(err) == "400, message=\"Bad status line 'Test'\"" + assert str(err) == "400, message:\n Bad status line 'Test'" def test_ctor2(self) -> None: err = http_exceptions.BadStatusLine(b"") assert err.line == "b''" - assert str(err) == "400, message='Bad status line \"b\\'\\'\"'" + assert str(err) == "400, message:\n Bad status line \"b''\"" def test_pickle(self) -> None: err = http_exceptions.BadStatusLine("Test") diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 683177aecf3..8113fb94dd1 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -457,7 +457,7 @@ def test_max_header_field_size(parser: Any, size: Any) -> None: name = b"t" * size text = b"GET /test HTTP/1.1\r\n" + name + b":data\r\n\r\n" - match = f"400, message='Got more than 8190 bytes \\({size}\\) when reading" + match = f"400, message:\n Got more than 8190 bytes \\({size}\\) when reading" with pytest.raises(http_exceptions.LineTooLong, match=match): parser.feed_data(text) @@ -485,7 +485,7 @@ def test_max_header_value_size(parser: Any, size: Any) -> None: name = b"t" * size text = b"GET /test HTTP/1.1\r\n" b"data:" + name + b"\r\n\r\n" - match = f"400, message='Got more than 8190 bytes \\({size}\\) when reading" + match = f"400, message:\n Got more than 8190 bytes \\({size}\\) when reading" with pytest.raises(http_exceptions.LineTooLong, match=match): parser.feed_data(text) @@ -513,7 +513,7 @@ def test_max_header_value_size_continuation(parser: Any, size: Any) -> None: name = b"T" * (size - 5) text = b"GET /test HTTP/1.1\r\n" b"data: test\r\n " + name + b"\r\n\r\n" - match = f"400, message='Got more than 8190 bytes \\({size}\\) when reading" + match = f"400, message:\n Got more than 8190 bytes \\({size}\\) when reading" with pytest.raises(http_exceptions.LineTooLong, match=match): parser.feed_data(text) @@ -636,7 +636,7 @@ def test_http_request_parser_bad_version(parser: Any) -> None: @pytest.mark.parametrize("size", [40965, 8191]) def test_http_request_max_status_line(parser: Any, size: Any) -> None: path = b"t" * (size - 5) - match = f"400, message='Got more than 8190 bytes \\({size}\\) when reading" + match = f"400, message:\n Got more than 8190 bytes \\({size}\\) when reading" with pytest.raises(http_exceptions.LineTooLong, match=match): parser.feed_data(b"GET /path" + path + b" HTTP/1.1\r\n\r\n") @@ -681,7 +681,7 @@ def test_http_response_parser_bad_status_line_too_long( response: Any, size: Any ) -> None: reason = b"t" * (size - 2) - match = f"400, message='Got more than 8190 bytes \\({size}\\) when reading" + match = f"400, message:\n Got more than 8190 bytes \\({size}\\) when reading" with pytest.raises(http_exceptions.LineTooLong, match=match): response.feed_data(b"HTTP/1.1 200 Ok" + reason + b"\r\n\r\n") @@ -715,6 +715,7 @@ def test_http_response_parser_bad(response: Any) -> None: response.feed_data(b"HTT/1\r\n\r\n") +@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser") def test_http_response_parser_code_under_100(response: Any) -> None: msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0] assert msg.code == 99 From 4b4a1731f4ba8b740ad569136a97a287bfd2b060 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sun, 16 Jul 2023 01:07:31 +0100 Subject: [PATCH 8/8] Rename README to README.rst --- vendor/{README => README.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename vendor/{README => README.rst} (100%) diff --git a/vendor/README b/vendor/README.rst similarity index 100% rename from vendor/README rename to vendor/README.rst