From 5003dc22d804d9033796b6625c0459e200eba315 Mon Sep 17 00:00:00 2001 From: Hoan Nguyen Date: Thu, 4 Sep 2025 10:15:12 +0200 Subject: [PATCH 1/2] fix: Fix bug 'utf-8' codec can't encode characters - surrogates not allowed --- httpx/_content.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/httpx/_content.py b/httpx/_content.py index 6f479a0885..973bc34fb6 100644 --- a/httpx/_content.py +++ b/httpx/_content.py @@ -108,7 +108,7 @@ def encode_content( content: str | bytes | Iterable[bytes] | AsyncIterable[bytes], ) -> tuple[dict[str, str], SyncByteStream | AsyncByteStream]: if isinstance(content, (bytes, str)): - body = content.encode("utf-8") if isinstance(content, str) else content + body = content.encode("utf-8", errors="ignore") if isinstance(content, str) else content content_length = len(body) headers = {"Content-Length": str(content_length)} if body else {} return headers, ByteStream(body) @@ -142,7 +142,7 @@ def encode_urlencoded_data( plain_data.extend([(key, primitive_value_to_str(item)) for item in value]) else: plain_data.append((key, primitive_value_to_str(value))) - body = urlencode(plain_data, doseq=True).encode("utf-8") + body = urlencode(plain_data, doseq=True).encode("utf-8", errors="ignore") content_length = str(len(body)) content_type = "application/x-www-form-urlencoded" headers = {"Content-Length": content_length, "Content-Type": content_type} @@ -158,7 +158,7 @@ def encode_multipart_data( def encode_text(text: str) -> tuple[dict[str, str], ByteStream]: - body = text.encode("utf-8") + body = text.encode("utf-8", errors="ignore") content_length = str(len(body)) content_type = "text/plain; charset=utf-8" headers = {"Content-Length": content_length, "Content-Type": content_type} @@ -166,7 +166,7 @@ def encode_text(text: str) -> tuple[dict[str, str], ByteStream]: def encode_html(html: str) -> tuple[dict[str, str], ByteStream]: - body = html.encode("utf-8") + body = html.encode("utf-8", errors="ignore") content_length = str(len(body)) content_type = "text/html; charset=utf-8" headers = {"Content-Length": content_length, "Content-Type": content_type} @@ -176,7 +176,7 @@ def encode_html(html: str) -> tuple[dict[str, str], ByteStream]: def encode_json(json: Any) -> tuple[dict[str, str], ByteStream]: body = json_dumps( json, ensure_ascii=False, separators=(",", ":"), allow_nan=False - ).encode("utf-8") + ).encode("utf-8", errors="ignore") content_length = str(len(body)) content_type = "application/json" headers = {"Content-Length": content_length, "Content-Type": content_type} From bf694542be9ee15fc65551cbcea37a703f6e4224 Mon Sep 17 00:00:00 2001 From: Hoan Nguyen Date: Thu, 4 Sep 2025 09:07:58 +0000 Subject: [PATCH 2/2] test: add test for utf-8 surrogates not allowed fix --- tests/test_content.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/test_content.py b/tests/test_content.py index 9bfe983722..9d529744f4 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -252,6 +252,47 @@ async def test_urlencoded_list(): assert async_content == b"example=a&example=1&example=true" +@pytest.mark.anyio +async def test_utf8_content(): + request = httpx.Request(method, url, content="Hello\ud83d\udca4, world!") + assert isinstance(request.stream, typing.Iterable) + assert isinstance(request.stream, typing.AsyncIterable) + + sync_content = b"".join(list(request.stream)) + async_content = b"".join([part async for part in request.stream]) + assert request.headers == {"Host": "www.example.com", "Content-Length": "13"} + assert sync_content == b"Hello, world!" + assert async_content == b"Hello, world!" + + # Support 'data' for compat with requests. + with pytest.warns(DeprecationWarning): + request = httpx.Request(method, url, data="Hello\ud83d\udca4, world!") # type: ignore + assert isinstance(request.stream, typing.Iterable) + assert isinstance(request.stream, typing.AsyncIterable) + + sync_content = b"".join(list(request.stream)) + async_content = b"".join([part async for part in request.stream]) + + assert request.headers == {"Host": "www.example.com", "Content-Length": "13"} + assert sync_content == b"Hello, world!" + assert async_content == b"Hello, world!" + + request = httpx.Request(method, url, json={"Hello\ud83d\udca4": "world!\ud83d\udca4"}) + assert isinstance(request.stream, typing.Iterable) + assert isinstance(request.stream, typing.AsyncIterable) + + sync_content = b"".join(list(request.stream)) + async_content = b"".join([part async for part in request.stream]) + + assert request.headers == { + "Host": "www.example.com", + "Content-Length": "18", + "Content-Type": "application/json", + } + assert sync_content == b'{"Hello":"world!"}' + assert async_content == b'{"Hello":"world!"}' + + @pytest.mark.anyio async def test_multipart_files_content(): files = {"file": io.BytesIO(b"")}