diff --git a/httpx/_models.py b/httpx/_models.py
--- a/httpx/_models.py
+++ b/httpx/_models.py
@@ -64,14 +64,56 @@ def _is_known_encoding(encoding: str) -> bool:
     return True
 
 
+def _header_encode_error(
+    exc: UnicodeEncodeError,
+    subject: str,
+    header_name: str | bytes = "",
+) -> UnicodeEncodeError:
+    """
+    Build a `UnicodeEncodeError` that says which header failed to encode.
+
+    `subject` is a template such as "Header{name} value"; `{name}` expands to
+    the quoted `header_name`, or to nothing when no name is available. The
+    codec, offending text and character span are copied from `exc`.
+    """
+    if isinstance(header_name, bytes):
+        # Building the message must never raise, whatever bytes were passed.
+        header_name = header_name.decode("ascii", errors="replace")
+    label = subject.format(name=f" '{header_name}'" if header_name else "")
+    if exc.encoding == "ascii":
+        problem = "contains non-ASCII characters"
+    else:
+        # Non-ASCII text can be valid under e.g. "iso-8859-1"; name the codec.
+        problem = f"contains characters that cannot be encoded as '{exc.encoding}'"
+    return UnicodeEncodeError(
+        exc.encoding, exc.object, exc.start, exc.end, f"{label} {problem}"
+    )
+
+
-def _normalize_header_key(key: str | bytes, encoding: str | None = None) -> bytes:
+def _normalize_header_key(
+    key: str | bytes,
+    encoding: str | None = None,
+    header_name: str | bytes = "",
+) -> bytes:
     """
     Coerce str/bytes into a strictly byte-wise HTTP header key.
+
+    `header_name` only labels the `UnicodeEncodeError` raised when `key`
+    cannot be encoded.
     """
-    return key if isinstance(key, bytes) else key.encode(encoding or "ascii")
+    if isinstance(key, bytes):
+        return key
+    try:
+        return key.encode(encoding or "ascii")
+    except UnicodeEncodeError as exc:
+        raise _header_encode_error(exc, "Header name{name}", header_name) from exc
 
 
-def _normalize_header_value(value: str | bytes, encoding: str | None = None) -> bytes:
+def _normalize_header_value(
+    value: str | bytes,
+    encoding: str | None = None,
+    header_name: str | bytes = "",
+) -> bytes:
     """
     Coerce str/bytes into a strictly byte-wise HTTP header value.
     """
@@ -79,7 +121,11 @@ def _normalize_header_value(value: str | bytes, encoding: str | None = None) ->
         return value
     if not isinstance(value, str):
         raise TypeError(f"Header value must be str or bytes, not {type(value)}")
-    return value.encode(encoding or "ascii")
+    try:
+        return value.encode(encoding or "ascii")
+    except UnicodeEncodeError as exc:
+        # Re-raise with the header identified; `from exc` keeps the codec error.
+        raise _header_encode_error(exc, "Header{name} value", header_name) from exc
 
 
 def _parse_content_type_charset(content_type: str) -> str | None:
@@ -152,13 +198,13 @@ def __init__(
             self._list = list(headers._list)
         elif isinstance(headers, Mapping):
             for k, v in headers.items():
-                bytes_key = _normalize_header_key(k, encoding)
-                bytes_value = _normalize_header_value(v, encoding)
+                bytes_key = _normalize_header_key(k, encoding, header_name=k)
+                bytes_value = _normalize_header_value(v, encoding, header_name=k)
                 self._list.append((bytes_key, bytes_key.lower(), bytes_value))
         elif headers is not None:
             for k, v in headers:
-                bytes_key = _normalize_header_key(k, encoding)
-                bytes_value = _normalize_header_value(v, encoding)
+                bytes_key = _normalize_header_key(k, encoding, header_name=k)
+                bytes_value = _normalize_header_value(v, encoding, header_name=k)
                 self._list.append((bytes_key, bytes_key.lower(), bytes_value))
 
         self._encoding = encoding
diff --git a/tests/models/test_headers.py b/tests/models/test_headers.py
--- a/tests/models/test_headers.py
+++ b/tests/models/test_headers.py
@@ -217,3 +217,32 @@ def test_parse_header_links(value, expected):
 def test_parse_header_links_no_link():
     all_links = httpx.Response(200).links
     assert all_links == {}
+
+
+def test_header_encoding_error_mentions_header_name():
+    with pytest.raises(UnicodeEncodeError, match="Header 'auth' value"):
+        httpx.Headers({"auth": "안녕하세요"})
+
+
+def test_header_key_encoding_error_mentions_header_name():
+    with pytest.raises(UnicodeEncodeError, match="Header name '헤더'"):
+        httpx.Headers({"헤더": "value"})
+
+
+def test_header_encoding_error_mentions_configured_encoding():
+    with pytest.raises(UnicodeEncodeError, match="cannot be encoded as"):
+        httpx.Headers({"auth": "안녕하세요"}, encoding="iso-8859-1")
+
+
+def test_header_encoding_error_without_header_name():
+    from httpx._models import _normalize_header_value
+
+    with pytest.raises(UnicodeEncodeError, match="Header value contains non-ASCII"):
+        _normalize_header_value("안녕")
+
+
+def test_header_key_encoding_error_without_header_name():
+    from httpx._models import _normalize_header_key
+
+    with pytest.raises(UnicodeEncodeError, match="Header name contains non-ASCII"):
+        _normalize_header_key("헤더")