diff --git a/planning/deferred-work.md b/planning/deferred-work.md index 0a74b00..667f7b1 100644 --- a/planning/deferred-work.md +++ b/planning/deferred-work.md @@ -27,11 +27,6 @@ Items raised in reviews that are real but not actionable now. ## Deferred from: code review of story-1-2 (2026-05-13) -- **Charset parser robustness** — quoted whitespace, mismatched quotes, multi-`charset=` directives, substring false-positives (e.g. `boundary` containing `charset=`). (`src/httpware/response.py:20-25`) -- **Header name/value validation** — `with_header` / `with_headers` accept CR/LF (injection), `None`, empty string. CRLF lands with the `Redactor` middleware (Story 5.3); the rest is a generic input-validation pass. (`src/httpware/request.py:21-23,37-39`) -- **URL validation** — `with_url("")` accepts empty; `base_url` has no trailing-slash normalization in config (`AsyncClient._resolve_url` does `base.rstrip('/')` so the practical impact is reduced). (`src/httpware/request.py:25-27`, `src/httpware/config.py:34`) -- **`with_query(None)` handling** — type signature is `Mapping[str, str]`; a runtime `None` slips past typing and breaks downstream iteration with no guard. (`src/httpware/request.py:33-35`) -- **`Timeout` / `Limits` negative-value validation** — no `__post_init__` guard; nonsensical values silently accepted. (`src/httpware/config.py:11-27`) - **Multi-valued query params** — `Mapping[str, str]` cannot express `?tag=a&tag=b`. Type widening needed. (`src/httpware/request.py:16`) - **Streaming / async-iterable request bodies** — `body: bytes | None` only. Revisit in streaming work (Story 4.1). (`src/httpware/request.py:18`) - **`@final` to prevent subclassing** — frozen+slots subclassing is fragile. No current subclasser; defer until needed. (`src/httpware/request.py`, `response.py`, `config.py`) diff --git a/src/httpware/client.py b/src/httpware/client.py index 4e70dce..ab6c401 100644 --- a/src/httpware/client.py +++ b/src/httpware/client.py @@ -119,7 +119,7 @@ def _resolve_url(self, path: str) -> str: base = self._config.base_url if base is None: return path - return f"{base.rstrip('/')}/{path.lstrip('/')}" + return f"{base}/{path.lstrip('/')}" def _build_request( self, diff --git a/src/httpware/config.py b/src/httpware/config.py index 91fefdd..bfa49bf 100644 --- a/src/httpware/config.py +++ b/src/httpware/config.py @@ -17,6 +17,13 @@ class Timeout: write: float = 30.0 pool: float = 5.0 + def __post_init__(self) -> None: + for attr in ("connect", "read", "write", "pool"): + value = getattr(self, attr) + if value < 0: + msg = f"Timeout.{attr} must be non-negative (got {value})" + raise ValueError(msg) + @dataclass(frozen=True, slots=True) class Limits: @@ -26,6 +33,17 @@ class Limits: max_keepalive_connections: int = 20 keepalive_expiry: float = 5.0 + def __post_init__(self) -> None: + if self.max_connections < 0: + msg = f"max_connections must be non-negative (got {self.max_connections})" + raise ValueError(msg) + if self.max_keepalive_connections < 0: + msg = f"max_keepalive_connections must be non-negative (got {self.max_keepalive_connections})" + raise ValueError(msg) + if self.keepalive_expiry < 0: + msg = f"keepalive_expiry must be non-negative (got {self.keepalive_expiry})" + raise ValueError(msg) + @dataclass(frozen=True, slots=True) class ClientConfig: @@ -38,3 +56,14 @@ class ClientConfig: limits: Limits = field(default_factory=Limits) decoder: ResponseDecoder = field(default_factory=PydanticDecoder) middleware: tuple[Middleware, ...] = () + + def __post_init__(self) -> None: + if self.base_url is not None: + if not isinstance(self.base_url, str): + msg = "base_url must be a non-empty string or None" + raise ValueError(msg) + normalized = self.base_url.rstrip("/") + if not normalized: + msg = "base_url must be a non-empty string or None" + raise ValueError(msg) + object.__setattr__(self, "base_url", normalized) diff --git a/src/httpware/request.py b/src/httpware/request.py index 218c663..b5e3413 100644 --- a/src/httpware/request.py +++ b/src/httpware/request.py @@ -6,6 +6,18 @@ from typing import Any, Self +def _validate_header_or_cookie(name: str, value: str, *, kind: str) -> None: + if not isinstance(name, str) or not isinstance(value, str): + msg = f"{kind} name and value must be str" + raise TypeError(msg) + if not name or not value: + msg = f"{kind} name and value must be non-empty" + raise ValueError(msg) + if any(c in name or c in value for c in ("\r", "\n")): + msg = f"{kind} name and value must not contain CR or LF" + raise ValueError(msg) + + @dataclass(frozen=True, slots=True) class Request: """Immutable HTTP request value type.""" @@ -18,6 +30,23 @@ class Request: body: bytes | None = None extensions: Mapping[str, Any] = field(default_factory=dict) + def __post_init__(self) -> None: + if not isinstance(self.url, str): + msg = "url must be str" + raise TypeError(msg) + if not self.url: + msg = "url must be non-empty" + raise ValueError(msg) + for field_name in ("headers", "params", "cookies", "extensions"): + field_value = getattr(self, field_name) + if not isinstance(field_value, Mapping): + msg = f"{field_name} must be a Mapping (got {type(field_value).__name__})" + raise TypeError(msg) + for name, value in self.headers.items(): + _validate_header_or_cookie(name, value, kind="header") + for name, value in self.cookies.items(): + _validate_header_or_cookie(name, value, kind="cookie") + def with_header(self, name: str, value: str) -> Self: """Return a copy with the given header added or replaced.""" return dataclasses.replace(self, headers={**self.headers, name: value}) diff --git a/src/httpware/response.py b/src/httpware/response.py index 6c9069b..568491d 100644 --- a/src/httpware/response.py +++ b/src/httpware/response.py @@ -21,7 +21,7 @@ def _parse_charset(content_type: str) -> str | None: for raw in content_type.split(";"): part = raw.strip() if part.lower().startswith(_CHARSET_PREFIX): - return part[len(_CHARSET_PREFIX) :].strip().strip('"').strip("'") + return part[len(_CHARSET_PREFIX) :].strip().strip('"').strip("'").strip() return None diff --git a/tests/test_config.py b/tests/test_config.py index 03a5535..b0ded35 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -50,3 +50,69 @@ def test_client_config_is_frozen() -> None: cfg = ClientConfig() with pytest.raises(FrozenInstanceError): cfg.base_url = "https://example.com" # ty: ignore[invalid-assignment] + + +@pytest.mark.parametrize("field", ["connect", "read", "write", "pool"]) +def test_timeout_rejects_negative(field: str) -> None: + with pytest.raises(ValueError, match=rf"Timeout\.{field} must be non-negative"): + Timeout(**{field: -1.0}) + + +def test_timeout_accepts_zero() -> None: + # Zero is a valid sentinel (fail immediately on this phase). + Timeout(connect=0.0, read=0.0, write=0.0, pool=0.0) + + +@pytest.mark.parametrize("field", ["max_connections", "max_keepalive_connections"]) +def test_limits_rejects_negative_int(field: str) -> None: + with pytest.raises(ValueError, match=f"{field} must be non-negative"): + Limits(**{field: -1}) + + +def test_limits_rejects_negative_keepalive_expiry() -> None: + with pytest.raises(ValueError, match="keepalive_expiry must be non-negative"): + Limits(keepalive_expiry=-0.5) + + +def test_limits_accepts_zero() -> None: + Limits(max_connections=0, max_keepalive_connections=0, keepalive_expiry=0.0) + + +def test_client_config_strips_trailing_slash_from_base_url() -> None: + cfg = ClientConfig(base_url="https://api.example.com/") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_leaves_base_url_without_trailing_slash() -> None: + cfg = ClientConfig(base_url="https://api.example.com") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_strips_multiple_trailing_slashes() -> None: + cfg = ClientConfig(base_url="https://api.example.com///") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_allows_none_base_url() -> None: + cfg = ClientConfig(base_url=None) + assert cfg.base_url is None + + +def test_client_config_rejects_empty_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="") + + +def test_client_config_rejects_slash_only_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="/") + + +def test_client_config_rejects_multiple_slashes_only_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="///") + + +def test_client_config_rejects_non_str_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url=123) # ty: ignore[invalid-argument-type] diff --git a/tests/test_request.py b/tests/test_request.py index 47b746d..fc7879b 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -143,3 +143,84 @@ class _Marker: new = r.with_extension("marker", marker) assert new.extensions == {"marker": marker} assert new.extensions["marker"] is marker + + +def test_request_rejects_empty_url() -> None: + with pytest.raises(ValueError, match="url must be non-empty"): + Request(method="GET", url="") + + +def test_request_rejects_non_str_url() -> None: + with pytest.raises(TypeError, match="url must be str"): + Request(method="GET", url=None) # ty: ignore[invalid-argument-type] + + +def test_with_url_rejects_empty() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="url must be non-empty"): + r.with_url("") + + +def test_request_rejects_header_with_crlf_in_value() -> None: + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + Request(method="GET", url="/", headers={"X-Trace": "value\r\nInjected: yes"}) + + +def test_request_rejects_header_with_crlf_in_name() -> None: + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + Request(method="GET", url="/", headers={"X-Bad\r\nInjected": "value"}) + + +def test_request_rejects_empty_header_name() -> None: + with pytest.raises(ValueError, match="header name and value must be non-empty"): + Request(method="GET", url="/", headers={"": "value"}) + + +def test_request_rejects_empty_header_value() -> None: + with pytest.raises(ValueError, match="header name and value must be non-empty"): + Request(method="GET", url="/", headers={"X-Trace": ""}) + + +def test_request_rejects_non_str_header_value() -> None: + with pytest.raises(TypeError, match="header name and value must be str"): + Request(method="GET", url="/", headers={"X-Trace": None}) # ty: ignore[invalid-argument-type] + + +def test_request_rejects_cookie_with_crlf() -> None: + with pytest.raises(ValueError, match="cookie name and value must not contain CR or LF"): + Request(method="GET", url="/", cookies={"session": "abc\r\nSet-Cookie: evil"}) + + +def test_request_rejects_empty_cookie_value() -> None: + with pytest.raises(ValueError, match="cookie name and value must be non-empty"): + Request(method="GET", url="/", cookies={"session": ""}) + + +def test_with_header_rejects_crlf() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + r.with_header("X-Trace", "value\r\n") + + +def test_with_cookie_rejects_crlf() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="cookie name and value must not contain CR or LF"): + r.with_cookie("session", "abc\r\n") + + +@pytest.mark.parametrize("field_name", ["headers", "params", "cookies", "extensions"]) +def test_request_rejects_none_mapping_field(field_name: str) -> None: + with pytest.raises(TypeError, match=f"{field_name} must be a Mapping"): + Request(method="GET", url="/", **{field_name: None}) # ty: ignore[invalid-argument-type] + + +@pytest.mark.parametrize("field_name", ["headers", "params", "cookies", "extensions"]) +def test_request_rejects_list_mapping_field(field_name: str) -> None: + with pytest.raises(TypeError, match=f"{field_name} must be a Mapping"): + Request(method="GET", url="/", **{field_name: []}) # ty: ignore[invalid-argument-type] + + +def test_with_query_none_raises() -> None: + r = Request(method="GET", url="/") + with pytest.raises(TypeError, match="params must be a Mapping"): + r.with_query(None) # ty: ignore[invalid-argument-type] diff --git a/tests/test_response.py b/tests/test_response.py index 192a711..a7fc42e 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -6,6 +6,7 @@ import pytest from httpware import Response +from httpware.response import _parse_charset def test_response_is_frozen() -> None: @@ -68,6 +69,24 @@ def test_response_text_strips_quotes_around_charset(content_type: str) -> None: assert resp.text == "café" +def test_response_text_strips_inner_whitespace_in_quoted_charset() -> None: + # Direct parser check: inner whitespace inside the quoted value must not survive. + # (Python's codec registry happens to normalize whitespace, masking the bug + # end-to-end on most charsets; verify the parser itself returns a clean value.) + assert _parse_charset('text/plain; charset=" iso-8859-1 "') == "iso-8859-1" + + # End-to-end smoke: decoding the Latin-1 body must yield the original text. + body = "café".encode("iso-8859-1") + resp = Response( + status=HTTPStatus.OK, + headers={"content-type": 'text/plain; charset=" iso-8859-1 "'}, + content=body, + url="/", + elapsed=0.0, + ) + assert resp.text == "café" + + def test_response_text_falls_back_to_utf8_on_unknown_charset() -> None: resp = Response( status=200,