From 389c804cf6a0b4b406778ed7dcd747d8f448d402 Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Wed, 3 Jun 2026 07:34:35 +0300 Subject: [PATCH 1/5] fix: charset parser strips inner whitespace from quoted values Adds one final .strip() after the quote-stripping chain in _parse_charset so that Content-Type: ...; charset=" utf-8 " yields a clean codec name. Python's codec registry happens to normalize whitespace on lookup (so the end-to-end mojibake risk is currently masked), but the parser should still return a clean value rather than rely on codec leniency. Test verifies _parse_charset directly (the unit-level contract) plus an end-to-end smoke through Response.text. The other "concerns" listed in the deferred-work entry (substring false-positives, mismatched quotes, multi-charset directives) do not actually fire on the current code, per the spec's analysis. Closes deferred-work entry: "Charset parser robustness". Co-Authored-By: Claude Opus 4.7 (1M context) --- src/httpware/response.py | 2 +- tests/test_response.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/httpware/response.py b/src/httpware/response.py index 6c9069b..568491d 100644 --- a/src/httpware/response.py +++ b/src/httpware/response.py @@ -21,7 +21,7 @@ def _parse_charset(content_type: str) -> str | None: for raw in content_type.split(";"): part = raw.strip() if part.lower().startswith(_CHARSET_PREFIX): - return part[len(_CHARSET_PREFIX) :].strip().strip('"').strip("'") + return part[len(_CHARSET_PREFIX) :].strip().strip('"').strip("'").strip() return None diff --git a/tests/test_response.py b/tests/test_response.py index 192a711..a7fc42e 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -6,6 +6,7 @@ import pytest from httpware import Response +from httpware.response import _parse_charset def test_response_is_frozen() -> None: @@ -68,6 +69,24 @@ def test_response_text_strips_quotes_around_charset(content_type: str) -> None: assert resp.text == "café" +def test_response_text_strips_inner_whitespace_in_quoted_charset() -> None: + # Direct parser check: inner whitespace inside the quoted value must not survive. + # (Python's codec registry happens to normalize whitespace, masking the bug + # end-to-end on most charsets; verify the parser itself returns a clean value.) + assert _parse_charset('text/plain; charset=" iso-8859-1 "') == "iso-8859-1" + + # End-to-end smoke: decoding the Latin-1 body must yield the original text. + body = "café".encode("iso-8859-1") + resp = Response( + status=HTTPStatus.OK, + headers={"content-type": 'text/plain; charset=" iso-8859-1 "'}, + content=body, + url="/", + elapsed=0.0, + ) + assert resp.text == "café" + + def test_response_text_falls_back_to_utf8_on_unknown_charset() -> None: resp = Response( status=200, From b877a27a415bcb023cdfee33a1f8e8b8d22b0929 Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Wed, 3 Jun 2026 07:40:57 +0300 Subject: [PATCH 2/5] fix: validate Request fields in __post_init__ Adds Request.__post_init__ that validates: - url is a non-empty str - headers, params, cookies, extensions are each a Mapping - header and cookie names/values are non-empty str, no CR or LF with_* methods inherit validation via dataclasses.replace; no per-method code needed. Header validation is minimal per spec (reject CR/LF, non-str, empty); full RFC 9110 token validation is out of scope. Closes deferred-work entries: "Header name/value validation", "URL validation" (Request.url part), "with_query(None) handling". Co-Authored-By: Claude Opus 4.7 (1M context) --- src/httpware/request.py | 29 +++++++++++++++ tests/test_request.py | 81 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) diff --git a/src/httpware/request.py b/src/httpware/request.py index 218c663..b5e3413 100644 --- a/src/httpware/request.py +++ b/src/httpware/request.py @@ -6,6 +6,18 @@ from typing import Any, Self +def _validate_header_or_cookie(name: str, value: str, *, kind: str) -> None: + if not isinstance(name, str) or not isinstance(value, str): + msg = f"{kind} name and value must be str" + raise TypeError(msg) + if not name or not value: + msg = f"{kind} name and value must be non-empty" + raise ValueError(msg) + if any(c in name or c in value for c in ("\r", "\n")): + msg = f"{kind} name and value must not contain CR or LF" + raise ValueError(msg) + + @dataclass(frozen=True, slots=True) class Request: """Immutable HTTP request value type.""" @@ -18,6 +30,23 @@ class Request: body: bytes | None = None extensions: Mapping[str, Any] = field(default_factory=dict) + def __post_init__(self) -> None: + if not isinstance(self.url, str): + msg = "url must be str" + raise TypeError(msg) + if not self.url: + msg = "url must be non-empty" + raise ValueError(msg) + for field_name in ("headers", "params", "cookies", "extensions"): + field_value = getattr(self, field_name) + if not isinstance(field_value, Mapping): + msg = f"{field_name} must be a Mapping (got {type(field_value).__name__})" + raise TypeError(msg) + for name, value in self.headers.items(): + _validate_header_or_cookie(name, value, kind="header") + for name, value in self.cookies.items(): + _validate_header_or_cookie(name, value, kind="cookie") + def with_header(self, name: str, value: str) -> Self: """Return a copy with the given header added or replaced.""" return dataclasses.replace(self, headers={**self.headers, name: value}) diff --git a/tests/test_request.py b/tests/test_request.py index 47b746d..fc7879b 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -143,3 +143,84 @@ class _Marker: new = r.with_extension("marker", marker) assert new.extensions == {"marker": marker} assert new.extensions["marker"] is marker + + +def test_request_rejects_empty_url() -> None: + with pytest.raises(ValueError, match="url must be non-empty"): + Request(method="GET", url="") + + +def test_request_rejects_non_str_url() -> None: + with pytest.raises(TypeError, match="url must be str"): + Request(method="GET", url=None) # ty: ignore[invalid-argument-type] + + +def test_with_url_rejects_empty() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="url must be non-empty"): + r.with_url("") + + +def test_request_rejects_header_with_crlf_in_value() -> None: + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + Request(method="GET", url="/", headers={"X-Trace": "value\r\nInjected: yes"}) + + +def test_request_rejects_header_with_crlf_in_name() -> None: + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + Request(method="GET", url="/", headers={"X-Bad\r\nInjected": "value"}) + + +def test_request_rejects_empty_header_name() -> None: + with pytest.raises(ValueError, match="header name and value must be non-empty"): + Request(method="GET", url="/", headers={"": "value"}) + + +def test_request_rejects_empty_header_value() -> None: + with pytest.raises(ValueError, match="header name and value must be non-empty"): + Request(method="GET", url="/", headers={"X-Trace": ""}) + + +def test_request_rejects_non_str_header_value() -> None: + with pytest.raises(TypeError, match="header name and value must be str"): + Request(method="GET", url="/", headers={"X-Trace": None}) # ty: ignore[invalid-argument-type] + + +def test_request_rejects_cookie_with_crlf() -> None: + with pytest.raises(ValueError, match="cookie name and value must not contain CR or LF"): + Request(method="GET", url="/", cookies={"session": "abc\r\nSet-Cookie: evil"}) + + +def test_request_rejects_empty_cookie_value() -> None: + with pytest.raises(ValueError, match="cookie name and value must be non-empty"): + Request(method="GET", url="/", cookies={"session": ""}) + + +def test_with_header_rejects_crlf() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="header name and value must not contain CR or LF"): + r.with_header("X-Trace", "value\r\n") + + +def test_with_cookie_rejects_crlf() -> None: + r = Request(method="GET", url="/") + with pytest.raises(ValueError, match="cookie name and value must not contain CR or LF"): + r.with_cookie("session", "abc\r\n") + + +@pytest.mark.parametrize("field_name", ["headers", "params", "cookies", "extensions"]) +def test_request_rejects_none_mapping_field(field_name: str) -> None: + with pytest.raises(TypeError, match=f"{field_name} must be a Mapping"): + Request(method="GET", url="/", **{field_name: None}) # ty: ignore[invalid-argument-type] + + +@pytest.mark.parametrize("field_name", ["headers", "params", "cookies", "extensions"]) +def test_request_rejects_list_mapping_field(field_name: str) -> None: + with pytest.raises(TypeError, match=f"{field_name} must be a Mapping"): + Request(method="GET", url="/", **{field_name: []}) # ty: ignore[invalid-argument-type] + + +def test_with_query_none_raises() -> None: + r = Request(method="GET", url="/") + with pytest.raises(TypeError, match="params must be a Mapping"): + r.with_query(None) # ty: ignore[invalid-argument-type] From 45ddb2529a9e2bb57570ddb58a06274fa0c0a809 Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Wed, 3 Jun 2026 07:45:31 +0300 Subject: [PATCH 3/5] fix: validate Timeout/Limits negatives in __post_init__ Both dataclasses now raise ValueError on construction if any field is negative. Zero is permitted (Timeout zero = fail-immediately sentinel; Limits zero = downstream's call on what it means, typically "no limit"). Closes deferred-work entry: "Timeout / Limits negative-value validation". Co-Authored-By: Claude Opus 4.7 (1M context) --- src/httpware/config.py | 18 ++++++++++++++++++ tests/test_config.py | 26 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/httpware/config.py b/src/httpware/config.py index 91fefdd..dbf6a80 100644 --- a/src/httpware/config.py +++ b/src/httpware/config.py @@ -17,6 +17,13 @@ class Timeout: write: float = 30.0 pool: float = 5.0 + def __post_init__(self) -> None: + for attr in ("connect", "read", "write", "pool"): + value = getattr(self, attr) + if value < 0: + msg = f"Timeout.{attr} must be non-negative (got {value})" + raise ValueError(msg) + @dataclass(frozen=True, slots=True) class Limits: @@ -26,6 +33,17 @@ class Limits: max_keepalive_connections: int = 20 keepalive_expiry: float = 5.0 + def __post_init__(self) -> None: + if self.max_connections < 0: + msg = f"max_connections must be non-negative (got {self.max_connections})" + raise ValueError(msg) + if self.max_keepalive_connections < 0: + msg = f"max_keepalive_connections must be non-negative (got {self.max_keepalive_connections})" + raise ValueError(msg) + if self.keepalive_expiry < 0: + msg = f"keepalive_expiry must be non-negative (got {self.keepalive_expiry})" + raise ValueError(msg) + @dataclass(frozen=True, slots=True) class ClientConfig: diff --git a/tests/test_config.py b/tests/test_config.py index 03a5535..14aec9c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -50,3 +50,29 @@ def test_client_config_is_frozen() -> None: cfg = ClientConfig() with pytest.raises(FrozenInstanceError): cfg.base_url = "https://example.com" # ty: ignore[invalid-assignment] + + +@pytest.mark.parametrize("field", ["connect", "read", "write", "pool"]) +def test_timeout_rejects_negative(field: str) -> None: + with pytest.raises(ValueError, match=rf"Timeout\.{field} must be non-negative"): + Timeout(**{field: -1.0}) + + +def test_timeout_accepts_zero() -> None: + # Zero is a valid sentinel (fail immediately on this phase). + Timeout(connect=0.0, read=0.0, write=0.0, pool=0.0) + + +@pytest.mark.parametrize("field", ["max_connections", "max_keepalive_connections"]) +def test_limits_rejects_negative_int(field: str) -> None: + with pytest.raises(ValueError, match=f"{field} must be non-negative"): + Limits(**{field: -1}) + + +def test_limits_rejects_negative_keepalive_expiry() -> None: + with pytest.raises(ValueError, match="keepalive_expiry must be non-negative"): + Limits(keepalive_expiry=-0.5) + + +def test_limits_accepts_zero() -> None: + Limits(max_connections=0, max_keepalive_connections=0, keepalive_expiry=0.0) From 4560b88aa96364e447e32a2722f0d7fe1af649bd Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Wed, 3 Jun 2026 07:51:15 +0300 Subject: [PATCH 4/5] refactor: validate and normalize ClientConfig.base_url in __post_init__ ClientConfig.__post_init__ now: - rejects empty string / non-str base_url with ValueError - strips trailing slash so the stored value is canonical AsyncClient._resolve_url no longer does its own rstrip("/") on base_url since the stored value is already canonical (DRY: one source of truth for what a stored base_url looks like). Closes deferred-work entry: "URL validation" (base_url normalization part; the Request.url non-empty check shipped in the prior Request __post_init__ commit). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/httpware/client.py | 2 +- src/httpware/config.py | 11 +++++++++++ tests/test_config.py | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/httpware/client.py b/src/httpware/client.py index 4e70dce..ab6c401 100644 --- a/src/httpware/client.py +++ b/src/httpware/client.py @@ -119,7 +119,7 @@ def _resolve_url(self, path: str) -> str: base = self._config.base_url if base is None: return path - return f"{base.rstrip('/')}/{path.lstrip('/')}" + return f"{base}/{path.lstrip('/')}" def _build_request( self, diff --git a/src/httpware/config.py b/src/httpware/config.py index dbf6a80..bfa49bf 100644 --- a/src/httpware/config.py +++ b/src/httpware/config.py @@ -56,3 +56,14 @@ class ClientConfig: limits: Limits = field(default_factory=Limits) decoder: ResponseDecoder = field(default_factory=PydanticDecoder) middleware: tuple[Middleware, ...] = () + + def __post_init__(self) -> None: + if self.base_url is not None: + if not isinstance(self.base_url, str): + msg = "base_url must be a non-empty string or None" + raise ValueError(msg) + normalized = self.base_url.rstrip("/") + if not normalized: + msg = "base_url must be a non-empty string or None" + raise ValueError(msg) + object.__setattr__(self, "base_url", normalized) diff --git a/tests/test_config.py b/tests/test_config.py index 14aec9c..b0ded35 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -76,3 +76,43 @@ def test_limits_rejects_negative_keepalive_expiry() -> None: def test_limits_accepts_zero() -> None: Limits(max_connections=0, max_keepalive_connections=0, keepalive_expiry=0.0) + + +def test_client_config_strips_trailing_slash_from_base_url() -> None: + cfg = ClientConfig(base_url="https://api.example.com/") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_leaves_base_url_without_trailing_slash() -> None: + cfg = ClientConfig(base_url="https://api.example.com") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_strips_multiple_trailing_slashes() -> None: + cfg = ClientConfig(base_url="https://api.example.com///") + assert cfg.base_url == "https://api.example.com" + + +def test_client_config_allows_none_base_url() -> None: + cfg = ClientConfig(base_url=None) + assert cfg.base_url is None + + +def test_client_config_rejects_empty_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="") + + +def test_client_config_rejects_slash_only_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="/") + + +def test_client_config_rejects_multiple_slashes_only_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url="///") + + +def test_client_config_rejects_non_str_base_url() -> None: + with pytest.raises(ValueError, match="base_url must be a non-empty string or None"): + ClientConfig(base_url=123) # ty: ignore[invalid-argument-type] From 73697c94ba28455e79e7b37a14af3ad1d9a9d54c Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Wed, 3 Jun 2026 10:17:28 +0300 Subject: [PATCH 5/5] docs: close deferred-work entries resolved by input-validation pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes five Story 1-2 entries closed by this PR: charset parser robustness, header name/value validation, URL validation, with_query (None) handling, Timeout/Limits negative-value validation. The remaining Story 1-2 entries (multi-valued query params, streaming request bodies, @final subclassing) stay open — they're different shapes of change, explicitly out of scope for this pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- planning/deferred-work.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/planning/deferred-work.md b/planning/deferred-work.md index 0a74b00..667f7b1 100644 --- a/planning/deferred-work.md +++ b/planning/deferred-work.md @@ -27,11 +27,6 @@ Items raised in reviews that are real but not actionable now. ## Deferred from: code review of story-1-2 (2026-05-13) -- **Charset parser robustness** — quoted whitespace, mismatched quotes, multi-`charset=` directives, substring false-positives (e.g. `boundary` containing `charset=`). (`src/httpware/response.py:20-25`) -- **Header name/value validation** — `with_header` / `with_headers` accept CR/LF (injection), `None`, empty string. CRLF lands with the `Redactor` middleware (Story 5.3); the rest is a generic input-validation pass. (`src/httpware/request.py:21-23,37-39`) -- **URL validation** — `with_url("")` accepts empty; `base_url` has no trailing-slash normalization in config (`AsyncClient._resolve_url` does `base.rstrip('/')` so the practical impact is reduced). (`src/httpware/request.py:25-27`, `src/httpware/config.py:34`) -- **`with_query(None)` handling** — type signature is `Mapping[str, str]`; a runtime `None` slips past typing and breaks downstream iteration with no guard. (`src/httpware/request.py:33-35`) -- **`Timeout` / `Limits` negative-value validation** — no `__post_init__` guard; nonsensical values silently accepted. (`src/httpware/config.py:11-27`) - **Multi-valued query params** — `Mapping[str, str]` cannot express `?tag=a&tag=b`. Type widening needed. (`src/httpware/request.py:16`) - **Streaming / async-iterable request bodies** — `body: bytes | None` only. Revisit in streaming work (Story 4.1). (`src/httpware/request.py:18`) - **`@final` to prevent subclassing** — frozen+slots subclassing is fragile. No current subclasser; defer until needed. (`src/httpware/request.py`, `response.py`, `config.py`)