From 421003620ee860c394a3dbcca1fdcc3592f55065 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Sun, 14 Jun 2026 22:03:26 +0000 Subject: [PATCH] fix(backend): set Anthropic prompt cache TTL to 1h (was 5m default) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic changed default cache TTL from 1h→5m on 2026-03-06. Interactive chat sessions have gaps >5min between turns, so the 5-min default kills cache hit rate (currently 9% for omi-prod-chat). This restores the previous default: a single 'ttl': '1h' field in the cache_control dict at agentic.py:367. Estimated savings: $1,273–$2,918/mo (omi-prod-chat alone). --- .../unit/test_prompt_cache_integration.py | 44 +++++++++++++++++++ backend/utils/retrieval/agentic.py | 4 +- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/backend/tests/unit/test_prompt_cache_integration.py b/backend/tests/unit/test_prompt_cache_integration.py index 554ff5dc520..e8c4f64e929 100644 --- a/backend/tests/unit/test_prompt_cache_integration.py +++ b/backend/tests/unit/test_prompt_cache_integration.py @@ -820,6 +820,50 @@ def test_page_context_in_dynamic_section(): assert "Meeting with team" in dynamic_suffix +# --------------------------------------------------------------------------- +# Tests: Anthropic cache_control includes TTL +# --------------------------------------------------------------------------- + + +def test_anthropic_cache_control_has_ttl(): + """ + The cache_control dict in _run_anthropic_agent_stream must include + ttl="1h" so that interactive chat sessions (with gaps >5min between + turns) get cache hits instead of re-writing on every request. + + Regression: Anthropic changed default TTL from 1h→5m on 2026-03-06. + """ + agentic_mod = _get_agentic_module() + + # Inspect the source to find the system_blocks construction + import inspect + + src = inspect.getsource(agentic_mod._run_anthropic_agent_stream) + assert '"ttl": "1h"' in src or "'ttl': '1h'" in src, ( + "cache_control must include ttl='1h' to avoid 5-min default " + f"(source excerpt: ...{src[src.find('cache_control'):src.find('cache_control')+120]}...)" + ) + assert "ephemeral" in src, "cache type must be ephemeral" + + +def test_anthropic_cache_control_not_5min_default(): + """ + Guard against regression: ensure we are NOT relying on the 5-minute + default TTL that Anthropic introduced in March 2026. + """ + agentic_mod = _get_agentic_module() + import inspect + + src = inspect.getsource(agentic_mod._run_anthropic_agent_stream) + # The old (broken) pattern was just {"type": "ephemeral"} with no ttl field + # Find the cache_control line(s) + lines_with_cache_ctrl = [l for l in src.splitlines() if "cache_control" in l] + for line in lines_with_cache_ctrl: + # Must NOT be the bare {"type": "ephemeral"} form + if '"type": "ephemeral"' in line or "'type': 'ephemeral'" in line: + assert "ttl" in line, f"cache_control line missing ttl field: {line.strip()}" + + # --------------------------------------------------------------------------- # Utility # --------------------------------------------------------------------------- diff --git a/backend/utils/retrieval/agentic.py b/backend/utils/retrieval/agentic.py index 00a31df2432..1ff5892ab37 100644 --- a/backend/utils/retrieval/agentic.py +++ b/backend/utils/retrieval/agentic.py @@ -362,7 +362,9 @@ async def _run_anthropic_agent_stream( and feeds results back until the model stops requesting tools. """ # System prompt with cache_control for Anthropic prompt caching - system_blocks = [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}] + # TTL=1h: Anthropic changed default from 1h→5m on 2026-03-06; interactive chat + # sessions have gaps >5min between turns, so the 5-min default kills cache hit rate. + system_blocks = [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral", "ttl": "1h"}}] loop_iteration = 0