diff --git a/backend/database/firestore_cache.py b/backend/database/firestore_cache.py new file mode 100644 index 0000000000..e4b06c9ba4 --- /dev/null +++ b/backend/database/firestore_cache.py @@ -0,0 +1,172 @@ +"""Conservative Redis read-through cache for Firestore projections. + +The cache is intentionally projection-oriented. Do not use it to cache whole +``users/{uid}`` documents: user docs mix low-risk preferences with entitlement, +BYOK, privacy consent, and data-protection fields that require stricter +correctness guarantees. +""" + +import base64 +import json +import logging +import os +import random +import time +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Callable, Optional, cast + +from database.redis_db import r +from database.firestore_cache_metrics import observe_fetch, observe_payload, record_request + +logger = logging.getLogger(__name__) + +_GLOBAL_VERSION = os.getenv('FIRESTORE_CACHE_GLOBAL_VERSION', '1') + + +@dataclass(frozen=True) +class CachePolicy: + """Policy for one typed Firestore cache namespace.""" + + namespace: str + version: int = 1 + ttl_seconds: int = 60 + jitter_ratio: float = 0.10 + enabled_env_var: str = 'FIRESTORE_CACHE_ENABLED' + max_payload_bytes: int = 256_000 + + +def is_enabled(policy: CachePolicy) -> bool: + """Return whether cache reads/writes are enabled for this policy. + + Global flag defaults to false. A per-namespace override can also enable or + disable a single policy, e.g. FIRESTORE_CACHE_USER_LANGUAGE_ENABLED=true. + """ + + namespace_flag = f"FIRESTORE_CACHE_{policy.namespace.upper()}_ENABLED" + namespace_value = os.getenv(namespace_flag) + if namespace_value is not None: + return namespace_value.lower() in {'1', 'true', 'yes', 'on'} + return os.getenv(policy.enabled_env_var, '').lower() in {'1', 'true', 'yes', 'on'} + + +def make_cache_key(policy: CachePolicy, entity_id: str) -> str: + """Build a deterministic, versioned key for a typed projection. + + Entity IDs are base64url encoded instead of sanitized with string + replacement so cache keys are collision-free. For example, ``a:b`` and + ``a_b`` must not map to the same Redis key because this cache can hold + per-user projections. + """ + + encoded_id = base64.urlsafe_b64encode(str(entity_id).encode('utf-8')).decode('ascii').rstrip('=') + return f'fs:v{_GLOBAL_VERSION}:{policy.namespace}:v{policy.version}:b64:{encoded_id}' + + +def invalidate(policy: CachePolicy, entity_id: str) -> None: + """Best-effort invalidation. Redis failures are logged and swallowed.""" + if not is_enabled(policy): + return + + key = make_cache_key(policy, entity_id) + try: + r.delete(key) + record_request(policy.namespace, 'invalidate') + except Exception as e: + logger.warning('Firestore cache invalidate failed namespace=%s error=%s', policy.namespace, e) + record_request(policy.namespace, 'invalidate_error') + + +def get_or_fetch(policy: CachePolicy, entity_id: str, fetch_fn: Callable[[], Any]) -> Any: + """Return cached projection or call ``fetch_fn`` and populate Redis. + + Correctness source remains Firestore. If cache is disabled, Redis is down, + the cached value is malformed/stale, or serialization fails, this function + falls back to ``fetch_fn`` and returns its result. + """ + + if not is_enabled(policy): + record_request(policy.namespace, 'disabled') + return _fetch(policy, fetch_fn) + + key = make_cache_key(policy, entity_id) + now = time.time() + + try: + raw = r.get(key) + except Exception as e: + logger.warning('Firestore cache read failed namespace=%s error=%s', policy.namespace, e) + record_request(policy.namespace, 'redis_error') + return _fetch(policy, fetch_fn) + + if raw: + try: + raw_str = raw.decode('utf-8') if isinstance(raw, bytes) else cast(str, raw) + envelope = json.loads(raw_str, object_hook=_json_object_hook) + if envelope.get('v') == policy.version and envelope.get('fresh_until', 0) >= now: + record_request(policy.namespace, 'hit') + return envelope.get('payload') + record_request(policy.namespace, 'stale') + except Exception as e: + logger.warning('Firestore cache decode failed namespace=%s error=%s', policy.namespace, e) + record_request(policy.namespace, 'decode_error') + else: + record_request(policy.namespace, 'miss') + + payload = _fetch(policy, fetch_fn) + now = time.time() + _set(policy, key, payload, now) + return payload + + +def _fetch(policy: CachePolicy, fetch_fn: Callable[[], Any]) -> Any: + start = time.monotonic() + try: + return fetch_fn() + finally: + observe_fetch(policy.namespace, time.monotonic() - start) + + +def _ttl_with_jitter(policy: CachePolicy) -> int: + ttl = max(1, policy.ttl_seconds) + if policy.jitter_ratio <= 0: + return ttl + spread = max(1, int(ttl * policy.jitter_ratio)) + return max(1, ttl + random.randint(-spread, spread)) + + +def _set(policy: CachePolicy, key: str, payload: Any, now: Optional[float] = None) -> None: + now = now or time.time() + ttl = _ttl_with_jitter(policy) + envelope = { + 'v': policy.version, + 'kind': 'value', + 'created_at': now, + 'fresh_until': now + ttl, + 'payload': payload, + } + + try: + encoded = json.dumps(envelope, default=_json_default) + payload_bytes = len(encoded.encode('utf-8')) + observe_payload(policy.namespace, payload_bytes) + if payload_bytes > policy.max_payload_bytes: + record_request(policy.namespace, 'payload_too_large') + return + r.set(key, encoded, ex=ttl) + record_request(policy.namespace, 'set') + except Exception as e: + logger.warning('Firestore cache set failed namespace=%s error=%s', policy.namespace, e) + record_request(policy.namespace, 'set_error') + + +def _json_default(value: Any) -> Any: + if isinstance(value, datetime): + return {'__firestore_cache_type__': 'datetime', 'iso': value.isoformat()} + raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable') + + +def _json_object_hook(value: dict) -> Any: + if value.get('__firestore_cache_type__') == 'datetime': + return datetime.fromisoformat(value['iso']) + return value diff --git a/backend/database/firestore_cache_metrics.py b/backend/database/firestore_cache_metrics.py new file mode 100644 index 0000000000..5ac1997c5f --- /dev/null +++ b/backend/database/firestore_cache_metrics.py @@ -0,0 +1,40 @@ +"""Low-cardinality metrics for Firestore read-through caches. + +This module intentionally lives under ``database/`` so database modules can +record metrics without importing upward from ``utils``. ``prometheus_client`` +uses a global registry, so these metrics are exported automatically by the +existing /metrics endpoint. +""" + +from prometheus_client import Counter, Histogram + +FIRESTORE_CACHE_REQUESTS = Counter( + 'firestore_cache_requests_total', + 'Firestore cache requests by namespace and result', + ['namespace', 'result'], +) + +FIRESTORE_CACHE_FETCH_SECONDS = Histogram( + 'firestore_cache_fetch_seconds', + 'Time spent fetching Firestore cache misses from the source of truth', + ['namespace'], +) + +FIRESTORE_CACHE_PAYLOAD_BYTES = Histogram( + 'firestore_cache_payload_bytes', + 'Serialized Firestore cache payload size in bytes', + ['namespace'], + buckets=(128, 512, 1024, 4096, 16384, 65536, 262144, 1048576), +) + + +def record_request(namespace: str, result: str) -> None: + FIRESTORE_CACHE_REQUESTS.labels(namespace=namespace, result=result).inc() + + +def observe_fetch(namespace: str, seconds: float) -> None: + FIRESTORE_CACHE_FETCH_SECONDS.labels(namespace=namespace).observe(seconds) + + +def observe_payload(namespace: str, payload_bytes: int) -> None: + FIRESTORE_CACHE_PAYLOAD_BYTES.labels(namespace=namespace).observe(payload_bytes) diff --git a/backend/database/users.py b/backend/database/users.py index 89f43a7294..90bb5287c4 100644 --- a/backend/database/users.py +++ b/backend/database/users.py @@ -5,6 +5,7 @@ from google.cloud.firestore_v1 import FieldFilter, transactional from ._client import db, document_id_from_seed +from database.firestore_cache import CachePolicy, get_or_fetch, invalidate from database.redis_db import try_acquire_user_platform_write_lock from models.users import Subscription, PlanLimits, PlanType, SubscriptionStatus from utils.subscription import get_default_basic_subscription @@ -12,6 +13,12 @@ logger = logging.getLogger(__name__) +# Conservative low-risk user projections. Do NOT use these policies for +# entitlement, BYOK, data-protection, privacy-consent, or full user-doc caching. +_USER_LANGUAGE_CACHE = CachePolicy(namespace='user_language', version=1, ttl_seconds=300) +_USER_TRANSCRIPTION_PREFS_CACHE = CachePolicy(namespace='user_transcription_prefs', version=1, ttl_seconds=120) +_USER_AI_PROFILE_CACHE = CachePolicy(namespace='user_ai_profile', version=1, ttl_seconds=300) + # Industry-standard two-field pattern (Mixpanel / Amplitude / PostHog): # signup_platform — set once at account creation, immutable @@ -869,14 +876,28 @@ def get_user_language_preference(uid: str) -> str: Returns: Language code (e.g., 'en', 'vi') or empty string if not set """ - user_ref = db.collection('users').document(uid) - user_doc = user_ref.get() - if user_doc.exists: - user_data = user_doc.to_dict() - return user_data.get('language', '') + def fetch_language(): + user_ref = db.collection('users').document(uid) + user_doc = user_ref.get(['language']) - return '' # Return empty string if not set + if user_doc.exists: + user_data = user_doc.to_dict() + return user_data.get('language', '') + + return '' # Return empty string if not set + + # DESIGN DECISION: cache this typed user projection, not the full users/{uid} doc. + # + # Rationale: + # - Language preference is a low-risk, frequently-read setting used during + # listen startup. + # - Full user-doc caching is intentionally avoided because users/{uid} also + # contains entitlement, BYOK, privacy, and data-protection fields. + # + # Safety: cache is disabled by default, Redis failures fall back to Firestore, + # and set_user_language_preference() invalidates this namespace. + return get_or_fetch(_USER_LANGUAGE_CACHE, uid, fetch_language) def set_user_language_preference(uid: str, language: str) -> None: @@ -889,6 +910,8 @@ def set_user_language_preference(uid: str, language: str) -> None: """ user_ref = db.collection('users').document(uid) user_ref.set({'language': language}, merge=True) + invalidate(_USER_LANGUAGE_CACHE, uid) + invalidate(_USER_TRANSCRIPTION_PREFS_CACHE, uid) def get_user_onboarding_state(uid: str) -> dict: @@ -1189,27 +1212,34 @@ def get_user_transcription_preferences(uid: str) -> dict: Returns: dict with 'single_language_mode' (bool), 'vocabulary' (List[str]), and 'language' (str) """ - user_ref = db.collection('users').document(uid) - user_doc = user_ref.get() - if user_doc.exists: - user_data = user_doc.to_dict() - prefs = user_data.get('transcription_preferences', {}) + def fetch_preferences(): + user_ref = db.collection('users').document(uid) + user_doc = user_ref.get(['transcription_preferences', 'language']) + + if user_doc.exists: + user_data = user_doc.to_dict() + prefs = user_data.get('transcription_preferences', {}) + return { + 'single_language_mode': prefs.get('single_language_mode', False), + 'vocabulary': prefs.get('vocabulary', []), + 'language': user_data.get('language', ''), + 'uses_custom_stt': prefs.get('uses_custom_stt', False), + 'custom_stt_since': prefs.get('custom_stt_since'), + } + return { - 'single_language_mode': prefs.get('single_language_mode', False), - 'vocabulary': prefs.get('vocabulary', []), - 'language': user_data.get('language', ''), - 'uses_custom_stt': prefs.get('uses_custom_stt', False), - 'custom_stt_since': prefs.get('custom_stt_since'), + 'single_language_mode': False, + 'vocabulary': [], + 'language': '', + 'uses_custom_stt': False, + 'custom_stt_since': None, } - return { - 'single_language_mode': False, - 'vocabulary': [], - 'language': '', - 'uses_custom_stt': False, - 'custom_stt_since': None, - } + # DESIGN DECISION: cache this typed user projection, not the full users/{uid} doc. + # It includes only transcription startup preferences and language. It does not + # include entitlement, BYOK, data-protection, or privacy-consent fields. + return get_or_fetch(_USER_TRANSCRIPTION_PREFS_CACHE, uid, fetch_preferences) def get_agent_vm(uid: str) -> Optional[dict]: @@ -1249,6 +1279,7 @@ def set_user_transcription_preferences(uid: str, single_language_mode: bool = No if update_data: user_ref.update(update_data) + invalidate(_USER_TRANSCRIPTION_PREFS_CACHE, uid) def set_user_custom_stt_usage(uid: str, uses_custom_stt: bool) -> None: @@ -1269,6 +1300,7 @@ def set_user_custom_stt_usage(uid: str, uses_custom_stt: bool) -> None: update_data = {'transcription_preferences.uses_custom_stt': uses_custom_stt} update_data['transcription_preferences.custom_stt_since'] = datetime.now(timezone.utc) if uses_custom_stt else None user_ref.update(update_data) + invalidate(_USER_TRANSCRIPTION_PREFS_CACHE, uid) # ============================================================================ @@ -1367,20 +1399,29 @@ def update_assistant_settings(uid: str, settings: dict) -> dict: return existing -def get_ai_user_profile(uid: str) -> Optional[dict]: +def _get_ai_user_profile_from_firestore(uid: str) -> Optional[dict]: user_ref = db.collection('users').document(uid) - doc = user_ref.get() + doc = user_ref.get(['ai_user_profile']) if not doc.exists: return None return doc.to_dict().get('ai_user_profile') +def get_ai_user_profile(uid: str) -> Optional[dict]: + # DESIGN DECISION: cache only the low-risk ai_user_profile projection. + # Avoid full user-doc caching because high-risk entitlement/BYOK/privacy + # fields live on the same Firestore document. + return get_or_fetch(_USER_AI_PROFILE_CACHE, uid, lambda: _get_ai_user_profile_from_firestore(uid)) + + def update_ai_user_profile( uid: str, profile_text: str = None, generated_at=None, data_sources_used: int = None ) -> dict: """Update AI user profile. Only writes non-None fields (partial update).""" - # Read existing profile and merge updates - existing = get_ai_user_profile(uid) or {} + # Read existing profile directly from Firestore — never from cache — because + # this is a read-modify-write path. Using a stale cached projection here + # could overwrite newer profile fields. + existing = _get_ai_user_profile_from_firestore(uid) or {} if profile_text is not None: existing['profile_text'] = profile_text if generated_at is not None: @@ -1389,4 +1430,5 @@ def update_ai_user_profile( existing['data_sources_used'] = data_sources_used user_ref = db.collection('users').document(uid) user_ref.update({'ai_user_profile': existing}) + invalidate(_USER_AI_PROFILE_CACHE, uid) return existing diff --git a/backend/docs/firestore-cache-architecture.md b/backend/docs/firestore-cache-architecture.md new file mode 100644 index 0000000000..b898198bff --- /dev/null +++ b/backend/docs/firestore-cache-architecture.md @@ -0,0 +1,185 @@ +# Firestore Cache Architecture + +## Status + +Accepted for incremental rollout (DD-007 PR #29). + +## Context + +Firestore reads are a P0 cost driver. The default database is responsible for roughly **$3,807/mo net** with **10.7B reads/month**. DD-007 found repeated reads of the same `users/{uid}` document on WebSocket startup and other hot paths. + +The tempting fix is to cache the full user document. We explicitly do **not** do that. + +## Decision + +Introduce a reusable projection-based Firestore read-through cache under `backend/database/` and apply it only to low-risk typed projections first. + +```text +Firestore source of truth + ↓ +typed projection fetcher in database/* + ↓ +database.firestore_cache.get_or_fetch(...) + ↓ +Redis shared L2 cache, disabled by default +``` + +## Why projection cache instead of full user-doc cache + +`users/{uid}` mixes low-risk preferences with correctness-critical fields: + +| Field family | Risk if stale | +|---|---| +| Subscription / entitlement | grants or blocks paid features incorrectly | +| BYOK | accepts removed keys or misroutes provider billing | +| Data protection / migration state | writes data under wrong storage/encryption policy | +| Privacy consent | continues recording/training/syncing after opt-out | +| Account/security state | stale security decisions | + +Therefore, this cache stores only allowlisted projections such as language, transcription preferences, and AI profile metadata. It must not be used for whole documents or critical fields without a separate design review and shadow rollout. + +## Current PR scope + +Cached projections: + +| Projection | Namespace | TTL | Notes | +|---|---|---:|---| +| language preference | `user_language` | 300s | low-risk listen startup setting | +| transcription preferences | `user_transcription_prefs` | 120s | low-risk startup prefs; includes language | +| AI user profile | `user_ai_profile` | 300s | read-mostly metadata | + +Not cached: + +- `get_user_subscription()` / `get_user_valid_subscription()` +- BYOK state +- data protection level +- private cloud sync flag +- recording / training consent +- full `get_user_profile()` + +## Runtime flags + +Cache reads/writes are disabled by default. + +```bash +FIRESTORE_CACHE_ENABLED=true +``` + +Per-namespace override: + +```bash +FIRESTORE_CACHE_USER_LANGUAGE_ENABLED=true +FIRESTORE_CACHE_USER_TRANSCRIPTION_PREFS_ENABLED=true +FIRESTORE_CACHE_USER_AI_PROFILE_ENABLED=true +``` + +Emergency namespace reset: + +```bash +FIRESTORE_CACHE_GLOBAL_VERSION=2 +``` + +Bumping the global version abandons all existing keys without scanning Redis. + +## Cache keys + +```text +fs:v{global_version}:{namespace}:v{policy_version}:b64:{base64url(entity_id)} +``` + +Example for entity ID `uid_123`: + +```text +fs:v1:user_transcription_prefs:v1:b64:dWlkXzEyMw +``` + +Keys must include: + +- global version +- namespace +- policy version +- base64url-encoded UID / entity ID + +Entity IDs are encoded rather than character-replaced so keys are collision-free. For example, `a:b` and `a_b` must not map to the same Redis key. + +Never include raw names, emails, user text, or secrets in key names. + +## Cache envelope + +Redis stores a typed envelope: + +```json +{ + "v": 1, + "kind": "value", + "created_at": 1781460000.123, + "fresh_until": 1781460120.123, + "payload": {} +} +``` + +The envelope supports schema version checks, TTL jitter, payload-size metrics, and future stale-while-revalidate support. + +## Failure behavior + +Redis is never the source of truth. + +- cache disabled → fetch Firestore +- Redis read error → fetch Firestore +- malformed envelope → fetch Firestore +- Redis write error → return Firestore result +- payload too large → do not cache + +## Invalidation + +Setter functions for cached projections must invalidate after successful Firestore writes. + +Current invalidation hooks: + +| Write path | Invalidates | +|---|---| +| `set_user_language_preference()` | `user_language`, `user_transcription_prefs` | +| `set_user_transcription_preferences()` | `user_transcription_prefs` | +| `set_user_custom_stt_usage()` | `user_transcription_prefs` | +| `update_ai_user_profile()` | `user_ai_profile` | + +## Metrics + +Prometheus metrics use low-cardinality labels only: + +- `firestore_cache_requests_total{namespace,result}` +- `firestore_cache_fetch_seconds{namespace}` +- `firestore_cache_payload_bytes{namespace}` + +Do not add UID, email, route path, or free-form cache keys as labels. + +## Rollout plan + +1. Deploy with cache disabled. +2. Enable `user_language` for internal users / small percentage. +3. Monitor cache hit/miss, Redis errors, Firestore reads, listen startup errors. +4. Enable `user_transcription_prefs`. +5. Enable `user_ai_profile`. +6. Only consider entitlement/BYOK/data-protection caches after shadow-mode mismatch metrics exist. + +## Rollback + +Disable all cache reads/writes: + +```bash +FIRESTORE_CACHE_ENABLED=false +``` + +Or abandon all existing keys: + +```bash +FIRESTORE_CACHE_GLOBAL_VERSION= +``` + +No data migration is required because Firestore remains the source of truth. + +## References + +- `deep-dives/DD-007-firestore-read-amplification.md` +- `deep-dives/DD-007-firestore-cache-architecture-proposal.md` +- `deep-dives/DD-007-committee-synthesis.md` diff --git a/backend/test.sh b/backend/test.sh index 5ee65c457e..1c05edf170 100755 --- a/backend/test.sh +++ b/backend/test.sh @@ -44,6 +44,7 @@ pytest tests/unit/test_folder_name_enrichment.py -v pytest tests/unit/test_conversations_count.py -v pytest tests/unit/test_prompt_cache_optimization.py -v pytest tests/unit/test_prompt_cache_integration.py -v +pytest tests/unit/test_firestore_cache.py -v pytest tests/unit/test_task_sharing.py -v pytest tests/unit/test_firmware_pagination.py -v pytest tests/unit/test_vad_gate.py -v diff --git a/backend/tests/unit/test_firestore_cache.py b/backend/tests/unit/test_firestore_cache.py new file mode 100644 index 0000000000..50d3c1f91e --- /dev/null +++ b/backend/tests/unit/test_firestore_cache.py @@ -0,0 +1,158 @@ +import json +from datetime import datetime, timezone + +from database import firestore_cache as fc + + +class FakeRedis: + def __init__(self): + self.store = {} + self.get_calls = 0 + self.set_calls = 0 + self.delete_calls = 0 + self.fail_get = False + self.fail_set = False + + def get(self, key): + self.get_calls += 1 + if self.fail_get: + raise RuntimeError('redis get failed') + return self.store.get(key) + + def set(self, key, value, ex=None): + self.set_calls += 1 + if self.fail_set: + raise RuntimeError('redis set failed') + self.store[key] = value + return True + + def delete(self, key): + self.delete_calls += 1 + self.store.pop(key, None) + return 1 + + +def test_cache_disabled_fetches_every_time(monkeypatch): + fake = FakeRedis() + monkeypatch.setattr(fc, 'r', fake) + monkeypatch.delenv('FIRESTORE_CACHE_ENABLED', raising=False) + monkeypatch.delenv('FIRESTORE_CACHE_TEST_PROJECTION_ENABLED', raising=False) + + policy = fc.CachePolicy(namespace='test_projection', ttl_seconds=60) + calls = {'count': 0} + + def fetch(): + calls['count'] += 1 + return {'value': calls['count']} + + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'value': 1} + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'value': 2} + assert calls['count'] == 2 + assert fake.get_calls == 0 + assert fake.set_calls == 0 + + +def test_cache_enabled_populates_and_hits(monkeypatch): + fake = FakeRedis() + monkeypatch.setattr(fc, 'r', fake) + monkeypatch.setenv('FIRESTORE_CACHE_ENABLED', 'true') + monkeypatch.delenv('FIRESTORE_CACHE_TEST_PROJECTION_ENABLED', raising=False) + + policy = fc.CachePolicy(namespace='test_projection', ttl_seconds=60, jitter_ratio=0) + calls = {'count': 0} + + def fetch(): + calls['count'] += 1 + return {'value': 'from-firestore'} + + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'value': 'from-firestore'} + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'value': 'from-firestore'} + assert calls['count'] == 1 + assert fake.get_calls == 2 + assert fake.set_calls == 1 + + key = fc.make_cache_key(policy, 'uid_1') + envelope = json.loads(fake.store[key]) + assert envelope['v'] == policy.version + assert envelope['kind'] == 'value' + assert envelope['payload'] == {'value': 'from-firestore'} + + +def test_redis_get_failure_falls_back_to_fetch(monkeypatch): + fake = FakeRedis() + fake.fail_get = True + monkeypatch.setattr(fc, 'r', fake) + monkeypatch.setenv('FIRESTORE_CACHE_ENABLED', 'true') + + policy = fc.CachePolicy(namespace='test_projection', ttl_seconds=60) + + assert fc.get_or_fetch(policy, 'uid_1', lambda: {'value': 'fallback'}) == {'value': 'fallback'} + + +def test_cache_round_trips_datetime_payloads(monkeypatch): + fake = FakeRedis() + monkeypatch.setattr(fc, 'r', fake) + monkeypatch.setenv('FIRESTORE_CACHE_ENABLED', 'true') + + policy = fc.CachePolicy(namespace='test_projection', ttl_seconds=60, jitter_ratio=0) + stamp = datetime(2026, 6, 14, 22, 0, tzinfo=timezone.utc) + calls = {'count': 0} + + def fetch(): + calls['count'] += 1 + return {'custom_stt_since': stamp} + + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'custom_stt_since': stamp} + assert fc.get_or_fetch(policy, 'uid_1', fetch) == {'custom_stt_since': stamp} + assert calls['count'] == 1 + + +def test_cache_key_includes_namespace_version_and_encoded_entity_id(monkeypatch): + monkeypatch.setenv('FIRESTORE_CACHE_GLOBAL_VERSION', '1') + policy = fc.CachePolicy(namespace='user_language', version=3) + key = fc.make_cache_key(policy, 'uid:abc') + + assert key.startswith('fs:v') + assert ':user_language:v3:b64:' in key + assert key.endswith('dWlkOmFiYw') + + +def test_cache_key_entity_encoding_is_collision_free(monkeypatch): + monkeypatch.setenv('FIRESTORE_CACHE_GLOBAL_VERSION', '1') + policy = fc.CachePolicy(namespace='user_language', version=3) + + assert fc.make_cache_key(policy, 'a:b') != fc.make_cache_key(policy, 'a_b') + + +def test_users_module_only_wires_safe_projection_caches(): + source = open('database/users.py').read() + + assert "namespace='user_language'" in source + assert "namespace='user_transcription_prefs'" in source + assert "namespace='user_ai_profile'" in source + + forbidden_sections = [ + 'def get_user_subscription', + 'def get_user_valid_subscription', + 'def get_byok_state', + 'def get_data_protection_level', + 'def get_user_private_cloud_sync_enabled', + 'def get_user_training_data_opt_in', + ] + for section in forbidden_sections: + start = source.find(section) + assert start != -1, f'missing expected section {section}' + next_def = source.find('\ndef ', start + 1) + block = source[start : next_def if next_def != -1 else len(source)] + assert 'get_or_fetch(' not in block, f'{section} must not use projection cache in PR #29' + + +def test_ai_profile_update_bypasses_cached_getter_for_merge_safety(): + source = open('database/users.py').read() + start = source.find('def update_ai_user_profile') + assert start != -1 + next_def = source.find('\ndef ', start + 1) + block = source[start : next_def if next_def != -1 else len(source)] + + assert '_get_ai_user_profile_from_firestore(uid)' in block + assert 'get_ai_user_profile(uid)' not in block