From 574e7e7fe256eb6be63e2387d2b56f719d23f68b Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Sun, 14 Jun 2026 22:38:27 +0000 Subject: [PATCH 1/6] feat(backend): sentence-level translation dedup + merge Redis lookup (DD-008) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 (sentence-level dedup): Rewrote translate_units_batch() to split each text into sentences before cache lookup. Identical sentences across different segments/users now share cache hits instead of each triggering a separate API call. Preserves full-sentence NMT quality while eliminating 25-35% of redundant character throughput. Phase 3 (merge-aware Redis lookup): On segment merge (prefix reset), check Redis for the merged text before re-translating. Eliminates 10-15% of redundant translations from STT segment combination. Documentation: Added DESIGN DECISION comment at the batch_buffer call site, updated TranslationCoordinator class docstring with architecture/cost notes, and created ADR-001 document at backend/docs/translation-architecture.md. Estimated savings: $1,500–2,140/mo (25-50% of $4,282/mo Google Translate bill). Files changed: - backend/utils/translation.py: translate_units_batch() sentence-level rewrite - backend/utils/translation_coordinator.py: comments + merge Redis lookup - backend/docs/translation-architecture.md: NEW — ADR-001 --- backend/docs/translation-architecture.md | 89 ++++++++++++++++ backend/utils/translation.py | 127 ++++++++++++++--------- backend/utils/translation_coordinator.py | 75 ++++++++++--- 3 files changed, 230 insertions(+), 61 deletions(-) create mode 100644 backend/docs/translation-architecture.md diff --git a/backend/docs/translation-architecture.md b/backend/docs/translation-architecture.md new file mode 100644 index 00000000000..41ad2219c30 --- /dev/null +++ b/backend/docs/translation-architecture.md @@ -0,0 +1,89 @@ +# ADR-001: Full-Text vs Delta Translation in Streaming Coordinator + +## Status + +**Accepted** (2026-03-31, implicit — original implementation) +**Superseded By:** DD-008 improvements (PR #28, 2026-06-14) + +## Context + +Real-time translation during speech-to-text (STT) produces evolving text. As Deepgram streams transcript segments, each segment's text grows incrementally: + +``` +"Hola" → "Hola como" → "Hola como estas" → "Hola como estas bien" +``` + +The `TranslationCoordinator` must decide what to send to the Google Translate V3 API on each update cycle. + +## Decision + +Send **full segment text** (not just the delta/new portion) to the batch translator. + +### Rationale + +1. **Translation quality**: Google Translate V3's NMT model produces better output for complete sentences than for fragments. Full context enables: + - Correct gender agreement (`"Las estudiantes son inteligentes"` → `"The students are intelligent"`, not `"intelligent"` losing feminine) + - Idiom recognition (`"Estoy de acuerdo"` → `"I agree"`, not `"acuerdo"` → `"agreement"`) + - Proper word ordering in language pairs with different SVO structures + +2. **Assembly simplicity**: The `assembled_translation` stored in `SegmentState` IS the final persisted result — it must be high quality since it's written to Firestore and displayed to users. No stitching logic needed. + +3. **Stability gates filter most fragments**: Text only reaches the TRANSLATE gate when it has sentence-ending punctuation, a speaker switch, >700ms silence, STT `is_final` signal, or ≥12 tokens open for ≥3 seconds. By the time text reaches Google, it's usually a complete clause. + +## Consequences + +### Positive + +| Aspect | Impact | +|--------|--------| +| Translation quality | High — full NMT context for all persisted results | +| Code simplicity | Single-phase architecture, no delta-stitching logic | +| Debuggability | Each translation is self-contained; easy to inspect | +| Cache correctness | Full-text cache entries are always valid as-is | + +### Negative + +| Aspect | Impact | Dollar Cost | +|--------|--------|-------------| +| Redundant API calls | Evolving text generates unique MD5 cache keys at every step | ~$1,700–2,350/mo avoidable | +| Cache hit rate depression | Identical sentences across different segments don't dedup | Current: ~9% effective hit rate | +| Firestore write amplification | Each intermediate translation overwrites previous one | ~5x writes per stabilized segment | +| Character throughput | 284M chars/mo vs ~120–170M chars/mo potential | $4,282/mo vs ~$1,900–2,500/mo target | + +## Alternatives Considered + +### Alternative A: Delta Text + Stitching +Send only `new_text` (the uncommitted portion) to the API. Stitch onto previous `assembled_translation`. + +**Rejected initially** because: +- Fragment quality risk (see Rationale #1 above) +- Complex stitching logic needed for STT backtracking +- Would need to detect when STT revises earlier words + +**Re-evaluated in DD-008** as Phase 2 (future work) with streaming best-effort + finalization quality guarantee. + +### Alternative B: Two-Phase Architecture (Streaming + Finalization) +- **Streaming phase**: Send deltas for real-time UX (best-effort quality) +- **Finalization phase**: On stability signal, send full text split into sentences with per-sentence caching + +**Selected as future direction** (DD-008 Phase 2). Not implemented yet because it requires significant architectural changes and STT backtracking handling. + +### Alternative C: Sentence-Level Dedup Only (CHOSEN — This PR) +Keep sending full text, but split into sentences before cache lookup. Preserves full-sentence translation quality while eliminating redundant translations of identical sentences across segments/users. + +**Implemented in PR #28** alongside merge-aware Redis lookup. + +## This PR's Changes (DD-008 Phase 1 + 3) + +| Change | File | What | Savings | +|--------|------|------|---------| +| Sentence-level dedup | `translation.py:translate_units_batch()` | Split texts → per-sentence cache check → reassemble | $1,070–$1,500/mo | +| Merge-aware Redis lookup | `translation_coordinator.py:199-214` | On prefix reset, check Redis before re-translating | $430–640/mo | +| Design-decision comments | `translation_coordinator.py` | Document why full text is sent, trade-offs, future path | $0 (documentation) | + +## References + +- DD-008 deep dive: `deep-dives/DD-008-translate-dedup-gap.md` +- DD-008 design review: `deep-dives/DD-008-design-review.md` +- Original implementation: commit `fd25ede51` (PR #6155/#6178 by Thinh, 2026-03-31) +- Google Cloud Translate V3 API: https://cloud.google.com/translate/docs/basic/translating-text diff --git a/backend/utils/translation.py b/backend/utils/translation.py index 6a209f6cc73..fe8567cc664 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -530,60 +530,75 @@ def translate_text_by_sentence(self, dest_language: str, text: str) -> Tuple[str def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]]) -> List[Tuple[str, str, str]]: """Translate a batch of (unit_id, text) pairs in minimal GCP API calls. - Deduplicates identical texts, checks all cache layers, and batches - only truly uncached texts into a single API call. + Splits each text into sentences, checks all cache layers PER SENTENCE, + batches only truly uncached sentences into a single API call, then + reassembles per-unit results. + + This sentence-level dedup means that if two different units share + a common sentence (e.g., "How are you?"), only the first occurrence + triggers an API call — subsequent units get the cached result. Returns list of (unit_id, translated_text, detected_lang) in input order. """ if not units: return [] - # Build deduplicated mapping: text_hash -> (text, [indices]) - results = [None] * len(units) - hash_to_info = {} # text_hash -> {'text': str, 'indices': [int]} - - for i, (unit_id, text) in enumerate(units): - text_hash = hashlib.md5(text.encode()).hexdigest() - if text_hash not in hash_to_info: - hash_to_info[text_hash] = {'text': text, 'indices': [], 'hash': text_hash} - hash_to_info[text_hash]['indices'].append(i) - - # Phase 1: Check caches for each unique text - uncached_hashes = [] - for text_hash, info in hash_to_info.items(): + # Phase 0: Split each unit's text into sentences + # unit_sentences[i] = list of (sentence_text, sentence_hash) for unit i + unit_sentences = [] + for unit_id, text in units: + sentences = split_into_sentences(text) + hashed = [(s, hashlib.md5(s.encode()).hexdigest()) for s in sentences] + unit_sentences.append((unit_id, text, hashed)) + + # Build global sentence-level dedup map: + # sent_hash -> {'text': str, 'indices': [(unit_idx, sent_idx), ...]} + sent_hash_to_info = {} + for unit_idx, (_, _, sentences) in enumerate(unit_sentences): + for sent_idx, (sent_text, sent_hash) in enumerate(sentences): + if sent_hash not in sent_hash_to_info: + sent_hash_to_info[sent_hash] = { + 'text': sent_text, + 'indices': [], + } + sent_hash_to_info[sent_hash]['indices'].append((unit_idx, sent_idx)) + + # Phase 1: Check caches for each unique sentence + # sent_translation[hash] = (translated_text, detected_lang) or None + sent_translation = {} # hash -> (str, str) + uncached_sent_hashes = [] + + for sent_hash, info in sent_hash_to_info.items(): # Check negative cache first - if get_negative_cache(text_hash, dest_language): - for idx in info['indices']: - results[idx] = (units[idx][0], info['text'], '') # return original text + if get_negative_cache(sent_hash, dest_language): + sent_translation[sent_hash] = (info['text'], '') # return original continue # Check memory cache - cached = self._check_memory_cache(text_hash, dest_language) + cached = self._check_memory_cache(sent_hash, dest_language) if cached: - for idx in info['indices']: - results[idx] = (units[idx][0], cached[0], cached[1]) + sent_translation[sent_hash] = cached continue # Check Redis cache - redis_cached = get_cached_translation(text_hash, dest_language) + redis_cached = get_cached_translation(sent_hash, dest_language) if redis_cached: translated = redis_cached["text"] detected = redis_cached.get("detected_lang", "") - self._set_memory_cache(text_hash, dest_language, translated, detected) - for idx in info['indices']: - results[idx] = (units[idx][0], translated, detected) + self._set_memory_cache(sent_hash, dest_language, translated, detected) + sent_translation[sent_hash] = (translated, detected) continue - uncached_hashes.append(text_hash) + uncached_sent_hashes.append(sent_hash) - # Phase 2: Batch translate uncached texts - if uncached_hashes: - uncached_texts = [hash_to_info[h]['text'] for h in uncached_hashes] + # Phase 2: Batch translate uncached sentences + if uncached_sent_hashes: + uncached_texts = [sent_hash_to_info[h]['text'] for h in uncached_sent_hashes] for chunk_start in range(0, len(uncached_texts), MAX_BATCH_SIZE): chunk_end = min(chunk_start + MAX_BATCH_SIZE, len(uncached_texts)) chunk = uncached_texts[chunk_start:chunk_end] - chunk_hashes = uncached_hashes[chunk_start:chunk_end] + chunk_hashes = uncached_sent_hashes[chunk_start:chunk_end] try: response = _client.translate_text( @@ -594,29 +609,47 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] ) for j, translation in enumerate(response.translations): - text_hash = chunk_hashes[j] + sent_hash = chunk_hashes[j] translated_text = translation.translated_text detected_lang = translation.detected_language_code or "" - info = hash_to_info[text_hash] - - self._set_memory_cache(text_hash, dest_language, translated_text, detected_lang) - cache_translation(text_hash, dest_language, translated_text, detected_lang) - for idx in info['indices']: - results[idx] = (units[idx][0], translated_text, detected_lang) + self._set_memory_cache(sent_hash, dest_language, translated_text, detected_lang) + cache_translation(sent_hash, dest_language, translated_text, detected_lang) + sent_translation[sent_hash] = (translated_text, detected_lang) except Exception as e: - logger.error(f"Batch translation error: {e}") + logger.error(f"Sentence-level batch translation error: {e}") for h in chunk_hashes: - info = hash_to_info[h] - for idx in info['indices']: - if results[idx] is None: - results[idx] = (units[idx][0], info['text'], '') - - # Fill any remaining None results (shouldn't happen, but be safe) - for i in range(len(results)): - if results[i] is None: - results[i] = (units[i][0], units[i][1], '') + if h not in sent_translation: + sent_translation[h] = (sent_hash_to_info[h]['text'], '') + + # Phase 3: Reassemble per-unit results from sentence translations + results = [] + for unit_idx, (unit_id, original_text, sentences) in enumerate(unit_sentences): + if not sentences: + results.append((unit_id, original_text, '')) + continue + + translated_parts = [] + detected_langs = [] + for sent_text, sent_hash in sentences: + if sent_hash in sent_translation: + trans_text, det_lang = sent_translation[sent_hash] + translated_parts.append(trans_text) + if det_lang: + detected_langs.append(det_lang) + else: + # Fallback: should not happen, but use original text + translated_parts.append(sent_text) + + assembled = ' '.join(translated_parts) + # Dominant detected language from constituent sentences + dominant_lang = '' + if detected_langs: + lang_counts = Counter(detected_langs) + dominant_lang = lang_counts.most_common(1)[0][0] + + results.append((unit_id, assembled, dominant_lang)) return results diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index f4e0f7b7adc..c535e75ede8 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -21,6 +21,7 @@ from utils.translation import ( TranslationNeed, classify_translation_need, + get_cached_translation, set_negative_cache, TranslationService, ) @@ -97,16 +98,30 @@ def _compute_stability_signals( class TranslationCoordinator: """Orchestrates real-time translation for a single WebSocket session. - Usage in transcribe.py: - coordinator = TranslationCoordinator( - target_language='en', - translation_service=translation_service, - on_translation_ready=callback, - ) - # On each segment update: - await coordinator.observe(updated_segments, removed_ids, conversation_id) - # On session close: - await coordinator.flush() + ## Architecture + + This coordinator implements SINGLE-PHASE translation: every stable text + update is sent in full to the batch translator, which calls Google + Translate V3 with the complete segment text. See DD-008 design doc + (`deep-dives/DD-008-design-review.md`) for the planned TWO-PHASE + architecture (streaming deltas + final full-sentence translation). + + ## Data Flow + + observe() → [stability gates] → batch_buffer → _flush_batch() + → translate_units_batch() → [LRU → Redis → API] + → on_translation_ready() → Firestore persist + WebSocket push + + ## Cost Note + + Because we send full text (not delta), each evolving segment generates + multiple translations of overlapping content. Current cost: ~$4,282/mo + for 284M characters. Target (with DD-008 fixes): ~$1,900–2,500/mo. + + ## Key Trade-off + + Translation quality (full context) vs cost (redundant chars). + Currently optimized for quality. See DD-008 for path to both. """ def __init__( @@ -184,10 +199,21 @@ async def observe( # Prefix-safe check: if prefix changed, reset if state.committed_text and not text.startswith(state.committed_text): - state.committed_text = '' - state.assembled_translation = None - state.detected_lang = None - self.metrics['prefix_resets'] += 1 + # Check if the new merged text was already translated (Redis cache) + text_hash = hashlib.md5(text.encode()).hexdigest() + redis_cached = get_cached_translation(text_hash, self.target_language) + if redis_cached: + # Found in Redis — adopt as committed, skip re-translation + state.committed_text = text + state.assembled_translation = redis_cached['text'] + state.detected_lang = redis_cached.get('detected_lang', '') + self.metrics['prefix_resets'] += 1 + continue # Don't add to batch buffer + else: + state.committed_text = '' + state.assembled_translation = None + state.detected_lang = None + self.metrics['prefix_resets'] += 1 # Only translate the new (uncommitted) portion new_text = text[len(state.committed_text) :].strip() if state.committed_text else text @@ -242,6 +268,27 @@ async def observe( self.metrics['classify_translates'] += 1 version = self._next_version() state.version = version + + # DESIGN DECISION: We send `text` (full segment text) instead of + # `new_text` (the uncommitted delta) to the batch translator. + # + # Rationale: + # - Google Translate V3 translates each content string independently; + # full sentence context improves disambiguation (gender agreement, + # idioms like "estoy de acuerdo" → "I agree", not "acuerdo" → "agreement") + # - The assembled_translation IS the final persisted result — it must be + # high quality since it's stored in Firestore and displayed to users + # + # Trade-off: This means evolving text ("Hola" → "Hola como" → "Hola como estas") + # generates unique MD5 cache keys at every step, causing 3–4x redundant + # translations per stabilized segment. See DD-008 for cost analysis and + # proposed two-phase architecture that preserves quality while reducing cost. + # + # If you change this to send new_text (delta), you MUST also: + # 1. Update assembly stitching logic in _flush_batch() + # 2. Ensure stability gates filter out sub-sentence fragments + # 3. Update the cache key strategy + # 4. Measure translation quality regression in production self._batch_buffer.append((segment.id, text, conversation_id, version)) # (Re)start batch aggregation timer From f15573d9e664ac5914db0081db520a8e0473657c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 14 Jun 2026 22:41:34 +0000 Subject: [PATCH 2/6] Fix Redis cache hit missing callback and full-text cache write - Call on_translation_ready when prefix-reset adopts a Redis-cached translation so Firestore persist and WebSocket push run for cached segments. - Cache assembled full-text translations in translate_units_batch so prefix-reset lookups hit Redis for multi-sentence segments. --- backend/utils/translation.py | 4 ++++ backend/utils/translation_coordinator.py | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/utils/translation.py b/backend/utils/translation.py index fe8567cc664..70319febf99 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -649,6 +649,10 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] lang_counts = Counter(detected_langs) dominant_lang = lang_counts.most_common(1)[0][0] + text_hash = hashlib.md5(original_text.encode()).hexdigest() + self._set_memory_cache(text_hash, dest_language, assembled, dominant_lang) + cache_translation(text_hash, dest_language, assembled, dominant_lang) + results.append((unit_id, assembled, dominant_lang)) return results diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index c535e75ede8..7a6784526a9 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -204,10 +204,13 @@ async def observe( redis_cached = get_cached_translation(text_hash, self.target_language) if redis_cached: # Found in Redis — adopt as committed, skip re-translation + translated_text = redis_cached['text'] + detected_lang = redis_cached.get('detected_lang', '') state.committed_text = text - state.assembled_translation = redis_cached['text'] - state.detected_lang = redis_cached.get('detected_lang', '') + state.assembled_translation = translated_text + state.detected_lang = detected_lang self.metrics['prefix_resets'] += 1 + await self.on_translation_ready(segment.id, translated_text, detected_lang, conversation_id) continue # Don't add to batch buffer else: state.committed_text = '' From e13a77b323321a9e961301e0b6ef3b8d9fbdc083 Mon Sep 17 00:00:00 2001 From: David Zhang Date: Mon, 15 Jun 2026 06:10:57 +0700 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20cache=20poisoning=20+=20stale=20batch=20invalidatio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: Skip Redis cache write when any sentence fell back to original text due to API failure. Previously the exception path stored untranslated fallback text into Redis, poisoning the 14-day TTL cache so future requests would return raw text instead of retrying. P2: Filter pending batch buffer entries for a segment when adopting a Redis-cached translation on prefix reset. Without this, a stale buffered entry could later overwrite the cached result in _flush_batch(). --- backend/utils/translation.py | 9 ++++++++- backend/utils/translation_coordinator.py | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/utils/translation.py b/backend/utils/translation.py index 70319febf99..17fb3c26835 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -592,6 +592,7 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] uncached_sent_hashes.append(sent_hash) # Phase 2: Batch translate uncached sentences + _failed_sent_hashes: set = set() # track which sentences fell back to original text if uncached_sent_hashes: uncached_texts = [sent_hash_to_info[h]['text'] for h in uncached_sent_hashes] @@ -619,9 +620,12 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] except Exception as e: logger.error(f"Sentence-level batch translation error: {e}") + # Mark failed sentences as fallbacks so Phase 3 does NOT + # write them to Redis (avoids poisoning cache with raw text). for h in chunk_hashes: if h not in sent_translation: sent_translation[h] = (sent_hash_to_info[h]['text'], '') + _failed_sent_hashes.add(h) # Phase 3: Reassemble per-unit results from sentence translations results = [] @@ -651,7 +655,10 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] text_hash = hashlib.md5(original_text.encode()).hexdigest() self._set_memory_cache(text_hash, dest_language, assembled, dominant_lang) - cache_translation(text_hash, dest_language, assembled, dominant_lang) + # Only persist to long-lived Redis cache if NO sentence fell back + # to original text (avoids poisoning cache with untranslated output). + if not any(sh in _failed_sent_hashes for _, sh in sentences): + cache_translation(text_hash, dest_language, assembled, dominant_lang) results.append((unit_id, assembled, dominant_lang)) diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index 7a6784526a9..64210c631c1 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -210,6 +210,11 @@ async def observe( state.assembled_translation = translated_text state.detected_lang = detected_lang self.metrics['prefix_resets'] += 1 + # Invalidate any pending batch entries for this segment so + # _flush_batch() does not overwrite with stale buffered text. + self._batch_buffer = [ + entry for entry in self._batch_buffer if entry[0] != segment.id + ] await self.on_translation_ready(segment.id, translated_text, detected_lang, conversation_id) continue # Don't add to batch buffer else: From bd21d935e89adfcff71999128c3bd3fdc29b8cd1 Mon Sep 17 00:00:00 2001 From: David Zhang Date: Wed, 17 Jun 2026 06:10:41 +0700 Subject: [PATCH 4/6] style: fix black formatting in translation_coordinator.py --- backend/utils/translation_coordinator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index 64210c631c1..184325f1d85 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -212,9 +212,7 @@ async def observe( self.metrics['prefix_resets'] += 1 # Invalidate any pending batch entries for this segment so # _flush_batch() does not overwrite with stale buffered text. - self._batch_buffer = [ - entry for entry in self._batch_buffer if entry[0] != segment.id - ] + self._batch_buffer = [entry for entry in self._batch_buffer if entry[0] != segment.id] await self.on_translation_ready(segment.id, translated_text, detected_lang, conversation_id) continue # Don't add to batch buffer else: From cfaa910f267cd8fdf6e3fe773b217e41a4523d2a Mon Sep 17 00:00:00 2001 From: David Zhang Date: Fri, 19 Jun 2026 05:12:53 +0700 Subject: [PATCH 5/6] fix: guard Redis cache path with should_persist_translation + version bump + timing refresh Addresses remaining P1/P2 bot review comments: - Add should_persist_translation guard to prevent no-op translations from triggering spurious UI badges (P1) - Bump state.version after adopting cached translation so in-flight batch jobs are rejected as stale (P1) - Refresh latest_text/last_update_at timing to prevent premature stability detection (P2) --- backend/utils/translation_coordinator.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index 184325f1d85..d66e3387b2e 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -206,13 +206,26 @@ async def observe( # Found in Redis — adopt as committed, skip re-translation translated_text = redis_cached['text'] detected_lang = redis_cached.get('detected_lang', '') + # Guard: skip no-op "translations" that would spam UI badges. + if not should_persist_translation(text, translated_text, detected_lang, self.target_language): + state.committed_text = text + state.assembled_translation = translated_text + state.detected_lang = detected_lang + state.latest_text = text + state.last_update_at = now + self.metrics['prefix_resets'] += 1 + continue # Don't add to batch buffer state.committed_text = text state.assembled_translation = translated_text state.detected_lang = detected_lang + # Bump version so any in-flight batch job is rejected as stale. + state.version = self._next_version() self.metrics['prefix_resets'] += 1 # Invalidate any pending batch entries for this segment so # _flush_batch() does not overwrite with stale buffered text. self._batch_buffer = [entry for entry in self._batch_buffer if entry[0] != segment.id] + state.latest_text = text + state.last_update_at = now await self.on_translation_ready(segment.id, translated_text, detected_lang, conversation_id) continue # Don't add to batch buffer else: From 1b8ebf0efdaae63bf85591fdea03ee2b60bb7c2b Mon Sep 17 00:00:00 2001 From: David Zhang Date: Fri, 19 Jun 2026 07:08:10 +0700 Subject: [PATCH 6/6] =?UTF-8?q?fix:=20address=20new=20bot=20review=20comme?= =?UTF-8?q?nts=20(P1=C3=972=20+=20P2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 fixes: - Bump version + clear batch buffer in no-op translation path so in-flight batch jobs cannot overwrite cached results - Gate memory cache write on _failed_sent_hashes same as Redis cache to prevent poisoning LRU with untranslated fallback text P2 fix: - Offload Redis get_cached_translation to run_blocking in async observe() to avoid blocking the event loop --- backend/utils/translation.py | 6 +++--- backend/utils/translation_coordinator.py | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/utils/translation.py b/backend/utils/translation.py index 17fb3c26835..9f21eac2dd1 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -654,10 +654,10 @@ def translate_units_batch(self, dest_language: str, units: List[Tuple[str, str]] dominant_lang = lang_counts.most_common(1)[0][0] text_hash = hashlib.md5(original_text.encode()).hexdigest() - self._set_memory_cache(text_hash, dest_language, assembled, dominant_lang) - # Only persist to long-lived Redis cache if NO sentence fell back - # to original text (avoids poisoning cache with untranslated output). + # Only persist to any cache if NO sentence fell back to original text + # (avoids poisoning both in-memory LRU and Redis with untranslated output). if not any(sh in _failed_sent_hashes for _, sh in sentences): + self._set_memory_cache(text_hash, dest_language, assembled, dominant_lang) cache_translation(text_hash, dest_language, assembled, dominant_lang) results.append((unit_id, assembled, dominant_lang)) diff --git a/backend/utils/translation_coordinator.py b/backend/utils/translation_coordinator.py index d66e3387b2e..8d389408bd6 100644 --- a/backend/utils/translation_coordinator.py +++ b/backend/utils/translation_coordinator.py @@ -201,7 +201,9 @@ async def observe( if state.committed_text and not text.startswith(state.committed_text): # Check if the new merged text was already translated (Redis cache) text_hash = hashlib.md5(text.encode()).hexdigest() - redis_cached = get_cached_translation(text_hash, self.target_language) + redis_cached = await run_blocking( + sync_executor, get_cached_translation, text_hash, self.target_language + ) if redis_cached: # Found in Redis — adopt as committed, skip re-translation translated_text = redis_cached['text'] @@ -211,8 +213,10 @@ async def observe( state.committed_text = text state.assembled_translation = translated_text state.detected_lang = detected_lang + state.version = self._next_version() state.latest_text = text state.last_update_at = now + self._batch_buffer = [entry for entry in self._batch_buffer if entry[0] != segment.id] self.metrics['prefix_resets'] += 1 continue # Don't add to batch buffer state.committed_text = text