From 8d610a934e0d21ef16bd115b4155d7c165f8ad30 Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Tue, 6 Jan 2026 01:18:02 +0800
Subject: [PATCH 1/6] feat: check for google_search empty organic results

---
 apps/miroflow-agent/src/core/orchestrator.py | 59 +++++++++++++++-----
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index 68284b89..696595ba 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -228,6 +228,29 @@ async def _stream_tool_call(
 
         return tool_call_id
 
+    def _is_google_search_empty_result(self, tool_name: str, tool_result: dict) -> bool:
+        """
+        Check if a google_search result has no organic results ("organic" missing or empty).
+        This indicates a poor search query that should be retried; unparseable results return False.
+        """
+        if tool_name != "google_search":
+            return False
+
+        result = tool_result.get("result")
+        if not result:
+            return False
+
+        try:
+            if isinstance(result, str):
+                result_dict = json.loads(result)
+            else:
+                result_dict = result
+
+            organic = result_dict.get("organic", [])
+            return len(organic) == 0
+        except (json.JSONDecodeError, TypeError, AttributeError):
+            return False
+
     def get_scrape_result(self, result: str) -> str:
         """
         Process scrape result and truncate if too long to support more conversation turns.
@@ -634,9 +657,13 @@ async def run_sub_agent(
                         else tool_result.get("error")
                     )
 
-                    # Check for "Unknown tool:" error and rollback
-                    if str(result).startswith("Unknown tool:"):
-                        # If we haven't reached rollback limit, rollback and retry
+                    # Check for errors that should trigger rollback
+                    should_rollback_result = (
+                        str(result).startswith("Unknown tool:")
+                        or str(result).startswith("Error executing tool")
+                        or self._is_google_search_empty_result(tool_name, tool_result)
+                    )
+                    if should_rollback_result:
                         if consecutive_rollbacks < self.MAX_CONSECUTIVE_ROLLBACKS - 1:
                             message_history.pop()
                             turn_count -= 1
@@ -645,15 +672,14 @@ async def run_sub_agent(
                             self.task_log.log_step(
                                 "warning",
                                 f"{sub_agent_name} | Turn: {turn_count} | Rollback",
-                                f"Unknown tool error - tool: {tool_name}, error: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}",
+                                f"Tool result error - tool: {tool_name}, result: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}",
                             )
-                            break  # Exit inner for loop, then continue outer while loop
+                            break
                         else:
-                            # Reached rollback limit, allow error to be sent to LLM as feedback
                             self.task_log.log_step(
                                 "warning",
                                 f"{sub_agent_name} | Turn: {turn_count} | Allow Error Feedback",
-                                f"Allowing unknown tool error to be sent to LLM after {consecutive_rollbacks} rollbacks - tool: {tool_name}, error: '{str(result)[:200]}'",
+                                f"Allowing error result after {consecutive_rollbacks} rollbacks - tool: {tool_name}, result: '{str(result)[:200]}'",
                             )
 
                     await self._stream_tool_call(
@@ -1147,9 +1173,15 @@ async def run_main_agent(
                             else tool_result.get("error")
                         )
 
-                        # Check for "Unknown tool:" error and rollback
-                        if str(result).startswith("Unknown tool:"):
-                            # If we haven't reached rollback limit, rollback and retry
+                        # Check for errors that should trigger rollback
+                        should_rollback_result = (
+                            str(result).startswith("Unknown tool:")
+                            or str(result).startswith("Error executing tool")
+                            or self._is_google_search_empty_result(
+                                tool_name, tool_result
+                            )
+                        )
+                        if should_rollback_result:
                             if (
                                 consecutive_rollbacks
                                 < self.MAX_CONSECUTIVE_ROLLBACKS - 1
@@ -1161,15 +1193,14 @@ async def run_main_agent(
                                 self.task_log.log_step(
                                     "warning",
                                     f"Main Agent | Turn: {turn_count} | Rollback",
-                                    f"Unknown tool error - tool: {tool_name}, error: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}",
+                                    f"Tool result error - tool: {tool_name}, result: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}",
                                 )
-                                break  # Exit inner for loop, then continue outer while loop
+                                break
                             else:
-                                # Reached rollback limit, allow error to be sent to LLM as feedback
                                 self.task_log.log_step(
                                     "warning",
                                     f"Main Agent | Turn: {turn_count} | Allow Error Feedback",
-                                    f"Allowing unknown tool error to be sent to LLM after {consecutive_rollbacks} rollbacks - tool: {tool_name}, error: '{str(result)[:200]}'",
+                                    f"Allowing error result after {consecutive_rollbacks} rollbacks - tool: {tool_name}, result: '{str(result)[:200]}'",
                                 )
 
                         await self._stream_tool_call(

From 694067b6fffffbe911caae9673bb97c344e9f2e3 Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Thu, 8 Jan 2026 04:41:05 +0800
Subject: [PATCH 2/6] support failure report generation

---
 .../benchmarks/common_benchmark.py            | 100 +++++++++++++++++-
 apps/miroflow-agent/main.py                   |   2 +-
 apps/miroflow-agent/src/core/orchestrator.py  |  99 +++++++++++++++--
 apps/miroflow-agent/src/core/pipeline.py      |  21 +++-
 .../miroflow-agent/src/io/output_formatter.py |   4 +-
 .../src/llm/providers/openai_client.py        |   5 +
 .../miroflow-agent/src/utils/parsing_utils.py |  51 +++++++++
 apps/miroflow-agent/src/utils/prompt_utils.py |  57 ++++++++++
 8 files changed, 321 insertions(+), 18 deletions(-)

diff --git a/apps/miroflow-agent/benchmarks/common_benchmark.py b/apps/miroflow-agent/benchmarks/common_benchmark.py
index a2e74164..8fec0138 100644
--- a/apps/miroflow-agent/benchmarks/common_benchmark.py
+++ b/apps/miroflow-agent/benchmarks/common_benchmark.py
@@ -23,9 +23,12 @@
     execute_task_pipeline,
 )
 from src.logging.summary_time_cost import generate_summary
-
-# Constants for format error detection
-FORMAT_ERROR_MESSAGE = "No \\boxed{} content found in the final answer."
+from src.utils.prompt_utils import (
+    FAILURE_EXPERIENCE_FOOTER,
+    FAILURE_EXPERIENCE_HEADER,
+    FAILURE_EXPERIENCE_ITEM,
+    FORMAT_ERROR_MESSAGE,
+)
 
 
 def _task_worker(task_dict, cfg_dict, evaluator_kwargs):
@@ -292,17 +295,68 @@ def extract_timestamp(file_path):
 
                     max_format_retries = self.format_error_retry_limit
 
+                    # Track accumulated failure experiences for this attempt
+                    # Start with the original task description
+                    current_task_description = task_description
+                    failure_experiences = []
+
+                    # Resume: Recover failure experiences from previous retry logs
+                    if format_retry_count > 0 and logs_dir.exists():
+                        print(
+                            f"    Resuming from retry {format_retry_count}, recovering previous failure experiences..."
+                        )
+                        for prev_retry in range(format_retry_count):
+                            prev_log_pattern = f"task_{task.task_id}_attempt-{attempt}_format-retry-{prev_retry}_*.json"
+                            prev_logs = sorted(list(logs_dir.glob(prev_log_pattern)))
+                            if prev_logs:
+                                prev_log_file = prev_logs[-1]  # Get the latest one
+                                try:
+                                    with open(
+                                        prev_log_file, "r", encoding="utf-8"
+                                    ) as f:
+                                        prev_log_data = json.load(f)
+                                        # Extract failure experience from trace_data
+                                        trace_data = prev_log_data.get("trace_data", {})
+                                        prev_failure_exp = trace_data.get(
+                                            "failure_experience_summary"
+                                        )
+                                        if prev_failure_exp:
+                                            failure_experiences.append(prev_failure_exp)
+                                            print(
+                                                f"      Recovered failure experience from retry {prev_retry}"
+                                            )
+                                except Exception as e:
+                                    print(
+                                        f"      Warning: Failed to load previous log {prev_log_file}: {e}"
+                                    )
+
+                        # Rebuild enhanced task description with recovered failure experiences
+                        if failure_experiences:
+                            current_task_description += FAILURE_EXPERIENCE_HEADER
+                            for idx, exp in enumerate(failure_experiences, 1):
+                                current_task_description += (
+                                    FAILURE_EXPERIENCE_ITEM.format(
+                                        attempt_number=idx,
+                                        failure_summary=exp,
+                                    )
+                                )
+                            current_task_description += FAILURE_EXPERIENCE_FOOTER
+                            print(
+                                f"    Recovered {len(failure_experiences)} failure experience(s) from previous retries"
+                            )
+
                     while format_retry_count <= max_format_retries:
                         try:
                             (
                                 response,
                                 final_boxed_answer,
                                 log_file_path,
+                                failure_experience_summary,
                             ) = await execute_task_pipeline(
                                 cfg=self.cfg,
                                 task_id=f"{task.task_id}_attempt-{attempt}_format-retry-{format_retry_count}",
                                 task_file_name=task_file_path,
-                                task_description=task_description,
+                                task_description=current_task_description,
                                 main_agent_tool_manager=self.main_agent_tool_manager,
                                 sub_agent_tool_managers=self.sub_agent_tool_managers,
                                 output_formatter=self.output_formatter,
@@ -322,12 +376,48 @@ def extract_timestamp(file_path):
                             ):
                                 format_retry_count += 1
                                 if format_retry_count <= max_format_retries:
+                                    # Use the model-generated failure experience summary
+                                    print(
+                                        f"    Format error detected, using model-generated failure summary for retry {format_retry_count}..."
+                                    )
+
+                                    if failure_experience_summary:
+                                        failure_experiences.append(
+                                            failure_experience_summary
+                                        )
+
+                                        # Build enhanced task description with accumulated failure experiences
+                                        # Start fresh from original task_description each time
+                                        current_task_description = task_description
+                                        current_task_description += (
+                                            FAILURE_EXPERIENCE_HEADER
+                                        )
+                                        for idx, exp in enumerate(
+                                            failure_experiences, 1
+                                        ):
+                                            current_task_description += (
+                                                FAILURE_EXPERIENCE_ITEM.format(
+                                                    attempt_number=idx,
+                                                    failure_summary=exp,
+                                                )
+                                            )
+                                        current_task_description += (
+                                            FAILURE_EXPERIENCE_FOOTER
+                                        )
+
+                                        print(
+                                            f"    Enhanced task description with {len(failure_experiences)} failure experience(s)"
+                                        )
+                                    else:
+                                        print(
+                                            "    No failure experience summary generated, retrying without enhancement..."
+                                        )
                                     continue
                                 else:
                                     # Exceeded format retry limit
                                     attempt_result["status"] = "success"
                                     attempt_result["model_boxed_answer"] = (
-                                        "No \\boxed{} content found after format error retry limit exceeded."
+                                        f"{FORMAT_ERROR_MESSAGE} (after {max_format_retries} retries)"
                                     )
                                     attempt_result["error_message"] = (
                                         f"Exceeded format error retry limit ({max_format_retries})"
diff --git a/apps/miroflow-agent/main.py b/apps/miroflow-agent/main.py
index 75e61e18..c3d7676b 100644
--- a/apps/miroflow-agent/main.py
+++ b/apps/miroflow-agent/main.py
@@ -33,7 +33,7 @@ async def amain(cfg: DictConfig) -> None:
     task_file_name = ""
 
     # Execute task using the pipeline
-    final_summary, final_boxed_answer, log_file_path = await execute_task_pipeline(
+    final_summary, final_boxed_answer, log_file_path, _ = await execute_task_pipeline(
         cfg=cfg,
         task_id=task_id,
         task_file_name=task_file_name,
diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index 696595ba..0617c138 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -23,8 +23,14 @@
     TaskLog,
     get_utc_plus_8_time,
 )
-from ..utils.parsing_utils import extract_llm_response_text
+from ..utils.parsing_utils import (
+    extract_failure_experience_summary,
+    extract_llm_response_text,
+)
 from ..utils.prompt_utils import (
+    FAILURE_SUMMARY_ASSISTANT_PREFIX,
+    FAILURE_SUMMARY_PROMPT,
+    FORMAT_ERROR_MESSAGE,
     generate_agent_specific_system_prompt,
     generate_agent_summarize_prompt,
     mcp_tags,
@@ -406,6 +412,79 @@ async def _handle_llm_call(
             # Return empty response with should_break=False, need to retry
             return "", False, None, original_message_history
 
+    async def _generate_failure_summary(
+        self,
+        system_prompt: str,
+        message_history: List[Dict[str, Any]],
+        tool_definitions: List[Dict],
+        turn_count: int,
+    ) -> Optional[str]:
+        """Generate a failure experience summary when task was not completed successfully.
+
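+        Args:
+            system_prompt: System prompt passed through to the LLM call.
+            message_history: The conversation history; edited on a shallow copy only.
+            tool_definitions: Available tool definitions.
+            turn_count: Current turn count; offset by 10 to form a distinct step ID.
+        Returns: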
+            The extracted failure experience summary, or None if generation failed.
+        """
+        self.task_log.log_step(
+            "info",
+            "Main Agent | Failure Summary",
+            "Generating failure experience summary for potential retry...",
+        )
+
+        # Build failure summary history
+        failure_summary_history = message_history.copy()
+        if failure_summary_history and failure_summary_history[-1]["role"] == "user":
+            failure_summary_history.pop()
+
+        # Add failure summary prompt and assistant prefix for structured output
+        failure_summary_history.append(
+            {"role": "user", "content": FAILURE_SUMMARY_PROMPT}
+        )
+        failure_summary_history.append(
+            {"role": "assistant", "content": FAILURE_SUMMARY_ASSISTANT_PREFIX}
+        )
+
+        # Call LLM to generate failure summary (auto-detects assistant prefix for vLLM continuation)
+        (
+            failure_summary_text,
+            _,
+            _,
+            _,
+        ) = await self._handle_llm_call(
+            system_prompt,
+            failure_summary_history,
+            tool_definitions,
+            turn_count + 10,  # Use a different step id
+            "Main Agent | Failure Experience Summary",
+            agent_type="main",
+        )
+
+        # Prepend the assistant prefix to the response for complete output
+        if failure_summary_text:
+            failure_summary_text = (
+                FAILURE_SUMMARY_ASSISTANT_PREFIX + failure_summary_text
+            )
+            failure_experience_summary = extract_failure_experience_summary(
+                failure_summary_text
+            )
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Failure Summary",
+                f"Generated failure experience summary:\n{failure_experience_summary[:500]}...",
+            )
+            return failure_experience_summary
+        else:
+            self.task_log.log_step(
+                "warning",
+                "Main Agent | Failure Summary",
+                "Failed to generate failure experience summary",
+            )
+            return None
+
     async def run_sub_agent(
         self,
         sub_agent_name: str,
@@ -1371,10 +1450,7 @@ async def run_main_agent(
                 )
 
                 # Check if we got a valid boxed answer
-                if (
-                    final_boxed_answer
-                    != "No \\boxed{} content found in the final answer."
-                ):
+                if final_boxed_answer != FORMAT_ERROR_MESSAGE:
                     self.task_log.log_step(
                         "info",
                         "Main Agent | Final Answer",
@@ -1415,7 +1491,7 @@ async def run_main_agent(
         if not final_answer_text:
             final_answer_text = "No final answer generated."
             final_summary = final_answer_text
-            final_boxed_answer = "No \\boxed{} content found in the final answer."
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
             self.task_log.log_step(
                 "error",
                 "Main Agent | Final Answer",
@@ -1430,7 +1506,7 @@ async def run_main_agent(
 
         # Fallback to intermediate answer if still no boxed answer
         if (
-            final_boxed_answer == "No \\boxed{} content found in the final answer."
+            final_boxed_answer == FORMAT_ERROR_MESSAGE
             and self.intermediate_boxed_answers
         ):
             final_boxed_answer = self.intermediate_boxed_answers[-1]
@@ -1440,6 +1516,13 @@ async def run_main_agent(
                 f"Using intermediate boxed answer as fallback: {final_boxed_answer}",
             )
 
+        # Generate failure experience summary if no valid boxed answer found
+        failure_experience_summary = None
+        if final_boxed_answer == FORMAT_ERROR_MESSAGE:
+            failure_experience_summary = await self._generate_failure_summary(
+                system_prompt, message_history, tool_definitions, turn_count
+            )
+
         await self._stream_tool_call("show_text", {"text": final_boxed_answer})
         await self._stream_end_llm("Final Summary")
         await self._stream_end_agent("Final Summary", self.current_agent_id)
@@ -1461,4 +1544,4 @@ async def run_main_agent(
             f"Main agent task {task_id} completed successfully",
         )
         gc.collect()
-        return final_summary, final_boxed_answer
+        return final_summary, final_boxed_answer, failure_experience_summary
diff --git a/apps/miroflow-agent/src/core/pipeline.py b/apps/miroflow-agent/src/core/pipeline.py
index 78d41402..b0cc1e6c 100644
--- a/apps/miroflow-agent/src/core/pipeline.py
+++ b/apps/miroflow-agent/src/core/pipeline.py
@@ -98,7 +98,11 @@ async def execute_task_pipeline(
             sub_agent_tool_definitions=sub_agent_tool_definitions,
         )
 
-        final_summary, final_boxed_answer = await orchestrator.run_main_agent(
+        (
+            final_summary,
+            final_boxed_answer,
+            failure_experience_summary,
+        ) = await orchestrator.run_main_agent(
             task_description=task_description,
             task_file_name=task_file_name,
             task_id=task_id,
@@ -109,8 +113,19 @@ async def execute_task_pipeline(
         task_log.final_boxed_answer = final_boxed_answer
         task_log.status = "success"
 
+        # Store failure experience summary in task log if available
+        if failure_experience_summary:
+            task_log.trace_data["failure_experience_summary"] = (
+                failure_experience_summary
+            )
+
         log_file_path = task_log.save()
-        return final_summary, final_boxed_answer, log_file_path
+        return (
+            final_summary,
+            final_boxed_answer,
+            log_file_path,
+            failure_experience_summary,
+        )
 
     except Exception as e:
         error_details = traceback.format_exc()
@@ -134,7 +149,7 @@ async def execute_task_pipeline(
 
         log_file_path = task_log.save()
 
-        return error_message, "", log_file_path
+        return error_message, "", log_file_path, None
 
     finally:
         task_log.end_time = get_utc_plus_8_time()
diff --git a/apps/miroflow-agent/src/io/output_formatter.py b/apps/miroflow-agent/src/io/output_formatter.py
index 3c1d0fc5..8e63b046 100644
--- a/apps/miroflow-agent/src/io/output_formatter.py
+++ b/apps/miroflow-agent/src/io/output_formatter.py
@@ -3,6 +3,8 @@
 
 import re
 
+from ..utils.prompt_utils import FORMAT_ERROR_MESSAGE
+
 
 class OutputFormatter:
     def _extract_boxed_content(self, text: str) -> str:
@@ -117,7 +119,7 @@ def format_final_summary_and_log(self, final_answer_text, client=None):
             summary_lines.append(boxed_result)
         elif final_answer_text:
             summary_lines.append("No \\boxed{} content found.")
-            boxed_result = "No \\boxed{} content found in the final answer."
+            boxed_result = FORMAT_ERROR_MESSAGE
 
         # Token usage statistics and cost estimation - use client method
         if client and hasattr(client, "format_token_usage_summary"):
diff --git a/apps/miroflow-agent/src/llm/providers/openai_client.py b/apps/miroflow-agent/src/llm/providers/openai_client.py
index 2eda891f..a81bc76d 100644
--- a/apps/miroflow-agent/src/llm/providers/openai_client.py
+++ b/apps/miroflow-agent/src/llm/providers/openai_client.py
@@ -137,6 +137,11 @@ async def _create_message(
             if "deepseek-v3-1" in self.model_name:
                 params["extra_body"]["thinking"] = {"type": "enabled"}
 
+            # If the last message is an assistant prefix, request continuation of it (no new generation prompt)
+            if messages_for_llm and messages_for_llm[-1].get("role") == "assistant":
+                params["extra_body"]["continue_final_message"] = True
+                params["extra_body"]["add_generation_prompt"] = False
+
             try:
                 if self.async_client:
                     response = await self.client.chat.completions.create(**params)
diff --git a/apps/miroflow-agent/src/utils/parsing_utils.py b/apps/miroflow-agent/src/utils/parsing_utils.py
index 5f6e5274..04733ae6 100644
--- a/apps/miroflow-agent/src/utils/parsing_utils.py
+++ b/apps/miroflow-agent/src/utils/parsing_utils.py
@@ -100,6 +100,57 @@ def safe_json_loads(arguments_str: str) -> dict:
     }
 
 
+def extract_failure_experience_summary(text: str) -> str:
+    """
+    Extract failure experience summary from LLM response text.
+
+    The text may contain:
+    - <think>...</think> block (thinking content)
+    - Main content after </think> and before <use_mcp_tool>
+    - <use_mcp_tool>...</use_mcp_tool> block (tool call, ignored)
+
+    Examples:
+        "<think>\n{xxx}\n</think>\n\n{content}\n\n<use_mcp_tool>..."
+        "<think>\n{xxx}\n</think>\n\n{content}"
+        "{content}"  (no think block)
+
+    Returns:
+        - If content is empty after strip, return think_content
+        - If both think_content and content are non-empty, return content
+        - mcp_block is never used
+    """
+    if not text:
+        return ""
+
+    think_content = ""
+    content = ""
+
+    # Extract think content
+    think_match = re.search(r"<think>([\s\S]*?)</think>", text)
+    if think_match:
+        think_content = think_match.group(1).strip()
+        # Get content after </think>
+        after_think = text[think_match.end() :]
+    else:
+        # No think block, entire text is potential content
+        after_think = text
+
+    # Cut content at the first <use_mcp_tool> tag; everything from the tag onward is discarded
+    mcp_match = re.search(r"<use_mcp_tool>[\s\S]*", after_think)
+    if mcp_match:
+        content = after_think[: mcp_match.start()].strip()
+    else:
+        content = after_think.strip()
+
+    # Apply the rules:
+    # - If content is empty, use think_content
+    # - If both are non-empty, use content
+    if content:
+        return content
+    else:
+        return think_content
+
+
 def extract_llm_response_text(llm_response):
     """
     Extract text from LLM response, excluding <use_mcp_tool> tags. Stop immediately when this opening tag is encountered.
diff --git a/apps/miroflow-agent/src/utils/prompt_utils.py b/apps/miroflow-agent/src/utils/prompt_utils.py
index 1157eb0e..6aeb4ea6 100644
--- a/apps/miroflow-agent/src/utils/prompt_utils.py
+++ b/apps/miroflow-agent/src/utils/prompt_utils.py
@@ -1,6 +1,63 @@
 # Copyright (c) 2025 MiroMind
 # This source code is licensed under the MIT License.
 
+# ============================================================================
+# Format Error Messages
+# ============================================================================
+
+FORMAT_ERROR_MESSAGE = "No \\boxed{} content found in the final answer."
+
+# ============================================================================
+# Failure Experience Templates (for format error retry)
+# ============================================================================
+
+# Header that appears once before all failure experiences
+FAILURE_EXPERIENCE_HEADER = """
+
+=== Previous Attempts Analysis ===
+The following summarizes what was tried before and why it didn't work. Use this to guide a NEW approach.
+
+"""
+
+# Template for each individual failure experience (used multiple times)
+FAILURE_EXPERIENCE_ITEM = """[Attempt {attempt_number}]
+{failure_summary}
+
+"""
+
+# Footer that appears once after all failure experiences
+FAILURE_EXPERIENCE_FOOTER = """=== End of Analysis ===
+
+Based on the above, you should try a different strategy this time.
+"""
+
+FAILURE_SUMMARY_PROMPT = """The task was not completed successfully. Do NOT call any tools. Provide a summary:
+
+Failure type: [incomplete / blocked / misdirected / format_missed]
+  - incomplete: ran out of turns before finishing
+  - blocked: got stuck due to tool failure or missing information
+  - misdirected: went down the wrong path
+  - format_missed: found the answer but forgot to use \\boxed{}
+What happened: [describe the approach taken and why a final answer was not reached]
+Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused]
+Remaining: [what still needs to be done to reach the answer]"""
+
+# Assistant prefix for failure summary generation (guides model to follow structured format)
+FAILURE_SUMMARY_THINK_CONTENT = """We need to write a structured post-mortem style summary **without calling any tools**, explaining why the task was not completed, using these required sections:
+
+* **Failure type**: pick one from **incomplete / blocked / misdirected / format_missed**
+* **What happened**: describe the approach taken and why it didn't reach a final answer
+* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused
+* **Remaining**: state what still needs to be done to reach the final answer"""
+
+FAILURE_SUMMARY_ASSISTANT_PREFIX = (
+    f"<think>\n{FAILURE_SUMMARY_THINK_CONTENT}\n</think>\n\n"
+)
+
+# ============================================================================
+# MCP Tags for Parsing
+# ============================================================================
+
 mcp_tags = [
     "<use_mcp_tool>",
     "</use_mcp_tool>",

From ca53d16028b26f9f411856f051e75486e207b04f Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Thu, 8 Jan 2026 13:04:01 +0800
Subject: [PATCH 3/6] update final answer generation

---
 apps/miroflow-agent/src/core/orchestrator.py  | 449 +++++++++++++-----
 apps/miroflow-agent/src/utils/prompt_utils.py |   6 +-
 2 files changed, 337 insertions(+), 118 deletions(-)

diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index 0617c138..bd402fc4 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -98,6 +98,7 @@ def __init__(
         # Retry loop protection limits
         self.MAX_CONSECUTIVE_ROLLBACKS = 5
         self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1
+        self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0)
 
     async def _stream_update(self, event_type: str, data: dict):
         """Send streaming update in new SSE protocol format"""
@@ -485,6 +486,321 @@ async def _generate_failure_summary(
             )
             return None
 
+    async def _generate_final_answer_with_retries(
+        self,
+        system_prompt: str,
+        message_history: List[Dict[str, Any]],
+        tool_definitions: List[Dict],
+        turn_count: int,
+        task_description: str,
+    ) -> Tuple[Optional[str], str, Optional[str], str, List[Dict[str, Any]]]:
+        """Ask the main agent for a final answer, retrying until one is boxed.
+
+        Args:
+            system_prompt: System prompt passed to every LLM call.
+            message_history: Conversation so far; the summary prompt is appended.
+            tool_definitions: Tool definitions passed to every LLM call.
+            turn_count: Current turn count; attempt i is issued as turn_count + 1 + i.
+            task_description: Task text used to build the summary prompt.
+
+        Returns:
+            Tuple of (final_answer_text, final_summary, final_boxed_answer,
+            usage_log, message_history). final_boxed_answer is FORMAT_ERROR_MESSAGE
+            (never None) when no attempt produced a boxed answer.
+        """
+        summary_prompt = generate_agent_summarize_prompt(
+            task_description,
+            agent_type="main",
+        )
+
+        # Guard against an empty history before peeking at the last message.
+        if message_history and message_history[-1]["role"] == "user":
+            message_history.pop(-1)
+        message_history.append({"role": "user", "content": summary_prompt})
+
+        final_answer_text = None
+        final_boxed_answer = None
+        final_summary = ""
+        usage_log = ""
+
+        for retry_idx in range(self.MAX_FINAL_ANSWER_RETRIES):
+            will_retry = retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1
+            (
+                final_answer_text,
+                _should_break,
+                _tool_calls_info,
+                message_history,
+            ) = await self._handle_llm_call(
+                system_prompt,
+                message_history,
+                tool_definitions,
+                turn_count + 1 + retry_idx,
+                f"Main agent | Final Summary (attempt {retry_idx + 1}/{self.MAX_FINAL_ANSWER_RETRIES})",
+                agent_type="main",
+            )
+
+            if final_answer_text:
+                final_summary, final_boxed_answer, usage_log = (
+                    self.output_formatter.format_final_summary_and_log(
+                        final_answer_text, self.llm_client
+                    )
+                )
+                if final_boxed_answer != FORMAT_ERROR_MESSAGE:
+                    self.task_log.log_step(
+                        "info",
+                        "Main Agent | Final Answer",
+                        f"Boxed answer found on attempt {retry_idx + 1}",
+                    )
+                    break
+                problem = f"No boxed answer on attempt {retry_idx + 1}"
+                if will_retry:
+                    problem += ", retrying..."
+            else:
+                problem = f"Failed to generate answer on attempt {retry_idx + 1}"
+
+            self.task_log.log_step("warning", "Main Agent | Final Answer", problem)
+            # Drop the failed assistant reply so the next attempt starts from the
+            # summary prompt; the final attempt's reply is left in place.
+            if will_retry and message_history:
+                if message_history[-1]["role"] == "assistant":
+                    message_history.pop()
+
+        # Callers compare against FORMAT_ERROR_MESSAGE, so never hand back None.
+        if final_boxed_answer is None:
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
+
+        return (
+            final_answer_text,
+            final_summary,
+            final_boxed_answer,
+            usage_log,
+            message_history,
+        )
+
+    def _handle_no_context_management_fallback(
+        self,
+        final_answer_text: Optional[str],
+        final_summary: str,
+        final_boxed_answer: Optional[str],
+    ) -> Tuple[str, str, str]:
+        """Handle fallback when format_error_retry_limit == 0 (no context management).
+
+        In this mode, the model has only one chance to answer.
+        We should try to use intermediate answers as fallback to maximize accuracy.
+
+        Returns:
+            Tuple of (final_answer_text, final_summary, final_boxed_answer)
+        """
+        # No text from the LLM: use a placeholder and mark it as a format error
+        if not final_answer_text:
+            final_answer_text = "No final answer generated."
+            final_summary = final_answer_text
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
+            self.task_log.log_step(
+                "error",
+                "Main Agent | Final Answer",
+                "Unable to generate final answer after all retries",
+            )
+        else:
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Final Answer",
+                f"Final answer content:\n\n{final_answer_text}",
+            )
+
+        # No valid boxed answer (FORMAT_ERROR_MESSAGE, or None defensively): reuse
+        # the most recent intermediate boxed answer when one was recorded. With
+        # context management off there is no later retry to rely on instead.
+        if (
+            final_boxed_answer == FORMAT_ERROR_MESSAGE or final_boxed_answer is None
+        ) and self.intermediate_boxed_answers:
+            final_boxed_answer = self.intermediate_boxed_answers[-1]
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Final Answer (No Context Management)",
+                f"Using intermediate boxed answer as fallback: {final_boxed_answer}",
+            )
+
+        # Still None (no intermediate answer either): use FORMAT_ERROR_MESSAGE
+        if final_boxed_answer is None:
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
+
+        return final_answer_text, final_summary, final_boxed_answer
+
+    def _handle_context_management_no_fallback(
+        self,
+        final_answer_text: Optional[str],
+        final_summary: str,
+        final_boxed_answer: Optional[str],
+    ) -> Tuple[str, str, str]:
+        """Handle failure when format_error_retry_limit > 0 (context management enabled).
+
+        In this mode, the model has multiple chances to retry with context management.
+        We should NOT guess or use intermediate answers, because:
+        - A wrong guess can reduce accuracy
+        - The model will have another chance to answer with failure experience
+
+        Returns:
+            Tuple of (final_answer_text, final_summary, final_boxed_answer)
+        """
+        # No text from the LLM: use a placeholder and mark it as a format error
+        if not final_answer_text:
+            final_answer_text = "No final answer generated."
+            final_summary = final_answer_text
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
+            self.task_log.log_step(
+                "error",
+                "Main Agent | Final Answer",
+                "Unable to generate final answer after all retries",
+            )
+        else:
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Final Answer",
+                f"Final answer content:\n\n{final_answer_text}",
+            )
+
+        # Normalize a missing boxed answer to FORMAT_ERROR_MESSAGE
+        if final_boxed_answer is None:
+            final_boxed_answer = FORMAT_ERROR_MESSAGE
+
+        # Deliberately no fallback to intermediate answers in this mode: the
+        # FORMAT_ERROR_MESSAGE is returned unchanged and only logged here
+        if final_boxed_answer == FORMAT_ERROR_MESSAGE:
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Final Answer (Context Management Mode)",
+                "No valid boxed answer found. Not using intermediate fallback - will generate failure summary for retry.",
+            )
+
+        return final_answer_text, final_summary, final_boxed_answer
+
+    async def _generate_and_finalize_answer(
+        self,
+        system_prompt: str,
+        message_history: List[Dict[str, Any]],
+        tool_definitions: List[Dict],
+        turn_count: int,
+        task_description: str,
+        reached_max_turns: bool = False,
+    ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]:
+        """Generate final answer and handle fallback based on context management settings.
+
+        There are 4 possible scenarios based on (context_management, reached_max_turns):
+
+        | Context Management | Reached Max Turns | Behavior                                    |
+        |--------------------|-------------------|---------------------------------------------|
+        | OFF (limit=0)      | No                | Generate answer → fallback to intermediate  |
+        | OFF (limit=0)      | Yes               | Generate answer → fallback to intermediate  |
+        | ON  (limit>0)      | No                | Generate answer → no fallback, fail summary |
+        | ON  (limit>0)      | Yes               | SKIP generation → fail summary directly     |
+
+        Args:
+            reached_max_turns: Whether the main loop ended due to reaching max turns or context limit.
+
+        Returns:
+            Tuple of (final_summary, final_boxed_answer, failure_experience_summary, usage_log, message_history)
+        """
+        context_management_enabled = self.format_error_retry_limit > 0
+        failure_experience_summary = None
+        usage_log = ""
+
+        # =============================================================================
+        # CASE: Context management ON + reached max turns
+        # Skip answer generation entirely - any answer would be a blind guess
+        # =============================================================================
+        if context_management_enabled and reached_max_turns:
+            self.task_log.log_step(
+                "info",
+                "Main Agent | Final Answer (Context Management Mode)",
+                "Reached max turns. Skipping answer generation to avoid blind guessing.",
+            )
+
+            self._save_message_history(system_prompt, message_history)
+
+            failure_experience_summary = await self._generate_failure_summary(
+                system_prompt, message_history, tool_definitions, turn_count
+            )
+
+            return (
+                "Task incomplete - reached maximum turns. Will retry with failure experience.",
+                FORMAT_ERROR_MESSAGE,
+                failure_experience_summary,
+                usage_log,
+                message_history,
+            )
+
+        # =============================================================================
+        # ALL OTHER CASES: Generate final answer first
+        # =============================================================================
+        (
+            final_answer_text,
+            final_summary,
+            final_boxed_answer,
+            usage_log,
+            message_history,
+        ) = await self._generate_final_answer_with_retries(
+            system_prompt=system_prompt,
+            message_history=message_history,
+            tool_definitions=tool_definitions,
+            turn_count=turn_count,
+            task_description=task_description,
+        )
+
+        self._save_message_history(system_prompt, message_history)
+
+        # =============================================================================
+        # CASE: Context management OFF
+        # Try to use intermediate answers as fallback to maximize accuracy
+        # =============================================================================
+        if not context_management_enabled:
+            final_answer_text, final_summary, final_boxed_answer = (
+                self._handle_no_context_management_fallback(
+                    final_answer_text, final_summary, final_boxed_answer
+                )
+            )
+            # No failure summary needed - won't be used without context management
+            return (
+                final_summary,
+                final_boxed_answer,
+                None,
+                usage_log,
+                message_history,
+            )
+
+        # =============================================================================
+        # CASE: Context management ON + normal completion (not reached max turns)
+        # Don't use fallback - wrong guess would reduce accuracy
+        # =============================================================================
+        final_answer_text, final_summary, final_boxed_answer = (
+            self._handle_context_management_no_fallback(
+                final_answer_text, final_summary, final_boxed_answer
+            )
+        )
+
+        if final_boxed_answer == FORMAT_ERROR_MESSAGE:
+            failure_experience_summary = await self._generate_failure_summary(
+                system_prompt, message_history, tool_definitions, turn_count
+            )
+
+        return (
+            final_summary,
+            final_boxed_answer,
+            failure_experience_summary,
+            usage_log,
+            message_history,
+        )
+
+    def _save_message_history(
+        self, system_prompt: str, message_history: List[Dict[str, Any]]
+    ) -> None:
+        """Store the prompt and message history on the task log and save it."""
+        self.task_log.main_agent_message_history = {
+            "system_prompt": system_prompt,
+            "message_history": message_history,
+        }
+        self.task_log.save()
+
     async def run_sub_agent(
         self,
         sub_agent_name: str,
@@ -1387,8 +1703,9 @@ async def run_main_agent(
         await self._stream_end_llm("main")
         await self._stream_end_agent("main", self.current_agent_id)
 
-        # Record main loop end
-        if turn_count >= max_turns:
+        # Record main loop end and determine if max turns was reached
+        reached_max_turns = turn_count >= max_turns
+        if reached_max_turns:
             self.task_log.log_step(
                 "warning",
                 "Main Agent | Max Turns Reached / Context Limit Reached",
@@ -1409,120 +1726,24 @@ async def run_main_agent(
         self.current_agent_id = await self._stream_start_agent("Final Summary")
         await self._stream_start_llm("Final Summary")
 
-        # Generate summary prompt (generate only once)
-        summary_prompt = generate_agent_summarize_prompt(
-            task_description,
-            agent_type="main",
+        # Generate final answer and handle fallback based on format_error_retry_limit
+        # If reached_max_turns is True and context management is enabled,
+        # skip answer generation to avoid blind guessing
+        (
+            final_summary,
+            final_boxed_answer,
+            failure_experience_summary,
+            usage_log,
+            message_history,
+        ) = await self._generate_and_finalize_answer(
+            system_prompt=system_prompt,
+            message_history=message_history,
+            tool_definitions=tool_definitions,
+            turn_count=turn_count,
+            task_description=task_description,
+            reached_max_turns=reached_max_turns,
         )
 
-        if message_history[-1]["role"] == "user":
-            message_history.pop(-1)
-        message_history.append({"role": "user", "content": summary_prompt})
-
-        # Retry mechanism for generating boxed answer
-        final_answer_text = None
-        final_boxed_answer = None
-        final_summary = ""
-        usage_log = ""
-
-        for retry_idx in range(self.MAX_FINAL_ANSWER_RETRIES):
-            # Use unified LLM call processing
-            (
-                final_answer_text,
-                should_break,
-                tool_calls_info,
-                message_history,
-            ) = await self._handle_llm_call(
-                system_prompt,
-                message_history,
-                tool_definitions,
-                turn_count + 1 + retry_idx,
-                f"Main agent | Final Summary (attempt {retry_idx + 1}/{self.MAX_FINAL_ANSWER_RETRIES})",
-                agent_type="main",
-            )
-
-            if final_answer_text:
-                # Try to extract boxed answer
-                final_summary, final_boxed_answer, usage_log = (
-                    self.output_formatter.format_final_summary_and_log(
-                        final_answer_text, self.llm_client
-                    )
-                )
-
-                # Check if we got a valid boxed answer
-                if final_boxed_answer != FORMAT_ERROR_MESSAGE:
-                    self.task_log.log_step(
-                        "info",
-                        "Main Agent | Final Answer",
-                        f"Boxed answer found on attempt {retry_idx + 1}",
-                    )
-                    break
-                else:
-                    self.task_log.log_step(
-                        "warning",
-                        "Main Agent | Final Answer",
-                        f"No boxed answer on attempt {retry_idx + 1}, retrying...",
-                    )
-                    # Remove the failed assistant response before retry
-                    if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1:
-                        if (
-                            message_history
-                            and message_history[-1]["role"] == "assistant"
-                        ):
-                            message_history.pop()
-            else:
-                self.task_log.log_step(
-                    "warning",
-                    "Main Agent | Final Answer",
-                    f"Failed to generate answer on attempt {retry_idx + 1}",
-                )
-                # Remove the failed assistant response before retry
-                if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1:
-                    if message_history and message_history[-1]["role"] == "assistant":
-                        message_history.pop()
-
-        self.task_log.main_agent_message_history = {
-            "system_prompt": system_prompt,
-            "message_history": message_history,
-        }
-        self.task_log.save()
-
-        # Final validation and fallback
-        if not final_answer_text:
-            final_answer_text = "No final answer generated."
-            final_summary = final_answer_text
-            final_boxed_answer = FORMAT_ERROR_MESSAGE
-            self.task_log.log_step(
-                "error",
-                "Main Agent | Final Answer",
-                "Unable to generate final answer after all retries",
-            )
-        else:
-            self.task_log.log_step(
-                "info",
-                "Main Agent | Final Answer",
-                f"Final answer content:\n\n{final_answer_text}",
-            )
-
-        # Fallback to intermediate answer if still no boxed answer
-        if (
-            final_boxed_answer == FORMAT_ERROR_MESSAGE
-            and self.intermediate_boxed_answers
-        ):
-            final_boxed_answer = self.intermediate_boxed_answers[-1]
-            self.task_log.log_step(
-                "info",
-                "Main Agent | Final Answer",
-                f"Using intermediate boxed answer as fallback: {final_boxed_answer}",
-            )
-
-        # Generate failure experience summary if no valid boxed answer found
-        failure_experience_summary = None
-        if final_boxed_answer == FORMAT_ERROR_MESSAGE:
-            failure_experience_summary = await self._generate_failure_summary(
-                system_prompt, message_history, tool_definitions, turn_count
-            )
-
         await self._stream_tool_call("show_text", {"text": final_boxed_answer})
         await self._stream_end_llm("Final Summary")
         await self._stream_end_agent("Final Summary", self.current_agent_id)
diff --git a/apps/miroflow-agent/src/utils/prompt_utils.py b/apps/miroflow-agent/src/utils/prompt_utils.py
index 6aeb4ea6..0c4b62cc 100644
--- a/apps/miroflow-agent/src/utils/prompt_utils.py
+++ b/apps/miroflow-agent/src/utils/prompt_utils.py
@@ -39,16 +39,14 @@
   - misdirected: went down the wrong path
   - format_missed: found the answer but forgot to use \\boxed{}
 What happened: [describe the approach taken and why a final answer was not reached]
-Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused]
-Remaining: [what still needs to be done to reach the answer]"""
+Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused]"""
 
 # Assistant prefix for failure summary generation (guides model to follow structured format)
 FAILURE_SUMMARY_THINK_CONTENT = """We need to write a structured post-mortem style summary **without calling any tools**, explaining why the task was not completed, using these required sections:
 
 * **Failure type**: pick one from **incomplete / blocked / misdirected / format_missed**
 * **What happened**: describe the approach taken and why it didn't reach a final answer
-* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused
-* **Remaining**: state what still needs to be done to reach the final answer"""
+* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused"""
 
 FAILURE_SUMMARY_ASSISTANT_PREFIX = (
     f"<think>\n{FAILURE_SUMMARY_THINK_CONTENT}\n</think>\n\n"

From c397522dc075dc82c65f593a83e3f6e11dcd9142 Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Thu, 8 Jan 2026 17:38:48 +0800
Subject: [PATCH 4/6] fix: turn_count not decremented when LLM response is
 empty

---
 apps/miroflow-agent/src/core/orchestrator.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index bd402fc4..76bd7b6c 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -1364,8 +1364,10 @@ async def run_main_agent(
                     )
                     break
             else:
+                # LLM call failed, don't count this as a valid turn
+                turn_count -= 1
                 self.task_log.log_step(
-                    "info",
+                    "warning",
                     f"Main Agent | Turn: {turn_count} | LLM Call",
                     "No valid response from LLM, retrying",
                 )

From f37e7215ed01533869a8b3609316925af67ec1ee Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Thu, 8 Jan 2026 17:51:59 +0800
Subject: [PATCH 5/6] update the code comments related to context management

---
 apps/miroflow-agent/src/core/orchestrator.py | 41 ++++++++++++++++++--
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index 76bd7b6c..94a64050 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -98,6 +98,7 @@ def __init__(
         # Retry loop protection limits
         self.MAX_CONSECUTIVE_ROLLBACKS = 5
         self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1
+        # When format_error_retry_limit > 0, enables a context compression mechanism
         self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0)
 
     async def _stream_update(self, event_type: str, data: dict):
@@ -420,15 +421,27 @@ async def _generate_failure_summary(
         tool_definitions: List[Dict],
         turn_count: int,
     ) -> Optional[str]:
-        """Generate a failure experience summary when task was not completed successfully.
+        """Generate a failure experience summary for context compression.
+
+        This is the core of the context management mechanism. When a task attempt fails
+        (i.e., the task is not completed within the given turns and context window),
+        we compress the entire conversation history into a structured summary containing:
+        - Failure type: incomplete / blocked / misdirected / format_missed
+        - What happened: the approach taken and why a final answer was not reached
+        - Useful findings: facts, intermediate results, or conclusions to be reused
+
+        This summary will be injected into the task description for the next retry,
+        effectively compressing potentially thousands of tokens of conversation into
+        a focused summary of ~500-1000 tokens.
 
         Args:
-            message_history: The conversation history.
+            system_prompt: The system prompt used in the conversation.
+            message_history: The full conversation history to be compressed.
             tool_definitions: Available tool definitions.
             turn_count: Current turn count for step ID.
 
         Returns:
-            The extracted failure experience summary, or None if generation failed.
+            The compressed failure experience summary, or None if generation failed.
         """
         self.task_log.log_step(
             "info",
@@ -686,7 +699,27 @@ async def _generate_and_finalize_answer(
     ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]:
         """Generate final answer and handle fallback based on context management settings.
 
-        There are 4 possible scenarios based on (context_management, reached_max_turns):
+        Context Management (format_error_retry_limit > 0) is essentially a context compression
+        mechanism that enables multi-attempt problem solving:
+
+        1. When the task is not completed within the given turns and context window,
+           we generate a "failure_experience_summary" - a structured post-mortem containing:
+           - Failure type: incomplete / blocked / misdirected / format_missed
+           - What happened: the approach taken and why it didn't reach a final answer
+           - Useful findings: facts, intermediate results, or conclusions to be reused
+
+        2. On retry, this compressed summary is injected into the task description,
+           allowing the model to:
+           - Start fresh with a clean context window
+           - Learn from previous failures without context overflow
+           - Try different approaches informed by past experience
+
+        This is more effective than simply continuing with a long context because:
+        - Avoids context window limits
+        - Focuses attention on key learnings rather than raw conversation
+        - Allows multiple "fresh starts" with accumulated wisdom
+
+        Decision table based on (context_management, reached_max_turns):
 
         | Context Management | Reached Max Turns | Behavior                                    |
         |--------------------|-------------------|---------------------------------------------|

From e133439c4659b70b77a2b430682e7d39bd506327 Mon Sep 17 00:00:00 2001
From: jenny-miromind <jenny@miromind.ai>
Date: Fri, 9 Jan 2026 11:33:18 +0800
Subject: [PATCH 6/6] rename format_error_retry_limit to context_compress_limit

---
 apps/miroflow-agent/benchmarks/common_benchmark.py |  4 ++--
 apps/miroflow-agent/conf/agent/default.yaml        |  2 +-
 .../conf/agent/mirothinker_v1.0.yaml               |  2 +-
 .../conf/agent/mirothinker_v1.0_keep5.yaml         |  2 +-
 .../conf/agent/mirothinker_v1.5.yaml               |  2 +-
 .../conf/agent/mirothinker_v1.5_keep5_max200.yaml  |  2 +-
 .../conf/agent/mirothinker_v1.5_keep5_max400.yaml  |  2 +-
 apps/miroflow-agent/conf/agent/multi_agent.yaml    |  2 +-
 apps/miroflow-agent/conf/agent/multi_agent_os.yaml |  2 +-
 apps/miroflow-agent/conf/agent/single_agent.yaml   |  2 +-
 .../conf/agent/single_agent_keep5.yaml             |  2 +-
 apps/miroflow-agent/src/core/orchestrator.py       | 14 +++++++-------
 12 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/apps/miroflow-agent/benchmarks/common_benchmark.py b/apps/miroflow-agent/benchmarks/common_benchmark.py
index 8fec0138..4b26138a 100644
--- a/apps/miroflow-agent/benchmarks/common_benchmark.py
+++ b/apps/miroflow-agent/benchmarks/common_benchmark.py
@@ -138,7 +138,7 @@ def __init__(self, data_dir: str, benchmark_name: str, cfg: DictConfig):
 
         # Format error tracking and retry configuration
         # Read from agent config as it's part of context management
-        self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0)
+        self.context_compress_limit = cfg.agent.get("context_compress_limit", 0)
 
         # Get LLM provider and model from the config object
         self.llm_provider = cfg.llm.provider
@@ -293,7 +293,7 @@ def extract_timestamp(file_path):
                     # Try to get a valid response with format retry
                     print(f"TASK ID: {task.task_id}, ATTEMPT: {attempt}")
 
-                    max_format_retries = self.format_error_retry_limit
+                    max_format_retries = self.context_compress_limit
 
                     # Track accumulated failure experiences for this attempt
                     # Start with the original task description
diff --git a/apps/miroflow-agent/conf/agent/default.yaml b/apps/miroflow-agent/conf/agent/default.yaml
index 7ad6d72d..2d32ce1e 100644
--- a/apps/miroflow-agent/conf/agent/default.yaml
+++ b/apps/miroflow-agent/conf/agent/default.yaml
@@ -21,4 +21,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml
index 455ec927..5cf8230e 100644
--- a/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml
+++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml
index 8efa6e65..273e14ce 100644
--- a/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml
+++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: 5
-format_error_retry_limit: 10  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 10  # Maximum number of retries for format errors in context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml
index ddce2f10..67ddd661 100644
--- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml
+++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml
index dd42f9ae..a602ef8f 100644
--- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml
+++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: 5
-format_error_retry_limit: 10  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 10  # Maximum number of context-compression retries; > 0 enables context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml
index 8dd348fd..011359a8 100644
--- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml
+++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: 5
-format_error_retry_limit: 10  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 10  # Maximum number of context-compression retries; > 0 enables context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/multi_agent.yaml b/apps/miroflow-agent/conf/agent/multi_agent.yaml
index b4d46614..ed642c82 100644
--- a/apps/miroflow-agent/conf/agent/multi_agent.yaml
+++ b/apps/miroflow-agent/conf/agent/multi_agent.yaml
@@ -25,4 +25,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
+context_compress_limit: 0  # Maximum number of context-compression retries; 0 disables context management
diff --git a/apps/miroflow-agent/conf/agent/multi_agent_os.yaml b/apps/miroflow-agent/conf/agent/multi_agent_os.yaml
index d2fd46c9..1fbe557a 100644
--- a/apps/miroflow-agent/conf/agent/multi_agent_os.yaml
+++ b/apps/miroflow-agent/conf/agent/multi_agent_os.yaml
@@ -25,4 +25,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
+context_compress_limit: 0  # Maximum number of context-compression retries; 0 disables context management
diff --git a/apps/miroflow-agent/conf/agent/single_agent.yaml b/apps/miroflow-agent/conf/agent/single_agent.yaml
index 349f1f58..a04a60fe 100644
--- a/apps/miroflow-agent/conf/agent/single_agent.yaml
+++ b/apps/miroflow-agent/conf/agent/single_agent.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: -1
-format_error_retry_limit: 0  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 0  # Maximum number of context-compression retries; 0 disables context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml b/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml
index 0233358a..4a074360 100644
--- a/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml
+++ b/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml
@@ -19,4 +19,4 @@ sub_agents:
 
 # Settings for context management
 keep_tool_result: 5
-format_error_retry_limit: 10  # Maximum number of retries for format errors in context management
\ No newline at end of file
+context_compress_limit: 10  # Maximum number of context-compression retries; > 0 enables context management
\ No newline at end of file
diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py
index 94a64050..05a00195 100644
--- a/apps/miroflow-agent/src/core/orchestrator.py
+++ b/apps/miroflow-agent/src/core/orchestrator.py
@@ -98,8 +98,8 @@ def __init__(
         # Retry loop protection limits
         self.MAX_CONSECUTIVE_ROLLBACKS = 5
         self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1
-        # When format_error_retry_limit > 0, enables a context compression mechanism
-        self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0)
+        # When context_compress_limit > 0, enables a context compression mechanism
+        self.context_compress_limit = cfg.agent.get("context_compress_limit", 0)
 
     async def _stream_update(self, event_type: str, data: dict):
         """Send streaming update in new SSE protocol format"""
@@ -596,7 +596,7 @@ def _handle_no_context_management_fallback(
         final_summary: str,
         final_boxed_answer: Optional[str],
     ) -> Tuple[str, str, str]:
-        """Handle fallback when format_error_retry_limit == 0 (no context management).
+        """Handle fallback when context_compress_limit == 0 (no context management).
 
         In this mode, the model has only one chance to answer.
         We should try to use intermediate answers as fallback to maximize accuracy.
@@ -646,7 +646,7 @@ def _handle_context_management_no_fallback(
         final_summary: str,
         final_boxed_answer: Optional[str],
     ) -> Tuple[str, str, str]:
-        """Handle failure when format_error_retry_limit > 0 (context management enabled).
+        """Handle failure when context_compress_limit > 0 (context management enabled).
 
         In this mode, the model has multiple chances to retry with context management.
         We should NOT guess or use intermediate answers, because:
@@ -699,7 +699,7 @@ async def _generate_and_finalize_answer(
     ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]:
         """Generate final answer and handle fallback based on context management settings.
 
-        Context Management (format_error_retry_limit > 0) is essentially a context compression
+        Context Management (context_compress_limit > 0) is essentially a context compression
         mechanism that enables multi-attempt problem solving:
 
         1. When the task is not completed within the given turns and context window,
@@ -734,7 +734,7 @@ async def _generate_and_finalize_answer(
         Returns:
             Tuple of (final_summary, final_boxed_answer, failure_experience_summary, usage_log, message_history)
         """
-        context_management_enabled = self.format_error_retry_limit > 0
+        context_management_enabled = self.context_compress_limit > 0
         failure_experience_summary = None
         usage_log = ""
 
@@ -1761,7 +1761,7 @@ async def run_main_agent(
         self.current_agent_id = await self._stream_start_agent("Final Summary")
         await self._stream_start_llm("Final Summary")
 
-        # Generate final answer and handle fallback based on format_error_retry_limit
+        # Generate final answer and handle fallback based on context_compress_limit
         # If reached_max_turns is True and context management is enabled,
         # skip answer generation to avoid blind guessing
         (