From 8d610a934e0d21ef16bd115b4155d7c165f8ad30 Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Tue, 6 Jan 2026 01:18:02 +0800 Subject: [PATCH 1/6] feat: check for google_search empty organic results --- apps/miroflow-agent/src/core/orchestrator.py | 59 +++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index 68284b89..696595ba 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -228,6 +228,29 @@ async def _stream_tool_call( return tool_call_id + def _is_google_search_empty_result(self, tool_name: str, tool_result: dict) -> bool: + """ + Check if google_search result has empty organic results. + This indicates a poor search query that should be retried. + """ + if tool_name != "google_search": + return False + + result = tool_result.get("result") + if not result: + return False + + try: + if isinstance(result, str): + result_dict = json.loads(result) + else: + result_dict = result + + organic = result_dict.get("organic", []) + return len(organic) == 0 + except (json.JSONDecodeError, TypeError, AttributeError): + return False + def get_scrape_result(self, result: str) -> str: """ Process scrape result and truncate if too long to support more conversation turns. @@ -634,9 +657,13 @@ async def run_sub_agent( else tool_result.get("error") ) - # Check for "Unknown tool:" error and rollback - if str(result).startswith("Unknown tool:"): - # If we haven't reached rollback limit, rollback and retry + # Check for errors that should trigger rollback + should_rollback_result = ( + str(result).startswith("Unknown tool:") + or str(result).startswith("Error executing tool") + or self._is_google_search_empty_result(tool_name, tool_result) + ) + if should_rollback_result: if consecutive_rollbacks < self.MAX_CONSECUTIVE_ROLLBACKS - 1: message_history.pop() turn_count -= 1 @@ -645,15 +672,14 @@ async def run_sub_agent( self.task_log.log_step( "warning", f"{sub_agent_name} | Turn: {turn_count} | Rollback", - f"Unknown tool error - tool: {tool_name}, error: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}", + f"Tool result error - tool: {tool_name}, result: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}", ) - break # Exit inner for loop, then continue outer while loop + break else: - # Reached rollback limit, allow error to be sent to LLM as feedback self.task_log.log_step( "warning", f"{sub_agent_name} | Turn: {turn_count} | Allow Error Feedback", - f"Allowing unknown tool error to be sent to LLM after {consecutive_rollbacks} rollbacks - tool: {tool_name}, error: '{str(result)[:200]}'", + f"Allowing error result after {consecutive_rollbacks} rollbacks - tool: {tool_name}, result: '{str(result)[:200]}'", ) await self._stream_tool_call( @@ -1147,9 +1173,15 @@ async def run_main_agent( else tool_result.get("error") ) - # Check for "Unknown tool:" error and rollback - if str(result).startswith("Unknown tool:"): - # If we haven't reached rollback limit, rollback and retry + # Check for errors that should trigger rollback + should_rollback_result = ( + str(result).startswith("Unknown tool:") + or str(result).startswith("Error executing tool") + or self._is_google_search_empty_result( + tool_name, tool_result + ) + ) + if should_rollback_result: if ( consecutive_rollbacks < self.MAX_CONSECUTIVE_ROLLBACKS - 1 @@ -1161,15 +1193,14 @@ async def run_main_agent( self.task_log.log_step( "warning", f"Main Agent | Turn: {turn_count} | Rollback", - f"Unknown tool error - tool: {tool_name}, error: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}", + f"Tool result error - tool: {tool_name}, result: '{str(result)[:200]}'. Consecutive rollbacks: {consecutive_rollbacks}/{self.MAX_CONSECUTIVE_ROLLBACKS}, Total attempts: {total_attempts}/{max_attempts}", ) - break # Exit inner for loop, then continue outer while loop + break else: - # Reached rollback limit, allow error to be sent to LLM as feedback self.task_log.log_step( "warning", f"Main Agent | Turn: {turn_count} | Allow Error Feedback", - f"Allowing unknown tool error to be sent to LLM after {consecutive_rollbacks} rollbacks - tool: {tool_name}, error: '{str(result)[:200]}'", + f"Allowing error result after {consecutive_rollbacks} rollbacks - tool: {tool_name}, result: '{str(result)[:200]}'", ) await self._stream_tool_call( From 694067b6fffffbe911caae9673bb97c344e9f2e3 Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Thu, 8 Jan 2026 04:41:05 +0800 Subject: [PATCH 2/6] support failure report generation --- .../benchmarks/common_benchmark.py | 100 +++++++++++++++++- apps/miroflow-agent/main.py | 2 +- apps/miroflow-agent/src/core/orchestrator.py | 99 +++++++++++++++-- apps/miroflow-agent/src/core/pipeline.py | 21 +++- .../miroflow-agent/src/io/output_formatter.py | 4 +- .../src/llm/providers/openai_client.py | 5 + .../miroflow-agent/src/utils/parsing_utils.py | 51 +++++++++ apps/miroflow-agent/src/utils/prompt_utils.py | 57 ++++++++++ 8 files changed, 321 insertions(+), 18 deletions(-) diff --git a/apps/miroflow-agent/benchmarks/common_benchmark.py b/apps/miroflow-agent/benchmarks/common_benchmark.py index a2e74164..8fec0138 100644 --- a/apps/miroflow-agent/benchmarks/common_benchmark.py +++ b/apps/miroflow-agent/benchmarks/common_benchmark.py @@ -23,9 +23,12 @@ execute_task_pipeline, ) from src.logging.summary_time_cost import generate_summary - -# Constants for format error detection -FORMAT_ERROR_MESSAGE = "No \\boxed{} content found in the final answer." +from src.utils.prompt_utils import ( + FAILURE_EXPERIENCE_FOOTER, + FAILURE_EXPERIENCE_HEADER, + FAILURE_EXPERIENCE_ITEM, + FORMAT_ERROR_MESSAGE, +) def _task_worker(task_dict, cfg_dict, evaluator_kwargs): @@ -292,17 +295,68 @@ def extract_timestamp(file_path): max_format_retries = self.format_error_retry_limit + # Track accumulated failure experiences for this attempt + # Start with the original task description + current_task_description = task_description + failure_experiences = [] + + # Resume: Recover failure experiences from previous retry logs + if format_retry_count > 0 and logs_dir.exists(): + print( + f" Resuming from retry {format_retry_count}, recovering previous failure experiences..." + ) + for prev_retry in range(format_retry_count): + prev_log_pattern = f"task_{task.task_id}_attempt-{attempt}_format-retry-{prev_retry}_*.json" + prev_logs = sorted(list(logs_dir.glob(prev_log_pattern))) + if prev_logs: + prev_log_file = prev_logs[-1] # Get the latest one + try: + with open( + prev_log_file, "r", encoding="utf-8" + ) as f: + prev_log_data = json.load(f) + # Extract failure experience from trace_data + trace_data = prev_log_data.get("trace_data", {}) + prev_failure_exp = trace_data.get( + "failure_experience_summary" + ) + if prev_failure_exp: + failure_experiences.append(prev_failure_exp) + print( + f" Recovered failure experience from retry {prev_retry}" + ) + except Exception as e: + print( + f" Warning: Failed to load previous log {prev_log_file}: {e}" + ) + + # Rebuild enhanced task description with recovered failure experiences + if failure_experiences: + current_task_description += FAILURE_EXPERIENCE_HEADER + for idx, exp in enumerate(failure_experiences, 1): + current_task_description += ( + FAILURE_EXPERIENCE_ITEM.format( + attempt_number=idx, + failure_summary=exp, + ) + ) + current_task_description += FAILURE_EXPERIENCE_FOOTER + print( + f" Recovered {len(failure_experiences)} failure experience(s) from previous retries" + ) + while format_retry_count <= max_format_retries: try: ( response, final_boxed_answer, log_file_path, + failure_experience_summary, ) = await execute_task_pipeline( cfg=self.cfg, task_id=f"{task.task_id}_attempt-{attempt}_format-retry-{format_retry_count}", task_file_name=task_file_path, - task_description=task_description, + task_description=current_task_description, main_agent_tool_manager=self.main_agent_tool_manager, sub_agent_tool_managers=self.sub_agent_tool_managers, output_formatter=self.output_formatter, @@ -322,12 +376,48 @@ def extract_timestamp(file_path): ): format_retry_count += 1 if format_retry_count <= max_format_retries: + # Use the model-generated failure experience summary + print( + f" Format error detected, using model-generated failure summary for retry {format_retry_count}..." + ) + + if failure_experience_summary: + failure_experiences.append( + failure_experience_summary + ) + + # Build enhanced task description with accumulated failure experiences + # Start fresh from original task_description each time + current_task_description = task_description + current_task_description += ( + FAILURE_EXPERIENCE_HEADER + ) + for idx, exp in enumerate( + failure_experiences, 1 + ): + current_task_description += ( + FAILURE_EXPERIENCE_ITEM.format( + attempt_number=idx, + failure_summary=exp, + ) + ) + current_task_description += ( + FAILURE_EXPERIENCE_FOOTER + ) + + print( + f" Enhanced task description with {len(failure_experiences)} failure experience(s)" + ) + else: + print( + " No failure experience summary generated, retrying without enhancement..." + ) continue else: # Exceeded format retry limit attempt_result["status"] = "success" attempt_result["model_boxed_answer"] = ( - "No \\boxed{} content found after format error retry limit exceeded." + f"{FORMAT_ERROR_MESSAGE} (after {max_format_retries} retries)" ) attempt_result["error_message"] = ( f"Exceeded format error retry limit ({max_format_retries})" diff --git a/apps/miroflow-agent/main.py b/apps/miroflow-agent/main.py index 75e61e18..c3d7676b 100644 --- a/apps/miroflow-agent/main.py +++ b/apps/miroflow-agent/main.py @@ -33,7 +33,7 @@ async def amain(cfg: DictConfig) -> None: task_file_name = "" # Execute task using the pipeline - final_summary, final_boxed_answer, log_file_path = await execute_task_pipeline( + final_summary, final_boxed_answer, log_file_path, _ = await execute_task_pipeline( cfg=cfg, task_id=task_id, task_file_name=task_file_name, diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index 696595ba..0617c138 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -23,8 +23,14 @@ TaskLog, get_utc_plus_8_time, ) -from ..utils.parsing_utils import extract_llm_response_text +from ..utils.parsing_utils import ( + extract_failure_experience_summary, + extract_llm_response_text, +) from ..utils.prompt_utils import ( + FAILURE_SUMMARY_ASSISTANT_PREFIX, + FAILURE_SUMMARY_PROMPT, + FORMAT_ERROR_MESSAGE, generate_agent_specific_system_prompt, generate_agent_summarize_prompt, mcp_tags, @@ -406,6 +412,79 @@ async def _handle_llm_call( # Return empty response with should_break=False, need to retry return "", False, None, original_message_history + async def _generate_failure_summary( + self, + system_prompt: str, + message_history: List[Dict[str, Any]], + tool_definitions: List[Dict], + turn_count: int, + ) -> Optional[str]: + """Generate a failure experience summary when task was not completed successfully. + + Args: + message_history: The conversation history. + tool_definitions: Available tool definitions. + turn_count: Current turn count for step ID. + + Returns: + The extracted failure experience summary, or None if generation failed. + """ + self.task_log.log_step( + "info", + "Main Agent | Failure Summary", + "Generating failure experience summary for potential retry...", + ) + + # Build failure summary history + failure_summary_history = message_history.copy() + if failure_summary_history and failure_summary_history[-1]["role"] == "user": + failure_summary_history.pop() + + # Add failure summary prompt and assistant prefix for structured output + failure_summary_history.append( + {"role": "user", "content": FAILURE_SUMMARY_PROMPT} + ) + failure_summary_history.append( + {"role": "assistant", "content": FAILURE_SUMMARY_ASSISTANT_PREFIX} + ) + + # Call LLM to generate failure summary (auto-detects assistant prefix for vLLM continuation) + ( + failure_summary_text, + _, + _, + _, + ) = await self._handle_llm_call( + system_prompt, + failure_summary_history, + tool_definitions, + turn_count + 10, # Use a different step id + "Main Agent | Failure Experience Summary", + agent_type="main", + ) + + # Prepend the assistant prefix to the response for complete output + if failure_summary_text: + failure_summary_text = ( + FAILURE_SUMMARY_ASSISTANT_PREFIX + failure_summary_text + ) + failure_experience_summary = extract_failure_experience_summary( + failure_summary_text + ) + self.task_log.log_step( + "info", + "Main Agent | Failure Summary", + f"Generated failure experience summary:\n{failure_experience_summary[:500]}...", + ) + return failure_experience_summary + else: + self.task_log.log_step( + "warning", + "Main Agent | Failure Summary", + "Failed to generate failure experience summary", + ) + return None + async def run_sub_agent( self, sub_agent_name: str, @@ -1371,10 +1450,7 @@ async def run_main_agent( ) # Check if we got a valid boxed answer - if ( - final_boxed_answer - != "No \\boxed{} content found in the final answer." - ): + if final_boxed_answer != FORMAT_ERROR_MESSAGE: self.task_log.log_step( "info", "Main Agent | Final Answer", @@ -1415,7 +1491,7 @@ async def run_main_agent( if not final_answer_text: final_answer_text = "No final answer generated." final_summary = final_answer_text - final_boxed_answer = "No \\boxed{} content found in the final answer." + final_boxed_answer = FORMAT_ERROR_MESSAGE self.task_log.log_step( "error", "Main Agent | Final Answer", @@ -1430,7 +1506,7 @@ async def run_main_agent( # Fallback to intermediate answer if still no boxed answer if ( - final_boxed_answer == "No \\boxed{} content found in the final answer." + final_boxed_answer == FORMAT_ERROR_MESSAGE and self.intermediate_boxed_answers ): final_boxed_answer = self.intermediate_boxed_answers[-1] @@ -1440,6 +1516,13 @@ async def run_main_agent( f"Using intermediate boxed answer as fallback: {final_boxed_answer}", ) + # Generate failure experience summary if no valid boxed answer found + failure_experience_summary = None + if final_boxed_answer == FORMAT_ERROR_MESSAGE: + failure_experience_summary = await self._generate_failure_summary( + system_prompt, message_history, tool_definitions, turn_count + ) + await self._stream_tool_call("show_text", {"text": final_boxed_answer}) await self._stream_end_llm("Final Summary") await self._stream_end_agent("Final Summary", self.current_agent_id) @@ -1461,4 +1544,4 @@ async def run_main_agent( f"Main agent task {task_id} completed successfully", ) gc.collect() - return final_summary, final_boxed_answer + return final_summary, final_boxed_answer, failure_experience_summary diff --git a/apps/miroflow-agent/src/core/pipeline.py b/apps/miroflow-agent/src/core/pipeline.py index 78d41402..b0cc1e6c 100644 --- a/apps/miroflow-agent/src/core/pipeline.py +++ b/apps/miroflow-agent/src/core/pipeline.py @@ -98,7 +98,11 @@ async def execute_task_pipeline( sub_agent_tool_definitions=sub_agent_tool_definitions, ) - final_summary, final_boxed_answer = await orchestrator.run_main_agent( + ( + final_summary, + final_boxed_answer, + failure_experience_summary, + ) = await orchestrator.run_main_agent( task_description=task_description, task_file_name=task_file_name, task_id=task_id, @@ -109,8 +113,19 @@ async def execute_task_pipeline( task_log.final_boxed_answer = final_boxed_answer task_log.status = "success" + # Store failure experience summary in task log if available + if failure_experience_summary: + task_log.trace_data["failure_experience_summary"] = ( + failure_experience_summary + ) + log_file_path = task_log.save() - return final_summary, final_boxed_answer, log_file_path + return ( + final_summary, + final_boxed_answer, + log_file_path, + failure_experience_summary, + ) except Exception as e: error_details = traceback.format_exc() @@ -134,7 +149,7 @@ async def execute_task_pipeline( log_file_path = task_log.save() - return error_message, "", log_file_path + return error_message, "", log_file_path, None finally: task_log.end_time = get_utc_plus_8_time() diff --git a/apps/miroflow-agent/src/io/output_formatter.py b/apps/miroflow-agent/src/io/output_formatter.py index 3c1d0fc5..8e63b046 100644 --- a/apps/miroflow-agent/src/io/output_formatter.py +++ b/apps/miroflow-agent/src/io/output_formatter.py @@ -3,6 +3,8 @@ import re +from ..utils.prompt_utils import FORMAT_ERROR_MESSAGE + class OutputFormatter: def _extract_boxed_content(self, text: str) -> str: @@ -117,7 +119,7 @@ def format_final_summary_and_log(self, final_answer_text, client=None): summary_lines.append(boxed_result) elif final_answer_text: summary_lines.append("No \\boxed{} content found.") - boxed_result = "No \\boxed{} content found in the final answer." + boxed_result = FORMAT_ERROR_MESSAGE # Token usage statistics and cost estimation - use client method if client and hasattr(client, "format_token_usage_summary"): diff --git a/apps/miroflow-agent/src/llm/providers/openai_client.py b/apps/miroflow-agent/src/llm/providers/openai_client.py index 2eda891f..a81bc76d 100644 --- a/apps/miroflow-agent/src/llm/providers/openai_client.py +++ b/apps/miroflow-agent/src/llm/providers/openai_client.py @@ -137,6 +137,11 @@ async def _create_message( if "deepseek-v3-1" in self.model_name: params["extra_body"]["thinking"] = {"type": "enabled"} + # auto-detect if we need to continue from the last assistant message + if messages_for_llm and messages_for_llm[-1].get("role") == "assistant": + params["extra_body"]["continue_final_message"] = True + params["extra_body"]["add_generation_prompt"] = False + try: if self.async_client: response = await self.client.chat.completions.create(**params) diff --git a/apps/miroflow-agent/src/utils/parsing_utils.py b/apps/miroflow-agent/src/utils/parsing_utils.py index 5f6e5274..04733ae6 100644 --- a/apps/miroflow-agent/src/utils/parsing_utils.py +++ b/apps/miroflow-agent/src/utils/parsing_utils.py @@ -100,6 +100,57 @@ def safe_json_loads(arguments_str: str) -> dict: } +def extract_failure_experience_summary(text: str) -> str: + """ + Extract failure experience summary from LLM response text. + + The text may contain: + - ... block (thinking content) + - Main content after and before + - ... block (tool call, ignored) + + Examples: + "\n{xxx}\n\n\n{content}\n\n..." + "\n{xxx}\n\n\n{content}" + "{content}" (no think block) + + Returns: + - If content is empty after strip, return think_content + - If both think_content and content are non-empty, return content + - mcp_block is never used + """ + if not text: + return "" + + think_content = "" + content = "" + + # Extract think content + think_match = re.search(r"([\s\S]*?)", text) + if think_match: + think_content = think_match.group(1).strip() + # Get content after + after_think = text[think_match.end() :] + else: + # No think block, entire text is potential content + after_think = text + + # Remove ... block from content + mcp_match = re.search(r"[\s\S]*", after_think) + if mcp_match: + content = after_think[: mcp_match.start()].strip() + else: + content = after_think.strip() + + # Apply the rules: + # - If content is empty, use think_content + # - If both are non-empty, use content + if content: + return content + else: + return think_content + + def extract_llm_response_text(llm_response): """ Extract text from LLM response, excluding tags. Stop immediately when this opening tag is encountered. diff --git a/apps/miroflow-agent/src/utils/prompt_utils.py b/apps/miroflow-agent/src/utils/prompt_utils.py index 1157eb0e..6aeb4ea6 100644 --- a/apps/miroflow-agent/src/utils/prompt_utils.py +++ b/apps/miroflow-agent/src/utils/prompt_utils.py @@ -1,6 +1,63 @@ # Copyright (c) 2025 MiroMind # This source code is licensed under the MIT License. +# ============================================================================ +# Format Error Messages +# ============================================================================ + +FORMAT_ERROR_MESSAGE = "No \\boxed{} content found in the final answer." + +# ============================================================================ +# Failure Experience Templates (for format error retry) +# ============================================================================ + +# Header that appears once before all failure experiences +FAILURE_EXPERIENCE_HEADER = """ + +=== Previous Attempts Analysis === +The following summarizes what was tried before and why it didn't work. Use this to guide a NEW approach. + +""" + +# Template for each individual failure experience (used multiple times) +FAILURE_EXPERIENCE_ITEM = """[Attempt {attempt_number}] +{failure_summary} + +""" + +# Footer that appears once after all failure experiences +FAILURE_EXPERIENCE_FOOTER = """=== End of Analysis === + +Based on the above, you should try a different strategy this time. +""" + +FAILURE_SUMMARY_PROMPT = """The task was not completed successfully. Do NOT call any tools. Provide a summary: + +Failure type: [incomplete / blocked / misdirected / format_missed] + - incomplete: ran out of turns before finishing + - blocked: got stuck due to tool failure or missing information + - misdirected: went down the wrong path + - format_missed: found the answer but forgot to use \\boxed{} +What happened: [describe the approach taken and why a final answer was not reached] +Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused] +Remaining: [what still needs to be done to reach the answer]""" + +# Assistant prefix for failure summary generation (guides model to follow structured format) +FAILURE_SUMMARY_THINK_CONTENT = """We need to write a structured post-mortem style summary **without calling any tools**, explaining why the task was not completed, using these required sections: + +* **Failure type**: pick one from **incomplete / blocked / misdirected / format_missed** +* **What happened**: describe the approach taken and why it didn't reach a final answer +* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused +* **Remaining**: state what still needs to be done to reach the final answer""" + +FAILURE_SUMMARY_ASSISTANT_PREFIX = ( + f"\n{FAILURE_SUMMARY_THINK_CONTENT}\n\n\n" +) + +# ============================================================================ +# MCP Tags for Parsing +# ============================================================================ + mcp_tags = [ "", "", From ca53d16028b26f9f411856f051e75486e207b04f Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Thu, 8 Jan 2026 13:04:01 +0800 Subject: [PATCH 3/6] update final answer generation --- apps/miroflow-agent/src/core/orchestrator.py | 449 +++++++++++++----- apps/miroflow-agent/src/utils/prompt_utils.py | 6 +- 2 files changed, 337 insertions(+), 118 deletions(-) diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index 0617c138..bd402fc4 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -98,6 +98,7 @@ def __init__( # Retry loop protection limits self.MAX_CONSECUTIVE_ROLLBACKS = 5 self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1 + self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0) async def _stream_update(self, event_type: str, data: dict): """Send streaming update in new SSE protocol format""" @@ -485,6 +486,321 @@ async def _generate_failure_summary( ) return None + async def _generate_final_answer_with_retries( + self, + system_prompt: str, + message_history: List[Dict[str, Any]], + tool_definitions: List[Dict], + turn_count: int, + task_description: str, + ) -> Tuple[Optional[str], str, Optional[str], str, List[Dict[str, Any]]]: + """Generate final answer with retry mechanism. + + Returns: + Tuple of (final_answer_text, final_summary, final_boxed_answer, usage_log, message_history) + """ + # Generate summary prompt + summary_prompt = generate_agent_summarize_prompt( + task_description, + agent_type="main", + ) + + if message_history[-1]["role"] == "user": + message_history.pop(-1) + message_history.append({"role": "user", "content": summary_prompt}) + + final_answer_text = None + final_boxed_answer = None + final_summary = "" + usage_log = "" + + for retry_idx in range(self.MAX_FINAL_ANSWER_RETRIES): + ( + final_answer_text, + should_break, + tool_calls_info, + message_history, + ) = await self._handle_llm_call( + system_prompt, + message_history, + tool_definitions, + turn_count + 1 + retry_idx, + f"Main agent | Final Summary (attempt {retry_idx + 1}/{self.MAX_FINAL_ANSWER_RETRIES})", + agent_type="main", + ) + + if final_answer_text: + final_summary, final_boxed_answer, usage_log = ( + self.output_formatter.format_final_summary_and_log( + final_answer_text, self.llm_client + ) + ) + + if final_boxed_answer != FORMAT_ERROR_MESSAGE: + self.task_log.log_step( + "info", + "Main Agent | Final Answer", + f"Boxed answer found on attempt {retry_idx + 1}", + ) + break + else: + self.task_log.log_step( + "warning", + "Main Agent | Final Answer", + f"No boxed answer on attempt {retry_idx + 1}, retrying...", + ) + if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1: + if ( + message_history + and message_history[-1]["role"] == "assistant" + ): + message_history.pop() + else: + self.task_log.log_step( + "warning", + "Main Agent | Final Answer", + f"Failed to generate answer on attempt {retry_idx + 1}", + ) + if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1: + if message_history and message_history[-1]["role"] == "assistant": + message_history.pop() + + # Ensure final_boxed_answer is never None - treat it as FORMAT_ERROR_MESSAGE + if final_boxed_answer is None: + final_boxed_answer = FORMAT_ERROR_MESSAGE + + return ( + final_answer_text, + final_summary, + final_boxed_answer, + usage_log, + message_history, + ) + + def _handle_no_context_management_fallback( + self, + final_answer_text: Optional[str], + final_summary: str, + final_boxed_answer: Optional[str], + ) -> Tuple[str, str, str]: + """Handle fallback when format_error_retry_limit == 0 (no context management). + + In this mode, the model has only one chance to answer. + We should try to use intermediate answers as fallback to maximize accuracy. + + Returns: + Tuple of (final_answer_text, final_summary, final_boxed_answer) + """ + # Validate final_answer_text + if not final_answer_text: + final_answer_text = "No final answer generated." + final_summary = final_answer_text + final_boxed_answer = FORMAT_ERROR_MESSAGE + self.task_log.log_step( + "error", + "Main Agent | Final Answer", + "Unable to generate final answer after all retries", + ) + else: + self.task_log.log_step( + "info", + "Main Agent | Final Answer", + f"Final answer content:\n\n{final_answer_text}", + ) + + # Fallback to intermediate answer if no valid boxed answer + # This is important when context management is disabled to maximize answer accuracy + # Check for both FORMAT_ERROR_MESSAGE and None (defensive) + if ( + final_boxed_answer == FORMAT_ERROR_MESSAGE or final_boxed_answer is None + ) and self.intermediate_boxed_answers: + final_boxed_answer = self.intermediate_boxed_answers[-1] + self.task_log.log_step( + "info", + "Main Agent | Final Answer (No Context Management)", + f"Using intermediate boxed answer as fallback: {final_boxed_answer}", + ) + + # Ensure final_boxed_answer is never None + if final_boxed_answer is None: + final_boxed_answer = FORMAT_ERROR_MESSAGE + + return final_answer_text, final_summary, final_boxed_answer + + def _handle_context_management_no_fallback( + self, + final_answer_text: Optional[str], + final_summary: str, + final_boxed_answer: Optional[str], + ) -> Tuple[str, str, str]: + """Handle failure when format_error_retry_limit > 0 (context management enabled). + + In this mode, the model has multiple chances to retry with context management. + We should NOT guess or use intermediate answers, because: + - A wrong guess can reduce accuracy + - The model will have another chance to answer with failure experience + + Returns: + Tuple of (final_answer_text, final_summary, final_boxed_answer) + """ + # Validate final_answer_text + if not final_answer_text: + final_answer_text = "No final answer generated." + final_summary = final_answer_text + final_boxed_answer = FORMAT_ERROR_MESSAGE + self.task_log.log_step( + "error", + "Main Agent | Final Answer", + "Unable to generate final answer after all retries", + ) + else: + self.task_log.log_step( + "info", + "Main Agent | Final Answer", + f"Final answer content:\n\n{final_answer_text}", + ) + + # Ensure final_boxed_answer is never None + if final_boxed_answer is None: + final_boxed_answer = FORMAT_ERROR_MESSAGE + + # With context management, do NOT fallback to intermediate answers + # Keep FORMAT_ERROR_MESSAGE to trigger failure summary and allow retry + if final_boxed_answer == FORMAT_ERROR_MESSAGE: + self.task_log.log_step( + "info", + "Main Agent | Final Answer (Context Management Mode)", + "No valid boxed answer found. Not using intermediate fallback - will generate failure summary for retry.", + ) + + return final_answer_text, final_summary, final_boxed_answer + + async def _generate_and_finalize_answer( + self, + system_prompt: str, + message_history: List[Dict[str, Any]], + tool_definitions: List[Dict], + turn_count: int, + task_description: str, + reached_max_turns: bool = False, + ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]: + """Generate final answer and handle fallback based on context management settings. + + There are 4 possible scenarios based on (context_management, reached_max_turns): + + | Context Management | Reached Max Turns | Behavior | + |--------------------|-------------------|---------------------------------------------| + | OFF (limit=0) | No | Generate answer → fallback to intermediate | + | OFF (limit=0) | Yes | Generate answer → fallback to intermediate | + | ON (limit>0) | No | Generate answer → no fallback, fail summary | + | ON (limit>0) | Yes | SKIP generation → fail summary directly | + + Args: + reached_max_turns: Whether the main loop ended due to reaching max turns or context limit. + + Returns: + Tuple of (final_summary, final_boxed_answer, failure_experience_summary, usage_log, message_history) + """ + context_management_enabled = self.format_error_retry_limit > 0 + failure_experience_summary = None + usage_log = "" + + # ============================================================================= + # CASE: Context management ON + reached max turns + # Skip answer generation entirely - any answer would be a blind guess + # ============================================================================= + if context_management_enabled and reached_max_turns: + self.task_log.log_step( + "info", + "Main Agent | Final Answer (Context Management Mode)", + "Reached max turns. Skipping answer generation to avoid blind guessing.", + ) + + self._save_message_history(system_prompt, message_history) + + failure_experience_summary = await self._generate_failure_summary( + system_prompt, message_history, tool_definitions, turn_count + ) + + return ( + "Task incomplete - reached maximum turns. Will retry with failure experience.", + FORMAT_ERROR_MESSAGE, + failure_experience_summary, + usage_log, + message_history, + ) + + # ============================================================================= + # ALL OTHER CASES: Generate final answer first + # ============================================================================= + ( + final_answer_text, + final_summary, + final_boxed_answer, + usage_log, + message_history, + ) = await self._generate_final_answer_with_retries( + system_prompt=system_prompt, + message_history=message_history, + tool_definitions=tool_definitions, + turn_count=turn_count, + task_description=task_description, + ) + + self._save_message_history(system_prompt, message_history) + + # ============================================================================= + # CASE: Context management OFF + # Try to use intermediate answers as fallback to maximize accuracy + # ============================================================================= + if not context_management_enabled: + final_answer_text, final_summary, final_boxed_answer = ( + self._handle_no_context_management_fallback( + final_answer_text, final_summary, final_boxed_answer + ) + ) + # No failure summary needed - won't be used without context management + return ( + final_summary, + final_boxed_answer, + None, + usage_log, + message_history, + ) + + # ============================================================================= + # CASE: Context management ON + normal completion (not reached max turns) + # Don't use fallback - wrong guess would reduce accuracy + # ============================================================================= + final_answer_text, final_summary, final_boxed_answer = ( + self._handle_context_management_no_fallback( + final_answer_text, final_summary, final_boxed_answer + ) + ) + + if final_boxed_answer == FORMAT_ERROR_MESSAGE: + failure_experience_summary = await self._generate_failure_summary( + system_prompt, message_history, tool_definitions, turn_count + ) + + return ( + final_summary, + final_boxed_answer, + failure_experience_summary, + usage_log, + message_history, + ) + + def _save_message_history( + self, system_prompt: str, message_history: List[Dict[str, Any]] + ): + """Save message history to task log.""" + self.task_log.main_agent_message_history = { + "system_prompt": system_prompt, + "message_history": message_history, + } + self.task_log.save() + async def run_sub_agent( self, sub_agent_name: str, @@ -1387,8 +1703,9 @@ async def run_main_agent( await self._stream_end_llm("main") await self._stream_end_agent("main", self.current_agent_id) - # Record main loop end - if turn_count >= max_turns: + # Record main loop end and determine if max turns was reached + reached_max_turns = turn_count >= max_turns + if reached_max_turns: self.task_log.log_step( "warning", "Main Agent | Max Turns Reached / Context Limit Reached", @@ -1409,120 +1726,24 @@ async def run_main_agent( self.current_agent_id = await self._stream_start_agent("Final Summary") await self._stream_start_llm("Final Summary") - # Generate summary prompt (generate only once) - summary_prompt = generate_agent_summarize_prompt( - task_description, - agent_type="main", + # Generate final answer and handle fallback based on format_error_retry_limit + # If reached_max_turns is True and context management is enabled, + # skip answer generation to avoid blind guessing + ( + final_summary, + final_boxed_answer, + failure_experience_summary, + usage_log, + message_history, + ) = await self._generate_and_finalize_answer( + system_prompt=system_prompt, + message_history=message_history, + tool_definitions=tool_definitions, + turn_count=turn_count, + task_description=task_description, + reached_max_turns=reached_max_turns, ) - if message_history[-1]["role"] == "user": - message_history.pop(-1) - message_history.append({"role": "user", "content": summary_prompt}) - - # Retry mechanism for generating boxed answer - final_answer_text = None - final_boxed_answer = None - final_summary = "" - usage_log = "" - - for retry_idx in range(self.MAX_FINAL_ANSWER_RETRIES): - # Use unified LLM call processing - ( - final_answer_text, - should_break, - tool_calls_info, - message_history, - ) = await self._handle_llm_call( - system_prompt, - message_history, - tool_definitions, - turn_count + 1 + retry_idx, - f"Main agent | Final Summary (attempt {retry_idx + 1}/{self.MAX_FINAL_ANSWER_RETRIES})", - agent_type="main", - ) - - if final_answer_text: - # Try to extract boxed answer - final_summary, final_boxed_answer, usage_log = ( - self.output_formatter.format_final_summary_and_log( - final_answer_text, self.llm_client - ) - ) - - # Check if we got a valid boxed answer - if final_boxed_answer != FORMAT_ERROR_MESSAGE: - self.task_log.log_step( - "info", - "Main Agent | Final Answer", - f"Boxed answer found on attempt {retry_idx + 1}", - ) - break - else: - self.task_log.log_step( - "warning", - "Main Agent | Final Answer", - f"No boxed answer on attempt {retry_idx + 1}, retrying...", - ) - # Remove the failed assistant response before retry - if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1: - if ( - message_history - and message_history[-1]["role"] == "assistant" - ): - message_history.pop() - else: - self.task_log.log_step( - "warning", - "Main Agent | Final Answer", - f"Failed to generate answer on attempt {retry_idx + 1}", - ) - # Remove the failed assistant response before retry - if retry_idx < self.MAX_FINAL_ANSWER_RETRIES - 1: - if message_history and message_history[-1]["role"] == "assistant": - message_history.pop() - - self.task_log.main_agent_message_history = { - "system_prompt": system_prompt, - "message_history": message_history, - } - self.task_log.save() - - # Final validation and fallback - if not final_answer_text: - final_answer_text = "No final answer generated." - final_summary = final_answer_text - final_boxed_answer = FORMAT_ERROR_MESSAGE - self.task_log.log_step( - "error", - "Main Agent | Final Answer", - "Unable to generate final answer after all retries", - ) - else: - self.task_log.log_step( - "info", - "Main Agent | Final Answer", - f"Final answer content:\n\n{final_answer_text}", - ) - - # Fallback to intermediate answer if still no boxed answer - if ( - final_boxed_answer == FORMAT_ERROR_MESSAGE - and self.intermediate_boxed_answers - ): - final_boxed_answer = self.intermediate_boxed_answers[-1] - self.task_log.log_step( - "info", - "Main Agent | Final Answer", - f"Using intermediate boxed answer as fallback: {final_boxed_answer}", - ) - - # Generate failure experience summary if no valid boxed answer found - failure_experience_summary = None - if final_boxed_answer == FORMAT_ERROR_MESSAGE: - failure_experience_summary = await self._generate_failure_summary( - system_prompt, message_history, tool_definitions, turn_count - ) - await self._stream_tool_call("show_text", {"text": final_boxed_answer}) await self._stream_end_llm("Final Summary") await self._stream_end_agent("Final Summary", self.current_agent_id) diff --git a/apps/miroflow-agent/src/utils/prompt_utils.py b/apps/miroflow-agent/src/utils/prompt_utils.py index 6aeb4ea6..0c4b62cc 100644 --- a/apps/miroflow-agent/src/utils/prompt_utils.py +++ b/apps/miroflow-agent/src/utils/prompt_utils.py @@ -39,16 +39,14 @@ - misdirected: went down the wrong path - format_missed: found the answer but forgot to use \\boxed{} What happened: [describe the approach taken and why a final answer was not reached] -Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused] -Remaining: [what still needs to be done to reach the answer]""" +Useful findings: [list any facts, intermediate results, or conclusions discovered that should be reused]""" # Assistant prefix for failure summary generation (guides model to follow structured format) FAILURE_SUMMARY_THINK_CONTENT = """We need to write a structured post-mortem style summary **without calling any tools**, explaining why the task was not completed, using these required sections: * **Failure type**: pick one from **incomplete / blocked / misdirected / format_missed** * **What happened**: describe the approach taken and why it didn't reach a final answer -* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused -* **Remaining**: state what still needs to be done to reach the final answer""" +* **Useful findings**: list any facts, intermediate results, or conclusions that can be reused""" FAILURE_SUMMARY_ASSISTANT_PREFIX = ( f"\n{FAILURE_SUMMARY_THINK_CONTENT}\n\n\n" From c397522dc075dc82c65f593a83e3f6e11dcd9142 Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Thu, 8 Jan 2026 17:38:48 +0800 Subject: [PATCH 4/6] fix: turn_count not decremented when LLM response is empty --- apps/miroflow-agent/src/core/orchestrator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index bd402fc4..76bd7b6c 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -1364,8 +1364,10 @@ async def run_main_agent( ) break else: + # LLM call failed, don't count this as a valid turn + turn_count -= 1 self.task_log.log_step( - "info", + "warning", f"Main Agent | Turn: {turn_count} | LLM Call", "No valid response from LLM, retrying", ) From f37e7215ed01533869a8b3609316925af67ec1ee Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Thu, 8 Jan 2026 17:51:59 +0800 Subject: [PATCH 5/6] update the code comments related to context management --- apps/miroflow-agent/src/core/orchestrator.py | 41 ++++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index 76bd7b6c..94a64050 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -98,6 +98,7 @@ def __init__( # Retry loop protection limits self.MAX_CONSECUTIVE_ROLLBACKS = 5 self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1 + # When format_error_retry_limit > 0, enables a context compression mechanism self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0) async def _stream_update(self, event_type: str, data: dict): @@ -420,15 +421,27 @@ async def _generate_failure_summary( tool_definitions: List[Dict], turn_count: int, ) -> Optional[str]: - """Generate a failure experience summary when task was not completed successfully. + """Generate a failure experience summary for context compression. + + This is the core of the context management mechanism. When a task attempt fails + (i.e., the task is not completed within the given turns and context window), + we compress the entire conversation history into a structured summary containing: + - Failure type: incomplete / blocked / misdirected / format_missed + - What happened: the approach taken and why a final answer was not reached + - Useful findings: facts, intermediate results, or conclusions to be reused + + This summary will be injected into the task description for the next retry, + effectively compressing potentially thousands of tokens of conversation into + a focused summary of ~500-1000 tokens. Args: - message_history: The conversation history. + system_prompt: The system prompt used in the conversation. + message_history: The full conversation history to be compressed. tool_definitions: Available tool definitions. turn_count: Current turn count for step ID. Returns: - The extracted failure experience summary, or None if generation failed. + The compressed failure experience summary, or None if generation failed. """ self.task_log.log_step( "info", @@ -686,7 +699,27 @@ async def _generate_and_finalize_answer( ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]: """Generate final answer and handle fallback based on context management settings. - There are 4 possible scenarios based on (context_management, reached_max_turns): + Context Management (format_error_retry_limit > 0) is essentially a context compression + mechanism that enables multi-attempt problem solving: + + 1. When the task is not completed within the given turns and context window, + we generate a "failure_experience_summary" - a structured post-mortem containing: + - Failure type: incomplete / blocked / misdirected / format_missed + - What happened: the approach taken and why it didn't reach a final answer + - Useful findings: facts, intermediate results, or conclusions to be reused + + 2. On retry, this compressed summary is injected into the task description, + allowing the model to: + - Start fresh with a clean context window + - Learn from previous failures without context overflow + - Try different approaches informed by past experience + + This is more effective than simply continuing with a long context because: + - Avoids context window limits + - Focuses attention on key learnings rather than raw conversation + - Allows multiple "fresh starts" with accumulated wisdom + + Decision table based on (context_management, reached_max_turns): | Context Management | Reached Max Turns | Behavior | |--------------------|-------------------|---------------------------------------------| From e133439c4659b70b77a2b430682e7d39bd506327 Mon Sep 17 00:00:00 2001 From: jenny-miromind Date: Fri, 9 Jan 2026 11:33:18 +0800 Subject: [PATCH 6/6] rename format_error_retry_limit to context_compress_limit --- apps/miroflow-agent/benchmarks/common_benchmark.py | 4 ++-- apps/miroflow-agent/conf/agent/default.yaml | 2 +- .../conf/agent/mirothinker_v1.0.yaml | 2 +- .../conf/agent/mirothinker_v1.0_keep5.yaml | 2 +- .../conf/agent/mirothinker_v1.5.yaml | 2 +- .../conf/agent/mirothinker_v1.5_keep5_max200.yaml | 2 +- .../conf/agent/mirothinker_v1.5_keep5_max400.yaml | 2 +- apps/miroflow-agent/conf/agent/multi_agent.yaml | 2 +- apps/miroflow-agent/conf/agent/multi_agent_os.yaml | 2 +- apps/miroflow-agent/conf/agent/single_agent.yaml | 2 +- .../conf/agent/single_agent_keep5.yaml | 2 +- apps/miroflow-agent/src/core/orchestrator.py | 14 +++++++------- 12 files changed, 19 insertions(+), 19 deletions(-) diff --git a/apps/miroflow-agent/benchmarks/common_benchmark.py b/apps/miroflow-agent/benchmarks/common_benchmark.py index 8fec0138..4b26138a 100644 --- a/apps/miroflow-agent/benchmarks/common_benchmark.py +++ b/apps/miroflow-agent/benchmarks/common_benchmark.py @@ -138,7 +138,7 @@ def __init__(self, data_dir: str, benchmark_name: str, cfg: DictConfig): # Format error tracking and retry configuration # Read from agent config as it's part of context management - self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0) + self.context_compress_limit = cfg.agent.get("context_compress_limit", 0) # Get LLM provider and model from the config object self.llm_provider = cfg.llm.provider @@ -293,7 +293,7 @@ def extract_timestamp(file_path): # Try to get a valid response with format retry print(f"TASK ID: {task.task_id}, ATTEMPT: {attempt}") - max_format_retries = self.format_error_retry_limit + max_format_retries = self.context_compress_limit # Track accumulated failure experiences for this attempt # Start with the original task description diff --git a/apps/miroflow-agent/conf/agent/default.yaml b/apps/miroflow-agent/conf/agent/default.yaml index 7ad6d72d..2d32ce1e 100644 --- a/apps/miroflow-agent/conf/agent/default.yaml +++ b/apps/miroflow-agent/conf/agent/default.yaml @@ -21,4 +21,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml index 455ec927..5cf8230e 100644 --- a/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml +++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.0.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml index 8efa6e65..273e14ce 100644 --- a/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml +++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.0_keep5.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: 5 -format_error_retry_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml index ddce2f10..67ddd661 100644 --- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml +++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml index dd42f9ae..a602ef8f 100644 --- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml +++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max200.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: 5 -format_error_retry_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml index 8dd348fd..011359a8 100644 --- a/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml +++ b/apps/miroflow-agent/conf/agent/mirothinker_v1.5_keep5_max400.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: 5 -format_error_retry_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/multi_agent.yaml b/apps/miroflow-agent/conf/agent/multi_agent.yaml index b4d46614..ed642c82 100644 --- a/apps/miroflow-agent/conf/agent/multi_agent.yaml +++ b/apps/miroflow-agent/conf/agent/multi_agent.yaml @@ -25,4 +25,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management +context_compress_limit: 0 # Maximum number of retries for format errors in context management diff --git a/apps/miroflow-agent/conf/agent/multi_agent_os.yaml b/apps/miroflow-agent/conf/agent/multi_agent_os.yaml index d2fd46c9..1fbe557a 100644 --- a/apps/miroflow-agent/conf/agent/multi_agent_os.yaml +++ b/apps/miroflow-agent/conf/agent/multi_agent_os.yaml @@ -25,4 +25,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management +context_compress_limit: 0 # Maximum number of retries for format errors in context management diff --git a/apps/miroflow-agent/conf/agent/single_agent.yaml b/apps/miroflow-agent/conf/agent/single_agent.yaml index 349f1f58..a04a60fe 100644 --- a/apps/miroflow-agent/conf/agent/single_agent.yaml +++ b/apps/miroflow-agent/conf/agent/single_agent.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: -1 -format_error_retry_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 0 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml b/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml index 0233358a..4a074360 100644 --- a/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml +++ b/apps/miroflow-agent/conf/agent/single_agent_keep5.yaml @@ -19,4 +19,4 @@ sub_agents: # Settings for context management keep_tool_result: 5 -format_error_retry_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file +context_compress_limit: 10 # Maximum number of retries for format errors in context management \ No newline at end of file diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index 94a64050..05a00195 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -98,8 +98,8 @@ def __init__( # Retry loop protection limits self.MAX_CONSECUTIVE_ROLLBACKS = 5 self.MAX_FINAL_ANSWER_RETRIES = 3 if cfg.agent.keep_tool_result == -1 else 1 - # When format_error_retry_limit > 0, enables a context compression mechanism - self.format_error_retry_limit = cfg.agent.get("format_error_retry_limit", 0) + # When context_compress_limit > 0, enables a context compression mechanism + self.context_compress_limit = cfg.agent.get("context_compress_limit", 0) async def _stream_update(self, event_type: str, data: dict): """Send streaming update in new SSE protocol format""" @@ -596,7 +596,7 @@ def _handle_no_context_management_fallback( final_summary: str, final_boxed_answer: Optional[str], ) -> Tuple[str, str, str]: - """Handle fallback when format_error_retry_limit == 0 (no context management). + """Handle fallback when context_compress_limit == 0 (no context management). In this mode, the model has only one chance to answer. We should try to use intermediate answers as fallback to maximize accuracy. @@ -646,7 +646,7 @@ def _handle_context_management_no_fallback( final_summary: str, final_boxed_answer: Optional[str], ) -> Tuple[str, str, str]: - """Handle failure when format_error_retry_limit > 0 (context management enabled). + """Handle failure when context_compress_limit > 0 (context management enabled). In this mode, the model has multiple chances to retry with context management. We should NOT guess or use intermediate answers, because: @@ -699,7 +699,7 @@ async def _generate_and_finalize_answer( ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]: """Generate final answer and handle fallback based on context management settings. - Context Management (format_error_retry_limit > 0) is essentially a context compression + Context Management (context_compress_limit > 0) is essentially a context compression mechanism that enables multi-attempt problem solving: 1. When the task is not completed within the given turns and context window, @@ -734,7 +734,7 @@ async def _generate_and_finalize_answer( Returns: Tuple of (final_summary, final_boxed_answer, failure_experience_summary, usage_log, message_history) """ - context_management_enabled = self.format_error_retry_limit > 0 + context_management_enabled = self.context_compress_limit > 0 failure_experience_summary = None usage_log = "" @@ -1761,7 +1761,7 @@ async def run_main_agent( self.current_agent_id = await self._stream_start_agent("Final Summary") await self._stream_start_llm("Final Summary") - # Generate final answer and handle fallback based on format_error_retry_limit + # Generate final answer and handle fallback based on context_compress_limit # If reached_max_turns is True and context management is enabled, # skip answer generation to avoid blind guessing (