From e1842859789fbdba97c7f380b6526c842ea5e164 Mon Sep 17 00:00:00 2001 From: x1051445024 <1051445024@qq.com> Date: Tue, 12 May 2026 01:11:39 +0800 Subject: [PATCH 1/2] fix(provider/openai): normalize oversized tool_call IDs to avoid 400 from strict upstreams --- .../core/provider/sources/openai_source.py | 96 ++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 64e3a6645a..eeb1c6602e 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -1,7 +1,9 @@ import asyncio import base64 import copy +import hashlib import inspect + import json import random import re @@ -562,6 +564,78 @@ def _is_empty(content: Any) -> bool: payloads["messages"] = cleaned + @staticmethod + def _normalize_tool_call_ids(payloads: dict) -> None: + """Normalize oversized tool_call IDs before sending the request. + + Some OpenAI-compatible relay services return tool_call IDs that far + exceed the 64-character limit enforced by the OpenAI API spec + (observed lengths of 660 / 1650+ chars in the wild). Round-tripping + those IDs into the next request's messages triggers HTTP 400 + ``string_above_max_length`` from the upstream. + + This method rewrites any oversized ID to a deterministic short form + (``call_``, 37 chars). A shared map keeps assistant + ``tool_calls[].id`` and its matching tool ``tool_call_id`` in sync. + """ + messages = payloads.get("messages") + if not isinstance(messages, list): + return + + id_map: dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + + if role == "assistant": + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + tid = tc.get("id") + if tid and len(tid) > 64 and tid not in id_map: + id_map[tid] = "call_" + hashlib.md5( + tid.encode("utf-8") + ).hexdigest() + + elif role == "tool": + tid = msg.get("tool_call_id") + if tid and len(tid) > 64 and tid not in id_map: + id_map[tid] = "call_" + hashlib.md5( + tid.encode("utf-8") + ).hexdigest() + + if not id_map: + return + + logger.warning( + "Normalized %d oversized tool_call ID(s) before sending request.", + len(id_map), + ) + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + + if role == "assistant": + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + tid = tc.get("id") + if tid in id_map: + tc["id"] = id_map[tid] + + elif role == "tool": + tid = msg.get("tool_call_id") + if tid and tid in id_map: + msg["tool_call_id"] = id_map[tid] + async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse: if tools: model = payloads.get("model", "").lower() @@ -592,6 +666,7 @@ async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse: model = payloads.get("model", "").lower() self._sanitize_assistant_messages(payloads) + self._normalize_tool_call_ids(payloads) completion = await self.client.chat.completions.create( **payloads, @@ -644,6 +719,7 @@ async def _query_stream( self._apply_provider_specific_extra_body_overrides(extra_body) self._sanitize_assistant_messages(payloads) + self._normalize_tool_call_ids(payloads) stream = await self.client.chat.completions.create( **payloads, @@ -903,13 +979,27 @@ async def _parse_openai_completion( args = tool_call.function.arguments args_ls.append(args) func_name_ls.append(tool_call.function.name) - tool_call_ids.append(tool_call.id) + + raw_id = tool_call.id + if raw_id and len(raw_id) > 64: + safe_id = "call_" + hashlib.md5( + raw_id.encode("utf-8") + ).hexdigest() + logger.warning( + "tool_call.id exceeded 64 chars (length=%d); " + "normalized to a short ID. Original prefix: %s...", + len(raw_id), + raw_id[:80], + ) + else: + safe_id = raw_id + tool_call_ids.append(safe_id) # gemini-2.5 / gemini-3 series extra_content handling extra_content = getattr(tool_call, "extra_content", None) if extra_content is not None: - tool_call_extra_content_dict[tool_call.id] = extra_content - + tool_call_extra_content_dict[safe_id] = extra_content + llm_response.role = "tool" llm_response.tools_call_args = args_ls llm_response.tools_call_name = func_name_ls From 2be5ce814b23f31c71d13a68bb279030e47b3b83 Mon Sep 17 00:00:00 2001 From: x1051445024 <1051445024@qq.com> Date: Tue, 12 May 2026 01:45:21 +0800 Subject: [PATCH 2/2] =?UTF-8?q?refactor(provider/openai):=20address=20revi?= =?UTF-8?q?ew=20=E2=80=94=20extract=20helper,=20avoid=20logging=20raw=20ID?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract the shortening logic into a dedicated `_shorten_tool_call_id` helper, shared by `_parse_openai_completion` and `_normalize_tool_call_ids`, and document it as explicitly non-cryptographic. Addresses sourcery-ai and gemini-code-assist review comments about duplicated hashing logic. - Log only the original ID length and the normalized short form when a long tool_call ID is encountered; avoid including the raw ID prefix, which may be provider-specific and opaque. Addresses sourcery-ai review comment about potentially leaking raw IDs. - Apply `_normalize_tool_call_ids` through a small internal registration closure so each role's collection path is a single line. No behavior change when all IDs are already ≤64 chars. --- .../core/provider/sources/openai_source.py | 86 +++++++++++-------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index eeb1c6602e..6d8f6e6df5 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -564,19 +564,39 @@ def _is_empty(content: Any) -> bool: payloads["messages"] = cleaned - @staticmethod + @staticmethod + def _shorten_tool_call_id(raw_id: str | None) -> str | None: + """Deterministically shorten an oversized tool_call ID. + + Non-cryptographic by design; MUST NOT be used for any + security-sensitive purpose. Its only job is to normalize IDs + that exceed the 64-character limit enforced by the OpenAI API + spec into a stable, compact form so the same ID collapses to + the same short form across retries of the same request. + + Short IDs (or empty/None) are returned unchanged. + """ + if not raw_id or len(raw_id) <= 64: + return raw_id + # MD5 is used purely for deterministic compact hashing, not security. + return "call_" + hashlib.md5(raw_id.encode("utf-8")).hexdigest() + + @staticmethod def _normalize_tool_call_ids(payloads: dict) -> None: - """Normalize oversized tool_call IDs before sending the request. + """Normalize oversized tool_call IDs in outgoing payloads. - Some OpenAI-compatible relay services return tool_call IDs that far - exceed the 64-character limit enforced by the OpenAI API spec + Some OpenAI-compatible relay services return tool_call IDs that + far exceed the 64-character limit enforced by the OpenAI API spec (observed lengths of 660 / 1650+ chars in the wild). Round-tripping - those IDs into the next request's messages triggers HTTP 400 - ``string_above_max_length`` from the upstream. - - This method rewrites any oversized ID to a deterministic short form - (``call_``, 37 chars). A shared map keeps assistant - ``tool_calls[].id`` and its matching tool ``tool_call_id`` in sync. + those IDs into the next request's ``messages[].tool_calls[].id`` or + ``tool_call_id`` fields triggers HTTP 400 ``string_above_max_length`` + from the upstream. Some relays internally translate Chat Completions + payloads into the Responses API format, which renames + ``tool_call_id`` to ``call_id`` — but the root cause is the same. + + A shared map keeps assistant ``tool_calls[].id`` and its matching + tool ``tool_call_id`` in sync after normalization. The conversation + history is mutated in place. """ messages = payloads.get("messages") if not isinstance(messages, list): @@ -584,6 +604,14 @@ def _normalize_tool_call_ids(payloads: dict) -> None: id_map: dict[str, str] = {} + def _register(tid: str | None) -> None: + if not tid or tid in id_map or len(tid) <= 64: + return + shortened = ProviderOpenAIOfficial._shorten_tool_call_id(tid) + if shortened is not None and shortened != tid: + id_map[tid] = shortened + + # First pass: collect every oversized ID. for msg in messages: if not isinstance(msg, dict): continue @@ -593,20 +621,10 @@ def _normalize_tool_call_ids(payloads: dict) -> None: tool_calls = msg.get("tool_calls") if isinstance(tool_calls, list): for tc in tool_calls: - if not isinstance(tc, dict): - continue - tid = tc.get("id") - if tid and len(tid) > 64 and tid not in id_map: - id_map[tid] = "call_" + hashlib.md5( - tid.encode("utf-8") - ).hexdigest() - + if isinstance(tc, dict): + _register(tc.get("id")) elif role == "tool": - tid = msg.get("tool_call_id") - if tid and len(tid) > 64 and tid not in id_map: - id_map[tid] = "call_" + hashlib.md5( - tid.encode("utf-8") - ).hexdigest() + _register(msg.get("tool_call_id")) if not id_map: return @@ -616,6 +634,7 @@ def _normalize_tool_call_ids(payloads: dict) -> None: len(id_map), ) + # Second pass: apply the rewrite map. for msg in messages: if not isinstance(msg, dict): continue @@ -630,7 +649,6 @@ def _normalize_tool_call_ids(payloads: dict) -> None: tid = tc.get("id") if tid in id_map: tc["id"] = id_map[tid] - elif role == "tool": tid = msg.get("tool_call_id") if tid and tid in id_map: @@ -979,26 +997,26 @@ async def _parse_openai_completion( args = tool_call.function.arguments args_ls.append(args) func_name_ls.append(tool_call.function.name) - + raw_id = tool_call.id - if raw_id and len(raw_id) > 64: - safe_id = "call_" + hashlib.md5( - raw_id.encode("utf-8") - ).hexdigest() + safe_id = self._shorten_tool_call_id(raw_id) + if raw_id and safe_id != raw_id: + # Log only the length and the normalized short ID — + # the raw ID is opaque and may be provider-specific, + # so we avoid leaking its prefix into logs. logger.warning( "tool_call.id exceeded 64 chars (length=%d); " - "normalized to a short ID. Original prefix: %s...", + "normalized to %s", len(raw_id), - raw_id[:80], + safe_id, ) - else: - safe_id = raw_id + tool_call_ids.append(safe_id) # gemini-2.5 / gemini-3 series extra_content handling extra_content = getattr(tool_call, "extra_content", None) if extra_content is not None: - tool_call_extra_content_dict[safe_id] = extra_content + tool_call_extra_content_dict[safe_id] = extra_content llm_response.role = "tool" llm_response.tools_call_args = args_ls