diff --git a/core/relay/adaptor/openai/chat.go b/core/relay/adaptor/openai/chat.go index 40db1c7e..7eae6bdb 100644 --- a/core/relay/adaptor/openai/chat.go +++ b/core/relay/adaptor/openai/chat.go @@ -186,9 +186,9 @@ func (s *chatCompletionStreamState) handleFunctionCallArgumentsDelta( // handleOutputItemDone handles response.output_item.done event for ChatCompletion func (s *chatCompletionStreamState) handleOutputItemDone( event *relaymodel.ResponseStreamEvent, -) *relaymodel.ChatCompletionsStreamResponse { +) { if event.Item == nil { - return nil + return } // Handle function call completion @@ -205,32 +205,8 @@ func (s *chatCompletionStreamState) handleOutputItemDone( s.toolCallArgs = "" // No need to send another chunk - arguments already streamed - return nil - } - - // Handle message content - if len(event.Item.Content) > 0 { - for _, content := range event.Item.Content { - if (content.Type == "text" || content.Type == "output_text") && content.Text != "" { - return &relaymodel.ChatCompletionsStreamResponse{ - ID: s.messageID, - Object: relaymodel.ChatCompletionChunkObject, - Created: time.Now().Unix(), - Model: s.meta.ActualModel, - Choices: []*relaymodel.ChatCompletionsStreamResponseChoice{ - { - Index: 0, - Delta: relaymodel.Message{ - Content: content.Text, - }, - }, - }, - } - } - } + return } - - return nil } // handleResponseCompleted handles response.completed/done event for ChatCompletion @@ -1221,7 +1197,7 @@ func ConvertResponsesToChatCompletionStreamResponse( case relaymodel.EventFunctionCallArgumentsDelta: chatStreamResp = state.handleFunctionCallArgumentsDelta(&event) case relaymodel.EventOutputItemDone: - chatStreamResp = state.handleOutputItemDone(&event) + state.handleOutputItemDone(&event) case relaymodel.EventResponseCompleted, relaymodel.EventResponseDone: chatStreamResp = state.handleResponseCompleted(&event) } diff --git a/core/relay/adaptor/openai/chat_test.go b/core/relay/adaptor/openai/chat_test.go index dd5923f1..8cd68133 100644 --- a/core/relay/adaptor/openai/chat_test.go +++ b/core/relay/adaptor/openai/chat_test.go @@ -6,6 +6,7 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "strings" "testing" "github.com/gin-gonic/gin" @@ -515,6 +516,81 @@ func TestConvertResponsesToChatCompletionResponse(t *testing.T) { } } +func TestConvertResponsesToChatCompletionStreamResponseSkipsOutputItemDoneContent(t *testing.T) { + gin.SetMode(gin.TestMode) + + stream := strings.Join([]string{ + `data: {"type":"response.created","response":{"id":"resp_123","object":"response","created_at":1780731105,"status":"in_progress","model":"gpt-5.1","output":[],"parallel_tool_calls":true,"store":false}}`, + "", + `data: {"type":"response.output_item.added","item":{"id":"msg_123","type":"message","role":"assistant","content":[]}}`, + "", + `data: {"type":"response.output_text.delta","item_id":"msg_123","output_index":0,"content_index":0,"delta":"Hello! What would you like to discuss or work on?"}`, + "", + `data: {"type":"response.output_item.done","item":{"id":"msg_123","type":"message","role":"assistant","content":[{"type":"output_text","text":"Hello! What would you like to discuss or work on?"}]}}`, + "", + `data: {"type":"response.completed","response":{"id":"resp_123","object":"response","created_at":1780731105,"status":"completed","model":"gpt-5.1","output":[{"id":"msg_123","type":"message","role":"assistant","content":[{"type":"output_text","text":"Hello! What would you like to discuss or work on?"}]}],"parallel_tool_calls":true,"store":false,"usage":{"input_tokens":7,"output_tokens":22,"total_tokens":29}}}`, + "", + `data: [DONE]`, + "", + }, "\n") + + httpResp := &http.Response{ + StatusCode: http.StatusOK, + Body: &mockReadCloser{Reader: bytes.NewReader([]byte(stream))}, + Header: make(http.Header), + } + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequestWithContext( + t.Context(), + http.MethodPost, + "/v1/chat/completions", + nil, + ) + + m := &meta.Meta{ + ActualModel: "gpt-5.1", + } + + _, err := openai.ConvertResponsesToChatCompletionStreamResponse(m, c, httpResp) + require.Nil(t, err) + + content := collectChatCompletionStreamContent(t, w.Body.String()) + assert.Equal(t, "Hello! What would you like to discuss or work on?", content) + assert.Equal( + t, + 1, + strings.Count(w.Body.String(), "Hello! What would you like to discuss or work on?"), + ) +} + +func collectChatCompletionStreamContent(t *testing.T, body string) string { + t.Helper() + + var builder strings.Builder + + for line := range strings.SplitSeq(body, "\n") { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" { + continue + } + + var chunk relaymodel.ChatCompletionsStreamResponse + + err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk) + require.NoError(t, err) + + for _, choice := range chunk.Choices { + if content, ok := choice.Delta.Content.(string); ok { + builder.WriteString(content) + } + } + } + + return builder.String() +} + // mockReadCloser is a helper to create a ReadCloser from a Reader type mockReadCloser struct { *bytes.Reader diff --git a/core/relay/adaptor/openai/gemini_test.go b/core/relay/adaptor/openai/gemini_test.go index cd0ceca9..dbbecbef 100644 --- a/core/relay/adaptor/openai/gemini_test.go +++ b/core/relay/adaptor/openai/gemini_test.go @@ -61,6 +61,20 @@ func TestConvertGeminiRequest_MapsThinkingConfigToReasoningEffort(t *testing.T) }`, expectedEffort: "low", }, + { + name: "gpt-5.4 mini snapshot does not receive minimal", + actualModel: "gpt-5.4-mini-2026-03-17", + requestJSON: `{ + "generationConfig": { + "thinkingConfig": { + "thinkingBudget": 512, + "includeThoughts": true + } + }, + "contents": [{"role":"user","parts":[{"text":"hello"}]}] + }`, + expectedEffort: "low", + }, { name: "gpt-5 does not receive xhigh", actualModel: "gpt-5", diff --git a/core/relay/adaptor/openai/reasoning.go b/core/relay/adaptor/openai/reasoning.go index 548bb110..a77d1eb8 100644 --- a/core/relay/adaptor/openai/reasoning.go +++ b/core/relay/adaptor/openai/reasoning.go @@ -332,7 +332,19 @@ func isKnownOpenAIModelSuffix(suffix string) bool { } if matched, ok := strings.CutPrefix(suffix, "-"); ok { - return isDateSuffix(matched) + if isDateSuffix(matched) { + return true + } + + for _, variant := range []string{"mini", "nano", "chat-latest"} { + if matched == variant { + return true + } + + if dateSuffix, ok := strings.CutPrefix(matched, variant+"-"); ok { + return isDateSuffix(dateSuffix) + } + } } return false diff --git a/core/relay/adaptor/openai/reasoning_test.go b/core/relay/adaptor/openai/reasoning_test.go index 59920ee6..96ce81aa 100644 --- a/core/relay/adaptor/openai/reasoning_test.go +++ b/core/relay/adaptor/openai/reasoning_test.go @@ -127,6 +127,18 @@ func TestOpenAIReasoningEffortForModel(t *testing.T) { effort: relaymodel.ReasoningEffortMinimal, want: relaymodel.ReasoningEffortLow, }, + { + name: "series keyword matching handles mini dated snapshot names", + originModel: "gpt-5.4-mini-2026-03-17", + effort: relaymodel.ReasoningEffortMinimal, + want: relaymodel.ReasoningEffortLow, + }, + { + name: "series keyword matching handles nano dated snapshot names", + originModel: "gpt-5.4-nano-2026-03-17", + effort: relaymodel.ReasoningEffortMinimal, + want: relaymodel.ReasoningEffortLow, + }, } for _, tt := range tests { @@ -171,6 +183,13 @@ func TestConvertRequest_OpenAIReasoningEffortCompatibility(t *testing.T) { body: `{"model":"alias","input":"hi","reasoning":{"effort":"xhigh"}}`, wantEffort: "high", }, + { + name: "native responses gpt-5.4 mini snapshot minimal to low", + mode: mode.Responses, + actualModel: "gpt-5.4-mini-2026-03-17", + body: `{"model":"alias","input":"hi","reasoning":{"effort":"minimal"}}`, + wantEffort: "low", + }, { name: "native chat origin match beats actual fallback", mode: mode.ChatCompletions,