diff --git a/core/common/consume/consume.go b/core/common/consume/consume.go index b72f5b9c..381f0f9d 100644 --- a/core/common/consume/consume.go +++ b/core/common/consume/consume.go @@ -197,11 +197,15 @@ func checkNeedRecordConsume(code int, meta *meta.Meta) bool { mode.VideosDelete, mode.GeminiFiles, mode.GeminiVideoOperations, + mode.AliVideoTasks, + mode.DoubaoVideoTasks, mode.ResponsesGet, mode.ResponsesDelete, mode.ResponsesCancel, mode.ResponsesInputItems: return code != http.StatusOK + case mode.DoubaoVideoTasksDelete: + return code != http.StatusOK && code != http.StatusNoContent default: return true } diff --git a/core/common/consume/consume_record_test.go b/core/common/consume/consume_record_test.go index 7b5e48dc..3486f0ec 100644 --- a/core/common/consume/consume_record_test.go +++ b/core/common/consume/consume_record_test.go @@ -11,11 +11,16 @@ import ( func TestNeedRecordConsumeSkipsSuccessfulStoredVideoReads(t *testing.T) { tests := []mode.Mode{ + mode.VideoGenerationsGetJobs, + mode.VideoGenerationsContent, mode.VideosGet, mode.VideosContent, mode.VideosDelete, mode.GeminiFiles, mode.GeminiVideoOperations, + mode.AliVideoTasks, + mode.DoubaoVideoTasks, + mode.DoubaoVideoTasksDelete, } for _, relayMode := range tests { @@ -37,6 +42,15 @@ func TestNeedRecordConsumeSkipsSuccessfulStoredVideoReads(t *testing.T) { } } +func TestNeedRecordConsumeSkipsSuccessfulDoubaoNativeDeleteNoContent(t *testing.T) { + if consume.NeedRecordConsumeForTest( + http.StatusNoContent, + &meta.Meta{Mode: mode.DoubaoVideoTasksDelete}, + ) { + t.Fatal("expected successful doubao native delete 204 request not to record consume") + } +} + func TestNeedRecordConsumeRecordsVideoCreateAndRemix(t *testing.T) { tests := []mode.Mode{ mode.Videos, diff --git a/core/controller/log_export.go b/core/controller/log_export.go index 8469472f..bd40e936 100644 --- a/core/controller/log_export.go +++ b/core/controller/log_export.go @@ -457,6 +457,8 @@ func buildLogExportHeader(includeChannel, includeRetryAt bool) []string { "native_resolution", "quality", "service_tier", + "input_video", + "output_audio", "ttfb_milliseconds", "retry_times", "input_tokens", @@ -537,6 +539,8 @@ func buildLogExportRow( sanitizeCSVCell(logItem.UsageContext.NativeResolution), sanitizeCSVCell(logItem.UsageContext.Quality), sanitizeCSVCell(logItem.UsageContext.ServiceTier), + formatOptionalBool(logItem.UsageContext.InputVideo), + formatOptionalBool(logItem.UsageContext.OutputAudio), strconv.FormatInt(int64(logItem.TTFBMilliseconds), 10), strconv.FormatInt(int64(logItem.RetryTimes), 10), strconv.FormatInt(int64(logItem.Usage.InputTokens), 10), @@ -570,6 +574,14 @@ func buildLogExportRow( ) } +func formatOptionalBool(value *bool) string { + if value == nil { + return "" + } + + return strconv.FormatBool(*value) +} + func formatTimeForExport(t time.Time, location *time.Location) string { if t.IsZero() { return "" diff --git a/core/controller/log_export_test.go b/core/controller/log_export_test.go index 253309c6..f243fc2a 100644 --- a/core/controller/log_export_test.go +++ b/core/controller/log_export_test.go @@ -185,6 +185,8 @@ func TestBuildLogExportCSVIncludesFullUsageContext(t *testing.T) { NativeResolution: "1080p", Quality: "high", ServiceTier: "priority", + InputVideo: new(true), + OutputAudio: new(false), }, }, }, time.UTC, false, false) @@ -209,6 +211,14 @@ func TestBuildLogExportCSVIncludesFullUsageContext(t *testing.T) { if values["service_tier"] != "priority" { t.Fatalf("expected service_tier to be exported, got %q", values["service_tier"]) } + + if values["input_video"] != "true" { + t.Fatalf("expected input_video to be exported, got %q", values["input_video"]) + } + + if values["output_audio"] != "false" { + t.Fatalf("expected output_audio to be exported, got %q", values["output_audio"]) + } } func TestSanitizeFilename(t *testing.T) { diff --git a/core/controller/relay-controller.go b/core/controller/relay-controller.go index 3001ec4f..da18f687 100644 --- a/core/controller/relay-controller.go +++ b/core/controller/relay-controller.go @@ -206,6 +206,14 @@ func relayController(m mode.Mode) RelayController { c.ValidateRequest = controller.ValidateGeminiVideoRequest c.GetRequestPrice = controller.GetGeminiVideoRequestPrice c.GetRequestUsage = controller.GetGeminiVideoRequestUsage + case mode.AliVideo: + c.ValidateRequest = controller.ValidateAliVideoRequest + c.GetRequestPrice = controller.GetAliVideoRequestPrice + c.GetRequestUsage = controller.GetAliVideoRequestUsage + case mode.DoubaoVideo: + c.ValidateRequest = controller.ValidateDoubaoVideoRequest + c.GetRequestPrice = controller.GetDoubaoVideoRequestPrice + c.GetRequestUsage = controller.GetDoubaoVideoRequestUsage case mode.Responses: c.GetRequestUsage = controller.GetResponsesRequestUsage } diff --git a/core/controller/relay-controller_test.go b/core/controller/relay-controller_test.go index f127c800..ae115f5d 100644 --- a/core/controller/relay-controller_test.go +++ b/core/controller/relay-controller_test.go @@ -140,6 +140,16 @@ func TestRelayControllerVideoModesValidateRequests(t *testing.T) { mode: mode.GeminiVideo, want: relaycontroller.ValidateGeminiVideoRequest, }, + { + name: "ali native video", + mode: mode.AliVideo, + want: relaycontroller.ValidateAliVideoRequest, + }, + { + name: "doubao native video", + mode: mode.DoubaoVideo, + want: relaycontroller.ValidateDoubaoVideoRequest, + }, } for _, tt := range tests { diff --git a/core/docs/docs.go b/core/docs/docs.go index d5049d7a..239df11b 100644 --- a/core/docs/docs.go +++ b/core/docs/docs.go @@ -15006,8 +15006,17 @@ const docTemplate = `{ "resolution": { "type": "string" }, + "native_resolution": { + "type": "string" + }, "service_tier": { "type": "string" + }, + "input_video": { + "type": "boolean" + }, + "output_audio": { + "type": "boolean" } } }, diff --git a/core/docs/swagger.json b/core/docs/swagger.json index dec99f63..1e712e32 100644 --- a/core/docs/swagger.json +++ b/core/docs/swagger.json @@ -14997,8 +14997,17 @@ "resolution": { "type": "string" }, + "native_resolution": { + "type": "string" + }, "service_tier": { "type": "string" + }, + "input_video": { + "type": "boolean" + }, + "output_audio": { + "type": "boolean" } } }, @@ -15259,4 +15268,4 @@ "in": "header" } } -} \ No newline at end of file +} diff --git a/core/docs/swagger.yaml b/core/docs/swagger.yaml index f2913770..f554968a 100644 --- a/core/docs/swagger.yaml +++ b/core/docs/swagger.yaml @@ -3508,8 +3508,14 @@ definitions: type: string resolution: type: string + native_resolution: + type: string service_tier: type: string + input_video: + type: boolean + output_audio: + type: boolean type: object model.Video: properties: diff --git a/core/model/usage.go b/core/model/usage.go index 0660deec..36b6db90 100644 --- a/core/model/usage.go +++ b/core/model/usage.go @@ -21,6 +21,8 @@ type PriceCondition struct { Resolution []string `json:"resolution,omitempty"` Quality []string `json:"quality,omitempty"` ServiceTier string `json:"service_tier,omitempty"` + InputVideo *bool `json:"input_video,omitempty"` + OutputAudio *bool `json:"output_audio,omitempty"` } type ConditionalPrice struct { @@ -128,6 +130,14 @@ func qualityConditionValuesOverlap(values1, values2 []string) bool { return false } +func boolConditionOverlap(value1, value2 *bool) bool { + if value1 == nil || value2 == nil { + return true + } + + return *value1 == *value2 +} + func priceConditionSpecificity(condition PriceCondition) int { specificity := 0 @@ -143,6 +153,14 @@ func priceConditionSpecificity(condition PriceCondition) int { specificity++ } + if condition.InputVideo != nil { + specificity++ + } + + if condition.OutputAudio != nil { + specificity++ + } + if condition.InputTokenMin > 0 { specificity++ } @@ -380,6 +398,11 @@ func (p *Price) ValidateConditionalPrices() error { continue } + if !boolConditionOverlap(condition.InputVideo, otherCondition.InputVideo) || + !boolConditionOverlap(condition.OutputAudio, otherCondition.OutputAudio) { + continue + } + // Check input token range overlap if hasRangeOverlap( condition.InputTokenMin, condition.InputTokenMax, @@ -673,6 +696,8 @@ type UsageContext struct { NativeResolution string `gorm:"size:32" json:"native_resolution,omitempty"` Quality string `gorm:"size:32" json:"quality,omitempty"` ServiceTier string `gorm:"size:32" json:"service_tier,omitempty"` + InputVideo *bool ` json:"input_video,omitempty"` + OutputAudio *bool ` json:"output_audio,omitempty"` } func (c UsageContext) PriceConditionMatches(condition PriceCondition) bool { @@ -708,6 +733,18 @@ func (c UsageContext) priceConditionMatches( return false } + if condition.InputVideo != nil { + if c.InputVideo == nil || *c.InputVideo != *condition.InputVideo { + return false + } + } + + if condition.OutputAudio != nil { + if c.OutputAudio == nil || *c.OutputAudio != *condition.OutputAudio { + return false + } + } + return true } @@ -728,6 +765,14 @@ func (c UsageContext) WithFallback(fallback UsageContext) UsageContext { c.Quality = fallback.Quality } + if c.InputVideo == nil { + c.InputVideo = fallback.InputVideo + } + + if c.OutputAudio == nil { + c.OutputAudio = fallback.OutputAudio + } + return c } diff --git a/core/model/usage_test.go b/core/model/usage_test.go index e65f605f..7ee460c2 100644 --- a/core/model/usage_test.go +++ b/core/model/usage_test.go @@ -981,6 +981,23 @@ func TestUsageContextWithFallbackPreservesNativeResolution(t *testing.T) { } } +func TestUsageContextWithFallbackPreservesMediaFlags(t *testing.T) { + resultContext := model.UsageContext{OutputAudio: new(false)} + requestContext := model.UsageContext{ + InputVideo: new(true), + OutputAudio: new(true), + } + + got := resultContext.WithFallback(requestContext) + if got.InputVideo == nil || !*got.InputVideo { + t.Fatalf("expected input video fallback true, got %#v", got.InputVideo) + } + + if got.OutputAudio == nil || *got.OutputAudio { + t.Fatalf("expected existing output audio false, got %#v", got.OutputAudio) + } +} + func TestPrice_SelectConditionalPrice_WithMediaConditions(t *testing.T) { price := model.Price{ OutputPrice: 0.08, @@ -1125,6 +1142,73 @@ func TestPrice_SelectConditionalPrice_WithMediaConditions(t *testing.T) { } } +func TestPrice_SelectConditionalPrice_WithMediaFlags(t *testing.T) { + price := model.Price{ + OutputPrice: 0.20, + ConditionalPrices: []model.ConditionalPrice{ + { + Condition: model.PriceCondition{ + Resolution: []string{"720p"}, + InputVideo: new(false), + }, + Price: model.Price{OutputPrice: 0.046}, + }, + { + Condition: model.PriceCondition{ + Resolution: []string{"720p"}, + InputVideo: new(true), + }, + Price: model.Price{OutputPrice: 0.028}, + }, + { + Condition: model.PriceCondition{ + ServiceTier: "flex", + OutputAudio: new(false), + }, + Price: model.Price{OutputPrice: 0.004}, + }, + { + Condition: model.PriceCondition{ServiceTier: "flex"}, + Price: model.Price{OutputPrice: 0.008}, + }, + }, + } + + inputVideoPrice := price.SelectConditionalPrice(model.Usage{}, model.UsageContext{ + Resolution: "1280x720", + InputVideo: new(true), + }) + if float64(inputVideoPrice.OutputPrice) != 0.028 { + t.Fatalf("expected input video price 0.028, got %v", inputVideoPrice.OutputPrice) + } + + textOnlyPrice := price.SelectConditionalPrice(model.Usage{}, model.UsageContext{ + Resolution: "720p", + InputVideo: new(false), + }) + if float64(textOnlyPrice.OutputPrice) != 0.046 { + t.Fatalf("expected text-only price 0.046, got %v", textOnlyPrice.OutputPrice) + } + + unknownInputVideoPrice := price.SelectConditionalPrice(model.Usage{}, model.UsageContext{ + Resolution: "720p", + }) + if float64(unknownInputVideoPrice.OutputPrice) != 0.20 { + t.Fatalf( + "expected base price when input video is unknown, got %v", + unknownInputVideoPrice.OutputPrice, + ) + } + + silentFlexPrice := price.SelectConditionalPrice(model.Usage{}, model.UsageContext{ + ServiceTier: "flex", + OutputAudio: new(false), + }) + if float64(silentFlexPrice.OutputPrice) != 0.004 { + t.Fatalf("expected specific silent flex price 0.004, got %v", silentFlexPrice.OutputPrice) + } +} + func TestPrice_SelectConditionalPrice_ResolutionAndQualityNormalizationAreIndependent( t *testing.T, ) { @@ -1515,6 +1599,54 @@ func TestPrice_ValidateConditionalPrices_WithMediaConditions(t *testing.T) { }, wantErr: false, }, + { + name: "same ranges with different input video flags are allowed", + price: model.Price{ + ConditionalPrices: []model.ConditionalPrice{ + { + Condition: model.PriceCondition{InputVideo: new(false)}, + Price: model.Price{OutputPrice: 0.08}, + }, + { + Condition: model.PriceCondition{InputVideo: new(true)}, + Price: model.Price{OutputPrice: 0.04}, + }, + }, + }, + wantErr: false, + }, + { + name: "same ranges with different output audio flags are allowed", + price: model.Price{ + ConditionalPrices: []model.ConditionalPrice{ + { + Condition: model.PriceCondition{OutputAudio: new(false)}, + Price: model.Price{OutputPrice: 0.08}, + }, + { + Condition: model.PriceCondition{OutputAudio: new(true)}, + Price: model.Price{OutputPrice: 0.04}, + }, + }, + }, + wantErr: false, + }, + { + name: "same media flag overlaps", + price: model.Price{ + ConditionalPrices: []model.ConditionalPrice{ + { + Condition: model.PriceCondition{InputVideo: new(true)}, + Price: model.Price{OutputPrice: 0.08}, + }, + { + Condition: model.PriceCondition{InputVideo: new(true)}, + Price: model.Price{OutputPrice: 0.04}, + }, + }, + }, + wantErr: true, + }, } for _, tt := range tests { diff --git a/core/relay/adaptor/ali/adaptor_test.go b/core/relay/adaptor/ali/adaptor_test.go index 1cdd04bb..65543fbe 100644 --- a/core/relay/adaptor/ali/adaptor_test.go +++ b/core/relay/adaptor/ali/adaptor_test.go @@ -3355,6 +3355,186 @@ func TestAliVideoAsyncUsageUsesStoredSizeWhenUpstreamRatioMissing(t *testing.T) } } +func TestAliNativeVideoAsyncUsageUsesNativeResolution(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/custom/api/v1/tasks/task-123" { + t.Fatalf("expected task path, got %s", r.URL.Path) + } + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "output": { + "task_id": "task-123", + "task_status": "SUCCEEDED" + }, + "usage": { + "duration": 8, + "input_video_duration": 3, + "output_video_duration": 5, + "SR": 720, + "ratio": "9:16" + } + }`)) + })) + defer server.Close() + + aliAdaptor := &Adaptor{} + + usage, usageContext, completed, err := aliAdaptor.FetchAsyncUsage( + context.Background(), + adaptor.AsyncUsageRequest{ + Channel: &coremodel.Channel{ + BaseURL: server.URL + "/fallback", + Key: "test-key", + }, + Info: &coremodel.AsyncUsageInfo{ + Mode: int(mode.AliVideo), + BaseURL: server.URL + "/custom", + UpstreamID: "task-123", + GroupID: "group-1", + TokenID: 7, + }, + }, + ) + if err != nil { + t.Fatalf("FetchAsyncUsage returned error: %v", err) + } + + if !completed { + t.Fatal("expected async usage to be completed") + } + + if usageContext.Resolution != "720P" || + usageContext.NativeResolution != "720P" { + t.Fatalf("unexpected native usage context: %#v", usageContext) + } + + if usage.VideoInputTokens != coremodel.ZeroNullInt64(3) || + usage.OutputTokens != coremodel.ZeroNullInt64(5) || + usage.TotalTokens != coremodel.ZeroNullInt64(8) { + t.Fatalf("unexpected usage: %#v", usage) + } +} + +func TestAliNativeVideoAsyncUsageUsesNativeFallback(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/custom/api/v1/tasks/task-123" { + t.Fatalf("expected task path, got %s", r.URL.Path) + } + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "output": { + "task_id": "task-123", + "task_status": "SUCCEEDED" + }, + "usage": { + "duration": 8, + "input_video_duration": 3, + "output_video_duration": 5 + } + }`)) + })) + defer server.Close() + + aliAdaptor := &Adaptor{} + + _, usageContext, completed, err := aliAdaptor.FetchAsyncUsage( + context.Background(), + adaptor.AsyncUsageRequest{ + Channel: &coremodel.Channel{ + BaseURL: server.URL + "/fallback", + Key: "test-key", + }, + Info: &coremodel.AsyncUsageInfo{ + Mode: int(mode.AliVideo), + BaseURL: server.URL + "/custom", + UpstreamID: "task-123", + GroupID: "group-1", + TokenID: 7, + UsageContext: coremodel.UsageContext{ + Resolution: "1080P", + NativeResolution: "1080P", + }, + }, + }, + ) + if err != nil { + t.Fatalf("FetchAsyncUsage returned error: %v", err) + } + + if !completed { + t.Fatal("expected async usage to be completed") + } + + if usageContext.Resolution != "1080P" || + usageContext.NativeResolution != "1080P" { + t.Fatalf("unexpected native usage context fallback: %#v", usageContext) + } +} + +func TestAliNativeVideoAsyncUsageUsesStoredNativeFallback(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/custom/api/v1/tasks/task-123" { + t.Fatalf("expected task path, got %s", r.URL.Path) + } + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "output": { + "task_id": "task-123", + "task_status": "SUCCEEDED" + }, + "usage": { + "duration": 8, + "input_video_duration": 3, + "output_video_duration": 5 + } + }`)) + })) + defer server.Close() + + aliAdaptor := &Adaptor{} + store := &aliTestStore{ + saved: []adaptor.StoreCache{ + { + ID: coremodel.VideoGenerationStoreID("task-123"), + Metadata: `{"prompt":"Stored prompt","seconds":5,"size":"1080P"}`, + }, + }, + } + + _, usageContext, completed, err := aliAdaptor.FetchAsyncUsage( + context.Background(), + adaptor.AsyncUsageRequest{ + Channel: &coremodel.Channel{ + BaseURL: server.URL + "/fallback", + Key: "test-key", + }, + Info: &coremodel.AsyncUsageInfo{ + Mode: int(mode.AliVideo), + BaseURL: server.URL + "/custom", + UpstreamID: "task-123", + GroupID: "group-1", + TokenID: 7, + }, + Store: store, + }, + ) + if err != nil { + t.Fatalf("FetchAsyncUsage returned error: %v", err) + } + + if !completed { + t.Fatal("expected async usage to be completed") + } + + if usageContext.Resolution != "1080P" || + usageContext.NativeResolution != "1080P" { + t.Fatalf("unexpected native usage context fallback: %#v", usageContext) + } +} + func TestAdaptorDoResponseResponsesDeleteNoContent(t *testing.T) { gin.SetMode(gin.TestMode) diff --git a/core/relay/adaptor/ali/ali_video.go b/core/relay/adaptor/ali/ali_video.go index 88b47eb5..1cc2ccd5 100644 --- a/core/relay/adaptor/ali/ali_video.go +++ b/core/relay/adaptor/ali/ali_video.go @@ -30,6 +30,8 @@ func ConvertAliNativeVideoRequest( return adaptor.ConvertResult{}, err } + setAliNativeVideoRequestMetadata(meta, &body) + data, err := body.MarshalJSON() if err != nil { return adaptor.ConvertResult{}, err @@ -92,9 +94,10 @@ func AliNativeVideoHandler( writeAliNativeJSONResponse(c, resp, body) return adaptor.DoResponseResult{ - UpstreamID: taskID, - AsyncUsage: true, - UsageContext: aliVideoUsageContext(meta, aliResponse.Usage), + UpstreamID: taskID, + AsyncUsage: true, + UsageContext: aliNativeVideoUsageContext(aliResponse.Usage). + WithFallback(aliNativeVideoRequestUsageContext(meta)), }, nil } @@ -124,6 +127,16 @@ func AliNativeVideoTaskHandler( } taskID := strings.TrimSpace(aliResponse.Output.TaskID) + if taskID == "" { + taskID = meta.VideoID + } + + applyStoredAliVideoRequestMetadata( + meta, + store, + coremodel.VideoGenerationStoreID(taskID), + ) + if taskID != "" { if err := saveAliNativeVideoStore( meta, @@ -138,11 +151,112 @@ func AliNativeVideoTaskHandler( writeAliNativeJSONResponse(c, resp, body) return adaptor.DoResponseResult{ - UpstreamID: firstNonEmpty(taskID, meta.VideoID), - UsageContext: aliVideoUsageContext(meta, aliResponse.Usage), + UpstreamID: firstNonEmpty(taskID, meta.VideoID), + UsageContext: aliNativeVideoUsageContext(aliResponse.Usage). + WithFallback(aliNativeVideoRequestUsageContext(meta)), }, nil } +func setAliNativeVideoRequestMetadata(meta *meta.Meta, body *ast.Node) { + if meta == nil { + return + } + + if input := body.Get( + "input", + ); input != nil && input.Exists() && + input.TypeSafe() != ast.V_NULL { + if prompt := aliNativeVideoString(input.Get("prompt")); prompt != "" { + meta.Set(metaAliVideoPrompt, prompt) + } + } + + parameters := body.Get("parameters") + if parameters == nil || !parameters.Exists() || parameters.TypeSafe() == ast.V_NULL { + return + } + + size := firstNonEmpty( + aliNativeVideoString(parameters.Get("size")), + aliNativeVideoString(parameters.Get("resolution")), + ) + if size != "" { + meta.Set(metaAliVideoSize, size) + } + + if duration := aliNativeVideoInt(parameters.Get("duration")); duration > 0 { + meta.Set(metaAliVideoSeconds, duration) + } +} + +func aliNativeVideoString(node *ast.Node) string { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return "" + } + + value, err := node.String() + if err != nil { + return "" + } + + return strings.TrimSpace(value) +} + +func aliNativeVideoInt(node *ast.Node) int { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return 0 + } + + if node.TypeSafe() == ast.V_STRING { + value, err := node.String() + if err != nil { + return 0 + } + + parsed, err := strconv.Atoi(strings.TrimSpace(value)) + if err != nil || parsed <= 0 { + return 0 + } + + return parsed + } + + value, err := node.Int64() + if err != nil || value <= 0 { + return 0 + } + + return int(value) +} + +func aliNativeVideoUsageContext(usage relaymodel.AliVideoUsage) coremodel.UsageContext { + nativeResolution := aliVideoNativeResolution(usage) + if nativeResolution == "" { + return coremodel.UsageContext{} + } + + return coremodel.UsageContext{ + Resolution: nativeResolution, + NativeResolution: nativeResolution, + } +} + +func aliNativeVideoRequestUsageContext(meta *meta.Meta) coremodel.UsageContext { + if meta == nil { + return coremodel.UsageContext{} + } + + nativeResolution := strings.TrimSpace(meta.GetString(metaAliVideoSize)) + if nativeResolution == "" { + return coremodel.UsageContext{} + } + + return coremodel.UsageContext{ + Resolution: nativeResolution, + NativeResolution: nativeResolution, + } +} + func readAliNativeVideoResponseBody(resp *http.Response) ([]byte, adaptor.Error) { if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { return nil, ErrorHanlder(resp) @@ -179,6 +293,7 @@ func saveAliNativeVideoStore( TokenID: meta.Token.ID, ChannelID: meta.Channel.ID, Model: meta.OriginModel, + Metadata: aliVideoStoreMetadataString(meta, taskID), ExpiresAt: expiresAt, }) } diff --git a/core/relay/adaptor/ali/ali_video_test.go b/core/relay/adaptor/ali/ali_video_test.go index 7752147f..d5bb2bd8 100644 --- a/core/relay/adaptor/ali/ali_video_test.go +++ b/core/relay/adaptor/ali/ali_video_test.go @@ -23,14 +23,15 @@ func TestConvertAliNativeVideoRequestPreservesBodyAndRewritesModel(t *testing.T) http.MethodPost, "/api/v1/services/aigc/video-generation/video-synthesis", bytes.NewBufferString( - `{"model":"wan2.5-t2v-preview","input":{"prompt":"go"},"parameters":{"size":"720P"}}`, + `{"model":"wan2.5-t2v-preview","input":{"prompt":"go"},"parameters":{"duration":5,"size":"720P"}}`, ), ) req.Header.Set("Content-Type", "application/json") - result, err := ConvertAliNativeVideoRequest(&meta.Meta{ - ActualModel: "mapped-wan", - }, req) + m := meta.NewMeta(nil, mode.AliVideo, "wan2.5-t2v-preview", coremodel.ModelConfig{}) + m.ActualModel = "mapped-wan" + + result, err := ConvertAliNativeVideoRequest(m, req) if err != nil { t.Fatalf("ConvertAliNativeVideoRequest returned error: %v", err) } @@ -57,6 +58,14 @@ func TestConvertAliNativeVideoRequestPreservesBodyAndRewritesModel(t *testing.T) if !ok || parameters["size"] != "720P" { t.Fatalf("parameters were not preserved: %#v", body["parameters"]) } + + if got := aliNativeVideoRequestUsageContext(m).Resolution; got != "720P" { + t.Fatalf("expected native request usage context resolution, got %q", got) + } + + if got := m.GetInt(metaAliVideoSeconds); got != 5 { + t.Fatalf("expected native request duration metadata, got %d", got) + } } func TestAliNativeVideoHandlerPassesThroughAndStoresTask(t *testing.T) { @@ -69,14 +78,14 @@ func TestAliNativeVideoHandlerPassesThroughAndStoresTask(t *testing.T) { ctx.Request = httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/", nil) store := &aliTestStore{} - m := &meta.Meta{ - Mode: mode.AliVideo, - OriginModel: "wan2.5-t2v-preview", - Group: coremodel.GroupCache{ID: "group-1"}, - Token: coremodel.TokenCache{ID: 7}, - Channel: meta.ChannelMeta{ID: 42}, - } - respBody := `{"request_id":"req-1","output":{"task_id":"task-123","task_status":"PENDING"}}` + m := meta.NewMeta(nil, mode.AliVideo, "wan2.5-t2v-preview", coremodel.ModelConfig{}) + m.Group = coremodel.GroupCache{ID: "group-1"} + m.Token = coremodel.TokenCache{ID: 7} + m.Channel = meta.ChannelMeta{ID: 42} + m.Set(metaAliVideoSize, "1080P") + m.Set(metaAliVideoSeconds, 6) + + respBody := `{"request_id":"req-1","output":{"task_id":"task-123","task_status":"PENDING"},"usage":{"SR":720,"ratio":"16:9"}}` resp := &http.Response{ StatusCode: http.StatusOK, Header: http.Header{"Content-Type": {"application/json"}}, @@ -96,6 +105,11 @@ func TestAliNativeVideoHandlerPassesThroughAndStoresTask(t *testing.T) { t.Fatalf("unexpected result: %#v", result) } + if result.UsageContext.Resolution != "720P" || + result.UsageContext.NativeResolution != "720P" { + t.Fatalf("unexpected native usage context: %#v", result.UsageContext) + } + if len(store.saved) != 1 { t.Fatalf("expected one store save, got %d", len(store.saved)) } @@ -105,4 +119,44 @@ func TestAliNativeVideoHandlerPassesThroughAndStoresTask(t *testing.T) { store.saved[0].Model != "wan2.5-t2v-preview" { t.Fatalf("unexpected saved store: %#v", store.saved[0]) } + + metadata, err := parseAliVideoStoreMetadata(store.saved[0].Metadata) + if err != nil { + t.Fatalf("parse saved metadata: %v", err) + } + + if metadata.Size != "1080P" || metadata.Seconds != 6 || metadata.UpstreamID != "task-123" { + t.Fatalf("unexpected saved metadata: %#v", metadata) + } +} + +func TestAliNativeVideoHandlerUsesNativeRequestResolutionFallback(t *testing.T) { + t.Parallel() + + gin.SetMode(gin.TestMode) + + recorder := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(recorder) + ctx.Request = httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/", nil) + + m := meta.NewMeta(nil, mode.AliVideo, "wan2.5-t2v-preview", coremodel.ModelConfig{}) + m.Set(metaAliVideoSize, "1080P") + + resp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{"Content-Type": {"application/json"}}, + Body: io.NopCloser(bytes.NewBufferString( + `{"request_id":"req-1","output":{"task_id":"task-123","task_status":"PENDING"}}`, + )), + } + + result, relayErr := AliNativeVideoHandler(m, nil, ctx, resp) + if relayErr != nil { + t.Fatalf("AliNativeVideoHandler returned error: %v", relayErr) + } + + if result.UsageContext.Resolution != "1080P" || + result.UsageContext.NativeResolution != "1080P" { + t.Fatalf("unexpected native usage context fallback: %#v", result.UsageContext) + } } diff --git a/core/relay/adaptor/ali/async_usage.go b/core/relay/adaptor/ali/async_usage.go index f524a962..e4662c6f 100644 --- a/core/relay/adaptor/ali/async_usage.go +++ b/core/relay/adaptor/ali/async_usage.go @@ -33,7 +33,7 @@ func (a *Adaptor) FetchAsyncUsage( switch mode.Mode(info.Mode) { case mode.AliVideo: - return a.fetchAliVideoJobUsage(ctx, channel, request.Store, info) + return a.fetchAliNativeVideoUsage(ctx, channel, request.Store, info) case mode.VideoGenerationsJobs, mode.Videos, mode.VideosRemix, @@ -48,6 +48,24 @@ func (a *Adaptor) FetchAsyncUsage( } } +func (a *Adaptor) fetchAliNativeVideoUsage( + ctx context.Context, + channel *coremodel.Channel, + store adaptor.Store, + info *coremodel.AsyncUsageInfo, +) (coremodel.Usage, coremodel.UsageContext, bool, error) { + usage, usageContext, completed, err := a.fetchAliVideoJobUsage(ctx, channel, store, info) + if !completed || err != nil { + return usage, usageContext, completed, err + } + + return usage, + aliNativeVideoUsageContextFromContext(usageContext). + WithFallback(aliNativeVideoUsageContextFromContext(info.UsageContext)), + completed, + nil +} + func (a *Adaptor) fetchAliVideoJobUsage( ctx context.Context, channel *coremodel.Channel, @@ -86,6 +104,26 @@ func (a *Adaptor) fetchAliVideoJobUsage( } } +func aliNativeVideoUsageContextFromContext( + usageContext coremodel.UsageContext, +) coremodel.UsageContext { + nativeResolution := usageContext.NativeResolution + if nativeResolution == "" { + nativeResolution = usageContext.Resolution + } + + if nativeResolution == "" { + return coremodel.UsageContext{} + } + + return coremodel.UsageContext{ + Resolution: nativeResolution, + NativeResolution: nativeResolution, + ServiceTier: usageContext.ServiceTier, + Quality: usageContext.Quality, + } +} + func aliVideoAsyncUsageContext( usage relaymodel.AliVideoUsage, store adaptor.Store, diff --git a/core/relay/adaptor/ali/tts.go b/core/relay/adaptor/ali/tts.go index 49b65cfe..aa517388 100644 --- a/core/relay/adaptor/ali/tts.go +++ b/core/relay/adaptor/ali/tts.go @@ -6,9 +6,11 @@ import ( "fmt" "io" "net/http" + "strconv" "strings" "github.com/bytedance/sonic" + "github.com/bytedance/sonic/ast" "github.com/gin-gonic/gin" "github.com/google/uuid" "github.com/gorilla/websocket" @@ -102,16 +104,15 @@ func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResul return adaptor.ConvertResult{}, err } - reqMap, err := utils.UnmarshalMap(req) + node, err := common.UnmarshalRequest2NodeReusable(req) if err != nil { return adaptor.ConvertResult{}, err } var sampleRate int - sampleRateI, ok := reqMap["sample_rate"].(float64) - if ok { - sampleRate = int(sampleRateI) + if sampleRateNode := node.Get("sample_rate"); sampleRateNode.Exists() { + sampleRate, _ = intFromTTSNode(sampleRateNode) } request.Model = meta.ActualModel @@ -181,6 +182,33 @@ func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResul }, nil } +func intFromTTSNode(node *ast.Node) (int, bool) { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return 0, false + } + + if node.TypeSafe() == ast.V_STRING { + value, err := node.String() + if err != nil { + return 0, false + } + + parsed, err := strconv.Atoi(strings.TrimSpace(value)) + if err != nil { + return 0, false + } + + return parsed, true + } + + value, err := node.Int64() + if err != nil { + return 0, false + } + + return int(value), true +} + func TTSDoRequest(meta *meta.Meta, req *http.Request) (*http.Response, error) { wsURL := req.URL wsURL.Scheme = "wss" diff --git a/core/relay/adaptor/doubao/async_usage.go b/core/relay/adaptor/doubao/async_usage.go index 7e39f829..12452397 100644 --- a/core/relay/adaptor/doubao/async_usage.go +++ b/core/relay/adaptor/doubao/async_usage.go @@ -49,9 +49,16 @@ func (a *Adaptor) FetchAsyncUsage( switch strings.ToLower(response.Status) { case "succeeded": + usageContext := doubaoVideoAsyncUsageContext(response, request.Store, info) + if mode.Mode(info.Mode) == mode.DoubaoVideo { + usageContext = doubaoNativeVideoUsageContextFromContext(usageContext). + WithFallback(doubaoNativeVideoUsageContextFromContext(info.UsageContext)) + } else { + usageContext = usageContext.WithFallback(info.UsageContext) + } + return doubaoVideoUsageToModelUsage(response.Usage), - doubaoVideoAsyncUsageContext(response, request.Store, info). - WithFallback(info.UsageContext), + usageContext, true, nil case "queued", "running", "": @@ -83,7 +90,15 @@ func doubaoVideoAsyncUsageContext( merged.Resolution = firstNonEmptyString(response.Resolution, metadata.Resolution) merged.Ratio = firstNonEmptyString(response.Ratio, metadata.Ratio) - return doubaoVideoUsageContext(&merged) + merged.ServiceTier = firstNonEmptyString(response.ServiceTier, metadata.ServiceTier) + if merged.GenerateAudio == nil { + merged.GenerateAudio = metadata.OutputAudio + } + + return doubaoVideoUsageContext(&merged).WithFallback(coremodel.UsageContext{ + InputVideo: metadata.InputVideo, + OutputAudio: metadata.OutputAudio, + }) } func doubaoVideoAsyncUsageContextFromStore( @@ -96,11 +111,16 @@ func doubaoVideoAsyncUsageContextFromStore( } response := &relaymodel.DoubaoVideoTaskResponse{ - Resolution: metadata.Resolution, - Ratio: metadata.Ratio, + Resolution: metadata.Resolution, + Ratio: metadata.Ratio, + ServiceTier: metadata.ServiceTier, + GenerateAudio: metadata.OutputAudio, } - return doubaoVideoUsageContext(response) + return doubaoVideoUsageContext(response).WithFallback(coremodel.UsageContext{ + InputVideo: metadata.InputVideo, + OutputAudio: metadata.OutputAudio, + }) } func doubaoVideoAsyncMetadataFromStore( diff --git a/core/relay/adaptor/doubao/constants.go b/core/relay/adaptor/doubao/constants.go index 8a68b128..f7cbdd8e 100644 --- a/core/relay/adaptor/doubao/constants.go +++ b/core/relay/adaptor/doubao/constants.go @@ -40,28 +40,55 @@ var ModelList = []model.ModelConfig{ }, }, { - Model: "doubao-seedance-2-0", - Type: mode.DoubaoVideo, - Owner: model.ModelOwnerDoubao, + Model: "doubao-seedance-2-0-260128", + Type: mode.DoubaoVideo, + Owner: model.ModelOwnerDoubao, + AllowedResolutions: []string{"480p", "720p", "1080p"}, + MaxVideoGenerationSeconds: 15, Price: model.Price{ // Seedance video billing uses the API response usage.completion_tokens. - // The token unit price depends on output resolution and whether the - // request used reference video input; current usage context only carries - // resolution, so this uses the no-reference-video official tier. - OutputPrice: 51, + // Official prices are RMB per million tokens; aiproxy stores prices per + // 1K tokens by default, so divide the official value by 1000. + OutputPrice: 0.051, OutputPriceUnit: model.PriceUnit, ConditionalPrices: []model.ConditionalPrice{ { - Condition: model.PriceCondition{Resolution: []string{"480p", "720p"}}, + Condition: model.PriceCondition{ + Resolution: []string{"480p", "720p"}, + InputVideo: new(false), + }, + Price: model.Price{ + OutputPrice: 0.046, + OutputPriceUnit: model.PriceUnit, + }, + }, + { + Condition: model.PriceCondition{ + Resolution: []string{"480p", "720p"}, + InputVideo: new(true), + }, Price: model.Price{ - OutputPrice: 46, + OutputPrice: 0.028, OutputPriceUnit: model.PriceUnit, }, }, { - Condition: model.PriceCondition{Resolution: []string{"1080p"}}, + Condition: model.PriceCondition{ + Resolution: []string{"1080p"}, + InputVideo: new(false), + }, Price: model.Price{ - OutputPrice: 51, + OutputPrice: 0.051, + OutputPriceUnit: model.PriceUnit, + }, + }, + { + Condition: model.PriceCondition{ + Resolution: []string{"1080p"}, + InputVideo: new(true), + }, + Price: model.Price{ + OutputPrice: 0.031, OutputPriceUnit: model.PriceUnit, }, }, @@ -69,18 +96,27 @@ var ModelList = []model.ModelConfig{ }, }, { - Model: "doubao-seedance-2-0-fast", - Type: mode.DoubaoVideo, - Owner: model.ModelOwnerDoubao, + Model: "doubao-seedance-2-0-fast-260128", + Type: mode.DoubaoVideo, + Owner: model.ModelOwnerDoubao, + AllowedResolutions: []string{"480p", "720p"}, + MaxVideoGenerationSeconds: 15, Price: model.Price{ // Seedance video billing uses the API response usage.completion_tokens. - OutputPrice: 37, + OutputPrice: 0.037, OutputPriceUnit: model.PriceUnit, ConditionalPrices: []model.ConditionalPrice{ { - Condition: model.PriceCondition{Resolution: []string{"480p", "720p"}}, + Condition: model.PriceCondition{InputVideo: new(false)}, Price: model.Price{ - OutputPrice: 37, + OutputPrice: 0.037, + OutputPriceUnit: model.PriceUnit, + }, + }, + { + Condition: model.PriceCondition{InputVideo: new(true)}, + Price: model.Price{ + OutputPrice: 0.022, OutputPriceUnit: model.PriceUnit, }, }, @@ -88,19 +124,43 @@ var ModelList = []model.ModelConfig{ }, }, { - Model: "doubao-seedance-1-5-pro", - Type: mode.DoubaoVideo, - Owner: model.ModelOwnerDoubao, + Model: "doubao-seedance-1-5-pro-251215", + Type: mode.DoubaoVideo, + Owner: model.ModelOwnerDoubao, + AllowedResolutions: []string{"480p", "720p", "1080p"}, + MaxVideoGenerationSeconds: 12, Price: model.Price{ - // Seedance 1.5 pro token price is for the default generated-audio case. - // The silent-video price is lower, but generate_audio is not part of PriceCondition. - OutputPrice: 16, + // Seedance 1.5 pro defaults generate_audio to true. + OutputPrice: 0.016, OutputPriceUnit: model.PriceUnit, ConditionalPrices: []model.ConditionalPrice{ { - Condition: model.PriceCondition{ServiceTier: "flex"}, + Condition: model.PriceCondition{ + ServiceTier: "default", + OutputAudio: new(false), + }, + Price: model.Price{ + OutputPrice: 0.008, + OutputPriceUnit: model.PriceUnit, + }, + }, + { + Condition: model.PriceCondition{ + ServiceTier: "flex", + OutputAudio: new(true), + }, Price: model.Price{ - OutputPrice: 8, + OutputPrice: 0.008, + OutputPriceUnit: model.PriceUnit, + }, + }, + { + Condition: model.PriceCondition{ + ServiceTier: "flex", + OutputAudio: new(false), + }, + Price: model.Price{ + OutputPrice: 0.004, OutputPriceUnit: model.PriceUnit, }, }, @@ -108,18 +168,20 @@ var ModelList = []model.ModelConfig{ }, }, { - Model: "doubao-seedance-1-0-pro", - Type: mode.DoubaoVideo, - Owner: model.ModelOwnerDoubao, + Model: "doubao-seedance-1-0-pro-250528", + Type: mode.DoubaoVideo, + Owner: model.ModelOwnerDoubao, + AllowedResolutions: []string{"480p", "720p", "1080p"}, + MaxVideoGenerationSeconds: 12, Price: model.Price{ // Seedance 1.0 pro bills by returned video completion tokens. - OutputPrice: 15, + OutputPrice: 0.015, OutputPriceUnit: model.PriceUnit, ConditionalPrices: []model.ConditionalPrice{ { Condition: model.PriceCondition{ServiceTier: "flex"}, Price: model.Price{ - OutputPrice: 7.5, + OutputPrice: 0.0075, OutputPriceUnit: model.PriceUnit, }, }, @@ -127,18 +189,20 @@ var ModelList = []model.ModelConfig{ }, }, { - Model: "doubao-seedance-1-0-pro-fast", - Type: mode.DoubaoVideo, - Owner: model.ModelOwnerDoubao, + Model: "doubao-seedance-1-0-pro-fast-251015", + Type: mode.DoubaoVideo, + Owner: model.ModelOwnerDoubao, + AllowedResolutions: []string{"480p", "720p", "1080p"}, + MaxVideoGenerationSeconds: 12, Price: model.Price{ // Seedance 1.0 pro fast bills by returned video completion tokens. - OutputPrice: 4.2, + OutputPrice: 0.0042, OutputPriceUnit: model.PriceUnit, ConditionalPrices: []model.ConditionalPrice{ { Condition: model.PriceCondition{ServiceTier: "flex"}, Price: model.Price{ - OutputPrice: 2.1, + OutputPrice: 0.0021, OutputPriceUnit: model.PriceUnit, }, }, diff --git a/core/relay/adaptor/doubao/constants_test.go b/core/relay/adaptor/doubao/constants_test.go index 307a847d..6200081c 100644 --- a/core/relay/adaptor/doubao/constants_test.go +++ b/core/relay/adaptor/doubao/constants_test.go @@ -11,6 +11,7 @@ import ( "github.com/labring/aiproxy/core/common/consume" "github.com/labring/aiproxy/core/model" "github.com/labring/aiproxy/core/relay/controller" + "github.com/labring/aiproxy/core/relay/mode" ) func doubaoModelPriceForTest(t *testing.T, modelName string) model.Price { @@ -39,11 +40,11 @@ func TestDoubaoSeedancePriceUsesReturnedCompletionTokens(t *testing.T) { model.UsageContext{ Resolution: "1080p", }, - doubaoModelPriceForTest(t, "doubao-seedance-2-0"), + doubaoModelPriceForTest(t, "doubao-seedance-2-0-260128"), ) - if amount.UsedAmount != 51 { - t.Fatalf("expected 51 token amount, got %#v", amount) + if amount.UsedAmount != 0.051 { + t.Fatalf("expected 0.051 token amount, got %#v", amount) } } @@ -57,11 +58,214 @@ func TestDoubaoSeedancePriceFallsBackToMostExpensiveResolution(t *testing.T) { TotalTokens: 1000, }, model.UsageContext{}, - doubaoModelPriceForTest(t, "doubao-seedance-2-0"), + doubaoModelPriceForTest(t, "doubao-seedance-2-0-260128"), ) - if amount.UsedAmount != 51 { - t.Fatalf("expected 51 token amount, got %#v", amount) + if amount.UsedAmount != 0.051 { + t.Fatalf("expected 0.051 token amount, got %#v", amount) + } +} + +func TestDoubaoSeedanceConditionalPricesValidate(t *testing.T) { + t.Parallel() + + for _, mc := range ModelList { + if mc.Type != mode.DoubaoVideo { + continue + } + + if err := mc.Price.ValidateConditionalPrices(); err != nil { + t.Fatalf("model %s has invalid conditional prices: %v", mc.Model, err) + } + } +} + +func TestDoubaoSeedanceConditionalPriceUsesInputVideoContext(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + priceModel string + usageContext model.UsageContext + want float64 + }{ + { + name: "seedance 2 720p without input video", + priceModel: "doubao-seedance-2-0-260128", + usageContext: model.UsageContext{ + Resolution: "1280x720", + InputVideo: new(false), + }, + want: 0.046, + }, + { + name: "seedance 2 720p with input video", + priceModel: "doubao-seedance-2-0-260128", + usageContext: model.UsageContext{ + Resolution: "1280x720", + InputVideo: new(true), + }, + want: 0.028, + }, + { + name: "seedance 2 1080p with input video", + priceModel: "doubao-seedance-2-0-260128", + usageContext: model.UsageContext{ + NativeResolution: "1080p", + InputVideo: new(true), + }, + want: 0.031, + }, + { + name: "seedance 2 fast without input video", + priceModel: "doubao-seedance-2-0-fast-260128", + usageContext: model.UsageContext{ + Resolution: "720p", + InputVideo: new(false), + }, + want: 0.037, + }, + { + name: "seedance 2 fast with input video", + priceModel: "doubao-seedance-2-0-fast-260128", + usageContext: model.UsageContext{ + Resolution: "720p", + InputVideo: new(true), + }, + want: 0.022, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + amount := consume.CalculateAmountDetail( + 200, + model.Usage{OutputTokens: 1000, TotalTokens: 1000}, + tt.usageContext, + doubaoModelPriceForTest(t, tt.priceModel), + ) + + if amount.UsedAmount != tt.want { + t.Fatalf("expected %v token amount, got %#v", tt.want, amount) + } + }) + } +} + +func TestDoubaoSeedance15ConditionalPriceUsesOutputAudioAndServiceTier(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + usageContext model.UsageContext + want float64 + }{ + { + name: "online generated audio", + usageContext: model.UsageContext{ + ServiceTier: "default", + OutputAudio: new(true), + }, + want: 0.016, + }, + { + name: "online silent", + usageContext: model.UsageContext{ + ServiceTier: "default", + OutputAudio: new(false), + }, + want: 0.008, + }, + { + name: "offline generated audio", + usageContext: model.UsageContext{ + ServiceTier: "flex", + OutputAudio: new(true), + }, + want: 0.008, + }, + { + name: "offline silent", + usageContext: model.UsageContext{ + ServiceTier: "flex", + OutputAudio: new(false), + }, + want: 0.004, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + amount := consume.CalculateAmountDetail( + 200, + model.Usage{OutputTokens: 1000, TotalTokens: 1000}, + tt.usageContext, + doubaoModelPriceForTest(t, "doubao-seedance-1-5-pro-251215"), + ) + + if amount.UsedAmount != tt.want { + t.Fatalf("expected %v token amount, got %#v", tt.want, amount) + } + }) + } +} + +func TestDoubaoSeedance10ConditionalPriceUsesServiceTier(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + priceModel string + usageContext model.UsageContext + want float64 + }{ + { + name: "seedance 1 pro online", + priceModel: "doubao-seedance-1-0-pro-250528", + want: 0.015, + }, + { + name: "seedance 1 pro offline", + priceModel: "doubao-seedance-1-0-pro-250528", + usageContext: model.UsageContext{ + ServiceTier: "flex", + }, + want: 0.0075, + }, + { + name: "seedance 1 pro fast online", + priceModel: "doubao-seedance-1-0-pro-fast-251015", + want: 0.0042, + }, + { + name: "seedance 1 pro fast offline", + priceModel: "doubao-seedance-1-0-pro-fast-251015", + usageContext: model.UsageContext{ + ServiceTier: "flex", + }, + want: 0.0021, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + amount := consume.CalculateAmountDetail( + 200, + model.Usage{OutputTokens: 1000, TotalTokens: 1000}, + tt.usageContext, + doubaoModelPriceForTest(t, tt.priceModel), + ) + + if amount.UsedAmount != tt.want { + t.Fatalf("expected %v token amount, got %#v", tt.want, amount) + } + }) } } @@ -75,7 +279,7 @@ func TestDoubaoSeedanceConditionalPriceKeepsTokenUnitAfterVideoController(t *tes http.MethodPost, "/v1/video/generations/jobs", bytes.NewBufferString(`{ - "model":"doubao-seedance-2-0", + "model":"doubao-seedance-2-0-260128", "prompt":"A city street", "n_seconds":5, "size":"1920x1080" @@ -87,7 +291,7 @@ func TestDoubaoSeedanceConditionalPriceKeepsTokenUnitAfterVideoController(t *tes ctx.Request = req price, err := controller.GetVideoGenerationJobRequestPrice(ctx, model.ModelConfig{ - Price: doubaoModelPriceForTest(t, "doubao-seedance-2-0"), + Price: doubaoModelPriceForTest(t, "doubao-seedance-2-0-260128"), }) if err != nil { t.Fatalf("GetVideoGenerationJobRequestPrice returned error: %v", err) @@ -105,7 +309,7 @@ func TestDoubaoSeedanceConditionalPriceKeepsTokenUnitAfterVideoController(t *tes price, ) - if amount.UsedAmount != 51 { - t.Fatalf("expected 51 token amount after controller price projection, got %#v", amount) + if amount.UsedAmount != 0.051 { + t.Fatalf("expected 0.051 token amount after controller price projection, got %#v", amount) } } diff --git a/core/relay/adaptor/doubao/doubao_video.go b/core/relay/adaptor/doubao/doubao_video.go index 52201620..ed334afe 100644 --- a/core/relay/adaptor/doubao/doubao_video.go +++ b/core/relay/adaptor/doubao/doubao_video.go @@ -9,6 +9,7 @@ import ( "github.com/bytedance/sonic/ast" "github.com/gin-gonic/gin" "github.com/labring/aiproxy/core/common" + coremodel "github.com/labring/aiproxy/core/model" "github.com/labring/aiproxy/core/relay/adaptor" "github.com/labring/aiproxy/core/relay/meta" relaymodel "github.com/labring/aiproxy/core/relay/model" @@ -27,6 +28,8 @@ func ConvertDoubaoNativeVideoRequest( return adaptor.ConvertResult{}, err } + setDoubaoNativeVideoRequestMetadata(meta, &body) + data, err := body.MarshalJSON() if err != nil { return adaptor.ConvertResult{}, err @@ -62,9 +65,9 @@ func DoubaoNativeVideoSubmitHandler( return adaptor.DoResponseResult{ UpstreamID: response.ID, AsyncUsage: true, - UsageContext: doubaoVideoUsageContext( + UsageContext: doubaoNativeVideoUsageContext( &response, - ).WithFallback(doubaoVideoRequestUsageContext(meta)), + ).WithFallback(doubaoNativeVideoRequestUsageContext(meta)), }, nil } @@ -83,6 +86,13 @@ func DoubaoNativeVideoTaskHandler( response.ID = meta.VideoID } + applyStoredDoubaoVideoMetadata( + meta, + store, + coremodel.VideoGenerationStoreID(response.ID), + &response, + ) + if response.ID != "" { expiresAt := doubaoVideoExpiresAt(response) if err := saveDoubaoVideoStore(meta, store, response.ID, expiresAt); err != nil { @@ -94,12 +104,179 @@ func DoubaoNativeVideoTaskHandler( return adaptor.DoResponseResult{ UpstreamID: response.ID, - UsageContext: doubaoVideoUsageContext( + UsageContext: doubaoNativeVideoUsageContext( &response, - ).WithFallback(doubaoVideoRequestUsageContext(meta)), + ).WithFallback(doubaoNativeVideoRequestUsageContext(meta)), }, nil } +func setDoubaoNativeVideoRequestMetadata(meta *meta.Meta, body *ast.Node) { + if meta == nil { + return + } + + metadata := doubaoVideoStoreMetadata{ + Prompt: doubaoVideoPrompt(doubaoNativeVideoContent(body.Get("content"))), + Resolution: doubaoNativeVideoString(body.Get("resolution")), + Ratio: doubaoNativeVideoString(body.Get("ratio")), + Duration: doubaoNativeVideoInt(body.Get("duration")), + ServiceTier: firstNonEmptyString( + doubaoNativeVideoString(body.Get("service_tier")), + "default", + ), + InputVideo: new(doubaoNativeVideoContentHasVideo(body.Get("content"))), + OutputAudio: doubaoNativeVideoOutputAudio(body.Get("generate_audio")), + } + + setDoubaoVideoMetadata(meta, metadata) +} + +func doubaoNativeVideoContent(node *ast.Node) []doubaoVideoContent { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return nil + } + + count, err := node.Len() + if err != nil || count <= 0 { + return nil + } + + content := make([]doubaoVideoContent, 0, count) + for i := range count { + item := node.Index(i) + if item == nil || !item.Exists() || item.TypeSafe() == ast.V_NULL { + continue + } + + content = append(content, doubaoVideoContent{ + Type: doubaoNativeVideoString(item.Get("type")), + Text: doubaoNativeVideoString(item.Get("text")), + }) + } + + return content +} + +func doubaoNativeVideoString(node *ast.Node) string { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return "" + } + + value, err := node.String() + if err != nil { + return "" + } + + return value +} + +func doubaoNativeVideoInt(node *ast.Node) int { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return 0 + } + + value, err := node.Int64() + if err != nil { + return 0 + } + + return int(value) +} + +func doubaoNativeVideoUsageContext( + response *relaymodel.DoubaoVideoTaskResponse, +) coremodel.UsageContext { + usageContext := doubaoVideoUsageContext(response) + return doubaoNativeVideoUsageContextFromContext(usageContext) +} + +func doubaoNativeVideoRequestUsageContext(meta *meta.Meta) coremodel.UsageContext { + usageContext := doubaoVideoRequestUsageContext(meta) + return doubaoNativeVideoUsageContextFromContext(usageContext) +} + +func doubaoNativeVideoUsageContextFromContext( + usageContext coremodel.UsageContext, +) coremodel.UsageContext { + nativeResolution := usageContext.NativeResolution + if nativeResolution == "" { + nativeResolution = usageContext.Resolution + } + + if nativeResolution == "" && + usageContext.ServiceTier == "" && + usageContext.Quality == "" && + usageContext.InputVideo == nil && + usageContext.OutputAudio == nil { + return coremodel.UsageContext{} + } + + return coremodel.UsageContext{ + Resolution: nativeResolution, + NativeResolution: nativeResolution, + ServiceTier: usageContext.ServiceTier, + Quality: usageContext.Quality, + InputVideo: usageContext.InputVideo, + OutputAudio: usageContext.OutputAudio, + } +} + +func doubaoNativeVideoContentHasVideo(node *ast.Node) bool { + if node == nil || !node.Exists() || node.TypeSafe() != ast.V_ARRAY { + return false + } + + hasVideo := false + + _ = node.ForEach(func(_ ast.Sequence, item *ast.Node) bool { + if item == nil || !item.Exists() || item.TypeSafe() != ast.V_OBJECT { + return true + } + + typeNode := item.Get("type") + if typeNode.Exists() && typeNode.TypeSafe() == ast.V_STRING { + itemType, err := typeNode.String() + if err == nil && (itemType == "video_url" || itemType == "draft_task") { + hasVideo = true + return false + } + } + + if videoNode := item.Get( + "video_url", + ); videoNode.Exists() && + videoNode.TypeSafe() != ast.V_NULL { + hasVideo = true + return false + } + + if draftTaskNode := item.Get( + "draft_task", + ); draftTaskNode.Exists() && + draftTaskNode.TypeSafe() != ast.V_NULL { + hasVideo = true + return false + } + + return true + }) + + return hasVideo +} + +func doubaoNativeVideoOutputAudio(node *ast.Node) *bool { + if node == nil { + return new(true) + } + + value, err := node.Bool() + if err != nil { + return new(true) + } + + return &value +} + func DoubaoNativeVideoTaskDeleteHandler( _ *meta.Meta, c *gin.Context, diff --git a/core/relay/adaptor/doubao/doubao_video_test.go b/core/relay/adaptor/doubao/doubao_video_test.go index 476e2535..90c278a2 100644 --- a/core/relay/adaptor/doubao/doubao_video_test.go +++ b/core/relay/adaptor/doubao/doubao_video_test.go @@ -15,6 +15,10 @@ import ( "github.com/labring/aiproxy/core/relay/mode" ) +func boolValue(value *bool) bool { + return value != nil && *value +} + func TestConvertNativeVideoRequestPreservesBodyAndRewritesModel(t *testing.T) { t.Parallel() @@ -23,14 +27,15 @@ func TestConvertNativeVideoRequestPreservesBodyAndRewritesModel(t *testing.T) { http.MethodPost, "/api/v3/contents/generations/tasks", bytes.NewBufferString( - `{"model":"doubao-seedance-2-0","content":[{"type":"text","text":"go"}],"resolution":"720p"}`, + `{"model":"doubao-seedance-2-0-260128","content":[{"type":"text","text":"go"},{"type":"video_url","video_url":{"url":"https://example.com/in.mp4"}}],"resolution":"720p","generate_audio":false}`, ), ) req.Header.Set("Content-Type", "application/json") - result, err := ConvertDoubaoNativeVideoRequest(&meta.Meta{ - ActualModel: "mapped-seedance", - }, req) + m := meta.NewMeta(nil, mode.DoubaoVideo, "doubao-seedance-2-0-260128", coremodel.ModelConfig{}) + m.ActualModel = "mapped-seedance" + + result, err := ConvertDoubaoNativeVideoRequest(m, req) if err != nil { t.Fatalf("ConvertDoubaoNativeVideoRequest returned error: %v", err) } @@ -49,9 +54,51 @@ func TestConvertNativeVideoRequestPreservesBodyAndRewritesModel(t *testing.T) { } content, ok := body["content"].([]any) - if !ok || len(content) != 1 { + if !ok || len(content) != 2 { t.Fatalf("content was not preserved: %#v", body["content"]) } + + if usageContext := doubaoNativeVideoRequestUsageContext(m); usageContext.Resolution != "720p" || + usageContext.NativeResolution != "720p" || + boolValue(usageContext.InputVideo) != true || + boolValue(usageContext.OutputAudio) != false { + t.Fatalf( + "unexpected native request usage context: %#v input_video=%t output_audio=%t", + usageContext, + boolValue(usageContext.InputVideo), + boolValue(usageContext.OutputAudio), + ) + } +} + +func TestConvertNativeVideoRequestCountsDraftTaskAsInputVideo(t *testing.T) { + t.Parallel() + + req := httptest.NewRequestWithContext( + t.Context(), + http.MethodPost, + "/api/v3/contents/generations/tasks", + bytes.NewBufferString( + `{"model":"doubao-seedance-1-5-pro-251215","content":[{"type":"text","text":"go"},{"type":"draft_task","draft_task":{"id":"task-123"}}],"resolution":"720p"}`, + ), + ) + req.Header.Set("Content-Type", "application/json") + + m := meta.NewMeta( + nil, + mode.DoubaoVideo, + "doubao-seedance-1-5-pro-251215", + coremodel.ModelConfig{}, + ) + + if _, err := ConvertDoubaoNativeVideoRequest(m, req); err != nil { + t.Fatalf("ConvertDoubaoNativeVideoRequest returned error: %v", err) + } + + usageContext := doubaoNativeVideoRequestUsageContext(m) + if usageContext.InputVideo == nil || !*usageContext.InputVideo { + t.Fatalf("expected draft_task to count as input video, got %#v", usageContext) + } } func TestDoubaoNativeVideoSubmitHandlerPassesThroughAndStoresTask(t *testing.T) { @@ -64,14 +111,19 @@ func TestDoubaoNativeVideoSubmitHandlerPassesThroughAndStoresTask(t *testing.T) ctx.Request = httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/", nil) store := &doubaoTestStore{} - m := &meta.Meta{ - Mode: mode.DoubaoVideo, - OriginModel: "doubao-seedance-2-0", - Group: coremodel.GroupCache{ID: "group-1"}, - Token: coremodel.TokenCache{ID: 7}, - Channel: meta.ChannelMeta{ID: 42}, - } - respBody := `{"id":"task-123","model":"doubao-seedance-2-0","status":"queued"}` + m := meta.NewMeta(nil, mode.DoubaoVideo, "doubao-seedance-2-0-260128", coremodel.ModelConfig{}) + m.Group = coremodel.GroupCache{ID: "group-1"} + m.Token = coremodel.TokenCache{ID: 7} + m.Channel = meta.ChannelMeta{ID: 42} + setDoubaoVideoMetadata(m, doubaoVideoStoreMetadata{ + Resolution: "1080p", + Ratio: "16:9", + ServiceTier: "priority", + InputVideo: new(true), + OutputAudio: new(false), + }) + + respBody := `{"id":"task-123","model":"doubao-seedance-2-0-260128","status":"queued","resolution":"720p","ratio":"16:9","service_tier":"default"}` resp := &http.Response{ StatusCode: http.StatusOK, Header: http.Header{"Content-Type": {"application/json"}}, @@ -91,17 +143,69 @@ func TestDoubaoNativeVideoSubmitHandlerPassesThroughAndStoresTask(t *testing.T) t.Fatalf("unexpected result: %#v", result) } + if result.UsageContext.Resolution != "720p" || + result.UsageContext.NativeResolution != "720p" || + result.UsageContext.ServiceTier != "default" || + result.UsageContext.InputVideo == nil || + !*result.UsageContext.InputVideo || + result.UsageContext.OutputAudio == nil || + *result.UsageContext.OutputAudio { + t.Fatalf("unexpected native usage context: %#v", result.UsageContext) + } + if len(store.saved) != 1 { t.Fatalf("expected one store save, got %d", len(store.saved)) } if store.saved[0].ID != coremodel.VideoGenerationStoreID("task-123") || store.saved[0].ChannelID != 42 || - store.saved[0].Model != "doubao-seedance-2-0" { + store.saved[0].Model != "doubao-seedance-2-0-260128" { t.Fatalf("unexpected saved store: %#v", store.saved[0]) } } +func TestDoubaoNativeVideoSubmitHandlerUsesNativeRequestResolutionFallback(t *testing.T) { + t.Parallel() + + gin.SetMode(gin.TestMode) + + recorder := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(recorder) + ctx.Request = httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/", nil) + + m := meta.NewMeta(nil, mode.DoubaoVideo, "doubao-seedance-2-0-260128", coremodel.ModelConfig{}) + setDoubaoVideoMetadata(m, doubaoVideoStoreMetadata{ + Resolution: "1080p", + Ratio: "16:9", + ServiceTier: "priority", + InputVideo: new(true), + OutputAudio: new(false), + }) + + resp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{"Content-Type": {"application/json"}}, + Body: io.NopCloser(bytes.NewBufferString( + `{"id":"task-123","model":"doubao-seedance-2-0-260128","status":"queued"}`, + )), + } + + result, relayErr := DoubaoNativeVideoSubmitHandler(m, nil, ctx, resp) + if relayErr != nil { + t.Fatalf("DoubaoNativeVideoSubmitHandler returned error: %v", relayErr) + } + + if result.UsageContext.Resolution != "1080p" || + result.UsageContext.NativeResolution != "1080p" || + result.UsageContext.ServiceTier != "priority" || + result.UsageContext.InputVideo == nil || + !*result.UsageContext.InputVideo || + result.UsageContext.OutputAudio == nil || + *result.UsageContext.OutputAudio { + t.Fatalf("unexpected native usage context fallback: %#v", result.UsageContext) + } +} + func TestDoubaoNativeVideoSubmitHandlerRequiresID(t *testing.T) { t.Parallel() @@ -136,12 +240,12 @@ func TestDoubaoNativeVideoTaskHandlerBackfillsMissingIDFromMeta(t *testing.T) { m := &meta.Meta{ Mode: mode.DoubaoVideoTasks, VideoID: "task-123", - OriginModel: "doubao-seedance-2-0", + OriginModel: "doubao-seedance-2-0-260128", Group: coremodel.GroupCache{ID: "group-1"}, Token: coremodel.TokenCache{ID: 7}, Channel: meta.ChannelMeta{ID: 42}, } - respBody := `{"model":"doubao-seedance-2-0","status":"succeeded","content":{"video_url":"https://example.com/out.mp4"}}` + respBody := `{"model":"doubao-seedance-2-0-260128","status":"succeeded","resolution":"720p","ratio":"16:9","content":{"video_url":"https://example.com/out.mp4"}}` resp := &http.Response{ StatusCode: http.StatusOK, Header: http.Header{"Content-Type": {"application/json"}}, @@ -161,13 +265,18 @@ func TestDoubaoNativeVideoTaskHandlerBackfillsMissingIDFromMeta(t *testing.T) { t.Fatalf("expected upstream id from meta, got %#v", result.UpstreamID) } + if result.UsageContext.Resolution != "720p" || + result.UsageContext.NativeResolution != "720p" { + t.Fatalf("unexpected native usage context: %#v", result.UsageContext) + } + if len(store.saved) != 1 { t.Fatalf("expected one store save, got %d", len(store.saved)) } if store.saved[0].ID != coremodel.VideoGenerationStoreID("task-123") || store.saved[0].ChannelID != 42 || - store.saved[0].Model != "doubao-seedance-2-0" { + store.saved[0].Model != "doubao-seedance-2-0-260128" { t.Fatalf("unexpected saved store: %#v", store.saved[0]) } } diff --git a/core/relay/adaptor/doubao/embed.go b/core/relay/adaptor/doubao/embed.go index a62e19a1..0d54784c 100644 --- a/core/relay/adaptor/doubao/embed.go +++ b/core/relay/adaptor/doubao/embed.go @@ -75,12 +75,12 @@ func patchEmbeddingsVisionInput(node *ast.Node) error { return err } - _, err = node.SetAny("input", []map[string]string{ - { - "type": "text", - "text": inputText, - }, - }) + _, err = node.Set("input", ast.NewArray([]ast.Node{ + ast.NewObject([]ast.Pair{ + ast.NewPair("type", ast.NewString("text")), + ast.NewPair("text", ast.NewString(inputText)), + }), + })) return err default: @@ -104,9 +104,9 @@ func setEmbeddingVisionURLItem( return err } - _, err = item.SetAny(targetKey, map[string]string{ - "url": url, - }) + _, err = item.Set(targetKey, ast.NewObject([]ast.Pair{ + ast.NewPair("url", ast.NewString(url)), + })) return err } diff --git a/core/relay/adaptor/doubao/main_test.go b/core/relay/adaptor/doubao/main_test.go index fda5ba44..a771bd34 100644 --- a/core/relay/adaptor/doubao/main_test.go +++ b/core/relay/adaptor/doubao/main_test.go @@ -206,14 +206,14 @@ func TestAdaptorGetRequestURL(t *testing.T) { { name: "video job create", mode: mode.VideoGenerationsJobs, - model: "doubao-seedance-2-0", + model: "doubao-seedance-2-0-260128", wantMethod: http.MethodPost, wantURL: "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks", }, { name: "video job get", mode: mode.VideoGenerationsGetJobs, - model: "doubao-seedance-2-0", + model: "doubao-seedance-2-0-260128", jobID: "task-123", wantMethod: http.MethodGet, wantURL: "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks/task-123", @@ -221,7 +221,7 @@ func TestAdaptorGetRequestURL(t *testing.T) { { name: "video job content", mode: mode.VideoGenerationsContent, - model: "doubao-seedance-2-0", + model: "doubao-seedance-2-0-260128", generationID: "task-456", wantMethod: http.MethodGet, wantURL: "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks/task-456", @@ -229,7 +229,7 @@ func TestAdaptorGetRequestURL(t *testing.T) { { name: "videos get", mode: mode.VideosGet, - model: "doubao-seedance-2-0", + model: "doubao-seedance-2-0-260128", videoID: "video-123", wantMethod: http.MethodGet, wantURL: "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks/video-123", @@ -237,7 +237,7 @@ func TestAdaptorGetRequestURL(t *testing.T) { { name: "videos delete", mode: mode.VideosDelete, - model: "doubao-seedance-2-0", + model: "doubao-seedance-2-0-260128", videoID: "video-123", wantMethod: http.MethodDelete, wantURL: "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks/video-123", @@ -807,7 +807,7 @@ func TestAdaptorConvertRequestVideoGenerationMapsOpenAIFields(t *testing.T) { m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -847,7 +847,7 @@ func TestAdaptorConvertRequestVideoGenerationMapsOpenAIFields(t *testing.T) { t.Fatalf("failed to unmarshal converted body %s: %v", string(body), err) } - if payload["model"] != "doubao-seedance-2-0" { + if payload["model"] != "doubao-seedance-2-0-260128" { t.Fatalf("expected actual model, got %#v", payload["model"]) } @@ -868,11 +868,17 @@ func TestAdaptorConvertRequestVideoGenerationMapsOpenAIFields(t *testing.T) { assertDoubaoVideoContent(t, content[1], "image_url", "https://example.com/reference.png", "") assertDoubaoVideoContent(t, content[2], "video_url", "https://example.com/reference.mp4", "") assertDoubaoVideoContent(t, content[3], "audio_url", "data:audio/wav;base64,AAAA", "") + + usageContext := doubaoVideoRequestUsageContext(m) + if usageContext.InputVideo == nil || !*usageContext.InputVideo || + usageContext.OutputAudio == nil || !*usageContext.OutputAudio { + t.Fatalf("expected converted request media usage context, got %#v", usageContext) + } } func TestAdaptorConvertVideosEditMapsVideoFieldToReferenceVideo(t *testing.T) { adaptor := &Adaptor{} - m := meta.NewMeta(nil, mode.VideosEdits, "doubao-seedance-2-0", coremodel.ModelConfig{}) + m := meta.NewMeta(nil, mode.VideosEdits, "doubao-seedance-2-0-260128", coremodel.ModelConfig{}) req, err := http.NewRequestWithContext( context.Background(), @@ -920,7 +926,7 @@ func TestAdaptorConvertVideosEditMapsVideoFieldToReferenceVideo(t *testing.T) { func TestAdaptorConvertVideosEditMapsStoredVideoIDToDraftTask(t *testing.T) { adaptor := &Adaptor{} - m := meta.NewMeta(nil, mode.VideosEdits, "doubao-seedance-2-0", coremodel.ModelConfig{}) + m := meta.NewMeta(nil, mode.VideosEdits, "doubao-seedance-2-0-260128", coremodel.ModelConfig{}) req, err := http.NewRequestWithContext( context.Background(), @@ -965,11 +971,21 @@ func TestAdaptorConvertVideosEditMapsStoredVideoIDToDraftTask(t *testing.T) { if !ok || draftTask["id"] != "video_123" { t.Fatalf("expected draft task video_123, got %#v", item["draft_task"]) } + + usageContext := doubaoVideoRequestUsageContext(m) + if usageContext.InputVideo == nil || !*usageContext.InputVideo { + t.Fatalf("expected stored video draft task to count as input video, got %#v", usageContext) + } } func TestAdaptorConvertVideosExtensionMapsVideoFieldToFirstVideo(t *testing.T) { adaptor := &Adaptor{} - m := meta.NewMeta(nil, mode.VideosExtensions, "doubao-seedance-2-0", coremodel.ModelConfig{}) + m := meta.NewMeta( + nil, + mode.VideosExtensions, + "doubao-seedance-2-0-260128", + coremodel.ModelConfig{}, + ) req, err := http.NewRequestWithContext( context.Background(), @@ -1020,7 +1036,7 @@ func TestAdaptorConvertRequestVideoGenerationMapsPixelSize(t *testing.T) { m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1069,7 +1085,7 @@ func TestAdaptorConvertRequestVideoGenerationMapsPortraitPixelSize(t *testing.T) m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1118,7 +1134,7 @@ func TestAdaptorConvertRequestVideosIgnoresJobOnlyDuration(t *testing.T) { m := meta.NewMeta( nil, mode.Videos, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1162,7 +1178,7 @@ func TestAdaptorConvertRequestVideoGenerationIgnoresVideosSeconds(t *testing.T) m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1208,7 +1224,7 @@ func TestAdaptorConvertRequestVideoGenerationMapsMultipartPixelSize(t *testing.T m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1280,7 +1296,7 @@ func TestAdaptorConvertRequestVideoGenerationIgnoresDoubaoDurationField(t *testi m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1326,10 +1342,10 @@ func TestAdaptorConvertRequestDoubaoVideoMissingContentReturnsRelayError(t *test m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) - m.ActualModel = "doubao-seedance-2-0" + m.ActualModel = "doubao-seedance-2-0-260128" req := httptest.NewRequestWithContext( t.Context(), @@ -1369,7 +1385,7 @@ func TestAdaptorConvertRequestVideoGenerationKeepsNativeContentOnce(t *testing.T m := meta.NewMeta( nil, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) @@ -1434,16 +1450,17 @@ func TestAdaptorDoResponseVideoSubmitStoresJob(t *testing.T) { m := meta.NewMeta( &coremodel.Channel{ID: 9}, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) m.Group.ID = "group-1" m.Token.ID = 7 - m.Set(metaDoubaoVideoRequest, doubaoVideoRequest{ - Content: []doubaoVideoContent{{Type: "text", Text: "Animate a calm ocean"}}, + setDoubaoVideoMetadata(m, doubaoVideoStoreMetadata{ + Prompt: "Animate a calm ocean", Resolution: "720p", Ratio: "16:9", - Duration: intPtrFromAny(5), + Duration: 5, + InputVideo: new(false), }) resp := &http.Response{ @@ -1452,7 +1469,7 @@ func TestAdaptorDoResponseVideoSubmitStoresJob(t *testing.T) { Body: io.NopCloser(strings.NewReader(`{ "id": "task-123", "status": "queued", - "model": "doubao-seedance-2-0", + "model": "doubao-seedance-2-0-260128", "created_at": 1770000000, "execution_expires_after": 172800 }`)), @@ -1471,7 +1488,7 @@ func TestAdaptorDoResponseVideoSubmitStoresJob(t *testing.T) { t.Fatalf("expected video job store, got %#v", store.saved) } - if store.saved[0].Metadata != `{"prompt":"Animate a calm ocean","resolution":"720p","ratio":"16:9","duration":5}` { + if store.saved[0].Metadata != `{"prompt":"Animate a calm ocean","resolution":"720p","ratio":"16:9","duration":5,"input_video":false}` { t.Fatalf("unexpected saved metadata: %s", store.saved[0].Metadata) } @@ -1484,7 +1501,7 @@ func TestAdaptorDoResponseVideoSubmitStoresJob(t *testing.T) { t.Fatalf("unexpected job: %#v", job) } - if job.Model != "doubao-seedance-2-0" || + if job.Model != "doubao-seedance-2-0-260128" || job.Prompt != "Animate a calm ocean" || job.NSeconds != 5 || job.Width != 1280 || @@ -1509,16 +1526,16 @@ func TestAdaptorDoResponseVideoSubmitStoresCompletedGeneration(t *testing.T) { m := meta.NewMeta( &coremodel.Channel{ID: 9}, mode.VideoGenerationsJobs, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, ) m.Group.ID = "group-1" m.Token.ID = 7 - m.Set(metaDoubaoVideoRequest, doubaoVideoRequest{ - Content: []doubaoVideoContent{{Type: "text", Text: "Animate a calm ocean"}}, + setDoubaoVideoMetadata(m, doubaoVideoStoreMetadata{ + Prompt: "Animate a calm ocean", Resolution: "720p", Ratio: "9:16", - Duration: intPtrFromAny(5), + Duration: 5, }) resp := &http.Response{ @@ -1527,7 +1544,7 @@ func TestAdaptorDoResponseVideoSubmitStoresCompletedGeneration(t *testing.T) { Body: io.NopCloser(strings.NewReader(`{ "id": "task-123", "status": "succeeded", - "model": "doubao-seedance-2-0", + "model": "doubao-seedance-2-0-260128", "created_at": 1770000000, "updated_at": 1770000100, "execution_expires_after": 172800, @@ -1597,7 +1614,7 @@ func TestAdaptorDoResponseVideoStatusRestoresOpenAIFieldsFromStore(t *testing.T) m := meta.NewMeta( &coremodel.Channel{ID: 9}, mode.VideosGet, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, meta.WithVideoID("video-123"), ) @@ -1630,7 +1647,7 @@ func TestAdaptorDoResponseVideoStatusRestoresOpenAIFieldsFromStore(t *testing.T) if video.ID != "video-123" || video.Object != relaymodel.VideoObject || video.Status != relaymodel.VideoStatusCompleted || - video.Model != "doubao-seedance-2-0" || + video.Model != "doubao-seedance-2-0-260128" || video.Prompt != "A stored prompt" || video.Seconds != 6 || video.Size != "720x1280" || @@ -1674,7 +1691,7 @@ func TestAdaptorDoResponseVideoContentDownloadsGeneratedVideo(t *testing.T) { meta: meta.NewMeta( &coremodel.Channel{ID: 9}, mode.VideoGenerationsContent, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, meta.WithGenerationID("generation-123"), ), @@ -1686,7 +1703,7 @@ func TestAdaptorDoResponseVideoContentDownloadsGeneratedVideo(t *testing.T) { meta: meta.NewMeta( &coremodel.Channel{ID: 9}, mode.VideosContent, - "doubao-seedance-2-0", + "doubao-seedance-2-0-260128", coremodel.ModelConfig{}, meta.WithVideoID("video-123"), ), @@ -1754,6 +1771,7 @@ func TestAdaptorFetchAsyncUsageUsesDoubaoCompletionTokens(t *testing.T) { "resolution": "720p", "ratio": "16:9", "service_tier": "default", + "generate_audio": false, "usage": { "completion_tokens": 411300, "total_tokens": 411300, @@ -1768,7 +1786,7 @@ func TestAdaptorFetchAsyncUsageUsesDoubaoCompletionTokens(t *testing.T) { saved: []adaptor.StoreCache{ { ID: coremodel.VideoJobStoreID("task-123"), - Metadata: `{"prompt":"Stored prompt","resolution":"720p","ratio":"9:16","duration":6}`, + Metadata: `{"prompt":"Stored prompt","resolution":"720p","ratio":"9:16","duration":6,"input_video":true,"output_audio":true}`, }, }, } @@ -1793,7 +1811,11 @@ func TestAdaptorFetchAsyncUsageUsesDoubaoCompletionTokens(t *testing.T) { if usageContext.Resolution != "1280x720" || usageContext.NativeResolution != "720p" || - usageContext.ServiceTier != "default" { + usageContext.ServiceTier != "default" || + usageContext.InputVideo == nil || + !*usageContext.InputVideo || + usageContext.OutputAudio == nil || + *usageContext.OutputAudio { t.Fatalf("unexpected usage context: %#v", usageContext) } } @@ -1845,10 +1867,145 @@ func TestAdaptorFetchAsyncUsageCombinesStoredRatioBeforeDerivingSize(t *testing. } } +func TestAdaptorFetchAsyncUsageDoubaoNativeUsesNativeResolution(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/custom/api/v3/contents/generations/tasks/task-123" { + t.Fatalf("expected task path, got %s", r.URL.Path) + } + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "id": "task-123", + "status": "succeeded", + "resolution": "720p", + "ratio": "16:9", + "service_tier": "default", + "usage": { + "completion_tokens": 411300, + "total_tokens": 411300 + } + }`)) + })) + defer server.Close() + + doubaoAdaptor := &Adaptor{} + store := &doubaoTestStore{ + saved: []adaptor.StoreCache{ + { + ID: coremodel.VideoGenerationStoreID("task-123"), + Metadata: `{"prompt":"Stored prompt","resolution":"1080p","ratio":"9:16","duration":6,"input_video":true,"output_audio":false}`, + }, + }, + } + + _, usageContext, completed, err := doubaoAdaptor.FetchAsyncUsage( + context.Background(), + doubaoAsyncUsageRequestWithMode( + mode.DoubaoVideo, + server.URL+"/custom", + "task-123", + store, + ), + ) + if err != nil { + t.Fatalf("FetchAsyncUsage returned error: %v", err) + } + + if !completed { + t.Fatal("expected async usage to be completed") + } + + if usageContext.Resolution != "720p" || + usageContext.NativeResolution != "720p" || + usageContext.ServiceTier != "default" || + usageContext.InputVideo == nil || + !*usageContext.InputVideo || + usageContext.OutputAudio == nil || + *usageContext.OutputAudio { + t.Fatalf("unexpected native usage context: %#v", usageContext) + } +} + +func TestAdaptorFetchAsyncUsageDoubaoNativeUsesNativeFallback(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/custom/api/v3/contents/generations/tasks/task-123" { + t.Fatalf("expected task path, got %s", r.URL.Path) + } + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "id": "task-123", + "status": "succeeded", + "usage": { + "completion_tokens": 411300, + "total_tokens": 411300 + } + }`)) + })) + defer server.Close() + + doubaoAdaptor := &Adaptor{} + + _, usageContext, completed, err := doubaoAdaptor.FetchAsyncUsage( + context.Background(), + adaptor.AsyncUsageRequest{ + Channel: &coremodel.Channel{ + BaseURL: server.URL + "/fallback", + Key: "test-key", + }, + Info: &coremodel.AsyncUsageInfo{ + Mode: int(mode.DoubaoVideo), + BaseURL: server.URL + "/custom", + UpstreamID: "task-123", + GroupID: "group-1", + TokenID: 7, + UsageContext: coremodel.UsageContext{ + Resolution: "1080p", + NativeResolution: "1080p", + ServiceTier: "priority", + InputVideo: new(true), + OutputAudio: new(false), + }, + }, + }, + ) + if err != nil { + t.Fatalf("FetchAsyncUsage returned error: %v", err) + } + + if !completed { + t.Fatal("expected async usage to be completed") + } + + if usageContext.Resolution != "1080p" || + usageContext.NativeResolution != "1080p" || + usageContext.ServiceTier != "priority" || + usageContext.InputVideo == nil || + !*usageContext.InputVideo || + usageContext.OutputAudio == nil || + *usageContext.OutputAudio { + t.Fatalf("unexpected native usage context fallback: %#v", usageContext) + } +} + func doubaoAsyncUsageRequest( baseURL string, upstreamID string, store adaptor.Store, +) adaptor.AsyncUsageRequest { + return doubaoAsyncUsageRequestWithMode( + mode.VideoGenerationsJobs, + baseURL, + upstreamID, + store, + ) +} + +func doubaoAsyncUsageRequestWithMode( + relayMode mode.Mode, + baseURL string, + upstreamID string, + store adaptor.Store, ) adaptor.AsyncUsageRequest { return adaptor.AsyncUsageRequest{ Channel: &coremodel.Channel{ @@ -1856,7 +2013,7 @@ func doubaoAsyncUsageRequest( Key: "test-key", }, Info: &coremodel.AsyncUsageInfo{ - Mode: int(mode.VideoGenerationsJobs), + Mode: int(relayMode), BaseURL: baseURL, UpstreamID: upstreamID, GroupID: "group-1", diff --git a/core/relay/adaptor/doubao/video.go b/core/relay/adaptor/doubao/video.go index 29411809..0f0b16e7 100644 --- a/core/relay/adaptor/doubao/video.go +++ b/core/relay/adaptor/doubao/video.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/base64" + "encoding/json" "fmt" "io" "mime" @@ -25,8 +26,8 @@ import ( ) const ( - metaDoubaoVideoRequest = "doubao_video_request" - doubaoVideoTTL = 7 * 24 * time.Hour + metaDoubaoVideoMetadata = "doubao_video_metadata" + doubaoVideoTTL = 7 * 24 * time.Hour ) type doubaoVideoRequest struct { @@ -38,7 +39,7 @@ type doubaoVideoRequest struct { ExecutionExpiresAfter *int `json:"execution_expires_after,omitempty"` GenerateAudio *bool `json:"generate_audio,omitempty"` Draft *bool `json:"draft,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` + Tools []any `json:"tools,omitempty"` SafetyIdentifier string `json:"safety_identifier,omitempty"` Priority *int `json:"priority,omitempty"` Resolution string `json:"resolution,omitempty"` @@ -51,6 +52,225 @@ type doubaoVideoRequest struct { Watermark *bool `json:"watermark,omitempty"` } +type doubaoOpenAIVideoRequest struct { + Content []doubaoOpenAIVideoContent `json:"content,omitempty"` + Prompt string `json:"prompt,omitempty"` + Model string `json:"model,omitempty"` + Width doubaoFlexibleInt `json:"width,omitempty"` + Height doubaoFlexibleInt `json:"height,omitempty"` + NVariants doubaoFlexibleInt `json:"n_variants,omitempty"` + NSeconds doubaoFlexibleInt `json:"n_seconds,omitempty"` + CallbackURL string `json:"callback_url,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + Resolution string `json:"resolution,omitempty"` + Ratio string `json:"ratio,omitempty"` + Size string `json:"size,omitempty"` + Seconds doubaoFlexibleInt `json:"seconds,omitempty"` + Seed any `json:"seed,omitempty"` + ExecutionExpiresAfter doubaoFlexibleInt `json:"execution_expires_after,omitempty"` + GenerateAudio doubaoFlexibleBool `json:"generate_audio,omitempty"` + Draft doubaoFlexibleBool `json:"draft,omitempty"` + Priority doubaoFlexibleInt `json:"priority,omitempty"` + Frames doubaoFlexibleInt `json:"frames,omitempty"` + FramesPerSecond doubaoFlexibleInt `json:"framespersecond,omitempty"` + FPS doubaoFlexibleInt `json:"fps,omitempty"` + CameraFixed doubaoFlexibleBool `json:"camera_fixed,omitempty"` + Watermark doubaoFlexibleBool `json:"watermark,omitempty"` + Tools []any `json:"tools,omitempty"` + InputReference doubaoFlexibleString `json:"input_reference,omitempty"` + Image doubaoFlexibleString `json:"image,omitempty"` + ImageURL doubaoFlexibleString `json:"image_url,omitempty"` + FirstFrameURL doubaoFlexibleString `json:"first_frame_url,omitempty"` + LastFrameURL doubaoFlexibleString `json:"last_frame_url,omitempty"` + VideoURL doubaoFlexibleString `json:"video_url,omitempty"` + AudioURL doubaoFlexibleString `json:"audio_url,omitempty"` + InputAudio *doubaoOpenAIInputAudio `json:"input_audio,omitempty"` + DraftTaskID string `json:"draft_task_id,omitempty"` + VideoID string `json:"video_id,omitempty"` + Video doubaoFlexibleString `json:"video,omitempty"` +} + +type doubaoOpenAIVideoContent struct { + Type string `json:"type,omitempty"` + Text string `json:"text,omitempty"` + Role string `json:"role,omitempty"` + ImageURL doubaoFlexibleString `json:"image_url,omitempty"` + VideoURL doubaoFlexibleString `json:"video_url,omitempty"` + AudioURL doubaoFlexibleString `json:"audio_url,omitempty"` + InputAudio *doubaoOpenAIInputAudio `json:"input_audio,omitempty"` + DraftTask doubaoFlexibleID `json:"draft_task,omitempty"` +} + +type doubaoOpenAIInputAudio struct { + URL string `json:"url,omitempty"` + Data string `json:"data,omitempty"` + Format string `json:"format,omitempty"` +} + +func (audio *doubaoOpenAIInputAudio) DoubaoURL() *doubaoVideoURLContent { + if audio == nil { + return nil + } + + if url := strings.TrimSpace(audio.URL); url != "" { + return &doubaoVideoURLContent{URL: url} + } + + data := strings.TrimSpace(audio.Data) + if data == "" { + return nil + } + + if strings.HasPrefix(data, "data:audio/") { + return &doubaoVideoURLContent{URL: data} + } + + format := strings.TrimSpace(strings.ToLower(audio.Format)) + if format == "" { + format = "wav" + } + + return &doubaoVideoURLContent{ + URL: "data:audio/" + format + ";base64," + data, + } +} + +type doubaoFlexibleInt struct { + Value int + Set bool +} + +func (value *doubaoFlexibleInt) UnmarshalJSON(data []byte) error { + text := strings.TrimSpace(string(data)) + if text == "" || text == "null" { + return nil + } + + if strings.HasPrefix(text, `"`) { + var raw string + if err := sonic.Unmarshal(data, &raw); err != nil { + return nil + } + + text = strings.TrimSpace(raw) + if text == "" { + return nil + } + } + + number := json.Number(text) + + parsed, err := number.Int64() + if err != nil { + floatValue, floatErr := number.Float64() + if floatErr != nil { + return nil + } + + parsed = int64(floatValue) + } + + value.Value = int(parsed) + value.Set = true + + return nil +} + +func (value doubaoFlexibleInt) Ptr() *int { + if !value.Set { + return nil + } + + return &value.Value +} + +type doubaoFlexibleBool struct { + Value bool + Set bool +} + +func (value *doubaoFlexibleBool) UnmarshalJSON(data []byte) error { + text := strings.TrimSpace(string(data)) + if text == "" || text == "null" { + return nil + } + + if strings.HasPrefix(text, `"`) { + var raw string + if err := sonic.Unmarshal(data, &raw); err != nil { + return nil + } + + text = strings.TrimSpace(raw) + } + + parsed, err := strconv.ParseBool(text) + if err != nil { + return nil + } + + value.Value = parsed + value.Set = true + + return nil +} + +func (value doubaoFlexibleBool) Ptr() *bool { + if !value.Set { + return nil + } + + return &value.Value +} + +type doubaoFlexibleString string + +func (value *doubaoFlexibleString) UnmarshalJSON(data []byte) error { + var text string + if err := sonic.Unmarshal(data, &text); err == nil { + *value = doubaoFlexibleString(strings.TrimSpace(text)) + return nil + } + + var object struct { + URL string `json:"url,omitempty"` + } + if err := sonic.Unmarshal(data, &object); err == nil { + *value = doubaoFlexibleString(strings.TrimSpace(object.URL)) + } + + return nil +} + +func (value doubaoFlexibleString) String() string { + return strings.TrimSpace(string(value)) +} + +type doubaoFlexibleID string + +func (value *doubaoFlexibleID) UnmarshalJSON(data []byte) error { + var text string + if err := sonic.Unmarshal(data, &text); err == nil { + *value = doubaoFlexibleID(strings.TrimSpace(text)) + return nil + } + + var object struct { + ID string `json:"id,omitempty"` + TaskID string `json:"task_id,omitempty"` + } + if err := sonic.Unmarshal(data, &object); err == nil { + *value = doubaoFlexibleID(firstNonEmptyString(object.ID, object.TaskID)) + } + + return nil +} + +func (value doubaoFlexibleID) String() string { + return strings.TrimSpace(string(value)) +} + type doubaoOpenAIVideoMode string const ( @@ -78,10 +298,13 @@ type doubaoDraftTask struct { } type doubaoVideoStoreMetadata struct { - Prompt string `json:"prompt,omitempty"` - Resolution string `json:"resolution,omitempty"` - Ratio string `json:"ratio,omitempty"` - Duration int `json:"duration,omitempty"` + Prompt string `json:"prompt,omitempty"` + Resolution string `json:"resolution,omitempty"` + Ratio string `json:"ratio,omitempty"` + Duration int `json:"duration,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + InputVideo *bool `json:"input_video,omitempty"` + OutputAudio *bool `json:"output_audio,omitempty"` } func ConvertVideoGenerationJobRequest( @@ -182,7 +405,7 @@ func convertDoubaoVideoRequest( } request.Model = meta.ActualModel - meta.Set(metaDoubaoVideoRequest, request) + setDoubaoVideoMetadata(meta, doubaoVideoMetadataFromRequest(request)) data, err := sonic.Marshal(&request) if err != nil { @@ -203,7 +426,7 @@ func parseDoubaoVideoGenerationJobRequest(req *http.Request) (doubaoVideoRequest return parseDoubaoMultipartVideoGenerationJobRequest(req) } - var raw map[string]any + var raw doubaoOpenAIVideoRequest if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return doubaoVideoRequest{}, err } @@ -216,7 +439,7 @@ func parseDoubaoVideosRequest(req *http.Request) (doubaoVideoRequest, error) { return parseDoubaoMultipartVideosRequest(req, doubaoOpenAIVideoModeCreate) } - var raw map[string]any + var raw doubaoOpenAIVideoRequest if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return doubaoVideoRequest{}, err } @@ -240,67 +463,62 @@ func parseDoubaoVideosModeRequest( return parseDoubaoMultipartVideosRequest(req, openAIMode) } - var raw map[string]any + var raw doubaoOpenAIVideoRequest if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return doubaoVideoRequest{}, err } request := parseDoubaoJSONVideosRequest(raw) - addDoubaoOpenAIVideoField(&request.Content, raw["video"], openAIMode) + addDoubaoOpenAIVideoField(&request.Content, raw.Video.String(), openAIMode) return request, nil } -func parseDoubaoJSONVideoGenerationJobRequest(raw map[string]any) doubaoVideoRequest { +func parseDoubaoJSONVideoGenerationJobRequest(raw doubaoOpenAIVideoRequest) doubaoVideoRequest { request := parseDoubaoJSONOpenAIVideoCommonRequest(raw, doubaoVideoJobSizeFromJSON(raw)) - request.Duration = intPtrFromAny(raw["n_seconds"]) + request.Duration = raw.NSeconds.Ptr() return request } -func parseDoubaoJSONVideosRequest(raw map[string]any) doubaoVideoRequest { - request := parseDoubaoJSONOpenAIVideoCommonRequest(raw, stringFromAny(raw["size"])) - request.Duration = intPtrFromAny(raw["seconds"]) +func parseDoubaoJSONVideosRequest(raw doubaoOpenAIVideoRequest) doubaoVideoRequest { + request := parseDoubaoJSONOpenAIVideoCommonRequest(raw, raw.Size) + request.Duration = raw.Seconds.Ptr() return request } -func parseDoubaoJSONOpenAIVideoCommonRequest(raw map[string]any, size string) doubaoVideoRequest { +func parseDoubaoJSONOpenAIVideoCommonRequest( + raw doubaoOpenAIVideoRequest, + size string, +) doubaoVideoRequest { request := doubaoVideoRequest{ - Content: doubaoVideoContentFromAny(raw["content"]), - CallbackURL: stringFromAny(raw["callback_url"]), - ServiceTier: stringFromAny(raw["service_tier"]), - SafetyIdentifier: stringFromAny(raw["safety_identifier"]), + Content: doubaoVideoContentFromOpenAIContent(raw.Content), + CallbackURL: strings.TrimSpace(raw.CallbackURL), + ServiceTier: strings.TrimSpace(raw.ServiceTier), + SafetyIdentifier: strings.TrimSpace(raw.SafetyIdentifier), Resolution: firstNonEmptyString( - stringFromAny(raw["resolution"]), + raw.Resolution, doubaoVideoResolutionFromSize(size), ), Ratio: firstNonEmptyString( - stringFromAny(raw["ratio"]), + raw.Ratio, ratioFromSize(size), ), - Seed: raw["seed"], - ExecutionExpiresAfter: intPtrFromAny(raw["execution_expires_after"]), - GenerateAudio: boolPtrFromAny(raw["generate_audio"]), - Draft: boolPtrFromAny(raw["draft"]), - Priority: intPtrFromAny(raw["priority"]), - Frames: intPtrFromAny(raw["frames"]), - FramesPerSecond: intPtrFromAny(firstPresent(raw, "framespersecond", "fps")), - CameraFixed: boolPtrFromAny(raw["camera_fixed"]), - Watermark: boolPtrFromAny(raw["watermark"]), + Seed: raw.Seed, + ExecutionExpiresAfter: raw.ExecutionExpiresAfter.Ptr(), + GenerateAudio: raw.GenerateAudio.Ptr(), + Draft: raw.Draft.Ptr(), + Priority: raw.Priority.Ptr(), + Frames: raw.Frames.Ptr(), + FramesPerSecond: firstFlexibleIntPtr(raw.FramesPerSecond, raw.FPS), + CameraFixed: raw.CameraFixed.Ptr(), + Watermark: raw.Watermark.Ptr(), + Tools: raw.Tools, } if request.Content == nil { - request.Content = doubaoVideoContentFromOpenAI(raw) - } - - if tools, ok := raw["tools"].([]any); ok { - request.Tools = make([]map[string]any, 0, len(tools)) - for _, item := range tools { - if tool, ok := item.(map[string]any); ok { - request.Tools = append(request.Tools, tool) - } - } + request.Content = doubaoVideoContentFromOpenAIRequest(raw) } return request @@ -403,10 +621,10 @@ func doubaoVideoSizeFromForm(req *http.Request) string { return req.PostFormValue("size") } -func doubaoVideoJobSizeFromJSON(raw map[string]any) string { - width := intFromPtr(intPtrFromAny(raw["width"])) +func doubaoVideoJobSizeFromJSON(raw doubaoOpenAIVideoRequest) string { + width := raw.Width.Value - height := intFromPtr(intPtrFromAny(raw["height"])) + height := raw.Height.Value if width <= 0 || height <= 0 { return "" } @@ -425,30 +643,38 @@ func doubaoVideoJobSizeFromForm(req *http.Request) string { return fmt.Sprintf("%dx%d", width, height) } -func doubaoVideoContentFromAny(value any) []doubaoVideoContent { - items, ok := value.([]any) - if !ok { +func firstFlexibleIntPtr(values ...doubaoFlexibleInt) *int { + for _, value := range values { + if value.Set { + return &value.Value + } + } + + return nil +} + +func doubaoVideoContentFromOpenAIContent(items []doubaoOpenAIVideoContent) []doubaoVideoContent { + if len(items) == 0 { return nil } content := make([]doubaoVideoContent, 0, len(items)) for _, item := range items { - m, ok := item.(map[string]any) - if !ok { - continue - } + content = append(content, doubaoVideoContentFromOpenAIContentItem(item)) + } - content = append(content, doubaoVideoContentFromMap(m)) + if len(content) == 0 { + return nil } return content } -func doubaoVideoContentFromMap(m map[string]any) doubaoVideoContent { +func doubaoVideoContentFromOpenAIContentItem(raw doubaoOpenAIVideoContent) doubaoVideoContent { item := doubaoVideoContent{ - Type: strings.TrimSpace(stringFromAny(m["type"])), - Text: stringFromAny(m["text"]), - Role: stringFromAny(m["role"]), + Type: strings.TrimSpace(raw.Type), + Text: strings.TrimSpace(raw.Text), + Role: strings.TrimSpace(raw.Role), } if item.Type == "" && item.Text != "" { @@ -457,34 +683,34 @@ func doubaoVideoContentFromMap(m map[string]any) doubaoVideoContent { switch item.Type { case "image_url": - item.ImageURL = &doubaoVideoURLContent{URL: nestedURL(m["image_url"])} + item.ImageURL = &doubaoVideoURLContent{URL: raw.ImageURL.String()} case "video_url": - item.VideoURL = &doubaoVideoURLContent{URL: nestedURL(m["video_url"])} + item.VideoURL = &doubaoVideoURLContent{URL: raw.VideoURL.String()} case "audio_url": - item.AudioURL = &doubaoVideoURLContent{URL: nestedURL(m["audio_url"])} + item.AudioURL = &doubaoVideoURLContent{URL: raw.AudioURL.String()} case "input_audio": item.Type = "audio_url" - item.AudioURL = openAIAudioToDoubaoURL(m["input_audio"]) + item.AudioURL = raw.InputAudio.DoubaoURL() if item.Role == "" { item.Role = "reference_audio" } case "draft_task": - item.DraftTask = &doubaoDraftTask{ID: nestedID(m["draft_task"])} + item.DraftTask = &doubaoDraftTask{ID: raw.DraftTask.String()} } return item } -func doubaoVideoContentFromOpenAI(raw map[string]any) []doubaoVideoContent { +func doubaoVideoContentFromOpenAIRequest(raw doubaoOpenAIVideoRequest) []doubaoVideoContent { content := []doubaoVideoContent{} - if prompt := stringFromAny(raw["prompt"]); prompt != "" { + if prompt := strings.TrimSpace(raw.Prompt); prompt != "" { content = append(content, doubaoVideoContent{Type: "text", Text: prompt}) } - addStringContent := func(contentType string, value any, role string) { - urlValue := stringFromAny(value) + addStringContent := func(contentType, urlValue, role string) { + urlValue = strings.TrimSpace(urlValue) if urlValue == "" { return } @@ -502,21 +728,25 @@ func doubaoVideoContentFromOpenAI(raw map[string]any) []doubaoVideoContent { content = append(content, item) } - addStringContent("image_url", firstPresent(raw, "input_reference", "image", "image_url"), "") - addStringContent("image_url", raw["first_frame_url"], "first_frame") - addStringContent("image_url", raw["last_frame_url"], "last_frame") - addStringContent("video_url", raw["video_url"], "reference_video") - addStringContent("audio_url", raw["audio_url"], "reference_audio") + addStringContent("image_url", firstNonEmptyString( + raw.InputReference.String(), + raw.Image.String(), + raw.ImageURL.String(), + ), "") + addStringContent("image_url", raw.FirstFrameURL.String(), "first_frame") + addStringContent("image_url", raw.LastFrameURL.String(), "last_frame") + addStringContent("video_url", raw.VideoURL.String(), "reference_video") + addStringContent("audio_url", raw.AudioURL.String(), "reference_audio") - if inputAudio, ok := raw["input_audio"].(map[string]any); ok { + if inputAudio := raw.InputAudio.DoubaoURL(); inputAudio != nil { content = append(content, doubaoVideoContent{ Type: "audio_url", - AudioURL: openAIAudioToDoubaoURL(inputAudio), + AudioURL: inputAudio, Role: "reference_audio", }) } - if draftTaskID := doubaoVideoDraftTaskIDFromRaw(raw); draftTaskID != "" { + if draftTaskID := firstNonEmptyString(raw.DraftTaskID, raw.VideoID); draftTaskID != "" { addDoubaoDraftTaskContent(&content, draftTaskID) } @@ -525,14 +755,14 @@ func doubaoVideoContentFromOpenAI(raw map[string]any) []doubaoVideoContent { func addDoubaoOpenAIVideoField( content *[]doubaoVideoContent, - value any, + value string, openAIMode doubaoOpenAIVideoMode, ) { if openAIMode == doubaoOpenAIVideoModeCreate { return } - videoURL := strings.TrimSpace(stringFromAny(value)) + videoURL := strings.TrimSpace(value) if videoURL == "" { return } @@ -574,10 +804,6 @@ func addDoubaoOpenAIVideoField( }) } -func doubaoVideoDraftTaskIDFromRaw(raw map[string]any) string { - return stringFromAny(firstPresent(raw, "draft_task_id", "video_id")) -} - func addDoubaoDraftTaskContent(content *[]doubaoVideoContent, draftTaskID string) { draftTaskID = strings.TrimSpace(draftTaskID) if draftTaskID == "" { @@ -826,7 +1052,7 @@ func VideoGenerationJobStatusHandler( response.ID = meta.JobID } - applyStoredDoubaoVideoRequestMetadata( + applyStoredDoubaoVideoMetadata( meta, store, coremodel.VideoJobStoreID(response.ID), @@ -875,7 +1101,7 @@ func VideosStatusHandler( response.ID = meta.VideoID } - applyStoredDoubaoVideoRequestMetadata( + applyStoredDoubaoVideoMetadata( meta, store, coremodel.VideoGenerationStoreID(response.ID), @@ -977,7 +1203,7 @@ func buildDoubaoVideoJob( now := time.Now().Unix() createdAt := firstPositiveInt64(response.CreatedAt, now) expiresAt := doubaoVideoExpiresAt(*response).Unix() - request := doubaoVideoRequestFromMeta(meta) + metadata := doubaoVideoMetadataFromMeta(meta) status := doubaoVideoJobStatus(response.Status) job := relaymodel.VideoGenerationJob{ @@ -987,13 +1213,13 @@ func buildDoubaoVideoJob( CreatedAt: createdAt, ExpiresAt: &expiresAt, Generations: []relaymodel.VideoGenerations{}, - Prompt: doubaoVideoPrompt(request), + Prompt: metadata.Prompt, Model: meta.OriginModel, NVariants: 1, - NSeconds: firstPositiveInt(response.Duration, intFromPtr(request.Duration)), + NSeconds: firstPositiveInt(response.Duration, metadata.Duration), } - resolution, ratio := doubaoVideoResolutionAndRatio(response, request) + resolution, ratio := doubaoVideoResolutionAndRatio(response, metadata) job.Width, job.Height = doubaoVideoDimensions(resolution, ratio) if status == relaymodel.VideoGenerationJobStatusSucceeded || @@ -1030,16 +1256,16 @@ func buildDoubaoVideo( response *relaymodel.DoubaoVideoTaskResponse, ) relaymodel.Video { now := time.Now().Unix() - request := doubaoVideoRequestFromMeta(meta) - resolution, ratio := doubaoVideoResolutionAndRatio(response, request) + metadata := doubaoVideoMetadataFromMeta(meta) + resolution, ratio := doubaoVideoResolutionAndRatio(response, metadata) video := relaymodel.Video{ ID: id, Object: relaymodel.VideoObject, CreatedAt: firstPositiveInt64(response.CreatedAt, now), Status: doubaoVideoStatus(response.Status), Model: meta.OriginModel, - Prompt: doubaoVideoPrompt(request), - Seconds: firstPositiveInt(response.Duration, intFromPtr(request.Duration)), + Prompt: metadata.Prompt, + Seconds: firstPositiveInt(response.Duration, metadata.Duration), Size: doubaoVideoSize(resolution, ratio), } @@ -1091,16 +1317,19 @@ func doubaoVideoUsageContext(response *relaymodel.DoubaoVideoTaskResponse) corem Resolution: doubaoVideoSize(resolution, ratio), NativeResolution: resolution, ServiceTier: response.ServiceTier, + OutputAudio: response.GenerateAudio, } } func doubaoVideoRequestUsageContext(meta *meta.Meta) coremodel.UsageContext { - request := doubaoVideoRequestFromMeta(meta) + metadata := doubaoVideoMetadataFromMeta(meta) return coremodel.UsageContext{ - Resolution: doubaoVideoSize(request.Resolution, request.Ratio), - NativeResolution: request.Resolution, - ServiceTier: request.ServiceTier, + Resolution: doubaoVideoSize(metadata.Resolution, metadata.Ratio), + NativeResolution: metadata.Resolution, + ServiceTier: metadata.ServiceTier, + InputVideo: metadata.InputVideo, + OutputAudio: metadata.OutputAudio, } } @@ -1191,13 +1420,7 @@ func saveDoubaoVideoStore( } func doubaoVideoStoreMetadataString(meta *meta.Meta) string { - request := doubaoVideoRequestFromMeta(meta) - metadata := doubaoVideoStoreMetadata{ - Prompt: doubaoVideoPrompt(request), - Resolution: request.Resolution, - Ratio: request.Ratio, - Duration: intFromPtr(request.Duration), - } + metadata := doubaoVideoMetadataFromMeta(meta) data, err := sonic.MarshalString(metadata) if err != nil { @@ -1207,7 +1430,7 @@ func doubaoVideoStoreMetadataString(meta *meta.Meta) string { return data } -func applyStoredDoubaoVideoRequestMetadata( +func applyStoredDoubaoVideoMetadata( meta *meta.Meta, store adaptor.Store, storeID string, @@ -1227,66 +1450,103 @@ func applyStoredDoubaoVideoRequestMetadata( return } - var request doubaoVideoRequest - if value, ok := meta.Get(metaDoubaoVideoRequest); ok { - request, _ = value.(doubaoVideoRequest) + metadata = doubaoVideoMetadataFromMeta(meta).WithFallback(metadata) + setDoubaoVideoMetadata(meta, metadata) + + if response.Resolution == "" { + response.Resolution = metadata.Resolution } - if doubaoVideoPrompt(request) == "" && metadata.Prompt != "" { - request.Content = append( - request.Content, - doubaoVideoContent{Type: "text", Text: metadata.Prompt}, - ) + if response.Ratio == "" { + response.Ratio = metadata.Ratio } - if request.Resolution == "" { - request.Resolution = metadata.Resolution + if response.Duration == 0 { + response.Duration = metadata.Duration } - if request.Ratio == "" { - request.Ratio = metadata.Ratio + if response.ServiceTier == "" { + response.ServiceTier = metadata.ServiceTier } - if request.Duration == nil && metadata.Duration > 0 { - duration := metadata.Duration - request.Duration = &duration + if response.GenerateAudio == nil { + response.GenerateAudio = metadata.OutputAudio } +} - if response.Resolution == "" { - response.Resolution = metadata.Resolution +func doubaoVideoMetadataFromMeta(meta *meta.Meta) doubaoVideoStoreMetadata { + if meta == nil { + return doubaoVideoStoreMetadata{} } - if response.Ratio == "" { - response.Ratio = metadata.Ratio + if value, ok := meta.Get(metaDoubaoVideoMetadata); ok { + metadata, _ := value.(doubaoVideoStoreMetadata) + return metadata } - if response.Duration == 0 { - response.Duration = metadata.Duration + return doubaoVideoStoreMetadata{} +} + +func setDoubaoVideoMetadata(meta *meta.Meta, metadata doubaoVideoStoreMetadata) { + if meta == nil { + return } - if len(request.Content) > 0 || - request.Resolution != "" || - request.Ratio != "" || - request.Duration != nil { - meta.Set(metaDoubaoVideoRequest, request) + if metadata == (doubaoVideoStoreMetadata{}) { + return } + + meta.Set(metaDoubaoVideoMetadata, metadata) } -func doubaoVideoRequestFromMeta(meta *meta.Meta) doubaoVideoRequest { - if meta == nil { - return doubaoVideoRequest{} +func doubaoVideoMetadataFromRequest(request doubaoVideoRequest) doubaoVideoStoreMetadata { + return doubaoVideoStoreMetadata{ + Prompt: doubaoVideoPrompt(request.Content), + Resolution: request.Resolution, + Ratio: request.Ratio, + Duration: intFromPtr(request.Duration), + ServiceTier: firstNonEmptyString(request.ServiceTier, "default"), + InputVideo: new(doubaoVideoContentHasVideo(request.Content)), + OutputAudio: doubaoVideoOutputAudioFromRequest(request), + } +} + +func (metadata doubaoVideoStoreMetadata) WithFallback( + fallback doubaoVideoStoreMetadata, +) doubaoVideoStoreMetadata { + if metadata.Prompt == "" { + metadata.Prompt = fallback.Prompt + } + + if metadata.Resolution == "" { + metadata.Resolution = fallback.Resolution + } + + if metadata.Ratio == "" { + metadata.Ratio = fallback.Ratio + } + + if metadata.Duration == 0 { + metadata.Duration = fallback.Duration + } + + if metadata.ServiceTier == "" { + metadata.ServiceTier = fallback.ServiceTier } - if value, ok := meta.Get(metaDoubaoVideoRequest); ok { - request, _ := value.(doubaoVideoRequest) - return request + if metadata.InputVideo == nil { + metadata.InputVideo = fallback.InputVideo } - return doubaoVideoRequest{} + if metadata.OutputAudio == nil { + metadata.OutputAudio = fallback.OutputAudio + } + + return metadata } -func doubaoVideoPrompt(request doubaoVideoRequest) string { - for _, item := range request.Content { +func doubaoVideoPrompt(content []doubaoVideoContent) string { + for _, item := range content { if item.Type == "text" && item.Text != "" { return item.Text } @@ -1295,6 +1555,29 @@ func doubaoVideoPrompt(request doubaoVideoRequest) string { return "" } +func doubaoVideoContentHasVideo(content []doubaoVideoContent) bool { + for _, item := range content { + if item.Type == "video_url" || (item.VideoURL != nil && item.VideoURL.URL != "") { + return true + } + + if item.Type == "draft_task" || (item.DraftTask != nil && item.DraftTask.ID != "") { + return true + } + } + + return false +} + +func doubaoVideoOutputAudioFromRequest(request doubaoVideoRequest) *bool { + if request.GenerateAudio != nil { + return request.GenerateAudio + } + + // Ark Seedance 2.0 and 1.5 default generate_audio to true. + return new(true) +} + func doubaoVideoExpiresAt(response relaymodel.DoubaoVideoTaskResponse) time.Time { if response.CreatedAt > 0 && response.ExecutionExpiresAfter > 0 { return time.Unix(response.CreatedAt+response.ExecutionExpiresAfter, 0) @@ -1334,14 +1617,14 @@ func doubaoVideoDimensions(resolution, ratio string) (int, int) { func doubaoVideoResolutionAndRatio( response *relaymodel.DoubaoVideoTaskResponse, - request doubaoVideoRequest, + metadata doubaoVideoStoreMetadata, ) (string, string) { if response == nil { - return request.Resolution, request.Ratio + return metadata.Resolution, metadata.Ratio } - return firstNonEmptyString(response.Resolution, request.Resolution), - firstNonEmptyString(response.Ratio, request.Ratio) + return firstNonEmptyString(response.Resolution, metadata.Resolution), + firstNonEmptyString(response.Ratio, metadata.Ratio) } func doubaoVideoSize(resolution, ratio string) string { diff --git a/core/relay/adaptor/doubao/video_helpers.go b/core/relay/adaptor/doubao/video_helpers.go index 74c997da..baeb6e1d 100644 --- a/core/relay/adaptor/doubao/video_helpers.go +++ b/core/relay/adaptor/doubao/video_helpers.go @@ -5,17 +5,6 @@ import ( "strings" ) -func firstPresent(values map[string]any, keys ...string) any { - for _, key := range keys { - value, ok := values[key] - if ok && value != nil { - return value - } - } - - return nil -} - func firstNonEmptyString(values ...string) string { for _, value := range values { value = strings.TrimSpace(value) @@ -27,15 +16,6 @@ func firstNonEmptyString(values ...string) string { return "" } -func stringFromAny(value any) string { - switch v := value.(type) { - case string: - return strings.TrimSpace(v) - default: - return "" - } -} - func intPtrFromAny(value any) *int { switch v := value.(type) { case int: @@ -58,17 +38,6 @@ func intPtrFromAny(value any) *int { } } -func boolPtrFromAny(value any) *bool { - switch v := value.(type) { - case bool: - return &v - case string: - return boolPtrFromString(v) - default: - return nil - } -} - func boolPtrFromString(value string) *bool { if strings.TrimSpace(value) == "" { return nil @@ -120,57 +89,6 @@ func firstPositiveInt64(values ...int64) int64 { return 0 } -func nestedURL(value any) string { - switch v := value.(type) { - case string: - return strings.TrimSpace(v) - case map[string]any: - return stringFromAny(v["url"]) - default: - return "" - } -} - -func nestedID(value any) string { - switch v := value.(type) { - case string: - return strings.TrimSpace(v) - case map[string]any: - return firstNonEmptyString(stringFromAny(v["id"]), stringFromAny(v["task_id"])) - default: - return "" - } -} - -func openAIAudioToDoubaoURL(value any) *doubaoVideoURLContent { - audio, ok := value.(map[string]any) - if !ok { - return nil - } - - if url := stringFromAny(audio["url"]); url != "" { - return &doubaoVideoURLContent{URL: url} - } - - data := stringFromAny(audio["data"]) - if data == "" { - return nil - } - - if strings.HasPrefix(data, "data:audio/") { - return &doubaoVideoURLContent{URL: data} - } - - format := strings.TrimSpace(stringFromAny(audio["format"])) - if format == "" { - format = "wav" - } - - return &doubaoVideoURLContent{ - URL: "data:audio/" + strings.ToLower(format) + ";base64," + data, - } -} - func doubaoVideoResolutionFromSize(size string) string { size = normalizeDoubaoSize(size) switch size { diff --git a/core/relay/adaptor/doubaoaudio/tts.go b/core/relay/adaptor/doubaoaudio/tts.go index 85c6f113..128f6bed 100644 --- a/core/relay/adaptor/doubaoaudio/tts.go +++ b/core/relay/adaptor/doubaoaudio/tts.go @@ -9,9 +9,11 @@ import ( "fmt" "io" "net/http" + "strconv" "strings" "github.com/bytedance/sonic" + "github.com/bytedance/sonic/ast" "github.com/gin-gonic/gin" "github.com/google/uuid" "github.com/gorilla/websocket" @@ -75,7 +77,7 @@ func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResul meta.Set("stream_format", request.StreamFormat) - reqMap, err := utils.UnmarshalMap(req) + node, err := common.UnmarshalRequest2NodeReusable(req) if err != nil { return adaptor.ConvertResult{}, err } @@ -125,13 +127,11 @@ func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResul doubaoRequest.Audio.Encoding = request.ResponseFormat - volumeRatio, ok := reqMap["volume_ratio"].(float64) - if ok { + if volumeRatio, ok := floatFromTTSNode(node.Get("volume_ratio")); ok { doubaoRequest.Audio.VolumeRatio = volumeRatio } - pitchRatio, ok := reqMap["pitch_ratio"].(float64) - if ok { + if pitchRatio, ok := floatFromTTSNode(node.Get("pitch_ratio")); ok { doubaoRequest.Audio.PitchRatio = pitchRatio } @@ -157,6 +157,33 @@ func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResul }, nil } +func floatFromTTSNode(node *ast.Node) (float64, bool) { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return 0, false + } + + if node.TypeSafe() == ast.V_STRING { + value, err := node.String() + if err != nil { + return 0, false + } + + parsed, err := strconv.ParseFloat(strings.TrimSpace(value), 64) + if err != nil { + return 0, false + } + + return parsed, true + } + + value, err := node.Float64() + if err != nil { + return 0, false + } + + return value, true +} + func TTSDoRequest(meta *meta.Meta, req *http.Request) (*http.Response, error) { wsURL := req.URL wsURL.Scheme = "wss" diff --git a/core/relay/adaptor/fake/adaptor_test.go b/core/relay/adaptor/fake/adaptor_test.go index f6320383..69fa0537 100644 --- a/core/relay/adaptor/fake/adaptor_test.go +++ b/core/relay/adaptor/fake/adaptor_test.go @@ -97,8 +97,6 @@ func TestFakeAdaptorResponsesForAllModes(t *testing.T) { t.Parallel() gin.SetMode(gin.TestMode) - boolPtr := func(v bool) *bool { return &v } - testCases := []struct { name string mode mode.Mode @@ -532,7 +530,7 @@ func TestFakeAdaptorResponsesForAllModes(t *testing.T) { requestBody: relaymodel.CreateResponseRequest{ Model: "fake-response", Input: "request-driven input", - Store: boolPtr(false), + Store: new(false), }, channelConfigs: model.ChannelConfigs{ "static_text": "request echoes request-driven input", diff --git a/core/relay/adaptor/gemini/video.go b/core/relay/adaptor/gemini/video.go index 812e57cf..f2f9c342 100644 --- a/core/relay/adaptor/gemini/video.go +++ b/core/relay/adaptor/gemini/video.go @@ -6,6 +6,7 @@ import ( "crypto/sha256" "encoding/base64" "encoding/hex" + "encoding/json" "errors" "fmt" "io" @@ -71,6 +72,85 @@ type geminiVideoParameters struct { PersonGeneration string `json:"personGeneration,omitempty"` } +type geminiOpenAIVideoRequest struct { + Prompt string `json:"prompt,omitempty"` + Model string `json:"model,omitempty"` + Width geminiFlexibleInt `json:"width,omitempty"` + Height geminiFlexibleInt `json:"height,omitempty"` + NVariants geminiFlexibleInt `json:"n_variants,omitempty"` + NSeconds geminiFlexibleInt `json:"n_seconds,omitempty"` + Seconds geminiFlexibleInt `json:"seconds,omitempty"` + Size string `json:"size,omitempty"` + NegativePrompt string `json:"negative_prompt,omitempty"` + PersonGeneration string `json:"person_generation,omitempty"` + InputReference geminiFlexibleString `json:"input_reference,omitempty"` + Image geminiFlexibleString `json:"image,omitempty"` + ImageURL geminiFlexibleString `json:"image_url,omitempty"` + VideoURL geminiFlexibleString `json:"video_url,omitempty"` + Video geminiFlexibleString `json:"video,omitempty"` +} + +type geminiFlexibleString string + +func (value *geminiFlexibleString) UnmarshalJSON(data []byte) error { + var text string + if err := sonic.Unmarshal(data, &text); err == nil { + *value = geminiFlexibleString(strings.TrimSpace(text)) + return nil + } + + var object struct { + URL string `json:"url,omitempty"` + } + if err := sonic.Unmarshal(data, &object); err == nil { + *value = geminiFlexibleString(strings.TrimSpace(object.URL)) + } + + return nil +} + +func (value geminiFlexibleString) String() string { + return strings.TrimSpace(string(value)) +} + +type geminiFlexibleInt struct { + Value int + Set bool +} + +func (value *geminiFlexibleInt) UnmarshalJSON(data []byte) error { + text := strings.TrimSpace(string(data)) + if text == "" || text == "null" { + return nil + } + + if strings.HasPrefix(text, `"`) { + var raw string + if err := sonic.Unmarshal(data, &raw); err != nil { + return nil + } + + text = strings.TrimSpace(raw) + } + + number := json.Number(text) + + parsed, err := number.Int64() + if err != nil { + floatValue, floatErr := number.Float64() + if floatErr != nil { + return nil + } + + parsed = int64(floatValue) + } + + value.Value = int(parsed) + value.Set = true + + return nil +} + type geminiVideoStoreMetadata struct { OperationName string `json:"operation_name,omitempty"` Prompt string `json:"prompt,omitempty"` @@ -554,12 +634,12 @@ func parseOpenAIVideoGenerationJobRequest(req *http.Request) (geminiVideoRequest return parseMultipartOpenAIVideoGenerationJobRequest(req) } - node, err := common.UnmarshalRequest2NodeReusable(req) - if err != nil { + var raw geminiOpenAIVideoRequest + if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return geminiVideoRequest{}, err } - return parseJSONOpenAIVideoGenerationJobRequest(&node), nil + return parseJSONOpenAIVideoGenerationJobRequest(raw), nil } func parseOpenAIVideosRequest(req *http.Request) (geminiVideoRequest, error) { @@ -567,12 +647,12 @@ func parseOpenAIVideosRequest(req *http.Request) (geminiVideoRequest, error) { return parseMultipartOpenAIVideosRequest(req) } - node, err := common.UnmarshalRequest2NodeReusable(req) - if err != nil { + var raw geminiOpenAIVideoRequest + if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return geminiVideoRequest{}, err } - return parseJSONOpenAIVideosRequest(&node), nil + return parseJSONOpenAIVideosRequest(raw), nil } func parseOpenAIVideosEditRequest(req *http.Request) (geminiVideoRequest, error) { @@ -601,14 +681,14 @@ func parseOpenAIVideosRequestWithVideoField(req *http.Request) (geminiVideoReque return request, nil } - node, err := common.UnmarshalRequest2NodeReusable(req) - if err != nil { + var raw geminiOpenAIVideoRequest + if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return geminiVideoRequest{}, err } - request := parseJSONOpenAIVideosRequest(&node) + request := parseJSONOpenAIVideosRequest(raw) if len(request.Instances) > 0 && request.Instances[0].Video == nil { - if media := mediaFromString(stringNode(&node, "video")); media != nil { + if media := mediaFromString(raw.Video.String()); media != nil { request.Instances[0].Video = media } } @@ -682,10 +762,10 @@ func hydrateGeminiOpenAIVideoReference( return nil } -func parseJSONOpenAIVideoGenerationJobRequest(node *ast.Node) geminiVideoRequest { - request := parseJSONOpenAIVideoCommonRequest(node, geminiVideoJobSizeFromJSON(node)) - request.Parameters.DurationSeconds = intNode(node, "n_seconds") - request.Parameters.NumberOfVideos = intNode(node, "n_variants") +func parseJSONOpenAIVideoGenerationJobRequest(raw geminiOpenAIVideoRequest) geminiVideoRequest { + request := parseJSONOpenAIVideoCommonRequest(raw, geminiVideoJobSizeFromJSON(raw)) + request.Parameters.DurationSeconds = raw.NSeconds.Value + request.Parameters.NumberOfVideos = raw.NVariants.Value if request.Parameters.NumberOfVideos <= 0 { request.Parameters.NumberOfVideos = 1 @@ -694,37 +774,40 @@ func parseJSONOpenAIVideoGenerationJobRequest(node *ast.Node) geminiVideoRequest return request } -func parseJSONOpenAIVideosRequest(node *ast.Node) geminiVideoRequest { - request := parseJSONOpenAIVideoCommonRequest(node, stringNode(node, "size")) - request.Parameters.DurationSeconds = intNode(node, "seconds") +func parseJSONOpenAIVideosRequest(raw geminiOpenAIVideoRequest) geminiVideoRequest { + request := parseJSONOpenAIVideoCommonRequest(raw, raw.Size) + request.Parameters.DurationSeconds = raw.Seconds.Value request.Parameters.NumberOfVideos = 1 return request } -func parseJSONOpenAIVideoCommonRequest(node *ast.Node, size string) geminiVideoRequest { +func parseJSONOpenAIVideoCommonRequest( + raw geminiOpenAIVideoRequest, + size string, +) geminiVideoRequest { request := geminiVideoRequest{ Parameters: geminiVideoParameters{ AspectRatio: geminiVideoAspectRatioFromSize(size), Resolution: geminiVideoResolutionFromSize(size), - NegativePrompt: stringNode(node, "negative_prompt"), - PersonGeneration: stringNode(node, "person_generation"), + NegativePrompt: strings.TrimSpace(raw.NegativePrompt), + PersonGeneration: strings.TrimSpace(raw.PersonGeneration), }, } instance := geminiVideoInstance{ - Prompt: stringNode(node, "prompt"), + Prompt: strings.TrimSpace(raw.Prompt), } if media := mediaFromString(firstNonEmpty( - stringNode(node, "input_reference"), - stringNode(node, "image"), - stringNode(node, "image_url"), + raw.InputReference.String(), + raw.Image.String(), + raw.ImageURL.String(), )); media != nil { instance.Image = media } - if media := mediaFromString(stringNode(node, "video_url")); media != nil { + if media := mediaFromString(raw.VideoURL.String()); media != nil { instance.Video = media } @@ -830,15 +913,12 @@ func geminiVideoSizeFromForm(req *http.Request) string { return req.PostFormValue("size") } -func geminiVideoJobSizeFromJSON(node *ast.Node) string { - width := intNode(node, "width") - - height := intNode(node, "height") - if width <= 0 || height <= 0 { +func geminiVideoJobSizeFromJSON(raw geminiOpenAIVideoRequest) string { + if !raw.Width.Set || !raw.Height.Set || raw.Width.Value <= 0 || raw.Height.Value <= 0 { return "" } - return fmt.Sprintf("%dx%d", width, height) + return fmt.Sprintf("%dx%d", raw.Width.Value, raw.Height.Value) } func geminiVideoJobSizeFromForm(req *http.Request) string { diff --git a/core/relay/adaptor/jina/rerank.go b/core/relay/adaptor/jina/rerank.go index 717b0250..88beea00 100644 --- a/core/relay/adaptor/jina/rerank.go +++ b/core/relay/adaptor/jina/rerank.go @@ -66,9 +66,9 @@ func RerankHandler( modelUsage := usage.ToModelUsage() - _, err = node.SetAny("meta", map[string]any{ - "tokens": modelUsage, - }) + _, err = node.Set("meta", ast.NewObject([]ast.Pair{ + ast.NewPair("tokens", ast.NewAny(modelUsage)), + })) if err != nil { return adaptor.DoResponseResult{Usage: modelUsage}, relaymodel.WrapperOpenAIError( err, diff --git a/core/relay/adaptor/minimax/tts.go b/core/relay/adaptor/minimax/tts.go index e048da76..d04e780f 100644 --- a/core/relay/adaptor/minimax/tts.go +++ b/core/relay/adaptor/minimax/tts.go @@ -6,8 +6,10 @@ import ( "encoding/hex" "net/http" "strconv" + "strings" "github.com/bytedance/sonic" + "github.com/bytedance/sonic/ast" "github.com/gin-gonic/gin" "github.com/labring/aiproxy/core/common" "github.com/labring/aiproxy/core/model" @@ -19,92 +21,240 @@ import ( ) func ConvertTTSRequest(meta *meta.Meta, req *http.Request) (adaptor.ConvertResult, error) { - reqMap, err := utils.UnmarshalMap(req) + node, err := common.UnmarshalRequest2NodeReusable(req) if err != nil { return adaptor.ConvertResult{}, err } - meta.Set("stream_format", reqMap["stream_format"]) + meta.Set("stream_format", stringFromTTSNode(node.Get("stream_format"))) - reqMap["model"] = meta.ActualModel + responseFormat, err := patchTTSRequestNode(&node, meta.ActualModel) + if err != nil { + return adaptor.ConvertResult{}, err + } + + meta.Set("audio_format", responseFormat) + + body, err := node.MarshalJSON() + if err != nil { + return adaptor.ConvertResult{}, err + } + + return adaptor.ConvertResult{ + Header: http.Header{ + "Content-Type": {"application/json"}, + "Content-Length": {strconv.Itoa(len(body))}, + }, + Body: bytes.NewReader(body), + }, nil +} - reqMap["text"] = reqMap["input"] - delete(reqMap, "input") +func patchTTSRequestNode(node *ast.Node, actualModel string) (string, error) { + if err := patchTTSModelAndText(node, actualModel); err != nil { + return "", err + } + + if err := patchTTSVoice(node); err != nil { + return "", err + } - voice, _ := reqMap["voice"].(string) - delete(reqMap, "voice") + responseFormat, err := patchTTSAudio(node) + if err != nil { + return "", err + } + + if err := patchTTSStreamOptions(node, responseFormat); err != nil { + return "", err + } + + if _, err := node.Set("language_boost", ast.NewString("auto")); err != nil { + return "", err + } + + return responseFormat, nil +} + +func patchTTSModelAndText(node *ast.Node, actualModel string) error { + if _, err := node.Set("model", ast.NewString(actualModel)); err != nil { + return err + } + + inputNode := node.Get("input") + if inputNode.Exists() { + if _, err := node.Set("text", *inputNode); err != nil { + return err + } + } else if _, err := node.Set("text", ast.NewNull()); err != nil { + return err + } + + if _, err := node.Unset("input"); err != nil { + return err + } + + return nil +} + +func patchTTSVoice(node *ast.Node) error { + voice := stringFromTTSNode(node.Get("voice")) + + if _, err := node.Unset("voice"); err != nil { + return err + } if voice == "" { voice = "male-qn-qingse" } - voiceSetting, ok := reqMap["voice_setting"].(map[string]any) - if !ok { - voiceSetting = map[string]any{} - reqMap["voice_setting"] = voiceSetting + voiceSetting, err := ttsObjectNode(node, "voice_setting") + if err != nil { + return err } - if timberWeights, ok := reqMap["timber_weights"].([]any); !ok || len(timberWeights) == 0 { - voiceSetting["voice_id"] = voice + timberWeightsNode := node.Get("timber_weights") + if !timberWeightsNode.Exists() || timberWeightsNode.TypeSafe() != ast.V_ARRAY || + ttsArrayLen(timberWeightsNode) == 0 { + if _, err := voiceSetting.Set("voice_id", ast.NewString(voice)); err != nil { + return err + } } - speed, ok := reqMap["speed"].(float64) - if ok { - voiceSetting["speed"] = int(speed) + if speed, ok := floatFromTTSNode(node.Get("speed")); ok { + if _, err := voiceSetting.Set( + "speed", + ast.NewNumber(strconv.Itoa(int(speed))), + ); err != nil { + return err + } } - delete(reqMap, "speed") + if _, err := node.Unset("speed"); err != nil { + return err + } + + return nil +} - audioSetting, ok := reqMap["audio_setting"].(map[string]any) - if !ok { - audioSetting = map[string]any{} - reqMap["audio_setting"] = audioSetting +func patchTTSAudio(node *ast.Node) (string, error) { + audioSetting, err := ttsObjectNode(node, "audio_setting") + if err != nil { + return "", err } - responseFormat, _ := reqMap["response_format"].(string) + responseFormat := stringFromTTSNode(node.Get("response_format")) if responseFormat == "" { - responseFormat, _ = reqMap["format"].(string) + responseFormat = stringFromTTSNode(node.Get("format")) } if responseFormat == "" { responseFormat = "mp3" } - audioSetting["format"] = responseFormat + if _, err := audioSetting.Set("format", ast.NewString(responseFormat)); err != nil { + return "", err + } - delete(reqMap, "response_format") - meta.Set("audio_format", responseFormat) + if _, err := node.Unset("response_format"); err != nil { + return "", err + } + + if sampleRate, ok := floatFromTTSNode(node.Get("sample_rate")); ok { + if _, err := audioSetting.Set( + "sample_rate", + ast.NewNumber(strconv.Itoa(int(sampleRate))), + ); err != nil { + return "", err + } + } - sampleRate, ok := reqMap["sample_rate"].(float64) - if ok { - audioSetting["sample_rate"] = int(sampleRate) + if _, err := node.Unset("sample_rate"); err != nil { + return "", err } - delete(reqMap, "sample_rate") + return responseFormat, nil +} +func patchTTSStreamOptions(node *ast.Node, responseFormat string) error { if responseFormat == "wav" { - reqMap["stream"] = false + if _, err := node.Set("stream", ast.NewBool(false)); err != nil { + return err + } } else { - reqMap["stream"] = true - reqMap["stream_options"] = map[string]any{ - "exclude_aggregated_audio": true, + if _, err := node.Set("stream", ast.NewBool(true)); err != nil { + return err + } + + if _, err := node.Set("stream_options", ast.NewObject([]ast.Pair{ + ast.NewPair("exclude_aggregated_audio", ast.NewBool(true)), + })); err != nil { + return err } } - reqMap["language_boost"] = "auto" + return nil +} - body, err := sonic.Marshal(reqMap) +func ttsObjectNode(node *ast.Node, key string) (*ast.Node, error) { + value := node.Get(key) + if value.Exists() && value.TypeSafe() == ast.V_OBJECT { + return value, nil + } + + if _, err := node.Set(key, ast.NewObject(nil)); err != nil { + return nil, err + } + + return node.Get(key), nil +} + +func stringFromTTSNode(node *ast.Node) string { + if node == nil || !node.Exists() || node.TypeSafe() != ast.V_STRING { + return "" + } + + value, err := node.String() if err != nil { - return adaptor.ConvertResult{}, err + return "" } - return adaptor.ConvertResult{ - Header: http.Header{ - "Content-Type": {"application/json"}, - "Content-Length": {strconv.Itoa(len(body))}, - }, - Body: bytes.NewReader(body), - }, nil + return strings.TrimSpace(value) +} + +func floatFromTTSNode(node *ast.Node) (float64, bool) { + if node == nil || !node.Exists() || node.TypeSafe() == ast.V_NULL { + return 0, false + } + + if node.TypeSafe() == ast.V_STRING { + value, err := node.String() + if err != nil { + return 0, false + } + + parsed, err := strconv.ParseFloat(strings.TrimSpace(value), 64) + if err != nil { + return 0, false + } + + return parsed, true + } + + value, err := node.Float64() + if err != nil { + return 0, false + } + + return value, true +} + +func ttsArrayLen(node *ast.Node) int { + count := 0 + _ = node.ForEach(func(_ ast.Sequence, _ *ast.Node) bool { + count++ + return true + }) + + return count } type TTSExtraInfo struct { diff --git a/core/relay/adaptor/openai/chat.go b/core/relay/adaptor/openai/chat.go index 6f5662a3..40db1c7e 100644 --- a/core/relay/adaptor/openai/chat.go +++ b/core/relay/adaptor/openai/chat.go @@ -366,9 +366,9 @@ func patchStreamOptions(node *ast.Node) error { streamOptionsNode := node.Get("stream_options") if !streamOptionsNode.Exists() { - _, err = node.SetAny("stream_options", map[string]any{ - "include_usage": true, - }) + _, err = node.Set("stream_options", ast.NewObject([]ast.Pair{ + ast.NewPair("include_usage", ast.NewBool(true)), + })) return err } diff --git a/core/relay/adaptor/openai/embeddings.go b/core/relay/adaptor/openai/embeddings.go index c2e0ffd7..77657eab 100644 --- a/core/relay/adaptor/openai/embeddings.go +++ b/core/relay/adaptor/openai/embeddings.go @@ -50,7 +50,7 @@ func ConvertEmbeddingsRequest( return adaptor.ConvertResult{}, err } - _, err = node.SetAny("input", []string{inputString}) + _, err = node.Set("input", ast.NewArray([]ast.Node{ast.NewString(inputString)})) if err != nil { return adaptor.ConvertResult{}, err } diff --git a/core/relay/adaptor/siliconflow/video.go b/core/relay/adaptor/siliconflow/video.go index ee938772..bb4da327 100644 --- a/core/relay/adaptor/siliconflow/video.go +++ b/core/relay/adaptor/siliconflow/video.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/base64" + "encoding/json" "fmt" "io" "mime" @@ -39,6 +40,79 @@ type videoSubmitRequest struct { Seed any `json:"seed,omitempty"` } +type openAIVideoRequest struct { + Prompt string `json:"prompt,omitempty"` + Model string `json:"model,omitempty"` + Width flexibleInt `json:"width,omitempty"` + Height flexibleInt `json:"height,omitempty"` + Size string `json:"size,omitempty"` + InputReference flexibleString `json:"input_reference,omitempty"` + Image flexibleString `json:"image,omitempty"` + NegativePrompt string `json:"negative_prompt,omitempty"` + Seed any `json:"seed,omitempty"` +} + +type flexibleString string + +func (value *flexibleString) UnmarshalJSON(data []byte) error { + var text string + if err := sonic.Unmarshal(data, &text); err == nil { + *value = flexibleString(strings.TrimSpace(text)) + return nil + } + + var object struct { + URL string `json:"url,omitempty"` + } + if err := sonic.Unmarshal(data, &object); err == nil { + *value = flexibleString(strings.TrimSpace(object.URL)) + } + + return nil +} + +func (value flexibleString) String() string { + return strings.TrimSpace(string(value)) +} + +type flexibleInt struct { + Value int + Set bool +} + +func (value *flexibleInt) UnmarshalJSON(data []byte) error { + text := strings.TrimSpace(string(data)) + if text == "" || text == "null" { + return nil + } + + if strings.HasPrefix(text, `"`) { + var raw string + if err := sonic.Unmarshal(data, &raw); err != nil { + return nil + } + + text = strings.TrimSpace(raw) + } + + number := json.Number(text) + + parsed, err := number.Int64() + if err != nil { + floatValue, floatErr := number.Float64() + if floatErr != nil { + return nil + } + + parsed = int64(floatValue) + } + + value.Value = int(parsed) + value.Set = true + + return nil +} + type videoSubmitResponse struct { RequestID string `json:"requestId"` } @@ -93,12 +167,12 @@ func convertSiliconFlowVideoGenerationJobRequest( request = parsed } else { - var reqMap map[string]any - if err := common.UnmarshalRequestReusable(req, &reqMap); err != nil { + var raw openAIVideoRequest + if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return adaptor.ConvertResult{}, err } - request = jsonVideoGenerationJobSubmitRequest(reqMap) + request = jsonVideoGenerationJobSubmitRequest(raw) } return convertSiliconFlowVideoRequest(meta, request) @@ -118,12 +192,12 @@ func convertSiliconFlowVideosRequest( request = parsed } else { - var reqMap map[string]any - if err := common.UnmarshalRequestReusable(req, &reqMap); err != nil { + var raw openAIVideoRequest + if err := common.UnmarshalRequestReusable(req, &raw); err != nil { return adaptor.ConvertResult{}, err } - request = jsonVideosSubmitRequest(reqMap) + request = jsonVideosSubmitRequest(raw) } return convertSiliconFlowVideoRequest(meta, request) @@ -198,26 +272,26 @@ func ConvertVideosStatusRequest(meta *meta.Meta, _ *http.Request) (adaptor.Conve }, nil } -func jsonVideoGenerationJobSubmitRequest(reqMap map[string]any) videoSubmitRequest { - request := jsonVideoCommonSubmitRequest(reqMap) - request.ImageSize = videoGenerationJobImageSize(reqMap) +func jsonVideoGenerationJobSubmitRequest(raw openAIVideoRequest) videoSubmitRequest { + request := jsonVideoCommonSubmitRequest(raw) + request.ImageSize = videoGenerationJobImageSize(raw) return request } -func jsonVideosSubmitRequest(reqMap map[string]any) videoSubmitRequest { - request := jsonVideoCommonSubmitRequest(reqMap) - request.ImageSize = normalizeSiliconFlowSize(stringFromMap(reqMap, "size")) +func jsonVideosSubmitRequest(raw openAIVideoRequest) videoSubmitRequest { + request := jsonVideoCommonSubmitRequest(raw) + request.ImageSize = normalizeSiliconFlowSize(raw.Size) return request } -func jsonVideoCommonSubmitRequest(reqMap map[string]any) videoSubmitRequest { +func jsonVideoCommonSubmitRequest(raw openAIVideoRequest) videoSubmitRequest { return videoSubmitRequest{ - Prompt: stringFromMap(reqMap, "prompt"), - Image: videoImage(reqMap), - NegativePrompt: stringFromMap(reqMap, "negative_prompt"), - Seed: reqMap["seed"], + Prompt: strings.TrimSpace(raw.Prompt), + Image: videoImage(raw), + NegativePrompt: strings.TrimSpace(raw.NegativePrompt), + Seed: raw.Seed, } } @@ -284,63 +358,26 @@ func multipartVideoCommonSubmitRequest( return request, nil } -func videoGenerationJobImageSize(reqMap map[string]any) string { - width, widthOK := intFromAny(reqMap["width"]) - - height, heightOK := intFromAny(reqMap["height"]) - if widthOK && heightOK && width > 0 && height > 0 { - return fmt.Sprintf("%dx%d", width, height) +func videoGenerationJobImageSize(raw openAIVideoRequest) string { + if raw.Width.Set && raw.Height.Set && raw.Width.Value > 0 && raw.Height.Value > 0 { + return fmt.Sprintf("%dx%d", raw.Width.Value, raw.Height.Value) } return "" } -func videoImage(reqMap map[string]any) string { - if inputReference := stringFromMap(reqMap, "input_reference"); inputReference != "" { +func videoImage(raw openAIVideoRequest) string { + if inputReference := raw.InputReference.String(); inputReference != "" { return inputReference } - if image := stringFromMap(reqMap, "image"); image != "" { + if image := raw.Image.String(); image != "" { return image } return "" } -func stringFromMap(reqMap map[string]any, key string) string { - value, ok := reqMap[key] - if !ok { - return "" - } - - str, ok := value.(string) - if !ok { - return "" - } - - return strings.TrimSpace(str) -} - -func intFromAny(value any) (int, bool) { - switch v := value.(type) { - case int: - return v, true - case int64: - return int(v), true - case float64: - return int(v), true - case string: - parsed, err := strconv.Atoi(strings.TrimSpace(v)) - if err != nil { - return 0, false - } - - return parsed, true - default: - return 0, false - } -} - func multipartVideoImageDataURL( meta *meta.Meta, files map[string][]*multipart.FileHeader, diff --git a/core/relay/controller/video_ali.go b/core/relay/controller/video_ali.go new file mode 100644 index 00000000..906f84f3 --- /dev/null +++ b/core/relay/controller/video_ali.go @@ -0,0 +1,105 @@ +package controller + +import ( + "strings" + + "github.com/gin-gonic/gin" + "github.com/labring/aiproxy/core/common" + "github.com/labring/aiproxy/core/model" +) + +type aliVideoRequestUsageParams struct { + seconds int + resolution string +} + +func ValidateAliVideoRequest(c *gin.Context, mc model.ModelConfig) error { + params, err := getAliVideoRequestUsageParams(c) + if err != nil { + return err + } + + return validateAliVideoRequestUsageParams(params, mc) +} + +func GetAliVideoRequestPrice(c *gin.Context, mc model.ModelConfig) (model.Price, error) { + params, err := getAliVideoRequestUsageParams(c) + if err != nil { + return model.Price{}, err + } + + if err := validateAliVideoRequestUsageParams(params, mc); err != nil { + return model.Price{}, err + } + + return getVideoRequestPrice(mc.Price), nil +} + +func GetAliVideoRequestUsage(c *gin.Context, mc model.ModelConfig) (RequestUsage, error) { + params, err := getAliVideoRequestUsageParams(c) + if err != nil { + return RequestUsage{}, err + } + + if err := validateAliVideoRequestUsageParams(params, mc); err != nil { + return RequestUsage{}, err + } + + return aliVideoRequestUsage(params), nil +} + +func getAliVideoRequestUsageParams(c *gin.Context) (aliVideoRequestUsageParams, error) { + node, err := common.UnmarshalRequest2NodeReusable(c.Request) + if err != nil { + return aliVideoRequestUsageParams{}, NewBadRequestParamError(err.Error()) + } + + parameters := node.Get("parameters") + + seconds, _, err := intValueFromNode(parameters, "duration") + if err != nil { + return aliVideoRequestUsageParams{}, err + } + + return aliVideoRequestUsageParams{ + seconds: seconds, + resolution: firstNonEmptyStringValueFromNode(parameters, "size", "resolution"), + }, nil +} + +func validateAliVideoRequestUsageParams( + params aliVideoRequestUsageParams, + mc model.ModelConfig, +) error { + if err := validateVideoGenerationSeconds( + params.seconds, + mc.MaxVideoGenerationSeconds, + ); err != nil { + return err + } + + return validateSupportedVideoResolution( + params.resolution, + mc, + aliVideoSupportedResolutionOptions(mc.AllowedResolutions), + ) +} + +func aliVideoRequestUsage(params aliVideoRequestUsageParams) RequestUsage { + return RequestUsage{ + Usage: model.Usage{}, + Context: model.UsageContext{ + Resolution: params.resolution, + NativeResolution: params.resolution, + }, + } +} + +func aliVideoSupportedResolutionOptions(supported []string) string { + options := normalizeSupportedResolutionValues(supported) + if len(options) == 0 { + return noResolutionOptions + } + + return strings.Join(options, ", ") +} diff --git a/core/relay/controller/video_ali_test.go b/core/relay/controller/video_ali_test.go new file mode 100644 index 00000000..2a53286f --- /dev/null +++ b/core/relay/controller/video_ali_test.go @@ -0,0 +1,104 @@ +//nolint:testpackage +package controller + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "github.com/labring/aiproxy/core/model" + "github.com/stretchr/testify/require" +) + +func TestValidateAliVideoRequestRejectsTooLongDuration(t *testing.T) { + t.Parallel() + + ctx := newAliVideoJSONTestContext(t, `{ + "model":"wan2.5-t2v-preview", + "input":{"prompt":"A city street"}, + "parameters":{"duration":6,"size":"720P"} + }`) + + err := ValidateAliVideoRequest(ctx, model.ModelConfig{ + MaxVideoGenerationSeconds: 5, + }) + require.Error(t, err) + require.Equal(t, "seconds must be less than or equal to 5", err.Error()) + + var requestParamErr *RequestParamError + require.ErrorAs(t, err, &requestParamErr) + require.Equal(t, 400, requestParamErr.StatusCode) +} + +func TestValidateAliVideoRequestRejectsUnsupportedResolution(t *testing.T) { + t.Parallel() + + ctx := newAliVideoJSONTestContext(t, `{ + "model":"wan2.5-t2v-preview", + "input":{"prompt":"A city street"}, + "parameters":{"duration":5,"size":"1080P"} + }`) + + err := ValidateAliVideoRequest(ctx, model.ModelConfig{ + AllowedResolutions: []string{"720p"}, + }) + require.Error(t, err) + require.Equal( + t, + "unsupported video resolution `1080P`, supported resolutions: 720p", + err.Error(), + ) +} + +func TestValidateAliVideoRequestRejectsNegativeDuration(t *testing.T) { + t.Parallel() + + ctx := newAliVideoJSONTestContext(t, `{ + "model":"wan2.5-t2v-preview", + "input":{"prompt":"A city street"}, + "parameters":{"duration":-1,"size":"720P"} + }`) + + err := ValidateAliVideoRequest(ctx, model.ModelConfig{}) + require.Error(t, err) + require.Equal(t, "invalid duration: must be non-negative", err.Error()) +} + +func TestGetAliVideoRequestUsageUsesAliResolution(t *testing.T) { + t.Parallel() + + ctx := newAliVideoJSONTestContext(t, `{ + "model":"wan2.5-t2v-preview", + "input":{"prompt":"A city street"}, + "parameters":{"duration":5,"size":"720P"} + }`) + + usage, err := GetAliVideoRequestUsage(ctx, model.ModelConfig{ + AllowedResolutions: []string{"720p"}, + }) + require.NoError(t, err) + require.Zero(t, usage.Usage.OutputTokens) + require.Equal(t, "720P", usage.Context.Resolution) + require.Equal(t, "720P", usage.Context.NativeResolution) +} + +func newAliVideoJSONTestContext(t *testing.T, body string) *gin.Context { + t.Helper() + + gin.SetMode(gin.TestMode) + + req := httptest.NewRequestWithContext( + t.Context(), + http.MethodPost, + "/", + bytes.NewBufferString(body), + ) + req.Header.Set("Content-Type", "application/json") + + ctx, _ := gin.CreateTestContext(httptest.NewRecorder()) + ctx.Request = req + + return ctx +} diff --git a/core/relay/controller/video_doubao.go b/core/relay/controller/video_doubao.go new file mode 100644 index 00000000..9ad2b98b --- /dev/null +++ b/core/relay/controller/video_doubao.go @@ -0,0 +1,103 @@ +package controller + +import ( + "strings" + + "github.com/gin-gonic/gin" + "github.com/labring/aiproxy/core/common" + "github.com/labring/aiproxy/core/model" +) + +type doubaoVideoRequestUsageParams struct { + seconds int + resolution string +} + +func ValidateDoubaoVideoRequest(c *gin.Context, mc model.ModelConfig) error { + params, err := getDoubaoVideoRequestUsageParams(c) + if err != nil { + return err + } + + return validateDoubaoVideoRequestUsageParams(params, mc) +} + +func GetDoubaoVideoRequestPrice(c *gin.Context, mc model.ModelConfig) (model.Price, error) { + params, err := getDoubaoVideoRequestUsageParams(c) + if err != nil { + return model.Price{}, err + } + + if err := validateDoubaoVideoRequestUsageParams(params, mc); err != nil { + return model.Price{}, err + } + + return getVideoRequestPrice(mc.Price), nil +} + +func GetDoubaoVideoRequestUsage(c *gin.Context, mc model.ModelConfig) (RequestUsage, error) { + params, err := getDoubaoVideoRequestUsageParams(c) + if err != nil { + return RequestUsage{}, err + } + + if err := validateDoubaoVideoRequestUsageParams(params, mc); err != nil { + return RequestUsage{}, err + } + + return doubaoVideoRequestUsage(params), nil +} + +func getDoubaoVideoRequestUsageParams(c *gin.Context) (doubaoVideoRequestUsageParams, error) { + node, err := common.UnmarshalRequest2NodeReusable(c.Request) + if err != nil { + return doubaoVideoRequestUsageParams{}, NewBadRequestParamError(err.Error()) + } + + seconds, _, err := intValueFromNode(&node, "duration") + if err != nil { + return doubaoVideoRequestUsageParams{}, err + } + + return doubaoVideoRequestUsageParams{ + seconds: seconds, + resolution: stringValueFromNode(&node, "resolution"), + }, nil +} + +func validateDoubaoVideoRequestUsageParams( + params doubaoVideoRequestUsageParams, + mc model.ModelConfig, +) error { + if err := validateVideoGenerationSeconds( + params.seconds, + mc.MaxVideoGenerationSeconds, + ); err != nil { + return err + } + + return validateSupportedVideoResolution( + params.resolution, + mc, + doubaoVideoSupportedResolutionOptions(mc.AllowedResolutions), + ) +} + +func doubaoVideoRequestUsage(params doubaoVideoRequestUsageParams) RequestUsage { + return RequestUsage{ + Usage: model.Usage{}, + Context: model.UsageContext{ + Resolution: params.resolution, + NativeResolution: params.resolution, + }, + } +} + +func doubaoVideoSupportedResolutionOptions(supported []string) string { + options := normalizeSupportedResolutionValues(supported) + if len(options) == 0 { + return noResolutionOptions + } + + return strings.Join(options, ", ") +} diff --git a/core/relay/controller/video_doubao_test.go b/core/relay/controller/video_doubao_test.go new file mode 100644 index 00000000..8b8ecc33 --- /dev/null +++ b/core/relay/controller/video_doubao_test.go @@ -0,0 +1,104 @@ +//nolint:testpackage +package controller + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "github.com/labring/aiproxy/core/model" + "github.com/stretchr/testify/require" +) + +func TestValidateDoubaoVideoRequestRejectsTooLongDuration(t *testing.T) { + t.Parallel() + + ctx := newDoubaoVideoJSONTestContext(t, `{ + "model":"doubao-seedance-2-0-260128", + "content":[{"type":"text","text":"A city street"}], + "duration":6, + "resolution":"720p" + }`) + + err := ValidateDoubaoVideoRequest(ctx, model.ModelConfig{ + MaxVideoGenerationSeconds: 5, + }) + require.Error(t, err) + require.Equal(t, "seconds must be less than or equal to 5", err.Error()) +} + +func TestValidateDoubaoVideoRequestRejectsUnsupportedResolution(t *testing.T) { + t.Parallel() + + ctx := newDoubaoVideoJSONTestContext(t, `{ + "model":"doubao-seedance-2-0-260128", + "content":[{"type":"text","text":"A city street"}], + "duration":5, + "resolution":"1080p" + }`) + + err := ValidateDoubaoVideoRequest(ctx, model.ModelConfig{ + AllowedResolutions: []string{"720p"}, + }) + require.Error(t, err) + require.Equal( + t, + "unsupported video resolution `1080p`, supported resolutions: 720p", + err.Error(), + ) +} + +func TestValidateDoubaoVideoRequestRejectsNegativeDuration(t *testing.T) { + t.Parallel() + + ctx := newDoubaoVideoJSONTestContext(t, `{ + "model":"doubao-seedance-2-0-260128", + "content":[{"type":"text","text":"A city street"}], + "duration":-1, + "resolution":"720p" + }`) + + err := ValidateDoubaoVideoRequest(ctx, model.ModelConfig{}) + require.Error(t, err) + require.Equal(t, "invalid duration: must be non-negative", err.Error()) +} + +func TestGetDoubaoVideoRequestUsageUsesDoubaoResolution(t *testing.T) { + t.Parallel() + + ctx := newDoubaoVideoJSONTestContext(t, `{ + "model":"doubao-seedance-2-0-260128", + "content":[{"type":"text","text":"A city street"}], + "duration":5, + "resolution":"720p" + }`) + + usage, err := GetDoubaoVideoRequestUsage(ctx, model.ModelConfig{ + AllowedResolutions: []string{"720p"}, + }) + require.NoError(t, err) + require.Zero(t, usage.Usage.OutputTokens) + require.Equal(t, "720p", usage.Context.Resolution) + require.Equal(t, "720p", usage.Context.NativeResolution) +} + +func newDoubaoVideoJSONTestContext(t *testing.T, body string) *gin.Context { + t.Helper() + + gin.SetMode(gin.TestMode) + + req := httptest.NewRequestWithContext( + t.Context(), + http.MethodPost, + "/", + bytes.NewBufferString(body), + ) + req.Header.Set("Content-Type", "application/json") + + ctx, _ := gin.CreateTestContext(httptest.NewRecorder()) + ctx.Request = req + + return ctx +} diff --git a/core/relay/utils/testreq.go b/core/relay/utils/testreq.go index 714a3268..943712d3 100644 --- a/core/relay/utils/testreq.go +++ b/core/relay/utils/testreq.go @@ -103,6 +103,20 @@ func BuildRequest(modelConfig model.ModelConfig) (io.Reader, mode.Mode, error) { } return body, mode.GeminiVideo, nil + case mode.AliVideo: + body, err := BuildAliVideoRequest(modelConfig.Model) + if err != nil { + return nil, mode.Unknown, err + } + + return body, mode.AliVideo, nil + case mode.DoubaoVideo: + body, err := BuildDoubaoVideoRequest(modelConfig.Model) + if err != nil { + return nil, mode.Unknown, err + } + + return body, mode.DoubaoVideo, nil default: return nil, mode.Unknown, NewErrUnsupportedModelType(modelConfig.Type.String()) } @@ -257,3 +271,45 @@ func BuildGeminiVideoRequest(_ string) (io.Reader, error) { return bytes.NewReader(jsonBytes), nil } + +func BuildAliVideoRequest(model string) (io.Reader, error) { + testRequest := map[string]any{ + "model": model, + "input": map[string]any{ + "prompt": "A calm cinematic shot of clouds moving over a mountain.", + }, + "parameters": map[string]any{ + "duration": 5, + "size": "720P", + }, + } + + jsonBytes, err := sonic.Marshal(testRequest) + if err != nil { + return nil, err + } + + return bytes.NewReader(jsonBytes), nil +} + +func BuildDoubaoVideoRequest(model string) (io.Reader, error) { + testRequest := map[string]any{ + "model": model, + "content": []map[string]any{ + { + "type": "text", + "text": "A calm cinematic shot of clouds moving over a mountain.", + }, + }, + "duration": 5, + "resolution": "720p", + "ratio": "16:9", + } + + jsonBytes, err := sonic.Marshal(testRequest) + if err != nil { + return nil, err + } + + return bytes.NewReader(jsonBytes), nil +} diff --git a/core/relay/utils/testreq_test.go b/core/relay/utils/testreq_test.go index 15458a08..6a3d3116 100644 --- a/core/relay/utils/testreq_test.go +++ b/core/relay/utils/testreq_test.go @@ -31,6 +31,55 @@ func TestBuildRequestGeminiVideo(t *testing.T) { ) } +func TestBuildRequestAliVideo(t *testing.T) { + body, relayMode, err := utils.BuildRequest(model.ModelConfig{ + Model: "wan2.5-t2v-preview", + Type: mode.AliVideo, + }) + require.NoError(t, err) + require.Equal(t, mode.AliVideo, relayMode) + + data, err := io.ReadAll(body) + require.NoError(t, err) + require.JSONEq( + t, + `{ + "model":"wan2.5-t2v-preview", + "input":{"prompt":"A calm cinematic shot of clouds moving over a mountain."}, + "parameters":{"duration":5,"size":"720P"} + }`, + string(data), + ) +} + +func TestBuildRequestDoubaoVideo(t *testing.T) { + body, relayMode, err := utils.BuildRequest(model.ModelConfig{ + Model: "doubao-seedance-2-0-260128", + Type: mode.DoubaoVideo, + }) + require.NoError(t, err) + require.Equal(t, mode.DoubaoVideo, relayMode) + + data, err := io.ReadAll(body) + require.NoError(t, err) + require.JSONEq( + t, + `{ + "model":"doubao-seedance-2-0-260128", + "content":[ + { + "type":"text", + "text":"A calm cinematic shot of clouds moving over a mountain." + } + ], + "duration":5, + "resolution":"720p", + "ratio":"16:9" + }`, + string(data), + ) +} + func TestBuildRequestVideoGenerationJob(t *testing.T) { body, relayMode, err := utils.BuildRequest(model.ModelConfig{ Model: "happyhorse-1.0-t2v", diff --git a/web/public/locales/en/translation.json b/web/public/locales/en/translation.json index 21cfa9ea..afe6494e 100644 --- a/web/public/locales/en/translation.json +++ b/web/public/locales/en/translation.json @@ -117,18 +117,18 @@ "totalTokens": "Total Tokens", "totalTokensTooltip": "Total tokens consumed in the time range", "totalInputTokens": "Total Input Tokens", - "totalInputTokensTooltip": "Total input-side tokens in the time range, including text, image, audio, cached, and cache creation tokens.", - "textInputTokens": "Text Input Tokens", + "totalInputTokensTooltip": "Total input-side tokens in the time range, including general, image, audio, cached, and cache creation tokens.", + "textInputTokens": "General Input Tokens", "totalInputAmount": "Total Input Cost", - "totalInputAmountTooltip": "Total input-side cost in the time range, including text, image, audio, cached, and cache creation costs.", - "textInputAmount": "Text Input Cost", + "totalInputAmountTooltip": "Total input-side cost in the time range, including general, image, audio, cached, and cache creation costs.", + "textInputAmount": "General Input Cost", "totalOutputTokens": "Total Output Tokens", - "totalOutputTokensTooltip": "Total output-side tokens in the time range, including text and image output tokens.", - "textOutputTokens": "Text Output Tokens", + "totalOutputTokensTooltip": "Total output-side tokens in the time range, including general and image output tokens.", + "textOutputTokens": "General Output Tokens", "totalOutputAmount": "Total Output Cost", - "totalOutputAmountTooltip": "Total output-side cost in the time range, including text and image output costs.", - "textOutputAmount": "Text Output Cost", - "outputTokens": "Output Tokens", + "totalOutputAmountTooltip": "Total output-side cost in the time range, including general and image output costs.", + "textOutputAmount": "General Output Cost", + "outputTokens": "General Output Tokens", "outputTokensTooltip": "Total output tokens in the time range", "cacheHitCount": "Cache Hits", "cacheHitCountTooltip": "Number of requests that hit cache in the time range", @@ -151,11 +151,13 @@ "tokensBreakdown": { "totalInput": "Total Input", "totalOutput": "Total Output", - "textInput": "Text Input", + "generalInput": "General Input", + "textInput": "General Input", "imageInput": "Image Input", "audioInput": "Audio Input", "videoInput": "Video Input", - "textOutput": "Text Output", + "generalOutput": "General Output", + "textOutput": "General Output", "imageOutput": "Image Output", "audioOutput": "Audio Output", "reasoning": "Reasoning", @@ -181,13 +183,15 @@ "costBreakdownTypes": { "totalInput": "Total Input", "totalOutput": "Total Output", - "textInput": "Text Input", + "generalInput": "General Input", + "textInput": "General Input", "cached": "Cached", "cacheCreation": "Cache Creation", "imageInput": "Image Input", "audioInput": "Audio Input", "videoInput": "Video Input", - "textOutput": "Text Output", + "generalOutput": "General Output", + "textOutput": "General Output", "imageOutput": "Image Output", "audioOutput": "Audio Output", "webSearch": "Web Search" @@ -210,8 +214,8 @@ "tokensChart": { "cacheCreationTokens": "Cache Creation Tokens", "cachedTokens": "Cached Tokens", - "inputTokens": "Input Tokens", - "outputTokens": "Output Tokens", + "inputTokens": "General Input Tokens", + "outputTokens": "General Output Tokens", "totalTokens": "Total Tokens", "webSearchCount": "Web Search Count" }, @@ -227,8 +231,8 @@ "cost": "Cost", "avgResponseTime": "Avg Response Time", "avgTtfb": "Avg TTFB", - "inputTokens": "Input Tokens", - "outputTokens": "Output Tokens", + "inputTokens": "General Input Tokens", + "outputTokens": "General Output Tokens", "cachedTokens": "Cached Tokens" } }, @@ -705,8 +709,8 @@ "title": "Log List", "keyName": "Key Name", "model": "Model", - "inputTokens": "Input Tokens", - "outputTokens": "Output Tokens", + "inputTokens": "General Input Tokens", + "outputTokens": "General Output Tokens", "duration": "Duration(s)", "state": "State", "time": "Time", @@ -741,6 +745,8 @@ "resolution": "Resolution", "nativeResolution": "Native Resolution", "quality": "Quality", + "inputVideo": "Input Has Video", + "outputAudio": "Output Has Audio", "metadata": "Metadata", "cacheCreation": "Cache Creation", "cached": "Cached", @@ -757,8 +763,8 @@ "retry": "Retry", "retryTimes": "Retry Times", "ttfb": "TTFB", - "inputPrice": "Input Price", - "outputPrice": "Output Price", + "inputPrice": "General Input Price", + "outputPrice": "General Output Price", "cacheCreationPrice": "Cache Creation Price", "cachedPrice": "Cached Price", "imageInputPrice": "Image Input Price", @@ -770,13 +776,13 @@ "thinkingPrice": "Thinking Price", "webSearchPrice": "Web Search Price", "costBreakdown": { - "input": "Text Input Cost", + "input": "General Input Cost", "cached": "Cached Cost", "cacheCreation": "Cache Creation Cost", "imageInput": "Image Input Cost", "audioInput": "Audio Input Cost", "videoInput": "Video Input Cost", - "output": "Text Output Cost", + "output": "General Output Cost", "imageOutput": "Image Output Cost", "audioOutput": "Audio Output Cost", "webSearch": "Web Search Cost" @@ -1043,12 +1049,31 @@ "11": "PDF Parsing", "12": "Anthropic", "13": "Video Generation", + "14": "Video Generation Get Jobs", + "15": "Video Generation Content", "16": "Responses", + "17": "Responses Get", + "18": "Responses Delete", + "19": "Responses Cancel", + "20": "Responses Input Items", "21": "Gemini", + "22": "OpenAI Videos", + "23": "OpenAI Videos Get", + "24": "OpenAI Videos Content", + "25": "OpenAI Videos Delete", + "26": "OpenAI Videos Remix", "27": "Gemini Video", "28": "Gemini Video Operations", "29": "Gemini TTS", - "30": "Gemini Image" + "30": "Gemini Image", + "31": "Gemini Files", + "32": "OpenAI Videos Edit", + "33": "OpenAI Videos Extend", + "34": "Ali Video", + "35": "Ali Video Tasks", + "36": "Doubao Video", + "37": "Doubao Video Tasks", + "38": "Doubao Video Tasks Delete" }, "group": { "management": "Group Management", @@ -1127,8 +1152,8 @@ "type": "Type", "rpm": "RPM", "tpm": "TPM", - "inputPrice": "Input Price", - "outputPrice": "Output Price", + "inputPrice": "General Input Price", + "outputPrice": "General Output Price", "plugins": "Plugins" }, "tokenDialog": { @@ -1142,9 +1167,9 @@ "price": { "title": "Price Configuration", "description": "Configure pricing for this model", - "inputPrice": "Input Price", + "inputPrice": "General Input Price", "inputPriceUnit": "Unit (tokens)", - "outputPrice": "Output Price", + "outputPrice": "General Output Price", "outputPriceUnit": "Unit (tokens)", "perRequestPrice": "Per Request Price", "cachedPrice": "Cached Price", @@ -1181,6 +1206,9 @@ "multiValueHint": "Enter one value per line. Commas are also supported.", "serviceTier": "Service Tier", "serviceTierAny": "Any", + "inputVideo": "Input Has Video", + "outputAudio": "Output Has Audio", + "booleanAny": "Any", "startTime": "Start Time", "endTime": "End Time", "noPrice": "No price configured", @@ -1252,8 +1280,8 @@ "model": "Model", "group": "Group", "requestCount": "Requests", - "inputTokens": "Input Tokens", - "outputTokens": "Output Tokens", + "inputTokens": "General Input Tokens", + "outputTokens": "General Output Tokens", "totalTokens": "Total Tokens", "usedAmount": "Used Amount", "actions": "Actions", diff --git a/web/public/locales/zh/translation.json b/web/public/locales/zh/translation.json index f7e38dcd..1a29d5a8 100644 --- a/web/public/locales/zh/translation.json +++ b/web/public/locales/zh/translation.json @@ -117,18 +117,18 @@ "totalTokens": "总 Token 数", "totalTokensTooltip": "统计时间范围内消耗的总 Token 数", "totalInputTokens": "总 Input Token", - "totalInputTokensTooltip": "统计时间范围内的总输入 Token 数,包含文本、图片、音频、缓存命中和缓存创建等输入侧 Token。", - "textInputTokens": "文本 Input Token", + "totalInputTokensTooltip": "统计时间范围内的总输入 Token 数,包含一般、图片、音频、缓存命中和缓存创建等输入侧 Token。", + "textInputTokens": "一般 Input Token", "totalInputAmount": "总 Input 消耗", - "totalInputAmountTooltip": "统计时间范围内的总输入侧消耗,包含文本、图片、音频、缓存命中和缓存创建等输入侧成本。", - "textInputAmount": "文本 Input 消耗", + "totalInputAmountTooltip": "统计时间范围内的总输入侧消耗,包含一般、图片、音频、缓存命中和缓存创建等输入侧成本。", + "textInputAmount": "一般 Input 消耗", "totalOutputTokens": "总 Output Token", - "totalOutputTokensTooltip": "统计时间范围内的总输出 Token 数,包含文本和图片等输出侧 Token。", - "textOutputTokens": "文本 Output Token", + "totalOutputTokensTooltip": "统计时间范围内的总输出 Token 数,包含一般和图片等输出侧 Token。", + "textOutputTokens": "一般 Output Token", "totalOutputAmount": "总 Output 消耗", - "totalOutputAmountTooltip": "统计时间范围内的总输出侧消耗,包含文本和图片等输出侧成本。", - "textOutputAmount": "文本 Output 消耗", - "outputTokens": "输出 Token 数", + "totalOutputAmountTooltip": "统计时间范围内的总输出侧消耗,包含一般和图片等输出侧成本。", + "textOutputAmount": "一般 Output 消耗", + "outputTokens": "一般输出 Token 数", "outputTokensTooltip": "统计时间范围内的输出 Token 总数", "cacheHitCount": "缓存命中", "cacheHitCountTooltip": "统计时间范围内命中缓存的请求数", @@ -161,13 +161,15 @@ "costBreakdownTypes": { "totalInput": "总输入", "totalOutput": "总输出", - "textInput": "文本输入", + "generalInput": "一般输入", + "textInput": "一般输入", "cached": "缓存命中", "cacheCreation": "缓存创建", "imageInput": "图片输入", "audioInput": "音频输入", "videoInput": "视频输入", - "textOutput": "文本输出", + "generalOutput": "一般输出", + "textOutput": "一般输出", "imageOutput": "图片输出", "audioOutput": "音频输出", "webSearch": "联网搜索" @@ -196,11 +198,13 @@ "tokensBreakdown": { "totalInput": "总输入", "totalOutput": "总输出", - "textInput": "文本输入", + "generalInput": "一般输入", + "textInput": "一般输入", "imageInput": "图片输入", "audioInput": "音频输入", "videoInput": "视频输入", - "textOutput": "文本输出", + "generalOutput": "一般输出", + "textOutput": "一般输出", "imageOutput": "图片输出", "audioOutput": "音频输出", "reasoning": "推理", @@ -216,8 +220,8 @@ "cost": "消费", "avgResponseTime": "平均耗时时间", "avgTtfb": "平均首字节时间", - "inputTokens": "输入 Tokens", - "outputTokens": "输出 Tokens", + "inputTokens": "一般输入 Tokens", + "outputTokens": "一般输出 Tokens", "cachedTokens": "缓存 Tokens" } }, @@ -693,8 +697,8 @@ "title": "日志列表", "keyName": "Key 名称", "model": "模型", - "inputTokens": "输入 Token 数", - "outputTokens": "输出 Token 数", + "inputTokens": "一般输入 Token 数", + "outputTokens": "一般输出 Token 数", "duration": "耗时(秒)", "state": "状态", "time": "时间", @@ -729,6 +733,8 @@ "resolution": "分辨率", "nativeResolution": "模型原生分辨率", "quality": "质量", + "inputVideo": "输入是否含视频", + "outputAudio": "输出是否有声音", "metadata": "元数据", "cacheCreation": "缓存创建", "cached": "缓存", @@ -745,8 +751,8 @@ "retry": "重试", "retryTimes": "重试次数", "ttfb": "首字节时间", - "inputPrice": "输入价格", - "outputPrice": "输出价格", + "inputPrice": "一般输入价格", + "outputPrice": "一般输出价格", "cacheCreationPrice": "缓存创建价格", "cachedPrice": "缓存价格", "imageInputPrice": "图片输入价格", @@ -758,13 +764,13 @@ "thinkingPrice": "思考价格", "webSearchPrice": "搜索价格", "costBreakdown": { - "input": "文本输入消费", + "input": "一般输入消费", "cached": "缓存命中消费", "cacheCreation": "缓存创建消费", "imageInput": "图片输入消费", "audioInput": "音频输入消费", "videoInput": "视频输入消费", - "output": "文本输出消费", + "output": "一般输出消费", "imageOutput": "图片输出消费", "audioOutput": "音频输出消费", "webSearch": "联网搜索消费" @@ -1031,12 +1037,31 @@ "11": "PDF解析", "12": "Anthropic", "13": "视频生成", + "14": "视频生成查询", + "15": "视频生成内容", "16": "Responses", + "17": "Responses 查询", + "18": "Responses 删除", + "19": "Responses 取消", + "20": "Responses 输入项", "21": "Gemini", + "22": "OpenAI 视频", + "23": "OpenAI 视频查询", + "24": "OpenAI 视频内容", + "25": "OpenAI 视频删除", + "26": "OpenAI 视频 Remix", "27": "Gemini 视频", "28": "Gemini 视频 Operations", "29": "Gemini TTS", - "30": "Gemini 图片" + "30": "Gemini 图片", + "31": "Gemini 文件", + "32": "OpenAI 视频编辑", + "33": "OpenAI 视频扩展", + "34": "Ali 视频", + "35": "Ali 视频任务", + "36": "Doubao 视频", + "37": "Doubao 视频任务", + "38": "Doubao 视频任务删除" }, "group": { "management": "组别管理", @@ -1115,8 +1140,8 @@ "type": "类型", "rpm": "RPM", "tpm": "TPM", - "inputPrice": "输入价格", - "outputPrice": "输出价格", + "inputPrice": "一般输入价格", + "outputPrice": "一般输出价格", "plugins": "插件" }, "tokenDialog": { @@ -1130,9 +1155,9 @@ "price": { "title": "价格配置", "description": "配置此模型的价格", - "inputPrice": "输入价格", + "inputPrice": "一般输入价格", "inputPriceUnit": "单位 (tokens)", - "outputPrice": "输出价格", + "outputPrice": "一般输出价格", "outputPriceUnit": "单位 (tokens)", "perRequestPrice": "每次请求价格", "cachedPrice": "缓存价格", @@ -1169,6 +1194,9 @@ "multiValueHint": "每行输入一个值,也支持用逗号分隔。", "serviceTier": "服务层级", "serviceTierAny": "任意", + "inputVideo": "输入是否含视频", + "outputAudio": "输出是否有声音", + "booleanAny": "任意", "startTime": "开始时间", "endTime": "结束时间", "noPrice": "未配置价格", @@ -1240,8 +1268,8 @@ "model": "模型", "group": "组别", "requestCount": "请求数", - "inputTokens": "输入 Tokens", - "outputTokens": "输出 Tokens", + "inputTokens": "一般输入 Tokens", + "outputTokens": "一般输出 Tokens", "totalTokens": "总 Tokens", "usedAmount": "消耗金额", "actions": "操作", diff --git a/web/src/components/price/PriceDisplay.tsx b/web/src/components/price/PriceDisplay.tsx index ba89bb00..b94f0938 100644 --- a/web/src/components/price/PriceDisplay.tsx +++ b/web/src/components/price/PriceDisplay.tsx @@ -71,6 +71,14 @@ export function PriceDisplay({ price }: PriceDisplayProps) { parts.push(`${t('group.price.serviceTier')}: ${SERVICE_TIER_LABELS[condition.service_tier]}`) } + if (condition.input_video !== undefined) { + parts.push(`${t('group.price.inputVideo')}: ${condition.input_video ? t('common.yes') : t('common.no')}`) + } + + if (condition.output_audio !== undefined) { + parts.push(`${t('group.price.outputAudio')}: ${condition.output_audio ? t('common.yes') : t('common.no')}`) + } + if (condition.input_token_min || condition.input_token_max) { parts.push(`${t('group.price.inputPrice')}: ${condition.input_token_min || 0} - ${condition.input_token_max || '∞'}`) } diff --git a/web/src/components/price/PriceFormFields.tsx b/web/src/components/price/PriceFormFields.tsx index 57e62e1a..e2f09f86 100644 --- a/web/src/components/price/PriceFormFields.tsx +++ b/web/src/components/price/PriceFormFields.tsx @@ -78,6 +78,15 @@ function ConditionFields({ condition, onChange }: { }) { const { t } = useTranslation() const anyServiceTier = '__any__' + const anyBool = '__any__' + const boolSelectValue = (value?: boolean) => { + if (value === undefined) return anyBool + return value ? 'true' : 'false' + } + const parseBoolSelectValue = (value: string) => { + if (value === anyBool) return undefined + return value === 'true' + } return (
@@ -143,6 +152,44 @@ function ConditionFields({ condition, onChange }: {
+
+ + +
+
+ + +
{
{t('log.nativeResolution')}: {usageContext?.native_resolution || '-'}
{t('log.quality')}: {usageContext?.quality || '-'}
{t('log.serviceTier')}: {usageContext?.service_tier || '-'}
+
{t('log.inputVideo')}: {usageContext?.input_video === undefined ? '-' : (usageContext.input_video ? t('common.yes') : t('common.no'))}
+
{t('log.outputAudio')}: {usageContext?.output_audio === undefined ? '-' : (usageContext.output_audio ? t('common.yes') : t('common.no'))}
diff --git a/web/src/feature/model/components/ModelForm.tsx b/web/src/feature/model/components/ModelForm.tsx index 07f21564..53dc2858 100644 --- a/web/src/feature/model/components/ModelForm.tsx +++ b/web/src/feature/model/components/ModelForm.tsx @@ -348,6 +348,10 @@ export function ModelForm({ case 22: case 26: case 27: + case 32: + case 33: + case 34: + case 36: return { tokenFields: ['max_input_tokens', 'max_output_tokens', 'max_context_tokens'] as Array<'max_input_tokens' | 'max_output_tokens' | 'max_context_tokens'>, showToolChoice: false, diff --git a/web/src/feature/model/components/api-doc/ApiDoc.tsx b/web/src/feature/model/components/api-doc/ApiDoc.tsx index 6abdd9b3..1a22f823 100644 --- a/web/src/feature/model/components/api-doc/ApiDoc.tsx +++ b/web/src/feature/model/components/api-doc/ApiDoc.tsx @@ -230,6 +230,693 @@ ${modelConfig?.config?.support_voices?.length "markdown": "sf ad fda daf da \\\\( f \\\\) ds f sd fs d afdas fsd asfad f\\n\\n\\n\\n![img](data:image/jpeg;base64,/9...)\\n\\n| sadsa | | |\\n| --- | --- | --- |\\n| | sadasdsa | sad |\\n| | | dsadsadsa |\\n| | | |\\n\\n\\n\\na fda" }` } + case 2: + return { + title: t('modeType.2'), + endpoint: '/completions', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/completions \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "prompt": "Write a short product tagline", + "max_tokens": 64, + "temperature": 0.7 +}'`, + responseExample: `{ + "object": "text_completion", + "model": "${modelConfig.model}", + "choices": [ + { + "text": "Cloud-native apps, shipped faster.", + "index": 0, + "finish_reason": "stop" + } + ] +}` + } + case 4: + return { + title: t('modeType.4'), + endpoint: '/moderations', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/moderations \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "input": "Text to classify" +}'`, + responseExample: `{ + "id": "modr_123", + "model": "${modelConfig.model}", + "results": [ + { + "flagged": false, + "categories": {} + } + ] +}` + } + case 5: + return { + title: t('modeType.5'), + endpoint: '/images/generations', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/images/generations \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "prompt": "A minimal cloud dashboard illustration", + "size": "1024x1024", + "n": 1 +}'`, + responseExample: `{ + "created": 1729672480, + "data": [ + { + "url": "https://example.com/image.png" + } + ] +}` + } + case 6: + return { + title: t('modeType.6'), + endpoint: '/images/edits', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/images/edits \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: multipart/form-data' \\ +--form model=${modelConfig.model} \\ +--form 'image=@"image.png"' \\ +--form 'prompt=Add a clean blue background' \\ +--form size=1024x1024`, + responseExample: `{ + "created": 1729672480, + "data": [ + { + "url": "https://example.com/edited-image.png" + } + ] +}` + } + case 9: + return { + title: t('modeType.9'), + endpoint: '/audio/translations', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/audio/translations \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: multipart/form-data' \\ +--form model=${modelConfig.model} \\ +--form 'file=@"audio.mp3"'`, + responseExample: `{ + "text": "Translated transcript text" +}` + } + case 12: + return { + title: t('modeType.12'), + endpoint: '/messages', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/messages \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "max_tokens": 512, + "messages": [ + { + "role": "user", + "content": "Summarize this release note" + } + ] +}'`, + responseExample: `{ + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Summary text" + } + ] +}` + } + case 13: + return { + title: t('modeType.13'), + endpoint: '/video/generations/jobs', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/video/generations/jobs \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "prompt": "A calm ocean at sunrise", + "width": 1280, + "height": 720, + "n_seconds": 5 +}'`, + responseExample: `{ + "id": "vgjob_123", + "object": "video.generation.job", + "status": "queued", + "model": "${modelConfig.model}" +}` + } + case 14: + return { + title: t('modeType.14'), + endpoint: '/video/generations/jobs/{id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/video/generations/jobs/vgjob_123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "vgjob_123", + "object": "video.generation.job", + "status": "succeeded", + "generations": [ + { + "id": "video_123" + } + ] +}` + } + case 15: + return { + title: t('modeType.15'), + endpoint: '/video/generations/{id}/content/video', + method: 'GET', + responseFormat: 'binary', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/video/generations/video_123/content/video \\ +--header "Authorization: Bearer $token" \\ +--output video.mp4`, + responseExample: 'Binary video data' + } + case 16: + return { + title: t('modeType.16'), + endpoint: '/responses', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/responses \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "input": "Write a concise status update" +}'`, + responseExample: `{ + "id": "resp_123", + "object": "response", + "status": "completed", + "output_text": "Status update text" +}` + } + case 17: + return { + title: t('modeType.17'), + endpoint: '/responses/{response_id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/responses/resp_123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "resp_123", + "object": "response", + "status": "completed" +}` + } + case 18: + return { + title: t('modeType.18'), + endpoint: '/responses/{response_id}', + method: 'DELETE', + responseFormat: 'json', + requestExample: `curl --request DELETE \\ +--url ${apiEndpoint}/v1/responses/resp_123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "resp_123", + "object": "response.deleted", + "deleted": true +}` + } + case 19: + return { + title: t('modeType.19'), + endpoint: '/responses/{response_id}/cancel', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/responses/resp_123/cancel \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "resp_123", + "object": "response", + "status": "cancelled" +}` + } + case 20: + return { + title: t('modeType.20'), + endpoint: '/responses/{response_id}/input_items', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/responses/resp_123/input_items \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "object": "list", + "data": [ + { + "id": "item_123", + "type": "message" + } + ] +}` + } + case 21: + return { + title: t('modeType.21'), + endpoint: '/models/{model}:generateContent', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1beta/models/${modelConfig.model}:generateContent \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "contents": [ + { + "role": "user", + "parts": [ + { + "text": "Explain Kubernetes in one sentence" + } + ] + } + ] +}'`, + responseExample: `{ + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Kubernetes automates deployment, scaling, and management of containerized applications." + } + ] + } + } + ] +}` + } + case 22: + return { + title: t('modeType.22'), + endpoint: '/videos', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/videos \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "prompt": "A calm ocean at sunrise", + "seconds": 5, + "size": "1280x720" +}'`, + responseExample: `{ + "id": "video_123", + "object": "video", + "status": "queued", + "model": "${modelConfig.model}" +}` + } + case 23: + return { + title: t('modeType.23'), + endpoint: '/videos/{video_id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/videos/video_123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "video_123", + "object": "video", + "status": "completed" +}` + } + case 24: + return { + title: t('modeType.24'), + endpoint: '/videos/{video_id}/content', + method: 'GET', + responseFormat: 'binary', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1/videos/video_123/content \\ +--header "Authorization: Bearer $token" \\ +--output video.mp4`, + responseExample: 'Binary video data' + } + case 25: + return { + title: t('modeType.25'), + endpoint: '/videos/{video_id}', + method: 'DELETE', + responseFormat: 'text', + requestExample: `curl --request DELETE \\ +--url ${apiEndpoint}/v1/videos/video_123 \\ +--header "Authorization: Bearer $token"`, + responseExample: 'No content' + } + case 26: + return { + title: t('modeType.26'), + endpoint: '/videos/{video_id}/remix', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/videos/video_123/remix \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "prompt": "Make it cinematic", + "seconds": 5, + "size": "1280x720" +}'`, + responseExample: `{ + "id": "video_456", + "object": "video", + "status": "queued" +}` + } + case 27: + return { + title: t('modeType.27'), + endpoint: '/models/{model}:predictLongRunning', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1beta/models/${modelConfig.model}:predictLongRunning \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "instances": [ + { + "prompt": "A calm ocean at sunrise" + } + ], + "parameters": { + "durationSeconds": 8, + "resolution": "720p" + } +}'`, + responseExample: `{ + "name": "operations/video-operation-123", + "done": false +}` + } + case 28: + return { + title: t('modeType.28'), + endpoint: '/operations/{operation_id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1beta/operations/video-operation-123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "name": "operations/video-operation-123", + "done": true, + "response": {} +}` + } + case 29: + return { + title: t('modeType.29'), + endpoint: '/models/{model}:generateContent', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1beta/models/${modelConfig.model}:generateContent \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "contents": [ + { + "parts": [ + { + "text": "Say hello in a friendly voice" + } + ] + } + ], + "generationConfig": { + "responseModalities": ["AUDIO"] + } +}'`, + responseExample: `{ + "candidates": [ + { + "content": { + "parts": [ + { + "inlineData": { + "mimeType": "audio/wav", + "data": "BASE64_AUDIO" + } + } + ] + } + } + ] +}` + } + case 30: + return { + title: t('modeType.30'), + endpoint: '/models/{model}:generateContent', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1beta/models/${modelConfig.model}:generateContent \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "contents": [ + { + "parts": [ + { + "text": "Generate a clean product icon" + } + ] + } + ], + "generationConfig": { + "responseModalities": ["IMAGE"] + } +}'`, + responseExample: `{ + "candidates": [ + { + "content": { + "parts": [ + { + "inlineData": { + "mimeType": "image/png", + "data": "BASE64_IMAGE" + } + } + ] + } + } + ] +}` + } + case 31: + return { + title: t('modeType.31'), + endpoint: '/files/{file}:download', + method: 'GET', + responseFormat: 'binary', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/v1beta/files/abc123:download?alt=media \\ +--header "Authorization: Bearer $token" \\ +--output video.mp4`, + responseExample: 'Binary file data' + } + case 32: + return { + title: t('modeType.32'), + endpoint: '/videos/edits', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/videos/edits \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: multipart/form-data' \\ +--form model=${modelConfig.model} \\ +--form 'video=@"source.mp4"' \\ +--form 'prompt=Replace the background with a sunrise' \\ +--form seconds=5 \\ +--form size=1280x720`, + responseExample: `{ + "id": "video_edited_123", + "object": "video", + "status": "queued" +}` + } + case 33: + return { + title: t('modeType.33'), + endpoint: '/videos/extensions', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/v1/videos/extensions \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: multipart/form-data' \\ +--form model=${modelConfig.model} \\ +--form 'video=@"source.mp4"' \\ +--form 'prompt=Continue the scene naturally' \\ +--form seconds=5 \\ +--form size=1280x720`, + responseExample: `{ + "id": "video_extended_123", + "object": "video", + "status": "queued" +}` + } + case 34: + return { + title: t('modeType.34'), + endpoint: '/services/aigc/video-generation/video-synthesis', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/api/v1/services/aigc/video-generation/video-synthesis \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "input": { + "prompt": "A calm ocean at sunrise" + }, + "parameters": { + "duration": 5, + "size": "720P" + } +}'`, + responseExample: `{ + "output": { + "task_id": "ali-task-123", + "task_status": "PENDING" + }, + "request_id": "request-123" +}` + } + case 35: + return { + title: t('modeType.35'), + endpoint: '/tasks/{task_id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/api/v1/tasks/ali-task-123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "output": { + "task_id": "ali-task-123", + "task_status": "SUCCEEDED", + "video_url": "https://example.com/video.mp4" + } +}` + } + case 36: + return { + title: t('modeType.36'), + endpoint: '/contents/generations/tasks', + method: 'POST', + responseFormat: 'json', + requestExample: `curl --request POST \\ +--url ${apiEndpoint}/api/v3/contents/generations/tasks \\ +--header "Authorization: Bearer $token" \\ +--header 'Content-Type: application/json' \\ +--data '{ + "model": "${modelConfig.model}", + "content": [ + { + "type": "text", + "text": "A calm ocean at sunrise" + } + ], + "duration": 5, + "resolution": "720p", + "ratio": "16:9" +}'`, + responseExample: `{ + "id": "doubao-task-123", + "model": "${modelConfig.model}", + "status": "queued" +}` + } + case 37: + return { + title: t('modeType.37'), + endpoint: '/contents/generations/tasks/{task_id}', + method: 'GET', + responseFormat: 'json', + requestExample: `curl --request GET \\ +--url ${apiEndpoint}/api/v3/contents/generations/tasks/doubao-task-123 \\ +--header "Authorization: Bearer $token"`, + responseExample: `{ + "id": "doubao-task-123", + "status": "succeeded", + "content": { + "video_url": "https://example.com/video.mp4" + } +}` + } + case 38: + return { + title: t('modeType.38'), + endpoint: '/contents/generations/tasks/{task_id}', + method: 'DELETE', + responseFormat: 'text', + requestExample: `curl --request DELETE \\ +--url ${apiEndpoint}/api/v3/contents/generations/tasks/doubao-task-123 \\ +--header "Authorization: Bearer $token"`, + responseExample: 'No content' + } default: return { title: t('modeType.0'), diff --git a/web/src/feature/monitor/components/MonitorCharts.tsx b/web/src/feature/monitor/components/MonitorCharts.tsx index c4a09804..b66b9780 100644 --- a/web/src/feature/monitor/components/MonitorCharts.tsx +++ b/web/src/feature/monitor/components/MonitorCharts.tsx @@ -557,13 +557,13 @@ export function MonitorCharts({ chartData, modelRanking, detailRanking = [], has const tokenSeries: { key: keyof ChartDataPoint; name: string; color: string }[] = [ { key: 'inputTokens', name: t('monitor.charts.tokensBreakdown.totalInput'), color: '#1d4ed8' }, { key: 'outputTokens', name: t('monitor.charts.tokensBreakdown.totalOutput'), color: '#059669' }, - { key: 'textInputTokens', name: t('monitor.charts.tokensBreakdown.textInput'), color: '#3b82f6' }, + { key: 'generalInputTokens', name: t('monitor.charts.tokensBreakdown.generalInput'), color: '#3b82f6' }, { key: 'cachedTokens', name: t('monitor.charts.tokensBreakdown.cached'), color: '#6366f1' }, { key: 'cacheCreationTokens', name: t('monitor.charts.tokensBreakdown.cacheCreation'), color: '#a78bfa' }, { key: 'imageInputTokens', name: t('monitor.charts.tokensBreakdown.imageInput'), color: '#06b6d4' }, { key: 'audioInputTokens', name: t('monitor.charts.tokensBreakdown.audioInput'), color: '#8b5cf6' }, { key: 'videoInputTokens', name: t('monitor.charts.tokensBreakdown.videoInput'), color: '#ec4899' }, - { key: 'textOutputTokens', name: t('monitor.charts.tokensBreakdown.textOutput'), color: '#10b981' }, + { key: 'generalOutputTokens', name: t('monitor.charts.tokensBreakdown.generalOutput'), color: '#10b981' }, { key: 'imageOutputTokens', name: t('monitor.charts.tokensBreakdown.imageOutput'), color: '#14b8a6' }, { key: 'audioOutputTokens', name: t('monitor.charts.tokensBreakdown.audioOutput'), color: '#f97316' }, ] @@ -658,13 +658,13 @@ export function MonitorCharts({ chartData, modelRanking, detailRanking = [], has const costBreakdownSeries: { key: keyof ChartDataPoint; name: string; color: string }[] = [ { key: 'totalInputAmount', name: t('monitor.charts.costBreakdownTypes.totalInput'), color: '#1d4ed8' }, { key: 'totalOutputAmount', name: t('monitor.charts.costBreakdownTypes.totalOutput'), color: '#059669' }, - { key: 'inputAmount', name: t('monitor.charts.costBreakdownTypes.textInput'), color: '#3b82f6' }, + { key: 'inputAmount', name: t('monitor.charts.costBreakdownTypes.generalInput'), color: '#3b82f6' }, { key: 'cachedAmount', name: t('monitor.charts.costBreakdownTypes.cached'), color: '#6366f1' }, { key: 'cacheCreationAmount', name: t('monitor.charts.costBreakdownTypes.cacheCreation'), color: '#a78bfa' }, { key: 'imageInputAmount', name: t('monitor.charts.costBreakdownTypes.imageInput'), color: '#06b6d4' }, { key: 'audioInputAmount', name: t('monitor.charts.costBreakdownTypes.audioInput'), color: '#8b5cf6' }, { key: 'videoInputAmount', name: t('monitor.charts.costBreakdownTypes.videoInput'), color: '#ec4899' }, - { key: 'outputAmount', name: t('monitor.charts.costBreakdownTypes.textOutput'), color: '#10b981' }, + { key: 'outputAmount', name: t('monitor.charts.costBreakdownTypes.generalOutput'), color: '#10b981' }, { key: 'imageOutputAmount', name: t('monitor.charts.costBreakdownTypes.imageOutput'), color: '#14b8a6' }, { key: 'audioOutputAmount', name: t('monitor.charts.costBreakdownTypes.audioOutput'), color: '#f97316' }, { key: 'webSearchAmount', name: t('monitor.charts.costBreakdownTypes.webSearch'), color: '#0ea5e9' }, diff --git a/web/src/feature/monitor/hooks.ts b/web/src/feature/monitor/hooks.ts index 3e937199..19a77079 100644 --- a/web/src/feature/monitor/hooks.ts +++ b/web/src/feature/monitor/hooks.ts @@ -196,9 +196,9 @@ function toChartData(timeSeries: TimeSeriesPoint[], timespan?: string, hasModelF const totalInputAmount = inputAmount + imageInputAmount + audioInputAmount + videoInputAmount + cachedAmount + cacheCreationAmount const totalOutputAmount = outputAmount + imageOutputAmount + audioOutputAmount - // Non-overlapping text portions (subtract sub-categories from totals) - const textInputTokens = Math.max(0, inputTokens - imageInputTokens - audioInputTokens - videoInputTokens - cachedTokens - cacheCreationTokens) - const textOutputTokens = Math.max(0, outputTokens - imageOutputTokens - audioOutputTokens) + // Non-overlapping general portions (subtract modality-specific and cache categories from totals). + const generalInputTokens = Math.max(0, inputTokens - imageInputTokens - audioInputTokens - videoInputTokens - cachedTokens - cacheCreationTokens) + const generalOutputTokens = Math.max(0, outputTokens - imageOutputTokens - audioOutputTokens) const status2xxCount = summary.reduce((acc, s) => acc + (s?.status_2xx_count || 0), 0) const status4xxCount = summary.reduce((acc, s) => acc + (s?.status_4xx_count || 0), 0) @@ -255,12 +255,12 @@ function toChartData(timeSeries: TimeSeriesPoint[], timespan?: string, hasModelF status500Count, retryCount, inputTokens, - textInputTokens, + generalInputTokens, imageInputTokens, audioInputTokens, videoInputTokens, outputTokens, - textOutputTokens, + generalOutputTokens, imageOutputTokens, audioOutputTokens, cachedTokens, diff --git a/web/src/types/dashboard.ts b/web/src/types/dashboard.ts index 487b4111..816cec77 100644 --- a/web/src/types/dashboard.ts +++ b/web/src/types/dashboard.ts @@ -111,12 +111,12 @@ export interface ChartDataPoint { status500Count: number retryCount: number inputTokens: number - textInputTokens: number + generalInputTokens: number imageInputTokens: number audioInputTokens: number videoInputTokens: number outputTokens: number - textOutputTokens: number + generalOutputTokens: number imageOutputTokens: number audioOutputTokens: number cachedTokens: number diff --git a/web/src/types/log.ts b/web/src/types/log.ts index 8cd71463..3ca456ea 100644 --- a/web/src/types/log.ts +++ b/web/src/types/log.ts @@ -64,6 +64,8 @@ export interface LogUsageContext { native_resolution?: string quality?: string service_tier?: string + input_video?: boolean + output_audio?: boolean } // 请求详情 diff --git a/web/src/types/model.ts b/web/src/types/model.ts index 2027668e..8354187e 100644 --- a/web/src/types/model.ts +++ b/web/src/types/model.ts @@ -22,6 +22,8 @@ export interface PriceCondition { resolution?: string[] quality?: string[] service_tier?: '' | 'auto' | 'default' | 'flex' | 'scale' | 'priority' + input_video?: boolean + output_audio?: boolean } export interface ConditionalPrice { @@ -90,13 +92,18 @@ export interface ModelConfig { export type ModelSaveRequest = Omit -export const MODEL_TYPE_OPTIONS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 21, 22, 26, 27, 28, 29, 30] as const +export const MODEL_TYPE_OPTIONS = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, +] as const export const STREAM_TIMEOUT_SUPPORTED_MODEL_TYPES = [1, 2, 12, 16, 21] as const -export const IMAGE_GENERATION_COUNT_LIMIT_SUPPORTED_MODEL_TYPES = [5, 6] as const -export const VIDEO_GENERATION_SECONDS_LIMIT_SUPPORTED_MODEL_TYPES = [13, 22, 26, 27] as const +export const IMAGE_GENERATION_COUNT_LIMIT_SUPPORTED_MODEL_TYPES = [5, 6, 30] as const +export const VIDEO_GENERATION_SECONDS_LIMIT_SUPPORTED_MODEL_TYPES = [13, 22, 26, 27, 32, 33, 34, 36] as const export const VIDEO_GENERATION_COUNT_LIMIT_SUPPORTED_MODEL_TYPES = [13, 27] as const -export const RESOLUTION_FUZZY_MATCH_SUPPORTED_MODEL_TYPES = [5, 6, 13, 22, 26, 27, 28, 30] as const +export const RESOLUTION_FUZZY_MATCH_SUPPORTED_MODEL_TYPES = [5, 6, 13, 22, 26, 27, 28, 30, 32, 33, 34, 36] as const export type ModelTypeOption = (typeof MODEL_TYPE_OPTIONS)[number] diff --git a/web/src/validation/model.ts b/web/src/validation/model.ts index c7d37c04..a0148d7a 100644 --- a/web/src/validation/model.ts +++ b/web/src/validation/model.ts @@ -86,6 +86,8 @@ const priceConditionSchema = z.object({ resolution: z.array(z.string()).optional(), quality: z.array(z.string()).optional(), service_tier: z.enum(['auto', 'default', 'flex', 'scale', 'priority']).or(z.literal('')).optional(), + input_video: z.boolean().optional(), + output_audio: z.boolean().optional(), }) // Price schema (used for conditional prices)