diff --git a/apps/api/.env.example b/apps/api/.env.example index f2a0213..d02ccfd 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -12,3 +12,7 @@ ARK_ENDPOINT= # 样例 / 用户素材上传大小上限(MB),默认 200;需与 Fastify multipart 一致 # MAX_UPLOAD_MB=500 + +# 可选:Pexels 免费视频检索(用真实素材补齐缺口)。无 key 时缺口回退 text_card 占位。 +# 注册免费 key:https://www.pexels.com/api/ +PEXELS_API_KEY= diff --git a/apps/api/src/agents/__tests__/fillWithStock.test.ts b/apps/api/src/agents/__tests__/fillWithStock.test.ts new file mode 100644 index 0000000..0441c42 --- /dev/null +++ b/apps/api/src/agents/__tests__/fillWithStock.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from 'vitest'; +import { sampleBlueprint } from '../../core/mocks/sample-blueprint'; +import { runRuleBasedMigration } from '../../core/migration'; +import type { TaggedAsset } from '../../core/slot'; +import { enhanceFillsWithStock } from '../fillWithStock'; + +const assets: TaggedAsset[] = [ + { id: 'a1', mediaType: 'video', assetTags: ['product_closeup'], durationSec: 5, confidence: 0.9, summary: '特写' }, +]; + +const basePlan = () => + runRuleBasedMigration({ + projectId: 'p', + sampleId: sampleBlueprint.sourceSampleId, + blueprint: sampleBlueprint, + assets, + topic: 'F1 赛车冠军征程', + durationSec: 30, + }); + +describe('enhanceFillsWithStock', () => { + it('检索命中:text_card 缺口被替换为 stock_clip,记一条 decision', async () => { + const plan = basePlan(); + const tcCount = plan.fills.filter((f) => f.kind === 'text_card').length; + expect(tcCount).toBeGreaterThan(0); + + const enhanced = await enhanceFillsWithStock(plan, { + search: async () => ({ path: '/tmp/stock.mp4', attribution: 'Pexels stub', sourceUrl: 'https://x' }), + outDir: '/tmp', + blueprint: sampleBlueprint, + }); + + expect(enhanced.fills.filter((f) => f.kind === 'stock_clip')).toHaveLength(tcCount); + expect(enhanced.fills.filter((f) => f.kind === 'text_card')).toHaveLength(0); + expect(enhanced.fills.every((f) => f.kind !== 'stock_clip' || f.source === 
/** Options accepted by {@link enhanceFillsWithStock}. */
export interface StockFillOptions {
  /** Search function; falls back to `searchAndDownloadStock` when omitted. Injectable for tests. */
  search?: StockSearchFn;
  /** Directory that downloaded clips are written to (passed through to `search`). */
  outDir: string;
  /** Blueprint whose slots supply `segmentRole` / `requiredAssetTypes` for query building. */
  blueprint?: VideoStructureBlueprint;
  /** Custom query builder; the default joins the topic with an English asset keyword. */
  buildQuery?: (ctx: { topic: string; role?: string; requiredAssetTypes?: string[] }) => string;
}

// Asset tag -> English search keyword (the original note says Pexels handles Chinese queries poorly).
// `text_card` maps to an empty string so it never contributes a keyword.
const ASSET_TERM: Record<string, string> = {
  talking_head: 'person talking',
  product_closeup: 'product closeup',
  usage_demo: 'using product',
  comparison: 'comparison',
  b_roll: 'cinematic b roll',
  text_card: '',
};

/** Default query: the topic followed by the first non-empty keyword among the required asset types. */
function defaultBuildQuery(ctx: { topic: string; requiredAssetTypes?: string[] }): string {
  const term = ctx.requiredAssetTypes?.map((t) => ASSET_TERM[t]).find(Boolean) ?? '';
  return [ctx.topic, term].filter(Boolean).join(' ').trim();
}

/**
 * Replaces `text_card` gap fills with stock clips when a search succeeds.
 *
 * Fills of any other kind, and `text_card` fills whose search yields nothing, are
 * returned untouched. Every replacement appends one `stock_clip` decision.
 * The input plan is not mutated.
 *
 * Two guarantees on top of the plain "search per fill" approach:
 * - identical queries share a single search call, so the same clip is not
 *   looked up and downloaded once per slot;
 * - decisions are appended in fill order, not in the order searches happen to
 *   finish, so the audit trail is deterministic.
 *
 * @param plan Migration plan whose fills should be enhanced.
 * @param opts Search function, output directory and query-building inputs.
 * @returns A copy of `plan` with updated `fills` and `decisions`.
 */
export async function enhanceFillsWithStock(
  plan: MigrationPlan,
  opts: StockFillOptions,
): Promise<MigrationPlan> {
  const search = opts.search ?? searchAndDownloadStock;
  const buildQuery = opts.buildQuery ?? defaultBuildQuery;
  const slots = opts.blueprint?.slots ?? [];

  // One search per distinct query; later fills reuse the pending promise.
  const inFlight = new Map<string, ReturnType<StockSearchFn>>();
  const searchOnce = (query: string): ReturnType<StockSearchFn> => {
    let pending = inFlight.get(query);
    if (!pending) {
      pending = search(query, opts.outDir);
      inFlight.set(query, pending);
    }
    return pending;
  };

  const outcomes = await Promise.all(
    plan.fills.map(async (fill) => {
      if (fill.kind !== 'text_card') return { fill, decision: null };
      const slot = slots.find((s) => s.id === fill.slotId);
      const query = buildQuery({
        topic: plan.topic,
        role: slot?.segmentRole,
        requiredAssetTypes: slot?.requiredAssetTypes,
      });
      const clip = await searchOnce(query);
      if (!clip) return { fill, decision: null };
      const decision: Decision = {
        chosen: 'stock_clip',
        alternatives: ['text_card'],
        confidence: 0.7,
        reason: `Pexels 检索命中:${clip.attribution}(query=${query})`,
      };
      return { fill: { ...fill, kind: 'stock_clip' as const, source: clip.path }, decision };
    }),
  );

  // `outcomes` is index-aligned with `plan.fills`, so this keeps fill order.
  const decisions: Decision[] = [...plan.decisions];
  for (const outcome of outcomes) {
    if (outcome.decision) decisions.push(outcome.decision);
  }

  return { ...plan, fills: outcomes.map((outcome) => outcome.fill), decisions };
}
sample1 蓝图套预设变换。 +const blueprint = JSON.parse( + readFileSync(resolve('out/analysis/sample1/blueprint.json'), 'utf8'), +) as VideoStructureBlueprint; + +const assets: TaggedAsset[] = [ + { id: 'a', mediaType: 'video', assetTags: ['talking_head'], durationSec: 12, confidence: 0.8, summary: '口播' }, + { id: 'b', mediaType: 'video', assetTags: ['b_roll'], durationSec: 20, confidence: 0.8, summary: '空镜' }, +]; + +const variants = generateVersions({ + projectId: 'demo', + sampleId: blueprint.sourceSampleId, + blueprint, + assets, + topic: 'F1 赛车手追逐冠军梦想的赛季故事', + sellingPoints: ['全力以赴争取每一分'], + durationSec: 40, +}); + +console.log(`base: cutDensity=${blueprint.rhythmStructure.cutDensity} avgShot=${blueprint.rhythmStructure.avgShotSec} peakAt=${blueprint.rhythmStructure.peakAt}`); +for (const v of variants) { + const r = v.blueprint.rhythmStructure; + const hook = v.blueprint.scriptStructure.segments.find((s) => s.role === 'hook'); + console.log( + `\n[${v.label}] ${v.describe}\n cutDensity=${r.cutDensity} avgShot=${r.avgShotSec} peakAt=${r.peakAt} hook%=${hook ? 
/** A stock clip that has been downloaded to local disk. */
export interface StockClip {
  /** Path of the downloaded file, built as `join(outDir, ...)`; consumed by render. */
  path: string;
  /** Attribution text for display (the original note cites the Pexels attribution requirement). */
  attribution: string;
  /** Page URL of the source video as reported by the search API. */
  sourceUrl: string;
}

/** Searches for a clip matching `query` and stores it under `outDir`; resolves to `null` when nothing usable is found. */
export type StockSearchFn = (query: string, outDir: string) => Promise<StockClip | null>;

/** Subset of a Pexels `video_files` entry that this module reads. */
interface PexelsFile {
  link: string;
  width: number;
  height: number;
  file_type: string;
}
/** Subset of a Pexels video search result that this module reads. */
interface PexelsVideo {
  url: string;
  user?: { name?: string };
  video_files?: PexelsFile[];
}

const PEXELS_VIDEO_SEARCH = 'https://api.pexels.com/videos/search';
// Upper bounds so a stalled connection cannot block the caller indefinitely.
const SEARCH_TIMEOUT_MS = 10_000;
const DOWNLOAD_TIMEOUT_MS = 60_000;

/**
 * Picks the mp4 rendition whose height is closest to 720.
 * Ties keep the earliest entry; returns `null` when there is no mp4 rendition.
 */
function pickFile(files: PexelsFile[]): PexelsFile | null {
  let best: PexelsFile | null = null;
  for (const candidate of files) {
    if (candidate.file_type !== 'video/mp4') continue;
    if (best === null || Math.abs(candidate.height - 720) < Math.abs(best.height - 720)) {
      best = candidate;
    }
  }
  return best;
}

/**
 * Default {@link StockSearchFn}: Pexels video search followed by a download.
 *
 * Resolves to `null` (never rejects) when the API key is missing, the query is
 * blank, a request fails or times out, or no mp4 rendition exists, so callers
 * can keep their `text_card` fallback.
 *
 * NOTE(review): a download that fails midway can leave a truncated file at the
 * destination path; the path is not returned in that case, but the file is not
 * removed here.
 */
export const searchAndDownloadStock: StockSearchFn = async (query, outDir) => {
  const key = getPexelsKey();
  if (!key || !query.trim()) return null;
  try {
    const url = `${PEXELS_VIDEO_SEARCH}?query=${encodeURIComponent(query)}&per_page=1`;
    const res = await fetch(url, {
      headers: { Authorization: key },
      signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    const data = (await res.json()) as { videos?: PexelsVideo[] };
    const video = data.videos?.[0];
    const file = pickFile(video?.video_files ?? []);
    if (!video || !file) return null;

    mkdirSync(outDir, { recursive: true });
    const dest = join(outDir, `stock_${randomUUID().slice(0, 8)}.mp4`);
    // The signal also covers reading the body, so the whole download is bounded.
    const dl = await fetch(file.link, { signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS) });
    if (!dl.ok || !dl.body) return null;
    await pipeline(Readable.fromWeb(dl.body as WebReadable), createWriteStream(dest));
    const by = video.user?.name ? ` by ${video.user.name}` : '';
    return { path: dest, attribution: `Pexels${by}`, sourceUrl: video.url };
  } catch {
    // Best-effort by design: any network / IO failure means "no stock clip".
    return null;
  }
};
/** Rounds to three decimal places. */
function round(n: number): number {
  return Number(n.toFixed(3));
}

/** Clamps `n` into the closed interval [0, 1]. */
function clamp01(n: number): number {
  return Math.max(0, Math.min(1, n));
}

/** Coerces a patch value with `Number()`; returns `null` for NaN and +/-Infinity. */
function toFinite(value: unknown): number | null {
  const n = Number(value);
  return Number.isFinite(n) ? n : null;
}

/**
 * Applies a `BlueprintPatch` and returns a new blueprint; the input is not mutated
 * (it is deep-copied with `structuredClone` first).
 *
 * Supported paths:
 * - `rhythmStructure.cutDensity` (set)
 * - `rhythmStructure.avgShotSec` (set | scale)
 * - `rhythmStructure.peakAt` (set, clamped to [0, 1])
 * - `packagingStructure.subtitleDensity` (set, only when `packagingStructure` exists)
 * - `segment.<role>.durationRatio` (scale, applied to every segment with that role)
 *
 * Unrecognised paths are ignored, and so are numeric ops whose value does not
 * coerce to a finite number. After all ops, segment `durationRatio` values are
 * normalised so they sum to 1.
 */
export function applyBlueprintPatch(
  bp: VideoStructureBlueprint,
  patch: BlueprintPatch,
): VideoStructureBlueprint {
  const next = structuredClone(bp);
  for (const op of patch.ops) applyOp(next, op);
  normalizeRatios(next);
  return next;
}

/** Applies one op to `bp` in place; see {@link applyBlueprintPatch} for the supported paths. */
function applyOp(bp: VideoStructureBlueprint, op: BlueprintPatchOp): void {
  const { path, op: kind, value } = op;
  const rhythm = bp.rhythmStructure;

  if (path === 'rhythmStructure.cutDensity' && kind === 'set') {
    rhythm.cutDensity = value as CutDensity;
    return;
  }
  if (path === 'rhythmStructure.peakAt' && kind === 'set') {
    const n = toFinite(value);
    // A non-numeric value would otherwise become NaN: clamp01(NaN) is NaN.
    if (n !== null) rhythm.peakAt = clamp01(n);
    return;
  }
  if (path === 'rhythmStructure.avgShotSec') {
    const n = toFinite(value);
    if (n === null) return;
    if (kind === 'scale') rhythm.avgShotSec = round(rhythm.avgShotSec * n);
    else if (kind === 'set') rhythm.avgShotSec = n;
    return;
  }
  if (path === 'packagingStructure.subtitleDensity' && kind === 'set' && bp.packagingStructure) {
    bp.packagingStructure.subtitleDensity = value as SubtitleDensity;
    return;
  }
  const seg = /^segment\.([a-z_]+)\.durationRatio$/.exec(path);
  if (seg && kind === 'scale') {
    const n = toFinite(value);
    // Math.max(0.01, NaN) is NaN, so the floor alone does not protect the ratio.
    if (n === null) return;
    for (const s of bp.scriptStructure.segments) {
      if (s.role === seg[1]) s.durationRatio = Math.max(0.01, s.durationRatio * n);
    }
  }
  // Anything else: unrecognised path, ignored on purpose.
}

/**
 * Rescales segment `durationRatio` values in place so they sum to 1.
 * Does nothing when there are no segments or the current sum is not a positive finite number.
 */
function normalizeRatios(bp: VideoStructureBlueprint): void {
  const segs = bp.scriptStructure.segments;
  const sum = segs.reduce((acc, s) => acc + s.durationRatio, 0);
  if (segs.length === 0 || !Number.isFinite(sum) || sum <= 0) return;
  for (const s of segs) s.durationRatio = round(s.durationRatio / sum);
  // Push the rounding drift onto the last segment so the total is exactly 1.
  const drift = round(1 - segs.reduce((acc, s) => acc + s.durationRatio, 0));
  segs[segs.length - 1].durationRatio = round(segs[segs.length - 1].durationRatio + drift);
}
/** One generated version: the preset's metadata plus the patched blueprint and its migration plan. */
export interface VersionVariant {
  id: string;
  label: string;
  describe: string;
  patch: BlueprintPatch;
  blueprint: VideoStructureBlueprint;
  migration: MigrationPlan;
}

/** Synchronous migration function used to turn a variant blueprint into a plan. */
export type RuleMigrateFn = (input: RuleBasedMigrationInput) => MigrationPlan;

/**
 * Builds one variant per preset: the preset's patch is applied to the input
 * blueprint, then the patched blueprint is run through the migration function.
 *
 * @param input Migration input; its `blueprint` is the base every preset patches.
 * @param opts  `presets` defaults to `VERSION_PRESETS`; `migrate` defaults to
 *              `runRuleBasedMigration`.
 * @returns Variants in the same order as the presets.
 */
export function generateVersions(
  input: RuleBasedMigrationInput,
  opts: { presets?: VersionPreset[]; migrate?: RuleMigrateFn } = {},
): VersionVariant[] {
  const chosenPresets = opts.presets ?? VERSION_PRESETS;
  const runMigration = opts.migrate ?? runRuleBasedMigration;

  const variants: VersionVariant[] = [];
  for (const { id, label, describe, patch } of chosenPresets) {
    const blueprint = applyBlueprintPatch(input.blueprint, patch);
    const migration = runMigration({ ...input, blueprint });
    variants.push({ id, label, describe, patch, blueprint, migration });
  }
  return variants;
}
+ + it('生成多版本:3 个差异明确的版本(规则版,无需 key)', async () => { + const store = new Store(); + const p = store.createProject('p'); + const sample = store.addSample(p.id, '/tmp/x.mp4', 'x.mp4'); + store.patchSample(sample.id, { analysis: mockAnalysis, blueprint: sampleBlueprint }); + const app = await buildApp({ store, executors: {} }); + + const res = await app.inject({ + method: 'POST', + url: `/api/projects/${p.id}/versions`, + payload: { sampleId: sample.id, topic: 'demo' }, + }); + expect(res.statusCode).toBe(201); + const versions = res.json().versions as { + id: string; + blueprint: { rhythmStructure: { cutDensity: string } }; + }[]; + expect(versions).toHaveLength(3); + expect(versions.find((v) => v.id === 'fast')?.blueprint.rhythmStructure.cutDensity).toBe('high'); + expect(versions.find((v) => v.id === 'cinematic')?.blueprint.rhythmStructure.cutDensity).toBe('low'); + await app.close(); + }); }); diff --git a/apps/api/src/server/app.ts b/apps/api/src/server/app.ts index 0edc0da..ec150df 100644 --- a/apps/api/src/server/app.ts +++ b/apps/api/src/server/app.ts @@ -1,16 +1,23 @@ import { randomUUID } from 'node:crypto'; import { createReadStream, existsSync } from 'node:fs'; -import { extname } from 'node:path'; +import { extname, join } from 'node:path'; import multipart from '@fastify/multipart'; import Fastify, { type FastifyInstance } from 'fastify'; import { type ExpertMigrationInput, runExpertMigration } from '../agents/expertMigration'; +import { enhanceFillsWithStock } from '../agents/fillWithStock'; +import { searchAndDownloadStock, type StockSearchFn } from '../agents/stockFootage'; +import { tagAsset } from '../agents/tagAsset'; +import { getArkConfig } from '../config'; import { AssetTag, MediaType, type AssetTag as AssetTagT, type MediaType as MediaTypeT } from '../core/enums'; import type { MigrationPlan } from '../core/migration'; +import { generateVersions } from '../core/versions'; +import { probeVideo } from '../media/probe'; import { defaultExecutors, 
type JobExecutor } from './jobs'; import { ALLOWED_ASSET_EXT, MAX_ASSET_BYTES, MAX_UPLOAD_BYTES, + UPLOADS_ROOT, assetUploadPath, sampleUploadPath, } from './paths'; @@ -19,14 +26,40 @@ import { UploadError, saveMultipartFile } from './upload'; export type MigrateFn = (input: ExpertMigrationInput) => Promise | MigrationPlan; +export interface AssetTagResult { + assetTags: AssetTagT[]; + summary: string; + confidence: number; +} +export type AssetTagFn = (input: { + path: string; + mediaType: MediaTypeT; + durationSec?: number; +}) => Promise; + export interface AppDeps { store?: Store; /** 可注入 executor,便于测试时替换掉真实渲染 / FFmpeg / LLM。 */ executors?: Partial>; /** 可注入迁移实现;默认走剪辑专家(LLM)。测试可注入规则版以离线运行。 */ migrate?: MigrateFn; + /** 可注入素材打标;默认 VLM(无 ARK key 或失败时返回 null → 回退默认标签)。 */ + tagFn?: AssetTagFn; + /** 可注入 stock 检索;默认 Pexels(无 PEXELS_API_KEY 或失败时返回 null → 保留 text_card)。 */ + stockFn?: StockSearchFn; } +/** 默认素材打标:有 ARK key 时走 VLM,否则 / 失败时返回 null(调用方回退默认标签)。 */ +const defaultTagFn: AssetTagFn = async ({ path, mediaType, durationSec }) => { + if (!getArkConfig()) return null; + try { + const t = await tagAsset({ sourcePath: path, mediaType, durationSec }); + return { assetTags: t.assetTags, summary: t.summary, confidence: t.confidence }; + } catch { + return null; + } +}; + function runJob(store: Store, executors: Partial>, job: Job) { const exec = executors[job.kind]; if (exec) void exec(store, job); @@ -87,6 +120,8 @@ export async function buildApp(deps: AppDeps = {}): Promise { const store = deps.store ?? new Store(); const executors = { ...defaultExecutors, ...deps.executors }; const migrate: MigrateFn = deps.migrate ?? ((input) => runExpertMigration(input)); + const tagFn: AssetTagFn = deps.tagFn ?? defaultTagFn; + const stockFn: StockSearchFn = deps.stockFn ?? 
searchAndDownloadStock; const app = Fastify({ logger: false }); await app.register(multipart, { limits: { fileSize: MAX_UPLOAD_BYTES } }); @@ -195,13 +230,37 @@ export async function buildApp(deps: AppDeps = {}): Promise { } throw e; } + const explicitTags = parseAssetTags(fieldValue(file.fields, 'assetTags'), []); + const explicitSummary = fieldValue(file.fields, 'summary'); + let durationSec = parsePositiveNumber(fieldValue(file.fields, 'durationSec')); + if (durationSec == null && mediaType === 'video') { + try { + durationSec = (await probeVideo(destPath)).durationSec; + } catch { + /* 探测失败则用默认 */ + } + } + + let assetTags = explicitTags.length ? explicitTags : defaultTagsFor(mediaType); + let summary = explicitSummary ?? file.filename ?? '用户素材'; + let confidence = 1; + // 无显式标签时自动 VLM 打标(video / image) + if (!explicitTags.length && mediaType !== 'text') { + const auto = await tagFn({ path: destPath, mediaType, durationSec }); + if (auto) { + assetTags = auto.assetTags; + summary = explicitSummary ?? auto.summary; + confidence = auto.confidence; + } + } + const saved = store.addAsset(projectId, { id: assetId, mediaType, - assetTags: parseAssetTags(fieldValue(file.fields, 'assetTags'), defaultTagsFor(mediaType)), - durationSec: parsePositiveNumber(fieldValue(file.fields, 'durationSec')) ?? 5, - confidence: 1, - summary: fieldValue(file.fields, 'summary') ?? file.filename ?? '用户素材', + assetTags, + durationSec: durationSec ?? 
5, + confidence, + summary, filename: file.filename, sourcePath: destPath, }); @@ -316,7 +375,7 @@ export async function buildApp(deps: AppDeps = {}): Promise { reply.code(400); return { error: '需要先完成结构蓝图抽取' }; } - const migration = await migrate({ + const baseMigration = await migrate({ projectId: id, sampleId: sample.id, blueprint: sample.blueprint, @@ -325,6 +384,12 @@ export async function buildApp(deps: AppDeps = {}): Promise { sellingPoints: splitSellingPoints(body.sellingPoints), durationSec: parsePositiveNumber((body as { durationSec?: number }).durationSec) ?? 30, }); + // 缺口若是 text_card,尝试用 stock 真实素材替换;无 key/检索失败时保留 text_card + const migration = await enhanceFillsWithStock(baseMigration, { + search: stockFn, + outDir: join(UPLOADS_ROOT, id, 'stock'), + blueprint: sample.blueprint, + }); const record = store.saveMigration(migration); reply.code(201); return { migration: record }; @@ -344,6 +409,37 @@ export async function buildApp(deps: AppDeps = {}): Promise { return { migration }; }); + // 多版本:对蓝图套用预设变换 → 多个差异明确的迁移方案(规则版,无需 LLM/key)。 + app.post('/api/projects/:id/versions', async (req, reply) => { + const { id } = req.params as { id: string }; + if (!store.getProject(id)) { + reply.code(404); + return { error: 'project not found' }; + } + const body = (req.body ?? {}) as { + sampleId?: string; + topic?: string; + sellingPoints?: string[] | string; + durationSec?: number; + }; + const sample = store.getLatestSampleForStructure(id, body.sampleId); + if (!sample?.blueprint) { + reply.code(400); + return { error: '需要先完成结构蓝图抽取' }; + } + const versions = generateVersions({ + projectId: id, + sampleId: sample.id, + blueprint: sample.blueprint, + assets: store.listAssets(id), + topic: body.topic ?? '新主题', + sellingPoints: splitSellingPoints(body.sellingPoints), + durationSec: parsePositiveNumber(body.durationSec) ?? 
30, + }); + reply.code(201); + return { versions }; + }); + app.post('/api/projects/:id/render', async (req, reply) => { const { id } = req.params as { id: string }; if (!store.getProject(id)) { diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..53a9e0a --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,168 @@ +# VisionForge — AI 架构、工具协议与安全边界(交付文档) + +> 面向答辩与评审:说明整体 AI 架构、各「工具/Agent」的协议、安全边界,以及 AI 工具使用与自主实现边界。 +> 与代码对应(`apps/api`、`apps/web`),与 [DESIGN.md](DESIGN.md) 的设计、[knowledge/](knowledge/) 的决策/案例互为补充。最后更新:2026-05-25。 + +--- + +## 1. 一句话与定位 + +VisionForge 是**爆款结构迁移引擎**:从优质样例短视频中抽取**可解释的结构蓝图**,再迁移到新主题与用户素材,产出脚本 / 分镜 / 时间线 / 包装建议与**成片 demo**。 + +- **不是**端到端「一键生成」黑盒;核心是**可解释的结构抽象层**(`VideoStructureBlueprint`)。 +- **体裁无关**:先判定 `videoGenre`(叙事 / 教程 / vlog / 解说 / 展示 / 带货 / 其他),带货只是其中一种;服务对象是**观众 / 观看者**。 + +--- + +## 2. 整体 AI 架构 + +```mermaid +flowchart TB + subgraph Web["Web UI (React/Vite)"] + U[上传/录入] --> Vz[结构与迁移可视化] --> Out[输出/成片预览] + end + subgraph API["API / Orchestrator (Fastify + 内存 Store + Job)"] + J[Job 队列与状态 / SSE] + end + subgraph Analysis["分析层(机器证据)"] + FF[FFmpeg/ffprobe: probe/切镜/关键帧/封面] + end + subgraph AI["AI 层(火山方舟 Doubao)"] + VLM[describe_frames / tag_asset 多模态] + SA[StructureAgent 结构抽取] + MA[剪辑专家迁移 + 编辑知识库] + end + subgraph Render["渲染层 (FFmpeg-only)"] + R[timeline → MP4 + 字幕 + 文字卡] + end + + Web --> API + API --> Analysis + API --> AI + API --> Render + Analysis --> AI + AI --> Render + Render --> API --> Web +``` + +**四层职责边界** + +| 层 | 职责 | 关键文件 | +| --- | --- | --- | +| Web UI | 上传 / 可视化 / 调整 / 对比;不直接理解视频 | `apps/web/src/pages/*`、`components/BlueprintPanel.tsx` | +| API / Orchestrator | 项目、素材、Job 编排(analyze/structure/render)、SSE 进度 | `apps/api/src/server/{app,jobs,store}.ts` | +| 分析层 | 机器可验证信号(时长/切镜/关键帧/封面) | `apps/api/src/media/*` | +| AI 层 | 画面理解、结构抽取、迁移决策(LLM/VLM) | `apps/api/src/agents/*`、`llm/ark.ts` | +| 渲染层 | 时间线 → MP4(FFmpeg-only) | `apps/api/src/render/*` | + +**单一契约**:所有层共享 
`apps/api/src/core`(Zod 定义,`z.infer` 出 TS 类型,`zod-to-json-schema` 派生 `schemas/*.json` 供 LLM 结构化输出)。 + +--- + +## 3. 核心流水线(Agent / 工具协议) + +每个「工具/Agent」输入输出都是**结构化 JSON**,并遵循两条原则: +- **evidence vs rationale 分离**:`evidence[]` 是机器可验证事实(时间码、切镜计数、标签置信度…),`rationale` 是模型自述(UI 标「模型推断」,不作为唯一依据)。 +- **schema 校验 + 失败重试**:LLM 输出经 `chatJson`(去代码块 → `JSON.parse` → Zod 校验 → 带错误重试)保证合法。 + +| 步骤 | 工具 / Agent | 输入 | 输出 | 实现 | 用 AI? | +| --- | --- | --- | --- | --- | --- | +| 1 | `probe_video` / `detect_scenes` / 关键帧 / 封面(ParseAgent) | 样例视频 | `SampleAnalysis`(元数据+切点+关键帧+封面+evidence) | `media/analyze.ts` | 否(FFmpeg) | +| 2 | `describe_frames` | 关键帧(≤12,512px) | 逐帧描述 + 内容概述 + 体裁猜测 | `agents/describeFrames.ts` | 是(VLM) | +| 3 | `extract_structure`(StructureAgent) | 分析 + 转写? + 画面描述 | `VideoStructureBlueprint`(体裁/脚本/节奏/包装/槽位) | `agents/structureAgent.ts` | 是(LLM) | +| 4 | `tag_asset`(AssetTagAgent) | 用户素材 | `TaggedAsset`(与槽位共用 enum) | `agents/tagAsset.ts` | 是(VLM) | +| 5 | `match` / `gap`(规则版) | 蓝图槽位 + 素材 | 匹配报告 + 缺口 + 影响说明 | `core/migration.ts` | 否(规则) | +| 6 | 剪辑专家迁移(MigrateAgent) | 蓝图 + 主题 + 素材 + 编辑KB | `MigrationPlan`(脚本/分镜/时间线/补全/decisions) | `agents/expertMigration.ts` | 是(LLM,规则打底) | +| 7 | `render` | 时间线 + 素材/补全 | MP4(拼接 + 字幕 + 文字卡) | `render/renderTimeline.ts` | 否(FFmpeg) | + +> **声明式取向**:每步可视化所需的 `evidence/rationale/decisions` 都随 Job 持久化,是「迁移过程可视化」(评分项)的数据源。 + +### 3.1 结构蓝图(核心抽象) + +`VideoStructureBlueprint`(`core/blueprint.ts`)= 体裁 + 三层结构 + 槽位 + 证据/自述: +- `videoGenre`:先判体裁再抽结构,**不默认带货**。 +- `scriptStructure.segments[]`:通用叙事节拍 `hook/setup/develop/climax/closing` + `label`(体裁专属名)+ `durationRatio`(和≈1)+ `intent` + `copyPattern`(模式非原文)。 +- `rhythmStructure`:`avgShotSec`/`cutDensity`/`peakAt`/`bgmBeatHints`。 +- `packagingStructure`:字幕密度/标题条/贴纸/转场/封面(可选)。 +- `slots[]`(`StructureSlot`):关键段落的素材需求,是**缺口识别**的基础。 + +### 3.2 迁移(规则打底 + LLM 增强) + +- **规则层**保证结构合法:按 `durationRatio` 生成段落时间线;用标签重合度匹配素材;缺口生成 `FillArtifact`(文字卡兜底)。 +- **专家层**(剪辑导演 LLM + `agents/editing-kb/` 体裁 
playbook)产出贴合主题的脚本文案、镜头方向与**观众视角的迁移思路**(rationale)。 +- 二者合并成 `MigrationPlan`,可被渲染。 + +--- + +## 4. 安全边界 + +| 类别 | 措施 | 实现 | +| --- | --- | --- | +| 密钥 | `ARK_API_KEY` 仅服务端 `.env`(已 gitignore),禁止进前端 / git / 聊天;诊断只打印**指纹**(长度/首尾),绝不输出明文 | `config.ts`、`scripts/check-ark.ts` | +| 上传 | 类型白名单 + 大小上限;样例/素材分别限制 | `server/paths.ts`、`server/upload.ts` | +| 路径隔离 | 用户媒体落在 `uploads/{projectId}/`(gitignore),文件名加 UUID 前缀防覆盖 | `server/paths.ts` | +| 模型输出 | 一律经 Zod schema 校验(`chatJson`);**不执行**模型返回的任何代码 | `llm/ark.ts`、`core/validate.ts` | +| 降级 | 无 key / LLM 失败时优雅回退(素材回退默认标签、渲染回退占位、字幕 filter 缺失降级无字幕),不崩溃 | `app.ts`、`render/renderTimeline.ts` | +| 外部资源 | 仅本机 FFmpeg 子进程 + 火山方舟 HTTPS;除可选的 Pexels stock 检索/下载(需配置 `PEXELS_API_KEY`,下载地址取自 Pexels API 响应)外,不拉取任意外部 URL | `agents/stockFootage.ts` | + +--- + +## 5. AI 工具使用说明(课题要求) + +| AI 工具 | 用于哪些环节 | 性质 | +| --- | --- | --- | +| 火山方舟 Doubao(LLM + 多模态 VLM,OpenAI 兼容) | 运行时:`describe_frames`、`extract_structure`、`tag_asset`、剪辑专家迁移 | 运行时依赖(可配置 endpoint) | +| Claude Code / Cursor 等 | 开发期:辅助编码、文档、调试 | 开发辅助 | +| ASR(火山语音,待接) | 计划:样例转写喂给 StructureAgent | 未集成(见 §7) | + +**自主设计与实现**(核心,非现成产品直接生成): +- 「结构」三层模型与**体裁无关**的 `VideoStructureBlueprint`、槽位/缺口/补全模型。 +- evidence vs rationale 分离、`chatJson` 的「schema 校验 + 重试」结构化输出机制。 +- 规则打底 + LLM 增强的**剪辑专家迁移**与自建**编辑知识库**(per-genre playbook)。 +- FFmpeg-only 渲染管线(拼接 / 中文 ASS 字幕烧录 / 文字卡 / 占位降级)。 +- 全部 schema、API、Job 编排、Web 可视化。 + +**借鉴自 [OpenMontage](reference.md)(只借鉴范式,非代码/依赖)**:声明式 pipeline + skill 文档的取向、决策审计轨迹(decision audit trail)、slideshow 风险概念。 + +--- + +## 6. 
关键工程决策(详见 [knowledge/03-决策记录](knowledge/03-决策记录.md)) + +- 共享 schema 用 **Zod 单一来源**(types + JSON Schema 派生),避免双份漂移。 +- 仓库 `apps/web` + `apps/api`,共享代码在 `apps/api/src/core`,暂不拆顶层 `packages/`。 +- **P0 渲染 FFmpeg-only**;Remotion/HyperFrames 为后续 stretch。 +- **结构模型通用化(genre-aware)**:去除带货硬编码假设。 +- **迁移 = 规则打底 + LLM 剪辑专家**;编辑知识库注入 prompt。 +- **缺口补全**多策略适配器:默认 `text_card`,可选 `stock_clip`(Pexels 免费视频检索,借鉴 OpenMontage 「免费档案补全」范式、非依赖;`PEXELS_API_KEY` 缺失时优雅回退)。 +- **多版本**通过 `BlueprintPatch` 结构化变换 + 规则迁移产出(`POST /versions`,3 个预设:高节奏 / 高质感 / 强钩子),差异显式可解释、无需 LLM/key。 + +--- + +## 7. 已知限制与后续 + +- **ASR 未集成**(Q-001):脚本结构暂无真实转写,靠画面描述 + 元数据。 +- `/migrate` 目前**同步**(~25s LLM),后续改异步 Job。 +- 槽位 `minDurationSec` 取自样例绝对值,迁移到更短目标时已用**时长软降权**缓解,更优解是按目标时长缩放。 +- 文字卡与底部字幕可能叠加;横屏样例会被竖屏裁切(可选横屏输出)。 +- 端到端真实成片已在 CLI 验证([案例](knowledge/Cases/sample1-first-case.md));产品内全链路验证待有效 ARK key 后复跑。 + +--- + +## 8. 运行与验证 + +```bash +# API +cd apps/api && npm install +npm test # 单元/契约测试(离线,LLM 走 stub) +npm run check:ark # 火山方舟连通性(需 .env 配 key) +npm start # 启动 API(默认 :3001) + +# Web +cd apps/web && npm install && npm run dev # :5173,代理 /api + +# 离线演示脚本(CLI) +cd apps/api +npm run analyze:demo # 合成片信号层 +npx tsx src/agents/scripts/case-demo.ts