SOTA 视频智能体 V3.0 架构设计
工业级多路径视频生成系统 | 支持宣传片/AI动漫/AI电影
1. 核心理念:多路径生成 + 分级一致性控制
1.1 痛点分析
| 场景 |
痛点 |
根因 |
| 宣传视频 |
品牌元素不一致 |
缺乏 Anchor 锁定 |
| AI 动漫 |
角色脸崩/身份漂移 |
无 LoRA + IP-Adapter |
| AI 电影 |
场景割裂/物理违和 |
无 Scene Graph |
| 长视频 |
生成时间过长 |
串行流水线 |
| 成本 |
每个镜头都调用高价 API |
无 LOD 分级 |
1.2 解决方案总览
┌─────────────────────────────────────────────────────────────────────────┐
│ SOTA Video Agent V3.0 Pipeline │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ [Input] ─► [Planner/Opus] ─► [Consistency Engine] ─► [Generation Hub] │
│ │ │ │ │
│ Director's Brief Style Bible Multi-Path │
│ + Shot List Character Bible Router │
│ World Bible │ │
│ │ ▼ │
│ └──────────► [LOD Renderer] │
│ │ │
│ Draft│Preview│Final │
│ │ │
│ ▼ │
│ [VLM QC + Self-Heal] │
│ │ │
│ ▼ │
│ [Remotion Assembler] │
│ + Audio-First Sync │
│ + LUT Color Grading │
│ │ │
│ ▼ │
│ [Export] │
│ │
└─────────────────────────────────────────────────────────────────────────┘
2. 多路径生成架构 (Generation Hub)
2.1 六大生成路径
| 路径 |
输入 |
输出 |
最佳场景 |
推荐模型 |
| T2I |
文本提示词 |
静态图 |
Anchor 生成 |
Flux Pro, SDXL, Midjourney |
| I2I |
参考图+提示词 |
变体图 |
风格迁移/场景变体 |
Flux Redux, SD ControlNet |
| I2V |
首帧图 |
5-10s视频 |
精确控制开场 |
Kling Pro, Runway Gen-3 |
| T2V |
纯文本 |
5-20s视频 |
快速原型 |
Sora, Veo 3, Pika |
| F2F-V |
首帧+尾帧 |
插值视频 |
转场/变形 |
Kling 2.5+, Pika Frames |
| V2V |
源视频+提示词 |
风格化视频 |
统一画风 |
Runway Gen-3, LTX Video |
2.2 路径选择策略
// Minimal description of a shot, used only to pick a generation path.
interface ShotSpec {
  // What the shot depicts; this drives the routing decision.
  type: 'character' | 'world' | 'vfx' | 'transition';
  hasAnchor: boolean;   // a locked anchor image already exists for this shot
  hasEndFrame: boolean; // an end frame exists (enables first/last-frame interpolation)
  // Production tier. NOTE(review): not consulted by selectGenerationPath — confirm intended use.
  complexity: 'S' | 'A' | 'B';
}
/**
 * Route a shot to a generation path.
 *
 * Routing rules (in priority order):
 * 1. transition shots with an end frame  → first/last-frame interpolation
 * 2. character/world shots with an anchor → image-to-video (most stable)
 * 3. vfx shots                            → text-to-video (most freedom)
 * 4. everything else                      → generate an anchor first, then I2V
 */
function selectGenerationPath(shot: ShotSpec): GenerationPath {
  switch (shot.type) {
    case 'transition':
      // Kling / Pika Frames interpolate between the two keyframes.
      if (shot.hasEndFrame) {
        return 'F2F-V';
      }
      break;
    case 'character':
    case 'world':
      // Kling Pro / Runway driven from the locked anchor image.
      if (shot.hasAnchor) {
        return 'I2V';
      }
      break;
    case 'vfx':
      // Sora / Veo: pure text-to-video gives the model the most freedom.
      return 'T2V';
  }
  // No anchor yet: produce one via T2I, then animate it with I2V.
  return 'T2I → I2V';
}
2.3 模型选型矩阵 (2025 Q1)
S-Tier: 主力模型
| 模型 |
优势 |
劣势 |
最佳用途 |
成本 |
| Google Veo 3 |
4K画质/物理真实/原生音频 |
API限制/排队 |
电影级最终输出 |
$$$$ |
| OpenAI Sora |
叙事理解/Storyboard/20s长度 |
1080p/无原生音频 |
故事驱动创作 |
$$$ |
| Kuaishou Kling Pro |
I2V最强/角色一致性/2min长度 |
T2V弱于Sora |
动漫/角色视频 |
$$ |
A-Tier: 效率模型
| 模型 |
优势 |
劣势 |
最佳用途 |
成本 |
| Runway Gen-3 |
Director Mode/Motion Brush |
默认720p |
精细控制 |
$$ |
| Pika 2.2 |
Pikaframes多关键帧/1080p |
10s限制 |
转场/社交短片 |
$ |
| Luma Dream Machine |
自然语言编辑 |
质量不稳定 |
快速迭代 |
$ |
B-Tier: 开源/本地
| 模型 |
优势 |
劣势 |
最佳用途 |
成本 |
| LTX Video 13B |
开源/商用许可/快速 |
768x512限制 |
草稿/批量 |
本地GPU |
| Stable Video Diffusion |
开源/可微调 |
质量一般 |
实验/训练 |
本地GPU |
| Wan2.1 |
阿里开源/中文理解 |
新模型待验证 |
国内部署 |
本地GPU |
3. 一致性控制系统 (Consistency Engine)
3.1 三圣经体系
// Style Bible — the project's unified visual language.
interface StyleBible {
  // Overall visual register
  look: 'cinematic' | 'anime' | 'documentary' | 'commercial';
  colorPalette: {
    primary: string;   // dominant color, HEX
    secondary: string; // supporting color, HEX
    accent: string;    // accent color, HEX
  };
  lighting: 'natural' | 'dramatic' | 'soft' | 'high-key' | 'low-key';
  // Post-production unification
  lutFile?: string; // .cube LUT file applied during grading
  filmGrain: 'none' | 'subtle' | 'heavy';
  aspectRatio: '16:9' | '9:16' | '2.39:1' | '4:3';
  // Reference image library
  moodboardUrls: string[];
}
// Character Bible — identity locking for every recurring character.
interface CharacterBible {
  characters: Array<{
    id: string;
    name: string;
    description: string;
    // Anchor system (the most critical piece for identity consistency)
    anchorImages: {
      front: string;        // front-view anchor
      side: string;         // side view
      threeQuarter: string; // 3/4 view
    };
    // LoRA configuration (optional in general; required for anime)
    loraModel?: string; // path to a pre-trained LoRA
    loraWeight: number; // recommended 0.5-0.7
    // IP-Adapter configuration
    ipAdapterStrength: number; // 0.4-0.7
    // Attributes to hold fixed across shots
    lockAttributes: ('face' | 'hair' | 'clothing' | 'body')[];
  }>;
}
// World Bible — scene continuity across shots.
interface WorldBible {
  locations: Array<{
    id: string;
    name: string;
    description: string;
    // Scene anchors at three framings
    anchorImages: {
      wide: string;   // wide / establishing view
      medium: string; // medium view
      detail: string; // detail view
    };
    // Physical conditions, kept stable across a scene
    timeOfDay: 'dawn' | 'day' | 'golden-hour' | 'dusk' | 'night';
    weather: 'clear' | 'cloudy' | 'rain' | 'snow' | 'fog';
    // Scene contents
    props: string[]; // list of props present in the location
    lighting: string; // free-text light-source description
  }>;
  // Scene Graph: registry of generated anchors
  sceneGraph: Record<string, string>; // tag → S3 URL
}
3.2 Anchor 生成策略
async def generate_anchors(brief: DirectorsBrief) -> Anchors:
    """
    Phase 0: lock every anchor image BEFORE any video generation starts.

    Anchors are produced with the highest-quality T2I model so that all
    later image-to-video passes stay consistent with them.

    Args:
        brief: director's brief holding characters, locations and style.
    Returns:
        Mapping of anchor key ("char_<id>" / "loc_<id>") to generated image.
    """
    anchors = {}
    # 1. Character anchors (Flux Pro, plus optional face enhancement)
    for char in brief.characters:
        prompt = f"""
        Character portrait of {char.description}
        Front view, neutral expression, studio lighting
        High detail, photorealistic, 8K
        """
        # Generate the high-quality hero image with Flux Pro
        anchor = await flux_pro.generate(prompt, style=brief.style_bible.look)
        # Optional: FaceID pass to strengthen facial consistency
        if char.type == 'human':
            anchor = await face_enhance(anchor)
        anchors[f"char_{char.id}"] = anchor
    # 2. Location anchors
    for loc in brief.locations:
        prompt = f"""
        Wide establishing shot of {loc.description}
        {loc.time_of_day}, {loc.weather}
        Cinematic composition, detailed environment
        """
        anchor = await flux_pro.generate(prompt)
        anchors[f"loc_{loc.id}"] = anchor
    # 3. Upload to S3 and register each anchor in the Scene Graph
    for key, img in anchors.items():
        url = await s3.upload(img, f"anchors/{brief.job_id}/{key}.png")
        brief.world_bible.scene_graph[key] = url
    return anchors
3.3 LoRA + IP-Adapter 组合策略
# Helper for ComfyUI / Forge workflows.
def build_consistency_pipeline(shot: Shot, bible: CharacterBible):
    """
    Assemble a node list combining IP-Adapter, an optional character LoRA
    and up to two ControlNets for the strongest identity/scene consistency.
    """
    char = bible.get_character(shot.character_id)
    # Base layer: feed the character's front anchor through IP-Adapter.
    nodes = [
        {
            "node": "IPAdapter",
            "image": char.anchor_images.front,
            "weight": char.ip_adapter_strength,  # 0.5-0.7
            "mode": "face_plus",  # face-enhanced matching mode
        }
    ]
    # Optional: character LoRA (recommended for anime).
    if char.lora_model:
        nodes.append(
            {
                "node": "LoRALoader",
                "model": char.lora_model,
                "weight": char.lora_weight,  # 0.5-0.7
            }
        )
    # Pose control via an OpenPose ControlNet.
    if shot.has_pose_reference:
        nodes.append(
            {
                "node": "ControlNet",
                "type": "openpose",
                "image": shot.pose_reference,
                "weight": 0.8,
            }
        )
    # Scene layout control via a depth ControlNet.
    if shot.location_anchor:
        nodes.append(
            {
                "node": "ControlNet",
                "type": "depth",
                "image": shot.location_anchor,
                "weight": 0.6,
            }
        )
    return nodes
4. LOD 分级渲染系统
4.1 三级渲染策略
// Rendering level-of-detail tiers.
type LODLevel = 'draft' | 'preview' | 'final';

// Cost/latency/quality trade-off per tier. The cost/time/resolution/useCase
// fields are literal types that serve as inline documentation.
interface LODConfig {
  draft: {
    // Static stills only, animated later with a Ken Burns effect
    generator: 'flux-schnell' | 'sdxl-turbo';
    cost: '$0.002/shot';
    time: '2s';
    resolution: '768x512';
    useCase: '脚本确认/结构预览';
  };
  preview: {
    // Fast video models
    generator: 'kling-turbo' | 'luma-photon' | 'ltx-video';
    cost: '$0.10/shot';
    time: '15s';
    resolution: '720p';
    useCase: '动作逻辑确认/客户中期审核';
  };
  final: {
    // High-quality video models
    generator: 'kling-pro' | 'veo-3' | 'sora';
    cost: '$1.50+/shot';
    time: '3min';
    resolution: '1080p-4K';
    useCase: '最终交付';
  };
}
4.2 智能升级策略
class LODRenderer:
    """
    Cost-optimisation core: render each shot at the requested LOD only,
    reusing cached results, so LOD upgrades happen only when necessary.
    """

    async def render_project(self, project: Project, target_lod: LODLevel):
        """Render every shot of `project` at `target_lod`, reusing cache hits."""
        results = []
        for shot in project.shots:
            # Cache check. NOTE(review): only the exact target LOD is looked
            # up — an already-cached higher LOD is not reused; confirm whether
            # the cache itself handles LOD fallback.
            cached = await self.cache.get(shot.id, target_lod)
            if cached:
                results.append(cached)
                continue
            # Core rule: regardless of LOD, ensure the anchor exists first.
            if not shot.anchor_url:
                shot.anchor_url = await self.generate_anchor(shot)
            # Pick the generator by LOD tier.
            if target_lod == 'draft':
                # Draft: anchor image only; Remotion adds motion downstream.
                result = {
                    "type": "image",
                    "url": shot.anchor_url,
                    "effect": self.select_ken_burns(shot),
                    "duration": shot.duration
                }
            elif target_lod == 'preview':
                result = await self.kling_turbo.i2v(
                    image=shot.anchor_url,
                    prompt=shot.prompt,
                    duration=5
                )
            elif target_lod == 'final':
                result = await self.kling_pro.i2v(
                    image=shot.anchor_url,
                    prompt=shot.enhanced_prompt,
                    duration=10,
                    resolution='1080p'
                )
            # Cache so re-renders at this LOD are free.
            await self.cache.set(shot.id, target_lod, result)
            results.append(result)
        return results

    def select_ken_burns(self, shot: Shot) -> str:
        """Pick a push/pull camera effect from the shot type (default: static)."""
        effects = {
            'establishing': 'slow_zoom_out',
            'close-up': 'slow_zoom_in',
            'medium': 'subtle_pan',
            'wide': 'static_with_parallax'
        }
        return effects.get(shot.shot_type, 'static')
5. VLM 质检与自愈系统
5.1 多维度质检
class VLMQualityControl:
    """
    Visual QC using a VLM (Gemini 2.0 / GPT-4V): samples keyframes from a
    generated clip and scores each against a fixed battery of checks.
    """

    # Each entry: check id, the VLM prompt, and the minimum passing score.
    QUALITY_CHECKS = [
        {
            "id": "identity_drift",
            "prompt": "Compare the face in this video frame with the reference. Is it the same person? Look for: eye shape, nose, jawline, hair.",
            "threshold": 0.8
        },
        {
            "id": "physics_violation",
            "prompt": "Does this video clip contain any physics violations? Look for: objects floating unnaturally, limbs bending wrong, impossible movements.",
            "threshold": 0.9
        },
        {
            "id": "visual_artifacts",
            "prompt": "Does this video have visual artifacts? Look for: blurry regions, morphing glitches, temporal flickering, hand/finger deformities.",
            "threshold": 0.85
        },
        {
            "id": "brief_alignment",
            "prompt": "Does this video match the following brief? Brief: {brief}",
            "threshold": 0.75
        },
        {
            "id": "text_legibility",
            "prompt": "If there is text in this video, is it readable and spelled correctly?",
            "threshold": 0.95
        }
    ]

    async def inspect(self, video_url: str, shot: Shot, brief: DirectorsBrief) -> QCReport:
        """
        Run every QUALITY_CHECKS prompt over sampled keyframes and collect
        issues; the report passes only if no high-severity issue was found.
        """
        # NOTE(review): `brief` is accepted but never used — the {brief}
        # placeholder is filled with shot.visual_description, and another
        # block calls inspect(video_url, shot) with only two arguments.
        # Confirm the intended signature.
        issues = []
        # Sample a handful of keyframes to bound VLM cost.
        keyframes = await self.extract_keyframes(video_url, count=5)
        for check in self.QUALITY_CHECKS:
            for i, frame in enumerate(keyframes):
                result = await self.vlm.analyze(
                    image=frame,
                    reference=shot.anchor_url,
                    prompt=check["prompt"].format(brief=shot.visual_description)
                )
                # Below threshold: record the issue with severity and a fix hint.
                if result.score < check["threshold"]:
                    issues.append({
                        "frameId": f"frame_{i}",
                        "type": check["id"],
                        "severity": self.calculate_severity(result.score, check["threshold"]),
                        "description": result.reason,
                        "suggestion": result.fix_suggestion
                    })
        return QCReport(
            overall_score=self.calculate_overall_score(issues),
            issues=issues,
            passed=len([i for i in issues if i["severity"] == "high"]) == 0
        )
5.2 自愈重试机制
class SelfHealingGenerator:
    """
    Generation loop with self-healing: generate → VLM QC → LLM prompt
    refinement → retry, bounded by MAX_RETRIES, with a Draft-LOD fallback.
    """

    # Hard cap on generate/QC/refine cycles per shot.
    MAX_RETRIES = 3

    async def generate_with_healing(
        self,
        shot: Shot,
        anchor: str,
        attempt: int = 0
    ) -> GenerationResult:
        """
        Generate `shot` from `anchor`, retrying with refined prompts until
        QC passes or MAX_RETRIES is exhausted (then degrade to Draft).
        Note: mutates shot.prompt / shot.negative_prompt between attempts.
        """
        if attempt >= self.MAX_RETRIES:
            # Out of retries: fall back to the cheap Draft LOD as a floor.
            return await self.draft_fallback(shot, anchor)
        # 1. Generate the clip (image-to-video from the anchor).
        video_url = await self.generator.i2v(
            image=anchor,
            prompt=shot.prompt,
            negative_prompt=shot.negative_prompt
        )
        # 2. VLM quality check.
        qc_report = await self.vlm_qc.inspect(video_url, shot)
        if qc_report.passed:
            return GenerationResult(
                url=video_url,
                qc_score=qc_report.overall_score,
                attempts=attempt + 1
            )
        # 3. Self-heal: have the LLM diagnose the QC issues and rewrite the prompt.
        healing_prompt = await self.opus.refine_prompt(
            original_prompt=shot.prompt,
            issues=qc_report.issues,
            anchor_description=shot.anchor_description
        )
        # 4. Update the shot and recurse; attempt+1 guarantees termination.
        shot.prompt = healing_prompt
        shot.negative_prompt = self.build_negative_from_issues(qc_report.issues)
        return await self.generate_with_healing(shot, anchor, attempt + 1)

    def build_negative_from_issues(self, issues: list) -> str:
        """Map QC issue types onto negative-prompt fragments, comma-joined."""
        negatives = []
        for issue in issues:
            if issue["type"] == "identity_drift":
                negatives.append("different face, wrong person, inconsistent features")
            elif issue["type"] == "physics_violation":
                negatives.append("floating objects, broken physics, impossible movement")
            elif issue["type"] == "visual_artifacts":
                negatives.append("blur, glitch, morph, deformed hands, extra fingers")
        return ", ".join(negatives)
6. 后期合成流水线
6.1 Audio-First 节拍同步
import librosa
class AudioFirstComposer:
    """
    Audio-first composition: the BGM drives the cutting rhythm so that
    shot transitions land on musical beats.
    """

    def analyze_bgm(self, audio_path: str) -> BeatMap:
        """
        Extract tempo, beat times and energy peaks from a BGM file.

        Args:
            audio_path: path to an audio file readable by librosa.
        Returns:
            BeatMap with tempo, beat timestamps, onset-energy peak
            timestamps (emphasis points) and total duration, in seconds.
        """
        y, sr = librosa.load(audio_path)
        # Beat tracking: global tempo + per-beat frame indices.
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)
        # Onset-strength peaks mark high-energy moments (for emphasis cuts).
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        peaks = librosa.util.peak_pick(onset_env, pre_max=3, post_max=3, pre_avg=3, post_avg=5, delta=0.5, wait=10)
        peak_times = librosa.frames_to_time(peaks, sr=sr)
        return BeatMap(
            tempo=tempo,
            beats=beat_times.tolist(),
            peaks=peak_times.tolist(),
            duration=librosa.get_duration(y=y, sr=sr)
        )

    def align_shots_to_beats(self, shots: list, beat_map: BeatMap) -> list:
        """
        Adjust each shot's duration so that cuts land on beats.

        Args:
            shots: list of shot dicts, each with at least a "duration" key
                   (they are re-spread with **shot below, so they must be
                   mappings — the old attribute access was inconsistent).
            beat_map: BeatMap from analyze_bgm.
        Returns:
            New list of shot dicts with "duration", "start_time" and
            "cut_on_beat" (True only when the duration was actually snapped).
        """
        aligned_shots = []
        current_time = 0
        beat_idx = 0
        for shot in shots:
            # Fix: shots are dicts (spread with **shot), so index the
            # duration instead of reading it as an attribute.
            target_duration = shot["duration"]
            # Skip beats already behind the playhead.
            while beat_idx < len(beat_map.beats) and beat_map.beats[beat_idx] < current_time:
                beat_idx += 1
            snapped = False
            if beat_idx < len(beat_map.beats):
                time_to_beat = beat_map.beats[beat_idx] - current_time
                # Snap only when the shot length is within 0.5s of the next beat.
                if abs(target_duration - time_to_beat) < 0.5:
                    target_duration = time_to_beat
                    snapped = True
            aligned_shots.append({
                **shot,
                "duration": target_duration,
                "start_time": current_time,
                # Fix: was unconditionally True; now reflects actual snapping.
                "cut_on_beat": snapped
            })
            current_time += target_duration
            # Preserved from the original: always advance past one beat per
            # shot. NOTE(review): this can skip a still-upcoming beat when no
            # snap occurred — confirm this is intentional pacing behavior.
            beat_idx += 1
        return aligned_shots
6.2 Remotion 合成配置
// remotion/VideoComposition.tsx
import { AbsoluteFill, Sequence, Audio, Video, Img } from 'remotion';
import { interpolate, useCurrentFrame, useVideoConfig } from 'remotion';
// Manifest consumed by the Remotion composition: shot timeline, audio
// layers and global post-processing settings.
interface ProjectManifest {
  shots: Array<{
    type: 'video' | 'image'; // images get a Ken Burns treatment
    url: string;
    effect?: 'zoom_in' | 'zoom_out' | 'pan_left' | 'pan_right';
    startFrame: number;      // timeline position, in frames
    durationFrames: number;  // shot length, in frames
    transition?: 'cut' | 'dissolve' | 'fade';
  }>;
  audio: {
    bgmUrl: string;
    voiceoverUrl?: string; // optional narration track
    bgmVolume: number;
    voiceoverVolume: number;
  };
  postProcess: {
    lutFile?: string;   // .cube LUT rendered as an overlay
    filmGrain?: number; // grain intensity; 0 disables the overlay
    vignette?: number;  // NOTE(review): declared but not rendered in VideoComposition — confirm where it is applied
  };
}
/**
 * Top-level Remotion composition: lays out the shot sequence, mixes the
 * audio layers, and stacks the post-process overlays from the manifest.
 */
export const VideoComposition: React.FC<{ manifest: ProjectManifest }> = ({ manifest }) => {
  const { fps } = useVideoConfig();
  // Fix: `{n && <X/>}` renders a literal "0" when n === 0 (React prints
  // falsy numbers). filmGrain: 0 is a real value in several project
  // configs, so guard with an explicit comparison instead.
  const grain = manifest.postProcess.filmGrain;
  return (
    <AbsoluteFill>
      {/* Video / still-image shot sequence */}
      {manifest.shots.map((shot, i) => (
        <Sequence key={i} from={shot.startFrame} durationInFrames={shot.durationFrames}>
          {shot.type === 'video' ? (
            <Video src={shot.url} />
          ) : (
            <KenBurnsImage src={shot.url} effect={shot.effect || 'zoom_in'} />
          )}
          {/* Transition overlay */}
          {shot.transition === 'dissolve' && (
            <DissolveTransition durationFrames={fps * 0.5} />
          )}
        </Sequence>
      ))}
      {/* Audio layers: BGM plus optional voiceover */}
      <Audio src={manifest.audio.bgmUrl} volume={manifest.audio.bgmVolume} />
      {manifest.audio.voiceoverUrl && (
        <Audio src={manifest.audio.voiceoverUrl} volume={manifest.audio.voiceoverVolume} />
      )}
      {/* Post-process overlay layers */}
      {manifest.postProcess.lutFile && (
        <LUTOverlay lutFile={manifest.postProcess.lutFile} />
      )}
      {grain !== undefined && grain > 0 && (
        <FilmGrainOverlay intensity={grain} />
      )}
    </AbsoluteFill>
  );
};
// Ken Burns effect: slowly scales a still image over the clip's duration.
const KenBurnsImage: React.FC<{ src: string; effect: string }> = ({ src, effect }) => {
  const frame = useCurrentFrame();
  const { durationInFrames } = useVideoConfig();
  // 'zoom_in' travels 1 → 1.2 across the clip; any other effect zooms back out.
  const [startScale, endScale] = effect === 'zoom_in' ? [1, 1.2] : [1.2, 1];
  const scale = interpolate(frame, [0, durationInFrames], [startScale, endScale], {
    extrapolateRight: 'clamp',
  });
  return (
    <AbsoluteFill style={{ transform: `scale(${scale})` }}>
      <Img src={src} style={{ width: '100%', height: '100%', objectFit: 'cover' }} />
    </AbsoluteFill>
  );
};
6.3 FFmpeg LUT 调色
# Unify the color style across clips generated by different models:
# apply a 3D LUT, add temporal film-grain noise, then a vignette;
# the audio stream is copied through untouched.
ffmpeg -i input.mp4 \
-vf "lut3d=cinematic_warm.cube, \
noise=alls=10:allf=t+u, \
vignette=angle=PI/4" \
-c:a copy \
output.mp4
# Common LUT presets
# - cinematic_warm.cube : warm film look
# - anime_vibrant.cube : vibrant anime palette
# - documentary_neutral.cube : neutral documentary grade
# - commercial_bright.cube : bright commercial look
7. 并发调度架构
7.1 Map-Reduce 并行生成
import asyncio
from typing import List
class ParallelOrchestrator:
    """
    Core optimisation: compress ~60 minutes of serial generation into ~3
    minutes by fanning shots out concurrently (Map-Reduce style).
    """

    def __init__(self, max_concurrent: int = 10):
        # Semaphore caps in-flight generations; RateLimiter respects API quotas.
        self.semaphore = asyncio.Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_minute=60)

    async def produce_parallel(self, project: Project) -> List[GenerationResult]:
        """
        Map-Reduce flow:
        1. Map: split the script into independent shot tasks
        2. Execute: run them concurrently
        3. Reduce: collect results, degrading failures to Draft
        """
        # Phase 0: pre-generate all anchors serially (consistency first).
        anchors = await self.generate_anchors(project.brief)
        # Phase 1: Map — build one coroutine per shot.
        tasks = []
        for shot in project.shots:
            task = self.generate_shot_with_limit(shot, anchors)
            tasks.append(task)
        # Phase 2: Gather — run concurrently.
        # The semaphore bounds concurrency to avoid API rate limiting;
        # return_exceptions keeps one failure from cancelling the batch.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        # Phase 3: degrade failed shots to Draft instead of failing the job.
        final_results = []
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                fallback = await self.draft_fallback(project.shots[i])
                final_results.append(fallback)
            else:
                final_results.append(result)
        return final_results

    async def generate_shot_with_limit(self, shot: Shot, anchors: dict):
        """Single-shot generation guarded by the semaphore and rate limiter."""
        async with self.semaphore:
            await self.rate_limiter.acquire()
            anchor = anchors.get(shot.anchor_key)
            return await self.self_healing_generator.generate_with_healing(
                shot=shot,
                anchor=anchor
            )
7.2 进度追踪
// Live progress snapshot for one generation job (pushed to clients over SSE).
interface GenerationProgress {
  jobId: string;
  totalShots: number;
  completedShots: number; // shots that have reached a terminal state
  currentPhase: 'anchors' | 'generation' | 'qc' | 'assembly';
  shots: Array<{
    id: string;
    // Per-shot lifecycle, including the self-healing 'healing' state.
    status: 'pending' | 'generating' | 'qc' | 'healing' | 'done' | 'failed';
    attempts: number;      // generation attempts consumed so far
    qcScore?: number;      // present once QC has run for this shot
    thumbnailUrl?: string; // preview thumbnail when available
  }>;
  estimatedTimeRemaining: number; // NOTE(review): unit (seconds vs ms) not specified — confirm
}
// SSE progress push: poll the job once per second and yield each snapshot.
async function* streamProgress(jobId: string): AsyncGenerator<GenerationProgress> {
  while (true) {
    const progress = await getJobProgress(jobId);
    yield progress;
    // Fix: terminate when every shot has reached a terminal state
    // ('done' or 'failed'). Checking completedShots === totalShots alone
    // would loop forever if any shot ends up 'failed'.
    const allSettled = progress.shots.every(
      (s) => s.status === 'done' || s.status === 'failed',
    );
    if (allSettled || progress.completedShots === progress.totalShots) {
      break;
    }
    await sleep(1000);
  }
}
8. 应用场景适配
8.1 宣传视频配置
// Promotional-video preset: brand lock, fast pacing, 30-second spot.
const promoVideoConfig: ProjectConfig = {
  style: {
    look: 'commercial',
    colorMood: 'vibrant',
    pacing: 'fast',
    duration: 30 // 30s — the sweet spot for promo length
  },
  consistency: {
    // Hard-lock brand assets across every shot
    brandPack: {
      logo: 's3://brand/logo.png',
      colors: ['#FF6B00', '#1A1A1A'],
      fonts: ['Montserrat', 'PingFang SC']
    },
    characterLock: false, // promos usually have no recurring character
    sceneLock: true // locations must stay consistent
  },
  generation: {
    preferredPath: 'T2I → I2V',
    lodStrategy: 'preview_then_final',
    primaryModel: 'kling-pro'
  },
  postProcess: {
    lut: 'commercial_bright.cube',
    filmGrain: 0,
    audioFirst: true,
    bgmStyle: 'upbeat'
  }
};
8.2 AI 动漫配置
// Anime preset: character identity consistency is the top priority.
const animeConfig: ProjectConfig = {
  style: {
    look: 'anime',
    colorMood: 'vibrant',
    pacing: 'dynamic',
    duration: 180 // 3-minute episode
  },
  consistency: {
    // Character consistency has the highest priority
    characterLock: true,
    characterConfig: {
      useLoRA: true, // LoRA is mandatory for anime characters
      loraWeight: 0.6,
      ipAdapterWeight: 0.5,
      faceIdStrength: 0.8
    },
    sceneLock: true
  },
  generation: {
    preferredPath: 'I2V', // always generate from an anchor image
    lodStrategy: 'skip_preview', // anime goes straight to Final
    primaryModel: 'kling-pro', // Kling performs best on anime per the matrix above
    fallbackModel: 'runway-gen3'
  },
  postProcess: {
    lut: 'anime_vibrant.cube',
    filmGrain: 0,
    lineEnhance: true, // line-art enhancement
    audioFirst: false, // anime is usually picture-first
    bgmStyle: 'jpop'
  }
};
8.3 AI 电影配置
// Film preset: cinematic look, scene-graph continuity, strict QC gate.
const filmConfig: ProjectConfig = {
  style: {
    look: 'cinematic',
    colorMood: 'dramatic',
    pacing: 'moderate',
    duration: 600, // 10-minute short film
    aspectRatio: '2.39:1' // anamorphic widescreen
  },
  consistency: {
    characterLock: true,
    sceneGraphEnabled: true, // enable the scene graph
    worldBibleRequired: true, // a World Bible is mandatory
    characterConfig: {
      useLoRA: true,
      multiAngleAnchors: true, // anchors from multiple angles
      expressionVariants: true // expression variants per character
    }
  },
  generation: {
    preferredPath: 'I2V',
    lodStrategy: 'full_pipeline', // Draft → Preview → Final
    primaryModel: 'veo-3', // Veo for film-grade output
    fallbackModel: 'sora',
    maxRetries: 5 // allow more self-healing retries
  },
  postProcess: {
    lut: 'cinematic_warm.cube',
    filmGrain: 0.3,
    vignette: 0.2,
    audioFirst: true,
    dynamicRange: 'cinematic',
    surroundSound: true
  },
  qc: {
    strictMode: true,
    minScore: 85,
    humanReviewRequired: true // human sign-off before delivery
  }
};
9. 技术栈总结
| 层级 |
技术选型 |
用途 |
| 规划层 |
Claude Opus / GPT-4 |
Director's Brief 生成、Prompt 优化、自愈分析 |
| T2I 层 |
Flux Pro / SDXL |
Anchor 图生成 |
| I2V 层 |
Kling Pro / Runway Gen-3 |
主力视频生成 |
| T2V 层 |
Sora / Veo 3 |
叙事型/特效镜头 |
| F2F 层 |
Kling 2.5+ / Pika Frames |
转场插值 |
| 一致性 |
LoRA + IP-Adapter + Scene Graph |
身份/场景锁定 |
| 质检 |
Gemini 2.0 / GPT-4V |
VLM 质量检测 |
| 合成 |
Remotion + FFmpeg |
时间线编辑、LUT、导出 |
| 存储 |
R2 / S3 |
资产存储 |
| 调度 |
Python asyncio |
并发控制 |
10. 参考资料
猪哥云(四川)网络科技有限公司 | 合规网 www.hegui.com
猪哥云-数据产品部-Maurice | maurice_wen@proton.me
2025 猪哥云-灵阙企业级智能体平台