SOTA 视频智能体 V3.0 架构设计

工业级多路径视频生成系统 | 支持宣传片/AI动漫/AI电影


1. 核心理念:多路径生成 + 分级一致性控制

1.1 痛点分析

场景 痛点 根因
宣传视频 品牌元素不一致 缺乏 Anchor 锁定
AI 动漫 角色脸崩/身份漂移 无 LoRA + IP-Adapter
AI 电影 场景割裂/物理违和 无 Scene Graph
长视频 生成时间过长 串行流水线
成本 每个镜头都调用高价 API 无 LOD 分级

1.2 解决方案总览

┌─────────────────────────────────────────────────────────────────────────┐
│                    SOTA Video Agent V3.0 Pipeline                        │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                          │
│  [Input] ─► [Planner/Opus] ─► [Consistency Engine] ─► [Generation Hub]  │
│                   │                    │                      │          │
│            Director's Brief     Style Bible              Multi-Path      │
│            + Shot List          Character Bible          Router          │
│                                 World Bible                 │            │
│                                      │                      ▼            │
│                                      └──────────► [LOD Renderer]         │
│                                                        │                 │
│                                                   Draft│Preview│Final    │
│                                                        │                 │
│                                                        ▼                 │
│                                              [VLM QC + Self-Heal]        │
│                                                        │                 │
│                                                        ▼                 │
│                                              [Remotion Assembler]        │
│                                              + Audio-First Sync          │
│                                              + LUT Color Grading         │
│                                                        │                 │
│                                                        ▼                 │
│                                                   [Export]               │
│                                                                          │
└─────────────────────────────────────────────────────────────────────────┘

2. 多路径生成架构 (Generation Hub)

2.1 六大生成路径

路径 输入 输出 最佳场景 推荐模型
T2I 文本提示词 静态图 Anchor 生成 Flux Pro, SDXL, Midjourney
I2I 参考图+提示词 变体图 风格迁移/场景变体 Flux Redux, SD ControlNet
I2V 首帧图 5-10s视频 精确控制开场 Kling Pro, Runway Gen-3
T2V 纯文本 5-20s视频 快速原型 Sora, Veo 3, Pika
F2F-V 首帧+尾帧 插值视频 转场/变形 Kling 2.5+, Pika Frames
V2V 源视频+提示词 风格化视频 统一画风 Runway Gen-3, LTX Video

2.2 路径选择策略

/** Per-shot metadata used to route the shot to a generation path. */
interface ShotSpec {
  type: 'character' | 'world' | 'vfx' | 'transition';
  hasAnchor: boolean;    // a locked Anchor reference image exists for this shot
  hasEndFrame: boolean;  // an explicit final frame exists (enables interpolation)
  complexity: 'S' | 'A' | 'B';
}

/**
 * All supported generation paths (see §2.1), plus the composite bootstrap
 * path used when no Anchor exists yet.
 * (Previously this return type was referenced but never declared.)
 */
type GenerationPath = 'T2I' | 'I2I' | 'I2V' | 'T2V' | 'F2F-V' | 'V2V' | 'T2I → I2V';

/**
 * Pick the most reliable path for a shot.
 * Priority: frame interpolation > anchored I2V > free-form T2V > bootstrap.
 */
function selectGenerationPath(shot: ShotSpec): GenerationPath {
  // 1. Transition shots with a known end frame → first/last-frame interpolation.
  if (shot.type === 'transition' && shot.hasEndFrame) {
    return 'F2F-V';  // Kling/Pika Frames
  }

  // 2. Character/world shots that already have an Anchor → I2V (most stable).
  // Explicit comparisons instead of Array.includes: the literal-typed array
  // ('character' | 'world')[] rejects the wider union under strict TS.
  if (shot.hasAnchor && (shot.type === 'character' || shot.type === 'world')) {
    return 'I2V';  // Kling Pro / Runway
  }

  // 3. VFX shots → T2V (more creative freedom).
  if (shot.type === 'vfx') {
    return 'T2V';  // Sora / Veo
  }

  // 4. No Anchor yet → generate one via T2I first, then run I2V.
  return 'T2I → I2V';
}

2.3 模型选型矩阵 (2025 Q1)

S-Tier: 主力模型

模型 优势 劣势 最佳用途 成本
Google Veo 3 4K画质/物理真实/原生音频 API限制/排队 电影级最终输出 $$$$
OpenAI Sora 叙事理解/Storyboard/20s长度 1080p/无原生音频 故事驱动创作 $$$
Kuaishou Kling Pro I2V最强/角色一致性/2min长度 T2V弱于Sora 动漫/角色视频 $$

A-Tier: 效率模型

模型 优势 劣势 最佳用途 成本
Runway Gen-3 Director Mode/Motion Brush 默认720p 精细控制 $$
Pika 2.2 Pikaframes多关键帧/1080p 10s限制 转场/社交短片 $
Luma Dream Machine 自然语言编辑 质量不稳定 快速迭代 $

B-Tier: 开源/本地

模型 优势 劣势 最佳用途 成本
LTX Video 13B 开源/商用许可/快速 768x512限制 草稿/批量 本地GPU
Stable Video Diffusion 开源/可微调 质量一般 实验/训练 本地GPU
Wan2.1 阿里开源/中文理解 新模型待验证 国内部署 本地GPU

3. 一致性控制系统 (Consistency Engine)

3.1 三圣经体系

// Style Bible — one shared visual language applied to every shot.
interface StyleBible {
  // Overall visual register of the project.
  look: 'cinematic' | 'anime' | 'documentary' | 'commercial';
  colorPalette: {
    primary: string;    // dominant color (HEX)
    secondary: string;  // supporting color
    accent: string;     // accent/highlight color
  };
  lighting: 'natural' | 'dramatic' | 'soft' | 'high-key' | 'low-key';

  // Post-production settings applied uniformly at assembly time.
  lutFile?: string;           // .cube LUT file
  filmGrain: 'none' | 'subtle' | 'heavy';
  aspectRatio: '16:9' | '9:16' | '2.39:1' | '4:3';

  // Reference imagery (moodboard) URLs.
  moodboardUrls: string[];
}

// Character Bible — locks each character's identity across shots.
interface CharacterBible {
  characters: Array<{
    id: string;
    name: string;
    description: string;

    // Anchor system (the most important consistency lever):
    // pre-generated reference renders reused by every downstream pass.
    anchorImages: {
      front: string;      // frontal anchor
      side: string;       // profile view
      threeQuarter: string; // 3/4 view
    };

    // LoRA configuration (optional in general; required for anime).
    loraModel?: string;   // path to a pre-trained LoRA
    loraWeight: number;   // 0.5-0.7 recommended

    // IP-Adapter conditioning strength.
    ipAdapterStrength: number;  // 0.4-0.7

    // Attributes that must stay identical between shots.
    lockAttributes: ('face' | 'hair' | 'clothing' | 'body')[];
  }>;
}

// World Bible — keeps locations/scenes continuous between shots.
interface WorldBible {
  locations: Array<{
    id: string;
    name: string;
    description: string;

    // Scene anchors at three framings, reused by every shot in this location.
    anchorImages: {
      wide: string;       // establishing / wide framing
      medium: string;     // medium framing
      detail: string;     // close detail
    };

    // Physical conditions that must match between consecutive shots.
    timeOfDay: 'dawn' | 'day' | 'golden-hour' | 'dusk' | 'night';
    weather: 'clear' | 'cloudy' | 'rain' | 'snow' | 'fog';

    // Scene dressing.
    props: string[];      // prop list
    lighting: string;     // light-source description
  }>;

  // Scene Graph: registry of generated anchor assets.
  sceneGraph: Record<string, string>;  // tag → S3 URL
}

3.2 Anchor 生成策略

async def generate_anchors(brief: DirectorsBrief) -> Anchors:
    """
    Phase 0: lock every Anchor image before any video generation runs.

    Uses the highest-quality T2I model (Flux Pro) so that all later
    I2V passes reference a stable identity/scene image.

    Args:
        brief: Director's Brief holding characters, locations and bibles.

    Returns:
        Mapping of anchor key (``char_<id>`` / ``loc_<id>``) to the
        generated image; each anchor is also uploaded to S3 and registered
        in the brief's Scene Graph.
    """
    anchors = {}

    # 1. Character anchors (Flux Pro + optional face enhancement)
    for char in brief.characters:
        prompt = f"""
        Character portrait of {char.description}
        Front view, neutral expression, studio lighting
        High detail, photorealistic, 8K
        """
        # Generate the high-quality master image with Flux Pro.
        anchor = await flux_pro.generate(prompt, style=brief.style_bible.look)

        # Optional: FaceID-style enhancement for stronger facial consistency.
        if char.type == 'human':
            anchor = await face_enhance(anchor)

        anchors[f"char_{char.id}"] = anchor

    # 2. Location (scene) anchors
    for loc in brief.locations:
        prompt = f"""
        Wide establishing shot of {loc.description}
        {loc.time_of_day}, {loc.weather}
        Cinematic composition, detailed environment
        """
        anchor = await flux_pro.generate(prompt)
        anchors[f"loc_{loc.id}"] = anchor

    # 3. Upload each anchor to S3 and register it in the Scene Graph.
    for key, img in anchors.items():
        url = await s3.upload(img, f"anchors/{brief.job_id}/{key}.png")
        brief.world_bible.scene_graph[key] = url

    return anchors

3.3 LoRA + IP-Adapter 组合策略

# ComfyUI / Forge workflow helper.
def build_consistency_pipeline(shot: Shot, bible: CharacterBible):
    """Assemble the node chain (IP-Adapter + optional LoRA + ControlNets)
    that yields the strongest identity/scene consistency for one shot."""
    subject = bible.get_character(shot.character_id)

    # Identity lock: feed the frontal Anchor through IP-Adapter in face mode.
    nodes = [
        {
            "node": "IPAdapter",
            "image": subject.anchor_images.front,
            "weight": subject.ip_adapter_strength,  # 0.5-0.7 recommended
            "mode": "face_plus"  # face-focused conditioning
        }
    ]

    # Optional character LoRA (strongly recommended for anime pipelines).
    if subject.lora_model:
        nodes.append({
            "node": "LoRALoader",
            "model": subject.lora_model,
            "weight": subject.lora_weight  # 0.5-0.7
        })

    # Pose lock via OpenPose ControlNet when a pose reference exists.
    if shot.has_pose_reference:
        nodes.append({
            "node": "ControlNet",
            "type": "openpose",
            "image": shot.pose_reference,
            "weight": 0.8
        })

    # Scene lock via Depth ControlNet when a location anchor exists.
    if shot.location_anchor:
        nodes.append({
            "node": "ControlNet",
            "type": "depth",
            "image": shot.location_anchor,
            "weight": 0.6
        })

    return nodes

4. LOD 分级渲染系统

4.1 三级渲染策略

// Level-of-detail tiers: draft (structure check) → preview (motion check)
// → final (delivery quality).
type LODLevel = 'draft' | 'preview' | 'final';

// Per-tier configuration. NOTE: cost/time/resolution/useCase are string
// LITERAL TYPES, so this interface doubles as self-documenting config data.
interface LODConfig {
  draft: {
    // Static Anchor image + Ken Burns move only — no video model call.
    generator: 'flux-schnell' | 'sdxl-turbo';
    cost: '$0.002/shot';
    time: '2s';
    resolution: '768x512';
    useCase: '脚本确认/结构预览';
  };

  preview: {
    // Fast video models for motion/blocking review.
    generator: 'kling-turbo' | 'luma-photon' | 'ltx-video';
    cost: '$0.10/shot';
    time: '15s';
    resolution: '720p';
    useCase: '动作逻辑确认/客户中期审核';
  };

  final: {
    // High-quality video models for delivery.
    generator: 'kling-pro' | 'veo-3' | 'sora';
    cost: '$1.50+/shot';
    time: '3min';
    resolution: '1080p-4K';
    useCase: '最终交付';
  };
}

4.2 智能升级策略

class LODRenderer:
    """
    Cost-optimisation core: escalate a shot's LOD only when required.
    """

    async def render_project(self, project: Project, target_lod: LODLevel):
        """Render every shot of ``project`` at ``target_lod``, reusing cache."""
        rendered = []

        for shot in project.shots:
            # Cache hit at this LOD → reuse and move on.
            hit = await self.cache.get(shot.id, target_lod)
            if hit:
                rendered.append(hit)
                continue

            # Invariant: an Anchor must exist before any LOD is produced.
            if not shot.anchor_url:
                shot.anchor_url = await self.generate_anchor(shot)

            # Dispatch on the requested LOD level.
            if target_lod == 'draft':
                # Cheapest tier: the Anchor still + a Ken Burns move in Remotion.
                outcome = {
                    "type": "image",
                    "url": shot.anchor_url,
                    "effect": self.select_ken_burns(shot),
                    "duration": shot.duration
                }

            elif target_lod == 'preview':
                outcome = await self.kling_turbo.i2v(
                    image=shot.anchor_url,
                    prompt=shot.prompt,
                    duration=5
                )

            elif target_lod == 'final':
                outcome = await self.kling_pro.i2v(
                    image=shot.anchor_url,
                    prompt=shot.enhanced_prompt,
                    duration=10,
                    resolution='1080p'
                )

            # Persist for future requests at this LOD.
            await self.cache.set(shot.id, target_lod, outcome)
            rendered.append(outcome)

        return rendered

    def select_ken_burns(self, shot: Shot) -> str:
        """Map the shot type to a push/pull camera effect (default: static)."""
        effect_for = {
            'establishing': 'slow_zoom_out',
            'close-up': 'slow_zoom_in',
            'medium': 'subtle_pan',
            'wide': 'static_with_parallax'
        }
        return effect_for.get(shot.shot_type, 'static')

5. VLM 质检与自愈系统

5.1 多维度质检

class VLMQualityControl:
    """
    Visual QC via a vision-language model (Gemini 2.0 / GPT-4V).

    Each check in ``QUALITY_CHECKS`` is run against sampled keyframes; a
    VLM score below the check's threshold files an issue for that frame.
    """

    # Check catalogue: id, VLM prompt, and pass threshold (score in 0-1).
    QUALITY_CHECKS = [
        {
            "id": "identity_drift",
            "prompt": "Compare the face in this video frame with the reference. Is it the same person? Look for: eye shape, nose, jawline, hair.",
            "threshold": 0.8
        },
        {
            "id": "physics_violation",
            "prompt": "Does this video clip contain any physics violations? Look for: objects floating unnaturally, limbs bending wrong, impossible movements.",
            "threshold": 0.9
        },
        {
            "id": "visual_artifacts",
            "prompt": "Does this video have visual artifacts? Look for: blurry regions, morphing glitches, temporal flickering, hand/finger deformities.",
            "threshold": 0.85
        },
        {
            "id": "brief_alignment",
            "prompt": "Does this video match the following brief? Brief: {brief}",
            "threshold": 0.75
        },
        {
            "id": "text_legibility",
            "prompt": "If there is text in this video, is it readable and spelled correctly?",
            "threshold": 0.95
        }
    ]

    async def inspect(self, video_url: str, shot: Shot, brief: DirectorsBrief) -> QCReport:
        """Run every quality check against sampled keyframes of the clip.

        Args:
            video_url: URL of the rendered clip to inspect.
            shot: shot spec; supplies the anchor reference and description.
            brief: Director's Brief. NOTE(review): currently unused — the
                {brief} placeholder below is filled from shot.visual_description;
                confirm whether brief-level context should be injected instead.

        Returns:
            QCReport with per-frame issues; ``passed`` is True only when no
            high-severity issue was found.
        """
        issues = []

        # Sample a handful of keyframes instead of scanning every frame.
        keyframes = await self.extract_keyframes(video_url, count=5)

        for check in self.QUALITY_CHECKS:
            for i, frame in enumerate(keyframes):
                result = await self.vlm.analyze(
                    image=frame,
                    reference=shot.anchor_url,
                    prompt=check["prompt"].format(brief=shot.visual_description)
                )

                # Below-threshold score → record an issue for this frame.
                if result.score < check["threshold"]:
                    issues.append({
                        "frameId": f"frame_{i}",
                        "type": check["id"],
                        "severity": self.calculate_severity(result.score, check["threshold"]),
                        "description": result.reason,
                        "suggestion": result.fix_suggestion
                    })

        return QCReport(
            overall_score=self.calculate_overall_score(issues),
            issues=issues,
            passed=len([i for i in issues if i["severity"] == "high"]) == 0
        )

5.2 自愈重试机制

class SelfHealingGenerator:
    """Generate → QC → refine-prompt loop with a draft-LOD safety net."""

    MAX_RETRIES = 3

    async def generate_with_healing(
        self,
        shot: Shot,
        anchor: str,
        attempt: int = 0
    ) -> GenerationResult:
        """Produce a QC-passing clip for ``shot``; degrade to draft after retries."""
        # Iterative form of the retry loop: each pass generates a clip,
        # inspects it, and on failure lets the LLM rewrite the prompt
        # before the next attempt.
        while attempt < self.MAX_RETRIES:
            # 1. Generate the clip from the Anchor.
            video_url = await self.generator.i2v(
                image=anchor,
                prompt=shot.prompt,
                negative_prompt=shot.negative_prompt
            )

            # 2. VLM quality inspection.
            qc_report = await self.vlm_qc.inspect(video_url, shot)

            if qc_report.passed:
                return GenerationResult(
                    url=video_url,
                    qc_score=qc_report.overall_score,
                    attempts=attempt + 1
                )

            # 3. Self-heal: fold the QC findings back into the prompt.
            shot.prompt = await self.opus.refine_prompt(
                original_prompt=shot.prompt,
                issues=qc_report.issues,
                anchor_description=shot.anchor_description
            )
            shot.negative_prompt = self.build_negative_from_issues(qc_report.issues)
            attempt += 1

        # Retries exhausted → fall back to the cheap Draft LOD.
        return await self.draft_fallback(shot, anchor)

    def build_negative_from_issues(self, issues: list) -> str:
        """Translate QC issue types into a negative prompt."""
        phrase_for = {
            "identity_drift": "different face, wrong person, inconsistent features",
            "physics_violation": "floating objects, broken physics, impossible movement",
            "visual_artifacts": "blur, glitch, morph, deformed hands, extra fingers",
        }
        fragments = [
            phrase_for[issue["type"]]
            for issue in issues
            if issue["type"] in phrase_for
        ]
        return ", ".join(fragments)

6. 后期合成流水线

6.1 Audio-First 节拍同步

import librosa

class AudioFirstComposer:
    """
    Audio-first editing: the BGM's beat grid drives shot timing so cuts
    land on beats.

    NOTE(review): ``BeatMap`` is declared elsewhere in the project; the
    annotations are quoted (forward references) so this class does not
    require the name at class-creation time.
    """

    def analyze_bgm(self, audio_path: str) -> "BeatMap":
        """Extract tempo, beat times and energy peaks from a music file.

        Args:
            audio_path: path to an audio file readable by librosa.

        Returns:
            BeatMap with tempo, beat timestamps, onset-peak timestamps
            (emphasis points) and total duration in seconds.
        """
        y, sr = librosa.load(audio_path)

        # Beat grid (tempo + beat frame indices converted to seconds).
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)

        # Energy peaks, usable as emphasis/accent points.
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        peaks = librosa.util.peak_pick(onset_env, pre_max=3, post_max=3, pre_avg=3, post_avg=5, delta=0.5, wait=10)
        peak_times = librosa.frames_to_time(peaks, sr=sr)

        return BeatMap(
            tempo=tempo,
            beats=beat_times.tolist(),
            peaks=peak_times.tolist(),
            duration=librosa.get_duration(y=y, sr=sr)
        )

    def align_shots_to_beats(self, shots: list, beat_map: "BeatMap") -> list:
        """Snap each shot's cut point to the nearest upcoming beat.

        Args:
            shots: list of shot dicts, each with at least a ``duration`` key.
            beat_map: BeatMap produced by :meth:`analyze_bgm`.

        Returns:
            New list of shot dicts with ``duration``, ``start_time`` and
            ``cut_on_beat`` filled in; input dicts are not mutated.

        Fixes vs. previous revision:
        * shots are dicts (they are spread with ``**shot`` below), so the
          duration is read with ``shot["duration"]`` — the old attribute
          access ``shot.duration`` raises AttributeError on a dict;
        * ``cut_on_beat`` now reports whether the cut was actually snapped
          to a beat instead of being hard-coded to True.
        """
        aligned_shots = []
        current_time = 0
        beat_idx = 0

        for shot in shots:
            target_duration = shot["duration"]  # was shot.duration — crashed on dicts
            snapped = False

            # Skip beats already behind the playhead.
            while beat_idx < len(beat_map.beats) and beat_map.beats[beat_idx] < current_time:
                beat_idx += 1

            # Time until the next beat, if any remain.
            if beat_idx < len(beat_map.beats):
                time_to_beat = beat_map.beats[beat_idx] - current_time

                # Snap only when the shot length is within half a second of
                # the next beat, so overall pacing is not distorted.
                if abs(target_duration - time_to_beat) < 0.5:
                    target_duration = time_to_beat
                    snapped = True

            aligned_shots.append({
                **shot,
                "duration": target_duration,
                "start_time": current_time,
                "cut_on_beat": snapped
            })

            current_time += target_duration
            beat_idx += 1  # consume the beat we just cut on (original behavior)

        return aligned_shots

6.2 Remotion 合成配置

// remotion/VideoComposition.tsx
import { AbsoluteFill, Sequence, Audio, Video, Img } from 'remotion';
import { interpolate, useCurrentFrame, useVideoConfig } from 'remotion';

// Declarative description of the final timeline consumed by Remotion.
interface ProjectManifest {
  shots: Array<{
    type: 'video' | 'image';  // image shots get a Ken Burns move
    url: string;
    effect?: 'zoom_in' | 'zoom_out' | 'pan_left' | 'pan_right';
    startFrame: number;       // timeline position, in frames
    durationFrames: number;
    transition?: 'cut' | 'dissolve' | 'fade';
  }>;
  audio: {
    bgmUrl: string;
    voiceoverUrl?: string;    // optional narration track
    bgmVolume: number;
    voiceoverVolume: number;
  };
  postProcess: {
    lutFile?: string;         // .cube LUT for color grading
    filmGrain?: number;       // grain intensity; 0/undefined disables
    vignette?: number;
  };
}

/**
 * Top-level Remotion composition: shot timeline + audio layers + post FX.
 * BUGFIX: `{postProcess.filmGrain && <FilmGrainOverlay/>}` rendered a
 * literal "0" whenever filmGrain === 0 (React renders numeric zero from
 * `&&`); the guard is now an explicit comparison.
 */
export const VideoComposition: React.FC<{ manifest: ProjectManifest }> = ({ manifest }) => {
  const { fps } = useVideoConfig();

  return (
    <AbsoluteFill>
      {/* Shot timeline: video clips, or Ken Burns moves for stills. */}
      {manifest.shots.map((shot, i) => (
        <Sequence key={i} from={shot.startFrame} durationInFrames={shot.durationFrames}>
          {shot.type === 'video' ? (
            <Video src={shot.url} />
          ) : (
            <KenBurnsImage src={shot.url} effect={shot.effect || 'zoom_in'} />
          )}

          {/* Transition overlay (only dissolve needs a dedicated component). */}
          {shot.transition === 'dissolve' && (
            <DissolveTransition durationFrames={fps * 0.5} />
          )}
        </Sequence>
      ))}

      {/* Audio layers: BGM plus optional voiceover. */}
      <Audio src={manifest.audio.bgmUrl} volume={manifest.audio.bgmVolume} />
      {manifest.audio.voiceoverUrl && (
        <Audio src={manifest.audio.voiceoverUrl} volume={manifest.audio.voiceoverVolume} />
      )}

      {/* Post-processing overlays. */}
      {manifest.postProcess.lutFile && (
        <LUTOverlay lutFile={manifest.postProcess.lutFile} />
      )}
      {manifest.postProcess.filmGrain !== undefined && manifest.postProcess.filmGrain > 0 && (
        <FilmGrainOverlay intensity={manifest.postProcess.filmGrain} />
      )}
    </AbsoluteFill>
  );
};

// Ken Burns move for still images.
// FIX: the manifest declares pan_left/pan_right effects, but previously
// every non-'zoom_in' effect (pans included) collapsed into a zoom-out.
// Pans now translate horizontally; the unknown-effect fallback is still
// the historical zoom-out behavior.
const KenBurnsImage: React.FC<{ src: string; effect: string }> = ({ src, effect }) => {
  const frame = useCurrentFrame();
  const { durationInFrames } = useVideoConfig();

  // Progress 0 → 1 over the sequence's lifetime, clamped at the end.
  const progress = interpolate(frame, [0, durationInFrames], [0, 1], {
    extrapolateRight: 'clamp',
  });

  let transform: string;
  switch (effect) {
    case 'zoom_in':
      transform = `scale(${1 + 0.2 * progress})`;
      break;
    case 'pan_left':
      // Slight over-scale so the pan never reveals the image edge.
      transform = `scale(1.1) translateX(${-5 * progress}%)`;
      break;
    case 'pan_right':
      transform = `scale(1.1) translateX(${5 * progress}%)`;
      break;
    default:
      // 'zoom_out' — also the fallback for any unrecognized effect.
      transform = `scale(${1.2 - 0.2 * progress})`;
  }

  return (
    <AbsoluteFill style={{ transform }}>
      <Img src={src} style={{ width: '100%', height: '100%', objectFit: 'cover' }} />
    </AbsoluteFill>
  );
};

6.3 FFmpeg LUT 调色

# Unify the color style of clips from different models:
# 3D LUT grade + temporal film grain + vignette, audio copied untouched.
ffmpeg -i input.mp4 \
  -vf "lut3d=cinematic_warm.cube, \
       noise=alls=10:allf=t+u, \
       vignette=angle=PI/4" \
  -c:a copy \
  output.mp4

# Common LUT presets
# - cinematic_warm.cube   : warm cinematic look
# - anime_vibrant.cube    : vibrant anime palette
# - documentary_neutral.cube : neutral documentary grade
# - commercial_bright.cube   : bright commercial look

7. 并发调度架构

7.1 Map-Reduce 并行生成

import asyncio
from typing import List

class ParallelOrchestrator:
    """
    Map-Reduce shot generation: compresses ~60 min of serial work to ~3 min.
    """

    def __init__(self, max_concurrent: int = 10):
        # Semaphore caps in-flight generations; the limiter respects API quotas.
        self.semaphore = asyncio.Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_minute=60)

    async def produce_parallel(self, project: Project) -> List[GenerationResult]:
        """
        Map-Reduce flow:
        0. Anchors are produced serially so identity stays locked.
        1. Map: one coroutine per shot.
        2. Gather: run concurrently under the semaphore.
        3. Reduce: degrade any failed shot to Draft LOD.
        """

        # Phase 0: pre-generate every Anchor (serial, for consistency).
        anchors = await self.generate_anchors(project.brief)

        # Phase 1 (Map): build the task list.
        tasks = [self.generate_shot_with_limit(shot, anchors) for shot in project.shots]

        # Phase 2 (Gather): capture exceptions instead of cancelling the batch;
        # the semaphore inside each task prevents API rate-limit storms.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

        # Phase 3 (Reduce): failed shots fall back to Draft mode.
        final_results = []
        for shot, outcome in zip(project.shots, outcomes):
            if isinstance(outcome, Exception):
                final_results.append(await self.draft_fallback(shot))
            else:
                final_results.append(outcome)

        return final_results

    async def generate_shot_with_limit(self, shot: Shot, anchors: dict):
        """Generate a single shot under the concurrency cap and rate limit."""
        async with self.semaphore:
            await self.rate_limiter.acquire()

            anchor = anchors.get(shot.anchor_key)
            return await self.self_healing_generator.generate_with_healing(
                shot=shot,
                anchor=anchor
            )

7.2 进度追踪

// Live progress snapshot for one generation job (streamed to clients).
interface GenerationProgress {
  jobId: string;
  totalShots: number;
  completedShots: number;
  // Pipeline phase currently executing.
  currentPhase: 'anchors' | 'generation' | 'qc' | 'assembly';
  shots: Array<{
    id: string;
    // Per-shot lifecycle; 'healing' means a QC-driven retry is in flight.
    status: 'pending' | 'generating' | 'qc' | 'healing' | 'done' | 'failed';
    attempts: number;
    qcScore?: number;
    thumbnailUrl?: string;
  }>;
  // Remaining-time estimate — units not stated here; presumably seconds. TODO confirm.
  estimatedTimeRemaining: number;
}

// SSE progress push: poll the job and yield snapshots until completion.
async function* streamProgress(jobId: string): AsyncGenerator<GenerationProgress> {
  let finished = false;

  while (!finished) {
    const snapshot = await getJobProgress(jobId);
    yield snapshot;

    finished = snapshot.completedShots === snapshot.totalShots;
    if (!finished) {
      await sleep(1000);
    }
  }
}

8. 应用场景适配

8.1 宣传视频配置

// Promo-video preset: fast pacing, brand lock, bright commercial grade.
const promoVideoConfig: ProjectConfig = {
  style: {
    look: 'commercial',
    colorMood: 'vibrant',
    pacing: 'fast',
    duration: 30  // 30 s — the "golden length" for promos
  },

  consistency: {
    // Brand elements are hard-locked across every shot.
    brandPack: {
      logo: 's3://brand/logo.png',
      colors: ['#FF6B00', '#1A1A1A'],
      fonts: ['Montserrat', 'PingFang SC']
    },
    characterLock: false,  // promos usually have no recurring character
    sceneLock: true        // scenes must stay consistent
  },

  generation: {
    preferredPath: 'T2I → I2V',
    lodStrategy: 'preview_then_final',
    primaryModel: 'kling-pro'
  },

  postProcess: {
    lut: 'commercial_bright.cube',
    filmGrain: 0,
    audioFirst: true,
    bgmStyle: 'upbeat'
  }
};

8.2 AI 动漫配置

// Anime preset: character identity has top priority; go straight to Final.
const animeConfig: ProjectConfig = {
  style: {
    look: 'anime',
    colorMood: 'vibrant',
    pacing: 'dynamic',
    duration: 180  // 3-minute episode
  },

  consistency: {
    // Character consistency is the highest priority for anime.
    characterLock: true,
    characterConfig: {
      useLoRA: true,           // LoRA is mandatory for anime characters
      loraWeight: 0.6,
      ipAdapterWeight: 0.5,
      faceIdStrength: 0.8
    },
    sceneLock: true
  },

  generation: {
    preferredPath: 'I2V',      // must generate from a locked Anchor
    lodStrategy: 'skip_preview', // anime goes straight to Final
    primaryModel: 'kling-pro',   // Kling performs best on anime
    fallbackModel: 'runway-gen3'
  },

  postProcess: {
    lut: 'anime_vibrant.cube',
    filmGrain: 0,
    lineEnhance: true,         // line-art enhancement
    audioFirst: false,         // anime is usually picture-first
    bgmStyle: 'jpop'
  }
};

8.3 AI 电影配置

// Film preset: widescreen, full LOD pipeline, strict QC with human review.
const filmConfig: ProjectConfig = {
  style: {
    look: 'cinematic',
    colorMood: 'dramatic',
    pacing: 'moderate',
    duration: 600,  // 10-minute short film
    aspectRatio: '2.39:1'  // widescreen (anamorphic)
  },

  consistency: {
    characterLock: true,
    sceneGraphEnabled: true,   // enable the Scene Graph registry
    worldBibleRequired: true,  // a World Bible is mandatory

    characterConfig: {
      useLoRA: true,
      multiAngleAnchors: true,  // anchors from multiple angles
      expressionVariants: true  // expression variants per character
    }
  },

  generation: {
    preferredPath: 'I2V',
    lodStrategy: 'full_pipeline',  // Draft → Preview → Final
    primaryModel: 'veo-3',         // film-grade output via Veo
    fallbackModel: 'sora',
    maxRetries: 5                  // more retries for strict QC
  },

  postProcess: {
    lut: 'cinematic_warm.cube',
    filmGrain: 0.3,
    vignette: 0.2,
    audioFirst: true,
    dynamicRange: 'cinematic',
    surroundSound: true
  },

  qc: {
    strictMode: true,
    minScore: 85,
    humanReviewRequired: true  // human sign-off required before delivery
  }
};

9. 技术栈总结

层级 技术选型 用途
规划层 Claude Opus / GPT-4 Director's Brief 生成、Prompt 优化、自愈分析
T2I 层 Flux Pro / SDXL Anchor 图生成
I2V 层 Kling Pro / Runway Gen-3 主力视频生成
T2V 层 Sora / Veo 3 叙事型/特效镜头
F2F 层 Kling 2.5+ / Pika Frames 转场插值
一致性 LoRA + IP-Adapter + Scene Graph 身份/场景锁定
质检 Gemini 2.0 / GPT-4V VLM 质量检测
合成 Remotion + FFmpeg 时间线编辑、LUT、导出
存储 R2 / S3 资产存储
调度 Python asyncio 并发控制

10. 参考资料


猪哥云(四川)网络科技有限公司 | 合规网 www.hegui.com 猪哥云-数据产品部-Maurice | maurice_wen@proton.me 2025 猪哥云-灵阙企业级智能体平台