程序化PPT生成:python-pptx实战

引言

python-pptx 是 Python 生态中操作 PowerPoint 文件的事实标准第三方库(并非 Python 标准库,需通过 pip install python-pptx 安装),支持创建、读取和修改 .pptx 文件。它是 AI PPT 生成管线中"内容到文件"的最后一公里。本文从基础用法到高级技巧,系统讲解 python-pptx 的工程实践。

一、基础架构

1.1 PPTX 文件结构

.pptx(实际是 ZIP 文件)
├─ [Content_Types].xml
├─ _rels/
├─ docProps/
│   ├─ app.xml        # 应用属性
│   └─ core.xml       # 核心元数据
└─ ppt/
    ├─ presentation.xml  # 演示文稿定义
    ├─ slides/
    │   ├─ slide1.xml    # 每页幻灯片
    │   ├─ slide2.xml
    │   └─ ...
    ├─ slideLayouts/     # 布局模板
    ├─ slideMasters/     # 母版
    ├─ theme/            # 主题定义
    └─ media/            # 图片/视频等媒体

1.2 python-pptx 对象模型

Presentation
├─ slide_masters[]      # 幻灯片母版
├─ slide_layouts[]      # 布局模板
│   ├─ [0] 标题幻灯片
│   ├─ [1] 标题和内容
│   ├─ [2] 节标题
│   ├─ [5] 仅标题
│   └─ [6] 空白
└─ slides[]             # 幻灯片集合
    └─ Slide
        ├─ shapes[]     # 形状集合
        │   ├─ Shape(矩形/圆形等)
        │   ├─ TextBox(文本框)
        │   ├─ Picture(图片)
        │   ├─ Table(表格)
        │   ├─ Chart(图表)
        │   └─ GroupShape(组合形状)
        ├─ slide_layout  # 引用的布局
        └─ notes_slide   # 备注页

二、基础操作

2.1 创建演示文稿

from pptx import Presentation
from pptx.util import Inches, Pt, Emu, Cm
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
from pptx.enum.shapes import MSO_SHAPE

# Create a blank presentation
prs = Presentation()

# Set the slide size (the default is 10" x 7.5")
prs.slide_width = Inches(13.333)   # widescreen 16:9
prs.slide_height = Inches(7.5)

# Add a slide based on one of the preset layouts
slide_layout = prs.slide_layouts[0]  # title layout
slide = prs.slides.add_slide(slide_layout)

# Set the title and the subtitle
title = slide.shapes.title
title.text = "AI 演示文稿生成"

# Placeholder index 1 is used as the subtitle of this layout
subtitle = slide.placeholders[1]
subtitle.text = "python-pptx 工程实践指南"

# Write the presentation to disk
prs.save("demo.pptx")

2.2 文本框与格式化

from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN

def add_formatted_textbox(slide, text: str, left: float, top: float,
                          width: float, height: float,
                          font_name: str = "Microsoft YaHei",
                          font_size: int = 18,
                          font_color: tuple = (0x1F, 0x29, 0x37),
                          bold: bool = False,
                          alignment: int = PP_ALIGN.LEFT):
    """Add a text box whose first paragraph carries the given formatting.

    Args:
        slide: Slide that receives the text box.
        text: Text assigned to the first paragraph.
        left, top, width, height: Position and size, in inches.
        font_name: Typeface name applied to the paragraph font.
        font_size: Font size in points.
        font_color: ``(r, g, b)`` components unpacked into ``RGBColor``.
        bold: Whether the paragraph font is bold.
        alignment: Paragraph alignment (a ``PP_ALIGN`` member).

    Returns:
        The newly created text box shape.
    """
    geometry = [Inches(value) for value in (left, top, width, height)]
    box = slide.shapes.add_textbox(*geometry)

    frame = box.text_frame
    frame.word_wrap = True

    # Only the first paragraph of the frame is written and formatted.
    paragraph = frame.paragraphs[0]
    paragraph.text = text

    font = paragraph.font
    font.name = font_name
    font.size = Pt(font_size)
    font.color.rgb = RGBColor(*font_color)
    font.bold = bold

    paragraph.alignment = alignment
    return box

def add_bullet_list(slide, items: list[str], left: float, top: float,
                    width: float, height: float,
                    font_size: int = 16, line_spacing: float = 1.5):
    """Add a text box that holds one paragraph per list item.

    Each paragraph is set to indent level 0 with 8pt of space after it.
    No bullet glyph is configured here, so whether a bullet is shown
    depends on whatever the paragraph inherits.

    Args:
        slide: Slide that receives the text box.
        items: Text of each list entry, in display order.
        left, top, width, height: Position and size, in inches.
        font_size: Font size in points for every entry.
        line_spacing: Line spacing as a multiple of single spacing
            (``1.5`` means one and a half lines).

    Returns:
        The newly created text box shape.
    """
    txBox = slide.shapes.add_textbox(
        Inches(left), Inches(top), Inches(width), Inches(height)
    )
    tf = txBox.text_frame
    tf.word_wrap = True

    for i, item in enumerate(items):
        # The first entry reuses the frame's initial paragraph; every
        # later entry gets a paragraph of its own.
        p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()

        p.text = item
        p.font.size = Pt(font_size)
        p.font.color.rgb = RGBColor(0x37, 0x41, 0x51)
        p.level = 0
        p.space_after = Pt(8)

        # Use the public line_spacing property instead of appending a raw
        # <a:lnSpc> element: appending put it after <a:spcAft>/<a:defRPr>,
        # which is out of schema order. A float value is interpreted as a
        # multiple of single line spacing.
        p.line_spacing = line_spacing

    return txBox

2.3 图片插入

from pptx.util import Inches
from io import BytesIO
import requests

def add_image_from_file(slide, image_path: str,
                        left: float, top: float,
                        width: float = None, height: float = None):
    """Insert a picture from a file path.

    Args:
        slide: Slide that receives the picture.
        image_path: Path of the image file.
        left, top: Position of the top-left corner, in inches.
        width: Optional width in inches.
        height: Optional height in inches.

    When only one of ``width``/``height`` is given, only that dimension is
    passed on and the other is left for ``add_picture`` to determine. When
    neither is given, no size is passed at all.

    Returns:
        The newly created picture shape.
    """
    # A falsy size (None or 0) means "not specified", matching the earlier
    # truthiness checks. Previously a height-only call fell through to the
    # unsized branch and the requested height was silently dropped.
    pic_width = Inches(width) if width else None
    pic_height = Inches(height) if height else None

    return slide.shapes.add_picture(
        image_path, Inches(left), Inches(top),
        width=pic_width, height=pic_height,
    )

def add_image_from_url(slide, url: str,
                       left: float, top: float,
                       width: float, height: float,
                       timeout: float = 30.0):
    """Download an image over HTTP and insert it as a picture.

    Args:
        slide: Slide that receives the picture.
        url: Address of the image to download.
        left, top, width, height: Position and size, in inches.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The newly created picture shape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would block generation forever.
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than handing an HTML error page to
    # add_picture, which would surface as a confusing image-format error.
    response.raise_for_status()

    image_stream = BytesIO(response.content)
    return slide.shapes.add_picture(
        image_stream, Inches(left), Inches(top),
        Inches(width), Inches(height)
    )

三、高级组件

3.1 表格

from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor

def add_styled_table(slide, data: list[list[str]],
                     left: float, top: float,
                     width: float, height: float,
                     header_color: tuple = (0x1A, 0x56, 0xDB)):
    """Add a table with a coloured header row and zebra-striped body.

    The first row of ``data`` is treated as the header: white bold text on
    ``header_color``. Body rows with an even index get a light grey fill.
    All cells use 12pt centred text.

    Args:
        slide: Slide that receives the table.
        data: Rows of cell values; each value is converted with ``str``.
            Rows may differ in length; missing cells are left empty.
        left, top, width, height: Position and size, in inches.
        header_color: ``(r, g, b)`` fill colour of the header row.

    Returns:
        The graphic-frame shape that contains the table.

    Raises:
        ValueError: If ``data`` has no rows or no columns.
    """
    rows = len(data)
    # Size the grid by the widest row so a row longer than the first one
    # no longer indexes past the last column.
    cols = max((len(row) for row in data), default=0)
    if rows == 0 or cols == 0:
        # Previously this surfaced as ZeroDivisionError on the column width.
        raise ValueError("data must contain at least one non-empty row")

    table_shape = slide.shapes.add_table(
        rows, cols, Inches(left), Inches(top),
        Inches(width), Inches(height)
    )
    table = table_shape.table

    # Distribute the total width evenly across the columns.
    col_width = Inches(width / cols)
    for col_idx in range(cols):
        table.columns[col_idx].width = col_width

    header_text_color = RGBColor(0xFF, 0xFF, 0xFF)
    body_text_color = RGBColor(0x37, 0x41, 0x51)
    header_fill_color = RGBColor(*header_color)
    stripe_fill_color = RGBColor(0xF3, 0xF4, 0xF6)

    for row_idx, row_data in enumerate(data):
        is_header = row_idx == 0
        for col_idx, cell_text in enumerate(row_data):
            cell = table.cell(row_idx, col_idx)
            cell.text = str(cell_text)

            # Text styling
            for paragraph in cell.text_frame.paragraphs:
                paragraph.font.size = Pt(12)
                paragraph.alignment = PP_ALIGN.CENTER
                if is_header:
                    paragraph.font.color.rgb = header_text_color
                    paragraph.font.bold = True
                else:
                    paragraph.font.color.rgb = body_text_color

            # Background: header colour, or a stripe on even body rows
            if is_header:
                cell.fill.solid()
                cell.fill.fore_color.rgb = header_fill_color
            elif row_idx % 2 == 0:
                cell.fill.solid()
                cell.fill.fore_color.rgb = stripe_fill_color

    return table_shape

# Usage example: the first row is the header, the remaining rows are data.
data = [
    ["指标", "Q2", "Q3", "变化"],
    ["营收", "$9.5M", "$12.5M", "+32%"],
    ["用户", "10K", "15K", "+50%"],
    ["NPS", "65", "72", "+7"],
]
# Positional arguments are left=1, top=2, width=8, height=3 (inches).
add_styled_table(slide, data, 1, 2, 8, 3)

3.2 图表

from pptx.chart.data import CategoryChartData, XyChartData
from pptx.enum.chart import XL_CHART_TYPE, XL_LEGEND_POSITION

def add_bar_chart(slide, categories: list[str],
                  series: list[dict],
                  left: float, top: float,
                  width: float, height: float,
                  title: str = None):
    """Add a clustered column chart.

    Args:
        slide: Slide that receives the chart.
        categories: Category labels.
        series: One dict per series with the keys ``"name"`` and
            ``"values"``.
        left, top, width, height: Position and size, in inches.
        title: Optional chart title; no title is set when falsy.

    The legend is shown at the bottom only when there is more than one
    series. Series fills cycle through a fixed three-colour palette.

    Returns:
        The chart object inside the created graphic frame.
    """
    payload = CategoryChartData()
    payload.categories = categories
    for entry in series:
        payload.add_series(entry["name"], entry["values"])

    frame = slide.shapes.add_chart(
        XL_CHART_TYPE.COLUMN_CLUSTERED,
        Inches(left), Inches(top), Inches(width), Inches(height),
        payload,
    )
    chart = frame.chart

    # Legend only makes sense when several series are plotted.
    chart.has_legend = len(series) > 1
    if chart.has_legend:
        legend = chart.legend
        legend.position = XL_LEGEND_POSITION.BOTTOM
        legend.include_in_layout = False

    if title:
        chart.has_title = True
        title_paragraph = chart.chart_title.text_frame.paragraphs[0]
        title_paragraph.text = title

    # Palette is reused from the start once it runs out.
    palette = (
        RGBColor(0x1A, 0x56, 0xDB),
        RGBColor(0xF5, 0x9E, 0x0B),
        RGBColor(0x10, 0xB9, 0x81),
    )
    palette_size = len(palette)
    for position, plotted in enumerate(chart.series):
        fill = plotted.format.fill
        fill.solid()
        fill.fore_color.rgb = palette[position % palette_size]

    return chart

def add_pie_chart(slide, categories: list[str],
                  values: list[float],
                  left: float, top: float,
                  width: float, height: float):
    """Add a pie chart with a right-hand legend and percentage labels.

    Args:
        slide: Slide that receives the chart.
        categories: Label of each slice.
        values: Value of each slice, in the same order as ``categories``.
            NOTE(review): labels use the '0%' number format, so values are
            presumably fractions such as 0.25 - confirm with callers.
        left, top, width, height: Position and size, in inches.

    Returns:
        The chart object inside the created graphic frame.
    """
    payload = CategoryChartData()
    payload.categories = categories
    payload.add_series("", values)  # single series with an empty name

    frame = slide.shapes.add_chart(
        XL_CHART_TYPE.PIE,
        Inches(left), Inches(top), Inches(width), Inches(height),
        payload,
    )
    chart = frame.chart

    chart.has_legend = True
    chart.legend.position = XL_LEGEND_POSITION.RIGHT

    # Turn on data labels for the only plot and format them.
    pie_plot = chart.plots[0]
    pie_plot.has_data_labels = True
    labels = pie_plot.data_labels
    labels.number_format = '0%'
    labels.font.size = Pt(10)

    return chart

def add_line_chart(slide, categories: list[str],
                   series: list[dict],
                   left: float, top: float,
                   width: float, height: float):
    """Add a line chart with markers and a legend at the bottom.

    Args:
        slide: Slide that receives the chart.
        categories: Category labels.
        series: One dict per line with the keys ``"name"`` and
            ``"values"``.
        left, top, width, height: Position and size, in inches.

    Returns:
        The chart object inside the created graphic frame.
    """
    payload = CategoryChartData()
    payload.categories = categories
    for entry in series:
        payload.add_series(entry["name"], entry["values"])

    frame = slide.shapes.add_chart(
        XL_CHART_TYPE.LINE_MARKERS,
        Inches(left), Inches(top), Inches(width), Inches(height),
        payload,
    )
    line_chart = frame.chart

    # The legend is always shown, regardless of the number of series.
    line_chart.has_legend = True
    line_chart.legend.position = XL_LEGEND_POSITION.BOTTOM

    return line_chart

3.3 形状与图形

def add_rounded_rect(slide, left: float, top: float,
                     width: float, height: float,
                     fill_color: tuple = (0x1A, 0x56, 0xDB),
                     text: str = "", font_color: tuple = (0xFF, 0xFF, 0xFF)):
    """Add a solid-filled rounded rectangle, optionally with centred text.

    Args:
        slide: Slide that receives the shape.
        left, top, width, height: Position and size, in inches.
        fill_color: ``(r, g, b)`` fill colour.
        text: Caption placed inside the shape; nothing is written when
            empty.
        font_color: ``(r, g, b)`` colour of the caption.

    Returns:
        The newly created shape.
    """
    geometry = [Inches(value) for value in (left, top, width, height)]
    shape = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, *geometry)

    fill = shape.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(*fill_color)
    shape.line.fill.background()  # no outline

    if not text:
        return shape

    # Caption: 14pt, centred horizontally and anchored to the middle.
    frame = shape.text_frame
    frame.word_wrap = True
    paragraph = frame.paragraphs[0]
    paragraph.text = text
    paragraph.font.color.rgb = RGBColor(*font_color)
    paragraph.font.size = Pt(14)
    paragraph.alignment = PP_ALIGN.CENTER
    frame.vertical_anchor = MSO_ANCHOR.MIDDLE

    return shape

def add_kpi_card(slide, label: str, value: str, change: str,
                 left: float, top: float,
                 width: float = 2.0, height: float = 1.5):
    """Add a KPI card: a background shape plus label, value and change text.

    Args:
        slide: Slide that receives the card.
        label: Small caption shown at the top of the card.
        value: Headline figure, rendered large and bold.
        change: Change indicator. It is coloured green when it starts with
            "+" or "↗", and red otherwise.
        left, top: Position of the card's top-left corner, in inches.
        width, height: Size of the card, in inches.

    Returns:
        The background card shape, so callers can restyle or reposition
        it like the results of the other ``add_*`` helpers. The three text
        boxes are separate shapes on the slide.
    """
    # Background card
    card = slide.shapes.add_shape(
        MSO_SHAPE.ROUNDED_RECTANGLE,
        Inches(left), Inches(top),
        Inches(width), Inches(height)
    )
    card.fill.solid()
    card.fill.fore_color.rgb = RGBColor(0xF8, 0xFA, 0xFC)
    card.line.color.rgb = RGBColor(0xE2, 0xE8, 0xF0)
    card.line.width = Pt(1)

    # Text is inset 0.15" from both sides of the card.
    text_left = left + 0.15
    text_width = width - 0.3

    # Label
    add_formatted_textbox(slide, label, text_left, top + 0.1,
                          text_width, 0.3, font_size=11,
                          font_color=(0x6B, 0x72, 0x80))

    # Value
    add_formatted_textbox(slide, value, text_left, top + 0.4,
                          text_width, 0.5, font_size=28,
                          font_color=(0x1F, 0x29, 0x37), bold=True)

    # Change
    is_positive = change.startswith(("+", "↗"))
    color = (0x05, 0x96, 0x69) if is_positive else (0xDC, 0x26, 0x26)
    add_formatted_textbox(slide, change, text_left, top + 0.95,
                          text_width, 0.3, font_size=12,
                          font_color=color)

    return card

四、模板系统

4.1 从模板创建

def _apply_replacements(text: str, replacements: dict) -> str:
    """Return ``text`` with every placeholder replaced, in dict order."""
    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)
    return text


def _replace_in_paragraph(paragraph, replacements: dict) -> None:
    """Replace placeholders in one paragraph, keeping run formatting if possible."""
    runs = paragraph.runs
    if not runs:
        return

    # First pass: a placeholder that sits inside a single run is replaced
    # in place, so that run keeps its own character formatting.
    for run in runs:
        updated = _apply_replacements(run.text, replacements)
        if updated != run.text:
            run.text = updated

    # Second pass: a placeholder can be split across several runs, in which
    # case no single run contains it. Collapse the paragraph text into the
    # first run; the paragraph then takes on that run's formatting.
    combined = "".join(run.text for run in runs)
    if any(placeholder in combined for placeholder in replacements):
        runs[0].text = _apply_replacements(combined, replacements)
        for run in runs[1:]:
            run.text = ""


def _iter_text_frames(shape):
    """Yield every text frame reachable from ``shape``."""
    if getattr(shape, "has_text_frame", False):
        yield shape.text_frame
    if getattr(shape, "has_table", False):
        for row in shape.table.rows:
            for cell in row.cells:
                yield cell.text_frame
    # Shapes that expose a ``shapes`` collection are walked recursively.
    for child in getattr(shape, "shapes", ()):
        yield from _iter_text_frames(child)


def create_from_template(template_path: str, data: dict,
                         output_path: str) -> str:
    """Fill ``{{key}}`` placeholders in a template and save the result.

    Every key of ``data`` is looked up as the literal text ``{{key}}`` in
    the text frames of all slides, including table cells and shapes nested
    in groups, and replaced with ``str(value)``.

    Args:
        template_path: Path of the template .pptx file.
        data: Mapping of placeholder name to replacement value.
        output_path: Path the filled presentation is saved to.

    Returns:
        ``output_path``, unchanged.
    """
    prs = Presentation(template_path)
    replacements = {
        "{{" + key + "}}": str(value) for key, value in data.items()
    }

    for slide in prs.slides:
        for shape in slide.shapes:
            for text_frame in _iter_text_frames(shape):
                for paragraph in text_frame.paragraphs:
                    _replace_in_paragraph(paragraph, replacements)

    prs.save(output_path)
    return output_path

# Usage: each key fills the matching {{key}} placeholder in the template.
create_from_template(
    "template.pptx",
    {
        "company_name": "灵阙科技",
        "report_date": "2025年Q3",
        "revenue": "$12.5M",
        "growth": "32%",
    },
    "q3_report.pptx"
)

4.2 母版与布局

def list_slide_layouts(pptx_path: str) -> list[dict]:
    """Describe every slide layout available in a presentation file.

    Args:
        pptx_path: Path of the .pptx file to inspect.

    Returns:
        One dict per layout with the keys ``"index"``, ``"name"`` and
        ``"placeholders"``. Each placeholder entry holds ``"idx"``,
        ``"type"`` (as a string), ``"name"``, ``"width"`` and ``"height"``.
    """
    def describe(placeholder) -> dict:
        fmt = placeholder.placeholder_format
        return {
            "idx": fmt.idx,
            "type": str(fmt.type),
            "name": placeholder.name,
            "width": placeholder.width,
            "height": placeholder.height
        }

    presentation = Presentation(pptx_path)
    return [
        {
            "index": position,
            "name": layout.name,
            "placeholders": [describe(ph) for ph in layout.placeholders]
        }
        for position, layout in enumerate(presentation.slide_layouts)
    ]

五、批量生成实战

5.1 数据驱动批量生成

import pandas as pd

def generate_reports_from_data(data_path: str, template_path: str,
                               output_dir: str):
    """Generate one report presentation per row of a CSV file.

    For every row a presentation is opened from the template, a slide
    based on layout index 6 is appended, and three KPI cards (revenue,
    users, NPS) are placed on it. The file is saved as
    ``<region>_report.pptx`` inside ``output_dir``.

    Args:
        data_path: CSV file with the columns ``region``, ``revenue``,
            ``growth``, ``users``, ``user_growth``, ``nps`` and
            ``nps_change``.
        template_path: Template .pptx opened anew for every row.
        output_dir: Directory for the generated files; created if missing.

    Returns:
        Paths of the generated files, in row order.
    """
    from pathlib import Path  # local import keeps this snippet self-contained

    def signed(value) -> str:
        # Prefix "+" only when the value carries no sign of its own, so a
        # negative change renders as "-5" instead of "+-5" and is not
        # mistaken for a positive one by add_kpi_card.
        text = str(value)
        return text if text.startswith(("+", "-")) else f"+{text}"

    target_dir = Path(output_dir)
    # Saving into a directory that does not exist would otherwise fail.
    target_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_csv(data_path)
    output_paths = []

    for _, row in df.iterrows():
        # One presentation per record
        prs = Presentation(template_path)

        # Data slide
        slide = prs.slides.add_slide(prs.slide_layouts[6])

        # KPI cards
        metrics = [
            ("营收", f"${row['revenue']}M", f"{signed(row['growth'])}%"),
            ("用户", f"{row['users']}K", f"{signed(row['user_growth'])}%"),
            ("NPS", str(row['nps']), signed(row['nps_change'])),
        ]

        # Cards are laid out left to right, 3.2" apart.
        for i, (label, value, change) in enumerate(metrics):
            add_kpi_card(slide, label, value, change,
                         left=0.5 + i * 3.2, top=2.0)

        output_path = str(target_dir / f"{row['region']}_report.pptx")
        prs.save(output_path)
        output_paths.append(output_path)

    return output_paths

5.2 AI + python-pptx 集成

class AIPresentationBuilder:
    """Build a PPTX file from AI-generated structured content.

    Each slide description is dispatched to a renderer method chosen by
    its ``layout_type``. Layout types without an implemented renderer fall
    back to the plain content renderer.
    """

    # layout_type -> renderer method name. Names are resolved with getattr
    # at lookup time because not every renderer listed here is implemented
    # on this class; referencing the missing methods directly raised
    # AttributeError on every lookup, including for implemented layouts.
    _RENDERER_NAMES = {
        "title_slide": "_render_title_slide",
        "content": "_render_content_slide",
        "two_column": "_render_two_column",
        "chart": "_render_chart_slide",
        "image_right": "_render_image_right",
        "comparison": "_render_comparison",
        "quote": "_render_quote",
        "closing": "_render_closing",
    }

    def __init__(self, theme: str = "corporate"):
        """Select the theme configuration by name.

        NOTE(review): ``THEMES`` is not defined in this snippet and must be
        provided at module level; each entry is read via its ``"primary"``
        colour by the renderers below.
        """
        self.theme_config = THEMES[theme]

    def build(self, content: dict, output_path: str) -> str:
        """Render every slide in ``content["slides"]`` and save the file.

        Args:
            content: Dict with a ``"slides"`` list. Each slide dict may
                carry ``"layout_type"`` (defaults to ``"content"``) plus
                the fields its renderer reads.
            output_path: Path the presentation is saved to.

        Returns:
            ``output_path``, unchanged.
        """
        prs = Presentation()
        prs.slide_width = Inches(13.333)
        prs.slide_height = Inches(7.5)

        for slide_data in content["slides"]:
            layout_type = slide_data.get("layout_type", "content")
            renderer = self._get_renderer(layout_type)
            renderer(prs, slide_data)

        prs.save(output_path)
        return output_path

    def _get_renderer(self, layout_type: str):
        """Return the bound renderer for ``layout_type``.

        Unknown layout types, and known ones whose renderer method does
        not exist on this instance, fall back to the content renderer.
        """
        method_name = self._RENDERER_NAMES.get(layout_type)
        renderer = getattr(self, method_name, None) if method_name else None
        return renderer if renderer is not None else self._render_content_slide

    def _render_title_slide(self, prs, data):
        """Add a title slide: primary-colour background, title, subtitle."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])
        self._set_background(slide, self.theme_config["primary"])

        add_formatted_textbox(
            slide, data["title"], 1, 2.5, 11, 2,
            font_size=44, font_color=(0xFF, 0xFF, 0xFF), bold=True,
            alignment=PP_ALIGN.CENTER
        )
        # The subtitle is optional.
        if data.get("subtitle"):
            add_formatted_textbox(
                slide, data["subtitle"], 1, 4.8, 11, 1,
                font_size=20, font_color=(0xBF, 0xDB, 0xFE),
                alignment=PP_ALIGN.CENTER
            )

    def _render_content_slide(self, prs, data):
        """Add a slide with a title and an optional list of bullet points."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])

        add_formatted_textbox(
            slide, data["title"], 0.7, 0.4, 11, 1,
            font_size=28, font_color=self.theme_config["primary"][:3],
            bold=True
        )

        if data.get("bullet_points"):
            add_bullet_list(
                slide, data["bullet_points"],
                0.7, 1.8, 11, 5, font_size=18
            )

    def _set_background(self, slide, color):
        """Fill the slide background with a solid colour.

        ``color`` may be an ``RGBColor`` or a sequence whose first three
        items are the red, green and blue components.
        """
        bg = slide.background
        fill = bg.fill
        fill.solid()
        if isinstance(color, RGBColor):
            fill.fore_color.rgb = color
        else:
            # Use only the first three components, the same way
            # _render_content_slide slices the theme colour.
            fill.fore_color.rgb = RGBColor(*color[:3])

六、常见坑与解决方案

| 问题 | 原因 | 解决方案 |
| --- | --- | --- |
| 中文乱码 | 字体不支持 | 使用"Microsoft YaHei"或"SimHei" |
| 图片模糊 | 分辨率不够 | 使用 300 DPI 以上图片 |
| 文字溢出 | 文本框太小 | 启用 word_wrap = True + 调整大小 |
| 图表数据不显示 | 数据类型错误 | 确保数值为 int/float |
| 文件体积过大 | 图片未压缩 | 预处理图片至合适尺寸 |
| 布局错位 | 单位混用 | 统一使用 Inches 或 Emu |

总结

python-pptx 是将 AI 生成的结构化内容转化为 PPTX 文件的核心工具。掌握文本框、表格、图表、形状四大组件的创建与样式控制,配合模板系统和批量生成能力,可以构建出高效的企业级 PPT 自动化管线。核心实践:用 Design Tokens 统一视觉风格,用布局渲染器模式组织代码,用数据驱动实现批量生产。


Maurice | maurice_wen@proton.me