昨天我們已經讓 AI 自動生成簡報內容。但如果能讓 AI「看圖片」並幫我們生成說明,是不是就更強了?
今天,我們要讓 FastAPI 服務升級成「圖文自動簡報生成器」,自動分析圖片並整理成 PPT。
我們要讓使用者「上傳一張圖片」,AI 幫我們生成:
- 一段解釋這張圖的文字
- 一份包含該說明的簡報
import base64
import json
import os
import tempfile

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import FileResponse, Response
from openai import OpenAI
from pptx import Presentation
from pptx.enum.text import PP_ALIGN
from pptx.oxml.ns import qn
from pptx.oxml.xmlchemy import OxmlElement
from pptx.util import Pt
# Load variables from a local .env file (when one exists) before reading the
# API key; load_dotenv was imported but never called.
load_dotenv()
# OpenAI client shared by every request; the key is read from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
app = FastAPI(title="AI PowerPoint Generator", version="1.0")
# --- Font settings ---
# Typeface applied to every run written into the generated presentation.
FONT_NAME = "PingFang TC"
def set_run_font(run, size_pt):
    """Apply FONT_NAME and the given point size to a python-pptx text run.

    Besides the regular font properties, the typeface is written onto the
    run's ``a:latin``, ``a:ea`` and ``a:cs`` child elements, creating any
    that are missing.

    Args:
        run: The text run to style.
        size_pt: Font size in points.
    """
    font = run.font
    font.name = FONT_NAME
    font.size = Pt(size_pt)

    run_props = run._r.get_or_add_rPr()
    # Presumably needed so non-Latin (e.g. CJK) text also picks up the font.
    for tag in ("a:latin", "a:ea", "a:cs"):
        node = run_props.find(qn(tag))
        if node is None:
            node = OxmlElement(tag)
            run_props.append(node)
        node.set("typeface", FONT_NAME)
# --- Image -> base64 ---
def encode_image(image_path: str):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file contents, decoded to a UTF-8 string.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# --- Generate the presentation outline with GPT ---
def generate_ppt_outline(text: str, image_b64: str, mime_type: str = "image/jpeg"):
    """Ask the vision model for a slide outline based on an image and text.

    Args:
        text: Extra description supplied by the caller; embedded in the prompt.
        image_b64: Base64-encoded image bytes (without a ``data:`` prefix).
        mime_type: MIME type placed in the data URL. Defaults to
            ``image/jpeg``, the value that used to be hard-coded.

    Returns:
        The parsed outline dict (expected keys: "title" and "slides"). If the
        reply cannot be parsed as a JSON object, a one-slide fallback outline
        carrying the error message is returned instead.
    """
    prompt = f"""
請根據圖片與以下文字,產生一份簡報大綱(繁體中文)。
只回傳 JSON,格式如下:
{{
  "title": "主題",
  "slides": [
    {{"title": "投影片標題", "bullets": ["要點1","要點2","要點3"]}}
  ]
}}
附加說明文字:
{text}
"""
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    # The API expects image_url to be an object with a "url"
                    # key, not a bare string.
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
                    },
                ],
            }
        ],
        temperature=0.6,
        max_tokens=800,
    )
    try:
        raw = resp.choices[0].message.content.strip()
        # Models frequently wrap JSON in a Markdown code fence (```json ... ```);
        # peel it off so json.loads sees only the payload.
        if raw.startswith("```"):
            raw = raw.strip("`").strip()
            if raw.lower().startswith("json"):
                raw = raw[4:]
        outline = json.loads(raw)
        # Callers use outline.get(...), so anything but an object is a failure.
        if not isinstance(outline, dict):
            raise ValueError("回傳內容不是 JSON 物件")
        return outline
    except Exception as e:
        # Deliberate best-effort: surface the problem on a slide, not a crash.
        return {
            "title": "AI 自動簡報",
            "slides": [{"title": "JSON 解析失敗", "bullets": [str(e)]}],
        }
# --- Build the PPT ---
def _set_styled_text(shape, text, size_pt):
    """Set ``shape``'s text and apply the project font to every resulting run."""
    # str() guards against non-string values coming from model-generated JSON.
    shape.text = str(text)
    # Text containing line breaks yields several paragraphs; style all of them.
    for paragraph in shape.text_frame.paragraphs:
        for run in paragraph.runs:
            set_run_font(run, size_pt)


def build_ppt(outline: dict, output_path: str):
    """Render an outline dict into a .pptx file.

    Args:
        outline: Dict with an optional "title" and an optional "slides" list;
            each slide is a dict with "title" and "bullets".
        output_path: Destination handed to ``Presentation.save``.

    Missing or null fields fall back to defaults, non-dict slide entries are
    skipped, and bullet values are converted to strings, because the outline
    comes from model output and its shape is not guaranteed.
    """
    prs = Presentation()

    # Title slide
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    _set_styled_text(slide.shapes.title, outline.get("title") or "AI 自動產生簡報", 40)
    _set_styled_text(slide.placeholders[1], "由 OpenAI Vision + FastAPI 自動生成", 18)

    # Content slides
    for entry in outline.get("slides") or []:
        if not isinstance(entry, dict):
            continue
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        _set_styled_text(slide.shapes.title, entry.get("title") or "未命名", 28)

        bullets = entry.get("bullets") or []
        if isinstance(bullets, str):
            # A lone string would otherwise be iterated character by character.
            bullets = [bullets]

        body = slide.placeholders[1].text_frame
        body.clear()
        for index, bullet in enumerate(bullets):
            # clear() leaves one empty paragraph; reuse it for the first bullet.
            paragraph = body.paragraphs[0] if index == 0 else body.add_paragraph()
            run = paragraph.add_run()
            run.text = str(bullet)
            set_run_font(run, 20)

    prs.save(output_path)
# --- FastAPI routes ---
@app.post("/generate_ppt_with_image")
async def generate_ppt_with_image(file: UploadFile, desc: str = Form("")):
    """Turn an uploaded image plus an optional description into a .pptx download.

    Args:
        file: The uploaded image.
        desc: Optional free-text description passed on to the model.

    Returns:
        A response whose body is the generated presentation, offered as the
        attachment ``AI_Vision_Presentation.pptx``.
    """
    # Encode the upload in memory. The client-controlled filename is never
    # used in a filesystem path (it could contain "../"), and no temporary
    # copy of the upload is left behind.
    image_bytes = await file.read()
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")

    outline = generate_ppt_outline(desc, image_b64)

    # Build the deck in a per-request temporary directory so concurrent
    # requests cannot overwrite each other's output; the directory is removed
    # as soon as the bytes have been read back.
    with tempfile.TemporaryDirectory() as workdir:
        output_path = os.path.join(workdir, "vision_result.pptx")
        build_ppt(outline, output_path)
        with open(output_path, "rb") as pptx_file:
            pptx_bytes = pptx_file.read()

    return Response(
        content=pptx_bytes,
        media_type=(
            "application/vnd.openxmlformats-officedocument"
            ".presentationml.presentation"
        ),
        headers={
            "Content-Disposition": 'attachment; filename="AI_Vision_Presentation.pptx"'
        },
    )
@app.get("/")
async def root():
    """Return a fixed JSON payload for GET requests to the root path."""
    payload = {"message": "success"}
    return payload
if __name__ == "__main__":
    # Start a local server with auto-reload when this file is run directly.
    # The app is passed as the import string "openai_test:app".
    import uvicorn

    uvicorn.run(
        "openai_test:app",
        host="127.0.0.1",
        port=8000,
        reload=True,
    )
我這邊的範例是把之前籃球的照片放進去,並且給他解釋

程式的概念簡單來說就是:使用者上傳圖片,FastAPI 接收後交由 OpenAI Vision 辨識並生成簡報 JSON,最後產出 PPT。其實做法跟昨天差不多,是個滿容易上手的活。
今天我們學會了如何讓 AI 同時理解文字與圖片,並自動產生 PPT。
明天輕鬆一點,換個主題:我們會學怎麼使用 OpenAI 的 Image API,讓 AI 幫你畫出插畫風的角色、網頁封面、海報,甚至是迷因。