Day 22：圖文一起來！用 OpenAI Vision 打造自動圖片解說簡報

2025 iThome 鐵人賽

DAY 22

生成式 AI

AI 三十天，哎呀每天都很難：OpenAI API 生存指南系列第 22 篇

17th鐵人賽

nray5268

團隊nutc imac T1

2025-10-06 19:40:23

212 瀏覽

分享至

昨天我們已經讓 AI 自動生成簡報內容。但如果能讓 AI「看圖片」並幫我們生成說明，是不是就更強了？
今天，我們要讓 FastAPI 服務升級成「圖文自動簡報生成器」，自動分析圖片並整理成 PPT。

我們要讓使用者「上傳一張圖片」，AI 幫我們生成：
1. 一段解釋這張圖的文字
2. 一份包含該說明的簡報


import base64
import json
import os
import tempfile

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, Form
from fastapi.background import BackgroundTasks
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse
from openai import OpenAI
from pptx import Presentation
from pptx.enum.text import PP_ALIGN
from pptx.oxml.ns import qn
from pptx.oxml.xmlchemy import OxmlElement
from pptx.util import Pt
# Load variables from a local .env file (if present) into the environment
# before the API key is read below.
load_dotenv()

# Shared OpenAI client; the key is taken from the OPENAI_API_KEY variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# FastAPI application that the routes below are registered on.
app = FastAPI(title="AI PowerPoint Generator", version="1.0")

# --- Font configuration ---
FONT_NAME = "PingFang TC"


def set_run_font(run, size_pt):
    """Apply ``FONT_NAME`` at ``size_pt`` points to a text run.

    In addition to the regular font name and size, the typeface is written to
    the ``a:latin``, ``a:ea`` and ``a:cs`` children of the run properties
    element; any of those children that is missing is created and appended.

    Args:
        run: The text run to restyle (its ``font`` and ``_r`` are used).
        size_pt: Font size in points.
    """
    font = run.font
    font.name = FONT_NAME
    font.size = Pt(size_pt)

    run_props = run._r.get_or_add_rPr()
    for script_tag in ("a:latin", "a:ea", "a:cs"):
        typeface_el = run_props.find(qn(script_tag))
        if typeface_el is None:
            typeface_el = OxmlElement(script_tag)
            run_props.append(typeface_el)
        typeface_el.set("typeface", FONT_NAME)

# --- Image file -> base64 text ---
def encode_image(image_path: str):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode; the base64 output is decoded as UTF-8 so
    the result is a ``str`` rather than ``bytes``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# --- Ask GPT for a slide outline ---
def generate_ppt_outline(text: str, image_b64: str, mime_type: str = "image/jpeg") -> dict:
    """Ask the model for a slide outline describing an image plus optional text.

    Args:
        text: Extra description that is appended to the prompt.
        image_b64: The image content, base64-encoded.
        mime_type: MIME type placed in the image data URL. Defaults to
            ``"image/jpeg"``, the value this function previously hard-coded.

    Returns:
        A dict parsed from the model's JSON reply; the prompt asks for the
        shape ``{"title": ..., "slides": [{"title": ..., "bullets": [...]}]}``.
        If the reply cannot be read as a JSON object, a fallback outline with
        a single slide carrying the error message is returned instead.
    """
    prompt = f"""
請根據圖片與以下文字，產生一份簡報大綱（繁體中文）。
只回傳 JSON，格式如下：
{{
  "title": "主題",
  "slides": [
    {{"title": "投影片標題", "bullets": ["要點1","要點2","要點3"]}}
  ]
}}
附加說明文字：
{text}
"""
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        # The API expects an object with a "url" key here,
                        # not a bare string.
                        "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
                    },
                ],
            }
        ],
        temperature=0.6,
        max_tokens=800,
        # JSON mode: ask for a bare JSON object instead of free-form text.
        response_format={"type": "json_object"},
    )

    try:
        raw = (resp.choices[0].message.content or "").strip()
        if raw.startswith("```"):
            # Defensive: drop a Markdown fence (``` or ```json) around the JSON.
            raw = raw.partition("\n")[2]
            if "```" in raw:
                raw = raw.rpartition("```")[0]
            raw = raw.strip()
        outline = json.loads(raw)
        if not isinstance(outline, dict):
            # The deck builder calls .get() on the outline, so it must be a dict.
            raise ValueError("回傳內容不是 JSON 物件")
        return outline
    except (ValueError, IndexError, AttributeError, TypeError) as e:
        # json.JSONDecodeError is a ValueError. Keep the best-effort behavior:
        # always hand back something that can be rendered as slides.
        return {
            "title": "AI 自動簡報",
            "slides": [{"title": "JSON 解析失敗", "bullets": [str(e)]}],
        }

# --- Build the PPT file ---
def build_ppt(outline: dict, output_path: str):
    """Render ``outline`` as a presentation and save it to ``output_path``.

    A title slide (layout 0) is created from ``outline["title"]`` with a fixed
    subtitle, followed by one content slide (layout 1) per entry in
    ``outline["slides"]``, each with a title and one paragraph per bullet.
    Every run is styled through ``set_run_font``.

    Args:
        outline: Dict with optional ``"title"`` and ``"slides"`` keys; each
            slide is a dict with optional ``"title"`` and ``"bullets"`` keys.
        output_path: Destination passed to ``Presentation.save``.
    """
    deck = Presentation()

    # Title slide
    cover = deck.slides.add_slide(deck.slide_layouts[0])
    cover_title = cover.shapes.title
    cover_title.text = outline.get("title", "AI 自動產生簡報")
    for title_run in cover_title.text_frame.paragraphs[0].runs:
        set_run_font(title_run, 40)

    subtitle = cover.placeholders[1]
    subtitle.text = "由 OpenAI Vision + FastAPI 自動生成"
    for subtitle_run in subtitle.text_frame.paragraphs[0].runs:
        set_run_font(subtitle_run, 18)

    # Content slides
    for section in outline.get("slides", []):
        page = deck.slides.add_slide(deck.slide_layouts[1])
        heading = page.shapes.title
        heading.text = section.get("title", "未命名")
        for heading_run in heading.text_frame.paragraphs[0].runs:
            set_run_font(heading_run, 28)

        frame = page.placeholders[1].text_frame
        frame.clear()
        for index, bullet in enumerate(section.get("bullets", [])):
            # The first bullet reuses the frame's first paragraph; later
            # bullets each get a newly added paragraph.
            if index == 0:
                paragraph = frame.paragraphs[0]
            else:
                paragraph = frame.add_paragraph()
            bullet_run = paragraph.add_run()
            bullet_run.text = bullet
            set_run_font(bullet_run, 20)

    deck.save(output_path)

# --- FastAPI routes ---
@app.post("/generate_ppt_with_image")
async def generate_ppt_with_image(file: UploadFile, desc: str = Form("")):
    """Turn an uploaded image (plus optional description) into a PPTX download.

    Args:
        file: The uploaded image.
        desc: Optional free-text description forwarded to the outline prompt.

    Returns:
        A ``FileResponse`` serving the generated deck under the download name
        ``AI_Vision_Presentation.pptx``.
    """
    # Encode the upload in memory. The client-supplied filename is never used
    # to build a filesystem path, and no temporary image file is left behind.
    image_b64 = base64.b64encode(await file.read()).decode("utf-8")

    # generate_ppt_outline makes a blocking network call; run it in the thread
    # pool so the event loop keeps serving other requests meanwhile.
    outline = await run_in_threadpool(generate_ppt_outline, desc, image_b64)

    # Give each request its own output file so concurrent requests cannot
    # overwrite a deck that is still being sent.
    fd, output_path = tempfile.mkstemp(suffix=".pptx")
    os.close(fd)
    try:
        build_ppt(outline, output_path)
    except Exception:
        os.remove(output_path)
        raise

    # Delete the generated file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_path)
    return FileResponse(
        output_path,
        filename="AI_Vision_Presentation.pptx",
        background=cleanup,
    )

@app.get("/")
async def root():
    """Return a fixed JSON payload for requests to the root path."""
    payload = {"message": "success"}
    return payload

if __name__ == "__main__":
    # Start the server only when this file is executed directly.
    import uvicorn

    # The app is given as an import string ("module:attribute").
    # NOTE(review): this assumes the file is saved as openai_test.py — confirm.
    server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
    uvicorn.run("openai_test:app", **server_options)

我這邊的範例是把之前那張籃球照片上傳進去，並請 AI 為它產生解說。