昨天我們已經讓 AI 自動生成簡報內容。但如果能讓 AI「看圖片」並幫我們生成說明,是不是就更強了?
今天,我們要讓 FastAPI 服務升級成「圖文自動簡報生成器」,自動分析圖片並整理成 PPT。
我們要讓使用者「上傳一張圖片」,AI 幫我們生成:
一段解釋這張圖的文字,以及一份包含該說明的簡報。
import base64
import json
import os
import tempfile

from dotenv import load_dotenv
from fastapi import BackgroundTasks, FastAPI, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse
from openai import OpenAI
from pptx import Presentation
from pptx.enum.text import PP_ALIGN
from pptx.oxml.ns import qn
from pptx.oxml.xmlchemy import OxmlElement
from pptx.util import Pt
# Read a local .env file (if present) so OPENAI_API_KEY is in the environment
# before the client is constructed. Existing environment variables win.
load_dotenv()

# Shared OpenAI client and FastAPI application used by everything below.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
app = FastAPI(title="AI PowerPoint Generator", version="1.0")

# --- Font settings ---
# Typeface applied to every run written into the generated deck.
FONT_NAME = "PingFang TC"
def set_run_font(run, size_pt):
    """Apply ``FONT_NAME`` at ``size_pt`` points to a python-pptx text run.

    Besides the regular font properties, the typeface is written onto the
    ``a:latin``, ``a:ea`` and ``a:cs`` children of the run properties
    (creating any that are missing) so the same font is declared for each
    of those slots.

    Args:
        run: The text run to style.
        size_pt: Font size in points.
    """
    font = run.font
    font.name = FONT_NAME
    font.size = Pt(size_pt)

    run_props = run._r.get_or_add_rPr()
    for slot in ("a:latin", "a:ea", "a:cs"):
        node = run_props.find(qn(slot))
        if node is None:
            # The slot is absent: create it and attach it to the run properties.
            node = OxmlElement(slot)
            run_props.append(node)
        node.set("typeface", FONT_NAME)
# --- Image -> base64 ---
def encode_image(image_path: str):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file contents, decoded as a UTF-8 string.
    """
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# --- Ask GPT for a presentation outline ---
def _strip_code_fence(raw: str) -> str:
    """Return ``raw`` without a surrounding Markdown code fence, if present."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (``` or ```json) ...
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        # ... and everything from the closing fence onwards.
        cleaned = cleaned.rsplit("```", 1)[0]
    return cleaned.strip()


def generate_ppt_outline(text: str, image_b64: str):
    """Ask the chat model for a slide outline based on an image and a note.

    Args:
        text: Extra description supplied by the user; embedded in the prompt.
        image_b64: Base64-encoded image bytes, sent as a JPEG data URL.

    Returns:
        The parsed outline dict (expected keys: ``title`` and ``slides``).
        If the reply cannot be parsed into a JSON object, a fallback outline
        with a single slide describing the parse error is returned instead.
    """
    prompt = f"""
請根據圖片與以下文字,產生一份簡報大綱(繁體中文)。
只回傳 JSON,格式如下:
{{
"title": "主題",
"slides": [
{{"title": "投影片標題", "bullets": ["要點1","要點2","要點3"]}}
]
}}
附加說明文字:
{text}
"""
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        # The API expects an object with a "url" key here,
                        # not a bare string.
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_b64}"
                        },
                    },
                ],
            }
        ],
        # JSON mode: ask for a bare JSON object rather than free-form text.
        response_format={"type": "json_object"},
        temperature=0.6,
        max_tokens=800,
    )
    try:
        # ``content`` may be None; treat that as an empty reply.
        raw = resp.choices[0].message.content or ""
        outline = json.loads(_strip_code_fence(raw))
        if not isinstance(outline, dict):
            # Callers read the outline with .get(), so a list or scalar
            # must be rejected here.
            raise ValueError("model reply is not a JSON object")
        return outline
    except (ValueError, TypeError, AttributeError, IndexError) as e:
        # json.JSONDecodeError is a ValueError. Keep the best-effort
        # behaviour: still produce a deck, with the error shown on a slide.
        return {
            "title": "AI 自動簡報",
            "slides": [{"title": "JSON 解析失敗", "bullets": [str(e)]}],
        }
# --- Build the PPT ---
def _text_or(value, default: str) -> str:
    """Return ``value`` as a string, or ``default`` when it is None."""
    return default if value is None else str(value)


def _style_text_frame(text_frame, size_pt) -> None:
    """Apply the shared font at ``size_pt`` to every run in ``text_frame``."""
    for paragraph in text_frame.paragraphs:
        for run in paragraph.runs:
            set_run_font(run, size_pt)


def build_ppt(outline: dict, output_path: str):
    """Render an outline dict into a presentation saved at ``output_path``.

    The outline comes from a language model, so its values are coerced
    defensively: missing or ``null`` titles fall back to defaults, bullets
    are converted to strings, a non-list ``bullets`` value is treated as a
    single bullet, and slide entries that are not dicts are skipped.

    Args:
        outline: Dict with an optional ``title`` and an optional ``slides``
            list of ``{"title": ..., "bullets": [...]}`` entries.
        output_path: Destination passed to ``Presentation.save``.
    """
    prs = Presentation()

    # Title slide
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = _text_or(outline.get("title"), "AI 自動產生簡報")
    _style_text_frame(slide.shapes.title.text_frame, 40)
    sub = slide.placeholders[1]
    sub.text = "由 OpenAI Vision + FastAPI 自動生成"
    _style_text_frame(sub.text_frame, 18)

    # Content slides
    for s in outline.get("slides") or []:
        if not isinstance(s, dict):
            continue
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = _text_or(s.get("title"), "未命名")
        _style_text_frame(slide.shapes.title.text_frame, 28)

        bullets = s.get("bullets") or []
        if not isinstance(bullets, (list, tuple)):
            # A lone string or number becomes a single bullet instead of
            # being iterated character by character or raising.
            bullets = [bullets]

        body = slide.placeholders[1].text_frame
        body.clear()
        for i, bullet in enumerate(bullets):
            # After clear() one empty paragraph remains; reuse it for the
            # first bullet and append new paragraphs for the rest.
            p = body.paragraphs[0] if i == 0 else body.add_paragraph()
            r = p.add_run()
            r.text = _text_or(bullet, "")
            set_run_font(r, 20)

    prs.save(output_path)
# --- FastAPI routes ---
@app.post("/generate_ppt_with_image")
async def generate_ppt_with_image(file: UploadFile, desc: str = Form("")):
    """Turn an uploaded image (plus optional description) into a PPTX download.

    Args:
        file: The uploaded image.
        desc: Optional free-text description forwarded to the model.

    Returns:
        A ``FileResponse`` streaming the generated deck as
        ``AI_Vision_Presentation.pptx``.

    Raises:
        HTTPException: 400 when the uploaded file is empty.
    """
    image_bytes = await file.read()
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Uploaded image is empty.")

    # Encode in memory. The client-supplied filename is untrusted, so it is
    # never used to build a filesystem path, and no input file is left behind.
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    outline = generate_ppt_outline(desc, image_b64)

    # One unique output file per request, so concurrent requests cannot
    # overwrite each other's deck.
    fd, output_path = tempfile.mkstemp(suffix=".pptx")
    os.close(fd)
    try:
        build_ppt(outline, output_path)
    except Exception:
        # Do not leak the temp file when rendering fails; re-raise as-is.
        os.remove(output_path)
        raise

    # Delete the temp file only after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_path)
    return FileResponse(
        output_path,
        filename="AI_Vision_Presentation.pptx",
        background=cleanup,
    )
@app.get("/")
async def root():
    """Return a fixed success payload for the index route."""
    payload = {"message": "success"}
    return payload
if __name__ == "__main__":
    # Start a local auto-reloading server when this file is run directly.
    # NOTE(review): the import string assumes the file is saved as
    # openai_test.py -- confirm against the actual filename.
    import uvicorn

    uvicorn.run(
        "openai_test:app",
        host="127.0.0.1",
        port=8000,
        reload=True,
    )
我這邊的範例是把之前籃球的照片放進去,並且給它一段解釋。
程式的概念簡單來說,就是讓使用者上傳圖片,FastAPI 接收後交由 OpenAI Vision 辨識並生成簡報 JSON,最後產出 PPT。其實做法跟昨天差不多,是個滿容易上手的活。
今天我們學會了如何讓 AI 同時理解文字與圖片,並自動產生 PPT。
明天輕鬆一點,換個主題:我們會學怎麼使用 OpenAI 的 Image API,讓 AI 幫你畫出插畫風的角色、網頁封面、海報,甚至是迷因。