/**
 * Minimal file-extension → MIME type lookup for the image formats this script
 * inlines as data URLs. Keys are lower-case extensions including the leading
 * dot, i.e. the shape returned by `path.extname(...).toLowerCase()`.
 * Extensions that are not listed here are handled by the caller's fallback.
 */
const MIME_MAP = {
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".webp": "image/webp",
  // GIF is a common still-image format too; without this entry a .gif file
  // would be labelled with the generic binary fallback instead of an image type.
  ".gif": "image/gif",
};
/**
 * Reads a local file synchronously and returns its contents as a base64
 * `data:` URL (used as the image URL sent to the model).
 *
 * The MIME type is looked up in `MIME_MAP` by the lower-cased file extension;
 * unknown extensions fall back to `application/octet-stream`.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} A string of the form `data:<mime>;base64,<payload>`.
 * @throws {Error} Whatever `fs.readFileSync` throws (e.g. the file does not exist).
 */
function fileToDataUrl(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  const mime = MIME_MAP[ext] || "application/octet-stream";
  const b64 = fs.readFileSync(filePath).toString("base64");
  // Must be a template literal: the bare `data:${...}` text is not valid JavaScript.
  return `data:${mime};base64,${b64}`;
}
/**
 * Schema for the structured output of the image-description task.
 * It is handed to the prompt builder and also used for the lightweight
 * validation of the parsed model reply.
 */
const schema = {
  type: "object",
  // Every field except `ocrText` is mandatory.
  required: ["title", "alt", "description", "tags"],
  properties: {
    title: {
      type: "string",
    },
    alt: {
      type: "string",
    },
    description: {
      type: "string",
    },
    // Declared as "object" although the value is expected to be an array; the
    // consumer of the parsed reply checks `Array.isArray` separately.
    // NOTE(review): presumably the validator compares `typeof` — confirm before changing.
    tags: {
      type: "object",
    },
    // Optional: only meaningful when OCR was requested.
    ocrText: {
      type: "string",
    },
  },
};
/**
 * Asks a vision-capable chat model to describe an image as structured JSON
 * (title / alt / description / tags / optional ocrText).
 *
 * @param {object} [options]
 * @param {string|null} [options.imagePath] - Local image file. Only used when
 *   `imageUrl` is not given; the file is inlined as a base64 data URL.
 * @param {string|null} [options.imageUrl] - Image URL passed to the model as-is.
 *   Takes precedence over `imagePath`.
 * @param {boolean} [options.wantOCR=false] - When true, the user message also
 *   asks the model to attempt OCR.
 * @param {string} [options.length="medium"] - "short" or "long" select the
 *   corresponding description length hint; any other value gets the medium one.
 * @returns {Promise<object>} The parsed and validated JSON object. An empty
 *   `ocrText` is removed when OCR was not requested.
 * @throws {Error} If no image source is given, the local file does not exist,
 *   the reply does not satisfy `schema`, or `tags` is not an array.
 */
async function imageToJson({ imagePath, imageUrl, wantOCR = false, length = "medium" } = {}) {
  // Resolve the image source: an explicit URL wins, otherwise inline the local file.
  let url = imageUrl;
  if (!url && imagePath) {
    if (!fs.existsSync(imagePath)) {
      throw new Error(`找不到檔案:${imagePath}`);
    }
    url = fileToDataUrl(imagePath);
  }
  // Fail early instead of sending a request that carries no image at all.
  if (!url) {
    throw new Error("請提供 imagePath 或 imageUrl");
  }

  const lengthHint =
    length === "short" ? "80~120 字" : length === "long" ? "300~500 字" : "150~250 字";

  // Prompt engineering: declare the task and its constraints through PromptBuilder.
  const pb = new PromptBuilder()
    .setRole("你是嚴謹的圖片描述與可近用性(Accessibility)撰寫助手")
    .setGoal("針對輸入圖片,產生結構化 JSON:title/alt/description/tags/(optional)ocrText")
    .addConstraint("ALT 文本需精煉、具體、避免主觀情緒")
    .addConstraint("description 用完整句子,避免流水帳,描述場景/主體/動作/風格")
    .addConstraint("tags 為 3~7 個中文關鍵字陣列,從『可辨識客觀元素』出發")
    .addConstraint("若畫面有清楚中文字且 wantOCR=true,再嘗試 OCR;否則 ocrText 請留空或省略")
    .addConstraint("輸出必須是純 JSON,不要有多餘文字或 Markdown")
    .setFormatHint(`描述長度:${lengthHint}`)
    .setJsonSchema(schema);

  const messages = [
    { role: "system", content: pb.buildSystemPrompt() },
    // Multimodal user turn: text instruction + image. Chat Completions expects
    // content parts of type "text" and "image_url" (with a nested `url`);
    // "input_text" / "input_image" belong to a different API and are rejected here.
    {
      role: "user",
      content: [
        { type: "text", text: `請產生結構化 JSON${wantOCR ? "(同時嘗試 OCR)" : ""}。` },
        { type: "image_url", image_url: { url } },
      ],
    },
  ];

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini", // supports image understanding
    temperature: 0.2,
    messages,
  });

  const raw = res.choices?.[0]?.message?.content ?? "";
  const obj = extractJson(raw);

  // Very lightweight schema validation.
  const check = validateBySchema(obj, schema);
  if (!check.ok) {
    throw new Error("JSON 不符合 schema:" + check.errors.join("; "));
  }
  // The schema declares `tags` as "object", so the array check is done explicitly.
  if (!Array.isArray(obj.tags)) {
    throw new Error("欄位 tags 應為陣列");
  }
  // When OCR was not requested, drop an empty ocrText rather than returning noise.
  if (!wantOCR && "ocrText" in obj && !obj.ocrText) {
    delete obj.ocrText;
  }
  return obj;
}
/**
 * Command-line flags parsed from `process.argv`.
 * Every token starting with `--` becomes a key (prefix removed). Its value is
 * the following token, unless that token is missing, empty, or is itself a
 * `--flag`, in which case the value is `true`. Other tokens are ignored.
 */
const args = (() => {
  const tokens = process.argv.slice(2);
  const pairs = [];
  for (const [index, token] of tokens.entries()) {
    if (!token.startsWith("--")) {
      continue;
    }
    const next = tokens[index + 1];
    const hasValue = Boolean(next) && !next.startsWith("--");
    pairs.push([token.replace(/^--/, ""), hasValue ? next : true]);
  }
  return Object.fromEntries(pairs);
})();
/**
 * CLI entry point: runs the demo selected by `--task` (default "chat") and
 * prints its result to stdout.
 *
 * Tasks and the flags they read from `args`:
 * - vision: `--imagePath` / `--imageUrl`, `--ocr`, `--length`
 * - image: `--text`, `--size` (default "512x512"), `--n` (default 1)
 * - chat: `--session` (default "default"), `--reset`, `--text`
 * - teacher, review, sentiment, json_summary: `--text` (each has a sample default)
 *
 * An unknown task prints a usage hint instead of throwing.
 *
 * @returns {Promise<void>}
 * @throws {Error} If `--n` is not a positive integer, or whatever the selected task throws.
 */
async function main() {
  const task = args.task || "chat";
  if (task === "vision") {
    const imagePath = args.imagePath || null;
    const imageUrl = args.imageUrl || null;
    // `--ocr` without a value is parsed as boolean true; `--ocr true` as the string.
    const wantOCR = args.ocr === "true" || args.ocr === true;
    const length = args.length || "medium";
    const out = await imageToJson({ imagePath, imageUrl, wantOCR, length });
    console.log("\n=== 圖片 → JSON 描述 ===\n");
    console.log(JSON.stringify(out, null, 2));
  } else if (task === "image") {
    const prompt = args.text || "一隻戴著太空頭盔的柴犬,漂浮在月球上,插著台灣國旗";
    const size = args.size || "512x512";
    const n = args.n ? Number(args.n) : 1;
    // Reject values such as "abc" or "0" here rather than passing NaN/0 downstream.
    if (!Number.isInteger(n) || n < 1) {
      throw new Error(`--n 必須是正整數,收到:${args.n}`);
    }
    const urls = await textToImage(prompt, { size, n });
    console.log("\n=== 生成圖片 ===\n");
    urls.forEach((f) => console.log("已儲存:" + f));
  } else if (task === "chat") {
    const sessionId = args.session || "default";
    if (args.reset) {
      resetSession(sessionId);
      console.log(`已重設會話:${sessionId}`);
      return;
    }
    const input = args.text || "嗨,我想規劃 3 天 2 夜的台中旅遊行程。";
    const { reply } = await chatOnce(input, { sessionId });
    console.log(`\n[${sessionId}] AI:\n${reply}\n`);
  } else if (task === "teacher") {
    const out = await englishTeacher(args.text || "He go to school every day.");
    console.log("\n=== 英文老師 ===\n");
    console.log(out);
  } else if (task === "review") {
    // Honour --text like the other tasks; the sample snippet is only the fallback.
    const code = args.text || "function sum(arr){ return arr.reduce((a,b)=>a+b,0) }";
    const out = await codeReview(code);
    console.log("\n=== 程式碼審查 ===\n");
    console.log(out);
  } else if (task === "sentiment") {
    const out = await sentimentClassify(args.text || "今天心情糟透了,事情一團亂。");
    console.log("\n=== 情緒分類(JSON) ===\n");
    console.log(out);
  } else if (task === "json_summary") {
    const out = await newsToJson(args.text || "OpenAI 發布新模型,效能大幅提升。");
    console.log("\n=== 新聞 JSON 摘要 ===\n");
    console.log(out);
  } else {
    console.log("未知任務,請使用 --task chat | teacher | review | sentiment | json_summary | image | vision");
  }
}
// Script entry: run main() and, if it rejects, print the error message and
// exit with status 1.
(async () => {
  try {
    await main();
  } catch (error) {
    console.error("發生錯誤:", error.message);
    process.exit(1);
  }
})();
/*
▶️ CLI 操作範例
npm run day7:vision:file --silent
npm run day7:vision:url --silent
輸出(範例):
{
"title": "黃昏街角的咖啡外帶杯",
"alt": "一個咖啡外帶杯放在濕潤路緣上,背景為散景的街燈與車流",
"description": "畫面中央是一個棕白相間的咖啡外帶杯,置於濕潤的路面...",
"tags": ["咖啡", "街頭", "夜景", "雨後", "散景", "城市"],
"ocrText": ""
}
*/