🆕 程式碼實作
/**
下載失敗:${res.status} ${res.statusText}
);audio_${Date.now()}${ext}
/**
 * Guesses a file extension from an HTTP Content-Type value.
 *
 * Matching is substring-based, so parameters such as "; charset=..." are
 * tolerated, and case-insensitive, because MIME types are case-insensitive.
 *
 * @param {string|null|undefined} ct - Content-Type header value.
 * @returns {string} Extension including the leading dot, or "" when the
 *   type is missing or not recognised.
 */
function guessExtFromContentType(ct) {
  if (!ct) return "";

  // Ordered list: the first entry contained in the header wins.
  const knownTypes = [
    ["audio/mpeg", ".mp3"],
    ["audio/mp4", ".m4a"],
    ["audio/x-m4a", ".m4a"],
    ["audio/wav", ".wav"], // also covers "audio/wave"
    ["audio/x-wav", ".wav"],
    ["audio/webm", ".webm"],
    ["audio/ogg", ".ogg"],
    ["audio/flac", ".flac"],
    ["video/mp4", ".mp4"], // some recordings are delivered in an mp4 container
  ];

  // String() keeps non-string truthy values from throwing on .includes.
  const normalized = String(ct).toLowerCase();
  const hit = knownTypes.find(([mime]) => normalized.includes(mime));
  return hit ? hit[1] : "";
}
/**
 * Transcribes an audio file (local path or remote URL) through the OpenAI
 * transcription API and saves the result under outputs/transcripts.
 *
 * NOTE(review): the original header of this function was missing from this
 * section; the name and parameter list are reconstructed from the call in
 * main() — confirm against the real module (including whether it is exported).
 *
 * @param {object} [options]
 * @param {string|null} [options.filePath] - Local audio file; takes precedence over `url`.
 * @param {string|null} [options.url] - Remote audio, fetched with downloadToTemp() when no filePath is given.
 * @param {string} [options.language] - Optional language hint; sent only when non-empty.
 * @param {string} [options.prompt] - Optional prompt (e.g. proper nouns); sent only when non-empty.
 * @param {boolean} [options.detailed] - Also attempt a `verbose_json` transcription.
 * @returns {Promise<{text: string, raw: object|null, saved: {txt: string, json: string}}>}
 * @throws {Error} When no existing local file can be resolved, or when the
 *   primary transcription request fails.
 */
async function transcribe({ filePath, url, language, prompt, detailed } = {}) {
  // Download first when only a URL was supplied.
  let localPath = filePath;
  if (!localPath && url) {
    localPath = await downloadToTemp(url);
  }
  if (!localPath || !fs.existsSync(localPath)) {
    throw new Error(`找不到檔案:${localPath}`);
  }

  // A read stream can be consumed only once, so each request builds its own.
  // `language` and `prompt` are optional; the original author notes they help
  // with proper nouns and accents.
  // Some parameters (such as response_format) depend on model support.
  const buildRequest = (extra = {}) => ({
    file: fs.createReadStream(localPath),
    model: "gpt-4o-transcribe",
    ...(language ? { language } : {}),
    ...(prompt ? { prompt } : {}),
    ...extra,
  });

  // Default response; failures propagate to the caller unchanged.
  // A retry or a fallback model could be added around this call.
  const result = await openai.audio.transcriptions.create(buildRequest());
  const text = (result?.text || "").trim();

  // Optional: try to obtain the detailed payload. This is best-effort — if the
  // model does not support verbose_json or the call fails, fall back to null.
  let detailedJson = null;
  if (detailed) {
    try {
      const verbose = await openai.audio.transcriptions.create(
        buildRequest({ response_format: "verbose_json" })
      );
      detailedJson = verbose || null;
    } catch {
      detailedJson = null;
    }
  }

  // Persist results to outputs/transcripts.
  const outDir = path.join(process.cwd(), "outputs", "transcripts");
  fs.mkdirSync(outDir, { recursive: true });

  // path.parse keeps the full name when the file has no extension.
  const base = path.parse(localPath).name;
  const txtPath = path.join(outDir, `${base}.txt`);
  const jsonPath = path.join(outDir, `${base}.json`);

  fs.writeFileSync(txtPath, text, "utf-8");
  fs.writeFileSync(
    jsonPath,
    JSON.stringify({ text, detailed: detailedJson }, null, 2),
    "utf-8"
  );

  return { text, raw: detailedJson, saved: { txt: txtPath, json: jsonPath } };
}
/**
 * Parses command-line tokens into a plain options object.
 *
 * Supported forms:
 *   --key value   -> { key: "value" }
 *   --key=value   -> { key: "value" }
 *   --flag        -> { flag: true }   (no value, or the next token is another --option)
 * Tokens that do not start with "--" are only ever consumed as values.
 *
 * @param {string[]} argv - Tokens after the script name.
 * @returns {Record<string, string|true>} Parsed options; later duplicates win.
 */
function parseCliArgs(argv) {
  const entries = [];
  argv.forEach((token, index) => {
    if (!token.startsWith("--")) return;

    const body = token.replace(/^--/, "");
    const eq = body.indexOf("=");
    if (eq > 0) {
      entries.push([body.slice(0, eq), body.slice(eq + 1)]);
      return;
    }

    const next = argv[index + 1];
    const value = next && !next.startsWith("--") ? next : true;
    entries.push([body, value]);
  });
  return Object.fromEntries(entries);
}

const args = parseCliArgs(process.argv.slice(2));
/**
 * CLI entry point: dispatches to the task selected with --task
 * (defaults to "chat") and prints the result to stdout.
 *
 * Options are read from the module-level `args` object.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A flag counts as enabled when given bare (--flag) or as "--flag true".
  const isEnabled = (value) => value === "true" || value === true;

  const task = args.task || "chat";

  if (task === "stt") {
    const filePath = args.filePath || null;
    const url = args.url || null;
    const language = args.lang || ""; // e.g. "zh" | "en" | "ja"
    const prompt = args.prompt || ""; // hints for proper nouns
    const detailed = isEnabled(args.detailed);
    const { text, saved } = await transcribe({ filePath, url, language, prompt, detailed });
    console.log("\n=== 語音轉文字(STT) ===\n");
    console.log(text);
    console.log("\n已儲存:");
    console.log("- TXT:", saved.txt);
    console.log("- JSON:", saved.json);
  } else if (task === "vision") {
    const imagePath = args.imagePath || null;
    const imageUrl = args.imageUrl || null;
    const wantOCR = isEnabled(args.ocr);
    const length = args.length || "medium";
    const out = await imageToJson({ imagePath, imageUrl, wantOCR, length });
    console.log("\n=== 圖片 → JSON 描述 ===\n");
    console.log(JSON.stringify(out, null, 2));
  } else if (task === "image") {
    const prompt = args.text || "一隻戴著太空頭盔的柴犬,漂浮在月球上,插著台灣國旗";
    const size = args.size || "512x512";
    // Fall back to one image when --n is missing or not a positive integer.
    const requested = Number(args.n);
    const n = Number.isInteger(requested) && requested > 0 ? requested : 1;
    const urls = await textToImage(prompt, { size, n });
    console.log("\n=== 生成圖片 ===\n");
    for (const f of urls) {
      console.log(`已儲存:${f}`);
    }
  } else if (task === "chat") {
    const sessionId = args.session || "default";
    if (args.reset) {
      resetSession(sessionId);
      console.log(`已重設會話:${sessionId}`);
      return;
    }
    const input = args.text || "嗨,我想規劃 3 天 2 夜的台中旅遊行程。";
    const { reply } = await chatOnce(input, { sessionId });
    console.log(`\n[${sessionId}] AI:\n${reply}\n`);
  } else if (task === "teacher") {
    const out = await englishTeacher(args.text || "He go to school every day.");
    console.log("\n=== 英文老師 ===\n");
    console.log(out);
  } else if (task === "review") {
    // Honour --text like the other tasks; the sample snippet is only a default.
    const out = await codeReview(
      args.text || "function sum(arr){ return arr.reduce((a,b)=>a+b,0) }"
    );
    console.log("\n=== 程式碼審查 ===\n");
    console.log(out);
  } else if (task === "sentiment") {
    const out = await sentimentClassify(args.text || "今天心情糟透了,事情一團亂。");
    console.log("\n=== 情緒分類(JSON) ===\n");
    console.log(out);
  } else if (task === "json_summary") {
    const out = await newsToJson(args.text || "OpenAI 發布新模型,效能大幅提升。");
    console.log("\n=== 新聞 JSON 摘要 ===\n");
    console.log(out);
  } else {
    console.log("未知任務,請使用 --task stt | vision | image | chat | teacher | review | sentiment | json_summary");
  }
}
// Run the CLI; on any failure print the reason and exit with status 1.
main().catch((e) => {
  // A rejection value is not guaranteed to be an Error (it may even be
  // null/undefined), so fall back to the raw value instead of e.message.
  console.error("發生錯誤:", e?.message ?? e);
  process.exit(1);
});
▶️ CLI 操作範例
npm run day8:stt:file --silent
npm run day8:stt:url --silent
node index.js --task stt --filePath sample/meeting.m4a --lang zh --prompt "專案名:SmartGo Plus;人名:小王、小美;術語:RAG、LIFF、IIS"
產出檔案示例:
outputs/transcripts/meeting.txt
outputs/transcripts/meeting.json // { text, detailed?: {...} }