🆕 程式碼
/** 將長文切成小塊,避免 embedding 過長
c${idx++}
, content });/** 簡易清理:去 BOM/多餘空白 */
/**
 * Normalizes raw document text before chunking.
 *
 * Strips a leading byte-order mark, converts Windows (CRLF) line endings
 * to LF, and trims surrounding whitespace.
 *
 * @param {string} [text=""] - Raw text as read from disk.
 * @returns {string} The cleaned text.
 */
export function clean(text = "") {
  const withoutBom = text.replace(/^\uFEFF/, "");
  const unixNewlines = withoutBom.split("\r\n").join("\n");
  return unixNewlines.trim();
}
/**
 * Lists the Markdown (.md) and plain-text (.txt) entries directly inside a
 * directory (non-recursive, case-insensitive extension match).
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of the matching entries, each joined with `dir`.
 * @throws {Error} Whatever `fs.readdirSync` throws (e.g. missing directory).
 */
function listDocs(dir) {
  // The dot must be escaped: the earlier pattern /.md$|.txt$/ treated it as
  // "any character", so names such as "readmemd" or "notes_txt" slipped in.
  const docExtension = /\.(?:md|txt)$/i;
  return fs
    .readdirSync(dir)
    .filter((name) => docExtension.test(name))
    .map((name) => path.join(dir, name));
}
/**
 * Reads a whole file synchronously and returns its contents decoded as UTF-8.
 *
 * @param {string} fp - Path of the file to read.
 * @returns {string} The decoded file contents.
 */
function readFileUtf8(fp) {
  const contents = fs.readFileSync(fp, { encoding: "utf-8" });
  return contents;
}
/**
 * Requests embedding vectors for a batch of texts from the OpenAI client.
 *
 * The model comes from the OPENAI_EMBEDDING_MODEL environment variable and
 * falls back to "text-embedding-3-small". An empty batch short-circuits
 * without calling the API.
 *
 * @param {string[]} [texts=[]] - Texts to embed.
 * @returns {Promise<Array>} The `embedding` field of every item in the
 *   response's `data` array, in response order.
 */
async function embedMany(texts = []) {
  if (texts.length === 0) {
    return [];
  }
  const model = process.env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small";
  const response = await openai.embeddings.create({ model, input: texts });
  return response.data.map((item) => item.embedding);
}
/**
 * Builds a vector index from the .md/.txt files of a folder:
 * read each file, clean it, split it into chunks, embed the chunks in
 * batches, then write everything to a JSON index file.
 *
 * @param {object} [options]
 * @param {string} [options.srcDir="knowledge/faq"] - Folder holding the source documents.
 * @param {string} [options.outFile="data/rag/faq.index.json"] - Destination of the JSON index.
 * @param {number} [options.chunkSize=800] - Chunk size forwarded to `chunkText`.
 * @param {number} [options.overlap=80] - Overlap forwarded to `chunkText`.
 * @returns {Promise<{outFile: string, count: number}>} Output path and number of indexed chunks.
 * @throws {Error} When `srcDir` does not exist or contains no .md/.txt files.
 */
export async function buildIndexFromDir({
  srcDir = "knowledge/faq",
  outFile = "data/rag/faq.index.json",
  chunkSize = 800,
  overlap = 80,
} = {}) {
  if (!fs.existsSync(srcDir)) {
    throw new Error(`找不到資料夾:${srcDir}`);
  }
  const files = listDocs(srcDir);
  if (files.length === 0) {
    throw new Error(`資料夾 ${srcDir} 沒有 .md/.txt 檔案`);
  }

  // Flatten every file into { docId, chunkId, text } records.
  const docs = [];
  for (const fp of files) {
    const raw = clean(readFileUtf8(fp));
    const chunks = chunkText(raw, chunkSize, overlap);
    for (const ch of chunks) {
      docs.push({
        docId: path.basename(fp),
        chunkId: ch.id,
        text: ch.content,
      });
    }
  }

  // Embed in batches so a single request never carries too many inputs.
  const BATCH = 64;
  const embeddings = [];
  for (let i = 0; i < docs.length; i += BATCH) {
    const batch = docs.slice(i, i + BATCH).map((d) => d.text);
    const vecs = await embedMany(batch);
    embeddings.push(...vecs);
  }

  // Assemble index entries; embeddings[i] belongs to docs[i].
  const index = docs.map((d, i) => ({
    id: `${d.docId}#${d.chunkId}`,
    docId: d.docId,
    text: d.text,
    vector: embeddings[i],
  }));

  const outDir = path.dirname(outFile);
  if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true });

  // Record the model that was actually requested (same lookup as embedMany)
  // instead of a hard-coded name that goes stale when the env var is set.
  const model = process.env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small";
  fs.writeFileSync(
    outFile,
    JSON.stringify({ builtAt: Date.now(), model, index }, null, 2),
    "utf-8",
  );
  return { outFile, count: index.length };
}
/**
 * Cosine similarity between two numeric vectors (used to rank index
 * entries against a query embedding).
 *
 * Iterates over the indices of `a`; a small epsilon (1e-9) is added to the
 * denominator so all-zero vectors yield 0 instead of dividing by zero.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, read at the same indices as `a`.
 * @returns {number} dot(a, b) / (|a| * |b| + 1e-9).
 */
function cosine(a, b) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let i = 0;
  while (i < a.length) {
    const x = a[i];
    const y = b[i];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    i += 1;
  }
  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB) + 1e-9;
  return dotProduct / denominator;
}
/**
 * Loads the entries of a previously built JSON index file.
 *
 * @param {string} outFile - Path of the index file.
 * @returns {Array} The file's `index` array, or an empty array when the
 *   parsed document has no array under `index`.
 * @throws {Error} When the file does not exist; JSON syntax errors from
 *   `JSON.parse` propagate unchanged.
 */
function loadIndex(outFile) {
  if (!fs.existsSync(outFile)) {
    throw new Error(`找不到索引:${outFile},請先建立索引`);
  }
  const data = JSON.parse(fs.readFileSync(outFile, "utf-8"));
  // Callers map over the result, so anything that is not an array
  // (missing key, null document, wrong type) is treated as an empty index.
  return Array.isArray(data?.index) ? data.index : [];
}
/**
 * Retrieves the index entries most similar to a query.
 *
 * Loads the index, embeds the query, scores every entry with cosine
 * similarity and returns the best `topK`, highest score first. A missing
 * or blank query returns an empty list without touching the index or API.
 *
 * @param {object} options
 * @param {string} options.query - User query text.
 * @param {string} [options.outFile="data/rag/faq.index.json"] - Index file to search.
 * @param {number} [options.topK=4] - Maximum number of hits to return.
 * @returns {Promise<Array>} Index entries, each extended with a `score` field.
 */
export async function retrieve({ query, outFile = "data/rag/faq.index.json", topK = 4 }) {
  const hasQuery = Boolean(query?.trim());
  if (!hasQuery) {
    return [];
  }
  const entries = loadIndex(outFile);
  const [queryVector] = await embedMany([query]);
  return entries
    .map((entry) => ({ ...entry, score: cosine(queryVector, entry.vector) }))
    .sort((left, right) => right.score - left.score)
    .slice(0, topK);
}
/**
 * Answers a question with retrieval-augmented generation and reports the
 * source chunks that were supplied to the model.
 *
 * Retrieves the top-scoring chunks, formats them into a context block,
 * and asks the chat model to answer from that context only.
 *
 * @param {object} options
 * @param {string} options.query - User question.
 * @param {string} [options.outFile="data/rag/faq.index.json"] - Index file to search.
 * @param {number} [options.topK=4] - Number of chunks to retrieve.
 * @returns {Promise<{answer: string, sources: Array<{id: string, docId: string, score: number}>}>}
 *   The model's answer (or a fixed fallback sentence when the response has
 *   no content) and the retrieved chunks' id, docId and score.
 */
export async function answerWithRAG({ query, outFile = "data/rag/faq.index.json", topK = 4 }) {
  const hits = await retrieve({ query, outFile, topK });

  // One labelled section per hit so the model can cite chunk number and file.
  const context = hits
    .map((h, i) => `# 片段${i + 1}(${h.docId}, score=${h.score.toFixed(3)})\n${h.text}`)
    .join("\n\n");

  const messages = [
    { role: "system", content: "你是客服知識庫助理。僅根據提供的片段回答;若片段不足,請明確說明無法判斷並提出下一步。" },
    {
      role: "user",
      content: `問題:${query}\n\n以下是知識庫片段(可能不完整):\n${context}\n\n請用繁體中文,先給結論,再以條列列出步驟或政策重點,最後附上「來源」列表(列出片段與檔名)。`,
    },
  ];

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.2,
    messages,
  });

  const answer = res.choices?.[0]?.message?.content?.trim() || "目前找不到足夠資訊,建議轉人工客服。";
  const sources = hits.map((h) => ({ id: h.id, docId: h.docId, score: h.score }));
  return { answer, sources };
}
只顯示變更後完整檔案,方便直接覆蓋。
// src/flows/customerFAQ.js
import { openai } from "../aiClient.js";
import { answerWithRAG } from "../day16_rag_store.js";
/**
 * Customer FAQ flow definition.
 *
 * Collects the user's question (accumulating follow-up input), answers it
 * through the RAG knowledge base when at least one source scores high
 * enough, and otherwise falls back to a generic chat-model answer.
 */
export const customerFAQ = {
  id: "customer_faq",
  schema: { required: ["question"] },
  prompts: {
    system: "你是客服 FAQ 助理,先判斷是否常見問題;若符合,給精簡答案與下一步建議。",
    askForSlot: () => "請描述您的問題(帳單、退費、維修、物流…)?",
  },
  ui: {
    welcome: "您好,這裡是客服常見問題。請說明遇到的情況,我會先幫你從知識庫查詢。",
    confirmMessage: null,
    validationFailed: (errors) => `發生錯誤:${errors.join(", ")}`,
    doneMessage: (result) => result.answer,
  },
  /**
   * Appends the new input to any question text already stored in state.
   *
   * @param {string} input - Latest user message.
   * @param {{question?: string}} state - Current flow state.
   * @returns {Promise<{question: string}>} The accumulated question.
   */
  parse: async (input, state) => {
    return { question: state.question ? `${state.question}\n${input}` : input };
  },
  /**
   * Produces the final answer for the collected question.
   *
   * @param {{question: string}} state - Completed flow state.
   * @returns {Promise<{answer: string}>} Answer text, with a source list
   *   appended when the RAG path was used.
   */
  onComplete: async (state) => {
    // 1) Try the RAG answer first; fall back to a generic LLM answer when
    //    the knowledge base is insufficient.
    try {
      const { answer, sources } = await answerWithRAG({ query: state.question });
      // Accept the RAG answer only if some source clears the similarity
      // threshold; tune 0.25 to match the quality of the knowledge base.
      const ok = sources.some((s) => s.score >= 0.25);
      if (ok) {
        const srcList = sources.map((s) => `- ${s.docId}(${s.score.toFixed(2)})`).join("\n");
        return { answer: `${answer}\n\n**來源**\n${srcList}` };
      }
    } catch (err) {
      // Best-effort: still fall through to the generic answer, but surface
      // the failure instead of swallowing it silently.
      console.warn("[customer_faq] RAG lookup failed, using fallback answer:", err);
    }
    // 2) Fallback without RAG chunks: generic guidance (in production,
    //    routing to a human agent is recommended).
    const res = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      temperature: 0.3,
      messages: [
        { role: "system", content: "請以精簡條列回覆,若缺資訊請提示使用者補充關鍵細節。" },
        { role: "user", content: `問題:${state.question}\n請用繁體中文回答。` },
      ],
    });
    const answer = res.choices?.[0]?.message?.content?.trim() || "目前無法判斷,請提供更詳細的訂單/商品/時間資訊。";
    return { answer };
  },
};
/**
 * CLI entry point.
 *
 * Handles the "rag" task, whose `--mode` is either "build" (create the
 * index from a source folder) or "ask" (answer one query through RAG).
 * Any other task is left to the pre-existing branches of the CLI.
 */
async function main() {
  const task = args.task || "chat";
  if (task !== "rag") {
    // The other task branches of the existing CLI go here…
    return;
  }

  const mode = args.mode || "build"; // build | ask
  switch (mode) {
    case "build": {
      const srcDir = args.src || "knowledge/faq";
      const outFile = args.out || "data/rag/faq.index.json";
      const chunkSize = args.chunk ? Number(args.chunk) : 800;
      const overlap = args.overlap ? Number(args.overlap) : 80;
      const summary = await buildIndexFromDir({ srcDir, outFile, chunkSize, overlap });
      console.log("\n=== 建立索引完成 ===");
      console.log(summary);
      break;
    }
    case "ask": {
      const query = args.q || "如何退費?";
      const outFile = args.out || "data/rag/faq.index.json";
      const topK = args.k ? Number(args.k) : 4;
      const { answer, sources } = await answerWithRAG({ query, outFile, topK });
      console.log("\n=== RAG 答覆 ===\n");
      console.log(answer);
      console.log("\n來源:", sources);
      break;
    }
    default:
      console.log("未知模式,請使用 --mode build | ask");
  }
}
// Start the CLI; on any rejected promise, print the error and exit with status 1.
main().catch(e => { console.error(e); process.exit(1); });
▶️ 驗收流程
npm run day16:build --silent
npm run day16:ask --silent
npm run day16:faq:start --silent
npm run day16:faq:ask --silent
看到答覆末尾的「來源」檔名與分數,代表 RAG 已介入。
若分數普遍 <0.25,調整 chunkSize/overlap 或擴充知識庫內容。