iT邦幫忙

2025 iThome 鐵人賽

DAY 17
0
生成式 AI

練習AI系列 第 18

RAG 知識庫 Studio(Next.js API + DaisyUI 後台)

  • 分享至 

  • xImage
  •  

🆕 程式碼
0) 安全 I/O(可選,但建議)

新增 src/utils/fsx.js(避免路徑逃逸、保證資料夾存在)

// src/utils/fsx.js
import fs from "fs";
import path from "path";

// Both locations are anchored at the directory the process was started from.
const projectRoot = process.cwd();

// Folder that holds the knowledge-base documents.
export const KB_DIR = path.join(projectRoot, "knowledge", "faq");
// JSON file that holds the generated index.
export const INDEX_FILE = path.join(projectRoot, "data", "rag", "faq.index.json");

/**
 * Create the knowledge-base folder and the folder containing the index file
 * when they do not exist yet. Missing parent folders are created as well.
 */
export function ensureDirs() {
  const required = [KB_DIR, path.dirname(INDEX_FILE)];
  for (const dir of required) {
    if (fs.existsSync(dir)) continue;
    fs.mkdirSync(dir, { recursive: true });
  }
}

/**
 * Report whether `name` is an acceptable knowledge-base file name.
 *
 * Accepted names are made of word characters, hyphens, dots and spaces and
 * end in a literal `.md` or `.txt` extension (case-insensitive). Path
 * separators are never accepted.
 *
 * @param {string} [name] - Candidate file name; falsy values are treated as "".
 * @returns {boolean} True when the name is allowed.
 */
export function isAllowedName(name) {
  // The dot before the extension must be escaped: a bare `.` matches any
  // character, which lets names such as "../md" or "notes_txt" through.
  return /^[\w\-. ]+\.(md|txt)$/i.test(name || "");
}

/**
 * Resolve `filename` to a normalized path inside KB_DIR.
 *
 * @param {string} filename - Bare file name; must pass isAllowedName.
 * @returns {string} Normalized path of the file inside KB_DIR.
 * @throws {Error} When the name is rejected by isAllowedName, or when the
 *   resolved path is not located inside KB_DIR.
 */
export function safeJoinKB(filename) {
  if (!isAllowedName(filename)) throw new Error("非法檔名或不支援的副檔名");
  const norm = path.normalize(path.join(KB_DIR, filename));
  // Compare against KB_DIR followed by a separator: a bare prefix check would
  // also accept sibling folders whose name merely starts with KB_DIR's name.
  const root = KB_DIR.endsWith(path.sep) ? KB_DIR : `${KB_DIR}${path.sep}`;
  if (!norm.startsWith(root)) throw new Error("路徑不合法");
  return norm;
}

/**
 * List the entries of KB_DIR whose names pass isAllowedName, most recently
 * modified first.
 *
 * @returns {{ name: string, bytes: number, mtime: number }[]} One record per
 *   entry: its name, size in bytes and modification time in milliseconds.
 */
export function listKB() {
  ensureDirs();
  const rows = [];
  for (const name of fs.readdirSync(KB_DIR)) {
    if (!isAllowedName(name)) continue;
    const stats = fs.statSync(path.join(KB_DIR, name));
    rows.push({ name, bytes: stats.size, mtime: stats.mtimeMs });
  }
  rows.sort((left, right) => right.mtime - left.mtime);
  return rows;
}

  1. 更新 src/day16_rag_store.js(輕微補強)

直接覆蓋 Day 16 檔案;主要增加 DEFAULTS 與傳入 outFile 的一致性。

// src/day16_rag_store.js
import fs from "fs";
import path from "path";
import { openai } from "./aiClient.js";
import { chunkText, clean } from "./utils/text.js";

// Embedding model: taken from the environment when set, otherwise a fixed fallback.
const embedModelName = process.env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small";

// Fallback options used when building and querying the index.
const DEFAULTS = {
  // Folder scanned for source documents.
  srcDir: "knowledge/faq",
  // Index JSON file that is written by the build and read by retrieval.
  outFile: "data/rag/faq.index.json",
  // Forwarded to chunkText as its size and overlap arguments.
  chunkSize: 800,
  overlap: 80,
  embedModel: embedModelName,
};

/**
 * List the `.md` / `.txt` entries directly inside `dir` (not recursive).
 *
 * @param {string} dir - Folder to scan.
 * @returns {string[]} Each matching entry name joined onto `dir`.
 */
function listDocs(dir) {
  // The dot is escaped so only a real extension matches; an unescaped `.`
  // would also pick up names such as "cmd" or "notes_txt".
  const docExtension = /\.(md|txt)$/i;
  return fs.readdirSync(dir)
    .filter((f) => docExtension.test(f))
    .map((f) => path.join(dir, f));
}
/**
 * Read a whole file synchronously and decode it as UTF-8 text.
 *
 * @param {string} fp - Path of the file to read.
 * @returns {string} The decoded file contents.
 */
function readFileUtf8(fp) {
  const text = fs.readFileSync(fp, { encoding: "utf-8" });
  return text;
}

/**
 * Request embeddings for a list of texts through `openai.embeddings.create`.
 *
 * @param {string[]} [texts] - Texts to embed; an empty list skips the request.
 * @param {string} [model] - Embedding model name; defaults to DEFAULTS.embedModel.
 * @returns {Promise<Array>} The `embedding` field of every item in the
 *   response's `data` array, or [] when there was nothing to embed.
 */
async function embedMany(texts = [], model = DEFAULTS.embedModel) {
  if (texts.length) {
    const { data } = await openai.embeddings.create({ model, input: texts });
    return data.map((row) => row.embedding);
  }
  return [];
}

/**
 * Build an embedding index from the .md/.txt files of a folder and write it
 * to disk as JSON.
 *
 * Every file is cleaned, split into chunks with chunkText, embedded in
 * batches, and stored as `{ id, docId, text, vector }` entries.
 *
 * @param {object} [opts] - Overrides merged over DEFAULTS.
 * @param {string} [opts.srcDir] - Folder containing the source documents.
 * @param {string} [opts.outFile] - Path of the index JSON file to write.
 * @param {number} [opts.chunkSize] - Forwarded to chunkText.
 * @param {number} [opts.overlap] - Forwarded to chunkText.
 * @returns {Promise<{ outFile: string, count: number }>} The written file and
 *   the number of indexed chunks.
 * @throws {Error} When `srcDir` does not exist or holds no .md/.txt file.
 */
export async function buildIndexFromDir(opts = {}) {
  const { srcDir, outFile, chunkSize, overlap } = { ...DEFAULTS, ...opts };
  if (!fs.existsSync(srcDir)) throw new Error(`找不到資料夾:${srcDir}`);
  const files = listDocs(srcDir);
  if (!files.length) throw new Error(`資料夾 ${srcDir} 沒有 .md/.txt 檔案`);

  // Flatten every document into one list of chunks, remembering the source file.
  const docs = [];
  for (const fp of files) {
    const raw = clean(readFileUtf8(fp));
    const chunks = chunkText(raw, chunkSize, overlap);
    for (const ch of chunks) {
      docs.push({ docId: path.basename(fp), chunkId: ch.id, text: ch.content });
    }
  }

  // Embed in fixed-size batches; vectors stay aligned with `docs` by position.
  // embedMany is called without a model, so it always uses DEFAULTS.embedModel,
  // which is also the model name recorded in the output file below.
  const BATCH = 64;
  const vectors = [];
  for (let i = 0; i < docs.length; i += BATCH) {
    const vecs = await embedMany(docs.slice(i, i + BATCH).map((d) => d.text));
    vectors.push(...vecs);
  }

  const index = docs.map((d, i) => ({
    id: `${d.docId}#${d.chunkId}`,
    docId: d.docId,
    text: d.text,
    vector: vectors[i],
  }));

  const dir = path.dirname(outFile);
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
  fs.writeFileSync(
    outFile,
    JSON.stringify({ builtAt: Date.now(), model: DEFAULTS.embedModel, index }, null, 2),
    "utf-8",
  );
  return { outFile, count: index.length };
}

/**
 * Cosine similarity of two numeric vectors, iterating over the length of `a`.
 *
 * A small constant (1e-9) is added to the denominator, so all-zero or empty
 * vectors yield 0 instead of dividing by zero.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, read at the same positions as `a`.
 * @returns {number} The similarity score.
 */
function cosine(a, b) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < a.length) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }
  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB) + 1e-9;
  return dotProduct / denominator;
}
/**
 * Read the index JSON file and return its entries.
 *
 * @param {string} [outFile] - Path of the index file; defaults to DEFAULTS.outFile.
 * @returns {Array} The `index` array stored in the file, or [] when the file
 *   holds no such array.
 * @throws {Error} When the file does not exist. JSON parse errors propagate.
 */
function loadIndex(outFile = DEFAULTS.outFile) {
  if (!fs.existsSync(outFile)) throw new Error(`找不到索引:${outFile}`);
  const data = JSON.parse(fs.readFileSync(outFile, "utf-8"));
  // Callers map over the result, so anything that is not an array (including
  // a file whose top-level value is null) is reported as "no entries".
  return Array.isArray(data?.index) ? data.index : [];
}

/**
 * Return the index entries most similar to a query.
 *
 * The query is embedded with embedMany, every entry loaded from `outFile` is
 * scored with cosine similarity against it, and the best `topK` are returned.
 *
 * @param {object} params
 * @param {string} params.query - Question text; blank or missing yields [].
 * @param {string} [params.outFile] - Index file; defaults to DEFAULTS.outFile.
 * @param {number} [params.topK=4] - Maximum number of entries returned.
 * @returns {Promise<Array>} Entries with an added `score`, highest first.
 */
export async function retrieve({ query, outFile = DEFAULTS.outFile, topK = 4 }) {
  if (!query?.trim()) return [];
  const entries = loadIndex(outFile);
  const [queryVector] = await embedMany([query]);
  const ranked = entries
    .map((entry) => ({ ...entry, score: cosine(queryVector, entry.vector) }))
    .sort((x, y) => y.score - x.score);
  return ranked.slice(0, topK);
}

/**
 * Answer a question from the knowledge base.
 *
 * Retrieves the best-matching chunks, passes them to the chat model as
 * numbered context snippets, and returns the model's reply together with the
 * chunks that were supplied.
 *
 * @param {object} params
 * @param {string} params.query - The user's question.
 * @param {string} [params.outFile] - Index file; defaults to DEFAULTS.outFile.
 * @param {number} [params.topK=4] - Number of chunks to retrieve.
 * @returns {Promise<{ answer: string, sources: { id: string, docId: string, score: number }[] }>}
 *   The answer text (a fixed fallback sentence when the model returns
 *   nothing) and one source record per retrieved chunk.
 */
export async function answerWithRAG({ query, outFile = DEFAULTS.outFile, topK = 4 }) {
  const hits = await retrieve({ query, outFile, topK });
  // Each snippet gets a header with its 1-based number, file and score so the
  // model can cite it in the answer.
  const context = hits
    .map((h, i) => `# 片段${i + 1}(${h.docId}, score=${h.score.toFixed(3)})\n${h.text}`)
    .join("\n\n");
  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.2,
    messages: [
      { role: "system", content: "你是客服知識庫助理。僅根據提供片段回答,來源不足時請說明。" },
      { role: "user", content: `問題:${query}\n\n片段:\n${context}\n\n請用繁體中文:先結論、再步驟、最後列來源(片段序號與檔名)。` },
    ],
  });
  const answer = res.choices?.[0]?.message?.content?.trim() || "目前找不到足夠資訊。";
  const sources = hits.map((h) => ({ id: h.id, docId: h.docId, score: h.score }));
  return { answer, sources };
}

  2. API:上傳檔案

app/api/kb/upload/route.js

import { NextResponse } from "next/server";
import fs from "fs";
import { ensureDirs, safeJoinKB, isAllowedName } from "../../../../src/utils/fsx.js";

// Route segment option exported for Next.js; this handler uses the Node `fs` module.
export const runtime = "nodejs";

/**
 * Upload handler: store one .md/.txt file in the knowledge base.
 *
 * Reads multipart form data with a `file` entry and an optional `name`
 * entry (the stored file name; falls back to the uploaded file's own name).
 *
 * @param {Request} req - Incoming request carrying the form data.
 * @returns {Promise<Response>} JSON `{ ok: true, name }` on success,
 *   `{ ok: false, error }` with status 400 for invalid input or 500 for
 *   unexpected failures.
 */
export async function POST(req) {
  try {
    ensureDirs();
    const form = await req.formData();
    const file = form.get("file");
    const rawName = form.get("name");
    // Form entries are either strings or file objects. Only a file object has
    // arrayBuffer(); a plain-text `file` field must be rejected as a client
    // error instead of surfacing as a TypeError (status 500) further down.
    const hasFile = file != null && typeof file !== "string" && typeof file.arrayBuffer === "function";
    const chosenName = (typeof rawName === "string" && rawName) || (hasFile && file.name) || "";
    const name = String(chosenName).trim();
    if (!hasFile || !name) return NextResponse.json({ ok:false, error:"缺少檔案或檔名" }, { status:400 });
    if (!isAllowedName(name)) return NextResponse.json({ ok:false, error:"僅允許 .md/.txt 檔" }, { status:400 });

    const buf = Buffer.from(await file.arrayBuffer());
    const dest = safeJoinKB(name);
    fs.writeFileSync(dest, buf);
    return NextResponse.json({ ok:true, name });
  } catch (e) {
    return NextResponse.json({ ok:false, error: e.message || "Server error" }, { status:500 });
  }
}

  3. API:列出檔案

app/api/kb/list/route.js

import { NextResponse } from "next/server";
import { ensureDirs, listKB } from "../../../../src/utils/fsx.js";
// Route segment option exported for Next.js.
export const runtime = "nodejs";

/**
 * List handler: respond with the knowledge-base files as JSON.
 *
 * @returns {Promise<Response>} `{ ok: true, files }` on success, or
 *   `{ ok: false, error }` with status 500 when listing fails.
 */
export async function GET() {
  try {
    ensureDirs();
    return NextResponse.json({ ok: true, files: listKB() });
  } catch (err) {
    const failure = { ok: false, error: err.message };
    return NextResponse.json(failure, { status: 500 });
  }
}

  4. API:刪除檔案

app/api/kb/delete/route.js

import { NextResponse } from "next/server";
import fs from "fs";
import { ensureDirs, safeJoinKB } from "../../../../src/utils/fsx.js";
// Route segment option exported for Next.js; this handler uses the Node `fs` module.
export const runtime = "nodejs";

/**
 * Delete handler: remove one file from the knowledge base.
 *
 * Expects a JSON body `{ name }`. Deleting a file that does not exist is
 * treated as success.
 *
 * @param {Request} req - Incoming request with the JSON body.
 * @returns {Promise<Response>} `{ ok: true }` on success, `{ ok: false, error }`
 *   with status 400 for a missing/invalid name or 500 for unexpected failures.
 */
export async function POST(req) {
  try {
    ensureDirs();
    let body = null;
    try {
      body = await req.json();
    } catch {
      // A missing or malformed JSON body is a client error; it is reported
      // through the "name required" response below rather than as a 500.
    }
    const name = body?.name;
    if (!name) return NextResponse.json({ ok:false, error:"name 必填" }, { status:400 });

    let target;
    try {
      target = safeJoinKB(name);
    } catch (e) {
      // safeJoinKB only throws for names it refuses, which is the caller's
      // mistake — answer 400, matching how the upload route reports bad names.
      return NextResponse.json({ ok:false, error: e.message }, { status:400 });
    }
    if (fs.existsSync(target)) fs.unlinkSync(target);
    return NextResponse.json({ ok:true });
  } catch (e) {
    return NextResponse.json({ ok:false, error: e.message }, { status:500 });
  }
}

  5. API:重建索引

app/api/kb/reindex/route.js

import { NextResponse } from "next/server";
import { buildIndexFromDir } from "../../../../src/day16_rag_store.js";
import { KB_DIR, INDEX_FILE, ensureDirs } from "../../../../src/utils/fsx.js";
// Route segment option exported for Next.js.
export const runtime = "nodejs";

/**
 * Reindex handler: rebuild the index from KB_DIR into INDEX_FILE.
 *
 * @returns {Promise<Response>} `{ ok: true, ...result }` where `result` is
 *   whatever buildIndexFromDir resolves to, or `{ ok: false, error }` with
 *   status 500 when the build fails.
 */
export async function POST() {
  try {
    ensureDirs();
    const result = await buildIndexFromDir({ srcDir: KB_DIR, outFile: INDEX_FILE });
    const body = { ok: true, ...result };
    return NextResponse.json(body);
  } catch (err) {
    const failure = { ok: false, error: err.message };
    return NextResponse.json(failure, { status: 500 });
  }
}

  6. API:RAG 問答

app/api/kb/ask/route.js

import { NextResponse } from "next/server";
import { answerWithRAG } from "../../../../src/day16_rag_store.js";
import { INDEX_FILE } from "../../../../src/utils/fsx.js";
// Route segment option exported for Next.js.
export const runtime = "nodejs";

/**
 * Ask handler: answer a question from the knowledge base.
 *
 * Expects a JSON body `{ q }` where `q` is a non-blank string.
 *
 * @param {Request} req - Incoming request with the JSON body.
 * @returns {Promise<Response>} `{ ok: true, answer, sources }` on success,
 *   `{ ok: false, error }` with status 400 when `q` is missing or not a
 *   usable string, or 500 for unexpected failures.
 */
export async function POST(req) {
  try {
    let body = null;
    try {
      body = await req.json();
    } catch {
      // A missing or malformed JSON body is a client error; it is reported
      // through the "q required" response below rather than as a 500.
    }
    const q = body?.q;
    // Check the type first: calling .trim() on a number or object would throw
    // and turn a bad request into a 500.
    if (typeof q !== "string" || !q.trim()) return NextResponse.json({ ok:false, error:"q 必填" }, { status:400 });
    const { answer, sources } = await answerWithRAG({ query: q, outFile: INDEX_FILE, topK: 4 });
    return NextResponse.json({ ok:true, answer, sources });
  } catch (e) {
    return NextResponse.json({ ok:false, error: e.message }, { status:500 });
  }
}

  7. 前端頁 /studio

app/studio/page.tsx

"use client";
import { useEffect, useState } from "react";

/** One row of the file table; fields match what the component reads from /api/kb/list. */
type FileRow = { name: string; bytes: number; mtime: number };
/** One cited source; fields match what the component reads from /api/kb/ask. */
type Src = { id: string; docId: string; score: number };

/** Turn any thrown value into text for the error banner. */
function toMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Studio page: manage knowledge-base files (upload, list, delete, reindex)
 * on the left and ask questions against the index on the right.
 */
export default function Studio() {
  const [files, setFiles] = useState<FileRow[]>([]);
  const [loadingList, setLoadingList] = useState(false);
  const [uploading, setUploading] = useState(false);
  const [reindexing, setReindexing] = useState(false);
  const [askQ, setAskQ] = useState("");
  const [answer, setAnswer] = useState("");
  const [sources, setSources] = useState<Src[]>([]);
  const [asking, setAsking] = useState(false);
  const [err, setErr] = useState("");

  /** Reload the file list from the server. */
  async function refresh() {
    setLoadingList(true);
    try {
      const r = await fetch("/api/kb/list");
      const j = await r.json();
      if (!j.ok) throw new Error(j.error);
      setFiles(j.files);
    } catch (e) {
      setErr(toMessage(e));
    } finally {
      setLoadingList(false);
    }
  }
  // Load the list once on mount; refresh() handles its own errors.
  useEffect(() => { void refresh(); }, []);

  /** Submit the upload form, then clear it and reload the list. */
  async function upload(e: React.FormEvent<HTMLFormElement>) {
    e.preventDefault();
    // Keep a reference to the form before awaiting so it can be reset afterwards.
    const form = e.currentTarget;
    const fd = new FormData(form);
    setUploading(true);
    setErr("");
    try {
      const r = await fetch("/api/kb/upload", { method: "POST", body: fd });
      const j = await r.json();
      if (!j.ok) throw new Error(j.error);
      form.reset();
      await refresh();
    } catch (e) {
      setErr(toMessage(e));
    } finally {
      setUploading(false);
    }
  }

  /** Ask for confirmation, delete the named file, then reload the list. */
  async function del(name: string) {
    if (!confirm(`刪除 ${name} ?`)) return;
    setErr("");
    try {
      const r = await fetch("/api/kb/delete", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name }),
      });
      const j = await r.json();
      if (!j.ok) throw new Error(j.error);
      await refresh();
    } catch (e) {
      setErr(toMessage(e));
    }
  }

  /** Trigger a server-side index rebuild and report the chunk count. */
  async function reindex() {
    setReindexing(true);
    setErr("");
    try {
      const r = await fetch("/api/kb/reindex", { method: "POST" });
      const j = await r.json();
      if (!j.ok) throw new Error(j.error);
      alert(`索引完成:${j.count} 片段`);
    } catch (e) {
      setErr(toMessage(e));
    } finally {
      setReindexing(false);
    }
  }

  /** Send the current question and show the answer with its sources. */
  async function ask() {
    setAsking(true);
    setErr("");
    setAnswer("");
    setSources([]);
    try {
      const r = await fetch("/api/kb/ask", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ q: askQ }),
      });
      const j = await r.json();
      if (!j.ok) throw new Error(j.error);
      setAnswer(j.answer);
      setSources(j.sources || []);
    } catch (e) {
      setErr(toMessage(e));
    } finally {
      setAsking(false);
    }
  }

  return (
    // NOTE(review): the class names on the two wrapper <div>s, the <h1> and the
    // error banner are placeholders — confirm them against the project's layout.
    <div className="min-h-screen bg-base-200">
      <div className="max-w-6xl mx-auto p-6 space-y-4">
        <h1 className="text-2xl font-bold">Day 17|RAG 知識庫 Studio</h1>
        {err && <div className="alert alert-error">{err}</div>}

        <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
          {/* Left: knowledge-base management */}
          <div className="card bg-base-100 shadow">
            <div className="card-body space-y-4">
              <div className="flex items-center gap-3">
                <h2 className="card-title">知識庫管理</h2>
                {/* `disabled` blocks clicks and keyboard activation; the class alone only changes the look */}
                <button
                  className={`btn btn-outline btn-sm ${reindexing ? "btn-disabled" : ""}`}
                  disabled={reindexing}
                  onClick={reindex}
                >
                  {reindexing ? "索引中..." : "重建索引"}
                </button>
              </div>

              <form className="flex items-center gap-3" onSubmit={upload}>
                <input type="file" name="file" accept=".md,.txt" className="file-input file-input-bordered" required />
                <input type="text" name="name" placeholder="儲存檔名(例:refund.md)" className="input input-bordered" required />
                <button
                  className={`btn btn-primary ${uploading ? "btn-disabled" : ""}`}
                  disabled={uploading}
                  type="submit"
                >
                  {uploading ? "上傳中..." : "上傳"}
                </button>
              </form>

              <div className="overflow-x-auto">
                <table className="table">
                  <thead>
                    <tr><th>檔名</th><th>大小</th><th>修改時間</th><th></th></tr>
                  </thead>
                  <tbody>
                    {loadingList ? (
                      <tr><td colSpan={4}>讀取中...</td></tr>
                    ) : files.length === 0 ? (
                      <tr><td colSpan={4}>尚無資料,先上傳 .md/.txt 後重建索引</td></tr>
                    ) : files.map((f) => (
                      <tr key={f.name}>
                        <td>{f.name}</td>
                        <td>{(f.bytes / 1024).toFixed(1)} KB</td>
                        <td>{new Date(f.mtime).toLocaleString()}</td>
                        <td><button className="btn btn-ghost btn-xs" onClick={() => del(f.name)}>刪除</button></td>
                      </tr>
                    ))}
                  </tbody>
                </table>
              </div>
            </div>
          </div>

          {/* Right: RAG question answering */}
          <div className="card bg-base-100 shadow">
            <div className="card-body space-y-3">
              <h2 className="card-title">RAG 問答</h2>
              <textarea
                className="textarea textarea-bordered h-32"
                placeholder="輸入你的問題(例:我被重複扣款如何退費?)"
                value={askQ}
                onChange={(e) => setAskQ(e.target.value)}
              />
              <button
                className={`btn btn-primary ${asking ? "btn-disabled" : ""}`}
                disabled={asking}
                onClick={ask}
              >
                {asking ? "查詢中..." : "詢問"}
              </button>

              {answer && (
                <div className="prose max-w-none">
                  <h3>回答</h3>
                  <div className="whitespace-pre-wrap">{answer}</div>
                  <h3>來源</h3>
                  {sources.length === 0 ? <div>(無來源)</div> : (
                    <ul className="list-disc pl-6">
                      {sources.map((s, i) => (
                        <li key={s.id ?? i}>{s.docId}({s.score.toFixed(2)})</li>
                      ))}
                    </ul>
                  )}
                </div>
              )}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}

  8. package.json(新增 scripts)

保留舊有,只新增下列:

{
"scripts": {
"day17:dev": "next dev",
"day17:reindex": "node index.js --task rag --mode build --src knowledge/faq --out data/rag/faq.index.json"
}
}

▶️ 本地驗收流程

1) 啟動前端

npm run day17:dev

2) 造幾個 .md/.txt 放在 knowledge/faq/(或用頁面上傳)

3) 在 /studio 上傳 → 重建索引 → 右側問答 → 檢查引用與分數


上一篇
RAG 客服 FAQ(Embedding + 向量檢索)
系列文
練習AI18
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言