🆕 程式碼
輕量 Markdown 區塊解析:抓出 H1~H6 標題、slug、章節文字範圍。
// src/utils/md_ast.js
/**
 * Build a safe slug from a heading title.
 *
 * Unicode letters and digits are kept (so CJK titles survive), every other
 * symbol is dropped, whitespace runs become a single hyphen and the result is
 * capped at 80 UTF-16 code units. No transliteration (e.g. Chinese to pinyin)
 * is attempted; that can be layered on later.
 *
 * @param {string} [s=""] - Raw title. Falsy values are treated as empty;
 *   other non-string values are stringified instead of throwing.
 * @returns {string} The slug, or "section" when nothing usable remains.
 */
export function slugify(s = "") {
  const MAX_LENGTH = 80;
  const cleaned = String(s || "")
    .trim()
    .toLowerCase()
    .replace(/[^\p{Letter}\p{Number}\s-]/gu, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-");
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of an astral-plane letter; drop the dangling high surrogate.
  const truncated = cleaned.slice(0, MAX_LENGTH).replace(/[\uD800-\uDBFF]$/, "");
  return truncated || "section";
}
/**
 * Split a Markdown document into a flat, ordered list of sections.
 *
 * Every ATX heading (`#` to `######`) starts a new section; the lines up to
 * the next heading become that section's `text`. Content that appears before
 * the first heading is collected into a synthetic level-1 section titled
 * "導言" with slug "intro". Lines inside fenced code blocks (``` or ~~~) are
 * never treated as headings, so shell comments such as `# install` stay in
 * the surrounding section's text.
 *
 * Slugs come straight from `slugify(title)` and are not de-duplicated, and
 * `parent` refers to the parent section by slug.
 *
 * @param {string} [md=""] - Markdown source; CRLF line endings are normalised.
 * @returns {Array<{level: number, title: string, slug: string, text: string, parent: (string|null)}>}
 *   Sections in document order.
 */
export function parseMarkdownToSections(md = "") {
  const HEADING_RE = /^(#{1,6})\s+(.+?)\s*$/;
  const FENCE_RE = /^ {0,3}(`{3,}|~{3,})/;

  const lines = (md || "").replace(/\r\n/g, "\n").split("\n");
  const nodes = [];
  let cur = null;
  // Marker (e.g. "```") of the fence we are currently inside, or null.
  let openFence = null;

  const flush = () => {
    if (!cur) return;
    const { textParts, ...section } = cur;
    nodes.push({ ...section, text: textParts.join("\n").trim() });
    cur = null;
  };

  for (const line of lines) {
    const fence = line.match(FENCE_RE)?.[1];
    if (fence) {
      if (openFence === null) {
        openFence = fence;
      } else if (
        fence[0] === openFence[0] &&
        fence.length >= openFence.length &&
        line.trim() === fence
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener and carries no info string.
        openFence = null;
      }
    }

    const h = openFence === null ? line.match(HEADING_RE) : null;
    if (h) {
      flush();
      const title = h[2].trim();
      cur = {
        level: h[1].length,
        title,
        slug: slugify(title),
        textParts: [],
      };
      continue;
    }

    if (!cur) {
      // No heading seen yet: gather the preamble into an intro section.
      cur = { level: 1, title: "導言", slug: "intro", textParts: [] };
    }
    cur.textParts.push(line);
  }
  flush();

  // Link each section to the nearest preceding section with a smaller level.
  const stack = [];
  for (const n of nodes) {
    while (stack.length && stack[stack.length - 1].level >= n.level) stack.pop();
    n.parent = stack.length ? stack[stack.length - 1].slug : null;
    stack.push(n);
  }
  return nodes;
}
/**
 * Create a breadcrumb resolver over a list of sections.
 *
 * Sections are looked up by slug; when several sections share a slug the last
 * one in the list wins. `getTrail(slug)` follows the `parent` links upwards
 * and returns the chain ordered from the outermost ancestor down to the
 * requested section. An unknown slug yields an empty array.
 *
 * @param {Array<{slug: string, title: string, level: number, parent?: (string|null)}>} [sections=[]]
 * @returns {{getTrail: (slug: string) => Array<{slug: string, title: string, level: number}>}}
 */
export function buildBreadcrumb(sections = []) {
  const bySlug = new Map(sections.map((s) => [s.slug, s]));

  const getTrail = (slug) => {
    const trail = [];
    // Duplicate slugs can make the parent links circular (e.g. "# b", "## a",
    // "### b"), so stop as soon as a section is visited twice.
    const seen = new Set();
    let cur = bySlug.get(slug);
    while (cur && !seen.has(cur)) {
      seen.add(cur);
      trail.push({ slug: cur.slug, title: cur.title, level: cur.level });
      if (!cur.parent) break;
      cur = bySlug.get(cur.parent);
    }
    return trail.reverse();
  };

  return { getTrail };
}
章節索引獨立存檔:`data/rag/<tenant>/<ns>.sections.json`。
注意:請先有 chunk 索引(Day 18/21/22)再建章節索引,因為要把 chunk 映射到章節。
// src/day23_sections.js
import fs from "fs";
import path from "path";
import { openai } from "./aiClient.js";
import { ensureTenantNS } from "./utils/tenantfs.js";
import { parseMarkdownToSections, buildBreadcrumb } from "./utils/md_ast.js";
// Embedding model id: OPENAI_EMBEDDING_MODEL when set to a non-empty value,
// otherwise "text-embedding-3-small".
const EMBED_MODEL = process.env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small";
/**
 * Embed a list of texts with EMBED_MODEL and return one vector per text, in
 * input order.
 *
 * Texts are sent in batches because the embeddings endpoint limits how many
 * inputs a single request may carry (2048 per the OpenAI API reference at the
 * time of writing; confirm the current limit before raising `batchSize`).
 * Batches run one after another rather than in parallel to stay gentle on
 * rate limits.
 *
 * @param {string[]} [texts=[]] - Texts to embed.
 * @param {number} [batchSize=256] - Maximum number of texts per request.
 * @returns {Promise<number[][]>} Embedding vectors aligned with `texts`.
 */
async function embedMany(texts = [], batchSize = 256) {
  if (!texts.length) return [];
  const size = Math.max(1, Math.floor(batchSize) || 1);
  const vectors = [];
  for (let start = 0; start < texts.length; start += size) {
    const res = await openai.embeddings.create({
      model: EMBED_MODEL,
      input: texts.slice(start, start + size),
    });
    // Each item carries the index of its input; order by it instead of
    // relying on response order (items without an index keep their position).
    const ordered = [...res.data].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    vectors.push(...ordered.map((d) => d.embedding));
  }
  return vectors;
}
/**
 * Cosine similarity between two numeric vectors.
 *
 * Iterates over the indices of `a` and reads the matching entries of `b`. A
 * tiny epsilon (1e-9) is added to the denominator so zero vectors produce 0
 * instead of a division by zero.
 *
 * @param {number[]} a - First vector; its length drives the iteration.
 * @param {number[]} b - Second vector, expected to have the same length as `a`.
 * @returns {number} Similarity score (roughly in [-1, 1]).
 */
function cosine(a, b) {
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (const [idx, x] of a.entries()) {
    const y = b[idx];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
  }
  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB) + 1e-9;
  return dot / denominator;
}
/**
 * Read the chunk index file and return its `index` array.
 *
 * @param {string} idxFile - Path of the chunk index JSON file.
 * @returns {Array<object>} The stored `index` entries, or an empty array when
 *   the file has no `index` field.
 * @throws {Error} When the file does not exist (the chunk index must be
 *   rebuilt first).
 */
function loadChunkIndex(idxFile) {
  const present = fs.existsSync(idxFile);
  if (!present) {
    throw new Error("找不到 chunk 索引,請先重建全文索引");
  }
  const raw = fs.readFileSync(idxFile, "utf-8");
  const { index } = JSON.parse(raw);
  return index || [];
}
/**
 * List the knowledge-base documents directly inside a directory.
 *
 * Only entries whose name ends in ".md" or ".txt" (case-insensitive) are
 * returned, and directories are skipped so callers can read every returned
 * path as a file.
 *
 * @param {string} kbDir - Directory to scan (not recursive).
 * @returns {string[]} Paths of the matching entries, each joined onto `kbDir`.
 */
function listDocs(kbDir) {
  return fs
    .readdirSync(kbDir, { withFileTypes: true })
    // The dot must be escaped: a bare "." also matches names such as "cmd".
    .filter((entry) => !entry.isDirectory() && /\.(md|txt)$/i.test(entry.name))
    .map((entry) => path.join(kbDir, entry.name));
}
/**
 * Build the section index for one tenant namespace and write it to
 * `<ns>.sections.json` next to the chunk index file.
 *
 * Steps: parse every document in the knowledge-base directory into sections,
 * attach to each section the ids of the chunks that belong to it, embed
 * "title + text" of every section, then persist sections and vectors as two
 * parallel arrays (same index = same section).
 *
 * The chunk index must already exist because chunks are mapped onto sections.
 *
 * @param {{tenant: string, ns: string}} params - Tenant and namespace.
 * @returns {Promise<{secFile: string, sections: number}>} Path of the written
 *   file and the number of sections indexed.
 * @throws {Error} When the chunk index file is missing.
 */
export async function buildSectionIndex({ tenant, ns }) {
  const { kbDir, idxFile } = ensureTenantNS(tenant, ns);
  const secFile = path.join(path.dirname(idxFile), `${ns}.sections.json`);
  const chunkIndex = loadChunkIndex(idxFile); // [{id, docId, text, vector}]

  // 1) Parse every document into sections tagged with their docId.
  const sections = listDocs(kbDir).flatMap((fp) => {
    const docId = path.basename(fp);
    const md = fs.readFileSync(fp, "utf-8");
    return parseMarkdownToSections(md).map((s) => ({ ...s, docId }));
  });

  // Group usable chunks by document once instead of re-filtering the whole
  // chunk index for every section.
  const chunksByDoc = new Map();
  for (const ch of chunkIndex) {
    if (!ch?.text) continue;
    const bucket = chunksByDoc.get(ch.docId);
    if (bucket) bucket.push(ch);
    else chunksByDoc.set(ch.docId, [ch]);
  }

  // 2) Attach chunk ids. Heuristic: a chunk belongs to a section when the
  //    first 48 characters of the chunk text occur in the section text.
  for (const s of sections) {
    const candidates = chunksByDoc.get(s.docId) ?? [];
    s.chunkIds = s.text
      ? candidates.filter((ch) => s.text.includes(ch.text.slice(0, 48))).map((ch) => ch.id)
      : [];
  }

  // 3) One embedding per section, computed from its title and body.
  const vecs = await embedMany(sections.map((s) => `${s.title}\n\n${s.text}`));

  fs.writeFileSync(
    secFile,
    JSON.stringify(
      {
        builtAt: Date.now(),
        model: EMBED_MODEL,
        sections,
        vectors: vecs, // parallel to `sections`; same index = same section
      },
      null,
      2
    ),
    "utf-8"
  );
  return { secFile, sections: sections.length };
}
/**
 * Read and parse the section index file.
 *
 * @param {string} secFile - Path of the section index JSON file.
 * @returns {object} The parsed JSON document.
 * @throws {Error} When the file does not exist (the index must be rebuilt).
 */
function loadSectionIndex(secFile) {
  if (fs.existsSync(secFile)) {
    const raw = fs.readFileSync(secFile, "utf-8");
    return JSON.parse(raw);
  }
  throw new Error("章節索引不存在,請先重建");
}
/**
 * Section-first retrieval: rank sections against the query, collect the
 * chunks attached to the best sections, then re-rank those chunks against the
 * same query vector.
 *
 * @param {object} params
 * @param {string} params.tenant - Tenant id.
 * @param {string} params.ns - Namespace.
 * @param {string} params.query - User question to embed.
 * @param {number} [params.topS=5] - Number of sections to keep.
 * @param {number} [params.topK=8] - Number of chunks to return.
 * @returns {Promise<{sections: Array<object>, chunks: Array<object>}>}
 *   Top sections and top chunks, each entry extended with a cosine `score`
 *   and sorted by that score in descending order. Chunk entries are copies of
 *   the chunk index records.
 * @throws {Error} When the chunk index or the section index file is missing.
 */
export async function retrieveBySections({ tenant, ns, query, topS = 5, topK = 8 }) {
  const { idxFile } = ensureTenantNS(tenant, ns);
  const secFile = path.join(path.dirname(idxFile), `${ns}.sections.json`);
  const chunkIndex = loadChunkIndex(idxFile);
  const secIndex = loadSectionIndex(secFile); // {sections, vectors}

  const byScoreDesc = (a, b) => b.score - a.score;

  // 1) Rank sections; `vectors` is parallel to `sections`.
  const [qv] = await embedMany([query]);
  const scoredSecs = secIndex.sections
    .map((s, i) => ({ ...s, score: cosine(qv, secIndex.vectors[i]) }))
    .sort(byScoreDesc)
    .slice(0, topS);

  // 2) Candidate chunks are the ones attached to the selected sections.
  const candChunkIds = new Set(scoredSecs.flatMap((s) => s.chunkIds || []));

  // 3) Re-rank candidates using the vectors already stored in the chunk index.
  const reranked = chunkIndex
    .filter((ch) => candChunkIds.has(ch.id))
    .map((ch) => ({ ...ch, score: cosine(qv, ch.vector) }))
    .sort(byScoreDesc)
    .slice(0, topK);

  return { sections: scoredSecs, chunks: reranked };
}
/**
 * Resolve the breadcrumb trail for one section so the frontend can display it.
 *
 * Loads the stored section index of the namespace and follows the section's
 * parent links.
 *
 * @param {{tenant: string, ns: string, sectionSlug: string}} params
 * @returns {Array<{slug: string, title: string, level: number}>} Trail from the
 *   outermost ancestor down to the requested section.
 * @throws {Error} When the section index file is missing.
 */
export function sectionBreadcrumbs({ tenant, ns, sectionSlug }) {
  const { idxFile } = ensureTenantNS(tenant, ns);
  const secFile = path.join(path.dirname(idxFile), `${ns}.sections.json`);
  const { sections } = loadSectionIndex(secFile);
  const { getTrail } = buildBreadcrumb(sections);
  return getTrail(sectionSlug);
}
// Route segment runtime setting.
export const runtime = "nodejs";

/**
 * POST handler that rebuilds the section index for the tenant/namespace taken
 * from the route params.
 *
 * Responds 403 when the authenticated user is a viewer; otherwise responds
 * with `{ ok: true, secFile, sections }` from buildSectionIndex.
 *
 * NOTE(review): NextResponse, withAuth and buildSectionIndex are used here but
 * their imports are not part of this snippet; confirm the route file imports
 * them.
 */
export const POST = withAuth(async (req, ctx) => {
  const { role } = ctx.user;
  if (role === "viewer") {
    return NextResponse.json({ ok: false, error: "Editor/Admin only" }, { status: 403 });
  }

  const { tenant, ns } = ctx.params;
  const result = await buildSectionIndex({ tenant, ns });
  return NextResponse.json({ ok: true, ...result });
}, ["editor", "admin"]);
加入 strategy:default(沿用 Day 19 高亮)或 section(章節優先)。
import { NextResponse } from "next/server";
import { withAuth } from "../../../../../src/api/withAuth.js";
import { answerWithRAG, answerWithRAGHighlighted } from "../../../../../src/day16_rag_store.js";
import { retrieveBySections } from "../../../../../src/day23_sections.js";
import { openai } from "../../../../../src/aiClient.js";
import { alignAnswerToSources } from "../../../../../src/day19_highlight.js";
// Route segment runtime setting.
export const runtime = "nodejs";

/**
 * POST handler for knowledge-base questions.
 *
 * Body: `{ q, highlight = true, strategy = "default" }`.
 * - `strategy: "section"` retrieves chunks through the section index, asks the
 *   chat model to answer from those chunks only and, when `highlight` is on,
 *   aligns the answer to its source chunks.
 * - Any other strategy falls back to the Day 19 RAG path.
 *
 * Responds 400 with "q 必填" when `q` is missing, blank or not a string.
 */
export const POST = withAuth(async (req, ctx) => {
  const { tenant, ns } = ctx.params;

  // A malformed or null JSON body is treated as an empty payload so the caller
  // gets the 400 below instead of an unhandled exception.
  const payload = (await req.json().catch(() => null)) ?? {};
  const { q, highlight = true, strategy = "default" } = payload;
  if (typeof q !== "string" || !q.trim()) {
    return NextResponse.json({ ok:false, error:"q 必填" }, { status:400 });
  }

  if (strategy === "section") {
    // Section-first retrieval
    const { sections, chunks } = await retrieveBySections({ tenant, ns, query: q, topS: 5, topK: 8 });

    // Use the retrieved chunks as context (same answer prompt as the default path)
    const ctxText = chunks.map((h, i) => {
      // Show the part after "#" of the chunk id; fall back to the whole id when
      // it has no "#" or is not a string.
      const idText = String(h.id);
      const part = idText.split("#")[1] ?? idText;
      return `# 片段${i+1}(${h.docId}|${part},score=${h.score.toFixed(3)})\n${h.text}`;
    }).join("\n\n");

    const res = await openai.chat.completions.create({
      model:"gpt-4o-mini", temperature:0.2,
      messages:[
        { role:"system", content:"你是嚴謹的客服知識庫助理。僅依據提供片段回答;不足請明確說明。" },
        { role:"user", content:`問題:${q}\n\n片段:\n${ctxText}\n\n請用繁體中文回答,先結論、再步驟、最後列注意事項。` }
      ]
    });
    const answer = res.choices?.[0]?.message?.content?.trim() || "目前找不到足夠資訊。";

    if (highlight) {
      // Alignment needs chunk vectors; the chunks come from the index and carry them.
      // NOTE(review): 0.27 is presumably a similarity threshold; confirm against alignAnswerToSources.
      const aligned = await alignAnswerToSources(answer, chunks, 0.27);

      // Section metadata for the breadcrumb display
      const secMeta = sections.map((s, idx) => ({
        rank: idx + 1,
        docId: s.docId,
        slug: s.slug,
        title: s.title,
        level: s.level,
        score: s.score,
      }));
      const sourceChunks = chunks.map((h, i) => ({
        displayIndex: i + 1, docId: h.docId, id: h.id, score: h.score, text: h.text
      }));

      return NextResponse.json({
        ok: true,
        strategy: "section",
        answer,
        answerHtml: aligned.html,
        spans: aligned.spans,
        sources: aligned.sources,
        sourceChunks,
        sections: secMeta
      });
    }

    // Compact response without highlighting
    return NextResponse.json({
      ok: true, strategy: "section",
      answer,
      sources: chunks.map(h=>({ id:h.id, docId:h.docId, score:h.score })),
      sections: sections.map(s=>({ docId:s.docId, slug:s.slug, title:s.title, score:s.score }))
    });
  }

  // === default: reuse the Day 19 highlight path ===
  if (highlight) {
    const out = await answerWithRAGHighlighted({ tenant, ns, query: q, topK: 4 });
    return NextResponse.json({ ok:true, strategy: "default", ...out });
  }
  const { answer, sources } = await answerWithRAG({ tenant, ns, query: q, topK: 4 });
  return NextResponse.json({ ok:true, strategy: "default", answer, sources });
}, ["viewer","editor","admin"]);
僅列需要新增/替換的片段;其餘維持 Day 19 版本。
(A) 新增 state
// Retrieval strategy sent with each ask request: "default" or "section".
const [strategy, setStrategy] = useState<"default"|"section">("default");
// Section metadata taken from the `sections` field of the latest ask response;
// used for the section breadcrumb display.
const [secMeta, setSecMeta] = useState<Array<{rank:number;docId:string;slug:string;title:string;level:number;score:number}>>([]);
(B) 問答請求改帶 strategy
// Ask the knowledge base, passing the selected retrieval strategy along.
const r = await fetch(`/api/kb/${tenant}/${ns}/ask`, {
  method: "POST",
  headers: { "Content-Type": "application/json", ...authHeaders() },
  body: JSON.stringify({ q: askQ, highlight: true, strategy }),
});
const j = await r.json();
// The API reports failures as { ok: false, error }.
if (!j.ok) throw new Error(j.error);
setAnswer(j.answer);
setAnswerHtml(j.answerHtml || "");
setSources(j.sources || []);
setChunks(j.sourceChunks || []);
// `sections` is only present for the section strategy; clear it otherwise.
setSecMeta(j.sections || []);
(C) 於 UI 加入策略切換(放在問答區塊的輸入列上方或旁邊)
(D) 在回答下方顯示章節 Breadcrumb(僅當 strategy=section)
{strategy === "section" && secMeta.length > 0 && (
新增 API(Studio 可綁一顆按鈕)
package.json(可加便捷指令)
{
"scripts": {
"day23:reindex:sections": "curl -s -X POST http://localhost:3000/api/kb/acme/faq/reindex-sections -H 'Authorization: Bearer <YOUR_TOKEN>'"
}
}