🆕 程式碼
/**
 * Quantizes a float vector into signed 8-bit integers.
 *
 * Each component is clamped to [-1, 1], scaled by 127 and rounded to the
 * nearest integer, so every output value lies in [-127, 127]. NaN components
 * become 0 (the typed-array conversion maps NaN to 0).
 *
 * @param {Iterable<number>|ArrayLike<number>} [vec=[]] - Components, expected in [-1, 1];
 *   values outside that range are clamped.
 * @returns {Int8Array} Quantized vector with the same length as `vec`.
 */
export function quantizeVector(vec = []) {
  // Round explicitly: the Int8Array conversion on its own truncates toward
  // zero, which biases every component toward 0 and doubles the worst-case
  // quantization error (e.g. 0.999 would become 126 instead of 127).
  return Int8Array.from(vec, (v) => Math.round(Math.max(-1, Math.min(1, v)) * 127));
}
/**
 * Restores an approximate float vector from quantized int8 components.
 *
 * Every component is divided by 127, the scale factor used when quantizing.
 *
 * @param {Iterable<number>|ArrayLike<number>} qvec - Quantized components.
 * @returns {number[]} Plain array of approximate float values.
 */
export function dequantizeVector(qvec) {
  const scale = 127;
  const restore = (component) => component / scale;
  return Array.from(qvec, restore);
}
/**
 * Approximate cosine similarity between a quantized vector and a float vector.
 *
 * The quantized vector is dequantized first; the resulting score is an
 * approximation whose relative values stay close to the unquantized ones.
 *
 * @param {Iterable<number>|ArrayLike<number>} qv - Quantized (int8) vector.
 * @param {ArrayLike<number>} fv - Float vector, indexed in step with `qv`.
 * @returns {number} Approximate cosine similarity.
 */
export function cosineQ(qv, fv) {
  const approx = dequantizeVector(qv);
  let dotProduct = 0;
  let sqNormQ = 0;
  let sqNormF = 0;
  approx.forEach((a, idx) => {
    const b = fv[idx];
    dotProduct += a * b;
    sqNormQ += a * a;
    sqNormF += b * b;
  });
  // The small epsilon keeps the division defined when either norm is zero.
  const denominator = Math.sqrt(sqNormQ) * Math.sqrt(sqNormF) + 1e-9;
  return dotProduct / denominator;
}
假設你原本有 buildIndex 和 answerWithRAG,現在加一個 buildIndexQuantized。
import { quantizeVector } from "./day26_quantize.js";
...
/**
 * Builds a quantized index for one tenant/namespace knowledge base.
 *
 * Reads every `.md` / `.txt` file in the knowledge-base directory, chunks and
 * embeds its text, quantizes each embedding to int8 and writes the result as
 * JSON next to the regular index file.
 *
 * @param {{ tenant: string, ns: string }} params - Tenant and namespace identifiers.
 * @returns {Promise<{ outFile: string, chunks: number }>} Path of the written
 *   file and the number of indexed chunks.
 * @throws {Error} When the embedder returns no vector for a chunk.
 */
export async function buildIndexQuantized({ tenant, ns }) {
  const { kbDir, idxFile } = ensureTenantNS(tenant, ns);
  // Only rewrite a trailing ".json" extension. A plain string replace would hit
  // the first ".json" anywhere in the path, and if none matched the output
  // path would equal idxFile and overwrite the unquantized index.
  const jsonExt = /\.json$/i;
  const outFile = jsonExt.test(idxFile)
    ? idxFile.replace(jsonExt, ".qindex.json")
    : `${idxFile}.qindex.json`;
  const files = fs.readdirSync(kbDir).filter((f) => /\.md$|\.txt$/i.test(f));
  const index = [];
  for (const f of files) {
    const docId = path.basename(f);
    const text = fs.readFileSync(path.join(kbDir, f), "utf-8");
    const chunks = chunkTextSmart(text); // chunking helper introduced on Day 18
    const vecs = await embedMany(chunks.map((c) => c.text));
    chunks.forEach((c, i) => {
      // Fail loudly rather than letting quantizeVector's default store an
      // empty vector for a chunk the embedder skipped.
      if (vecs[i] == null) {
        throw new Error(`Missing embedding for chunk ${docId}#${i}`);
      }
      index.push({
        id: `${docId}#${i}`,
        docId,
        text: c.text,
        vectorQ: Array.from(quantizeVector(vecs[i])), // stored as a plain array of int8 values
      });
    });
  }
  fs.writeFileSync(
    outFile,
    JSON.stringify({ builtAt: Date.now(), quantized: true, index }, null, 2)
  );
  return { outFile, chunks: index.length };
}
支援壓縮索引建置:
import { buildIndex, buildIndexQuantized } from "../../../../../src/day16_rag_store.js";
...
// Reindex route handler, shown as a diff: lines starting with "-" are removed,
// lines starting with "+" are added.
// Before: always called buildIndex for the tenant/ns taken from ctx.params.
// After: reads the `quantize` query parameter from the request URL and calls
// buildIndexQuantized when it equals "1", otherwise buildIndex; the JSON
// response additionally reports the `quantize` flag.
// The handler is wrapped in withAuth with ["editor","admin"] — presumably the
// roles allowed to call it; confirm against withAuth's definition.
export const POST = withAuth(async (req, ctx) => {
const { tenant, ns } = ctx.params;
- const out = await buildIndex({ tenant, ns });
- return NextResponse.json({ ok:true, ...out });
+ const url = new URL(req.url);
+ const quantize = url.searchParams.get("quantize")==="1";
+ const out = quantize ? await buildIndexQuantized({ tenant, ns }) : await buildIndex({ tenant, ns });
+ return NextResponse.json({ ok:true, quantize, ...out });
}, ["editor","admin"]);
// UI fragment: `useQuant` is component state (initially false) bound to a
// toggle checkbox; changing the checkbox stores its checked value in state.
const [useQuant, setUseQuant] = useState(false);
...
<div className="form-control">
<label className="label cursor-pointer">
<span className="label-text">使用壓縮索引</span>
<input type="checkbox" className="toggle toggle-sm" checked={useQuant} onChange={e=>setUseQuant(e.target.checked)} />
</label>
</div>
發問時,前端仍傳 strategy: "default";若 useQuant=true,後端會先嘗試載入 .qindex.json,找不到該檔(或 useQuant=false)時,則 fallback 使用原始索引。
▶️ 驗收流程
重建壓縮索引:
curl -X POST http://localhost:3000/api/kb/acme/faq/reindex?quantize=1 -H "Authorization: Bearer <TOKEN>"
→ 產生 faq.qindex.json。
Studio 打勾「使用壓縮索引」,再發問。
記憶體使用量明顯下降(理論上每個維度由 float32 的 4 bytes 降為 int8 的 1 byte,約縮小 4 倍;前提是載入索引後以 Int8Array 保存向量,而不是一般的數字陣列)。
答案跟原始索引差不多(可能有細微差異)。