📦 安裝(新增相依)
npm i csv-parse simple-statistics
🆕 程式碼
/** 讀 JSON:支援 Array 或 {data: Array} 兩種結構 */
export function readJson(filePath) {
const raw = fs.readFileSync(filePath, "utf-8");
const obj = JSON.parse(raw);
if (Array.isArray(obj)) return obj;
if (Array.isArray(obj?.data)) return obj.data;
throw new Error("JSON 結構不支援:需要 Array 或 {data: Array}");
}
/** 讀 CSV(UTF-8),回傳 Array */
export function readCsv(filePath) {
const raw = fs.readFileSync(filePath, "utf-8");
const recs = parse(raw, {
columns: true,
skip_empty_lines: true,
trim: true,
});
return recs;
}
/** 取副檔名(小寫) */
export function extnameLower(p) {
return (p.split(".").pop() || "").toLowerCase();
}
/** 工具:安全轉數字 */
function toNum(x) {
if (x === null || x === undefined || x === "") return NaN;
const n = Number(String(x).replace(/,/g, ""));
return Number.isFinite(n) ? n : NaN;
}
/** 推斷欄位型別(簡化且實用) */
function inferType(values) {
let nNum = 0, nDate = 0, nBool = 0, nEmpty = 0;
const SAMPLE = values.slice(0, 1000);
for (const v of SAMPLE) {
if (v === null || v === undefined || v === "") { nEmpty++; continue; }
const s = String(v).trim();
if (/^(true|false)$/i.test(s)) { nBool++; continue; }
const nn = toNum(s);
if (Number.isFinite(nn)) { nNum++; continue; }
// Date(鬆判)
const d = new Date(s);
if (!isNaN(d.valueOf())) { nDate++; continue; }
}
const k = SAMPLE.length - nEmpty;
if (k === 0) return "text";
const pNum = nNum / k, pDate = nDate / k, pBool = nBool / k;
if (pNum > 0.7) return "number";
if (pDate > 0.7) return "date";
if (pBool > 0.7) return "boolean";
return "text";
}
/** 欄位統計:依型別回傳統計資料 */
function profileColumn(values, type) {
const n = values.length;
const empty = values.filter(v => v === null || v === undefined || v === "").length;
const distinct = new Set(values.filter(v => v !== "" && v !== null && v !== undefined).map(String)).size;
const common = { count: n, missing: empty, missingRate: n ? empty / n : 0, distinct };
if (type === "number") {
const nums = values.map(toNum).filter(Number.isFinite);
if (nums.length === 0) return { ...common, stats: null, outliers: [] };
const sorted = nums.slice().sort((a, b) => a - b);
const q1 = ss.quantileSorted(sorted, 0.25);
const p50 = ss.medianSorted(sorted);
const q3 = ss.quantileSorted(sorted, 0.75);
const iqr = q3 - q1;
const lower = q1 - 1.5 * iqr;
const upper = q3 + 1.5 * iqr;
const outliers = nums.filter(v => v < lower || v > upper);
return {
...common,
stats: {
mean: ss.mean(nums),
stdev: nums.length > 1 ? ss.standardDeviation(nums) : 0,
min: sorted[0],
p25: q1,
p50,
p75: q3,
max: sorted[sorted.length - 1],
iqr,
skew: (ss.mean(nums) - p50) / (ss.standardDeviation(nums) + 1e-9),
},
outliersSample: outliers.slice(0, 10),
outliersCount: outliers.length,
};
}
if (type === "boolean") {
const trues = values.filter(v => String(v).toLowerCase() === "true").length;
const falses = values.filter(v => String(v).toLowerCase() === "false").length;
return { ...common, bool: { true: trues, false: falses } };
}
if (type === "date") {
// 取日級別分佈
const days = new Map();
for (const v of values) {
const d = new Date(v);
if (isNaN(d.valueOf())) continue;
const key = d.toISOString().slice(0, 10);
days.set(key, (days.get(key) || 0) + 1);
}
const series = [...days.entries()].sort((a, b) => a[0].localeCompare(b[0])).map(([k, c]) => ({ day: k, count: c }));
return { ...common, timeline: series.slice(0, 200) };
}
// text / 其他 → 類別 Top-K
const counts = new Map();
for (const v of values) {
if (v === null || v === undefined || v === "") continue;
const key = String(v);
counts.set(key, (counts.get(key) || 0) + 1);
}
const topK = [...counts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 10)
.map(([value, cnt]) => ({ value, count: cnt }));
return { ...common, topK };
}
/** 健康檢查:可快速抓問題欄 */
function healthChecks(rows, schema) {
const issues = [];
if (!rows.length) return { issues };
// 重複列
const keySeen = new Set();
let dup = 0;
for (const r of rows) {
const sig = JSON.stringify(r);
if (keySeen.has(sig)) dup++;
else keySeen.add(sig);
}
if (dup > 0) issues.push({ type: "duplicate_rows", count: dup, message: 偵測到 ${dup} 筆重複列
});
// 空值/常值欄位、疑似 ID 欄位
for (const col of Object.keys(schema)) {
const vals = rows.map(r => r[col]);
const nonEmpty = vals.filter(v => v !== "" && v !== null && v !== undefined);
const distinct = new Set(nonEmpty.map(String));
if (nonEmpty.length === 0) issues.push({ type: "all_missing", column: col, message: "此欄幾乎全為缺失" });
if (distinct.size === 1 && nonEmpty.length > 0) issues.push({ type: "constant", column: col, message: "此欄幾乎為常值,資訊量低" });
if (distinct.size === nonEmpty.length && nonEmpty.length === rows.length && rows.length >= 20)
issues.push({ type: "possible_id", column: col, message: "此欄每列唯一,疑似 ID 欄位" });
}
return { issues };
}
/** 主流程:資料分析 */
export async function analyzeData({ filePath, rows: inputRows, llmInsight = true }) {
let rows = inputRows;
if (!rows && !filePath) throw new Error("請提供 filePath 或 rows(陣列)");
if (!rows) {
const ext = extnameLower(filePath);
if (ext === "csv") rows = readCsv(filePath);
else if (ext === "json") rows = readJson(filePath);
else throw new Error("只支援 .csv / .json");
}
if (!Array.isArray(rows) || rows.length === 0) throw new Error("資料為空或格式不正確");
const columns = Object.keys(rows[0]);
const schema = {};
for (const c of columns) {
schema[c] = inferType(rows.map(r => r[c]));
}
const profiles = {};
for (const c of columns) {
profiles[c] = profileColumn(rows.map(r => r[c]), schema[c]);
}
const health = healthChecks(rows, schema);
// AI 洞察(可關閉)
let insights = null;
if (llmInsight) {
const brief = {
rows: rows.length,
cols: columns.length,
schema,
pick: Object.fromEntries(
columns.slice(0, 5).map(c => [c, {
type: schema[c],
...("stats" in (profiles[c] || {}) ? { stats: profiles[c].stats } : {}),
...("topK" in (profiles[c] || {}) ? { topK: profiles[c].topK } : {}),
}])
),
issues: health.issues.slice(0, 8),
};
const res = await openai.chat.completions.create({
model: "gpt-4o-mini",
temperature: 0.2,
messages: [
{ role: "system", content: "你是嚴謹的資料分析顧問,請用繁體中文回覆。" },
{ role: "user", content:
`資料概況(JSON):
${JSON.stringify(brief, null, 2)}
請產出:
const result = {
rows: rows.length,
columns,
schema, // 欄位型別
profiles, // 欄位統計
health, // 健康檢查
insights, // LLM 洞察(可為 null)
createdAt: new Date().toISOString(),
};
// 存檔
const outDir = path.join("outputs", "data");
if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true });
const stamp = Date.now();
const jsonPath = path.join(outDir, analysis_${stamp}.json
);
const mdPath = path.join(outDir, analysis_${stamp}.md
);
fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
// Markdown 摘要(精簡)
const md = [# Data Analysis (${stamp})
,- 產出:${result.createdAt}
,- 列數:${result.rows},欄位數:${result.columns.length}
,## 欄位型別
,
...Object.entries(schema).map(([k, v]) => - \
${k}`: ${v}),
\n## 健康檢查, ...(health.issues.length ? health.issues.map(i =>
- [${i.type}] ${i.column ? \
${i.column}`:: ""}${i.message}${i.count ?
(${i.count}) : ""}
) : ["- (無明顯問題)"]),\n## 洞察
,
insights || "(如需洞察,請開啟 llmInsight)",
].join("\n");
fs.writeFileSync(mdPath, md, "utf-8");
return { jsonPath, mdPath, meta: result };
}
const args = Object.fromEntries(
process.argv.slice(2).reduce((acc, cur, i, arr) => {
if (cur.startsWith("--")) {
const key = cur.replace(/^--/, "");
const val = arr[i + 1] && !arr[i + 1].startsWith("--") ? arr[i + 1] : true;
acc.push([key, val]);
}
return acc;
}, [])
);
async function main() {
const task = args.task || "chat";
if (task === "data") {
const filePath = args.file || "";
const noLLM = String(args["no-llm"] || "false").toLowerCase() === "true";
if (!filePath || !fs.existsSync(filePath)) throw new Error("請提供存在的檔案路徑 --file <path.csv|json>");
const out = await analyzeData({ filePath, llmInsight: !noLLM });
console.log("\n=== 數據分析完成 ===");
console.log("- JSON:", out.jsonPath);
console.log("- Markdown:", out.mdPath);
console.log("\n重點:");
console.log(列數=${out.meta.rows} 欄位數=${out.meta.columns.length} 問題數=${out.meta.health.issues.length}
);
} else {
// ...保留你原本的其他 task 分支
}
}
main().catch(e => { console.error(e); process.exit(1); });
package.json(新增 scripts)
{
"scripts": {
"day20:csv": "node index.js --task data --file sample/data.csv",
"day20:json": "node index.js --task data --file sample/data.json",
"day20:csv:nollm": "node index.js --task data --file sample/data.csv --no-llm true"
}
}
Next.js API:app/api/data/analyze/route.js(新增)
// app/api/data/analyze/route.js
import { NextResponse } from "next/server";
import { analyzeData } from "../../../../src/day20_data_analyst.js";
export const runtime = "nodejs";
export async function POST(req) {
try {
const ctype = req.headers.get("content-type") || "";
if (ctype.includes("multipart/form-data")) {
const form = await req.formData();
const file = form.get("file");
if (!file) return NextResponse.json({ ok:false, error:"缺少檔案" }, { status:400 });
const buf = Buffer.from(await file.arrayBuffer());
// 寫入暫存再分析
const name = file.name || upload_${Date.now()}
;
const tmp = /tmp/${Date.now()}_${name}
;
await Bun.write?.(tmp, buf); // 若你不是 bun,改用 fs.writeFileSync
// fallback(Node)
try { require("fs").writeFileSync(tmp, buf); } catch {}
const out = await analyzeData({ filePath: tmp, llmInsight: true });
return NextResponse.json({ ok:true, ...out.meta });
} else {
const body = await req.json();
const rows = body?.rows;
if (!Array.isArray(rows)) return NextResponse.json({ ok:false, error:"JSON 請提供 rows:Array" }, { status:400 });
const out = await analyzeData({ rows, llmInsight: true });
return NextResponse.json({ ok:true, ...out.meta });
}
} catch (e) {
return NextResponse.json({ ok:false, error: e.message || "Server error" }, { status:500 });
}
}
你若不用 Bun,保留 fs.writeFileSync 即可(我已做 fallback )。
type Schema = Record<string, string>;
type Issue = { type:string; column?:string; message:string; count?:number };
export default function DataAnalystPage() {
const [file, setFile] = useState<File|null>(null);
const [loading, setLoading] = useState(false);
const [rows, setRows] = useState(0);
const [cols, setCols] = useState<string[]>([]);
const [schema, setSchema] = useState({});
const [issues, setIssues] = useState<Issue[]>([]);
const [insights, setInsights] = useState("");
const [err, setErr] = useState("");
async function analyze() {
if (!file) { setErr("請選擇檔案"); return; }
setLoading(true); setErr(""); setInsights("");
try {
const fd = new FormData();
fd.append("file", file);
const r = await fetch("/api/data/analyze", { method: "POST", body: fd });
const j = await r.json();
if (!j.ok) throw new Error(j.error);
setRows(j.rows || 0);
setCols(j.columns || []);
setSchema(j.schema || {});
setIssues(j.health?.issues || []);
setInsights(j.insights || "");
} catch (e:any) {
setErr(e.message || "分析失敗");
} finally {
setLoading(false);
}
}
return (
Day 20|AI 數據分析助手
{err && <div className="alert alert-error"><span>{err}</span></div>}
<div className="card bg-base-100 shadow">
<div className="card-body space-y-3">
<input type="file" accept=".csv,.json" className="file-input file-input-bordered"
onChange={e=>setFile(e.target.files?.[0] || null)} />
<button className={`btn btn-primary ${loading ? "btn-disabled" : ""}`} onClick={analyze}>
{loading ? "分析中..." : "開始分析"}
</button>
</div>
</div>
{rows > 0 && (
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
<div className="card bg-base-100 shadow">
<div className="card-body">
<h2 className="card-title">資料概況</h2>
<p>列數:{rows}</p>
<p>欄位數:{cols.length}</p>
<div className="mt-3">
<h3 className="font-semibold">欄位型別</h3>
<ul className="list-disc pl-6">
{Object.entries(schema).map(([k,v])=>(
<li key={k}><code>{k}</code>:{v}</li>
))}
</ul>
</div>
</div>
</div>
<div className="card bg-base-100 shadow">
<div className="card-body">
<h2 className="card-title">健康檢查</h2>
{issues.length === 0 ? (
<div>(無明顯問題)</div>
) : (
<ul className="list-disc pl-6">
{issues.map((i, idx)=>(
<li key={idx}>
<span className="badge badge-outline mr-2">{i.type}</span>
{i.column && <code className="mr-1">{i.column}</code>}
{i.message} {i.count ? `(${i.count})` : ""}
</li>
))}
</ul>
)}
</div>
</div>
{insights && (
<div className="card bg-base-100 shadow lg:col-span-2">
<div className="card-body">
<h2 className="card-title">AI 洞察(摘要)</h2>
<div className="whitespace-pre-wrap">{insights}</div>
</div>
</div>
)}
</div>
)}
</div>
</div>
);
}
▶️ 使用方式
CLI
npm run day20:csv --silent
npm run day20:json --silent
npm run day20:csv:nollm --silent
前端
npm run dev