So far the Agent can only do "ask → answer → (re-answer if needed)"; it still can't check its own logic or plan its next step.
So today's goal is to give it:
Self-reflection (Reflection):
After answering, it automatically checks whether the answer might be wrong or the information insufficient.
Task planning (Planning):
When it hits a compound question (e.g. "Please explain Article 14 and compare it with Article 20"),
the Agent can break it down into sub-tasks.
As before, we only add new files; please don't delete the old folders ><
project/
├─ agent/
│   └─ agent_reflect.py    # single agent + self-reflection + simple task planning
└─ utils/
    ├─ planner.py          # simple task decomposer (splits multi-step questions)
    └─ reflection.py       # reflection module: post-answer self-check and re-verification
utils/planner.py
This module catches the「第N條」(Article N) and「比較」(compare) keywords and turns them into executable steps.
from __future__ import annotations
import re
from typing import List, Dict, Any
# Chinese numerals to Arabic digits (simplified version)
_CN = {"零":0,"一":1,"二":2,"兩":2,"三":3,"四":4,"五":5,"六":6,"七":7,"八":8,"九":9}
def _cn2int(s: str) -> int:
s = s.strip()
if s == "十": return 10
if "十" in s:
L, *R = s.split("十")
t = _CN.get(L, 1) if L else 1
o = _CN.get(R[0], 0) if R and R[0] else 0
return t*10 + o
return _CN.get(s, 0)
def _detect_articles(q: str) -> List[int]:
arts: List[int] = []
    # Arabic digits (halfwidth and fullwidth)
    for m in re.finditer(r"第\s*([0-9０-９]{1,3})\s*條", q):
        # normalize fullwidth digits to ASCII before int()
        n = int(re.sub(r"[０-９]", lambda d: chr(ord(d.group()) - 65248), m.group(1)))
        arts.append(n)
    # Chinese numerals
for m in re.finditer(r"第\s*([一二三四五六七八九十兩零]{1,3})\s*條", q):
v = _cn2int(m.group(1))
if v:
arts.append(v)
    # in case the user writes e.g.「比較14和20」without the「第...條」pattern
    if (not arts) and ("比較" in q or "對比" in q):
        nums = re.findall(r"([0-9０-９]{1,3})", q)
        for s in nums:
            n = int(re.sub(r"[０-９]", lambda d: chr(ord(d.group()) - 65248), s))
            arts.append(n)
    # de-duplicate and sort
arts = sorted(list({a for a in arts if 1 <= a <= 300}))
return arts
def plan(query: str) -> List[Dict[str, Any]]:
"""
產生可執行步驟:
- 有條號 → 先逐條查原文
- 若包含「比較/對比」且條數≥2 → 追加 compare 步驟
- 其餘 → 走 RAG 步驟
回傳每步驟:{"type": "article"|"compare"|"rag", "payload":..., "desc":...}
"""
steps: List[Dict[str, Any]] = []
arts = _detect_articles(query)
if arts:
for n in arts:
steps.append({"type": "article", "payload": {"no": n}, "desc": f"查第{n}條原文"})
if ("比較" in query or "對比" in query) and len(arts) >= 2:
steps.append({
"type": "compare",
"payload": {"articles": arts[:2]}, # 簡化:只比前兩個
"desc": f"比較第{arts[0]}條與第{arts[1]}條"
})
return steps
    # no article number found → fall back to RAG
steps.append({"type": "rag", "payload": {"q": query, "k": 4}, "desc": "RAG 檢索與回答"})
return steps
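To get a feel for what the planner produces, here is a minimal sanity check (assuming you run it from the project/ root so that utils/ is importable):
# Minimal sanity check for the planner (run from the project/ root).
from utils.planner import plan

for step in plan("請比較第14條與第20條的差異"):
    print(step["type"], "-", step["desc"])
# expected: two "article" steps (Articles 14 and 20), then one "compare" step

for step in plan("什麼是關鍵基礎設施?"):
    print(step["type"], "-", step["desc"])
# expected: a single "rag" step, since no article number appears in the question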
utils/reflection.py
A small post-answer "self-reflection" module: based on the metric scores plus a short LLM check, it decides whether the answer needs fixing and suggests how to improve the query.
from __future__ import annotations
from typing import Dict, Any
from rag.connect import ask_ollama
_REFLECT_PROMPT = """
你是嚴格的法規審查助手。請檢查下面的「問題」與「回答」,是否有以下情況:
1) 關鍵條文未引用或未逐字呈現
2) 定義含糊、缺少條列重點
3) 沒有標示來源索引或條號
請用非常精簡的一行輸出:
need_fix=<yes|no>; reason=<最重要的一個原因>; improved_hint=<要補充的方向或關鍵詞>
【問題】
{q}
【回答】
{a}
【評分(供參考)】
{scores}
""".strip()
def reflect_and_suggest(question: str, answer: str, scores: Dict[str, float], model: str = "mistral") -> Dict[str, Any]:
"""
回傳:
{
"need_fix": bool,
"reason": str,
"improved_query": str # 在原問題後面附加提示
}
"""
try:
r = ask_ollama(_REFLECT_PROMPT.format(q=question, a=answer, scores=scores), model=model)
        # expected format: need_fix=yes; reason=...; improved_hint=...
need = ("need_fix=yes" in r.lower())
reason = ""
hint = ""
for part in r.split(";"):
if "reason=" in part:
reason = part.split("=", 1)[1].strip()
if "improved_hint=" in part:
hint = part.split("=", 1)[1].strip()
improved_query = f"{question}\n\n[補充方向] {hint or reason}".strip()
return {"need_fix": need, "reason": reason or "品質需補強", "improved_query": improved_query}
except Exception:
return {"need_fix": False, "reason": "", "improved_query": question}
agent/agent_reflect.py
Combines the single agent with planning and reflection.
from __future__ import annotations
import sys, json, argparse, time
from pathlib import Path
from datetime import datetime
# make project/* importable
sys.path.append(str(Path(__file__).resolve().parents[1]))
from utils.planner import plan
from utils.memory import remember, memory_lines
from utils.metrics import compute_metrics
from utils.reflection import reflect_and_suggest
from rag.connect import (
    article_lookup,   # direct lookup of an article's full text (we output it verbatim)
    search_chunks,    # RAG retrieval
    build_prompt,     # build the prompt (including memory)
    ask_ollama,       # call Ollama
)
# helpers
def _fix_article_block(raw: str, n: int | None = None) -> str:
head = f"(來源:第{n}條)" if n else ""
if not raw.strip():
return "【條文原文】\n(沒有查到對應條文)\n【說明完畢】"
return f"【條文原文】\n{raw.strip()}\n【說明完畢】\n{head}".strip()
def _compare_two(a_text: str, b_text: str, model: str = "mistral") -> str:
prompt = f"""
請比較下列兩段法規原文的重點與差異,務必條列式、簡潔,並用「(A)」「(B)」標註來源。
(A)
{a_text}
(B)
{b_text}
""".strip()
return ask_ollama(prompt, model=model).strip()
def run_once(query: str, model: str = "mistral") -> dict:
"""
執行一次規劃→步驟→合併回答,回傳:
{
"answer": str,
"contexts": [str], # 用於評分的上下文
"steps": [desc,...], # 走過哪些步
}
"""
steps = plan(query)
contexts: list[str] = []
parts: list[str] = []
step_descs: list[str] = []
for st in steps:
t = st["type"]; p = st["payload"]; step_descs.append(st["desc"])
if t == "article":
n = int(p["no"])
raw = article_lookup(f"第{n}條") or ""
parts.append(_fix_article_block(raw, n))
if raw:
contexts.append(raw)
elif t == "rag":
k = int(p.get("k", 4))
hits = search_chunks(query, k=k)
prompt = build_prompt(query, hits, mem_lines=memory_lines())
ans = ask_ollama(prompt, model=model).strip()
parts.append(ans)
            # the RAG contexts come from the retrieved hits
for h in hits:
if "text" in h:
contexts.append(h["text"])
elif t == "compare":
a, b = p["articles"][:2]
raw_a = article_lookup(f"第{a}條") or ""
raw_b = article_lookup(f"第{b}條") or ""
parts.append(f"【比較任務】第{a}條 vs 第{b}條\n" + _compare_two(raw_a, raw_b, model=model))
if raw_a: contexts.append(raw_a)
if raw_b: contexts.append(raw_b)
answer = ("\n\n".join([s for s in parts if s.strip()]) or "(沒有產生內容)").strip()
return {"answer": answer, "contexts": contexts, "steps": step_descs}
def main():
ap = argparse.ArgumentParser(description="Single-Agent(Planning + Reflection)")
ap.add_argument("--q", required=True, help="問題內容")
ap.add_argument("--model", default="mistral", help="Ollama 模型名(如 mistral/llama3)")
ap.add_argument("--json_out", action="store_true", help="是否輸出 JSONL 到 logs/")
args = ap.parse_args()
t0 = time.time()
    # first pass (execute the planned steps)
result = run_once(args.q, model=args.model)
answer = result["answer"]
contexts = result["contexts"]
steps = result["steps"]
elapsed_ms = int((time.time() - t0) * 1000)
    # scoring
    hits = [{"text": c} for c in contexts]  # compute_metrics expects {"text": ...}
scores = compute_metrics(args.q, hits=hits, answer=answer, model=args.model)
    # reflection (re-run once if needed)
ref = reflect_and_suggest(args.q, answer, scores, model=args.model)
did_retry = False
if ref.get("need_fix"):
did_retry = True
improved_q = ref["improved_query"]
result2 = run_once(improved_q, model=args.model)
        # if the second answer is longer, adopt it (simple but workable)
if len(result2["answer"]) > len(answer):
answer = result2["answer"]
contexts = result2["contexts"]
steps = result2["steps"] + ["(反思修正後重答)"]
hits = [{"text": c} for c in contexts]
scores = compute_metrics(args.q, hits=hits, answer=answer, model=args.model)
    # output
print("\n[plan] 任務分解:")
for i, s in enumerate(steps, 1):
print(f"{i}. {s}")
print("\n=== 最終回答 ===")
print(answer)
print("\n=== 評分 ===")
print(scores)
    # memory + logging
remember(args.q, answer)
if args.json_out:
logs_dir = Path("logs"); logs_dir.mkdir(parents=True, exist_ok=True)
outpath = logs_dir / f"run_{datetime.now():%Y%m%d}.jsonl"
record = {
"ts": datetime.now().isoformat(timespec="seconds"),
"mode": "plan+reflect-1agent",
"query": args.q,
"model": args.model,
"elapsed_ms": elapsed_ms,
"steps": steps,
"scores": scores,
"reflection": {
"need_fix": bool(ref.get("need_fix")),
"reason": ref.get("reason", ""),
},
"retry": {"did_retry": did_retry},
"contexts": contexts,
"answer_chars": len(answer),
"answer": answer,
}
        with outpath.open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
print(f"\n[log] 已寫入:{outpath}")
if __name__ == "__main__":
main()
To run a query, use the commands below:
# single agent with planning + reflection
python agent/agent_reflect.py --q "第14條是什麼?" --json_out
# comparison question (gets decomposed into multiple steps)
python agent/agent_reflect.py --q "請比較第14條與第20條的差異" --json_out
# general Q&A (goes through RAG retrieval)
python agent/agent_reflect.py --q "什麼是關鍵基礎設施?" --json_out
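With --json_out enabled, each run appends one JSON line to logs/run_YYYYMMDD.jsonl, with fields matching the record dict in main(). A small sketch for skimming today's log:
# Small sketch: print a one-line summary per logged run (field names follow `record` in main()).
import json
from datetime import datetime
from pathlib import Path

log_path = Path("logs") / f"run_{datetime.now():%Y%m%d}.jsonl"
for line in log_path.read_text(encoding="utf-8").splitlines():
    rec = json.loads(line)
    retried = "retried" if rec["retry"]["did_retry"] else "no retry"
    print(rec["ts"], rec["mode"], f'{rec["elapsed_ms"]}ms', retried, "|", rec["query"])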
And with that, the complete AI Agent system has finally taken shape~
There was constant tweaking along the way, and quite a few limitations remain, but the current version can plan its own steps, think, reflect, and score itself; at the very least it's a genuinely self-operating minimum viable product (MVP)!
Tomorrow is the final day~~~~