A runnable example project: the backend provides RAG/retrieval, tool calling, a simple agent, and a chat API; the frontend is a minimal chat UI. The scenario is warehouse picking and packing, but the structure is generic.
llm-warehouse-copilot/
├─ backend/
│  ├─ app/
│  │  ├─ main.py
│  │  ├─ config.py
│  │  ├─ db.py
│  │  ├─ models.py
│  │  ├─ schemas.py
│  │  ├─ services/
│  │  │  ├─ llm.py
│  │  │  ├─ rag.py
│  │  │  └─ agents.py
│  │  └─ routers/
│  │     └─ chat.py
│  ├─ scripts/
│  │  └─ ingest.py
│  ├─ tests/
│  │  └─ test_chat.py
│  ├─ requirements.txt
│  └─ Dockerfile
├─ data/
│  ├─ docs/     # SOP / spec documents go here (PDF/MD/TXT)
│  └─ indices/  # vector index (generated automatically)
├─ frontend/
│  ├─ package.json
│  ├─ vite.config.ts
│  ├─ index.html
│  └─ src/
│     ├─ main.tsx
│     └─ App.tsx
├─ docker-compose.yml
├─ .env.example
└─ README.md
backend/app/config.py
# -*- coding: utf-8 -*-
from functools import lru_cache

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict  # pydantic v2 moved BaseSettings here


class Settings(BaseSettings):
    # General
    APP_NAME: str = "Warehouse Copilot API"
    APP_VERSION: str = "0.1.0"
    ENV: str = "dev"
    ALLOW_ORIGINS: str = "*"

    # LLM provider: openai or ollama
    LLM_PROVIDER: str = Field("ollama", description="ollama | openai")

    # OpenAI settings
    OPENAI_API_KEY: str | None = None
    OPENAI_MODEL: str = "gpt-4o-mini"

    # Ollama settings
    OLLAMA_HOST: str = "http://localhost:11434"
    OLLAMA_MODEL: str = "qwen2.5:7b-instruct"

    # Embedding model (Sentence-Transformers / HuggingFace)
    EMBEDDING_MODEL: str = "BAAI/bge-small-zh-v1.5"

    # Index and data paths
    DATA_DIR: str = "./data/docs"
    INDEX_DIR: str = "./data/indices"

    # DB (optional; SQLite for the demo)
    DATABASE_URL: str = "sqlite:///./data/warehouse.db"

    model_config = SettingsConfigDict(env_file=".env")


@lru_cache
def get_settings() -> Settings:
    return Settings()
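Settings are resolved from environment variables first, then `.env`, then the defaults above, and `lru_cache` makes `get_settings()` a process-wide singleton. A minimal sketch of an override (hypothetical usage; `cache_clear()` is only needed because of the cache):

import os
os.environ["LLM_PROVIDER"] = "openai"

from app.config import get_settings
get_settings.cache_clear()           # drop the cached Settings instance
print(get_settings().LLM_PROVIDER)   # -> "openai"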
backend/app/db.py
# -*- coding: utf-8 -*-
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base

from .config import get_settings

settings = get_settings()

# SQLite needs check_same_thread=False when the connection is shared across FastAPI threads
engine = create_engine(
    settings.DATABASE_URL,
    connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()


# FastAPI dependency: a plain generator, *without* @contextmanager -- FastAPI
# drives the generator itself; with the decorator, Depends would inject the
# context-manager object instead of the session.
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
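Because `get_db` is a plain generator, FastAPI can drive it with `Depends`: the session is created before the handler runs and closed once the response is sent. A hypothetical route (not part of the demo) to illustrate:

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

from app.db import get_db
from app.models import Item

items_router = APIRouter()

@items_router.get("/items/{sku}")
def get_item(sku: str, db: Session = Depends(get_db)):
    # Depends() runs get_db up to the yield, injects the session,
    # and executes the finally block after the response is sent.
    return db.query(Item).filter(Item.sku == sku).first()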
backend/app/models.py
# -*- coding: utf-8 -*-
from sqlalchemy import Column, Integer, String, Date, Float

from .db import Base


# Demo tables: warehouse items and inventory (extend for production use)
class Item(Base):
    __tablename__ = "items"

    id = Column(Integer, primary_key=True, index=True)
    sku = Column(String, unique=True, index=True)
    name = Column(String)
    brand = Column(String)
    dims = Column(String)  # length x width x height
    weight = Column(Float)


class Inventory(Base):
    __tablename__ = "inventory"

    id = Column(Integer, primary_key=True, index=True)
    item_id = Column(Integer, index=True)
    lot = Column(String, index=True)
    location = Column(String, index=True)
    exp_date = Column(Date, nullable=True)
    qty = Column(Integer)
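The demo never writes to these tables, so a small seeding sketch (hypothetical data, for local experiments only) can be handy:

from datetime import date

from app.db import Base, SessionLocal, engine
from app.models import Inventory, Item

Base.metadata.create_all(bind=engine)
with SessionLocal() as db:
    item = Item(sku="SKU-0001", name="Frozen shrimp 1kg", brand="DemoBrand",
                dims="30x20x10", weight=1.1)
    db.add(item)
    db.commit()
    db.add(Inventory(item_id=item.id, lot="L2024-09", location="A-03-02",
                     exp_date=date(2025, 9, 30), qty=120))
    db.commit()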
backend/app/schemas.py
# -*- coding: utf-8 -*-
from typing import List

from pydantic import BaseModel


class ChatMessage(BaseModel):
    role: str  # user / assistant / system
    content: str


class ChatRequest(BaseModel):
    messages: List[ChatMessage]


class Citation(BaseModel):
    source: str
    score: float
    snippet: str


class ChatResponse(BaseModel):
    answer: str
    citations: List[Citation] = []
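These models define the wire format of `/chat/completion`. A quick sanity check of the request payload, using pydantic v2's `model_dump_json`:

from app.schemas import ChatMessage, ChatRequest

req = ChatRequest(messages=[ChatMessage(role="user", content="What are the packing rules?")])
print(req.model_dump_json())
# {"messages":[{"role":"user","content":"What are the packing rules?"}]}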
backend/app/services/llm.py
# -*- coding: utf-8 -*-
"""LLM abstraction layer: switch between OpenAI and Ollama."""
from __future__ import annotations

import requests

from ..config import get_settings  # config.py lives one package above services/

settings = get_settings()


class LLMClient:
    def chat(self, messages: list[dict]) -> str:
        provider = settings.LLM_PROVIDER.lower()
        if provider == "openai":
            return self._openai_chat(messages)
        return self._ollama_chat(messages)

    def _openai_chat(self, messages: list[dict]) -> str:
        import openai  # requires openai>=1.0 (see requirements.txt)

        client = openai.OpenAI(api_key=settings.OPENAI_API_KEY)
        resp = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=messages,
            temperature=0.2,
        )
        return resp.choices[0].message.content

    def _ollama_chat(self, messages: list[dict]) -> str:
        # Call the Ollama REST endpoint /api/chat directly
        url = f"{settings.OLLAMA_HOST}/api/chat"
        payload = {"model": settings.OLLAMA_MODEL, "messages": messages, "stream": False}
        r = requests.post(url, json=payload, timeout=120)
        r.raise_for_status()
        data = r.json()
        return data.get("message", {}).get("content", "")
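A minimal smoke test, assuming an Ollama server is already running at `OLLAMA_HOST` with the configured model pulled:

from app.services.llm import LLMClient

client = LLMClient()
reply = client.chat([{"role": "user", "content": "Reply with the single word: pong"}])
print(reply)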
backend/app/services/rag.py
# -*- coding: utf-8 -*-
"""Simple RAG: sentence-transformers + FAISS for indexing and retrieval."""
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

from ..config import get_settings  # config.py lives one package above services/

settings = get_settings()


@dataclass
class DocChunk:
    text: str
    source: str


class SimpleRAG:
    def __init__(self, index_dir: str | Path | None = None):
        self.index_dir = Path(index_dir or settings.INDEX_DIR)
        self.index_dir.mkdir(parents=True, exist_ok=True)
        self.model = SentenceTransformer(settings.EMBEDDING_MODEL)
        self.index_path = self.index_dir / "faiss.index"
        self.meta_path = self.index_dir / "meta.npy"
        self.meta: List[DocChunk] = []
        self.index: faiss.IndexFlatIP | None = None
        self._load()

    def _load(self):
        if self.index_path.exists() and self.meta_path.exists():
            self.index = faiss.read_index(str(self.index_path))
            raw = np.load(self.meta_path, allow_pickle=True).tolist()
            self.meta = [DocChunk(**m) for m in raw]
        else:
            # Match the embedding model's dimension (512 for bge-small-zh-v1.5)
            dim = self.model.get_sentence_embedding_dimension()
            self.index = faiss.IndexFlatIP(dim)
            self.meta = []

    def save(self):
        faiss.write_index(self.index, str(self.index_path))
        np.save(self.meta_path, np.array([m.__dict__ for m in self.meta], dtype=object), allow_pickle=True)

    def add_docs(self, chunks: List[DocChunk]):
        embs = self.model.encode([c.text for c in chunks], normalize_embeddings=True)
        if self.index is None:
            self.index = faiss.IndexFlatIP(embs.shape[1])
        self.index.add(embs.astype(np.float32))
        self.meta.extend(chunks)

    def query(self, q: str, top_k: int = 5) -> List[Tuple[DocChunk, float]]:
        emb = self.model.encode([q], normalize_embeddings=True).astype(np.float32)
        scores, idxs = self.index.search(emb, top_k)
        out = []
        for i, score in zip(idxs[0], scores[0]):
            if i == -1 or i >= len(self.meta):
                continue
            out.append((self.meta[i], float(score)))
        return out
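A minimal round trip: index two toy chunks, then retrieve the better match. Nothing is persisted until `save()` is called; the first run downloads the embedding model.

from app.services.rag import DocChunk, SimpleRAG

rag = SimpleRAG()
rag.add_docs([
    DocChunk(text="Fragile seafood must be double-boxed with ice packs.", source="sop/packing.md"),
    DocChunk(text="Expired lots go to the quarantine location Q-01.", source="sop/expiry.md"),
])
for chunk, score in rag.query("How should fragile seafood be packed?", top_k=1):
    print(f"{score:.3f} [{chunk.source}] {chunk.text}")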
backend/app/services/agents.py
# -*- coding: utf-8 -*-
"""Minimal agent: rule-based routing + RAG + (placeholder) SQL tool."""
from __future__ import annotations

from typing import Dict, List

from .llm import LLMClient
from .rag import SimpleRAG

SYSTEM_PROMPT = (
    "You are a warehouse picking-and-packing AI assistant. For questions about "
    "stock quantities, lots, expiry dates, or storage locations, prefer tool "
    "queries; for questions about procedures or packing specs, prefer RAG. "
    "Always cite your sources (document file name or path). If nothing is found, "
    "say so explicitly and suggest a next step."
)


class MiniAgent:
    def __init__(self, rag: SimpleRAG, llm: LLMClient):
        self.rag = rag
        self.llm = llm

    def answer(self, messages: List[Dict]) -> Dict:
        user_msg = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")

        # Rough rule: "inventory / quantity / lot / expiry / location" keywords
        # (kept in Chinese to match Chinese user input) should route to SQL.
        # The demo has no DB tool wired up, so we prepend guidance instead.
        keywords = ["庫存", "數量", "批次", "效期", "位置"]
        if any(k in user_msg for k in keywords):
            guidance = (
                "This demo is not connected to a WMS/SQL tool. In production, wire up "
                "the `inventory` table and a query tool.\n"
                "Below are suggestions based on the documented specs:\n"
            )
        else:
            guidance = ""

        # RAG retrieval
        hits = self.rag.query(user_msg, top_k=5) if self.rag.index is not None else []
        context = "\n\n".join([f"[source: {h[0].source}]\n{h[0].text}" for h in hits])
        citations = [{"source": h[0].source, "score": h[1], "snippet": h[0].text[:120]} for h in hits]

        prompt_msgs = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": (
                f"Question: {user_msg}\n\nRetrieved context:\n{context}\n\n"
                "Answer in Traditional Chinese, as a clear bullet list, citing sources."
            )},
        ]
        draft = self.llm.chat(prompt_msgs)
        return {"answer": guidance + draft, "citations": citations}
backend/app/routers/chat.py
# -*- coding: utf-8 -*-
from fastapi import APIRouter

from ..schemas import ChatRequest, ChatResponse, Citation
from ..services.agents import MiniAgent
from ..services.llm import LLMClient
from ..services.rag import SimpleRAG

router = APIRouter(prefix="/chat", tags=["chat"])

# Singleton initialization (simplified for the demo)
_rag = SimpleRAG()
_llm = LLMClient()
_agent = MiniAgent(_rag, _llm)


@router.post("/completion", response_model=ChatResponse)
def completion(payload: ChatRequest) -> ChatResponse:
    # .dict() is pydantic v1; v2 uses model_dump()
    messages = [m.model_dump() for m in payload.messages]
    out = _agent.answer(messages)
    return ChatResponse(answer=out["answer"], citations=[Citation(**c) for c in out["citations"]])
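Once the API is up, the endpoint can be exercised directly (assuming the default port):

import requests

resp = requests.post(
    "http://localhost:8000/chat/completion",
    json={"messages": [{"role": "user", "content": "What are the SOP packing rules?"}]},
    timeout=120,
)
resp.raise_for_status()
data = resp.json()
print(data["answer"])
for c in data["citations"]:
    print(f"- {c['source']} ({c['score']:.2f})")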
backend/app/main.py
# -*- coding: utf-8 -*-
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .config import get_settings
from .db import Base, engine
from .models import Item, Inventory  # noqa: F401 -- imported so create_all sees the tables
from .routers import chat

settings = get_settings()
app = FastAPI(title=settings.APP_NAME, version=settings.APP_VERSION)

app.add_middleware(
    CORSMiddleware,
    allow_origins=[o.strip() for o in settings.ALLOW_ORIGINS.split(",")],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create tables on startup (demo only; use migrations in production)
Base.metadata.create_all(bind=engine)

app.include_router(chat.router)


@app.get("/")
def root():
    return {"ok": True, "service": settings.APP_NAME, "provider": settings.LLM_PROVIDER}
backend/scripts/ingest.py
# -*- coding: utf-8 -*-
"""Chunk the files under data/docs and build the RAG index."""
from pathlib import Path

from app.config import get_settings
from app.services.rag import DocChunk, SimpleRAG

settings = get_settings()

SEPS = ["\n\n", "\n", ". ", "。", "!", "?"]


def read_text(p: Path) -> str:
    if p.suffix.lower() in [".txt", ".md"]:
        return p.read_text(encoding="utf-8", errors="ignore")
    if p.suffix.lower() == ".pdf":
        # Simplified: for serious PDF work, use pdfminer.six or PyMuPDF properly
        try:
            import fitz  # PyMuPDF

            doc = fitz.open(p)
            text = "".join(page.get_text() for page in doc)
            doc.close()
            return text
        except Exception:
            return ""
    return ""


def chunk_text(text: str, max_chars: int = 600) -> list[str]:
    # Rough chunking: split on separators, then merge by length
    parts = [text]
    for sep in SEPS:
        new_parts = []
        for t in parts:
            new_parts.extend([s.strip() for s in t.split(sep) if s.strip()])
        parts = new_parts
        if len(parts) > 4_000:  # enough pieces; stop splitting further
            break
    # Merge fragments back into ~max_chars chunks
    chunks, buf = [], ""
    for s in parts:
        if len(buf) + len(s) + 1 <= max_chars:
            buf = (buf + " " + s).strip()
        else:
            if buf:
                chunks.append(buf)
            buf = s
    if buf:
        chunks.append(buf)
    return chunks


if __name__ == "__main__":
    docs_dir = Path(settings.DATA_DIR)
    assert docs_dir.exists(), f"Directory not found: {docs_dir}"
    rag = SimpleRAG(settings.INDEX_DIR)
    all_chunks = []
    for p in docs_dir.rglob("*"):
        if p.is_file() and p.suffix.lower() in [".txt", ".md", ".pdf"]:
            text = read_text(p)
            if not text:
                continue
            for c in chunk_text(text):
                all_chunks.append(DocChunk(text=c, source=str(p.relative_to(docs_dir))))
    if not all_chunks:
        print("Nothing to index. Put documents under data/docs and run again.")
    else:
        rag.add_docs(all_chunks)
        rag.save()
        print(f"Indexing done: {len(all_chunks)} chunks. Index path: {settings.INDEX_DIR}")
backend/tests/test_chat.py
# -*- coding: utf-8 -*-
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def test_root():
    r = client.get("/")
    assert r.status_code == 200
    assert r.json().get("ok") is True


def test_chat_empty_index():
    # Note: this hits the configured LLM backend (Ollama/OpenAI), so one must
    # be reachable; see the stubbed variant below for offline runs.
    payload = {"messages": [{"role": "user", "content": "倉庫 SOP 的包裝規則是什麼?"}]}
    r = client.post("/chat/completion", json=payload)
    assert r.status_code == 200
    data = r.json()
    assert "answer" in data
backend/requirements.txt
fastapi>=0.111
uvicorn[standard]>=0.30
pydantic>=2.7
pydantic-settings>=2.2
requests>=2.32
SQLAlchemy>=2.0
sentence-transformers>=3.0
faiss-cpu>=1.8
openai>=1.35
PyMuPDF>=1.24
httpx>=0.27
pytest>=8.0
backend/Dockerfile
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
frontend/package.json
{
  "name": "warehouse-copilot-ui",
  "version": "0.1.0",
  "private": true,
  "type": "module",
  "scripts": {
    "dev": "vite",
    "build": "vite build",
    "preview": "vite preview --port 5173"
  },
  "dependencies": {
    "react": "^18.2.0",
    "react-dom": "^18.2.0"
  },
  "devDependencies": {
    "@types/react": "^18.2.66",
    "@types/react-dom": "^18.2.22",
    "@vitejs/plugin-react": "^4.3.1",
    "typescript": "^5.5.4",
    "vite": "^5.3.4"
  }
}
frontend/vite.config.ts
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
export default defineConfig({
  plugins: [react()],
  server: { port: 5173 }
})
frontend/index.html
<!doctype html>
<html lang="zh-Hant">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Warehouse AI Copilot</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>
frontend/src/main.tsx
import React from 'react'
import { createRoot } from 'react-dom/client'
import App from './App'
createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
)
frontend/src/App.tsx
import React, { useState } from 'react'

const API_BASE = import.meta.env.VITE_API_BASE || 'http://localhost:8000'

type Msg = { role: 'user'|'assistant'|'system'; content: string }
type Citation = { source: string; score: number; snippet: string }
type ChatResp = { answer: string; citations: Citation[] }

export default function App(){
  const [messages, setMessages] = useState<Msg[]>([])
  const [input, setInput] = useState('')
  const [loading, setLoading] = useState(false)

  const send = async () => {
    if(!input.trim()) return
    // Annotate as Msg[] so the 'user' literal isn't widened to string
    const newMsgs: Msg[] = [...messages, { role: 'user', content: input }]
    setMessages(newMsgs)
    setInput('')
    setLoading(true)
    try{
      const r = await fetch(`${API_BASE}/chat/completion`,{
        method:'POST', headers:{'Content-Type':'application/json'},
        body: JSON.stringify({messages: newMsgs})
      })
      const data: ChatResp = await r.json()
      setMessages([...newMsgs, { role: 'assistant', content: data.answer }])
    } catch {
      setMessages([...newMsgs, { role: 'assistant', content: 'Request failed. Is the API running?' }])
    } finally {
      setLoading(false)
    }
  }

  return (
    <div style={{maxWidth: 860, margin:'40px auto', fontFamily:'system-ui'}}>
      <h1>Warehouse AI Copilot</h1>
      <div style={{border:'1px solid #ddd', borderRadius:12, padding:16, minHeight:420}}>
        {messages.length===0 && <p>Try: "Per the SOP, what are the packing rules and precautions for fragile seafood?"</p>}
        {messages.map((m,i)=> (
          <div key={i} style={{margin:'12px 0'}}>
            <b>{m.role === 'user' ? 'You' : 'AI'}</b>
            <div style={{whiteSpace:'pre-wrap'}}>{m.content}</div>
          </div>
        ))}
      </div>
      <div style={{display:'flex', gap:8, marginTop:12}}>
        <input value={input} onChange={e=>setInput(e.target.value)} placeholder="Type a message..." style={{flex:1, padding:12, borderRadius:8, border:'1px solid #ccc'}}/>
        <button onClick={send} disabled={loading} style={{padding:'12px 16px', borderRadius:8}}>{loading? 'Thinking...' : 'Send'}</button>
      </div>
      <p style={{opacity:.6, marginTop:8}}>To point at another API base, set <code>VITE_API_BASE</code> in the Vite environment.</p>
    </div>
  )
}
.env.example
# Backend environment variables
LLM_PROVIDER=ollama            # ollama | openai
OPENAI_API_KEY=                # fill in when using openai
OPENAI_MODEL=gpt-4o-mini
OLLAMA_HOST=http://localhost:11434   # under docker compose, the api service overrides this to http://ollama:11434
OLLAMA_MODEL=qwen2.5:7b-instruct
EMBEDDING_MODEL=BAAI/bge-small-zh-v1.5
DATA_DIR=./data/docs
INDEX_DIR=./data/indices
DATABASE_URL=sqlite:///./data/warehouse.db
ALLOW_ORIGINS=*
docker-compose.yml
version: "3.9"
services:
api:
build: ./backend
env_file: .env
volumes:
- ./data:/app/data
ports:
- "8000:8000"
depends_on: []
ui:
image: node:20-alpine
working_dir: /app
volumes:
- ./frontend:/app
command: sh -c "npm install && npm run dev -- --host"
environment:
- VITE_API_BASE=http://localhost:8000
ports:
- "5173:5173"
ollama:
image: ollama/ollama:0.3.14
volumes:
- ollama:/root/.ollama
ports:
- "11434:11434"
volumes:
ollama: {}
README.md
# Warehouse AI Copilot (Demo)

## Quick start

```bash
cp .env.example .env

# If using Ollama: start the compose service first, then pull a model
# (from the host or inside the container)
docker compose up -d ollama
# Pull an instruction model with good Chinese support (replaceable)
curl http://localhost:11434/api/pull -d '{"name":"qwen2.5:7b-instruct"}'

docker compose up -d api ui

# Put your SOPs / specs / FAQs under data/docs, then build the index
docker compose exec api python -m scripts.ingest
```

## Notes

- The demo does not connect a WMS/SQL query tool. Extend `services/agents.py` with a `sql_query_tool()` that reads `inventory`/`items`/`orders`, and have `MiniAgent.answer()` route to SQL first based on keywords/intent, then combine with the RAG results.
- To use OpenAI: set `LLM_PROVIDER=openai` and `OPENAI_API_KEY` in `.env`.
- Embedding model: defaults to `BAAI/bge-small-zh-v1.5` (strong Chinese performance, 512-dim).
- PDF parsing: the demo uses PyMuPDF; for production, add layout and table extraction.

Run the tests:

```bash
docker compose exec api pytest -q
```