2025 iThome 鐵人賽 DAY 28
Warehouse Tallying AI Copilot — a full-stack LLM project scaffold (Python FastAPI + RAG + LangGraph + React)

A runnable example project: the backend includes RAG/retrieval, tool calling, a simple agent, and a chat API; the frontend is a minimal chat UI. The example uses a warehouse/tallying scenario, but the structure is generic.


Directory structure

llm-warehouse-copilot/
├─ backend/
│  ├─ app/
│  │  ├─ main.py
│  │  ├─ config.py
│  │  ├─ db.py
│  │  ├─ models.py
│  │  ├─ schemas.py
│  │  ├─ services/
│  │  │  ├─ llm.py
│  │  │  ├─ rag.py
│  │  │  ├─ agents.py
│  │  └─ routers/
│  │     └─ chat.py
│  ├─ scripts/
│  │  └─ ingest.py
│  ├─ tests/
│  │  └─ test_chat.py
│  ├─ requirements.txt
│  └─ Dockerfile
├─ data/
│  ├─ docs/               # SOP / spec documents (PDF/MD/TXT)
│  └─ indices/            # vector indices (generated automatically)
├─ frontend/
│  ├─ package.json
│  ├─ vite.config.ts
│  ├─ index.html
│  └─ src/
│     ├─ main.tsx
│     └─ App.tsx
├─ docker-compose.yml
├─ .env.example
└─ README.md

Backend code (backend/)

backend/app/config.py

# -*- coding: utf-8 -*-
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict  # BaseSettings moved to pydantic-settings in pydantic v2
from functools import lru_cache

class Settings(BaseSettings):
    # General
    APP_NAME: str = "Warehouse Copilot API"
    APP_VERSION: str = "0.1.0"
    ENV: str = "dev"
    ALLOW_ORIGINS: str = "*"

    # LLM provider: openai or ollama
    LLM_PROVIDER: str = Field("ollama", description="ollama | openai")
    # OpenAI settings
    OPENAI_API_KEY: str | None = None
    OPENAI_MODEL: str = "gpt-4o-mini"
    # Ollama settings
    OLLAMA_HOST: str = "http://localhost:11434"
    OLLAMA_MODEL: str = "qwen2.5:7b-instruct"

    # Embedding model (Sentence-Transformers / HuggingFace)
    EMBEDDING_MODEL: str = "BAAI/bge-small-zh-v1.5"

    # Index and data paths
    DATA_DIR: str = "./data/docs"
    INDEX_DIR: str = "./data/indices"

    # DB (optional; SQLite for the demo)
    DATABASE_URL: str = "sqlite:///./data/warehouse.db"

    model_config = SettingsConfigDict(env_file=".env")

@lru_cache
def get_settings() -> Settings:
    return Settings()

backend/app/db.py

# -*- coding: utf-8 -*-
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from .config import get_settings

settings = get_settings()
engine = create_engine(
    settings.DATABASE_URL,
    connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

# FastAPI dependency: yields a session per request and closes it afterwards.
# (A plain generator function works with Depends; wrapping it in @contextmanager
# would hand the route a context manager instead of a Session.)
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
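
If a router later needs the database, the dependency is used like any other FastAPI dependency. The route below is only an illustration and is not part of the demo's routers:

# Illustrative only, e.g. in backend/app/routers/items.py — a read from the demo `items` table
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from ..db import get_db
from ..models import Item

items_router = APIRouter(prefix="/items", tags=["items"])

@items_router.get("/{sku}")
def get_item(sku: str, db: Session = Depends(get_db)):
    item = db.query(Item).filter(Item.sku == sku).first()
    if item is None:
        return {"found": False}
    return {"found": True, "sku": item.sku, "name": item.name, "brand": item.brand}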

backend/app/models.py

# -*- coding: utf-8 -*-
from sqlalchemy import Column, Integer, String, Date, Float
from .db import Base

# Demo: warehouse items / inventory (extend as needed for real use)
class Item(Base):
    __tablename__ = "items"
    id = Column(Integer, primary_key=True, index=True)
    sku = Column(String, unique=True, index=True)
    name = Column(String)
    brand = Column(String)
    dims = Column(String)  # L x W x H
    weight = Column(Float)

class Inventory(Base):
    __tablename__ = "inventory"
    id = Column(Integer, primary_key=True, index=True)
    item_id = Column(Integer, index=True)
    lot = Column(String, index=True)
    location = Column(String, index=True)
    exp_date = Column(Date, nullable=True)
    qty = Column(Integer)

backend/app/schemas.py

# -*- coding: utf-8 -*-
from pydantic import BaseModel
from typing import List, Optional

class ChatMessage(BaseModel):
    role: str  # user / assistant / system
    content: str

class ChatRequest(BaseModel):
    messages: List[ChatMessage]

class Citation(BaseModel):
    source: str
    score: float
    snippet: str

class ChatResponse(BaseModel):
    answer: str
    citations: List[Citation] = []

backend/app/services/llm.py

# -*- coding: utf-8 -*-
"""LLM abstraction layer: switchable between OpenAI and Ollama."""
from __future__ import annotations
from ..config import get_settings  # llm.py lives in services/, so config is one package up
import requests

settings = get_settings()

class LLMClient:
    def chat(self, messages: list[dict]) -> str:
        provider = settings.LLM_PROVIDER.lower()
        if provider == "openai":
            return self._openai_chat(messages)
        return self._ollama_chat(messages)

    def _openai_chat(self, messages: list[dict]) -> str:
        import openai  # requires openai>=1.0 (see requirements.txt)
        client = openai.OpenAI(api_key=settings.OPENAI_API_KEY)
        resp = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=messages,
            temperature=0.2,
        )
        return resp.choices[0].message.content

    def _ollama_chat(self, messages: list[dict]) -> str:
        # Call Ollama's REST endpoint /api/chat directly
        url = f"{settings.OLLAMA_HOST}/api/chat"
        payload = {"model": settings.OLLAMA_MODEL, "messages": messages, "stream": False}
        r = requests.post(url, json=payload, timeout=120)
        r.raise_for_status()
        data = r.json()
        return data.get("message", {}).get("content", "")

backend/app/services/rag.py

# -*- coding: utf-8 -*-
"""Simple RAG: build and query a FAISS index with sentence-transformers embeddings."""
from __future__ import annotations
from pathlib import Path
from typing import List, Tuple
from dataclasses import dataclass

from ..config import get_settings  # rag.py lives in services/, so config is one package up
settings = get_settings()

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

@dataclass
class DocChunk:
    text: str
    source: str

class SimpleRAG:
    def __init__(self, index_dir: str | Path | None = None):
        self.index_dir = Path(index_dir or settings.INDEX_DIR)
        self.index_dir.mkdir(parents=True, exist_ok=True)
        self.model = SentenceTransformer(settings.EMBEDDING_MODEL)
        self.index_path = self.index_dir / "faiss.index"
        self.meta_path = self.index_dir / "meta.npy"
        self.meta: List[DocChunk] = []
        self.index: faiss.IndexFlatIP | None = None
        self._load()

    def _load(self):
        if self.index_path.exists() and self.meta_path.exists():
            self.index = faiss.read_index(str(self.index_path))
            raw = np.load(self.meta_path, allow_pickle=True).tolist()
            self.meta = [DocChunk(**m) for m in raw]
        else:
            # Create an empty index whose dimension matches the configured embedding
            # model instead of hard-coding a value that may not fit the model.
            self.index = faiss.IndexFlatIP(self.model.get_sentence_embedding_dimension())
            self.meta = []

    def save(self):
        faiss.write_index(self.index, str(self.index_path))
        np.save(self.meta_path, np.array([m.__dict__ for m in self.meta], dtype=object), allow_pickle=True)

    def add_docs(self, chunks: List[DocChunk]):
        embs = self.model.encode([c.text for c in chunks], normalize_embeddings=True)
        if self.index is None:
            self.index = faiss.IndexFlatIP(embs.shape[1])
        self.index.add(embs.astype(np.float32))
        self.meta.extend(chunks)

    def query(self, q: str, top_k: int = 5) -> List[Tuple[DocChunk, float]]:
        emb = self.model.encode([q], normalize_embeddings=True).astype(np.float32)
        scores, idxs = self.index.search(emb, top_k)
        out = []
        for i, score in zip(idxs[0], scores[0]):
            if i == -1 or i >= len(self.meta):
                continue
            out.append((self.meta[i], float(score)))
        return out
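
After the ingest script has built an index, a quick sanity check of the retriever might look like this (run from backend/; the query string is only an example):

# Inspect what query() returns: a list of (DocChunk, score) pairs
from app.services.rag import SimpleRAG

rag = SimpleRAG()
for chunk, score in rag.query("易碎品包裝 SOP", top_k=3):
    print(f"{score:.3f}  {chunk.source}  {chunk.text[:60]}")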

backend/app/services/agents.py

# -*- coding: utf-8 -*-
"""Minimal agent: rule-based routing + RAG + (placeholder) SQL tool."""
from __future__ import annotations
from typing import List, Dict
from .rag import SimpleRAG
from .llm import LLMClient

SYSTEM_PROMPT = (
    "你是倉儲理貨 AI 助理。若問題涉及庫存數量/批次/效期/位置,優先使用工具查詢;若涉及作業流程/包裝規範,優先使用 RAG。"
    "必須標註引用來源(文件檔名或路徑)。若查無資料,請明確說明並給出下一步建議。"
)

class MiniAgent:
    def __init__(self, rag: SimpleRAG, llm: LLMClient):
        self.rag = rag
        self.llm = llm

    def answer(self, messages: List[Dict]) -> Dict:
        user_msg = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")
        # Crude rule: mentions of stock/quantity/lot/expiry/location should go to SQL
        # (this demo has no DB tool wired in, so return guidance instead)
        keywords = ["庫存", "數量", "批次", "效期", "位置"]
        if any(k in user_msg for k in keywords):
            guidance = (
                "目前 Demo 版未連接 WMS/SQL 工具。請於 production 版接上 `inventory` 資料表與查詢工具。\n"
                "以下提供基於文件規範的建議:\n"
            )
        else:
            guidance = ""

        # RAG retrieval
        hits = self.rag.query(user_msg, top_k=5) if self.rag.index is not None else []
        context = "\n\n".join([f"[來源:{h[0].source}]\n{h[0].text}" for h in hits])
        citations = [{"source": h[0].source, "score": h[1], "snippet": h[0].text[:120]} for h in hits]

        prompt_msgs = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"問題:{user_msg}\n\n已檢索到的相關內容:\n{context}\n\n請用繁體中文回答,條列清楚並附上來源。"},
        ]
        draft = self.llm.chat(prompt_msgs)
        return {"answer": guidance + draft, "citations": citations}

backend/app/routers/chat.py

# -*- coding: utf-8 -*-
from fastapi import APIRouter
from ..schemas import ChatRequest, ChatResponse, Citation
from ..services.llm import LLMClient
from ..services.rag import SimpleRAG
from ..services.agents import MiniAgent

router = APIRouter(prefix="/chat", tags=["chat"])

# Module-level singletons (kept simple for the demo)
_rag = SimpleRAG()
_llm = LLMClient()
_agent = MiniAgent(_rag, _llm)

@router.post("/completion", response_model=ChatResponse)
def completion(payload: ChatRequest) -> ChatResponse:
    messages = [m.model_dump() for m in payload.messages]  # pydantic v2
    out = _agent.answer(messages)
    return ChatResponse(answer=out["answer"], citations=[Citation(**c) for c in out["citations"]])
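
Once the API is running (default port 8000), the endpoint can be exercised with a short script; the question text below is only an example:

# Quick manual test of POST /chat/completion
import requests

payload = {"messages": [{"role": "user", "content": "易碎海產的包裝規則是什麼?"}]}
r = requests.post("http://localhost:8000/chat/completion", json=payload, timeout=120)
r.raise_for_status()
data = r.json()
print(data["answer"])
for c in data["citations"]:
    print(f'- {c["source"]} (score={c["score"]:.2f})')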

backend/app/main.py

# -*- coding: utf-8 -*-
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .config import get_settings
from .routers import chat
from .db import Base, engine
from .models import Item, Inventory  # imported so create_all() registers the demo tables

settings = get_settings()

app = FastAPI(title=settings.APP_NAME, version=settings.APP_VERSION)
app.add_middleware(
    CORSMiddleware,
    allow_origins=[o.strip() for o in settings.ALLOW_ORIGINS.split(",")],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create the demo tables on startup
Base.metadata.create_all(bind=engine)

app.include_router(chat.router)

@app.get("/")
def root():
    return {"ok": True, "service": settings.APP_NAME, "provider": settings.LLM_PROVIDER}

backend/scripts/ingest.py

# -*- coding: utf-8 -*-
"""Chunk the files under data/docs and build the RAG index.

Run from the backend/ directory:  python -m scripts.ingest
"""
from pathlib import Path
import re
from app.services.rag import SimpleRAG, DocChunk
from app.config import get_settings

settings = get_settings()

SEPS = ["\n\n", "\n", ". ", "。", "!", "?"]

def read_text(p: Path) -> str:
    if p.suffix.lower() in [".txt", ".md"]:
        return p.read_text(encoding="utf-8", errors="ignore")
    if p.suffix.lower() in [".pdf"]:
        # Simplified: for PDFs, pdfminer.six or PyMuPDF are recommended
        try:
            import fitz  # pymupdf
            doc = fitz.open(p)
            text = "".join([page.get_text() for page in doc])
            return text
        except Exception:
            return ""
    return ""

def chunk_text(text: str, max_chars: int = 600) -> list[str]:
    # Rough chunking: split on separators, then pack pieces up to a length limit
    parts = [text]
    for sep in SEPS:
        new_parts = []
        for t in parts:
            new_parts.extend([s.strip() for s in t.split(sep) if s.strip()])
        parts = new_parts
        if len(parts) > 4_000:  # stop splitting further once there are plenty of pieces
            break
    # Merge pieces into chunks of roughly max_chars characters
    chunks, buf = [], ""
    for s in parts:
        if len(buf) + len(s) + 1 <= max_chars:
            buf = (buf + " " + s).strip()
        else:
            if buf:
                chunks.append(buf)
            buf = s
    if buf:
        chunks.append(buf)
    return chunks

if __name__ == "__main__":
    docs_dir = Path(settings.DATA_DIR)
    assert docs_dir.exists(), f"Directory not found: {docs_dir}"
    rag = SimpleRAG(settings.INDEX_DIR)
    all_chunks = []
    for p in docs_dir.rglob("*"):
        if p.is_file() and p.suffix.lower() in [".txt", ".md", ".pdf"]:
            text = read_text(p)
            if not text:
                continue
            for c in chunk_text(text):
                all_chunks.append(DocChunk(text=c, source=str(p.relative_to(docs_dir))))
    if not all_chunks:
        print("未找到可索引內容。請於 data/docs 放入文件後再執行。")
    else:
        rag.add_docs(all_chunks)
        rag.save()
        print(f"索引完成,共 {len(all_chunks)} 區塊。索引路徑:{settings.INDEX_DIR}")

backend/tests/test_chat.py

# -*- coding: utf-8 -*-
from fastapi.testclient import TestClient
from app.main import app

client = TestClient(app)

def test_root():
    r = client.get("/")
    assert r.status_code == 200
    assert r.json().get("ok") is True

def test_chat_empty_index():
    payload = {"messages": [{"role": "user", "content": "倉庫 SOP 的包裝規則是什麼?"}]}
    r = client.post("/chat/completion", json=payload)
    assert r.status_code == 200
    data = r.json()
    assert "answer" in data

backend/requirements.txt

fastapi>=0.111
uvicorn[standard]>=0.30
pydantic>=2.7
pydantic-settings>=2.2
requests>=2.32
SQLAlchemy>=2.0
sentence-transformers>=3.0
faiss-cpu>=1.8
openai>=1.35
PyMuPDF>=1.24
# test dependencies
pytest>=8.0
httpx>=0.27

backend/Dockerfile

FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

Frontend code (frontend/)

frontend/package.json

{
  "name": "warehouse-copilot-ui",
  "version": "0.1.0",
  "private": true,
  "type": "module",
  "scripts": {
    "dev": "vite",
    "build": "vite build",
    "preview": "vite preview --port 5173"
  },
  "dependencies": {
    "react": "^18.2.0",
    "react-dom": "^18.2.0"
  },
  "devDependencies": {
    "@types/react": "^18.2.66",
    "@types/react-dom": "^18.2.22",
    "typescript": "^5.5.4",
    "vite": "^5.3.4"
  }
}

frontend/vite.config.ts

import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
export default defineConfig({
  plugins: [react()],
  server: { port: 5173 }
})

frontend/index.html

<!doctype html>
<html lang="zh-Hant">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>倉儲理貨 AI Copilot</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>

frontend/src/main.tsx

import React from 'react'
import { createRoot } from 'react-dom/client'
import App from './App'

createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
)

frontend/src/App.tsx

import React, { useState } from 'react'

const API_BASE = import.meta.env.VITE_API_BASE || 'http://localhost:8000'

type Msg = { role: 'user'|'assistant'|'system'; content: string }

type Citation = { source: string; score: number; snippet: string }

type ChatResp = { answer: string; citations: Citation[] }

export default function App(){
  const [messages, setMessages] = useState<Msg[]>([])
  const [input, setInput] = useState('')
  const [loading, setLoading] = useState(false)

  const send = async () => {
    if(!input.trim()) return
    const newMsgs: Msg[] = [...messages, { role: 'user', content: input }]
    setMessages(newMsgs)
    setInput('')
    setLoading(true)
    try{
      const r = await fetch(`${API_BASE}/chat/completion`,{
        method:'POST', headers:{'Content-Type':'application/json'},
        body: JSON.stringify({messages: newMsgs})
      })
      const data: ChatResp = await r.json()
      setMessages([...newMsgs, { role: 'assistant', content: data.answer }])
    } finally {
      setLoading(false)
    }
  }

  return (
    <div style={{maxWidth: 860, margin:'40px auto', fontFamily:'system-ui'}}>
      <h1>倉儲理貨 AI Copilot</h1>
      <div style={{border:'1px solid #ddd', borderRadius:12, padding:16, minHeight:420}}>
        {messages.length===0 && <p>輸入:如「依 SOP 提供易碎海產的包裝規則與注意事項」。</p>}
        {messages.map((m,i)=> (
          <div key={i} style={{margin:'12px 0'}}>
            <b>{m.role === 'user' ? '你' : 'AI'}</b>
            <div style={{whiteSpace:'pre-wrap'}}>{m.content}</div>
          </div>
        ))}
      </div>
      <div style={{display:'flex', gap:8, marginTop:12}}>
        <input value={input} onChange={e=>setInput(e.target.value)} placeholder="輸入訊息..." style={{flex:1, padding:12, borderRadius:8, border:'1px solid #ccc'}}/>
        <button onClick={send} disabled={loading} style={{padding:'12px 16px', borderRadius:8}}>{loading? '思考中...' : '送出'}</button>
      </div>
      <p style={{opacity:.6, marginTop:8}}>設定 API 基底:在 Vite 環境變數加入 <code>VITE_API_BASE</code>。</p>
    </div>
  )
}

Root-level config files and docs

.env.example

# Backend environment variables
LLM_PROVIDER=ollama           # ollama | openai
OPENAI_API_KEY=               # fill in when using openai
OPENAI_MODEL=gpt-4o-mini
OLLAMA_HOST=http://localhost:11434
OLLAMA_MODEL=qwen2.5:7b-instruct
EMBEDDING_MODEL=BAAI/bge-small-zh-v1.5
DATA_DIR=./data/docs
INDEX_DIR=./data/indices
DATABASE_URL=sqlite:///./data/warehouse.db
ALLOW_ORIGINS=*

docker-compose.yml

version: "3.9"
services:
  api:
    build: ./backend
    env_file: .env
    environment:
      # Inside the compose network the API reaches Ollama via the service name
      - OLLAMA_HOST=http://ollama:11434
    volumes:
      - ./data:/app/data
    ports:
      - "8000:8000"
    depends_on:
      - ollama
  ui:
    image: node:20-alpine
    working_dir: /app
    volumes:
      - ./frontend:/app
    command: sh -c "npm install && npm run dev -- --host"
    environment:
      - VITE_API_BASE=http://localhost:8000
    ports:
      - "5173:5173"
  ollama:
    image: ollama/ollama:0.3.14
    volumes:
      - ollama:/root/.ollama
    ports:
      - "11434:11434"
volumes:
  ollama: {}

README.md

# Warehouse Tallying AI Copilot (Demo)

1. Prepare the environment

cp .env.example .env
# If using Ollama: start the compose service first, then pull a model from the host or inside the container
docker compose up -d ollama
# Pull an instruction-tuned model with good Chinese support (swap in another if you prefer)
curl http://localhost:11434/api/pull -d '{"name":"qwen2.5:7b-instruct"}'

2. Start the backend and frontend

docker compose up -d api ui

3. Add documents and build the index

# Put your SOPs / specs / FAQs under data/docs
docker compose exec api python -m scripts.ingest

4. Try the UI

Open http://localhost:5173 and ask a question.

Important notes

  • The demo does not ship a WMS/SQL query tool. Extend services/agents.py (a sketch follows this list):

    • Add a sql_query_tool() that queries the inventory/items/orders tables.
    • Have MiniAgent.answer() call SQL first based on keywords/intent, then combine the result with RAG.
  • If you use OpenAI:

    • Set LLM_PROVIDER=openai and OPENAI_API_KEY in .env.
  • Embedding model: defaults to BAAI/bge-small-zh-v1.5 (good Chinese retrieval quality).

  • PDF parsing: the demo uses PyMuPDF; for production, add layout and table extraction.
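
As a rough starting point, the SQL tool could look like the sketch below. This is illustrative only and not part of the repo: the name sql_query_tool(), the lookup-by-SKU-or-name logic, and the extract_sku() helper are assumptions to adapt to your own schema.

# backend/app/services/tools.py (hypothetical sketch, not included in the demo)
from sqlalchemy import select
from ..db import SessionLocal
from ..models import Item, Inventory

def sql_query_tool(sku_or_name: str) -> list[dict]:
    """Look up inventory rows for an exact SKU or an item-name fragment."""
    with SessionLocal() as db:
        stmt = (
            select(Item.sku, Item.name, Inventory.lot, Inventory.location,
                   Inventory.exp_date, Inventory.qty)
            .join(Inventory, Inventory.item_id == Item.id)
            .where((Item.sku == sku_or_name) | (Item.name.contains(sku_or_name)))
        )
        return [dict(row._mapping) for row in db.execute(stmt)]

# Inside MiniAgent.answer(), the guidance-only branch could then become:
#
#   if any(k in user_msg for k in keywords):
#       rows = sql_query_tool(extract_sku(user_msg))  # extract_sku() is another assumed helper
#       facts = "\n".join(str(r) for r in rows) or "(no matching inventory rows)"
#       # pass `facts` to the LLM together with the RAG context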

Tests

docker compose exec api python -m pytest -q

Next steps (Roadmap)

  • ✅ P0: RAG + chat + citations
  • ⏩ P1: SQL/WMS tools (stock / lot / expiry / location)
  • ⏩ P2: shortest picking path (based on a location graph) and label-printing tools
  • ⏩ P3: LangGraph state machine (multi-step tools, error recovery) and observability (Langfuse); a minimal skeleton follows this list
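
For P3, a minimal LangGraph skeleton might look like the following. It is only a sketch under the assumption that langgraph is added as a dependency; the CopilotState fields and the node names (route / retrieve / generate) are illustrative, not part of the repo.

# Hypothetical LangGraph skeleton for the P3 milestone (requires `pip install langgraph`)
from typing import TypedDict
from langgraph.graph import StateGraph, END

class CopilotState(TypedDict, total=False):
    question: str
    branch: str     # "sql" or "rag"
    context: str
    answer: str

def route(state: CopilotState) -> CopilotState:
    # Reuse MiniAgent's keyword rule to choose a branch
    keywords = ["庫存", "數量", "批次", "效期", "位置"]
    state["branch"] = "sql" if any(k in state["question"] for k in keywords) else "rag"
    return state

def retrieve(state: CopilotState) -> CopilotState:
    state["context"] = "(plug in SimpleRAG.query() or a SQL tool here)"
    return state

def generate(state: CopilotState) -> CopilotState:
    state["answer"] = "(plug in LLMClient.chat() here)"
    return state

graph = StateGraph(CopilotState)
graph.add_node("route", route)
graph.add_node("retrieve", retrieve)
graph.add_node("generate", generate)
graph.set_entry_point("route")
graph.add_edge("route", "retrieve")
graph.add_edge("retrieve", "generate")
graph.add_edge("generate", END)
copilot_graph = graph.compile()

# Usage: copilot_graph.invoke({"question": "A-3 區的冷藏批次效期?"})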
