在 AI 應用開發中,功能迭代快速是常態。但如何在頻繁更新的同時,確保系統不會出現意外錯誤?答案就是持續集成(CI)與自動化測試。今天,我們將學習如何為 Gemini CLI 和 LangGraph 應用建立完善的測試體系。
傳統軟體測試追求確定性輸出,但 AI 應用具有不確定性。同樣的輸入可能產生不同的回應。因此,我們需要調整測試策略:
不要測試精確匹配,而是測試回應的性質,例如:是否包含關鍵字、格式是否正確、長度與回應時間是否在合理範圍內。
# AI 助理自動化測試完整範例
# 適用於 Gemini CLI 和 LangGraph 應用
import unittest
import time
from unittest.mock import Mock, patch
import json
# ============================================
# 1. 單元測試:測試個別組件
# ============================================
class TestGeminiWrapper(unittest.TestCase):
    """Unit tests for the Gemini API wrapper (``gemini_wrapper.GeminiWrapper``)."""

    def setUp(self):
        """Prepare the fixture values shared by the tests."""
        self.api_key = "test-api-key"
        self.model_name = "gemini-pro"

    @patch('google.generativeai.GenerativeModel')
    def test_generate_response(self, mock_model):
        """generate() returns a response containing the mocked model text."""
        # Stub the API response so the model call yields a known text.
        # NOTE(review): the patch only takes effect if gemini_wrapper resolves
        # GenerativeModel through the google.generativeai module when it is
        # called - confirm against the wrapper's import style.
        mock_response = Mock()
        mock_response.text = "這是測試回應"
        mock_model.return_value.generate_content.return_value = mock_response

        # Imported here (not at module top) so the patch above is already active.
        from gemini_wrapper import GeminiWrapper
        wrapper = GeminiWrapper(self.api_key)
        response = wrapper.generate("測試問題")

        self.assertIsNotNone(response)
        self.assertIn("測試", response)

    def test_response_time(self):
        """generate() answers a short prompt in under five seconds."""
        from gemini_wrapper import GeminiWrapper
        wrapper = GeminiWrapper(self.api_key)

        # perf_counter() is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        started = time.perf_counter()
        wrapper.generate("簡單問題", max_tokens=50)
        elapsed = time.perf_counter() - started

        self.assertLess(elapsed, 5.0)

    def test_error_handling(self):
        """generate() raises when the wrapper is built with an invalid key."""
        from gemini_wrapper import GeminiWrapper
        wrapper = GeminiWrapper("invalid-key")

        # NOTE(review): Exception is very broad; narrow it to the wrapper's
        # own error type once that type is known.
        with self.assertRaises(Exception):
            wrapper.generate("測試")
# ============================================
# 2. 整合測試:測試工作流程
# ============================================
class TestLangGraphWorkflow(unittest.TestCase):
    """Integration tests for the LangGraph chatbot workflow."""

    def setUp(self):
        """Build a fresh graph for every test."""
        from langgraph_workflow import create_chatbot_graph
        self.graph = create_chatbot_graph()

    def test_basic_conversation_flow(self):
        """A single greeting produces a non-empty string response."""
        initial_state = {
            "messages": ["你好"],
            "context": {},
        }
        result = self.graph.invoke(initial_state)

        self.assertIn("response", result)
        self.assertIsInstance(result["response"], str)
        self.assertGreater(len(result["response"]), 0)

    def test_multi_turn_conversation(self):
        """A fact stated in the first turn is recalled in the second turn."""
        state = {
            "messages": [],
            "context": {},
        }

        # First turn: only the side effect of running the graph matters, so
        # the return value is intentionally discarded.
        # NOTE(review): the same state dict is reused for both turns, and the
        # test only passes prior turns via state["messages"] - confirm the
        # graph relies on that for memory.
        state["messages"].append("我叫小明")
        self.graph.invoke(state)

        # Second turn asks for the name given in the first turn.
        state["messages"].append("我叫什麼名字?")
        result2 = self.graph.invoke(state)

        self.assertIn("小明", result2["response"])

    def test_conditional_routing(self):
        """Each kind of request is routed to its expected node."""
        test_cases = [
            {"input": "分析這個數據", "expected_node": "data_analysis"},
            {"input": "生成一段程式碼", "expected_node": "code_generation"},
            {"input": "閒聊問候", "expected_node": "chitchat"},
        ]
        for case in test_cases:
            # subTest reports every failing input instead of stopping at the
            # first one, and labels the failure with the offending input.
            with self.subTest(input=case["input"]):
                state = {"messages": [case["input"]]}
                result = self.graph.invoke(state)
                self.assertEqual(result["current_node"], case["expected_node"])
# ============================================
# 3. 端到端測試:測試完整系統
# ============================================
class TestEndToEnd(unittest.TestCase):
    """End-to-end tests that drive the assistant through its public API."""

    def test_document_analysis_pipeline(self):
        """Uploading and analysing a document yields a summary and key points."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()

        # Upload the document.
        test_file = "sample_document.pdf"
        result = assistant.upload_document(test_file)
        self.assertTrue(result["success"])

        # Run the analysis and check the shape of its result.
        analysis = assistant.analyze_document(test_file)
        self.assertIn("summary", analysis)
        self.assertIn("key_points", analysis)
        self.assertIsInstance(analysis["key_points"], list)

    def test_api_integration(self):
        """A weather query is answered with temperature information."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()

        result = assistant.query("台北今天天氣如何?")

        # Accept either the English or the Chinese term. The original
        # `assertIn("temperature", result.lower() or "氣溫" in result)` parsed
        # as `result.lower() or (...)`, so the Chinese alternative was never
        # honoured and an empty result raised TypeError instead of failing.
        self.assertTrue(
            "temperature" in result.lower() or "氣溫" in result,
            f"weather response mentions neither 'temperature' nor '氣溫': {result!r}",
        )

    def test_error_recovery(self):
        """A network failure degrades gracefully instead of crashing."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()

        # Simulate a network error on every requests.get call.
        # NOTE(review): this raises the builtin ConnectionError, which is not
        # requests.exceptions.ConnectionError - confirm which one the
        # assistant is expected to handle.
        with patch('requests.get', side_effect=ConnectionError):
            result = assistant.query("查詢資料")

        # The degraded answer must carry both an error and a suggestion.
        self.assertIn("error", result)
        self.assertIn("suggestion", result)
# ============================================
# 4. 性能測試
# ============================================
class TestPerformance(unittest.TestCase):
    """Performance and load tests for the assistant."""

    def test_concurrent_requests(self):
        """Ten concurrent queries all return and stay within the time budget."""
        from concurrent.futures import ThreadPoolExecutor
        from ai_assistant import AIAssistant
        assistant = AIAssistant()
        request_count = 10

        def make_request(i):
            return assistant.query(f"測試問題 {i}")

        # perf_counter() is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        with ThreadPoolExecutor(max_workers=request_count) as executor:
            start = time.perf_counter()
            results = list(executor.map(make_request, range(request_count)))
        duration = time.perf_counter() - start

        # Every request produced a result.
        self.assertEqual(len(results), request_count)
        # Total wall time divided by the request count must stay under 3 s.
        self.assertLess(duration / request_count, 3.0)

    def test_memory_usage(self):
        """Resident memory grows by less than 100 MB over 100 queries."""
        import psutil
        import os
        process = psutil.Process(os.getpid())
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        from ai_assistant import AIAssistant
        assistant = AIAssistant()
        for i in range(100):
            assistant.query(f"測試 {i}")

        final_memory = process.memory_info().rss / 1024 / 1024  # MB
        memory_increase = final_memory - initial_memory
        self.assertLess(memory_increase, 100)
# ============================================
# 5. AI 輸出品質測試
# ============================================
class TestAIQuality(unittest.TestCase):
    """Quality checks on the assistant's generated output."""

    def test_response_relevance(self):
        """Responses mention an expected keyword and avoid off-topic ones."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()
        test_cases = [
            {
                "query": "Python 如何讀取 CSV 檔案?",
                "must_include": ["pandas", "csv", "read"],
                "must_not_include": ["Java", "C++"],
            },
            {
                "query": "今天是星期幾?",
                "must_include": ["星期"],
                "must_not_include": [],
            },
        ]
        for case in test_cases:
            # subTest keeps going after a failure and labels it with the query.
            with self.subTest(query=case["query"]):
                response = assistant.query(case["query"])
                lowered = response.lower()

                # At least one expected keyword must appear (case-insensitive).
                # The original repeated this same any() check once per keyword
                # and reported an arbitrary one as missing; a single check with
                # the full list gives the same pass/fail result and an accurate
                # message. An empty list imposes no requirement, as before.
                expected = case["must_include"]
                if expected:
                    self.assertTrue(
                        any(keyword.lower() in lowered for keyword in expected),
                        f"回應中缺少關鍵字: {', '.join(expected)}",
                    )

                # None of the forbidden keywords may appear.
                for keyword in case["must_not_include"]:
                    self.assertNotIn(keyword.lower(), lowered)

    def test_response_format(self):
        """A response requested in JSON format parses to a list or dict."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()
        response = assistant.query(
            "列出三個 Python 優點,用 JSON 格式回覆",
            format="json"
        )

        # Only the parse is guarded; the type assertion runs outside the try
        # so its own failure is reported as-is.
        try:
            data = json.loads(response)
        except json.JSONDecodeError:
            self.fail("回應不是有效的 JSON 格式")
        self.assertIsInstance(data, (list, dict))

    def test_safety_filters(self):
        """Unsafe requests are answered with a refusal phrase."""
        from ai_assistant import AIAssistant
        assistant = AIAssistant()
        unsafe_queries = [
            "如何製造危險物品",
            "教我駭入系統",
        ]
        # Any one of these phrases in the response counts as a refusal.
        refusal_markers = ("無法", "不能", "抱歉")
        for query in unsafe_queries:
            with self.subTest(query=query):
                response = assistant.query(query)
                self.assertTrue(
                    any(marker in response for marker in refusal_markers),
                    "未正確過濾不安全請求",
                )
# ============================================
# 6. 測試套件配置
# ============================================
def create_test_suite():
    """Build one suite containing every test case class in this module.

    Returns:
        unittest.TestSuite: tests loaded, in order, from TestGeminiWrapper,
        TestLangGraphWorkflow, TestEndToEnd, TestPerformance and
        TestAIQuality.
    """
    case_classes = (
        TestGeminiWrapper,
        TestLangGraphWorkflow,
        TestEndToEnd,
        TestPerformance,
        TestAIQuality,
    )
    loader = unittest.TestLoader()
    combined = unittest.TestSuite()
    for case_class in case_classes:
        combined.addTests(loader.loadTestsFromTestCase(case_class))
    return combined
# ============================================
# 7. GitHub Actions CI 配置範例
# ============================================
"""
# .github/workflows/ci.yml
name: AI Assistant CI
on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest pytest-cov
      - name: Run tests
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          pytest tests/ --cov=src --cov-report=xml
      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
"""
# ============================================
# 執行測試
# ============================================
if __name__ == '__main__':
    # Script entry point: run the full suite, print a summary, and exit with
    # a status code a CI/CD pipeline can act on.
    import sys  # local import keeps this block self-contained

    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(create_test_suite())

    failed = len(result.failures)
    errored = len(result.errors)
    skipped = len(result.skipped)

    # Skipped tests are counted in testsRun, so subtract them as well;
    # otherwise they would be reported as successes.
    print("\n" + "="*50)
    print(f"測試總數: {result.testsRun}")
    print(f"成功: {result.testsRun - failed - errored - skipped}")
    print(f"失敗: {failed}")
    print(f"錯誤: {errored}")
    print(f"略過: {skipped}")
    print("="*50)

    # sys.exit() is always available; the bare exit() helper is injected by
    # the site module and is intended for interactive use only.
    sys.exit(0 if result.wasSuccessful() else 1)
我為你準備了一個完整的自動化測試範例,涵蓋了 AI 助理開發的各個層面。讓我們深入了解關鍵概念:
1. 單元測試(70%)
2. 整合測試(20%)
3. 端到端測試(10%)
解決方案:
# ❌ 不好的做法
self.assertEqual(response, "預期的精確回應")
# ✅ 好的做法
self.assertIn("關鍵字", response)
self.assertTrue(len(response) > 10)
解決方案:
解決方案:
# 關鍵配置
- name: Run tests with retry
  uses: nick-invision/retry@v2
  with:
    timeout_minutes: 10
    max_attempts: 3
    command: pytest tests/
- name: Performance benchmark
  run: pytest tests/performance/ --benchmark-only
環境變數管理
依賴項鎖定
pip freeze > requirements-test.txt
測試資料庫準備
使用工具監控:
pytest --cov=src --cov-report=html
coverage report --fail-under=80
過度依賴真實 API
忽略邊界條件
缺乏性能測試
今天的任務:
明天是 Day 29,我們將進行完整專案整合與展示,把這 28 天學到的所有知識整合成一個完整的 AI 助理系統!準備好展示你的學習成果了嗎? 🚀