模組三:知識整合模組 (knowledge_integrator.py)
import json
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

from config import MemoryConfig
class KnowledgeIntegrator:
    """Combine personal-memory hits and document-search hits into one response context.

    The public entry point is ``integrate_knowledge``; every other method is a
    private step of that pipeline: intent detection, relevance filtering,
    conflict detection, conflict resolution and prompt-context formatting.
    Items are plain dicts; the keys this class reads are ``similarity``,
    ``content``, ``type``, ``timestamp``, ``last_updated``, ``importance``,
    ``confidence`` and ``document.metadata.relative_path``.
    """

    # Regex alternatives per intent. Dict order matters: the first intent that
    # matches a query becomes its primary intent.
    _INTENT_PATTERNS = {
        'personal_inquiry': [
            r'我的|我是|我在|我有|我想',
            r'記得|記住|告訴我關於我',
            r'我們之前|上次討論'
        ],
        'factual_lookup': [
            r'什麼是|如何|怎麼|規定|政策',
            r'說明|步驟|流程|方法',
            r'定義|解釋'
        ],
        'comparative': [
            r'比較|差別|不同|相同',
            r'哪個更好|優缺點'
        ],
        'procedural': [
            r'如何做|步驟|流程|程序',
            r'申請|辦理|處理'
        ]
    }

    # Patterns whose first match is compared between a memory and a document.
    _TIME_PATTERNS = (
        r'(\d+)年',
        r'(\d+)月',
        r'週一|週二|週三|週四|週五|週六|週日',
    )
    _NUMBER_PATTERN = r'(\d+(?:\.\d+)?)'

    # Fixed opening of the system prompt; intent/confidence notes are appended.
    _BASE_SYSTEM_PROMPT = (
        "你是一個智慧助手,具備個人記憶和文件知識查詢能力。請根據以下整合資訊來回應用戶:\n"
        "回應原則:\n"
        "1. 優先使用主要資訊來源\n"
        "2. 適當引用支援資訊\n"
        "3. 如有資訊衝突,請說明並提供建議\n"
        "4. 保持回應的個性化和專業性\n"
    )

    def __init__(self):
        # Weights used when filtering and scoring candidate items.
        self.integration_weights = {
            'personal_memory': 0.6,      # multiplier applied to memory similarity
            'document_knowledge': 0.4,   # multiplier applied to document similarity
            'recency_boost': 0.2,        # scale applied to the recency bonus
            'relevance_threshold': 0.3   # minimum similarity for an item to be kept
        }
        # Switches consulted when a memory and a document disagree.
        self.conflict_resolution = {
            'trust_personal': True,      # prefer memory for personal queries
            'date_priority': True,       # otherwise prefer the newer source
            'source_credibility': True   # stored but not read by any method here
        }

    def integrate_knowledge(self, query: str, personal_memories: List[Dict],
                            document_results: List[Dict]) -> Dict[str, Any]:
        """Run the full integration pipeline for one query.

        Args:
            query: The user's question.
            personal_memories: Memory search hits (dicts with ``similarity``).
            document_results: Document search hits (dicts with ``similarity``).

        Returns:
            The context dict produced by ``_build_response_context``.
        """
        query_intent = self._analyze_query_intent(query)

        filtered_memories = self._filter_by_relevance(personal_memories, query, 'memory')
        filtered_documents = self._filter_by_relevance(document_results, query, 'document')

        conflicts = self._detect_conflicts(filtered_memories, filtered_documents)

        integrated_info = self._resolve_and_integrate(
            filtered_memories, filtered_documents, conflicts, query_intent
        )

        return self._build_response_context(integrated_info, query_intent, conflicts)

    def _analyze_query_intent(self, query: str) -> Dict[str, Any]:
        """Classify the query by matching it against the intent regex table.

        Returns a dict with ``primary_intent`` (first matching intent or
        ``'general'``), ``all_intents``, ``personal_focus`` and
        ``needs_official_info``.
        """
        detected_intents = [
            intent_type
            for intent_type, patterns in self._INTENT_PATTERNS.items()
            if any(re.search(pattern, query) for pattern in patterns)
        ]
        return {
            'primary_intent': detected_intents[0] if detected_intents else 'general',
            'all_intents': detected_intents,
            'personal_focus': 'personal_inquiry' in detected_intents,
            'needs_official_info': ('factual_lookup' in detected_intents
                                    or 'procedural' in detected_intents)
        }

    def _filter_by_relevance(self, items: List[Dict], query: str,
                             item_type: str) -> List[Dict]:
        """Drop items below the similarity threshold and rank the rest.

        Each kept item is a shallow copy with an added ``final_score`` key; the
        result is sorted by that score, highest first.
        """
        if not items:
            return []

        threshold = self.integration_weights['relevance_threshold']
        if item_type == 'memory':
            # Personal memories are admitted with a 20% lower bar.
            threshold *= 0.8

        kept = []
        for item in items:
            if item.get('similarity', 0) >= threshold:
                scored = item.copy()
                scored['final_score'] = self._calculate_final_score(item, query, item_type)
                kept.append(scored)

        kept.sort(key=lambda entry: entry['final_score'], reverse=True)
        return kept

    def _calculate_final_score(self, item: Dict, query: str, item_type: str) -> float:
        """Return the weighted similarity plus recency and credibility bonuses, capped at 1.0."""
        weight_key = 'personal_memory' if item_type == 'memory' else 'document_knowledge'
        score = item.get('similarity', 0) * self.integration_weights[weight_key]

        # The recency bonus is 0.0 when the item carries no usable timestamp.
        score += self._calculate_recency_bonus(item) * self.integration_weights['recency_boost']
        score += self._calculate_credibility_bonus(item, item_type) * 0.1

        return min(score, 1.0)

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Turn a ``datetime`` or ISO-8601 string into a timezone-aware ``datetime``.

        A trailing ``Z`` is accepted as UTC. Naive values are interpreted as
        local time, which matches how ``datetime.now()`` values are produced.
        Returns ``None`` for missing or unparseable values.
        """
        if isinstance(value, datetime):
            parsed = value
        elif isinstance(value, str) and value:
            try:
                parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
            except ValueError:
                return None
        else:
            return None

        if parsed.tzinfo is None:
            try:
                parsed = parsed.astimezone()
            except (OverflowError, OSError, ValueError):
                # Some platforms cannot localise dates far outside the epoch range.
                return None
        return parsed

    def _calculate_recency_bonus(self, item: Dict) -> float:
        """Return 0.3 / 0.2 / 0.1 / 0.0 for items at most 1 / 7 / 30 / more days old.

        Reads ``timestamp`` and falls back to ``last_updated``. Both sides of
        the subtraction are timezone-aware, so UTC (``Z``) stamps are scored
        instead of silently yielding 0.0.
        """
        timestamp = self._parse_timestamp(item.get('timestamp') or item.get('last_updated'))
        if timestamp is None:
            return 0.0

        days_diff = (datetime.now().astimezone() - timestamp).days
        if days_diff <= 1:
            return 0.3
        if days_diff <= 7:
            return 0.2
        if days_diff <= 30:
            return 0.1
        return 0.0

    def _calculate_credibility_bonus(self, item: Dict, item_type: str) -> float:
        """Return a credibility value for the item.

        Memories use the mean of ``importance`` and ``confidence`` (each
        defaulting to 0.5). Documents are rated from keywords found in
        ``document.metadata.relative_path``.
        """
        if item_type == 'memory':
            importance = item.get('importance', 0.5)
            confidence = item.get('confidence', 0.5)
            return (importance + confidence) / 2

        metadata = item.get('document', {}).get('metadata', {})
        relative_path = metadata.get('relative_path', '').lower()
        if any(keyword in relative_path for keyword in ('official', '官方', 'policy', '政策')):
            return 0.8
        if 'faq' in relative_path:
            return 0.6
        return 0.4

    def _detect_conflicts(self, memories: List[Dict],
                          documents: List[Dict]) -> List[Dict]:
        """Compare the top three memories with the top three documents.

        Returns one conflict record per (memory, document) pair for which
        ``_find_conflict_indicators`` reports at least one indicator.
        """
        conflicts = []
        for memory in memories[:3]:
            memory_content = memory.get('content', '')
            for doc in documents[:3]:
                indicators = self._find_conflict_indicators(
                    memory_content, doc.get('content', '')
                )
                if indicators:
                    conflicts.append({
                        'type': 'information_mismatch',
                        'memory_item': memory,
                        'document_item': doc,
                        'indicators': indicators,
                        'confidence': len(indicators) * 0.2
                    })
        return conflicts

    def _find_conflict_indicators(self, text1: str, text2: str) -> List[str]:
        """List heuristic mismatches between two texts.

        For each time pattern, and then for plain numbers, only the first match
        in each text is compared. Numbers conflict when they differ by more
        than 10% of the larger one.
        """
        indicators = []

        for pattern in self._TIME_PATTERNS:
            matches1 = re.findall(pattern, text1)
            matches2 = re.findall(pattern, text2)
            if matches1 and matches2 and matches1[0] != matches2[0]:
                indicators.append(f'時間衝突: {matches1[0]} vs {matches2[0]}')

        numbers1 = re.findall(self._NUMBER_PATTERN, text1)
        numbers2 = re.findall(self._NUMBER_PATTERN, text2)
        if numbers1 and numbers2:
            num1, num2 = float(numbers1[0]), float(numbers2[0])
            largest = max(num1, num2)
            # The pattern has no sign, so largest == 0 means both are 0:
            # no conflict, and the division must be skipped.
            if largest > 0 and abs(num1 - num2) / largest > 0.1:
                indicators.append(f'數量衝突: {num1} vs {num2}')

        return indicators

    def _resolve_and_integrate(self, memories: List[Dict], documents: List[Dict],
                               conflicts: List[Dict], query_intent: Dict) -> Dict[str, Any]:
        """Choose primary/supporting sources by intent and resolve every conflict.

        ``confidence_level`` is the mean ``final_score`` of all selected
        sources, or 0.0 when none were selected.
        """
        if query_intent['personal_focus']:
            primary, supporting = memories[:2], documents[:2]
        elif query_intent['needs_official_info']:
            primary, supporting = documents[:2], memories[:1]
        else:
            # No clear intent: rank memories and documents together.
            ranked = sorted(memories + documents,
                            key=lambda entry: entry['final_score'], reverse=True)
            primary, supporting = ranked[:2], ranked[2:4]

        resolved = [self._resolve_single_conflict(conflict, query_intent)
                    for conflict in conflicts]

        scores = [entry['final_score'] for entry in primary + supporting]
        return {
            'primary_sources': primary,
            'supporting_sources': supporting,
            'resolved_conflicts': resolved,
            'confidence_level': sum(scores) / len(scores) if scores else 0.0
        }

    def _resolve_single_conflict(self, conflict: Dict, query_intent: Dict) -> Dict:
        """Decide which side of one conflict to prefer.

        Order of precedence: personal queries trust memory (if enabled); then,
        if date priority is enabled and both dates parse, the newer side wins;
        otherwise the document is preferred. The non-preferred item is always
        returned as ``alternative_info``.
        """
        memory_item = conflict['memory_item']
        document_item = conflict['document_item']

        if self.conflict_resolution['trust_personal'] and query_intent['personal_focus']:
            preferred, reason = 'memory', '個人相關查詢優先採用個人記憶'
        else:
            # Default outcome; also used when date priority cannot decide
            # because a date is missing or unparseable.
            preferred, reason = 'document', '採用官方文件資訊'
            if self.conflict_resolution['date_priority']:
                memory_date = self._parse_timestamp(memory_item.get('timestamp'))
                doc_date = self._parse_timestamp(document_item.get('last_updated'))
                if memory_date is not None and doc_date is not None:
                    if memory_date > doc_date:
                        preferred, reason = 'memory', '個人記憶較新'
                    else:
                        preferred, reason = 'document', '文件資訊較新'

        return {
            'conflict_type': conflict['type'],
            'preferred_source': preferred,
            'reason': reason,
            'alternative_info': document_item if preferred == 'memory' else memory_item
        }

    def _build_response_context(self, integrated_info: Dict,
                                query_intent: Dict, conflicts: List[Dict]) -> Dict[str, Any]:
        """Assemble the context dict handed to the chatbot's prompt builder."""
        primary = integrated_info['primary_sources']
        supporting = integrated_info['supporting_sources']
        return {
            'system_prompt': self._generate_system_prompt(query_intent, integrated_info),
            'primary_information': self._format_sources(primary),
            'supporting_information': self._format_sources(supporting),
            'conflict_notes': self._format_conflicts(
                conflicts, integrated_info['resolved_conflicts']),
            'confidence_indicators': {
                'overall_confidence': integrated_info['confidence_level'],
                'has_conflicts': len(conflicts) > 0,
                'source_diversity': len(primary) + len(supporting)
            },
            'response_guidelines': self._generate_response_guidelines(
                query_intent, integrated_info)
        }

    def _generate_system_prompt(self, query_intent: Dict, integrated_info: Dict) -> str:
        """Return the base system prompt plus notes for intent and low confidence."""
        prompt = self._BASE_SYSTEM_PROMPT
        if query_intent['personal_focus']:
            prompt += "注意:這是個人相關查詢,請結合用戶的個人情況給出個性化回應。\n"
        if query_intent['needs_official_info']:
            prompt += "注意:這需要官方準確資訊,請以文件知識為主,確保資訊的權威性。\n"
        if integrated_info['confidence_level'] < 0.5:
            prompt += "注意:相關資訊的確信度較低,請在回應中表達適當的不確定性。\n"
        return prompt

    def _format_sources(self, sources: List[Dict]) -> str:
        """Render sources as a numbered list, truncating content to 200 characters."""
        if not sources:
            return "無相關資訊"

        lines = []
        for index, source in enumerate(sources, 1):
            is_memory = source.get('type') in ['profile', 'fact', 'progress']
            source_type = "個人記憶" if is_memory else "文件知識"
            content = source.get('content', '')
            if not isinstance(content, str):
                # Structured content (e.g. a dict) is shown via its str() form.
                content = str(content)
            if len(content) > 200:
                content = content[:200] + "..."
            lines.append(f"{index}. [{source_type}] {content}\n")
        return "".join(lines)

    def _format_conflicts(self, conflicts: List[Dict], resolutions: List[Dict]) -> str:
        """Render each conflict's type together with the reason for its resolution."""
        if not conflicts:
            return "無資訊衝突"

        lines = ["注意到以下資訊差異:\n"]
        for index, (conflict, resolution) in enumerate(zip(conflicts, resolutions), 1):
            lines.append(f"{index}. {conflict['type']}: {resolution['reason']}\n")
        return "".join(lines)

    def _generate_response_guidelines(self, query_intent: Dict,
                                      integrated_info: Dict) -> List[str]:
        """Return the guideline sentences that apply to this query, in fixed order."""
        guidelines = []
        if query_intent['personal_focus']:
            guidelines.append("結合用戶個人情況進行個性化回應")
        if query_intent['needs_official_info']:
            guidelines.append("確保提供準確的官方資訊")
        if integrated_info['confidence_level'] < 0.3:
            guidelines.append("表達資訊的不確定性,建議尋求更多資訊")
        if integrated_info['resolved_conflicts']:
            guidelines.append("說明資訊來源的差異並提供建議")
        guidelines.append("保持友善和專業的對話風格")
        return guidelines
模組四:升級聊天機器人整合 (enhanced_chatbot.py)
現在我們需要升級原有的聊天機器人,整合新的文件知識功能。
import google.generativeai as genai
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from config import MemoryConfig, AIConfig
from short_term_memory import ShortTermMemory
from long_term_memory import LongTermMemory
from memory_manager import MemoryManager
from faq_manager import FAQManager
from knowledge_integrator import KnowledgeIntegrator
class EnhancedSmartChatbot:
    """Chatbot that answers using both per-user memory and a document knowledge base.

    Each call to ``chat`` stores the message in short-term memory, searches
    long-term memory and the FAQ/document store, lets ``KnowledgeIntegrator``
    merge the hits, sends the assembled prompt to the generative model and
    records the reply. Usage counters are kept in ``self.stats``.
    """

    def __init__(self, user_id: str, api_key: str, model_name: Optional[str] = None,
                 faq_db_path: Optional[str] = None):
        """Create the memory, knowledge and model components for one user.

        Args:
            user_id: Identifier passed to ``LongTermMemory`` and used in the
                default FAQ database filename.
            api_key: Key passed to ``genai.configure``.
            model_name: Model to use; defaults to ``AIConfig.DEFAULT_MODEL``.
            faq_db_path: FAQ database path; defaults to ``faq_<user_id>.db``.
        """
        self.user_id = user_id

        # Memory subsystem.
        self.short_term_memory = ShortTermMemory(
            max_turns=AIConfig.SHORT_TERM_MAX_TURNS,
            max_age_minutes=AIConfig.SHORT_TERM_MAX_AGE_MINUTES
        )
        self.long_term_memory = LongTermMemory(user_id)
        self.memory_manager = MemoryManager()

        # Document knowledge subsystem.
        self.faq_manager = FAQManager(faq_db_path or f"faq_{user_id}.db")
        self.knowledge_integrator = KnowledgeIntegrator()

        # Generative model.
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name or AIConfig.DEFAULT_MODEL)

        # Consolidation bookkeeping.
        self.last_consolidation = None
        self.conversation_count = 0

        # Usage counters reported by get_system_statistics().
        self.stats = {
            'total_queries': 0,
            'memory_hits': 0,
            'document_hits': 0,
            'integrated_responses': 0
        }

    def chat(self, user_message: str, topic: Optional[str] = None,
             use_documents: bool = True) -> str:
        """Answer one user message.

        Args:
            user_message: The text the user typed.
            topic: Optional topic label stored with both messages.
            use_documents: When False the document store is not searched.

        Returns:
            The model's reply, or an apology string containing the error text
            if any step raised.
        """
        self.stats['total_queries'] += 1
        try:
            self.short_term_memory.add_message("user", user_message, topic)
            self.conversation_count += 1

            search_results = self._perform_knowledge_search(user_message, use_documents)

            integrated_context = self.knowledge_integrator.integrate_knowledge(
                user_message,
                search_results['personal_memories'],
                search_results['document_results']
            )

            full_context = self._build_enhanced_context(user_message, integrated_context)

            ai_response = self.model.generate_content(full_context).text

            self.short_term_memory.add_message("assistant", ai_response, topic)
            self._update_statistics(search_results, integrated_context)

            if self.memory_manager.should_consolidate_to_longterm(
                self.conversation_count, self.last_consolidation
            ):
                self._consolidate_memories()

            return ai_response
        except Exception as e:
            # Top-level boundary: the caller always gets a string back.
            return f"抱歉,我遇到了一些技術問題:{str(e)}"

    def _perform_knowledge_search(self, query: str, use_documents: bool) -> Dict[str, List]:
        """Search memory and (optionally) documents; a failing search yields an empty list."""
        search_results = {
            'personal_memories': [],
            'document_results': []
        }

        try:
            personal_memories = self.memory_manager.find_relevant_memories(
                query, self.long_term_memory, max_results=5
            )
            search_results['personal_memories'] = personal_memories
            if personal_memories:
                self.stats['memory_hits'] += 1
        except Exception as e:
            print(f"個人記憶搜尋失敗: {e}")

        if use_documents:
            try:
                document_results = self.faq_manager.search(
                    query, max_results=5, similarity_threshold=0.1
                )
                search_results['document_results'] = document_results
                if document_results:
                    self.stats['document_hits'] += 1
            except Exception as e:
                print(f"文件知識搜尋失敗: {e}")

        return search_results

    def _build_enhanced_context(self, user_message: str,
                                integrated_context: Dict[str, Any]) -> str:
        """Concatenate the integrator's context sections into one prompt string."""
        sections = [integrated_context['system_prompt']]

        if integrated_context['primary_information']:
            sections.append("\n=== 主要相關資訊 ===\n")
            sections.append(integrated_context['primary_information'])

        if integrated_context['supporting_information']:
            sections.append("\n=== 補充資訊 ===\n")
            sections.append(integrated_context['supporting_information'])

        if integrated_context['conflict_notes'] != "無資訊衝突":
            sections.append("\n=== 資訊差異說明 ===\n")
            sections.append(integrated_context['conflict_notes'])

        confidence = integrated_context['confidence_indicators']['overall_confidence']
        if confidence < 0.5:
            sections.append(
                f"\n=== 注意事項 ===\n相關資訊的確信度為 {confidence:.1%},請在回應中適當表達不確定性。\n"
            )

        recent_context = self.short_term_memory.get_recent_context(
            turns=AIConfig.MAX_CONTEXT_TURNS
        )
        if recent_context:
            conversation_history = self._format_recent_context(recent_context)
            sections.append(f"\n=== 最近的對話 ===\n{conversation_history}")

        guidelines = integrated_context['response_guidelines']
        if guidelines:
            sections.append("\n=== 回應指導原則 ===\n")
            sections.extend(f"- {guideline}\n" for guideline in guidelines)

        sections.append(f"\n=== 當前用戶訊息 ===\n{user_message}\n\n請根據以上資訊進行回應:")
        return "".join(sections)

    def _format_recent_context(self, recent_context: List[Dict]) -> str:
        """Render all but the last message as ``role:content`` lines, each capped at 100 chars."""
        lines = []
        # The last entry is skipped; the caller appends the current message separately.
        for msg in recent_context[:-1]:
            role = "用戶" if msg['role'] == 'user' else "AI"
            content = msg['content']
            if len(content) > 100:
                content = content[:100] + "..."
            lines.append(f"{role}:{content}\n")
        return "".join(lines)

    def _update_statistics(self, search_results: Dict, integrated_context: Dict):
        """Count the response as integrated when both searches returned hits."""
        if search_results['personal_memories'] and search_results['document_results']:
            self.stats['integrated_responses'] += 1

    def _consolidate_memories(self):
        """Move short-term content to long-term memory and trim old messages."""
        print("🧠 正在整理記憶...")
        self.memory_manager.consolidate_memories(
            self.short_term_memory,
            self.long_term_memory
        )
        self.last_consolidation = datetime.now()
        self._cleanup_short_term_memory()
        print("✅ 記憶整理完成")

    def _cleanup_short_term_memory(self):
        """Pop messages older than 30 minutes from the front of the conversation queue."""
        cutoff_time = datetime.now() - timedelta(minutes=30)
        conversations = self.short_term_memory.conversations
        while conversations:
            oldest_msg = conversations[0]
            # Stop at the first message that is recent or has no timestamp.
            if 'timestamp' not in oldest_msg or not oldest_msg['timestamp'] < cutoff_time:
                break
            conversations.popleft()

    # === Document management ===

    def add_knowledge_document(self, file_path: str,
                               metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add one file to the knowledge base; returns False if the manager raised."""
        try:
            return self.faq_manager.add_document(file_path, metadata)
        except Exception as e:
            print(f"新增文件失敗: {e}")
            return False

    def add_knowledge_directory(self, directory_path: str,
                                file_patterns: Optional[List[str]] = None) -> Dict[str, Any]:
        """Add every matching file of a directory; returns the manager's result dict."""
        try:
            return self.faq_manager.add_documents_from_directory(directory_path, file_patterns)
        except Exception as e:
            print(f"批量新增失敗: {e}")
            return {'successful': 0, 'failed': 1}

    def search_knowledge_base(self, query: str, max_results: int = 10) -> List[Dict]:
        """Search the document store directly; returns [] if the manager raised."""
        try:
            return self.faq_manager.search(query, max_results)
        except Exception as e:
            print(f"知識庫搜尋失敗: {e}")
            return []

    # === Query helpers ===

    def get_user_profile(self) -> Dict[str, Any]:
        """Return the user profile held in long-term memory."""
        return self.long_term_memory.get_user_profile()

    def get_conversation_summary(self) -> List[Dict]:
        """Return the recent conversation summaries held in long-term memory."""
        return self.long_term_memory.get_recent_summaries()

    def get_learning_progress(self, subject: Optional[str] = None) -> List[Dict]:
        """Return learning-progress records, optionally for one subject."""
        return self.long_term_memory.get_learning_progress(subject)

    def get_system_statistics(self) -> Dict[str, Any]:
        """Return the raw counters plus hit rates (0 when no query has been made)."""
        total = self.stats['total_queries']

        def rate(counter: str):
            return self.stats[counter] / total if total > 0 else 0

        return {
            **self.stats,
            'memory_hit_rate': rate('memory_hits'),
            'document_hit_rate': rate('document_hits'),
            'integration_rate': rate('integrated_responses')
        }

    def get_knowledge_base_info(self) -> Dict[str, Any]:
        """Return row counts read directly from the FAQ manager's SQLite file.

        Returns ``{'error': <message>}`` if the database cannot be queried.
        """
        # Imported locally because this module's import block does not
        # provide sqlite3.
        import sqlite3
        from contextlib import closing

        count_queries = {
            'total_documents': "SELECT COUNT(*) FROM documents",
            'total_chunks': "SELECT COUNT(*) FROM document_chunks",
            'recent_searches': "SELECT COUNT(*) FROM search_logs WHERE timestamp > datetime('now', '-7 days')",
        }
        try:
            info = {}
            # closing() releases the connection even when a query fails.
            with closing(sqlite3.connect(self.faq_manager.db_path)) as conn:
                cursor = conn.cursor()
                for key, sql in count_queries.items():
                    cursor.execute(sql)
                    info[key] = cursor.fetchone()[0]
            info['vector_cache_status'] = (
                'active' if self.faq_manager._vector_cache else 'inactive'
            )
            return info
        except Exception as e:
            return {'error': str(e)}
# === 智慧對話助理類別 ===
class SmartAssistant(EnhancedSmartChatbot):
    """Chatbot preconfigured for a particular assistant role.

    Recognised ``assistant_type`` values with their own weight preset are
    ``customer_service``, ``technical_support`` and ``personal_assistant``;
    any other value keeps the integrator's default weights.
    """

    def __init__(self, user_id: str, api_key: str, assistant_type: str = "general"):
        super().__init__(user_id, api_key)
        self.assistant_type = assistant_type
        self._configure_for_type(assistant_type)

    def _configure_for_type(self, assistant_type: str):
        """Overwrite the integrator's weights with the preset for this role, if any."""
        presets = {
            # Customer service leans on documents and admits more of them.
            "customer_service": {
                'personal_memory': 0.3,
                'document_knowledge': 0.7,
                'relevance_threshold': 0.2
            },
            # Technical support weighs memory and documents equally.
            "technical_support": {
                'personal_memory': 0.5,
                'document_knowledge': 0.5,
                'relevance_threshold': 0.3
            },
            # Personal assistant leans on what it remembers about the user.
            "personal_assistant": {
                'personal_memory': 0.7,
                'document_knowledge': 0.3,
                'relevance_threshold': 0.25
            },
        }
        overrides = presets.get(assistant_type)
        if overrides is not None:
            self.knowledge_integrator.integration_weights.update(overrides)

    def quick_answer(self, question: str) -> Dict[str, Any]:
        """Answer straight from the best document hit when it is similar enough.

        If the top hit's similarity is above 0.5 its content is returned as a
        ``direct_lookup``; otherwise the question goes through ``chat`` and the
        result is labelled ``integrated_response`` with a fixed confidence of 0.7.
        """
        hits = self.search_knowledge_base(question, max_results=3)

        if not hits or not (hits[0]['similarity'] > 0.5):
            reply = self.chat(question, use_documents=True)
            return {
                'answer': reply,
                'source': '整合分析',
                'confidence': 0.7,
                'type': 'integrated_response'
            }

        top_hit = hits[0]
        return {
            'answer': top_hit['content'],
            'source': top_hit.get('document', {}).get('title', '知識庫'),
            'confidence': top_hit['similarity'],
            'type': 'direct_lookup'
        }
讓我們通過幾個實際場景來展示完整的知識整合系統 (demo_scenarios.py):
import os
import tempfile
from enhanced_chatbot import EnhancedSmartChatbot, SmartAssistant
class DemoScenarios:
    """Scripted walkthroughs that exercise the assistants end to end.

    Each ``demo_*`` method builds an assistant, loads a sample document into
    its knowledge base, plays a fixed list of user messages and prints the
    replies.
    """

    def __init__(self, api_key: str):
        self.api_key = api_key

    def demo_customer_service_bot(self):
        """Play a customer-service conversation, trying ``quick_answer`` first."""
        print("🤖 === 客服機器人演示 ===")
        assistant = SmartAssistant("customer_001", self.api_key, "customer_service")
        self._create_sample_faq_files(assistant)

        customer_queries = [
            ("我想要退貨,請問退貨政策是什麼?", "退貨政策"),
            ("我是VIP會員,有什麼特別優惠嗎?", "會員優惠"),
            ("我上次買的商品有問題,你們記得我的購買記錄嗎?", "售後服務"),
            ("請問你們的營業時間是?", "營業資訊"),
            ("我想要申請退款,需要什麼手續?", "退款流程")
        ]
        print("📞 模擬客戶諮詢對話:\n")
        for query, topic in customer_queries:
            print(f"👤 客戶:{query}")
            quick_result = assistant.quick_answer(query)
            if quick_result['type'] != 'direct_lookup':
                # quick_answer already went through chat(); reuse that reply
                # instead of sending the same question to the model twice.
                print(f"🤖 客服(智慧分析):{quick_result['answer']}")
            elif quick_result['confidence'] > 0.6:
                print(f"🤖 客服(快速回答):{quick_result['answer'][:100]}...")
                print(f"📖 來源:{quick_result['source']} (信心度: {quick_result['confidence']:.1%})")
            else:
                # Direct hit, but not confident enough to show verbatim.
                response = assistant.chat(query, topic)
                print(f"🤖 客服(智慧分析):{response}")
            print("-" * 50)

        stats = assistant.get_system_statistics()
        kb_info = assistant.get_knowledge_base_info()
        print("\n📊 === 系統統計 ===")
        print(f"總查詢次數: {stats['total_queries']}")
        print(f"記憶命中率: {stats['memory_hit_rate']:.1%}")
        print(f"文件命中率: {stats['document_hit_rate']:.1%}")
        print(f"整合回應率: {stats['integration_rate']:.1%}")
        print(f"知識庫文件數: {kb_info.get('total_documents', 0)}")

    def demo_technical_support_bot(self):
        """Play a technical-support conversation and print tracked skill levels."""
        print("\n🔧 === 技術支援機器人演示 ===")
        assistant = SmartAssistant("tech_001", self.api_key, "technical_support")
        self._create_sample_tech_docs(assistant)

        tech_queries = [
            ("我的電腦無法開機,有什麼可能的原因?", "故障排除"),
            ("我是工程師小明,上次你幫我解決了網路問題", "用戶識別"),
            ("軟體安裝後出現錯誤代碼 0x80070057", "軟體問題"),
            ("請問系統需求規格是什麼?", "系統需求"),
            ("基於我的技術背景,推薦什麼解決方案?", "個性化建議")
        ]
        print("🔧 模擬技術支援對話:\n")
        self._run_dialogue(assistant, tech_queries, "🔧 技術支援")

        progress = assistant.get_learning_progress()
        if progress:
            print("\n📚 === 用戶技術能力追蹤 ===")
            for prog in progress:
                print(f"📖 {prog['subject']}: {prog['skill_level']}")

    def demo_personal_assistant(self):
        """Play a personal-assistant conversation and print the accumulated profile."""
        print("\n👤 === 個人助理演示 ===")
        assistant = SmartAssistant("personal_001", self.api_key, "personal_assistant")
        self._create_sample_personal_docs(assistant)

        personal_queries = [
            ("我是李小明,28歲軟體工程師,喜歡機器學習", "自我介紹"),
            ("根據我的背景,推薦一些學習資源", "學習建議"),
            ("我想制定一個Python進階學習計畫", "學習規劃"),
            ("公司的Python開發規範是什麼?", "開發規範"),
            ("結合我的興趣和公司要求,什麼最適合我?", "個性化建議")
        ]
        print("👤 模擬個人助理對話:\n")
        self._run_dialogue(assistant, personal_queries, "🤖 助理")

        profile = assistant.get_user_profile()
        print("\n📋 === 個人資料累積 ===")
        for key, value in profile.items():
            if value:
                print(f" {key}: {value}")

    def demo_knowledge_conflict_resolution(self):
        """Show how a stated memory (2 days) is reconciled with a policy document (3 days)."""
        print("\n⚖️ === 知識衝突解決演示 ===")
        assistant = EnhancedSmartChatbot("conflict_demo", self.api_key)

        conflict_doc = """
公司政策更新 (2024年版)
遠程工作政策:
- 員工可以每週在家工作 3 天
- 核心工作時間:上午 10:00 - 下午 4:00
- 會議時間:週二、週四必須進辦公室
"""
        temp_file = self._write_temp_document(conflict_doc)
        try:
            assistant.add_knowledge_document(temp_file, {'type': 'policy', 'version': '2024'})

            # Plant the outdated belief in the user's conversation history.
            assistant.chat("我記得公司政策是每週可以在家工作2天", "工作政策")

            print("👤 用戶:請問現在的遠程工作政策是什麼?")
            response = assistant.chat("請問現在的遠程工作政策是什麼?", "政策查詢")
            print(f"🤖 助理:{response}")
        finally:
            # Remove the temp file even if a step above raised.
            os.unlink(temp_file)

    @staticmethod
    def _run_dialogue(assistant, queries, reply_label: str):
        """Send each (query, topic) pair through ``assistant.chat`` and print both sides."""
        for query, topic in queries:
            print(f"👤 用戶:{query}")
            response = assistant.chat(query, topic)
            print(f"{reply_label}:{response}")
            print("-" * 50)

    @staticmethod
    def _write_temp_document(content: str) -> str:
        """Write ``content`` to a new UTF-8 ``.txt`` temp file and return its path.

        The file is closed, and therefore flushed, before the path is returned,
        so the caller can hand it to a reader straight away.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False,
                                         encoding='utf-8') as f:
            f.write(content)
        return f.name

    def _load_sample_document(self, assistant, content: str, metadata: dict,
                              done_message: str):
        """Write ``content`` to a temp file, register it with the assistant and report."""
        # NOTE(review): the temp file is left on disk, as before; whether the
        # knowledge base re-reads the file later is not visible here.
        path = self._write_temp_document(content)
        assistant.add_knowledge_document(path, metadata)
        print(done_message)

    def _create_sample_faq_files(self, assistant):
        """Load the sample customer-service FAQ into the assistant's knowledge base."""
        faq_content = """
常見問題解答 (FAQ)
Q: 退貨政策是什麼?
A: 商品購買後30天內可無條件退貨,需保持商品完整性。VIP會員享有45天退貨期限。
Q: VIP會員有什麼優惠?
A: VIP會員享有:
- 全站商品9折優惠
- 免運費服務
- 優先客服支援
- 生日專屬折扣碼
Q: 營業時間是?
A:
- 線上服務:24小時全年無休
- 客服電話:週一至週五 9:00-18:00
- 實體門市:週一至週日 10:00-22:00
Q: 退款流程是什麼?
A:
1. 登入會員帳號申請退款
2. 填寫退款原因
3. 提供銀行帳戶資訊
4. 3-5個工作日處理完成
"""
        self._load_sample_document(
            assistant, faq_content,
            {'type': 'faq', 'department': 'customer_service'},
            "✅ FAQ文件已載入知識庫"
        )

    def _create_sample_tech_docs(self, assistant):
        """Load the sample technical manual into the assistant's knowledge base."""
        tech_content = """
技術支援手冊
=== 常見故障排除 ===
電腦無法開機:
1. 檢查電源線連接
2. 確認電源開關
3. 檢查記憶體是否插好
4. 檢查顯示卡連接
錯誤代碼 0x80070057:
- 原因:參數錯誤
- 解決:檢查系統時間設定,運行系統檔案檢查
=== 系統需求 ===
最低需求:
- 作業系統:Windows 10
- 記憶體:4GB RAM
- 儲存空間:50GB
- 處理器:Intel i3 或同等級
建議需求:
- 作業系統:Windows 11
- 記憶體:8GB RAM
- 儲存空間:100GB SSD
- 處理器:Intel i5 或同等級
"""
        self._load_sample_document(
            assistant, tech_content,
            {'type': 'manual', 'department': 'technical'},
            "✅ 技術文件已載入知識庫"
        )

    def _create_sample_personal_docs(self, assistant):
        """Load the sample learning-resource list into the assistant's knowledge base."""
        personal_content = """
個人學習資源庫
=== Python進階學習資源 ===
推薦書籍:
1. 《流暢的Python》- 適合中級開發者
2. 《Effective Python》- 最佳實踐指南
3. 《Python設計模式》- 架構設計
線上課程:
- Coursera: Machine Learning with Python
- Udemy: Advanced Python Programming
- YouTube: Real Python 頻道
=== 機器學習資源 ===
入門路徑:
1. 數學基礎:線性代數、統計學
2. Python基礎:NumPy, Pandas
3. 機器學習:Scikit-learn
4. 深度學習:TensorFlow, PyTorch
實作專案建議:
- 房價預測模型
- 圖像分類系統
- 自然語言處理應用
"""
        self._load_sample_document(
            assistant, personal_content,
            {'type': 'learning', 'category': 'personal'},
            "✅ 個人學習資源已載入知識庫"
        )
def run_comprehensive_demo(api_key: str):
    """Run the four demo scenarios in order, reporting any error instead of raising.

    Does nothing except print a reminder when ``api_key`` is empty.
    """
    if not api_key:
        print("❌ 請設定 Gemini API Key")
        return

    demo = DemoScenarios(api_key)
    print("🚀 === AI知識整合系統完整演示 ===\n")

    # Scenario methods, looked up by name right before each call.
    scenario_names = (
        "demo_customer_service_bot",
        "demo_technical_support_bot",
        "demo_personal_assistant",
        "demo_knowledge_conflict_resolution",
    )
    try:
        for scenario_name in scenario_names:
            getattr(demo, scenario_name)()
        for closing_line in ("\n🎉 === 演示完成 ===",
                             "✅ 所有功能正常運作",
                             "📚 知識整合系統已準備就緒!"):
            print(closing_line)
    except Exception as e:
        print(f"❌ 演示過程中發生錯誤: {e}")
if __name__ == "__main__":
    # Read the key from the GEMINI_API_KEY environment variable rather than a
    # literal in the source. When it is unset the value is "", which makes
    # run_comprehensive_demo print its "please set the key" message instead of
    # calling the API with a placeholder string.
    API_KEY = os.environ.get("GEMINI_API_KEY", "")
    run_comprehensive_demo(API_KEY)
最後,讓我們升級主程序,提供完整的用戶界面和系統管理功能:
import os
import sys
import json
from datetime import datetime
from enhanced_chatbot import EnhancedSmartChatbot, SmartAssistant
from demo_scenarios import run_comprehensive_demo
class EnhancedChatbotInterface:
"""升級版聊天機器人界面"""
def __init__(self, api_key: str, user_id: str = "user_001"):
    """Remember the credentials; the chatbot itself is created later on demand."""
    self.api_key, self.user_id = api_key, user_id
    # No chatbot exists until initialize_chatbot() succeeds.
    self.chatbot = None
    # One of: enhanced, assistant, quick.
    self.current_mode = "enhanced"
def initialize_chatbot(self, mode: str = "enhanced", assistant_type: str = "general"):
    """Create the chatbot for ``mode`` and make it the active one.

    Args:
        mode: ``"enhanced"`` builds an ``EnhancedSmartChatbot``; ``"assistant"``
            builds a ``SmartAssistant`` of the given ``assistant_type``.
        assistant_type: Role passed to ``SmartAssistant``; ignored otherwise.

    Returns:
        True on success. False if construction raised or ``mode`` is not one
        of the two supported values; in both cases the current chatbot and
        mode are left untouched.
    """
    try:
        if mode == "enhanced":
            chatbot = EnhancedSmartChatbot(self.user_id, self.api_key)
        elif mode == "assistant":
            chatbot = SmartAssistant(self.user_id, self.api_key, assistant_type)
        else:
            # Previously an unknown mode reported success with no chatbot set.
            print(f"❌ 初始化失敗: 不支援的模式 '{mode}'")
            return False
    except Exception as e:
        print(f"❌ 初始化失敗: {e}")
        return False

    self.chatbot = chatbot
    self.current_mode = mode
    print(f"✅ 聊天機器人已初始化 ({mode} 模式)")
    return True
def run_interactive_session(self):
    """Run the read-respond loop until the user quits or input ends.

    Lines starting with ``/`` are passed to ``_handle_command``. ``quit``,
    ``exit``, ``退出`` and ``q`` end the session after printing the session
    summary. Ctrl-C or end of input (EOF) also ends the session. Any other
    error is printed and the loop continues.
    """
    print("🤖 === AI智慧助手 (升級版) ===")
    print("💡 指令說明:")
    print(" 輸入 '/help' - 查看所有指令")
    print(" 輸入 '/mode' - 切換模式")
    print(" 輸入 '/load' - 載入知識文件")
    print(" 輸入 'quit' - 退出程序")
    print("=" * 60)

    if not self.chatbot and not self.initialize_chatbot():
        return

    while True:
        try:
            user_input = input(f"\n🧑 您 [{self.current_mode}]: ").strip()

            if user_input.lower() in ['quit', 'exit', '退出', 'q']:
                self._show_session_summary()
                print("👋 再見!感謝使用AI智慧助手")
                break

            if not user_input:
                print("💬 請輸入您想說的話...")
                continue

            if user_input.startswith("/"):
                self._handle_command(user_input)
                continue

            start_time = datetime.now()
            # quick_answer is only used when the active chatbot provides it;
            # otherwise quick mode falls back to a normal chat turn.
            if self.current_mode == "quick" and hasattr(self.chatbot, 'quick_answer'):
                response_data = self.chatbot.quick_answer(user_input)
                response = response_data['answer']
                source_info = f"[{response_data['source']}] 信心度: {response_data['confidence']:.1%}"
            else:
                response = self.chatbot.chat(user_input)
                source_info = "[智慧整合]"
            response_time = (datetime.now() - start_time).total_seconds()

            print(f"\n🤖 AI: {response}")
            print(f"⏱️ {source_info} 回應時間: {response_time:.2f}秒")

            last_consolidation = getattr(self.chatbot, 'last_consolidation', None)
            if last_consolidation:
                last_time = last_consolidation.strftime('%H:%M:%S')
                print(f"💭 [最後記憶整理: {last_time}]")
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every
            # further input() call would raise again, looping forever.
            print("\n\n👋 程序已中斷")
            break
        except Exception as e:
            print(f"❌ 對話時發生錯誤: {e}")
def _handle_command(self, command: str):
    """Dispatch a slash command to the method that implements it.

    ``/load`` and ``/search`` are matched by prefix and receive the whole
    command text; every other command must match exactly and takes no
    argument. Unknown commands print a hint.
    """
    exact_commands = {
        "/help": "_show_help",
        "/mode": "_switch_mode",
        "/stats": "_show_statistics",
        "/profile": "_show_user_profile",
        "/summary": "_show_conversation_summary",
        "/progress": "_show_learning_progress",
        "/kb": "_show_knowledge_base_info",
        "/export": "_export_data",
        "/reset": "_reset_system",
    }
    prefix_commands = (
        ("/load", "_load_knowledge_files"),
        ("/search", "_search_knowledge"),
    )

    method_name = exact_commands.get(command)
    if method_name is not None:
        getattr(self, method_name)()
        return

    for prefix, method_name in prefix_commands:
        if command.startswith(prefix):
            getattr(self, method_name)(command)
            return

    print(f"❌ 未知指令: {command}")
    print("💡 輸入 '/help' 查看可用指令")
def _show_help(self):
    """Print the list of slash commands with usage examples."""
    # The empty first and last entries reproduce the blank line that
    # surrounds the help block.
    help_lines = (
        "",
        "🆘 === 可用指令 ===",
        "📋 基本指令:",
        "/help - 顯示此幫助",
        "/mode - 切換運行模式",
        "/stats - 顯示系統統計",
        "/profile - 查看用戶資料",
        "/summary - 查看對話摘要",
        "/progress - 查看學習進度",
        "📚 知識庫管理:",
        "/load <path> - 載入知識文件或目錄",
        "/kb - 查看知識庫資訊",
        "/search <query> - 搜尋知識庫",
        "🔧 系統管理:",
        "/export - 導出資料",
        "/reset - 重置系統",
        "quit - 退出程序",
        "💡 使用範例:",
        "/load ./docs/faq.txt",
        "/load ./knowledge_base/ *.pdf",
        "/search 退貨政策",
        "",
    )
    print("\n".join(help_lines))
def _switch_mode(self):
    """Ask which mode to use and switch to it.

    Choice 1 re-initialises an enhanced chatbot, choice 2 asks for an
    assistant type and initialises an assistant, choice 3 only sets the
    current mode to ``quick``.
    """
    for menu_line in ("\n🔄 === 模式切換 ===",
                      "1. enhanced - 完整智慧模式(記憶+文件整合)",
                      "2. assistant - 專業助理模式",
                      "3. quick - 快速問答模式"):
        print(menu_line)

    selection = input("請選擇模式 (1-3): ").strip()

    if selection == "1":
        self.initialize_chatbot("enhanced")
        return

    if selection == "3":
        self.current_mode = "quick"
        print("✅ 已切換到快速問答模式")
        return

    if selection != "2":
        print("❌ 無效選擇")
        return

    for menu_line in ("\n🤖 助理類型:",
                      "1. general - 通用助理",
                      "2. customer_service - 客服助理",
                      "3. technical_support - 技術支援",
                      "4. personal_assistant - 個人助理"):
        print(menu_line)

    type_selection = input("請選擇助理類型 (1-4): ").strip()
    # Unrecognised answers fall back to the general assistant.
    assistant_types = {
        "1": "general",
        "2": "customer_service",
        "3": "technical_support",
        "4": "personal_assistant"
    }
    self.initialize_chatbot("assistant", assistant_types.get(type_selection, "general"))
def _load_knowledge_files(self, command: str):
    """Handle ``/load <path>`` and ``/load <directory> <patterns>``.

    A file path is added as a single document. A directory is added in bulk;
    file patterns may follow the directory on the command line (separated by
    spaces or commas, as in the help text's ``/load ./knowledge_base/ *.pdf``)
    or, when none are given, are asked for interactively.
    """
    parts = command.split(" ", 1)
    if len(parts) < 2 or not parts[1].strip():
        print("❌ 請指定文件路徑")
        print("💡 用法: /load <文件路徑> 或 /load <目錄路徑>")
        return

    path = parts[1].strip()
    file_patterns = None
    patterns_given = False

    if not os.path.exists(path):
        # The whole argument is not a path; try "<directory> <patterns>".
        candidate, _, pattern_text = path.partition(" ")
        inline_patterns = pattern_text.replace(",", " ").split()
        if not (inline_patterns and os.path.isdir(candidate)):
            print(f"❌ 路徑不存在: {path}")
            return
        path, file_patterns, patterns_given = candidate, inline_patterns, True

    try:
        if os.path.isfile(path):
            if self.chatbot.add_knowledge_document(path):
                print(f"✅ 文件載入成功: {path}")
            else:
                print(f"❌ 文件載入失敗: {path}")
        elif os.path.isdir(path):
            print(f"📁 正在載入目錄: {path}")
            if not patterns_given:
                patterns = input("檔案模式過濾 (例如: *.pdf,*FAQ*) [留空載入全部]: ").strip()
                file_patterns = [p.strip() for p in patterns.split(",")] if patterns else None

            result = self.chatbot.add_knowledge_directory(path, file_patterns)
            print(f"✅ 載入完成:")
            print(f" 成功: {result['successful']} 個文件")
            print(f" 失敗: {result['failed']} 個文件")
            if result.get('failed_docs'):
                print("❌ 失敗的文件:")
                # Only the first three failures are listed.
                for failed in result['failed_docs'][:3]:
                    print(f" - {failed['file']}: {failed['error']}")
    except Exception as e:
        print(f"❌ 載入過程發生錯誤: {e}")
def _show_statistics(self):
    """Print query counts and hit rates, if the active chatbot exposes them."""
    get_stats = getattr(self.chatbot, 'get_system_statistics', None)
    if get_stats is None:
        print("❌ 當前模式不支援統計功能")
        return

    stats = get_stats()
    report = [
        "\n📊 === 系統統計 ===",
        f"總查詢次數: {stats['total_queries']}",
        f"記憶命中: {stats['memory_hits']} ({stats['memory_hit_rate']:.1%})",
        f"文件命中: {stats['document_hits']} ({stats['document_hit_rate']:.1%})",
        f"整合回應: {stats['integrated_responses']} ({stats['integration_rate']:.1%})",
    ]
    for report_line in report:
        print(report_line)
def _show_user_profile(self):
    """Print every non-empty field of the chatbot's user profile.

    List values are rendered as a comma-separated string; other values are
    printed as-is. A placeholder line is printed when all fields are falsy.
    """
    profile = self.chatbot.get_user_profile()
    print("\n📋 === 用戶資料 ===")
    if not any(profile.values()):
        print(" 目前沒有用戶資料")
        return
    for field, data in profile.items():
        if not data:
            # Falsy fields (empty string/list, None, 0) are skipped.
            continue
        if isinstance(data, list):
            print(f" {field}: {', '.join(map(str, data))}")
        else:
            print(f" {field}: {data}")
def _show_conversation_summary(self):
    """Print up to five conversation summaries from the chatbot.

    Each summary is shown as a numbered line limited to 80 characters; the
    ``...`` marker is appended only when the text was actually cut. When an
    entry has a ``date_range`` key it is printed on a following line.
    """
    summaries = self.chatbot.get_conversation_summary()
    print("\n📝 === 對話摘要 ===")
    if not summaries:
        print(" 目前沒有對話摘要")
        return
    for i, summary in enumerate(summaries[:5], 1):
        text = summary['summary']
        # Only mark the line as truncated when characters were dropped.
        preview = f"{text[:80]}..." if len(text) > 80 else text
        print(f" {i}. {preview}")
        if 'date_range' in summary:
            print(f" 時間: {summary['date_range']}")
def _show_learning_progress(self):
    """Print the subject and skill level of each learning-progress record.

    Records come from ``self.chatbot.get_learning_progress()``; a record's
    ``details`` value is printed on an extra line when it is truthy.
    """
    records = self.chatbot.get_learning_progress()
    print("\n📚 === 學習進度 ===")
    if records:
        for entry in records:
            print(f" 📖 {entry['subject']}: {entry['skill_level']}")
            if entry.get('details'):
                print(f" 詳細: {entry['details']}")
    else:
        print(" 目前沒有學習記錄")
def _search_knowledge(self, command: str):
    """Search the knowledge base for the keywords given in a command.

    Args:
        command: The raw command line; everything after the first space is
            the search query.

    Prints a prompt when the query is missing or blank, a "not supported"
    message when the chatbot has no ``search_knowledge_base`` method, and
    otherwise up to five results with similarity, source title and a
    content preview of at most 100 characters.
    """
    # A blank query ("/search" or "/search   ") is rejected up front rather
    # than being sent to the search backend as an empty string.
    _, _, raw_query = command.partition(" ")
    query = raw_query.strip()
    if not query:
        print("❌ 請輸入搜尋關鍵字")
        return
    if not hasattr(self.chatbot, 'search_knowledge_base'):
        print("❌ 當前模式不支援知識庫搜尋")
        return
    results = self.chatbot.search_knowledge_base(query, max_results=5)
    print(f"\n🔍 === 搜尋結果: '{query}' ===")
    if not results:
        print(" 沒有找到相關結果")
        return
    for i, result in enumerate(results, 1):
        print(f" {i}. 相關度: {result['similarity']:.2%}")
        # `or {}` also covers a 'document' key that is present but None.
        document = result.get('document') or {}
        print(f" 來源: {document.get('title', '未知')}")
        content = result['content']
        # Append the ellipsis only when the content was actually shortened.
        snippet = f"{content[:100]}..." if len(content) > 100 else content
        print(f" 內容: {snippet}")
        print()
def _show_knowledge_base_info(self):
    """Print summary figures about the knowledge base.

    Values are read from ``self.chatbot.get_knowledge_base_info()`` with
    defaults for missing keys. If the chatbot has no such method, a
    "not supported" message is printed instead.
    """
    if not hasattr(self.chatbot, 'get_knowledge_base_info'):
        print("❌ 當前模式不支援知識庫功能")
        return
    kb = self.chatbot.get_knowledge_base_info()
    print("\n📚 === 知識庫資訊 ===")
    # Missing numeric fields default to 0, missing status to "未知".
    print(f"文件總數: {kb.get('total_documents', 0)}")
    print(f"文字段落: {kb.get('total_chunks', 0)}")
    print(f"最近搜尋: {kb.get('recent_searches', 0)} 次")
    print(f"向量狀態: {kb.get('vector_cache_status', '未知')}")
def _export_data(self):
    """Export profile, conversation summaries and learning progress to JSON.

    The file is written to the current working directory as
    ``chatbot_export_<user_id>_<YYYYmmdd_HHMMSS>.json``. Any failure is
    reported on stdout instead of being raised.
    """
    try:
        export_data = {
            'user_profile': self.chatbot.get_user_profile(),
            'conversation_summary': self.chatbot.get_conversation_summary(),
            'learning_progress': self.chatbot.get_learning_progress(),
        }
        # One timestamp for both the payload and the filename so they agree.
        exported_at = datetime.now()
        export_data['export_time'] = exported_at.isoformat()
        # Serialize before touching the filesystem: if the data is not
        # JSON-serializable, no truncated file is left behind.
        payload = json.dumps(export_data, ensure_ascii=False, indent=2)
        # Neutralize path separators and other characters that are invalid
        # in filenames, so the user id cannot redirect or break the export.
        unsafe_chars = '<>:"/\\|?*'
        safe_user_id = "".join(
            "_" if ch in unsafe_chars or ord(ch) < 32 else ch
            for ch in str(self.user_id)
        )
        filename = f"chatbot_export_{safe_user_id}_{exported_at.strftime('%Y%m%d_%H%M%S')}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"✅ 資料已導出到: {filename}")
    except Exception as e:
        # UI boundary: report the problem and keep the session alive.
        print(f"❌ 導出失敗: {e}")
def _reset_system(self):
    """Re-initialize the chatbot after an explicit ``YES`` confirmation.

    Any answer other than the exact string ``YES`` cancels the reset.
    Errors raised during re-initialization are printed, not propagated.
    """
    answer = input("⚠️ 確定要重置所有資料嗎?(輸入 'YES' 確認): ")
    if answer != "YES":
        print("❌ 重置已取消")
        return
    try:
        # Rebuild the chatbot in whatever mode is currently active.
        # NOTE(review): only the mode is passed on; whether any other
        # initialization arguments need restoring is not visible here.
        mode = self.current_mode
        self.initialize_chatbot(mode)
        print("✅ 系統已重置")
    except Exception as e:
        print(f"❌ 重置失敗: {e}")
def _show_session_summary(self):
    """Print a short end-of-session recap from the system statistics.

    Prints nothing when the chatbot has no ``get_system_statistics`` method.
    """
    bot = self.chatbot
    if not hasattr(bot, 'get_system_statistics'):
        return
    stats = bot.get_system_statistics()
    print("\n📋 === 會話總結 ===")
    print(f"總交互次數: {stats['total_queries']}")
    print(f"模式: {self.current_mode}")
    # Usage rates are rendered as percentages with one decimal place.
    print(f"記憶使用率: {stats['memory_hit_rate']:.1%}")
    print(f"文件使用率: {stats['document_hit_rate']:.1%}")
def main():
    """Program entry point: dispatch on the first command-line argument.

    * ``help`` / ``-h`` / ``--help``: print usage and return. This is handled
      before the API key is looked up, so help never prompts for a key.
    * ``demo``: run the comprehensive demo with the API key.
    * anything else: start the interactive session.

    The API key is read from the ``GEMINI_API_KEY`` environment variable,
    falling back to an interactive prompt; without a key the function
    prints an error and returns.
    """
    cli_command = sys.argv[1] if len(sys.argv) > 1 else None

    # Help needs no credentials, so answer it before asking for the key.
    if cli_command in ("help", "-h", "--help"):
        print("""
🤖 AI智慧助手使用說明
啟動方式:
python enhanced_main.py - 啟動互動模式
python enhanced_main.py demo - 運行演示
python enhanced_main.py help - 顯示此幫助
環境變數:
GEMINI_API_KEY - 設定 Gemini API Key
功能特色:
✅ 個人記憶管理
✅ 文件知識整合
✅ 智慧衝突解決
✅ 多模式運行
✅ 知識庫搜尋
""")
        return

    # Read the API key: environment first, then interactive prompt.
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        api_key = input("請輸入您的 Gemini API Key: ").strip()
    if not api_key:
        print("❌ 需要 API Key 才能使用")
        return

    if cli_command == "demo":
        print("🎬 啟動演示模式...")
        run_comprehensive_demo(api_key)
        return

    # Default: interactive mode.
    try:
        interface = EnhancedChatbotInterface(api_key)
        interface.run_interactive_session()
    except KeyboardInterrupt:
        print("\n\n👋 程序已中斷")
    except Exception as e:
        # Top-level boundary: show the failure instead of a traceback.
        print(f"❌ 程序執行失敗: {e}")
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()