So far we have only talked to local models through Open WebUI. To embed an LLM into an application as a real component, you have to learn to call Ollama's HTTP API directly:

- The default local endpoint is http://localhost:11434.
- If Open WebUI runs in Docker while Ollama runs on the host, use http://host.docker.internal:11434 (on Linux you may need http://172.17.0.1:11434 instead).
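Before writing any client code, it helps to confirm the endpoint is actually reachable. A minimal sketch, assuming the default local setup (/api/tags is Ollama's endpoint for listing locally installed models):

import requests

# GET /api/tags returns the models available on this Ollama instance.
resp = requests.get("http://localhost:11434/api/tags", timeout=10)
resp.raise_for_status()
for m in resp.json().get("models", []):
    print(m["name"])

If this prints your model names, the endpoint and port are correct and the examples below should work unchanged.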
/api/chat (direct HTTP with requests)

Ollama exposes two inference endpoints: /api/generate for stateless, single-turn completions, and /api/chat for multi-turn conversations expressed as a list of role-tagged messages. The first example calls /api/chat with nothing but the requests library:
import requests

# Local Ollama API endpoint
OLLAMA_URL = "http://127.0.0.1:11434/api/chat"
MODEL = "gemma3:4b"

# System prompt: swap in whatever persona / tone you want
SYSTEM_PROMPT = """
You are an AI assistant that helps with all kinds of tasks.
Answer every question in Chinese and be polite.
"""

def ask_ollama(user_text: str) -> str:
    payload = {
        "model": MODEL,
        "stream": False,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_text},
        ],
        "options": {"temperature": 0.4},
    }
    resp = requests.post(OLLAMA_URL, json=payload, timeout=120)
    resp.raise_for_status()
    data = resp.json()
    # With stream=False the whole reply arrives as a single JSON object
    return data["message"]["content"].strip()

if __name__ == "__main__":
    while True:
        user_input = input("You: ").strip()
        if not user_input or user_input.lower() in ["quit", "exit"]:
            break
        reply = ask_ollama(user_input)
        print(f"Tiramisu_Ai: {reply}")
/api/chat (via the official ollama Python client)

Calling requests directly works, but the official ollama package (pip install ollama) wraps /api/chat and makes streaming and conversation history much easier to manage. The next example builds a small chat class with persistent history:
# filename: chat_ollama_client.py
from ollama import Client
import json
from pathlib import Path
from typing import List, Dict, Optional

class OllamaChat:
    def __init__(
        self,
        model: str = "gemma3:1b",
        host: str = "http://127.0.0.1:11434",
        system_prompt: Optional[str] = None,
        history_path: Optional[str] = None,
        options: Optional[dict] = None,  # e.g. {"temperature": 0.2, "num_ctx": 4096}
    ):
        self.client = Client(host=host)
        self.model = model
        self.history: List[Dict[str, str]] = []
        self.history_path = Path(history_path) if history_path else None
        self.options = options or {}
        if system_prompt:
            self.history.append({"role": "system", "content": system_prompt})
        if self.history_path and self.history_path.exists():
            # If the file exists, it replaces the in-memory history
            # (including the system prompt appended above).
            self.load_history()

    def save_history(self):
        if not self.history_path:
            return
        self.history_path.write_text(
            json.dumps(self.history, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def load_history(self):
        if not self.history_path or not self.history_path.exists():
            return
        self.history = json.loads(self.history_path.read_text(encoding="utf-8"))

    def clear_history(self, keep_system: bool = True):
        if keep_system:
            sys_msgs = [m for m in self.history if m.get("role") == "system"]
            self.history = sys_msgs
        else:
            self.history = []
        self.save_history()

    def ask(self, user_message: str, stream: bool = True) -> str:
        """Send one message; if stream=True, print chunks as they arrive.
        Returns the complete model reply as a string."""
        self.history.append({"role": "user", "content": user_message})
        if stream:
            resp_stream = self.client.chat(
                model=self.model,
                messages=self.history,
                options=self.options,
                stream=True,
            )
            full = []
            for part in resp_stream:
                chunk = part.get("message", {}).get("content", "")
                if chunk:
                    print(chunk, end="", flush=True)
                    full.append(chunk)
            print()  # newline after the streamed reply
            answer = "".join(full)
        else:
            resp = self.client.chat(
                model=self.model,
                messages=self.history,
                options=self.options,
                stream=False,
            )
            answer = resp["message"]["content"]
            print(answer)
        # Append the assistant reply to the history
        self.history.append({"role": "assistant", "content": answer})
        self.save_history()
        return answer
if __name__ == "__main__":
    bot = OllamaChat(
        model="gemma3:1b",
        system_prompt="You are a helpful Chinese-language assistant; answer precisely and in an organized way.",
        history_path="chat_history.json",
        options={"temperature": 0.2, "num_ctx": 8192},  # tune for your model and hardware
    )
    print("Chat started! /reset clears history (keeps system); /reset_all clears everything; /quit exits.")
    while True:
        msg = input("\nMe: ").strip()
        if msg == "/quit":
            break
        elif msg == "/reset":
            bot.clear_history(keep_system=True)
            print("→ history cleared (system prompt kept)")
            continue
        elif msg == "/reset_all":
            bot.clear_history(keep_system=False)
            print("→ history fully cleared")
            continue
        bot.ask(msg, stream=True)
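The streaming behaviour does not actually require the ollama package: /api/chat streams newline-delimited JSON, one object per chunk, so plain requests can replicate it. A minimal sketch (function name and model are illustrative):

import json
import requests

def stream_chat(messages, model="gemma3:1b"):
    # With "stream": True, Ollama returns NDJSON: one JSON object
    # per line, each carrying a partial assistant message.
    with requests.post(
        "http://127.0.0.1:11434/api/chat",
        json={"model": model, "messages": messages, "stream": True},
        stream=True,
        timeout=120,
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            part = json.loads(line)
            print(part.get("message", {}).get("content", ""), end="", flush=True)
            if part.get("done"):  # the final chunk is flagged with done=true
                print()
                break

stream_chat([{"role": "user", "content": "Say hello in one short sentence."}])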