So far we have only talked to local models through Open WebUI. To embed an LLM into an application as a real component, you have to learn to call Ollama's HTTP API directly:

- The default local endpoint is http://localhost:11434.
- If Open WebUI runs in Docker while Ollama runs on the host, use http://host.docker.internal:11434 (on Linux you may need http://172.17.0.1:11434 instead).
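Before writing any client code, it helps to confirm the endpoint is actually reachable. A minimal sketch, assuming the default local setup (/api/tags is Ollama's endpoint for listing locally installed models):

import requests

# GET /api/tags returns the models available on this Ollama instance.
resp = requests.get("http://localhost:11434/api/tags", timeout=10)
resp.raise_for_status()
for m in resp.json().get("models", []):
    print(m["name"])

If this prints your model names, the endpoint and port are correct and the examples below should work unchanged.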
/api/chat (direct HTTP with requests)

Ollama exposes two inference endpoints: /api/generate for stateless, single-turn completions, and /api/chat for multi-turn conversations expressed as a list of role-tagged messages. The first example calls /api/chat with nothing but the requests library:
import requests

# Local Ollama API endpoint
OLLAMA_URL = "http://127.0.0.1:11434/api/chat"
MODEL = "gemma3:4b"

# System prompt: swap in whatever persona / tone you want
SYSTEM_PROMPT = """
You are an AI assistant that helps with all kinds of tasks.
Answer every question in Chinese and be polite.
"""

def ask_ollama(user_text: str) -> str:
    payload = {
        "model": MODEL,
        "stream": False,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_text},
        ],
        "options": {"temperature": 0.4},
    }
    resp = requests.post(OLLAMA_URL, json=payload, timeout=120)
    resp.raise_for_status()
    data = resp.json()
    # With stream=False the whole reply arrives as a single JSON object
    return data["message"]["content"].strip()

if __name__ == "__main__":
    while True:
        user_input = input("You: ").strip()
        if not user_input or user_input.lower() in ["quit", "exit"]:
            break
        reply = ask_ollama(user_input)
        print(f"Tiramisu_Ai: {reply}")
/api/chat (via the official ollama Python client)

Calling requests directly works, but the official ollama package (pip install ollama) wraps /api/chat and makes streaming and conversation history much easier to manage. The next example builds a small chat class with persistent history:
# filename: chat_ollama_client.py
from ollama import Client
import json
from pathlib import Path
from typing import List, Dict, Optional

class OllamaChat:
    def __init__(
        self,
        model: str = "gemma3:1b",
        host: str = "http://127.0.0.1:11434",
        system_prompt: Optional[str] = None,
        history_path: Optional[str] = None,
        options: Optional[dict] = None,  # e.g. {"temperature": 0.2, "num_ctx": 4096}
    ):
        self.client = Client(host=host)
        self.model = model
        self.history: List[Dict[str, str]] = []
        self.history_path = Path(history_path) if history_path else None
        self.options = options or {}
        if system_prompt:
            self.history.append({"role": "system", "content": system_prompt})
        if self.history_path and self.history_path.exists():
            # If the file exists, it replaces the in-memory history
            # (including the system prompt appended above).
            self.load_history()

    def save_history(self):
        if not self.history_path:
            return
        self.history_path.write_text(
            json.dumps(self.history, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def load_history(self):
        if not self.history_path or not self.history_path.exists():
            return
        self.history = json.loads(self.history_path.read_text(encoding="utf-8"))

    def clear_history(self, keep_system: bool = True):
        if keep_system:
            sys_msgs = [m for m in self.history if m.get("role") == "system"]
            self.history = sys_msgs
        else:
            self.history = []
        self.save_history()

    def ask(self, user_message: str, stream: bool = True) -> str:
        """Send one message; if stream=True, print chunks as they arrive.
        Returns the complete model reply as a string."""
        self.history.append({"role": "user", "content": user_message})
        if stream:
            resp_stream = self.client.chat(
                model=self.model,
                messages=self.history,
                options=self.options,
                stream=True,
            )
            full = []
            for part in resp_stream:
                chunk = part.get("message", {}).get("content", "")
                if chunk:
                    print(chunk, end="", flush=True)
                    full.append(chunk)
            print()  # newline after the streamed reply
            answer = "".join(full)
        else:
            resp = self.client.chat(
                model=self.model,
                messages=self.history,
                options=self.options,
                stream=False,
            )
            answer = resp["message"]["content"]
            print(answer)
        # Append the assistant reply to the history
        self.history.append({"role": "assistant", "content": answer})
        self.save_history()
        return answer
if __name__ == "__main__":
    bot = OllamaChat(
        model="gemma3:1b",
        system_prompt="You are a helpful Chinese-language assistant; answer precisely and in an organized way.",
        history_path="chat_history.json",
        options={"temperature": 0.2, "num_ctx": 8192},  # tune for your model and hardware
    )
    print("Chat started! /reset clears history (keeps system); /reset_all clears everything; /quit exits.")
    while True:
        msg = input("\nMe: ").strip()
        if msg == "/quit":
            break
        elif msg == "/reset":
            bot.clear_history(keep_system=True)
            print("→ history cleared (system prompt kept)")
            continue
        elif msg == "/reset_all":
            bot.clear_history(keep_system=False)
            print("→ history fully cleared")
            continue
        bot.ask(msg, stream=True)
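The streaming behaviour does not actually require the ollama package: /api/chat streams newline-delimited JSON, one object per chunk, so plain requests can replicate it. A minimal sketch (function name and model are illustrative):

import json
import requests

def stream_chat(messages, model="gemma3:1b"):
    # With "stream": True, Ollama returns NDJSON: one JSON object
    # per line, each carrying a partial assistant message.
    with requests.post(
        "http://127.0.0.1:11434/api/chat",
        json={"model": model, "messages": messages, "stream": True},
        stream=True,
        timeout=120,
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            part = json.loads(line)
            print(part.get("message", {}).get("content", ""), end="", flush=True)
            if part.get("done"):  # the final chunk is flagged with done=true
                print()
                break

stream_chat([{"role": "user", "content": "Say hello in one short sentence."}])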