pip install llama-index-llms-ollama
First, at the top: we prompt the model directly and ask it to output JSON.
Then, in the middle: the generic completion API.
This is notably less reliable, but supported by all text-based LLMs.
Last are models that support a function calling API.
We hand the tool over when calling the LLM, and the API takes care of the JSON for us.
The FunctionAgent we built on Day14 is exactly this kind:
we call chat_with_tools and pass it the tool. Clarification: in this series, tool calling and function calling are treated as synonyms.
Clarification: the difference between function/tool calling and JSON mode.
So can we just use function calling for everything?
Our goal:
In God we trust. All others must bring data.
import pydantic  # pip show pydantic
print(f"our pydantic version: {pydantic.VERSION}")
from pprint import pprint
from typing import List, Optional
from pydantic import BaseModel, Field
class Options(BaseModel):
    """單選題的選項物件,包含 A, B, C, D 四個選項"""
    A: str = Field(..., description='選項A')
    B: str = Field(..., description='選項B')
    C: str = Field(..., description='選項C')
    D: str = Field(..., description='選項D')
class MCQ(BaseModel):
    """單選題結構,包含題號、題幹、選項與答案"""
    qid: int = Field(..., description='題號')
    question: str = Field(..., description='題幹')
    options: Options = Field(..., description="本題的四個選項")
    ans: Optional[str] = Field(default=None, description='答案')
class Meta(BaseModel):
    """試題原始資訊,包含 年分、科目、第幾次考試"""
    year: Optional[int] = Field(default=None, description='第?年')
    subject: Optional[str] = Field(default=None, description='科目名稱')
    times: Optional[int] = Field(default=None, description='第?次考試')
class ExtractExam(BaseModel):
    """
    提取整份考卷
    - qset: 單選題考題集合
    - subject: 科目名稱
    - year: 考試年分
    - times: 第幾次考試
    """
    qset: List[MCQ] = Field(..., description='單選題考題')
    metadata: Meta = Field(..., description='考題資訊')
schema = MCQ.model_json_schema()
pprint(schema)
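To give a feel for the spec document we will stuff into the prompt later, here is a heavily abridged sketch of what pprint(schema) prints for MCQ; the exact keys can vary slightly between pydantic 2.x releases:
# {'$defs': {'Options': {'description': '單選題的選項物件,包含 A, B, C, D 四個選項',
#                        'properties': {'A': {'description': '選項A', 'title': 'A', 'type': 'string'}, ...},
#                        'required': ['A', 'B', 'C', 'D'],
#                        'title': 'Options',
#                        'type': 'object'}},
#  'description': '單選題結構,包含題號、題幹、選項與答案',
#  'properties': {'ans': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': None, ...},
#                 'options': {...},
#                 'qid': {'description': '題號', 'title': 'Qid', 'type': 'integer'},
#                 'question': {'description': '題幹', 'title': 'Question', 'type': 'string'}},
#  'required': ['qid', 'question', 'options'],
#  'title': 'MCQ',
#  'type': 'object'}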
3.0. Our pydantic version is 2.11.9 (reminder: v1 and v2 differ in parts of their API)
3.1. To create a Pydantic class, just inherit from BaseModel: MCQ(BaseModel)
3.2. Plain types are annotated directly (int, str): qid: int
3.3. Our own models can be nested: qset: List[MCQ]
3.4. Use docstrings (""" """): they make the structure easier for the LLM to understand
3.5. Use Field(..., description=): the first argument is the default value, the second is the description
3.6. Call .model_json_schema() to turn it into a JSON schema: this is the spec document you stuff into the prompt.
3.7. Once the above is clear: from now on just ask ChatGPT to write these classes for you, it is genuinely good at this
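One more thing worth knowing before we call any LLM: the reverse direction. Once a model hands back a dict that is supposed to follow the schema, pydantic can validate it for us. A minimal sketch (my addition, using a question that shows up later in this post):
from pydantic import ValidationError

candidate = {
    "qid": 1,
    "question": "常見針灸配穴法中,所指的是「四關穴」,為下列何穴位之組合?",
    "options": {"A": "上星、日月", "B": "合谷、太衝", "C": "內關、外關", "D": "上關、下關"},
}
try:
    mcq = MCQ.model_validate(candidate)   # raises ValidationError if the shape is wrong
    print(mcq.options.B)                  # attribute access instead of dict indexing
    print(mcq.model_dump_json(indent=2))  # and back to JSON when we need it
except ValidationError as e:
    print(e)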
from llama_index.readers.file import PDFReader
from pathlib import Path
import time
file_path = Path("./data/114_針灸科學.pdf")
FULL_DOCUMENT=False
pdf_reader = PDFReader(return_full_document=FULL_DOCUMENT)
documents = pdf_reader.load_data(file=file_path)
print(f"len of documents: {len(documents)}")
text = documents[0].text
print(f"text len: {len(text)}")
print('---')
print(text)
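Side note: everything below only uses documents[0], i.e. the first page. If you ever want the whole exam in one string, a minimal sketch (my addition) that merges the per-page Documents:
# join all pages back into a single string
full_text = "\n".join(doc.text for doc in documents)
print(f"full text len: {len(full_text)}")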
(PDFReader parses the file with pypdf; see the pypdf source_code for details.)
The pre-downloaded exam PDF sits at data/114_針灸科學.pdf.
return_full_document is set to False, so each page is read into its own Document.

from llama_index.core.program.function_program import get_function_tool
exam_tool = get_function_tool(ExtractExam)
print(f"# tool info: ")
print(f"# name: {exam_tool.metadata.name}\n\n# description: {exam_tool.metadata.description}")
print('---')
# pip install llama-index-llms-ollama
from llama_index.llms.ollama import Ollama
llama = Ollama(
    model="llama3.1:8b",
    request_timeout=120.0,
    context_window=8000,
    temperature=0.0,
)
start = time.time()
resp = llama.chat_with_tools(
    [exam_tool],
    user_msg="請從下列文本中提取考試: " + text,
    tool_required=True,  # can optionally force the tool call
)
end = time.time()
print(f'dur: {end-start:.2f} sec')
tool_calls = llama.get_tool_calls_from_response(
    resp, error_on_no_tool_call=False
)
print(f"type: {type(tool_calls)}, len: {len(tool_calls)}, dtype: {type(tool_calls[0])}")
print('---')
pprint(tool_calls[0].tool_kwargs)
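tool_kwargs is just a dict, so we can hand it straight back to pydantic to confirm the model really followed the schema. A minimal sketch, my addition on top of the original flow:
from pydantic import ValidationError

try:
    exam = ExtractExam.model_validate(tool_calls[0].tool_kwargs)
    print(f"extracted {len(exam.qset)} questions, metadata: {exam.metadata}")
except ValidationError as e:
    # the arguments may be partial or malformed; inspect them before trusting the output
    print(e)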
A few notes on the call above: because we set tool_required=True, the model is forced to use ExtractExam. Naming the model Exam would have worked just as well, but calling it ExtractExam makes it easier to picture why structured output is really just function calling. The call itself goes through chat_with_tools, which we have also used before. Next, let's try the same thing with MCQ alone:
mcq_tool = get_function_tool(MCQ)
print(f"# name: {mcq_tool.metadata.name}\n\n# description: {mcq_tool.metadata.description}")
start = time.time()
resp = llama.chat_with_tools(
    [mcq_tool],
    user_msg="你是一個無情的考題提取機器,負責從文本中盡可能多的提取 MCQ,以下是文本資訊:" + text,
    tool_required=True,  # can optionally force the tool call
    allow_parallel_tool_calls=True,
)
end = time.time()
print(f"dur: {end - start:.2f} sec")
tool_calls = llama.get_tool_calls_from_response(
    resp, error_on_no_tool_call=False
)
print(f'len of tool_call: {len(tool_calls)}')
print('---')
for tool_call in tool_calls:
    pprint(tool_call.tool_kwargs)
Let's bring in gpt-5-mini to check whether allowing multiple tool calls is actually broken.
print(f"# name: {mcq_tool.metadata.name}\n\n# description: {mcq_tool.metadata.description}")
import os
from dotenv import find_dotenv, load_dotenv
_ = load_dotenv(find_dotenv())
from llama_index.llms.openai import OpenAI
mini = OpenAI(model="gpt-5-mini")
start = time.time()
resp = mini.chat_with_tools(
    [mcq_tool],
    user_msg="你是一個無情的考題提取機器,負責從文本中盡可能多的提取 MCQ,以下是文本資訊:" + text,
    tool_required=True,  # can optionally force the tool call
    allow_parallel_tool_calls=True,
)
end = time.time()
print(f"dur: {end - start:.2f} sec")
tool_calls = mini.get_tool_calls_from_response(
    resp, error_on_no_tool_call=False
)
print(f'len of tool_call: {len(tool_calls)}')
print('---')
for tool_call in tool_calls:
    pprint(tool_call.tool_kwargs)
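Since each parallel tool call is supposed to carry exactly one MCQ, we can validate and collect them in a single pass. A minimal sketch, assuming (but not blindly trusting) that every call follows the schema:
from pydantic import ValidationError

mcqs = []
for tool_call in tool_calls:
    try:
        mcqs.append(MCQ.model_validate(tool_call.tool_kwargs))
    except ValidationError:
        # drop calls whose arguments do not match the MCQ schema
        continue
print(f"valid MCQs: {len(mcqs)} / {len(tool_calls)}")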
Everything above used the third method from the opening section. Now we go back to method 1, prompting the model directly, because we are switching to gemma3 and calling complete.
import json
schema = MCQ.model_json_schema()
prompt = "Here is a JSON schema for an Exam: " + json.dumps(
    schema, indent=2, ensure_ascii=False
)
gemma = Ollama(
    model="gemma3:12b",
    request_timeout=120.0,
    # Manually set the context window to limit memory usage
    context_window=8000,
    json_mode=False,
    temperature=0.0,
)
prompt += (
    """
  Extract an Exam from the following text.
  Format your output as a JSON object according to the schema above.
  Do not include any other text than the JSON object.
  Omit any markdown formatting. Do not include any preamble or explanation.
  請盡可能多的提取考題
"""
    + text
)
response = gemma.complete(prompt)
import re
raw = response.text.strip()
# strip the leading ```json fence and the trailing ``` fence
if raw.startswith("```"):
    raw = re.sub(r"^```(?:json)?", "", raw)
    raw = re.sub(r"```$", "", raw)
    raw = raw.strip()
data = json.loads(raw)
pprint(data)
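Whether data comes back as a single object or a list of them is entirely up to gemma here, so it is worth validating before using it. A minimal sketch, my addition:
from pydantic import ValidationError

items = data if isinstance(data, list) else [data]
try:
    mcqs = [MCQ.model_validate(item) for item in items]
    print(f"parsed {len(mcqs)} MCQs")
except ValidationError as e:
    print("output did not match the MCQ schema:")
    print(e)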
json_gemma = Ollama(
    model="gemma3:12b",
    request_timeout=120.0,
    # Manually set the context window to limit memory usage
    context_window=8000,
    json_mode=True,
    temperature=0.0,
)
response = json_gemma.complete(prompt)
json.loads(response.text)
{'qid': 1,
 'question': '常見針灸配穴法中,所指的是「四關穴」,為下列何穴位之組合?',
 'options': {'A': '上星、日月', 'B': '合谷、太衝', 'C': '內關、外關', 'D': '上關、下關'},
 'ans': None}
schema = ExtractExam.model_json_schema()
prompt = "Here is a JSON schema for an Exam: " + json.dumps(
    schema, indent=2, ensure_ascii=False
)
json_gemma = Ollama(
    model="gemma3:12b",
    request_timeout=120.0,
    # Manually set the context window to limit memory usage
    context_window=8000,
    json_mode=True,
    temperature=0.0,
)
prompt += (
    """
  Extract an Exam from the following text.
  Format your output as a JSON object according to the schema above.
  Do not include any other text than the JSON object.
  Omit any markdown formatting. Do not include any preamble or explanation.
  請盡可能多的提取考題
"""
    + text
)
response = json_gemma.complete(prompt)
json.loads(response.text)
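Same caveat as before: json_mode only guarantees syntactically valid JSON, not that it matches our schema, so we can still run it through ExtractExam. A minimal sketch, my addition:
from pydantic import ValidationError

try:
    exam = ExtractExam.model_validate(json.loads(response.text))
    print(f"year={exam.metadata.year}, subject={exam.metadata.subject}, questions={len(exam.qset)}")
except ValidationError as e:
    print(e)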
Today we covered the three ways of calling an LLM so that it gives us JSON back,
plus a one-minute crash course on Pydantic.
I used to just switch on JSON mode and then endlessly rewrite the prompt.
We ran quick tests over 6 small scenarios.
All of this was only a quick sanity check; tomorrow we scale the volume up and run a real benchmark.