pip install llama-index-postprocessor-longllmlingua
import os
from dotenv import find_dotenv, load_dotenv
# Load variables from the .env file located by find_dotenv() into the process
# environment, then read the Tavily key (None when the variable is not set).
# The result of load_dotenv() is bound to `_` so it is not echoed by a notebook.
_ = load_dotenv(dotenv_path=find_dotenv())
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
from llama_index.tools.tavily_research.base import TavilyToolSpec
# Sample question sent to the search tool (and reused later as the
# compression query).
question = '徵象(Signs)及症狀(Symptoms)之區別?'

# Tavily search tool authenticated with the key read from the environment.
tavily_tool = TavilyToolSpec(api_key=TAVILY_API_KEY)

# Ask for at most three hits for the question.
tavily_response = tavily_tool.search(question, max_results=3)

# Notebook-style inspection: container type / size, then the first hit.
type(tavily_response), len(tavily_response)  # (list, 3)
tavily_response[0]
from llama_index.core.schema import TextNode, NodeWithScore
# Re-wrap every search hit as a score-less NodeWithScore, carrying over the
# hit's text, metadata and document id, so the list can be fed to a node
# postprocessor.
nodes = [
    NodeWithScore(
        node=TextNode(text=hit.text, metadata=hit.metadata, id_=hit.doc_id)
    )
    for hit in tavily_response
]
# pip install llama-index-postprocessor-longllmlingua
import time
from llama_index.postprocessor.longllmlingua import LongLLMLinguaPostprocessor
# Build the LLMLingua-2 based compressor and report how long construction
# takes. time.perf_counter() is used for the measurement because it is a
# monotonic, high-resolution clock meant for elapsed-time intervals, whereas
# time.time() follows the wall clock and can jump if the system time changes.
start = time.perf_counter()
compressor_llmlingua2 = LongLLMLinguaPostprocessor(
    model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
    device_map="auto",
    use_llmlingua2=True,
)
end = time.perf_counter()
print(f'dur: {end - start:.2f} sec')
from llama_index.core.schema import QueryBundle
import time
# Compress all retrieved nodes against the question and time the call.
# The public postprocess_nodes() entry point is used rather than the private
# _postprocess_nodes() hook, which is an implementation detail of the
# postprocessor base class and not meant to be called directly.
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
start = time.perf_counter()
results = compressor_llmlingua2.postprocess_nodes(
    nodes, query_bundle=QueryBundle(query_str=question)
)
end = time.perf_counter()
print(f'dur: {end - start:.2f} sec')
# Notebook-style display of the compressed nodes.
results
# Compress only the first retrieved node so the input can be compared
# directly with the compressed output.
# (A stray bare identifier `code` preceded this step in the export; it would
# raise NameError when executed and has been replaced by this comment.)
single_node_list = [nodes[0]]
print(single_node_list)
# Timed with the monotonic perf_counter() clock; the public
# postprocess_nodes() entry point is called instead of the private
# _postprocess_nodes() hook.
start = time.perf_counter()
results = compressor_llmlingua2.postprocess_nodes(
    single_node_list, query_bundle=QueryBundle(query_str=question)
)
end = time.perf_counter()
print(f'dur: {end - start:.2f} sec')
print(results)
壓縮前:
壓縮後:
約 0.26 秒跑完。
可以看到確實砍掉了一些無關的內容,不過壓縮後的結果還是不能直接使用。
# Minimal experiment: a single very short node and a question about it, to
# see how the compressor treats text that is already tiny.
question = '大杯珍珠奶茶微糖微冰的簡稱是什麼?'
nodes = [
    NodeWithScore(
        node=TextNode(
            text='大杯珍珠奶茶微糖微冰',
            metadata={'url': 'url://12345'},
            id_='yoyoyo',
        )
    )
]
# Timed with the monotonic perf_counter() clock; the public
# postprocess_nodes() entry point is called instead of the private
# _postprocess_nodes() hook.
start = time.perf_counter()
results = compressor_llmlingua2.postprocess_nodes(
    nodes, query_bundle=QueryBundle(query_str=question)
)
end = time.perf_counter()
# Report the elapsed time like the other timed steps do; previously `end`
# was computed here but never used.
print(f'dur: {end - start:.2f} sec')
print(results)
target_token 用來控制提取後的文本要少於多少字。target_token=19 的結果是無壓縮全文,target_token=18 的結果則是空字串。