以下是可完全離線執行的範例程式碼:只需要 Python 標準庫即可執行(matplotlib/pandas 為選用)。
import re
from collections import Counter
# Sample article (swap in the full text of a news story).
# Built from adjacent literals; the value keeps the leading and trailing
# newline that the triple-quoted form produced.
text = (
    "\n"
    "台灣晶片大廠於昨日公布財報,營收與淨利均優於市場預期。"
    "公司表示,受惠於 AI 加速需求,製程良率改善帶動毛利率提升。"
    "管理層同時警告,全球供應鏈仍有不確定性,但短期訂單能見度良好。"
    "\n"
    "市場反應正向,股價盤中上漲逾 6%。"
    "\n"
)
# --- 1) Simple sentence splitting ---
def split_sentences(text):
    """Split *text* into sentences, keeping each terminator attached.

    A sentence ends at ``。``, ``!``, ``?`` or their full-width forms
    ``!`` / ``?``.  Stacked terminators such as ``?!`` stay with the
    sentence they close instead of producing a one-character "sentence".
    Whitespace (including newlines) between sentences is discarded.

    Args:
        text: The raw text to split.

    Returns:
        A list of non-empty sentence strings in their original order.
        Text without any terminator is returned as a single sentence;
        empty or whitespace-only input yields ``[]``.
    """
    # The lookbehind is zero-width, so the terminator stays in the sentence.
    # The negative lookahead prevents a split between consecutive
    # terminators.  NOTE: splitting on a possibly-empty match relies on
    # re.split semantics of Python 3.7+.
    pieces = re.split(r'(?<=[。!?!?])(?![。!?!?])\s*', text.strip())
    # A terminator at the very end leaves a trailing empty string; drop it.
    return [piece for piece in pieces if piece]
# --- 2) Word-frequency tokenization (deliberately basic) ---
# Function words that carry no topical signal; extend as needed.
STOPWORDS = {
    '的', '了', '與', '和', '在', '是', '有', '也', '就', '但', '而', '其', '能',
    '短期',
}

# Either a run of CJK ideographs, or a run of any other word characters.
# The two are separated explicitly because ``\w`` also matches CJK, which
# would otherwise glue a whole clause into one token.
_TOKEN_RE = re.compile(r'[\u4e00-\u9fff]+|[^\W\u4e00-\u9fff]+')


def tokenize_words(s):
    """Tokenize *s* into lower-cased terms suitable for frequency counting.

    Non-CJK runs (e.g. English words, numbers) are kept whole, lower-cased,
    and dropped when they are a single character or listed in STOPWORDS.

    CJK runs have no word boundaries, so they are broken into overlapping
    two-character terms (character bigrams).  A bigram is dropped when it
    is itself in STOPWORDS or when either of its characters is a
    single-character stopword, so "營收與淨利" yields ["營收", "淨利"].

    Args:
        s: The text (typically one sentence) to tokenize.

    Returns:
        A list of token strings in order of appearance; may be empty.
    """
    tokens = []
    for run in _TOKEN_RE.findall(s):
        if '\u4e00' <= run[0] <= '\u9fff':
            # CJK run: slide a two-character window across it.  A run of
            # length one produces no bigram, matching the len > 1 rule
            # applied to other tokens.
            for start in range(len(run) - 1):
                gram = run[start:start + 2]
                if (gram in STOPWORDS
                        or gram[0] in STOPWORDS
                        or gram[1] in STOPWORDS):
                    continue
                tokens.append(gram)
        else:
            word = run.lower()
            if len(run) > 1 and word not in STOPWORDS:
                tokens.append(word)
    return tokens
def sentence_score(sent, freq):
    """Return the average corpus frequency of the tokens in *sent*.

    Args:
        sent: A sentence string; it is tokenized with ``tokenize_words``.
        freq: A mapping from token to its count.  Tokens missing from the
            mapping count as 0.

    Returns:
        The sum of the token frequencies divided by the number of tokens,
        or 0 when the sentence produces no tokens.
    """
    tokens = tokenize_words(sent)
    if tokens:
        total = sum(freq.get(token, 0) for token in tokens)
        # Dividing by the token count keeps long sentences from winning
        # purely on length.
        return total / len(tokens)
    return 0
# --- 3) Build the summary (pick the top_n sentences, keep original order) ---
def extractive_summary(text, top_n=2):
    """Build an extractive summary from the highest-scoring sentences.

    The text is split with ``split_sentences``; a token frequency table is
    built over all sentences with ``tokenize_words``; each sentence is
    scored with ``sentence_score``; the ``top_n`` best sentences are then
    emitted in the order they appear in the text.

    Args:
        text: The document to summarize.
        top_n: Maximum number of sentences to keep.  ``None`` keeps every
            sentence.  Zero or a negative value yields an empty summary.

    Returns:
        The selected sentences joined by a single space, or ``''`` when
        there is nothing to select.
    """
    sents = split_sentences(text)
    # Guard non-positive counts explicitly: a negative value used as a
    # slice bound would silently mean "all but the last k".
    if not sents or (top_n is not None and top_n <= 0):
        return ''

    freq = Counter()
    for sent in sents:
        freq.update(tokenize_words(sent))

    scores = [sentence_score(sent, freq) for sent in sents]
    # sorted() is stable even with reverse=True, so equally scored
    # sentences are ranked by their position in the text.
    ranked = sorted(range(len(sents)), key=lambda i: scores[i], reverse=True)
    # Restore document order among the winners.
    chosen = sorted(ranked[:top_n])
    return ' '.join(sents[i] for i in chosen)
# --- 4) Simple sentiment check (illustrative finance lexicon) ---
# Terms counted as positive signals.
POS = {"優於", "提升", "上漲", "利多", "成長", "改善", "利好", "增加"}
# Terms counted as negative signals.
NEG = {"不確定", "下滑", "下跌", "虧損", "警告", "減少", "利空", "擔憂"}


def sentiment_score(text):
    """Classify *text* by counting lexicon hits.

    Every non-overlapping occurrence of a POS term adds one and every
    occurrence of a NEG term subtracts one.  Matching is plain substring
    counting on the lower-cased text.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, score)`` tuple where ``score`` is positive hits minus
        negative hits and ``label`` is "正向" for a score above zero,
        "負向" for a score below zero, and "中性" for exactly zero.
    """
    lowered = text.lower()

    positive_hits = 0
    for term in POS:
        positive_hits += lowered.count(term)

    negative_hits = 0
    for term in NEG:
        negative_hits += lowered.count(term)

    score = positive_hits - negative_hits
    if score == 0:
        return "中性", score
    label = "正向" if score > 0 else "負向"
    return label, score
# --- Demo run ---
# Summarize the sample article and classify its sentiment, then print both.
summary = extractive_summary(text, top_n=2)
sentiment, s = sentiment_score(text)

# Header and summary on consecutive lines.
print("=== 摘要 ===", summary, sep="\n")
# The leading "\n" in the header leaves a blank line between sections.
print("\n=== 情緒判定 ===")
print(sentiment, "(分數:", s, ")")