import ast
import os

import matplotlib.pyplot as plt
import pandas as pd
from flask import Flask, render_template, request, redirect, url_for
from matplotlib.figure import Figure
# Flask application object that the route handlers in this file attach to.
app = Flask(__name__)

# Folder that uploaded CSV files are saved into; it is created at import time
# so that it already exists when the first upload is written.
app.config.update(UPLOAD_FOLDER='uploads')
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
def _parse_list_cell(cell):
    """Return the list stored in one ``images``/``tables`` CSV cell.

    A cell is assumed to hold the text of a Python list literal such as
    ``"['a.jpg', 'b.jpg']"``.  Blank or missing cells count as an empty list.

    Raises:
        ValueError: if the cell text is not a literal list, tuple or set.
    """
    # pandas.read_csv turns empty cells into NaN, which is a float and is
    # truthy, so every non-string or blank value is treated as "no items".
    if not isinstance(cell, str) or not cell.strip():
        return []
    # SECURITY: the CSV comes from an upload, so its text must never reach
    # eval() (which this code used before).  ast.literal_eval only accepts
    # Python literals and cannot execute code.
    try:
        parsed = ast.literal_eval(cell.strip())
    except (ValueError, TypeError, SyntaxError) as err:
        raise ValueError(f"cell is not a valid list literal: {cell!r}") from err
    if not isinstance(parsed, (list, tuple, set, frozenset)):
        raise ValueError(f"cell does not contain a list: {cell!r}")
    return list(parsed)


def _new_chart(title, xlabel, ylabel):
    """Create a 10x6 inch figure holding one titled, labelled, gridded axes."""
    # A standalone Figure is used instead of pyplot: pyplot keeps a global
    # reference to every figure it creates, which leaks memory in a
    # long-running server unless each figure is closed explicitly.
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid(True)
    return fig, ax


def process_file(filepath):
    """Analyse an article CSV and write three summary charts to disk.

    The CSV must have ``images`` and ``tables`` columns whose cells hold
    list literals.  The per-article item counts are plotted as two
    histograms and one scatter plot, saved under ``static/images/``.

    Args:
        filepath: path of the CSV file to read.

    Returns:
        A tuple ``(total_articles, avg_image_count, avg_table_count)``.
        For a CSV without data rows this is ``(0, 0.0, 0.0)``.

    Raises:
        KeyError: if the ``images`` or ``tables`` column is missing.
        ValueError: if a cell cannot be parsed as a list literal.
    """
    df = pd.read_csv(filepath)

    # Count the images and tables of every article.
    df['images'] = df['images'].apply(_parse_list_cell)
    df['tables'] = df['tables'].apply(_parse_list_cell)
    # The explicit dtype keeps the columns numeric even when there are no rows.
    df['image_count'] = df['images'].apply(len).astype('int64')
    df['table_count'] = df['tables'].apply(len).astype('int64')

    # Generate the charts and save them into the static folder.
    # NOTE(review): these paths are relative to the current working directory;
    # confirm that it is also the directory Flask serves /static from.
    os.makedirs('static/images', exist_ok=True)

    histograms = (
        ('image_count', 'Distribution of Image Counts in Articles',
         'Number of Images', 'skyblue',
         'static/images/image_distribution.png'),
        ('table_count', 'Distribution of Table Counts in Articles',
         'Number of Tables', 'lightgreen',
         'static/images/table_distribution.png'),
    )
    for column, title, xlabel, color, out_path in histograms:
        counts = df[column]
        # max() of an empty column is NaN, which range() cannot take.
        highest = int(counts.max()) if len(counts) else 0
        fig, ax = _new_chart(title, xlabel, 'Number of Articles')
        # One unit-wide bin per possible count: edges 0, 1, ..., highest + 1.
        ax.hist(counts, bins=range(0, highest + 2), color=color,
                edgecolor='black')
        fig.savefig(out_path)

    fig, ax = _new_chart('Relationship between Image Counts and Table Counts',
                         'Number of Images', 'Number of Tables')
    ax.scatter(df['image_count'], df['table_count'], color='coral')
    fig.savefig('static/images/image_table_relationship.png')

    # Return the summary statistics.
    total_articles = len(df)
    if total_articles == 0:
        # The mean of no rows would be NaN; report zero instead.
        return 0, 0.0, 0.0
    avg_image_count = df['image_count'].mean()
    avg_table_count = df['table_count'].mean()
    return total_articles, avg_image_count, avg_table_count
@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the upload page and, on POST, analyse the uploaded CSV.

    GET renders ``index.html`` without statistics.  POST saves the uploaded
    file into the configured upload folder, runs ``process_file`` on it and
    renders ``index.html`` with the resulting statistics.  A POST without a
    usable file redirects back to the same URL.
    """
    if request.method == 'POST':
        upload = request.files.get('file')
        # No "file" form field, or no file was selected: reload the page.
        if upload is None or not upload.filename:
            return redirect(request.url)

        # SECURITY: the filename is chosen by the client and may contain
        # directory parts such as "../../".  Keep only the final path
        # component (backslashes normalised first so Windows-style paths are
        # covered too) so the file cannot be written outside UPLOAD_FOLDER.
        safe_name = os.path.basename(upload.filename.replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            return redirect(request.url)

        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        upload.save(filepath)

        total_articles, avg_image_count, avg_table_count = process_file(filepath)
        return render_template('index.html',
                               total_articles=total_articles,
                               avg_image_count=avg_image_count,
                               avg_table_count=avg_table_count)

    # Default: show the initial page, without any statistics.
    return render_template('index.html',
                           total_articles=None,
                           avg_image_count=None,
                           avg_table_count=None)
if __name__ == '__main__':
    # Start Flask's built-in server when this file is run as a script.
    # NOTE(review): debug=True turns on Flask's debug mode, which is meant
    # for local development only; confirm this entry point is never used in
    # production.
    dev_server_options = {'debug': True}
    app.run(**dev_server_options)
<!DOCTYPE html>
{# index.html: CSV upload form; after an upload has been analysed it also
   shows the summary statistics and the three generated charts. #}
<html lang="zh-Hant">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PTT 文章分析結果</title>
</head>
<body>
    <h1>PTT 文章數據分析結果</h1>
    {# URLs are built with url_for so they stay correct if the app is served
       under a URL prefix or with a custom static path. #}
    <form action="{{ url_for('index') }}" method="post" enctype="multipart/form-data">
        <label for="file">上傳CSV文件:</label>
        <input type="file" name="file" id="file" accept=".csv" required>
        <button type="submit">上傳並分析</button>
    </form>
    {# The view passes None for all three values until a file has been analysed. #}
    {% if total_articles is not none %}
    <p>文章總數: {{ total_articles }}</p>
    <p>平均每篇文章的圖片數量: {{ avg_image_count|round(2) }}</p>
    <p>平均每篇文章的表格數量: {{ avg_table_count|round(2) }}</p>
    <h2>圖片數量分佈圖</h2>
    <img src="{{ url_for('static', filename='images/image_distribution.png') }}" alt="圖片數量分佈圖">
    <h2>表格數量分佈圖</h2>
    <img src="{{ url_for('static', filename='images/table_distribution.png') }}" alt="表格數量分佈圖">
    <h2>圖片數量與表格數量的關係圖</h2>
    <img src="{{ url_for('static', filename='images/image_table_relationship.png') }}" alt="圖片數量與表格數量的關係圖">
    {% endif %}
</body>
</html>