# Prerequisite — install SQLAlchemy: pip install sqlalchemy
import json

from sqlalchemy import create_engine, Column, Integer, String, Text
from sqlalchemy.orm import declarative_base, sessionmaker
# Declarative base class that all ORM models inherit from.
Base = declarative_base()
# ORM model for a scraped article.
class Article(Base):
    """A scraped PTT article row.

    ``images`` and ``tables`` hold list data serialized to a string
    (SQLite has no native list column type).
    """

    __tablename__ = 'articles'

    id = Column(Integer, primary_key=True, autoincrement=True)
    title = Column(String, nullable=False)
    link = Column(String, nullable=False)
    images = Column(Text)  # list of image URLs, serialized to a string
    tables = Column(Text)  # table data (list of rows), serialized to a string
# Create the SQLite engine (file ptt_articles.db in the working directory)
# and create any missing tables defined on Base.
engine = create_engine('sqlite:///ptt_articles.db')
Base.metadata.create_all(engine)
# Create a Session factory bound to the engine, and open one session.
Session = sessionmaker(bind=engine)
session = Session()
# Sample data (in real use this would come from your scraper).
articles_data = [
{
'title': 'Test Article 1',
'link': 'https://www.ptt.cc/bbs/Gossiping/M.1620000000.A.html',
'images': ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'],
'tables': [['Header1', 'Header2'], ['Row1Col1', 'Row1Col2'], ['Row2Col1', 'Row2Col2']]
},
{
'title': 'Test Article 2',
'link': 'https://www.ptt.cc/bbs/Gossiping/M.1620000001.A.html',
'images': ['https://example.com/image3.jpg'],
'tables': [['Header1', 'Header2'], ['Row1Col1', 'Row1Col2']]
}
]
# Persist the scraped articles to the database.
for article_data in articles_data:
    article = Article(
        title=article_data['title'],
        link=article_data['link'],
        # Serialize lists as JSON so they can be parsed back with
        # json.loads later; str() would store a Python repr that is
        # awkward and unsafe to decode (requires eval/ast.literal_eval).
        images=json.dumps(article_data['images'], ensure_ascii=False),
        tables=json.dumps(article_data['tables'], ensure_ascii=False),
    )
    session.add(article)

# Commit once after all rows have been staged.
session.commit()
# Read back every stored article and print a one-line summary of each.
articles = session.query(Article).all()
for article in articles:
    print(f"ID: {article.id}, 標題: {article.title}, 連結: {article.link}")