iT邦幫忙

0

在使用ptt表特爬蟲(python)時發生圖片損毀問題

  • 分享至 

  • xImage

import requests
from bs4 import BeautifulSoup
import os

def download_images(url, save_path):
    """Download one image from *url* and write it to *save_path*.

    The response body is only written when the server answers with a
    success status and the payload is not a text document.  Writing the
    body unconditionally would store an error page or an HTML viewer page
    under an image file name, which then opens as a "corrupted" image.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; the file is opened in binary mode.

    Returns:
        None.  Progress and skip reasons are printed to stdout.
    """
    print(f"正在下載圖片;{url}")
    # A timeout keeps one unresponsive host from hanging the whole run.
    response = requests.get(url, timeout=30)
    content_type = response.headers.get("Content-Type", "")
    if not response.ok:
        print(f"下載失敗,HTTP 狀態碼:{response.status_code}")
    elif content_type.lower().startswith("text/"):
        # Only text/* is rejected so that images served with a generic
        # binary content type are still saved.
        print(f"回應不是圖片(Content-Type:{content_type}),已略過")
    else:
        with open(save_path, 'wb') as file:
            file.write(response.content)
    print("-" * 30)
def main():
    """Download every image linked from one PTT Beauty article.

    Fetches the hard-coded article URL (sending the ``over18=1`` cookie and
    a browser User-Agent), creates ``images/<title>`` and passes every link
    whose path ends in jpg/png/gif/jpeg to ``download_images``.
    """
    url = "https://www.ptt.cc/bbs/Beauty/M.1500390386.A.3D8.html"
    headers = {"Cookie": "over18=1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"}
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")

    spans = soup.find_all("span", class_="article-meta-value")

    # The third meta value is used as the title; fall back to a fixed name
    # instead of raising IndexError when the page has fewer meta values.
    title = spans[2].text if len(spans) > 2 else "untitled"
    # Replace characters that are path separators or illegal in file names
    # on common file systems, so the title maps to exactly one directory.
    safe_title = "".join(
        "_" if ch in '\\/:*?"<>|' else ch for ch in title
    ).strip() or "untitled"
    dir_name = f"images/{safe_title}"
    os.makedirs(dir_name, exist_ok=True)

    links = soup.find_all("a")
    allow_file_name = ("jpg", "png", "gif", "jpeg")
    for link in links:
        href = link.get("href")
        if not href:
            continue
        # Ignore any query string or fragment when deriving the file name
        # and extension; the unmodified href is still what gets requested.
        path = href.split("?", 1)[0].split("#", 1)[0]
        file_name = path.split("/")[-1]
        extension = path.split(".")[-1].lower()

        if extension in allow_file_name:
            print(f"檔案型態:{extension}")
            print(f"url:{href}")
            download_images(href, f"{dir_name}/{file_name}")

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

圖片
  直播研討會
圖片
{{ item.channelVendor }} {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友回答

立即登入回答