我想要用python讀取柏克萊暢銷榜與試讀榜的所有書籍資料
用DataFrame儲存,5個欄位
類型 booktype 暢銷榜/試讀榜
圖 picture
書名 bookname
作者 author
價格 price
兩個榜的url可用一個清單儲存  https://www.books.com.tw/web/sys_cebtopb/cebook  https://www.books.com.tw/web/sys_cebtryb/cebook
圖要下載, 檔名可以用序號重新命名
我東抄西抄,只組合出這樣
import requests
from IPython.display import Image
from IPython.display import Image, display
from bs4 import BeautifulSoup
import pandas as pd
def downloadimg(imgurl, img_name):
    urlcontent = requests.get(imgurl)
    with open(img_name,'wb') as file: 
         file.write(urlcontent.content)
         file.flush() 
         file.close() 
         print('已儲存' + img_name)
url="https://www.books.com.tw/web/sys_cebtryb/cebook"
myrequest = requests.get(url) 
#print(myrequest.content) 
soup=BeautifulSoup(myrequest.content,"html.parser") 
#print(soup)
firstImg = soup.find('img') #取得第1個<img>
print('firstImg:', firstImg)
print('--------------------')
#先跑出柏克萊是讀榜資料=====================================
dvdlist = soup.find_all('img',{'class':'cover'})
count = 0
imglist = []
namelist = []
#print(dvdlist)
#跑出dvdlist跑出dvdlist=================================================
for idx in range(len(dvdlist)):
     count += 1
     print(count)
     print(dvdlist[idx])
     print('圖-->', dvdlist[idx].get('src'))
     imglist.append(dvdlist[idx].get('src'))
     print('書名-->', dvdlist[idx].get('alt'))
     namelist.append(dvdlist[idx].get('alt'))
     print('作者-->', dvdlist[idx].get('b'))#這兩段跑不成功
     namelist.append(dvdlist[idx].get('b'))#這兩段跑不成功
#跑出booklist跑出  另外用來抓作者=================================================
url="https://www.books.com.tw/web/sys_cebtryb/cebook"
myrequest = requests.get(url) 
#print(myrequest.content) 
soup=BeautifulSoup(myrequest.content,"html.parser") 
booklist=soup.find_all('a',{'href':'cover'})# 我是抓錯屬性嗎?
print(booklist)
display(Image(pic))
#上面這段也跑不成功
想要把圖用
只到這裡就卡好久 ~ 看看有沒有高手可以教一下 感謝
修改如下,試試看吧
#先跑出柏克萊是讀榜資料=====================================
dvdlist = soup.find_all('div',{'class':'type02_bd-a'})
piclist = soup.find_all('img',{'class':'cover'})
count = 0
imglist = []
namelist = []
#print(dvdlist)
#跑出dvdlist跑出dvdlist=================================================
for idx in range(len(dvdlist)):
     count += 1
     print(count)
     print('圖-->', piclist[idx].get('src'))
     print('書名-->', dvdlist[idx].h4.text.strip())
     print('作者-->', dvdlist[idx].li.text.strip().replace("作者:",""))