各位先進,能幫我看看下面這段程式嗎?一直執行不成功,謝謝。
import requests,json,time
import pandas as pd
from datetime import datetime
from datetime import timedelta
import pymssql
from sqlalchemy import create_engine
# Scrape Shopee search results for one keyword and store them in SQL Server.
#
# Flow: request up to MAX_PAGES result pages from the search API, flatten
# every returned item into the column lists of `datas`, then write the whole
# result set to the `shopee` table in a single `to_sql` call.

# Timestamp shared by every row collected in this run.
time_now = time.strftime('%Y-%m-%d %H:%M', time.localtime())

# Column name -> list of values; each scraped item appends one value per column.
datas = {
    '頁數': [],
    '商品名稱': [],
    '最高價': [],
    '最低價': [],
    '出貨地': [],
    '已售出': [],
    '庫存': [],
    '網址': [],
    '關鍵字': [],
    '抓取日期': [],
}

SEARCH_URL = 'https://shopee.tw/api/v4/search/search_items'
PAGE_SIZE = 60   # value sent as `limit`, and the step applied to `newest`
MAX_PAGES = 20

# Sent unchanged with every page request.
headers = {
    'content-type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'x-api-source': 'pc',
    "Referer": "https://www.google.com/",
}

keyword = '手機'
newest = 0  # offset of the first result of the page being requested

for page in range(MAX_PAGES):
    # Let requests build the query string so the keyword is percent-encoded
    # correctly even when it contains characters such as '&' or spaces.
    params = {
        'by': 'relevancy',
        'keyword': keyword,
        'limit': PAGE_SIZE,
        'newest': newest,
        'order': 'desc',
        'page_type': 'search',
        'scenario': 'PAGE_GLOBAL_SEARCH',
        'version': 2,
    }
    print(page + 1, newest)
    # requests applies no timeout by default; without one a stalled
    # connection would hang the script forever.
    req = requests.get(SEARCH_URL, params=params, headers=headers, timeout=30)
    print(req.url)
    newest += PAGE_SIZE

    # Fail loudly on HTTP errors instead of trying to parse an error page.
    req.raise_for_status()
    req_json = req.json()
    if 'items' not in req_json:
        # Still abort (nothing has been written to the database yet), but say
        # what the server actually returned rather than a bare KeyError.
        raise RuntimeError(
            'Shopee response for page %d has no "items" field; response keys: %s, error: %r'
            % (page + 1, sorted(req_json), req_json.get('error'))
        )

    items = req_json['items']
    if not items:
        # An empty page contributes no rows; stop paging here.
        break

    for item in items:
        basic = item['item_basic']
        # NOTE(review): the division suggests prices arrive scaled by
        # 100000 - confirm against the API before relying on the unit.
        price_min = int(basic['price_min'] / 100000)
        price_max = int(basic['price_max'] / 100000)
        # Separate name so the request URL is not shadowed.
        item_url = 'https://shopee.tw/product/' + str(basic['shopid']) + '/' + str(basic['itemid'])

        datas['頁數'].append(page + 1)
        datas['商品名稱'].append(basic['name'])
        # Fixed: the two price columns used to be swapped
        # ('最高價' is the highest price, '最低價' the lowest).
        datas['最高價'].append(price_max)
        datas['最低價'].append(price_min)
        datas['已售出'].append(basic['historical_sold'])
        datas['庫存'].append(basic['stock'])
        datas['出貨地'].append(basic['shop_location'])
        datas['網址'].append(item_url)
        datas['關鍵字'].append(keyword)
        datas['抓取日期'].append(time_now)

# The columns follow the key order of `datas`, the same frame the original
# script ended up writing (its earlier, reordered frame was discarded).
df = pd.DataFrame(datas)

# NOTE(review): credentials are hard-coded in the connection URL; consider
# loading them from the environment or a config file.
con = create_engine("mssql+pymssql://shopee:shopee@192.168.1.1:1433/IT")

# if_exists='replace' drops any existing `shopee` table before inserting.
df.to_sql('shopee', con, if_exists='replace', index=False)
这段代码看起来是在爬取 Shopee 的商品数据,并将爬取到的数据存入数据库。如果一直执行不成功,可能是网站的 API 发生了变化,或者数据库连接出现了问题。你可以分步调试:先确认该 API 是否还能正常返回数据,再确认数据库连接设置是否正确。
可以試試 fake-useragent 這個套件,它能隨機產生 User-Agent 字串:
https://pypi.org/project/fake-useragent/
另外,建議把中文名稱改成英文,盡量不要用中文命名;也建議在每個階段加上一行 log,方便追蹤錯在哪裡。