https://www.myacg.com.tw/goods_list.php
我我想爬取這網站的商品列表
第一頁我能夠爬取
但是沒有換頁
而是一直往下滑
直到最後
查了一下
是指動態加載
教學中用POST取得json
用POST嘗試後卻無法取得
這是代碼
不知道問題在哪...
import requests
from bs4 import BeautifulSoup
def str2obj(s,s1=';',s2='='):
li=s.split(s1)
res={}
for kv in li:
li2=kv.split(s2)
if len(li2)>1:
res[li2[0]]=li2[1]
return res
url='https://www.myacg.com.tw/goods_list_load_html_api.php'
headers ='''
POST /goods_list_load_html_api.php HTTP/1.1
Host: www.myacg.com.tw
Connection: keep-alive
Content-Length: 148
Accept: text/html, */*; q=0.01
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Origin: https://www.myacg.com.tw
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://www.myacg.com.tw/goods_list.php
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6
Cookie: _ga=GA1.3.717561162.1589798051; _gid=GA1.3.1621339013.1589798051; _fbp=fb.2.1589798050985.1976526027; PHPSESSID=tuov00pqmsl239531rc4tscp46; goods_hit[1644929]=1; r18=1; BIGipServerpoor_10.10.12.62_80=1040976394.20480.0000; TS011c35a1=01b0479bf3acdc1dfda4f6843ce29d06bde1903ce55ba6e0c8565f36bd5665dfd4945def72ff56b8aeb49edd0f4be872e1e0a31993; goods_hit[1849699]=1; TS01f565ed=01b0479bf346e280bd4430a385c00ccb5d3ab1e9f29694ec422e6ea8f6523a792b8fe1eae2009643044c11c22aeec4e46cc4ed843e; goods_hit[1884426]=1; _gat_UA-51168861-1=1; arp_scroll_position=2679
'''
headers=str2obj(headers,'\n',': ')
#print(headers)
payload = {'start_date':'',
'end_date':'',
'keyword':'',
'search_ctid':'',
'low_price':'',
'high_price':'',
'show':'image',
'type':'0',
'sort':'1',
'seller':'',
'gnum':'',
'ctid':'',
'ct18':'',
'has_amount':'',
'demo':'',
'now_count':'40',
}
res = requests.post(url,headers=headers,params = payload)
soup = BeautifulSoup(res.text,'html.parser')
print(soup)
import requests
from bs4 import BeautifulSoup
def str2obj(s,s1=';',s2='='):
li=s.split(s1)
res={}
for kv in li:
li2=kv.split(s2)
if len(li2)>1:
res[li2[0]]=li2[1]
return res
url='https://www.myacg.com.tw/goods_list_load_html_api.php'
headers ='''
POST /goods_list_load_html_api.php HTTP/1.1
Host: www.myacg.com.tw
Connection: keep-alive
Content-Length: 148
Accept: text/html, */*; q=0.01
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Origin: https://www.myacg.com.tw
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://www.myacg.com.tw/goods_list.php
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6
Cookie: _ga=GA1.3.717561162.1589798051; _gid=GA1.3.1621339013.1589798051; _fbp=fb.2.1589798050985.1976526027; PHPSESSID=tuov00pqmsl239531rc4tscp46; goods_hit[1644929]=1; r18=1; BIGipServerpoor_10.10.12.62_80=1040976394.20480.0000; TS011c35a1=01b0479bf3acdc1dfda4f6843ce29d06bde1903ce55ba6e0c8565f36bd5665dfd4945def72ff56b8aeb49edd0f4be872e1e0a31993; goods_hit[1849699]=1; TS01f565ed=01b0479bf346e280bd4430a385c00ccb5d3ab1e9f29694ec422e6ea8f6523a792b8fe1eae2009643044c11c22aeec4e46cc4ed843e; goods_hit[1884426]=1; _gat_UA-51168861-1=1; arp_scroll_position=2679
'''
headers=str2obj(headers,'\n',': ')
payload = {'start_date':'',
'end_date':'',
'keyword':'',
'search_ctid':'',
'low_price':'',
'high_price':'',
'show':'image',
'type':'0',
'sort':'1',
'seller':'',
'gnum':'',
'ctid':'',
'ct18':'',
'has_amount':'',
'demo':'',
'now_count':'40',
}
res = requests.post(url,headers=headers,data= payload)
print(res.content)
exit()
res = requests.post(url,headers=headers,data= payload)
改成 data.