爬蟲目的網站(首頁/現貨行情/蛋/養雞協會雞蛋交易行情/行情圖):http://www.foodchina.com.tw/model/marketing/AnaChartNew.aspx?id=51&ChkID=297&Page=0&Type=1&cn=False
該網站需要先利用篩選器選擇你要的資料,
(類別:全選,價格:全選,區間:日,2018年1月31日前31天走勢圖)
我目前利用
from selenium import webdriver
from selenium.webdriver.support.ui import Select
開啟該網站,並下指令去操作篩選器,
目前完整程式碼:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Drives the chart page in Chrome: ticks the category/price checkboxes,
# picks the "day" interval, sets the end date to 2018-01-31 and presses
# the query button so the page renders the chart.

# Path to the locally installed chromedriver executable.
# NOTE(review): recent Selenium 4 releases expect the driver path to be
# supplied through a Service object instead of a positional argument —
# confirm against the installed Selenium version.
dr = webdriver.Chrome('C:/Users/User/chromedriver')

# Open the chart page.
dr.get('http://www.foodchina.com.tw/model/marketing/AnaChartNew.aspx?id=51&ChkID=297&Page=0&Type=1&cn=False')

# Click every checkbox on the page once (this toggles each one).
checkboxs = dr.find_elements(By.CSS_SELECTOR, 'input[type=checkbox]')
for checkbox in checkboxs:
    checkbox.click()

# Click the checkboxes that carry a checked="checked" attribute a second
# time. Presumably these were pre-ticked in the markup and got unticked by
# the pass above, so this restores them — verify against the live page.
checkboxs_price = dr.find_elements(By.CSS_SELECTOR, 'input[checked=checked]')
for checkbox_price in checkboxs_price:
    checkbox_price.click()

# Select the radio button whose value is RB1 (interval: day).
radios = dr.find_elements(By.CSS_SELECTOR, 'input[value=RB1]')
for radio in radios:
    radio.click()

# Date drop-downs: year, month and day.
select_year = Select(dr.find_element(By.ID, 'ctl00_ctl00_cpl_MainContent_cpl_BasicMainContent_ddl_Year1'))
select_year.select_by_value('2018')

select_month = Select(dr.find_element(By.ID, 'ctl00_ctl00_cpl_MainContent_cpl_BasicMainContent_ddl_Month1'))
select_month.select_by_value('1')

select_day = Select(dr.find_element(By.ID, 'ctl00_ctl00_cpl_MainContent_cpl_BasicMainContent_ddl_Day'))
select_day.select_by_value('31')

# Press the query button(s) to submit the form.
buttons = dr.find_elements(By.CSS_SELECTOR, 'input[value=查詢]')
for button in buttons:
    button.click()
執行程式後,會開啟 Chrome,並依照指令完成我需要的篩選,
結果跑出canvas圖,
但是我想要爬梳的是canvas圖上的折點數值,
值包在http://www.foodchina.com.tw/model/ajax/getChartData.ashx 內,
我先簡單撰寫了爬蟲程式,但回傳的結果中卻沒有抓到 data 裡的 y 值,
目前爬梳程式碼:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetches the chart data behind the canvas graph.
#
# The data endpoint is requested three steps into one session, mirroring
# what the browser does: (1) GET the chart page to obtain cookies and the
# hidden form-state fields, (2) POST the filter form back to the page,
# (3) POST to the data endpoint. Requesting the endpoint on its own, with
# no prior session, is why the original attempt returned no y values.

page_url = 'http://www.foodchina.com.tw/model/marketing/AnaChartNew.aspx?id=51&ChkID=297&Page=0&Type=1&cn=False'
url = 'http://www.foodchina.com.tw/model/ajax/getChartData.ashx'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'}
field_prefix = 'ctl00$ctl00$cpl_MainContent$cpl_BasicMainContent$'

with requests.Session() as session:
    # Send the browser-like user agent on every request of the session.
    session.headers.update(headers)

    # Step 1: load the page and collect its hidden inputs
    # (__VIEWSTATE, __EVENTVALIDATION, ...), which must be echoed back.
    page = session.get(page_url, timeout=30)
    page.raise_for_status()
    form_soup = BeautifulSoup(page.text, 'html.parser')

    payload = {}
    for hidden in form_soup.select('input[type=hidden]'):
        if hidden.get('name'):
            payload[hidden['name']] = hidden.get('value', '')

    # Tick every checkbox (all categories, all prices).
    # NOTE(review): assumes every named checkbox on the page belongs to the
    # filter form — confirm against the live markup.
    for box in form_soup.select('input[type=checkbox]'):
        if box.get('name'):
            payload[box['name']] = 'on'

    # Interval "day" (RB1) and the 31 days up to 2018-01-31.
    payload.update({
        field_prefix + 'DataType': 'RB1',
        field_prefix + 'ddl_Year1': '2018',
        field_prefix + 'ddl_Month1': '1',
        field_prefix + 'ddl_Day': '31',
        field_prefix + 'ddl_Year2': '2018',
        field_prefix + 'ddl_Month2': '1',
        field_prefix + 'queryYearList': '1',
        field_prefix + 'btnSummit': '查詢',
    })

    # Step 2: submit the filter form so the server stores the query.
    session.post(page_url, data=payload, timeout=30).raise_for_status()

    # Step 3: ask the data endpoint for the chart series of that query.
    resp = session.post(
        url,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        timeout=30,
    )
    resp.raise_for_status()

# Kept from the original script: dump the raw response text.
soup = BeautifulSoup(resp.text, 'html.parser')
print(soup)

# The response is expected to look like
# {"data": {"datasets": [{"data": [{"x": ..., "y": ...}, ...]}, ...]}}.
# NOTE(review): if the endpoint returns a JavaScript object literal that is
# not strict JSON, resp.json() raises ValueError and only the raw dump
# above is available.
try:
    chart = resp.json()
except ValueError:
    chart = None

if chart is not None:
    points = []
    for series_index, dataset in enumerate(chart['data']['datasets']):
        for point in dataset['data']:
            points.append({'series': series_index, 'x': point['x'], 'y': point['y']})
    df = pd.DataFrame(points)
    print(df)
想詢問我可以如何修改我的程式碼,
才可以爬到我要的值呢?謝謝
Excel VBA 做法:
' Downloads the chart series for today's date and writes the (x, y)
' points to the active sheet starting at A1.
'
' Flow: GET the chart page, pull the hidden __VIEWSTATE /
' __VIEWSTATEGENERATOR / __EVENTVALIDATION values out of the HTML, POST the
' filter form back to the page, then POST to getChartData.ashx on the same
' WinHttp object and parse the reply with the JavaScript script control.
Sub test()
    Const PAGE_URL As String = "http://www.foodchina.com.tw/model/marketing/AnaChartNew.aspx?id=51&ChkID=297&Page=0&Type=1&cn=False"
    Const FIELD As String = "&ctl00%24ctl00%24cpl_MainContent%24cpl_BasicMainContent%24"

    Dim strText As String, strJSON As String, msg_string As String
    Dim VIEWSTATE As String, VIEWSTATEGENERATOR As String, EVENTVALIDATION As String
    Dim objJSON As Object, detail As Object, data_set As Object
    Dim info As Variant, detail_data As Variant
    Dim arr() As Variant
    Dim aaa As Long, i As Long

    With CreateObject("WinHttp.WinHttpRequest.5.1")
        ' Step 1: load the page to obtain the hidden form-state values.
        .Open "GET", PAGE_URL, False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .Send
        strText = .responsetext

        ' Each value is the text between <name>" value=" and the closing " />.
        VIEWSTATE = encodeURI(CStr(Split(Split(strText, "__VIEWSTATE"" value=""")(1), """ />")(0)))
        VIEWSTATEGENERATOR = encodeURI(CStr(Split(Split(strText, "__VIEWSTATEGENERATOR"" value=""")(1), """ />")(0)))
        EVENTVALIDATION = encodeURI(CStr(Split(Split(strText, "__EVENTVALIDATION"" value=""")(1), """ />")(0)))

        ' Step 2: post the filter form back to the page.
        .Open "POST", PAGE_URL, False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

        msg_string = "ctl00_ctl00_ToolkitScriptManager1_HiddenField="
        msg_string = msg_string & "&__EVENTTARGET="
        msg_string = msg_string & "&__EVENTARGUMENT="
        msg_string = msg_string & "&__VIEWSTATE=" & VIEWSTATE
        msg_string = msg_string & "&__VIEWSTATEGENERATOR=" & VIEWSTATEGENERATOR
        msg_string = msg_string & "&__SCROLLPOSITIONX=0"
        msg_string = msg_string & "&__SCROLLPOSITIONY=288"
        msg_string = msg_string & "&__EVENTVALIDATION=" & EVENTVALIDATION
        ' Column checkbox index: %240 ($0) = northern region, %241 ($1) = Changhua, ...
        msg_string = msg_string & FIELD & "cblCols%240=on"
        msg_string = msg_string & FIELD & "cblRows%240=on"
        msg_string = msg_string & FIELD & "DataType=RB1"
        ' Both date ranges are filled with today's date.
        msg_string = msg_string & FIELD & "ddl_Year1=" & Format(Date, "yyyy")
        msg_string = msg_string & FIELD & "ddl_Month1=" & Format(Date, "m")
        msg_string = msg_string & FIELD & "ddl_Day=" & Format(Date, "d")
        msg_string = msg_string & FIELD & "ddl_Year2=" & Format(Date, "yyyy")
        msg_string = msg_string & FIELD & "ddl_Month2=" & Format(Date, "m")
        msg_string = msg_string & FIELD & "queryYearList=1"
        ' btnSummit value is the URL-encoded caption of the query button.
        msg_string = msg_string & FIELD & "btnSummit=%E6%9F%A5%E8%A9%A2"
        .Send msg_string

        ' Step 3: request the chart data for the query just submitted.
        .Open "POST", "http://www.foodchina.com.tw/model/ajax/getChartData.ashx", False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .Send
        strJSON = .responsetext
    End With

    With CreateObject("msscriptcontrol.scriptcontrol")
        ' Evaluate the reply as a JavaScript object so its members can be
        ' read through CallByName.
        .Language = "JavaScript"
        .AddCode "var mydata =" & strJSON
        Set objJSON = .CodeObject
        Set detail = CallByName(CallByName(objJSON, "mydata", VbGet), "data", VbGet)

        ' The number of "{" in the reply is used as an upper bound on the
        ' number of points, so the array never needs to grow.
        aaa = UBound(Split(strJSON, "{"))
        ReDim arr(1 To aaa, 1 To 2)

        ' Flatten every dataset's points into consecutive rows.
        i = 1
        For Each info In CallByName(detail, "datasets", VbGet)
            Set data_set = CallByName(info, "data", VbGet)
            For Each detail_data In data_set
                arr(i, 1) = CallByName(detail_data, "x", VbGet)
                arr(i, 2) = CallByName(detail_data, "y", VbGet)
                i = i + 1
            Next
        Next
    End With

    ' Write the sheet once, after all datasets are collected, instead of
    ' rewriting the whole range for every dataset. Nothing is written when
    ' no point was found.
    If i > 1 Then [a1].Resize(UBound(arr), 2) = arr
End Sub
' Returns strText percent-encoded by JavaScript's encodeURIComponent,
' evaluated through the script control.
'
' strText is embedded in a single-quoted JavaScript string literal, so
' backslashes, single quotes and line breaks are escaped first; left raw
' they would end the literal early or change the evaluated script.
Function encodeURI(strText) As String
    Dim escaped As String

    ' Backslashes must be doubled before the other escapes are added.
    escaped = Replace(strText, "\", "\\")
    escaped = Replace(escaped, "'", "\'")
    escaped = Replace(escaped, vbCr, "\r")
    escaped = Replace(escaped, vbLf, "\n")

    With CreateObject("msscriptcontrol.scriptcontrol")
        .Language = "JavaScript"
        encodeURI = .Eval("encodeURIComponent('" & escaped & "');")
    End With
End Function