各位前輩好,
我想爬CMoney網頁 產業脈動-資金流向 的資料,
目前已經把header都加上了,cookie也考慮了,
但一直出現以下錯誤訊息
{"Error":-3,"Message":"金鑰不正確"}
可是我透過chrome 開發人員模式下$.get又正常
不知道是哪邊我漏掉的?
目前已知頁面資訊如下:
頁面: https://www.cmoney.tw/finance/f00018.aspx
AJAX:
https://www.cmoney.tw/finance/ashx/mainpage.ashx?action=GetCapitalFlows&cmkey=28WDBKL0l8wh03bIHWrmLw%3D%3D
方法: get
key: GetCapitalFlows
cmkey: $(".primary-navi-now a").attr("cmkey")
流程
目前的程式碼如下
語言: PHP
<?php
$cookie_jar = tempnam(sys_get_temp_dir(), "CURLCOOKIE");
$url = 'https://www.cmoney.tw/';
$html = sendHttpRequest($url);
$url = 'https://www.cmoney.tw/finance/f00018.aspx';
$html = sendHttpRequest($url);
$cmkey = getCurrentPageCmkey($html);
echo "cmkey:$cmkey" . PHP_EOL;
$url = 'https://www.cmoney.tw/finance/ashx/mainpage.ashx?' . http_build_query([
'action' => 'GetCapitalFlows',
'cmkey' => $cmkey,
]);
$html = sendHttpRequest($url, [], [
'Accept' => 'application/json, */*; q=0.01',
'Host' => 'www.cmoney.tw',
'Referer' => 'https://www.cmoney.tw/finance/f00018.aspx',
'X-Requested-With' => 'XMLHttpRequest',
'Dnt' => '1',
'Pragma' => 'no-cache',
'Cache-Control' => 'no-cache',
'Connection' => 'keep-alive',
'Accept-Encoding' => 'gzip, deflate, br',
'Accept-Language' => 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
'Sec-Ch-Ua' => '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
'Sec-Ch-Ua-Mobile' => '?0',
'Sec-Ch-Ua-Platform' => '"Windows"',
'Sec-Fetch-Dest' => 'empty',
'Sec-Fetch-Mode' => 'cors',
'Sec-Fetch-Site' => 'same-origin',
]);
echo $html;
function getCurrentPageCmkey($pageHtml = '')
{
$dom = new DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($pageHtml);
$cmkey = false;
$xpath = new DOMXPath($dom);
$element = $xpath->query('//*[@class="primary-navi-now"]/h3/a')[0];
if ($element)
$cmkey = $element->getAttribute('cmkey');
return $cmkey;
}
function sendHttpRequest($url, $postField = [], $httpHeader = [])
{
global $cookie_jar;
$httpHeader = $httpHeader + array("User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36");
// print_r($httpHeader);
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_SSL_VERIFYHOST => false,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => (empty($postField) ? "GET" : "POST"),
CURLOPT_POSTFIELDS => $postField,
CURLOPT_HTTPHEADER => $httpHeader,
CURLOPT_COOKIEJAR => $cookie_jar,
CURLOPT_COOKIEFILE => $cookie_jar,
));
$res = curl_exec($curl);
// Check if any error occurred
if (curl_errno($curl)) {
echo 'Curl error: ' . curl_error($curl);
}
curl_close($curl);
// echo 'cookie:' . PHP_EOL;
// echo file_get_contents($cookie_jar) . PHP_EOL;
return $res;
}
感謝 chan15 大大
後來比對後發現是我CURLOPT_HTTPHEADER 設定錯了
在原先sendHttpRequest方法中加上以下片段即可
foreach ($httpHeader as $key => &$header) {
$header = "$key: $header";
}
<?php
$url = 'https://www.cmoney.tw/finance/f00018.aspx';
$html = sendHttpRequest($url);
$cmkey = getCurrentPageCmkey($html);
echo "cmkey:$cmkey" . PHP_EOL;
$html = sendHttpRequest('https://www.cmoney.tw/finance/ashx/mainpage.ashx?' . http_build_query([
'action' => 'GetCapitalFlows',
'cmkey' => $cmkey,
]));
echo $html;
function getCurrentPageCmkey($pageHtml = '')
{
$dom = new DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($pageHtml);
$cmkey = false;
$xpath = new DOMXPath($dom);
$element = $xpath->query('//*[@class="primary-navi-now"]/h3/a')[0];
if ($element) {
$cmkey = $element->getAttribute('cmkey');
}
return $cmkey;
}
function sendHttpRequest($url)
{
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_HTTPHEADER => [
'Referer: https://www.cmoney.tw/finance/f00018.aspx',
],
]);
$response = curl_exec($curl);
curl_close($curl);
return $response;
}