各位前輩好,
我想爬CMoney網頁 產業脈動-資金流向 的資料,
目前已經把header都加上了,cookie也考慮了,
但一直出現以下錯誤訊息
{"Error":-3,"Message":"金鑰不正確"}
可是我透過chrome 開發人員模式下$.get又正常
不知道是哪邊我漏掉的?
目前已知頁面資訊如下:
頁面: https://www.cmoney.tw/finance/f00018.aspx
AJAX:
https://www.cmoney.tw/finance/ashx/mainpage.ashx?action=GetCapitalFlows&cmkey=28WDBKL0l8wh03bIHWrmLw%3D%3D
方法: get
action: GetCapitalFlows
cmkey: $(".primary-navi-now a").attr("cmkey")
流程
目前的程式碼如下
語言: PHP
<?php
// Cookie file shared by every request below; sendHttpRequest() reads this variable through `global`.
$cookie_jar = tempnam(sys_get_temp_dir(), "CURLCOOKIE");

// Request the home page first (its body is not used), then the capital-flows page itself.
sendHttpRequest('https://www.cmoney.tw/');
$pageHtml = sendHttpRequest('https://www.cmoney.tw/finance/f00018.aspx');

// Read the cmkey of the current navigation entry out of the page markup.
$cmkey = getCurrentPageCmkey($pageHtml);
echo "cmkey:$cmkey" . PHP_EOL;

// Headers to accompany the AJAX call, given as name => value pairs.
$ajaxHeaders = [
    'Accept' => 'application/json, */*; q=0.01',
    'Host' => 'www.cmoney.tw',
    'Referer' => 'https://www.cmoney.tw/finance/f00018.aspx',
    'X-Requested-With' => 'XMLHttpRequest',
    'Dnt' => '1',
    'Pragma' => 'no-cache',
    'Cache-Control' => 'no-cache',
    'Connection' => 'keep-alive',
    'Accept-Encoding' => 'gzip, deflate, br',
    'Accept-Language' => 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'Sec-Ch-Ua' => '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
    'Sec-Ch-Ua-Mobile' => '?0',
    'Sec-Ch-Ua-Platform' => '"Windows"',
    'Sec-Fetch-Dest' => 'empty',
    'Sec-Fetch-Mode' => 'cors',
    'Sec-Fetch-Site' => 'same-origin',
];

// Build the AJAX URL from the action name and the extracted key, then print the raw response.
$query = http_build_query([
    'action' => 'GetCapitalFlows',
    'cmkey' => $cmkey,
]);
$ajaxUrl = 'https://www.cmoney.tw/finance/ashx/mainpage.ashx?' . $query;
echo sendHttpRequest($ajaxUrl, [], $ajaxHeaders);
/**
 * Extract the `cmkey` attribute of the currently selected primary-navigation link.
 *
 * The link is the first <a> inside an <h3> that sits directly under an element
 * whose class list contains "primary-navi-now".
 *
 * @param string $pageHtml HTML source of the page.
 *
 * @return string|false The attribute value ('' when the link has no cmkey attribute),
 *                      or false when the HTML is empty/unparsable or no such link exists.
 */
function getCurrentPageCmkey($pageHtml = '')
{
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8), and a
    // failed download hands us false or '' — treat all of those as "no key found".
    if (!is_string($pageHtml) || trim($pageHtml) === '') {
        return false;
    }

    // Silence libxml warnings about real-world markup only for the duration of the
    // parse, then drop the buffered errors and restore the caller's setting.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($pageHtml);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if (!$loaded) {
        return false;
    }

    // Token-based class match so the element is still found when it carries
    // additional classes besides "primary-navi-now".
    $xpath = new DOMXPath($dom);
    $nodes = $xpath->query(
        '//*[contains(concat(" ", normalize-space(@class), " "), " primary-navi-now ")]/h3/a'
    );
    if ($nodes === false) {
        return false;
    }

    $element = $nodes->item(0);

    return $element instanceof DOMElement ? $element->getAttribute('cmkey') : false;
}
/**
 * Send an HTTP request with cURL and return the response body.
 *
 * Cookies are read from and written to the file named by the global $cookie_jar,
 * so consecutive calls share one session.
 *
 * @param string $url        Target URL.
 * @param array  $postField  POST fields; when empty the request is sent as GET.
 * @param array  $httpHeader Request headers, either as name => value pairs or as
 *                           ready-made "Name: value" lines (integer keys).
 *
 * @return string|false Response body, or false when the transfer failed
 *                      (the cURL error message is echoed in that case).
 */
function sendHttpRequest($url, $postField = [], $httpHeader = [])
{
    global $cookie_jar;

    // Caller-supplied headers win; the browser-like User-Agent is only a fallback.
    $httpHeader = $httpHeader + array("User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36");

    // CURLOPT_HTTPHEADER expects a list of complete "Name: value" lines and ignores
    // array keys, so a name => value map must be flattened before it is handed over.
    // Entries with integer keys are assumed to be complete lines already.
    $headerLines = [];
    foreach ($httpHeader as $name => $value) {
        $headerLines[] = is_int($name) ? $value : "$name: $value";
    }

    $options = array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        // Keep certificate and host-name verification on: the session cookies and
        // the extracted key travel over this connection.
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        // Empty string = advertise and decode every encoding this cURL build supports.
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => (empty($postField) ? "GET" : "POST"),
        CURLOPT_HTTPHEADER => $headerLines,
        CURLOPT_COOKIEJAR => $cookie_jar,
        CURLOPT_COOKIEFILE => $cookie_jar,
    );

    // Only attach a body when there is one; setting CURLOPT_POSTFIELDS on a GET
    // would turn the transfer into a body-carrying request.
    if (!empty($postField)) {
        $options[CURLOPT_POSTFIELDS] = $postField;
    }

    $curl = curl_init();
    curl_setopt_array($curl, $options);
    $res = curl_exec($curl);

    // Report transport-level failures; $res is false in that case.
    if (curl_errno($curl)) {
        echo 'Curl error: ' . curl_error($curl);
    }
    curl_close($curl);

    return $res;
}
感謝 chan15 大大
後來比對後發現是我 CURLOPT_HTTPHEADER 設定錯了:它需要的是「名稱: 值」格式的字串陣列,而不是 key => value 的關聯陣列
在原先sendHttpRequest方法中加上以下片段即可
// CURLOPT_HTTPHEADER takes complete "Name: value" lines, so fold each key into its value.
// Assign through the key rather than iterating by reference, which would leave $header
// aliased to the last array element after the loop ends.
foreach ($httpHeader as $key => $header) {
    $httpHeader[$key] = "$key: $header";
}
<?php
// Download the capital-flows page and read the cmkey of the current navigation entry.
$pageHtml = sendHttpRequest('https://www.cmoney.tw/finance/f00018.aspx');
$cmkey = getCurrentPageCmkey($pageHtml);
echo "cmkey:$cmkey" . PHP_EOL;

// Call the AJAX endpoint with that key and print the raw response.
$query = http_build_query([
    'action' => 'GetCapitalFlows',
    'cmkey' => $cmkey,
]);
echo sendHttpRequest('https://www.cmoney.tw/finance/ashx/mainpage.ashx?' . $query);
/**
 * Extract the `cmkey` attribute of the currently selected primary-navigation link.
 *
 * The link is the first <a> inside an <h3> that sits directly under an element
 * whose class list contains "primary-navi-now".
 *
 * @param string $pageHtml HTML source of the page.
 *
 * @return string|false The attribute value ('' when the link has no cmkey attribute),
 *                      or false when the HTML is empty/unparsable or no such link exists.
 */
function getCurrentPageCmkey($pageHtml = '')
{
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8), and a
    // failed download hands us false or '' — treat all of those as "no key found".
    if (!is_string($pageHtml) || trim($pageHtml) === '') {
        return false;
    }

    // Silence libxml warnings about real-world markup only for the duration of the
    // parse, then drop the buffered errors and restore the caller's setting.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($pageHtml);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if (!$loaded) {
        return false;
    }

    // Token-based class match so the element is still found when it carries
    // additional classes besides "primary-navi-now".
    $xpath = new DOMXPath($dom);
    $nodes = $xpath->query(
        '//*[contains(concat(" ", normalize-space(@class), " "), " primary-navi-now ")]/h3/a'
    );
    if ($nodes === false) {
        return false;
    }

    $element = $nodes->item(0);

    return $element instanceof DOMElement ? $element->getAttribute('cmkey') : false;
}
/**
 * Fetch a URL with cURL, sending the capital-flows page as the Referer.
 *
 * @param string $url Target URL.
 *
 * @return string|false Response body, or false when the transfer failed
 *                      (the cURL error message is echoed in that case).
 */
function sendHttpRequest($url)
{
    $curl = curl_init();
    curl_setopt_array($curl, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        // Bound the transfer so an unresponsive server cannot hang the script.
        CURLOPT_TIMEOUT => 30,
        // Header entries must be complete "Name: value" lines.
        CURLOPT_HTTPHEADER => [
            'Referer: https://www.cmoney.tw/finance/f00018.aspx',
        ],
    ]);

    $response = curl_exec($curl);

    // Surface transport failures instead of silently handing back false.
    if ($response === false) {
        echo 'Curl error: ' . curl_error($curl) . PHP_EOL;
    }
    curl_close($curl);

    return $response;
}