最近在寫 PChome 爬蟲時,登入時的驗證碼一直解決不了。
我試過搭配 2Captcha 的解決方案,但是當我把 2Captcha 回傳的驗證碼 Token 帶入 PChome 的登入 API 時,仍然回傳以下錯誤:
{"Code":"403-03-011","Msg":"reCAPTCHA Verify Fail"}
reCAPTCHA Verify Fail
我採用的是 requests 的方式,不是 Selenium,想請問是我的使用方式錯了嗎?
import json
import time

import requests
from twocaptcha import TwoCaptcha
# Shared HTTP session, reused by every request in this script.
session = requests.Session()

# Login page URL; also handed to the captcha solver as the page URL.
login_url = "https://ecvip.pchome.com.tw/login/v3/login.htm"

# reCAPTCHA site key used by PChome.
login_sitekey = "6Le3jvYaAAAAAPhQTZe9zO6uNQg3OfdRZSTNuw7B"

# Headers sent with every request (desktop Chrome User-Agent).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/111.0.0.0 Safari/537.36"
    ),
}
def solve_recaptcha_v3(sitekey, url):
    """Ask 2Captcha to solve a reCAPTCHA v3 challenge.

    Args:
        sitekey: reCAPTCHA site key of the target page.
        url: URL of the page that hosts the reCAPTCHA.

    Returns:
        The value returned by ``TwoCaptcha.recaptcha`` (callers read its
        ``'code'`` entry), or ``False`` when the solver raises.
    """
    solver = TwoCaptcha("xxxx")
    try:
        # NOTE(review): enterprise=1 and action='login' must match how the
        # target page actually loads and executes reCAPTCHA -- confirm
        # against the page source; a mismatch yields tokens the site rejects.
        return solver.recaptcha(
            sitekey=sitekey,
            url=url,
            version='v3',
            invisible=1,
            enterprise=1,
            action='login',
            score=0.9,
        )
    except Exception as e:
        # Keep the False-on-failure contract, but report why the solve
        # failed instead of discarding the error silently.
        print(f"2Captcha solve failed: {e!r}")
        return False
def login_recaptcha_token():
    """Obtain a reCAPTCHA token for the login page.

    Returns:
        The ``'code'`` entry of the solver result, or ``False`` when the
        solver could not produce a result.
    """
    result = solve_recaptcha_v3(login_sitekey, login_url)
    if not result:
        # solve_recaptcha_v3 signals failure with False; indexing it would
        # raise TypeError before the caller's own failure check could run.
        return False
    return result['code']
def login(email, password, max_attempts=5):
    """Log in to PChome using reCAPTCHA tokens obtained from the solver.

    Args:
        email: Account identifier, sent as ``Account``.
        password: Account password, sent as ``Pwd``.
        max_attempts: Maximum number of reCAPTCHA pre-check attempts. Every
            attempt requests a new token from the solver.

    Returns:
        ``False`` when a token could not be obtained or the pre-check never
        reported ``"Done"``; otherwise ``None`` after printing the login API
        response.
    """
    # Load the login page first so the session holds the site's cookies.
    # requests.Session stores response cookies on its own, so no manual
    # cookie update is needed.
    session.get(login_url, headers=headers, timeout=30)
    print(session.cookies.get_dict())

    for _ in range(max_attempts):
        # A reCAPTCHA token can be verified only once and expires about two
        # minutes after it is issued, so each submission needs its own token.
        token = login_recaptcha_token()
        if not token:
            print("Failed Get recaptcha_token")
            return False

        # This endpoint appears to validate the token before the real login.
        precheck_url = (
            "https://ecvip.pchome.com.tw/ecapi/member/v3.1/recaptcha"
            f"?op=login&_={int(time.time() * 1000)}"
        )
        res = session.post(
            precheck_url,
            headers=headers,
            json={"Account": email, "reCaptchaToken": token},
            timeout=30,
        )
        print(res.text)

        try:
            payload = json.loads(res.text)
        except ValueError:
            payload = None
        # Error payloads such as {"Code": ..., "Msg": ...} carry no "Status"
        # key, so look it up defensively.
        if isinstance(payload, dict) and payload.get("Status") == "Done":
            break
        time.sleep(1)
    else:
        print("reCAPTCHA pre-check did not report Done")
        return False

    # NOTE(review): assumes the login API verifies the token again, which
    # requires a token that has not been submitted before -- confirm against
    # the requests the real login page sends.
    token = login_recaptcha_token()
    if not token:
        print("Failed Get recaptcha_token")
        return False

    res = session.post(
        "https://ecvip.pchome.com.tw/ecapi/member/v3.1/login",
        headers=headers,
        json={
            "Account": email,
            "Pwd": password,
            "IsSave": 0,
            "reCaptchaToken": token,
        },
        timeout=30,
    )
    print(res.text)
# Script entry point: attempt a login with placeholder credentials.
if __name__ == "__main__":
    login(email="xxx@google.com", password="xxxx")
以上是我的CODE
但是當我發送登入API都是回傳
{"Code":"403-03-011","Msg":"reCAPTCHA Verify Fail"}
程式碼的整體流程看起來沒有大問題,可能是 PChome 網站的驗證機制變強了:reCAPTCHA 的目的就是阻擋非真實人類的行為,因此導致你的 reCAPTCHA 驗證失敗。另外要注意,reCAPTCHA 的 Token 只能被驗證一次,而且取得後約兩分鐘就會失效,同一個 Token 重複送出也會得到驗證失敗。
換個思路,可能的解決方案是嘗試使用 Selenium:Selenium 操作的是真實瀏覽器,比較接近人類的瀏覽行為,更有可能通過 reCAPTCHA 的驗證。請參考以下 Selenium 的例子:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
def login(email, password):
    """Log in to PChome through a Chrome browser driven by Selenium.

    Args:
        email: Text typed into the account field.
        password: Text typed into the password field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: A form element
            was not found on the page.
        selenium.common.exceptions.TimeoutException: The element with id
            ``logged`` did not appear within 60 seconds.
    """
    # Selenium 4 takes the driver path through a Service object; the
    # executable_path= keyword was removed.
    from selenium.webdriver.chrome.service import Service

    driver = webdriver.Chrome(service=Service('YOUR_CHROME_DRIVER_PATH'))
    try:
        driver.get("https://ecvip.pchome.com.tw/login/v3/login.htm")

        # find_element_by_id was removed in Selenium 4; use By locators.
        # NOTE(review): the element ids below are taken from the original
        # example -- confirm them against the live page.
        email_input = driver.find_element(By.ID, "loginAcc")
        password_input = driver.find_element(By.ID, "loginPwd")
        email_input.send_keys(email)
        password_input.send_keys(password)

        time.sleep(5)  # give reCAPTCHA time to finish loading

        login_button = driver.find_element(By.ID, "loginButton")
        login_button.click()

        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.ID, "logged"))
        )
        print("登入成功!")
    finally:
        # Always shut the browser down so the driver process is not leaked.
        driver.quit()
# Script entry point: run the Selenium login with placeholder credentials.
if __name__ == "__main__":
    login(email="your_email@example.com", password="your_password")