想用 Python 抓取網站資料。
假設該網站共有 2000 筆資料,一開始只顯示 100 筆,剩下的資料需要按「加載更多」按鈕才會載入。
即使頁面上已顯示全部資料,程式還是只能抓到前 100 筆,抓不到剩下的資料:
# Downloads the URL and parses the HTML tables found in the response.
# NOTE(review): per the question, only the first 100 rows come back — presumably
# the remaining rows are added by JavaScript after "load more" is clicked and
# are therefore absent from the downloaded HTML; confirm.
df = pd.read_html('https://tw.tradingview.com/markets/stocks-taiwan/market-movers-all-stocks/')
請問有何建議?
僅供參考
from time import sleep
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
# Drive a real browser so that rows loaded on demand end up in the page source:
# keep clicking the "load more" button until it is gone, then parse every
# HTML table on the page with pandas and write each one to "<n>.csv".
from io import StringIO

driver = webdriver.Firefox()
try:
    driver.get('https://tw.tradingview.com/markets/stocks-taiwan/market-movers-all-stocks/')

    # As long as the "load more" button is still present, keep clicking it.
    while True:
        # Fixed pause so the page (or the next batch of rows) has time to load.
        sleep(3)
        try:
            # Look for the "load more" button.
            # NOTE(review): the class name looks auto-generated and may change
            # when the site is rebuilt — verify the selector before running.
            load_button = driver.find_element(By.CSS_SELECTOR, '.button-SFwfC2e0')
        except NoSuchElementException:
            # The button is no longer on the page: stop clicking.
            break
        else:
            # Click the "load more" button.
            load_button.click()

    # Parse every table in the rendered page. The HTML is wrapped in StringIO
    # because passing a literal HTML string to read_html is deprecated.
    tables = pd.read_html(StringIO(driver.page_source))

    # Save each table as CSV so the result can be checked in Excel.
    # Per the original author, 2.csv should be the table of interest.
    for i, table in enumerate(tables):
        table.to_csv(f'{i+1}.csv', index=False)
finally:
    # Always close the browser, even if a step above raised.
    driver.quit()
參考看看
import requests
import pandas as pd
# Query the scanner endpoint directly with a JSON POST instead of scraping the
# rendered page, then load the JSON reply into a pandas DataFrame.
url = 'https://scanner.tradingview.com/taiwan/scan'

# The only value that needs changing is the upper bound of 'range' (0, 200):
# raise 200 to however many rows are wanted.
# Python booleans are spelled False/True; the JSON-style lowercase `false`
# is an undefined name in Python and raises NameError.
payload = {
    'columns': [
        'name',
        'description',
        'logoid',
        'update_mode',
        'type',
        'typespecs',
        'close',
        'pricescale',
        'minmov',
        'fractional',
        'minmove2',
        'currency',
        'change',
        'volume',
        'relative_volume_10d_calc',
        'market_cap_basic',
        'fundamental_currency_code',
        'price_earnings_ttm',
        'earnings_per_share_diluted_ttm',
        'earnings_per_share_diluted_yoy_growth_ttm',
        'dividends_yield_current',
        'sector.tr',
        'market',
        'sector',
        'recommendation_mark',
    ],
    'ignore_unknown_fields': False,
    'options': {'lang': 'zh_TW'},
    'range': [0, 200],
    'sort': {'sortBy': 'name', 'sortOrder': 'asc', 'nullsFirst': False},
    'preset': 'all_stocks',
}

# Make the POST request; the timeout keeps the script from hanging forever
# if the server never answers.
response = requests.post(url, json=payload, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Get the JSON data from the response
    data = response.json()
    # Convert the JSON data to a pandas DataFrame
    # NOTE(review): the reply is presumably a wrapper object with the rows
    # nested under one key, in which case the frame needs further flattening
    # to get one column per requested field — confirm against a real response.
    df = pd.DataFrame(data)
    # Display the first few rows of the DataFrame
    print(df.head())
else:
    print(f"Request failed with status code: {response.status_code}")