最近想要把 Google Drive 資料夾內的項目名稱全部爬下來,但發現用一般滾輪下拉(捲動頁面)的方式都沒辦法成功。後來改用鍵盤操作,對元素送出「向下鍵」,雖然沒有出現錯誤,卻也沒有作用,爬出來的內容仍然是頁面尚未捲動時的樣子。還是說我的 XPath 找錯地方了?
(還是有辦法可以找出滾輪對應的 JavaScript 資訊,或是用滑鼠拖曳的方式來成功捲動?)
import time
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Start Chrome with the notification content setting forced to 2 so that
# permission pop-ups cannot cover the page while scraping.
# NOTE(review): 2 is presumably Chrome's "block" value -- confirm.
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)
# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=chrome_options)

# Placeholder: replace with the URL of the Google Drive folder to scrape.
url = "https://drive.google.com/drive/folders/<FOLDER_ID>"
driver.get(url)

# A single DOWN key press followed by an immediate parse only ever captures
# the rows that were rendered on first load.  Instead, keep scrolling the last
# loaded title into view until the number of titles stops growing.
# NOTE(review): assumes Drive appends rows as the list scrolls and that
# `.Q5txwe` still matches the title elements -- confirm against the live page.
loaded = 0
while True:
    titles = driver.find_elements(By.CSS_SELECTOR, '.Q5txwe')
    if len(titles) == loaded:
        # Nothing new appeared since the previous round (or nothing matched).
        break
    loaded = len(titles)
    driver.execute_script("arguments[0].scrollIntoView();", titles[-1])
    time.sleep(1)  # give the page time to fetch and render the next batch

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(driver.page_source, "html.parser")
for title in soup.select('.Q5txwe'):
    print(title.text)
# Assign a very large scrollTop to the document's root element so the page
# jumps to the bottom, then run that JavaScript in the browser.
# NOTE(review): this scrolls the document itself; presumably it has no effect
# when the list lives in its own scrollable container -- confirm on the page.
js="var action=document.documentElement.scrollTop=100000"
driver.execute_script(js)
上面這種寫法在其他網站都能捲動,但在 Google Drive 就不行了。
試試
# Suggested alternative: scroll the window to a fixed vertical offset.
# "[hard code the height]" is a placeholder and must be replaced with an
# actual pixel value before this JavaScript can run.
lenOfPage = driver.execute_script('window.scrollTo(0, [hard code the height])')
既然有 Google Drive API,直接用 API 抓就好啦……
要先去啟用 Drive API。
理論上在正常使用量下是不會產生費用的。
from __future__ import print_function
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
# OAuth scopes requested when authorising; the single entry is Drive's
# "drive.metadata.readonly" scope URL.
# If modifying these scopes, delete the file token.json so that the
# authorisation flow runs again with the new scopes.
SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly']
def _load_credentials():
    """Return valid user credentials, running the OAuth flow when needed.

    ``token.json`` stores the user's access and refresh tokens and is written
    whenever credentials are refreshed or newly obtained.  If it is missing
    or its credentials are unusable, the user is sent through the
    installed-app flow configured by ``credentials.json``.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    # Save the credentials for the next run.
    with open('token.json', 'w') as token:
        token.write(creds.to_json())
    return creds


def _list_folders(service):
    """Return the ``id``/``name`` records of all folders, across all pages.

    A single ``files().list`` call returns one page of results, so the
    request is repeated with the returned ``nextPageToken`` until the
    response no longer contains one.
    """
    folders = []
    page_token = None
    while True:
        response = service.files().list(
            q="mimeType='application/vnd.google-apps.folder'",
            fields='nextPageToken, files(id, name)',
            pageToken=page_token,
        ).execute()
        folders.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return folders


def main():
    """Print the name and id of every Drive folder returned by the API.

    Authorises with the scopes in ``SCOPES``, builds a Drive v3 client and
    lists all items whose MIME type is the Drive folder type.  Prints
    ``No files found.`` when the listing is empty.
    """
    service = build('drive', 'v3', credentials=_load_credentials())
    folders = _list_folders(service)
    if not folders:
        print('No files found.')
        return
    print('Files:')
    for folder in folders:
        print(u'{0} ({1})'.format(folder['name'], folder['id']))


if __name__ == '__main__':
    main()