Day20 Python 爬蟲 Selenium

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Basic setup: create the Chrome options object
options = Options()

# Location of the Chrome executable
options.binary_location = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"

# Location of the chromedriver executable
webdriver_path = 'C:\\chromedriver.exe'
driver = webdriver.Chrome(executable_path=webdriver_path, options=options)

# Go to Google
driver.get('https://google.com')

# Type the search text into the element named 'q'
search_elem = driver.find_element_by_name('q')
search_elem.send_keys('iThelp')

# Click the search button (the element named 'btnK')
search_btn = driver.find_element_by_name('btnK')
search_btn.click()

# Close the browser (left disabled here, so the window stays open after the run)
#driver.close()

執行後就會發現瀏覽器以神速自動完成了搜尋。

接下來來嘗試看看文章的範例吧!

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random import randint
import selenium.webdriver.support.ui as ui
import time

def get_movie_info(movie_url):
    """
    Collect rating, genres, release date, poster link and cast for one movie.

    movie_url is handed to PyQuery as-is (the original docstring describes it
    as an IMDB movie page url); the fields are then picked out with CSS
    selectors.

    Returns a dict with the keys "movieRating", "movieReleaseDate",
    "movieGenre", "moviePosterLink" and "movieCast".
    """
    page = pq(movie_url)

    # Rating text is converted to a float; float() raises ValueError when
    # the selector yields text that is not a number.
    rating = float(page("strong span").text())

    # Every ".subtext a" link text is gathered in order. The last one is
    # taken off as the release date; what remains is used as the genres.
    subtext_links = []
    for link in page(".subtext a").items():
        subtext_links.append(link.text())
    release_date = subtext_links.pop()

    poster_src = page(".poster img").attr('src')

    cast_names = []
    for cell in page(".primary_photo+ td a").items():
        cast_names.append(cell.text())

    # Information handed back to the caller
    return {
        "movieRating": rating,
        "movieReleaseDate": release_date,
        "movieGenre": subtext_links,
        "moviePosterLink": poster_src,
        "movieCast": cast_names,
    }

def get_movies(*args):
    """
    Get multiple movies' info from movie titles.

    Starts one Chrome session and, for every title, searches IMDB from its
    home page, narrows the results with the "Movie" link, opens the first
    result and passes that page's url to get_movie_info().

    Args:
        *args: Movie titles to search for.

    Returns:
        dict mapping each given title to the dict returned by
        get_movie_info() for it.

    Any exception raised while searching or parsing propagates to the
    caller; the browser session is shut down first in every case.
    """
    imdb_home = "https://www.imdb.com/"
    options = Options()
    options.binary_location = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
    webdriver_path = 'C:\\chromedriver.exe'
    driver = webdriver.Chrome(executable_path=webdriver_path, options=options)
    movies = dict()
    try:
        # Each element lookup below is retried through this wait object
        # (timeout argument: 10).
        wait = ui.WebDriverWait(driver, 10)
        for movie_title in args:
            # Go to the IMDB home page
            driver.get(imdb_home)
            # Locate the search field
            search_elem = wait.until(lambda drv: drv.find_element_by_name("q"))
            # Type in the movie title
            search_elem.send_keys(movie_title)
            # Locate the search button
            submit_elem = wait.until(
                lambda drv: drv.find_element_by_id("suggestion-search-button"))
            # Press the search button
            submit_elem.click()
            # Locate the link that narrows the results to the "Movie" category
            category_movie_elem = wait.until(
                lambda drv: drv.find_element_by_link_text("Movie"))
            # Apply the category filter
            category_movie_elem.click()
            # Locate the first search result link
            first_result_elem = wait.until(
                lambda drv: drv.find_element_by_xpath("//td[@class='result_text']/a"))
            # Open the search result
            first_result_elem.click()
            # Call get_movie_info() on the page the browser landed on
            current_url = driver.current_url
            movie_info = get_movie_info(current_url)
            movies[movie_title] = movie_info
            # Random pause between lookups
            time.sleep(randint(3, 8))
    finally:
        # quit() ends the whole driver session, where close() only closes
        # the current window; running it in finally releases the browser
        # even when a lookup above raises.
        driver.quit()
    return movies

# Demo run: look up two movie titles and print the resulting dict.
print(get_movies("Avengers: Endgame", "Captain Marvel"))
#driver.close()

回傳結果