Python 爬蟲疑問：爬取 Google Play 商店使用者評論

python3 selenium python爬蟲

ggininder852 2022-07-01 23:31:27 ‧ 1956 瀏覽

分享至

我是 Python 初學者，參考網路上的教學執行以下程式碼，
想要爬取 Google Classroom 的全部評論，但每次都只能爬到 3000 多則評論。

每次都會跳出以上畫面，然後就停止爬取了。
想知道問題出在哪裡，為何無法爬取全部的評論？
以下為我使用的程式碼：

def click_yes(obj):
    """Click the element at a fixed absolute XPath, on a best-effort basis.

    The lookup uses a hard-coded, position-based XPath, so it stops matching
    as soon as the page layout changes. A failed lookup or click is therefore
    treated as "nothing to click" rather than as an error.

    Args:
        obj: Object exposing ``find_element_by_xpath`` (presumably a Selenium
            WebDriver or WebElement; verify against the caller).

    Returns:
        bool: True if the element was found and clicked, False if the lookup
        or the click raised an ordinary exception.
    """
    target_xpath = "//body/div[4]/div[2]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/footer[1]/div[2]/div[1]/div[1]/div[2]/span[2]"
    try:
        obj.find_element_by_xpath(target_xpath).click()
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit and
        # make the script impossible to stop cleanly.
        return False
    return True

    
def _text_or_blank(node):
    """Return ``node.text``, or an empty string when the lookup found nothing."""
    return node.text if node is not None else ""


# Number of PAGE_DOWN key presses to send; the page source is re-parsed after
# each press.
page = int(input("請輸入頁面向下捲動次數:"))
dirverPath = 'C:\\spider\\chromedriver.exe'
# NOTE(review): ``executable_path=`` and the ``find_element_by_*`` helpers are
# the older Selenium API; recent Selenium 4 releases removed them. Confirm the
# installed Selenium version before upgrading.
browser = webdriver.Chrome(executable_path = dirverPath)
url = 'https://play.google.com/store/apps/details?id=com.google.android.apps.classroom'

number = 0             # running count of distinct reviews printed so far
counter = 0            # PAGE_DOWN presses performed so far
post_title = []        # nickname of every printed review, in print order
seen_reviews = set()   # (nickname, time, text) keys of reviews already printed

try:
    browser.get(url)

    move = browser.find_element_by_tag_name('body')
    # click_text is not defined in this snippet; it is expected to be defined
    # elsewhere in the file.
    click_text(move)
    time.sleep(1)
    click_yes(move)
    time.sleep(1)

    while counter < page:
        move.send_keys(Keys.PAGE_DOWN)

        # Re-parse the whole DOM each time: cards loaded earlier are still
        # present, which is why already-seen reviews must be filtered below.
        objsoup = bs4.BeautifulSoup(browser.page_source, 'lxml')

        for article in objsoup.find_all('div', class_ = 'RHo1pe'):
            title = article.find('div', class_ = 'X5PpBb')       # reviewer nickname
            rank = article.find('div', role = 'img')             # star rating
            content = article.find('div', class_ = 'h3YV2d')     # full review text
            timestamp = article.find('span', class_ = 'bp9Aid')  # review time

            if title is None:
                # No nickname element in this card; skip it instead of
                # crashing on ``None.text``.
                continue

            nickname = title.text
            rating = rank.get('aria-label') if rank is not None else None
            review_text = _text_or_blank(content)
            posted_at = _text_or_blank(timestamp)

            # Display names are not unique, so de-duplicating on the nickname
            # alone would drop distinct reviews; key on the whole review.
            review_key = (nickname, posted_at, review_text)
            if review_key in seen_reviews:
                continue
            seen_reviews.add(review_key)

            number += 1
            post_title.append(nickname)
            print("留言編號:", number)
            print("留言暱稱:", nickname)
            print("評價:", rating)
            print("評論:", review_text)
            print("時間:", posted_at)
            print("="*100)

        counter += 1
finally:
    # Always shut the browser down so Chrome / chromedriver processes are not
    # left running if scraping fails part-way through.
    browser.quit()

print(post_title)