iT邦幫忙

0

python 爬蟲 youtube熱門影片

BB 2021-04-29 21:44:23 1173 瀏覽

小的我想要爬取youtube熱門影片的影片相關資訊
現在可以透過 youtube_dl 套件成功取得資訊,但一次只能取得一支影片的資訊

from pprint import pprint
import youtube_dl


def get_video_info(youtube_url):
    """Fetch metadata for one YouTube URL, pretty-print it, and return it.

    Args:
        youtube_url: URL passed to ``YoutubeDL.extract_info``.

    Returns:
        A dict keyed by the display labels used below. Each value is read
        with ``info.get(...)``, so a field absent from the extracted info is
        ``None`` rather than an error.
    """
    with youtube_dl.YoutubeDL() as ydl:
        # download=False asks youtube_dl for metadata only.
        info = ydl.extract_info(youtube_url, download=False)

    video_info = {
        'ID': info.get('id'),
        '標題': info.get('title'),
        '影片縮圖': info.get('thumbnail'),
        '上傳者': info.get('uploader'),
        '上傳者網址': info.get('uploader_url'),
        '影片長度(秒)': info.get('duration'),
        '觀看次數': info.get('view_count'),
        # NOTE(review): the original author flagged this field with "-";
        # presumably it is often unavailable -- confirm.
        '留言數': info.get('comment_count'),
        '喜歡數': info.get('like_count'),
        '不喜歡數': info.get('dislike_count'),
        '平均評分': info.get('average_rating'),
        '描述': info.get('description'),
        '標籤': info.get('tags'),
        '網頁網址': info.get('webpage_url'),
        '上傳日期': info.get('upload_date'),
    }
    pprint(video_info)
    # Returned as well as printed so a caller can collect results for
    # several videos; the original returned None.
    return video_info

因此我必須先獲得每部影片的網址才可以連續獲得所有影片資訊
目前程式碼沒辦法成功得到影片網址

import bs4
import requests
# Browser-like User-Agent sent with the request. Written as adjacent string
# literals so that no stray backslash or source indentation leaks into the
# header value (the original also misspelled "Mozilla").
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/45.0.2454.101 Safari/537.36"
    )
}
url = "https://www.youtube.com/feed/trending?bp=6gQJRkVleHBsb3Jl"
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page.
download = requests.get(url, headers=header, timeout=10)
download.raise_for_status()
analyze = bs4.BeautifulSoup(download.text, "lxml")
# Links live on <a> tags; <div> elements have no href, so the original
# `i.get('href')` returned None and `base + href` raised TypeError.
# NOTE(review): YouTube presumably renders most of this page client-side, so
# the static HTML may contain few or no video links -- confirm.
a = analyze.find_all("a", href=True)
base = "https://www.youtube.com"
for i in a:
    href = i["href"]
    # Site-relative links need the host prepended; absolute ones are left
    # as they are. A separate name avoids overwriting the page `url`.
    link = base + href if href.startswith("/") else href
    print(link)

現在我只知道可以透過api獲得
但使用方法在爬文之後還是不了解
希望各位可以幫幫忙 謝謝

1 個回答

0
微甜的酸
iT邦新手 3 級 ‧ 2021-04-30 20:12:02
最佳解答

官網範例:
Python

# -*- coding: utf-8 -*-

# Sample Python code for youtube.videos.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python

import os

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

# OAuth 2.0 scope list handed to the client-secrets flow in main().
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]

def main():
    """Authorize via OAuth and print the most-popular videos for region TW.

    Reads the OAuth client configuration from ``client_secrets_file``, runs
    the installed-app authorization flow, calls ``videos.list`` with
    ``chart="mostPopular"`` and prints the raw response.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "YOUR_CLIENT_SECRET_FILE.json"

    # Get credentials and create an API client
    flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
        client_secrets_file, scopes)
    # run_console() depended on the copy/paste (OOB) flow, which Google has
    # deprecated and google-auth-oauthlib 1.0 removed. run_local_server()
    # receives the authorization code on a local redirect instead, so no
    # code has to be pasted by hand; port=0 lets the OS pick a free port.
    credentials = flow.run_local_server(port=0)
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, credentials=credentials)

    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        chart="mostPopular",
        regionCode="TW"
    )
    response = request.execute()

    print(response)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

回傳結果(略)

官方文件

BB iT邦新手 5 級 ‧ 2021-05-01 20:30:35 檢舉

有錯誤不太清楚是哪裡出問題
輸入authorization code後執行的結果是:
File "youtube_url_get.py", line 35, in <module>
main()
File "youtube_url_get.py", line 21, in main
credentials = flow.run_console()
File "C:\Users\lucie\anaconda3\lib\site-packages\google_auth_oauthlib\flow.py", line 414, in run_console
self.fetch_token(code=code)
File "C:\Users\lucie\anaconda3\lib\site-packages\google_auth_oauthlib\flow.py", line 288, in fetch_token
return self.oauth2session.fetch_token(self.client_config["token_uri"], **kwargs)
File "C:\Users\lucie\anaconda3\lib\site-packages\requests_oauthlib\oauth2_session.py", line 360, in fetch_token
self._client.parse_request_body_response(r.text, scope=self.scope)
File "C:\Users\lucie\anaconda3\lib\site-packages\oauthlib\oauth2\rfc6749\clients\base.py", line 421, in parse_request_body_response
self.token = parse_token_response(body, scope=scope)
File "C:\Users\lucie\anaconda3\lib\site-packages\oauthlib\oauth2\rfc6749\parameters.py", line 431, in parse_token_response
validate_token_parameters(params)
File "C:\Users\lucie\anaconda3\lib\site-packages\oauthlib\oauth2\rfc6749\parameters.py", line 438, in validate_token_parameters
raise_from_error(params.get('error'), params)
File "C:\Users\lucie\anaconda3\lib\site-packages\oauthlib\oauth2\rfc6749\errors.py", line 405, in raise_from_error
raise cls(**kwargs)
oauthlib.oauth2.rfc6749.errors.InvalidGrantError: (invalid_grant) Malformed auth code.

我剛用沒問題啊,應該是你沒用client secrets file的關係。
改用RESTful:

import requests as req

# API key placeholder: replace with your own YouTube Data API v3 key.
key = "你的API金鑰"
# Number of videos to request. For videos.list the API documents the
# accepted range as 1-50 (default 5), not 100 as previously noted here.
maxResults = 5

# Query parameters for videos.list: the most-popular chart for region TW.
params = {
    "part": "snippet,contentDetails,statistics",
    "chart": "mostPopular",
    "regionCode": "TW",
    "key": key,
    "maxResults": maxResults,
}

url = "https://youtube.googleapis.com/youtube/v3/videos"

# The timeout avoids hanging forever on a stalled connection.
response = req.get(url, params=params, timeout=10)
# Fail loudly on an HTTP error (e.g. bad key, quota exceeded) instead of
# printing the error body as if it were a result.
response.raise_for_status()

print(response.text)

我要發表回答

立即登入回答