小的我想要爬取 YouTube 熱門影片的相關資訊。
目前已經可以透過 youtube_dl 套件取得資訊,但一次只能取得一支影片的資訊:
from pprint import pprint
import youtube_dl
def get_video_info(youtube_url):
    """Fetch metadata for one YouTube video, pretty-print it and return it.

    Args:
        youtube_url: URL of the video; passed unchanged to
            ``YoutubeDL.extract_info``.

    Returns:
        dict: display label -> value taken from the extracted info dict.
        A field that is absent from the extracted info is stored as ``None``.
    """
    # Display label -> key looked up in the dict returned by extract_info().
    field_map = {
        'ID': 'id',
        '標題': 'title',
        '影片縮圖': 'thumbnail',
        '上傳者': 'uploader',
        '上傳者網址': 'uploader_url',
        '影片長度(秒)': 'duration',
        '觀看次數': 'view_count',
        # NOTE(review): the original author flagged this field with "-";
        # presumably it is often missing -- confirm.
        '留言數': 'comment_count',
        '喜歡數': 'like_count',
        '不喜歡數': 'dislike_count',
        '平均評分': 'average_rating',
        '描述': 'description',
        '標籤': 'tags',
        '網頁網址': 'webpage_url',
        '上傳日期': 'upload_date',
    }

    with youtube_dl.YoutubeDL() as ydl:
        # download=False: ask only for the metadata, not the media file.
        info = ydl.extract_info(youtube_url, download=False)

    video_info = {label: info.get(key) for label, key in field_map.items()}
    pprint(video_info)
    # Returned in addition to being printed so a caller can loop over many
    # URLs and collect the results.
    return video_info
因此我必須先獲得每部影片的網址才可以連續獲得所有影片資訊
目前程式碼沒辦法成功得到影片網址
import bs4
import requests
# Browser-like User-Agent sent with the request.  The original literal had a
# stray backslash escape, a line continuation inside the string and a
# misspelled "Mozilla", so the header value that was sent was malformed.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
    )
}
url = "https://www.youtube.com/feed/trending?bp=6gQJRkVleHBsb3Jl"
# A timeout keeps the script from hanging forever on a stalled connection.
download = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
download.raise_for_status()
analyze = bs4.BeautifulSoup(download.text, "lxml")
# Links live on <a> elements.  The original searched <div> elements, whose
# .get('href') is None, so `base + href` raised TypeError.
a = analyze.find_all("a", href=True)
base = "https://www.youtube.com"
# NOTE(review): the trending page appears to be built by JavaScript, so the
# static HTML may contain no video anchors at all; in that case nothing is
# printed and the YouTube Data API is the reliable source -- confirm.
for i in a:
    href = i.get('href')
    # Keep only relative links to video pages.
    if not href.startswith("/watch"):
        continue
    # Separate name so the page URL stored in `url` is not overwritten.
    video_url = base + href
    print(video_url)
現在我只知道可以透過api獲得
但使用方法在爬文之後還是不了解
希望各位可以幫幫忙 謝謝
官網範例:
Python
# -*- coding: utf-8 -*-
# Sample Python code for youtube.videos.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python
import os
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
# OAuth scope requested for the credentials (read-only YouTube access).
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]


def main():
    """Authorize via OAuth, request the most popular videos for TW, print them.

    Reads the OAuth client configuration from ``client_secrets_file``, runs
    the installed-app authorization flow, builds a YouTube Data API v3 client
    and prints the raw response of ``videos.list(chart="mostPopular")``.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "YOUR_CLIENT_SECRET_FILE.json"

    # Get credentials and create an API client
    flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
        client_secrets_file, scopes)
    # run_console() depended on the out-of-band copy/paste flow, which Google
    # retired and which newer google-auth-oauthlib releases no longer
    # provide.  run_local_server() opens the browser and receives the
    # authorization code on a temporary local port (port=0 -> any free port).
    credentials = flow.run_local_server(port=0)
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, credentials=credentials)

    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        chart="mostPopular",
        regionCode="TW"
    )
    response = request.execute()

    print(response)


if __name__ == "__main__":
    main()
回傳結果(略)
.
我剛用沒問題啊,應該是你沒用 client secrets file 的關係。
改用RESTful:
import requests as req
# Placeholder: replace this value with your own API key.
key = "你的API金鑰"
# Number of videos to return.  videos.list accepts 1-50 here (default 5);
# the original comment's "100" exceeds what the API allows.
maxResults = 5
# Query parameters for the videos.list endpoint: most popular chart for TW.
params = {
    "part": "snippet,contentDetails,statistics",
    "chart": "mostPopular",
    "regionCode": "TW",
    "key": key,
    "maxResults": maxResults,
}
url = "https://youtube.googleapis.com/youtube/v3/videos"
# A timeout keeps the script from hanging forever on a stalled connection.
response = req.get(url, params=params, timeout=10)
# Print the raw body as-is; on failure it carries the API's error message.
print(response.text)