講解完網路爬蟲的實際應用後,接下來就將它與 Line chatbot 進行整合吧!
首先,新增一個 Python 檔,在此命名為 cook.py。這個檔案主要有兩個功能:縮短網址,以及依關鍵字搜尋食譜。
## cook.py
# import 所需套件
from __future__ import with_statement
import contextlib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
try:
from urllib.request import urlopen
except:
from urllib2 import urlopen
import sys
from random import random
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import time
# 縮短網址功能
def make_tiny(url, timeout=10):
    """Return a TinyURL short link for *url*.

    Args:
        url: The address to shorten.
        timeout: Seconds to wait for the TinyURL service before giving up.

    Returns:
        The body of the TinyURL response decoded as UTF-8 text.

    Raises:
        Whatever ``urlopen`` raises when the request fails or times out;
        no error is handled here.
    """
    # Use HTTPS so the address being shortened is not sent in clear text.
    request_url = ('https://tinyurl.com/api-create.php?' +
                   urlencode({'url': url}))
    # closing() guarantees the response is released even if read() fails.
    with contextlib.closing(urlopen(request_url, timeout=timeout)) as response:
        return response.read().decode('utf-8')
# 關鍵字搜尋食譜功能
class Cook_keyword:
    """Search Cookpad (Taiwan) for a keyword and collect the top recipes."""

    # Seconds to wait for each HTTP response before giving up, so that a
    # stalled server cannot block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, keyword):
        """Store the search keyword used by ``scrape``."""
        self.keyword = keyword

    def scrape(self):
        """Fetch the search result page and return up to five recipes.

        Returns:
            A list with one ``[title, info, url, img_url]`` entry per recipe:

            * ``title``: recipe name with ``/`` replaced by ``-``, cut to
              13 characters.
            * ``info``: recipe summary cut to 40 characters, or the string
              ``'None'`` when the card has no summary.
            * ``url``: shortened recipe link; the unshortened link when
              shortening fails, or ``'https://cookpad.com'`` when the card
              has no usable link.
            * ``img_url``: shortened image link, or a fixed placeholder
              image when no image could be obtained.

        Raises:
            Any exception raised by ``requests.get`` for the search page
            itself (the per-recipe lookups are best-effort and fall back to
            defaults instead of raising).
        """
        # Send a browser-like User-Agent header with every request.
        ua = UserAgent()
        headers = {'User-Agent': ua.safari}
        search_response = requests.get(
            "https://cookpad.com/tw/%E6%90%9C%E5%B0%8B/" + self.keyword +
            "?event=search.history",
            headers=headers, timeout=self.REQUEST_TIMEOUT)
        search_soup = BeautifulSoup(search_response.content, "html.parser")

        # Take the first five recipe cards of the result list.
        cards = search_soup.find_all(
            'li', {'class': 'block-link card border-cookpad-gray-400 border-t-0 border-l-0 border-r-0 border-b flex m-0 rounded-none overflow-hidden ranked-list__item xs:border-b-none xs:mb-sm xs:rounded'}, limit=5)

        result = []
        for card in cards:
            # Recipe title. A card without a title link is skipped so that
            # one malformed card does not abort the whole search.
            title_link = card.find("a", {"class": "block-link__main"})
            if title_link is None:
                continue
            title = title_link.getText().replace('/', '-')[:13]

            # Recipe summary.
            try:
                info = card.find(
                    "div", {"class": "clamp-2 break-words"}).getText()[:40]
            except Exception:
                info = 'None'

            # Recipe link. page_url keeps the unshortened address so the
            # image lookup below can request the recipe page directly.
            page_url = 'https://cookpad.com'
            try:
                page_url = 'https://cookpad.com' + card.find("a")["href"]
                url = make_tiny(page_url)
            except Exception:
                # Shortening is optional: fall back to the best link known.
                url = page_url

            # Image link, read from the recipe page.
            try:
                page_response = requests.get(
                    page_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
                page_soup = BeautifulSoup(page_response.content, "html.parser")
                # attrs must be a dict mapping attribute name to value.
                image_box = page_soup.find('div', {'class': 'tofu_image'})
                img_url = make_tiny(
                    image_box.select_one("img").get('data-original'))
            except Exception:
                img_url = 'https://i.imgur.com/bUTHY8X.jpg'

            # Collect title, summary, link and image link for this recipe.
            result.append([title, info, url, img_url])
        return result
完成網路爬蟲搜尋食譜的功能後,接下來就將它套用進 Line Chatbot 的 Carousel template 吧!(因為介面關係,此處僅示範顯示 3 個食譜的 Carousel;若要增加顯示的食譜數量,可依此類推,一個 Carousel template 最多可以有 10 個 column)
# cook.py defines the scraper class under the name Cook_keyword.
from .cook import Cook_keyword

# Build one carousel column per recipe. Each entry of ``content`` is read as
# [title, info, url, img_url]; presumably ``content`` is the list returned by
# Cook_keyword(...).scrape() -- it is assigned outside this fragment.
# Only the first three recipes are shown; slicing (instead of indexing
# 0, 1, 2 by hand) also copes with fewer than three results.
# NOTE(review): an empty ``content`` yields an empty column list, which the
# LINE API is expected to reject -- confirm the caller handles that case.
columns = [
    CarouselColumn(
        thumbnail_image_url=recipe[3],
        title=recipe[0],
        text=str(recipe[1]),
        actions=[
            # Sends the recipe summary back as a chat message.
            MessageTemplateAction(
                label='詳細資料',
                text=recipe[1]
            ),
            # Opens the recipe page.
            URITemplateAction(
                label='前往食譜',
                uri=recipe[2]
            ),
            # Sends a message naming the chosen recipe.
            MessageTemplateAction(
                label='選擇食譜',
                text='選擇食譜:' + recipe[0]
            ),
        ],
    )
    for recipe in content[:3]
]
message.append(
    TemplateSendMessage(
        alt_text='Carousel template',
        template=CarouselTemplate(columns=columns),
    )
)
line_bot_api.reply_message(event.reply_token, message)
以上程式的呈現如下圖所示:只要輸入「搜尋 布丁」,就會自動從 cookpad 上爬取相關的前五筆食譜,並將它們製作成 Carousel template 的形式(本範例僅顯示其中前 3 筆)。