Requests 模組可以做到的事情很多,例如在部落格中發布文章,或是到某個網站爬取資訊,再將結果匯出成 Excel 表。
pip install requests
import requests as req
# Page requested by the first GET example.
url = "https://www.dcard.tw/f"
# NOTE(review): `re` is also the name of the standard-library regex module;
# the name is kept because later statements in this file read `re`.
re = req.get(url)
# Prints the object returned by req.get() itself, not the page body.
print(re)
由結果可以得知,`req.get()` 回傳的是一個 Response 物件(印出來會像 `<Response [200]>`,方括號內是 HTTP 狀態碼)。
# Body of the previous response, decoded as text.
print(re.text)

# Download an image. The body is binary, so it is read through .content
# (bytes) rather than .text.
img_url = "http://i0.hdslb.com/bfs/article/a2eb4c4efb554e92bd5f3409de35dab5a0bf639f.jpg"
re = req.get(img_url)
# Fail loudly on a 4xx/5xx reply instead of saving an error page as animal.jpg.
re.raise_for_status()
print(re.content)
# "wb" writes the raw bytes unchanged.
with open('animal.jpg', mode="wb") as file:
    file.write(re.content)
import requests as req

# Download a PDF and store it locally as dengue.pdf.
pdf_url = "https://www.cdc.gov.tw/Uploads/files/無蚊最安心!做好環境管理%20清除孳生源.pdf"
re = req.get(pdf_url)
# Fail loudly on a 4xx/5xx reply instead of saving an error page as dengue.pdf.
re.raise_for_status()
# The PDF is binary data: write the raw bytes from .content in "wb" mode.
with open('dengue.pdf', mode="wb") as file:
    file.write(re.content)
import requests as req
# Endpoint used for the POST examples.
url = 'https://httpbin.org/post'
re = req.post(url) # this call has to be post() rather than get()
# Print the response body as text.
print(re.text)
import requests as req

# Fields to send in the request body.
data = dict(title='python', content='Hello World!')
url = 'https://httpbin.org/post'

# Passing the mapping through data= sends it as the body of the POST.
re = req.post(url, data=data)
print(re.text)
import requests as req

url = 'https://httpbin.org/post'
# Ordinary form fields that accompany the uploaded file.
data = {
    'title': 'python',
    'content': 'Hello World!',
}
# Upload the PDF through files= (multipart/form-data) and send the form fields
# through data=. requests ignores json= whenever data= or files= is also
# given, so passing the fields as json= would silently drop them.
# The request is sent inside the with-block because requests reads from the
# open file handle while building the body.
with open('dengue.pdf', mode='rb') as file:
    pdf = {'upload': file}
    re = req.post(url, files=pdf, data=data)
print(re.text)
"User-Agent": "python-requests/2.32.3",可以看到這個User-Agent會告訴網頁,這個請求是從python發送過來的,通常會被擋掉,所以要把User-Agent的值改掉,改成像瀏覽器發送的請求。
找一個網站,開啟瀏覽器的開發者工具,切到 Network 分頁後重新整理頁面,點選任一請求,從請求標頭(Request Headers)的地方找 User-Agent。
新增一個 header 字典,並透過 `headers` 參數傳給 `req.post()`。
import requests as req

url = 'https://httpbin.org/post'
# Form fields to send in the request body.
data = {
    'title': 'python',
    'content': 'Hello World!',
}
# Replace the default User-Agent with a browser-style value.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15',
}
# Send the form fields together with the custom headers; `data` was built
# above but previously never passed to the request.
re = req.post(url, data=data, headers=header)
print(re.text)