台灣公益資訊中心非常貼心地提供了全國所有非營利機構的基本資料供查詢。我只需要各機構的名稱及 email(為了寄送 EDM),但要一個個點選、剪貼,太不符合資訊人的效率了,於是又搬出 Python 大神來了。
參考網址:http://www.npo.org.tw/npolist_list.asp
程式碼:
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import requests
import time
import random
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl import load_workbook
urls = list()
nonprofit = list()
# Step 1: walk the paginated listing and collect the hyperlink found in each
# 'common_table_01_td_bg' cell of the 'common_table_01' table.
for i in range(0, 160):
    url = 'http://www.npo.org.tw/npolist_list.asp?nowpage=' + str(i) + '&npost=&lo=&keyword2='
    try:
        # A timeout keeps a single stalled request from hanging the whole crawl.
        response = requests.get(url, timeout=30)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        print('Error', e.args)
        # Without this, execution would fall through and either hit an
        # undefined `response` or re-parse the previous page's response.
        continue
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', 'common_table_01')
    if table is None:
        # The page did not contain the expected listing table; skip it
        # instead of crashing on `None.find_all`.
        print('Error', 'listing table not found', url)
        continue
    for td in table.find_all('td', 'common_table_01_td_bg'):
        link = td.a
        # Only keep cells that really hold an anchor with an href.
        if link is not None and link.get('href'):
            urls.append(link['href'])
# Step 2: visit every collected link and record the organisation's name and
# email (presumably the first cell of table rows 0 and 7 -- verify against the
# live page layout).
for item in urls:
    url = 'http://www.npo.org.tw/' + item
    try:
        # A timeout keeps a single stalled request from hanging the whole crawl.
        response = requests.get(url, timeout=30)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        print('Error', e.args)
        # Without this, execution would fall through and either hit an
        # undefined `response` or record the previous organisation twice.
        continue
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')
    rows = table.find_all('tr') if table is not None else []
    try:
        name = rows[0].find_all('td')[0].text
        email = rows[7].find_all('td')[0].text
    except IndexError:
        # Page has no table, too few rows, or a row without cells: skip this
        # entry rather than losing everything collected so far.
        print('Error', 'unexpected page layout', url)
        continue
    nonprofit.append([name, email])
# ====== Write the collected records to an Excel file ======
workbook = Workbook()  # create a brand-new workbook
sheet = workbook.active
header = ['機構', 'email']
sheet.append(header)
# One spreadsheet row per collected [name, email] record.
for record in nonprofit:
    sheet.append([*record])
workbook.save('非營利機構總名單.xlsx')
workbook.close()