學習筆記
main.py
# from pprint import pprint
from bs4 import BeautifulSoup
import requests, pickle
import re
from config import username,password,headers,url,ajax_url,p_url,path
from datetime import datetime
from openpyxl import Workbook
import compare
import os, sys
def create_folder(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, e.g. because ``path``
            already exists as a regular file.
    """
    # exist_ok=True makes the call idempotent and removes the race between a
    # separate "does it exist?" check and the creation itself.
    os.makedirs(path, exist_ok=True)
def ask_excel(ask_option):
    """Ask a yes/no question on stdin until a recognisable answer is given.

    Args:
        ask_option: Text inserted into the prompt ``Do u want ...? y/n [n]: ``.

    Returns:
        1 when the answer is yes (``y`` / ``yes``), 0 when it is no
        (``n`` / ``no``) or just ENTER, which selects the default "no".
    """
    yes_answers = {'y', 'yes'}
    no_answers = {'n', 'no', ''}
    while True:
        # Normalise the reply so that e.g. "YES" or " y " are accepted too.
        answer = input(f'Do u want {ask_option}? y/n [n]: ').strip().lower()
        if answer in yes_answers:
            return 1
        if answer in no_answers:
            return 0
        print('plz enter y or n or ENTER!!!')
def do_excel(uid,date,opt_title,option,root_json,path): # generate the Excel file
    """Write every entry of ``root_json['users']`` to an .xlsx workbook.

    The sheet starts with a header row and then holds one row per user:
    the username prefixed with ``@``, the full name, and the profile picture
    URL with ``.jpg`` appended.

    Args:
        uid: Target account name; first part of the output file name.
        date: Timestamp string; second part of the output file name.
        opt_title: Mapping from ``option`` to the short file-name suffix.
        option: Key into ``opt_title``.
        root_json: Mapping whose ``'users'`` entry is an iterable of dicts
            with ``username``, ``full_name`` and ``profile_pic_url`` keys.
        path: Prefix (directory, including trailing separator) prepended to
            the file name.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(['username', 'full_name', 'profile_pic'])
    for user in root_json['users']:
        sheet.append([
            '@'+user['username'],
            user['full_name'],
            user['profile_pic_url']+'.jpg',
        ])
    workbook.save(path+f'{uid}{date}{opt_title[option]}.xlsx')
def do_txt(uid,date,opt_title,option,root_json,path): # generate the txt file
    """Write every entry of ``root_json['users']`` to a UTF-8 text file.

    Each user becomes one line holding the ``str()`` of the tuple
    ``('@' + username, full_name)``. The last line is a
    ``Total: N records!`` footer, and the record count is also printed.

    Args:
        uid: Target account name; first part of the output file name.
        date: Timestamp string; second part of the output file name.
        opt_title: Mapping from ``option`` to the short file-name suffix.
        option: Key into ``opt_title``.
        root_json: Mapping whose ``'users'`` entry is an iterable of dicts
            with ``username`` and ``full_name`` keys.
        path: Prefix (directory, including trailing separator) prepended to
            the file name.
    """
    target = path+f'{uid}{date}{opt_title[option]}.txt'
    written = 0
    with open(target, 'w+',encoding='utf-8') as out:
        # enumerate() leaves ``written`` at the number of records emitted.
        for written, user in enumerate(root_json['users'], start=1):
            record = ('@'+user['username'], user['full_name'])
            out.write(str(record)+'\n')
        out.write(f'Total: {written} records!')
    print(f'Got {written} records!!!')
def main():
    """Interactively fetch a user's following/followers list and save it.

    Flow:
      1. Prompt for the target username, which list to fetch, the maximum
         number of entries and whether an Excel file is wanted.
      2. Log in, or reuse the cookies pickled by a previous run.
      3. Look up the target's numeric user id, request the list and write it
         to a text file (plus an Excel file on request).
      4. Optionally diff the new text file against an older one.

    The imported ``headers`` dict is modified in place (CSRF token, Referer).
    The function prints a hint and terminates through ``sys.exit()`` when the
    login page, the profile lookup or the list response cannot be processed.
    """
    date = datetime.now().strftime("%Y%m%d-%H%M")
    login_time = int(datetime.now().timestamp())
    payload = {
        'username': f'{username}',
        'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{login_time}:{password}',
        'queryParams': {},
        'optIntoOneTap': 'false'
    }
    # Cookies of a successful run are cached here and reused next time.
    session_file = f'{path}{username}session.pkl'

    print(f'If target is private account, you have to follow it first!!!')
    while True:
        uid = str(input('Enter id: '))
        if uid == '':
            print(f'Do not leave blank!!!')
        else:
            break

    # Accepted answers -> API path segment; ENTER defaults to "following".
    opt_list = {'':'following','1':'following','2':'followers','following':'following','followers':'followers'}
    while True:
        option = str(input('following[1]/followers[2] [1]: '))
        if option in opt_list:
            option = opt_list[option]
            break
        print(f'enter 1 or 2 or following or followers!!!')

    try:
        fcount = int(input('Enter max num of following/followers [2000]: '))
    except ValueError:
        # Anything that is not an integer (including ENTER) means "default".
        fcount = 2000
    opt_title = {
        'following': 'fwi',
        'followers': 'fwr',}
    ask = ask_excel('Excel file(will have profile pic)')

    with requests.session() as session:
        if not os.path.exists(session_file):
            print('Getting sessions')
            res = session.get(url)
            tokens = re.findall(r"csrf_token\":\"(.*?)\"", res.text)
            if not tokens:
                # Previously this surfaced as an unhandled IndexError.
                print(f'error while getting csrf token from the login page')
                print(f'your account might block by instagram server, plz try again later or change your ip!!')
                sys.exit()
            csrf = tokens[0]
            cookies = res.cookies  # cookies handed out with the first response
            cookies['csrf'] = csrf
            headers['x-csrftoken'] = csrf
            # Log in with the initial cookies and CSRF token; the session's
            # cookie jar afterwards is what gets cached.
            # NOTE(review): the login response is not inspected, so a failed
            # login is cached as well and only detected by the lookup below.
            session.post(ajax_url, data=payload, headers=headers, cookies=cookies)
            with open(session_file, 'wb') as f:
                pickle.dump(session.cookies, f)
            headers['Referer'] = f'https://www.instagram.com/{uid}/following/'
        else:
            print('Reloading sessions and updating cookies')
            headers['Referer'] = f'https://www.instagram.com/{uid}/following/'
            with open(session_file, 'rb') as f:
                # NOTE: unpickling can execute arbitrary code; only the file
                # written by this script itself should ever be loaded here.
                session.cookies.update(pickle.load(f))
            # update() returns None, so no per-request cookies are passed in
            # this branch; the session's own jar carries the restored ones.
            cookies = None
            headers['x-csrftoken'] = session.cookies['csrftoken']

        fsi = session.get(p_url+uid,cookies=cookies,headers=headers)
        ids = re.findall(r"id\":\"(.*?)\"",fsi.text)
        # NOTE(review): '236' is treated as the marker of a failed lookup;
        # the meaning of that value is not visible here -- confirm.
        if len(ids) < 2 or ids[1] == '236' or ids[-1] == '236':
            # Drop the cached session so that the next run logs in afresh.
            os.remove(session_file)
            print(f'error while checking userid')
            print(f'1.plz check the target username(no @)!!!')
            print(f'2.Make sure u set the right USERNAME and PASSWORD in *config.py* file!!!')
            print(f'3.your account might block by instagram server, plz try again later or change your ip!!')
            sys.exit()
        friendid = ids[1]
        print(f"userid:{friendid}")

        # Query string of the list endpoint: it could be paged step by step,
        # here the maximum is simply requested in one go.
        params = {
            'count': fcount,
            'max_id': '',
            'search_surface': 'follow_list_page'}
        response = session.get(f'https://i.instagram.com/api/v1/friendships/{friendid}/{option}/', params=params, cookies=cookies, headers=headers)
        try:
            root_json = response.json()
        except requests.exceptions.JSONDecodeError as jsonError:
            print(f'Error when processing json file: {jsonError}')
            print(f'1.Make sure u set the right USERNAME and PASSWORD in *config.py* file!!!')
            print(f'2.your account might block by instagram server, plz try again later or change your ip!!')
            sys.exit()
        if not isinstance(root_json, dict) or 'users' not in root_json:
            # The writers below index root_json['users']; without this check
            # such a reply ended in an unhandled KeyError/TypeError.
            print(f'Error when processing json file: no users in response')
            print(f'1.Make sure u set the right USERNAME and PASSWORD in *config.py* file!!!')
            print(f'2.your account might block by instagram server, plz try again later or change your ip!!')
            sys.exit()

        if ask == 1:
            try:
                do_excel(uid,date,opt_title,option,root_json,path)
            except IOError as error:
                # The Excel file is optional: report and carry on with the txt.
                print(f'Error when generate Excel file:{error}')
        do_txt(uid,date,opt_title,option,root_json,path)

        ask2 = ask_excel('compare with old file')
        if ask2 == 1:
            try:
                f1 = path+input(f'Enter first filename(older file): ')+'.txt'
                f2 = path+f'{uid}{date}{opt_title[option]}'+'.txt'
                compare.compare_file(f1, f2)
            except IOError as error:
                print(f'Error when generate compared.txt file:{error}')
                print(f'Make sure u have the file existed and enter the right filename(without .txt)!!!')
                sys.exit()
if __name__ == '__main__':
    # Script entry point: make sure the output folder exists, refuse to run
    # with the placeholder credentials, then start the interactive session.
    create_folder(path)
    if username == 'USERNAME or EMAIL':
        print(f'plz go config.py to set your USERNAME and PASSWORD')
        # sys.exit() performs normal interpreter shutdown (flushing stdio),
        # whereas os._exit() could drop the hint above when stdout is
        # redirected. The exit status stays 0 as before.
        sys.exit(0)
    main()
compare.py
import sys
import difflib
import os
from config import path
def create_folder(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, e.g. because ``path``
            already exists as a regular file.
    """
    # exist_ok=True makes the call idempotent and removes the race between a
    # separate "does it exist?" check and the creation itself.
    os.makedirs(path, exist_ok=True)
# Read a text file into a list of lines
def read_file(file_name):
    """Return the lines of the UTF-8 file ``file_name`` without line endings.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line.

    When the file cannot be opened or read, the error is printed and the
    program terminates through ``sys.exit()``.
    """
    try:
        # The context manager closes the handle even when read() raises,
        # which the former open()/close() pair did not guarantee.
        with open(file_name, 'r', encoding="utf-8") as file_handle:
            return file_handle.read().splitlines()  # split the content per line
    except IOError as error:
        print('Read file Error: {0}'.format(error))
        sys.exit()
# Compare two files and write the result as plain text and as HTML
def compare_file(file1_name, file2_name):
    """Diff two text files and store the result under the configured ``path``.

    Added/removed lines (``difflib.Differ`` entries starting with ``+`` or
    ``-``) are printed and written to ``compared.txt``; entries ending in
    ``!`` are left out (presumably the ``Total: N records!`` footer -- verify
    against the files being compared). A side-by-side HTML view is always
    written to ``result.html``.

    Args:
        file1_name: Path of the older file.
        file2_name: Path of the newer file.

    Terminates the program through ``sys.exit()`` when either name is empty.
    """
    if "" in (file1_name, file2_name):
        print('文件路徑不能為空: file1_name的路徑為: {0}, file2_name的路徑為: {1} .'.format(file1_name, file2_name))
        sys.exit()

    old_lines = read_file(file1_name)
    new_lines = read_file(file2_name)

    wanted_markers = ['+','-']
    skipped_endings = ['!']
    changes = []
    for entry in difflib.Differ().compare(old_lines, new_lines):
        if entry[0] in wanted_markers and entry[-1] not in skipped_endings:
            changes.append(entry)

    if not changes:
        print(f"It's all same as old file")
    else:
        print('record of compare will be storage in compared.txt and result.html !')
        try:
            with open(path+'compared.txt', 'w', encoding="utf-8") as result_file:
                result_file.write('\n'.join(changes))
            print('\n'.join(changes))
        except IOError as error:
            print('寫入compare.txt文件時發生錯誤:{0}'.format(error))

    # Build the HTML comparison with HtmlDiff.make_file and save it.
    html_report = difflib.HtmlDiff().make_file(old_lines, new_lines)
    try:
        with open(path+'result.html', 'w', encoding="utf-8") as result_file:
            result_file.write(html_report)
    except IOError as error:
        print('寫入html文件錯誤:{0}'.format(error))
if __name__ == '__main__':
    # Stand-alone use: ask for two file names (without the .txt extension)
    # inside the data folder and compare them.
    create_folder(path)
    older, newer = (
        path+input(prompt)+'.txt'
        for prompt in (
            f'Enter first filename(older file): ',
            f'Enter second filename(newer file): ',
        )
    )
    compare_file(older, newer)
config.py
# Your Instagram username and password. main.py refuses to start while
# ``username`` still holds the placeholder below.
username = 'USERNAME or EMAIL'
password = 'PASSWORD'

# Login page (fetched first; the CSRF token is extracted from it).
url = 'https://www.instagram.com/accounts/login/'
# Endpoint the login payload is POSTed to.
ajax_url = 'https://www.instagram.com/accounts/login/ajax/'
# Profile lookup; the target username is appended to this URL.
p_url = 'https://i.instagram.com/api/v1/users/web_profile_info/?username='

# Prefix for every generated file (txt/xlsx output, cached session, reports).
path = './data/'

# Request headers sent with every call; main.py adds 'x-csrftoken' and
# 'Referer' to this dict at run time.
headers = {
    'authority': 'www.instagram.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh-TW,zh;q=0.9',
    'dnt': '1',

    'sec-ch-prefers-color-scheme': 'dark',
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',

    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'sec-gpc': '1',

    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Instagram 105.0.0.11.118 (iPhone11,8; iOS 12_3_1; en_US; en-US; scale=2.00; 828x1792; 165586599)',
    'viewport-width': '1707',
    'X-Requested-With': 'XMLHttpRequest',
}