目前正在自製一個IP池,但在驗證IP時使用了兩種方法,卻發現兩種方法得出的結果會不一樣!(方法1驗證成功,方法2卻失敗)
想請各位大大看一下是哪邊有問題<(_ _)>
方法1:
def parse(self, response):
...
meta = {
'port': port,
'proxy': proxy,
'dont_retry': True,
'download_timeout': 5,
'_proxy_scheme': scheme,
'_proxy_ip': ip,
'update': update
}
yield scrapy.Request('https://httpbin.org/ip',callback=self.proxy_check_available, meta=meta, dont_filter=True,errback = lambda x: self.download_errback(x,meta['proxy']))
def proxy_check_available(self, response):
json_data = json.loads(response.text)['origin']
proxy_ip = response.meta['_proxy_ip']+', '+response.meta['_proxy_ip']
item = GetProxyItem()
if proxy_ip == json.loads(response.text)['origin']:
item['scheme'] = response.meta['_proxy_scheme']
item['proxy'] = response.meta['proxy']
item['port'] = response.meta['port']
item['update'] = response.meta['update']
yield item
方法2:
url = 'https://httpbin.org/ip'
headers = {
"User_Agent" : "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"
# "Referer" : "http://www.xicidaili.com/nn/1"
}
res = requests.get(url,proxies=proxies,headers=headers)
json_data = json.loads(res.text)['origin']
if proxy_ip == json_data:
print('pass : '+ x['proxy'])
else:
print('delete : ' + x['proxy'])