import requests
from lxml import etree
# Browser-style request headers so the target site treats the check like a normal visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063'}

def proxy():
    """Read the proxy pool file and return a list of proxy dicts usable by requests."""
    with open(r'ip_proxies\ip代理池.txt', 'r', encoding='utf-8') as f:
        r = f.readlines()
    ip_list = []
    for ip in r:
        # The first four space-separated tokens of each line are assumed to form a
        # dict literal like "{'http': 'http://ip:port', 'https': 'https://ip:port'}".
        ip_new = ''.join(ip.split(' ')[0:4])
        proxies = eval(ip_new)
        ip_list.append(proxies)
    return ip_list

def test_ip(ip, headers):
    """Return the proxy dict if it can reach the test page within 3 seconds, else None."""
    try:
        # Alternative check via ip.cn, which also reports the proxy's apparent location:
        # response = requests.get("https://ip.cn/",
        #                         headers=headers, proxies=ip, timeout=3)
        # d = response.content.decode()
        # html = etree.HTML(d)
        # addr = html.xpath('//*[@id="result"]/div/p[2]/code/text()')
        # address = addr[0] if len(addr) > 0 else '未知地点'  # '未知地点' = "unknown location"
        # return ip, address
        response = requests.get("https://www.baidu.com/",
                                headers=headers, proxies=ip, timeout=3)
        if response.status_code == 200:
            return ip
    except requests.RequestException:
        return None

# Validate every proxy in the pool and write the working ones to a file.
f = open(r'ip_proxies\有效ip.txt', 'w', encoding='utf-8')
for i in proxy():
    result = test_ip(i, headers)
    if result is not None:
        # With the ip.cn variant above, test_ip returns (proxy, location) instead:
        # ip_re, address = result
        # print(ip_re, address)
        ip_re = result
        print(ip_re)
        f.write('{}'.format(ip_re))
        f.write('\n')
f.close()
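
For reference, here is a minimal sketch of the line format that proxy() appears to expect. The real ip代理池.txt is whatever the proxy-pool crawler wrote out earlier, so the sample line below is only an assumption for illustration:

# Hypothetical pool line: a proxy dict literal, optionally followed by extra fields.
sample = "{'http': 'http://117.95.55.40:9999', 'https': 'https://117.95.55.40:9999'}  0.8s\n"
dict_text = ''.join(sample.split(' ')[0:4])
# dict_text == "{'http':'http://117.95.55.40:9999','https':'https://117.95.55.40:9999'}"
proxies = eval(dict_text)   # ast.literal_eval(dict_text) would be a safer choice here
print(proxies['http'])      # -> http://117.95.55.40:9999

Because the file contents are evaluated directly, ast.literal_eval is worth considering over eval whenever the pool file might contain anything other than plain dict literals.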