# 获取免费的代理并验证代理的可用性
# 爬虫第二步, 找到了xicidaili
import requests
from lxml import etree
# Request headers sent with the proxy-list download. The User-Agent string
# identifies the client as desktop Chrome 71 on 64-bit Windows 10.
_CHROME_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
headers = {'User-Agent': _CHROME_USER_AGENT}
def get_free_proxy(url='https://www.xicidaili.com/nn/', timeout=10):
    """Yield proxy URLs scraped from a proxy listing page.

    The page is downloaded lazily, on the first iteration of the generator.
    Rows are read from the table whose id is ``ip_list``; the second cell of
    each row is used as the IP address and the third as the port.

    Args:
        url: Listing page to download. Defaults to the xicidaili page this
            function has always used.
        timeout: Seconds passed to ``requests.get`` so an unresponsive site
            cannot block the caller forever.

    Yields:
        Strings of the form ``'http://<ip>:<port>'``, one per table row that
        provides both values.

    Note:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    # Crawler step 3: download the listing page with browser-like headers.
    response = requests.get(url, headers=headers, timeout=timeout)
    html_ele = etree.HTML(response.content)
    if html_ele is None:
        # Guard in case the parser produced no document (e.g. an empty body).
        return
    tr_eles = html_ele.xpath('//table[@id="ip_list"]//tr')
    # The first row is skipped (presumably the table header). Slicing is used
    # instead of pop(0) so a page without the table yields nothing rather than
    # raising IndexError.
    for tr_ele in tr_eles[1:]:
        ip_cells = tr_ele.xpath('./td[2]/text()')
        port_cells = tr_ele.xpath('./td[3]/text()')
        if not ip_cells or not port_cells:
            # Row lacks the expected cells; skip it instead of crashing.
            continue
        ip_str = ip_cells[0].strip()
        port = port_cells[0].strip()
        if not ip_str or not port:
            continue
        yield 'http://' + ip_str + ':' + port
def validate_proxy(proxy_str):
url = 'http://httpbin.org/get'
proxy = {
'http': proxy_str,
'https': proxy_str
}
try:
response = requests.get(url, proxies=proxy, timeout=5)
python 爬取可用代理ip
最新推荐文章于 2025-06-29 09:09:42 发布