对于经常爬取网站的人来说，封 IP 是常见的情况。下面是爬取快代理和全代理免费 IP 的代码，需要的可以直接复制；有不懂的地方在下方评论即可。
import requests,re,time
from lxml.html import etree
# Request headers sent with the scraper's HTTP calls: a desktop Chrome
# User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/84.0.4147.125 Safari/537.36'
    ),
}
def kuai(pages=range(1, 30), timeout=10):
    """Scrape free proxies from kuaidaili.com and append them to ``ip.txt``.

    For every page number in *pages* the listing page is downloaded, the
    ``data-title="IP"`` and ``data-title="PORT"`` table cells are extracted
    with regular expressions, and one ``http://<ip>:<port>`` line per proxy
    is appended to ``ip.txt`` (UTF-8).

    Args:
        pages: Iterable of page numbers to fetch. Defaults to pages 1-29,
            the range the function has always crawled.
        timeout: Seconds to wait for each HTTP response; without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Raises:
        requests.RequestException: If a page cannot be downloaded.
    """
    # Compiled once, outside the page loop; raw strings keep ``\s`` a regex
    # escape instead of an (invalid) string escape.
    ip_cell = re.compile(r'<td\s*data-title="IP">(.*?)</td>')
    port_cell = re.compile(r'<td\s*data-title="PORT">(.*?)</td>')

    for page in pages:
        url = f'https://www.kuaidaili.com/free/inha/{page}/'
        # Send the module-level browser headers, the same way quan() does.
        html = requests.get(url, headers=headers, timeout=timeout).text

        # zip() pairs each IP with its port and stops at the shorter list,
        # so a row with a missing cell cannot raise IndexError.
        proxies = [
            f'http://{ip}:{port}\n'
            for ip, port in zip(ip_cell.findall(html), port_cell.findall(html))
        ]
        if not proxies:
            continue

        # Append once per page so results from earlier pages are kept even
        # if a later request fails.
        with open('ip.txt', 'a+', encoding='utf-8') as f:
            f.writelines(proxies)
def quan():
    """Scrape proxies from goubanjia.com and append them to ``ip.txt``.

    The front page is downloaded with the module-level ``headers``, every
    ``<td class="ip">`` cell is located, the text inside each cell is joined
    and searched for ``<ip>:<port>`` patterns, and each match is appended to
    ``ip.txt`` (UTF-8) as an ``http://<ip>:<port>`` line.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
    """
    url = 'http://www.goubanjia.com/'
    # A timeout keeps the script from hanging forever on a stalled server.
    page = requests.get(url=url, headers=headers, timeout=10).text
    tree = etree.HTML(page)
    if tree is None:
        # Nothing parseable came back, so there is nothing to extract.
        return

    # Four dot-separated octets, a colon, then the port. The dots are
    # escaped so they only match a literal '.', not an arbitrary character.
    proxy_re = re.compile(r'\d{1,3}(?:\.\d{1,3}){3}:\d+')

    lines = []
    for cell in tree.xpath('//td[@class="ip"]'):
        # './/text()' is relative to this cell. The absolute '//text()'
        # would return the text of the whole document for every cell and
        # write each proxy once per cell.
        # NOTE(review): if the site hides decoy elements inside the cell,
        # their text is still included here - verify against live markup.
        cell_text = ''.join(cell.xpath('.//text()'))
        lines.extend(f'http://{match}\n' for match in proxy_re.findall(cell_text))

    if lines:
        with open('ip.txt', 'a+', encoding='utf-8') as f:
            f.writelines(lines)
# Script entry point: runs the goubanjia scraper when the file is executed.
# kuai() is defined above but is not called here.
quan()