爬虫最强IP代理池设置：三家免费IP共享网站资源

本文为博主原创文章，未经博主允许不得转载。https://blog.youkuaiyun.com/weixin_43576564

本文链接：https://blog.youkuaiyun.com/weixin_43576564/article/details/103771959

本文介绍了如何建立强大的IP代理池，利用三家免费的IP资源网站，为网络爬虫提供稳定的数据抓取支持。通过设置代理池，可以有效避免因单一IP被封禁而影响爬虫的运行效率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import requests
from lxml import etree
import threading
from queue import Queue

# Worker threads started by the __main__ section; kept so they can be joined.
threads = []

# Browser-like User-Agent sent by the requests that pass ``headers=headers``.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}

# Shared pool of scraped proxies; the parsers append [type, ip, port] lists.
pro_ips = []

# Free proxy listing pages, consumed by parse1, parse2 and parse3 respectively.
url1 = 'https://www.kuaidaili.com/free/intr/'
url2 = 'http://www.iphai.com/free/wg'
url3 = 'https://www.xicidaili.com/nn/1'

def parse1(url1):
    """Scrape a proxy listing page and add its entries to ``pro_ips``.

    The page is expected to list proxies in ``<tbody>`` table rows with the
    IP in column 1, the port in column 2 and the proxy type in column 4.
    Every row found is appended to the module-level ``pro_ips`` list as
    ``[type, ip, port]`` and printed.

    Args:
        url1: URL of the listing page to fetch.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the server does not answer within the timeout).
    """
    # Send the shared browser-like headers and bound the wait so an
    # unresponsive site cannot hang the whole script.
    response = requests.get(url1, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    html = etree.HTML(response.text)
    # All three columns are taken from the same //tbody/tr rows so that the
    # i-th ip, port and type belong to the same table row.
    ips = html.xpath('//tbody/tr/td[1]/text()')
    ports = html.xpath('//tbody/tr/td[2]/text()')
    types = html.xpath('//tbody/tr/td[4]/text()')
    # zip stops at the shortest column instead of raising IndexError.
    for typ, ip, port in zip(types, ips, ports):
        entry = [typ, ip, port]
        pro_ips.append(entry)
        # Print the entry just added, independent of what pro_ips already held.
        print(entry)

def parse2(url2):
    """Scrape a proxy listing page and add its entries to ``pro_ips``.

    The page is expected to list proxies in table rows with the IP in
    column 1, the port in column 2 and the proxy type in column 4. Cell
    text is whitespace-stripped; a blank type cell is treated as ``"http"``.
    Every row found is appended to the module-level ``pro_ips`` list as
    ``[type, ip, port]`` and printed.

    Args:
        url2: URL of the listing page to fetch.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the server does not answer within the timeout).
    """
    # Send the shared browser-like headers and bound the wait so an
    # unresponsive site cannot hang the whole script.
    response = requests.get(url2, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    html = etree.HTML(response.text)
    ips = html.xpath('//tr/td[1]/text()')
    ports = html.xpath('//tr/td[2]/text()')
    types = html.xpath('//tr/td[4]/text()')
    # Walk every row (the last one included); zip stops at the shortest
    # column instead of raising IndexError when the columns differ in length.
    for raw_type, raw_ip, raw_port in zip(types, ips, ports):
        # A type cell holding only whitespace falls back to plain http.
        entry = [raw_type.strip() or "http", raw_ip.strip(), raw_port.strip()]
        pro_ips.append(entry)
        print(entry)

def parse3(url3):
    """Scrape a proxy listing page and add its entries to ``pro_ips``.

    The page is expected to list proxies in table rows with the IP in
    column 2, the port in column 3 and the proxy type in column 6.
    Every row found is appended to the module-level ``pro_ips`` list as
    ``[type, ip, port]`` and printed.

    Args:
        url3: URL of the listing page to fetch.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the server does not answer within the timeout).
    """
    # Bound the wait so an unresponsive site cannot hang the whole script.
    response = requests.get(url3, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    html = etree.HTML(response.text)
    ips = html.xpath('//tr/td[2]/text()')
    ports = html.xpath('//tr/td[3]/text()')
    types = html.xpath('//tr/td[6]/text()')
    # Walk every row (the last one included); zip stops at the shortest
    # column instead of raising IndexError when the columns differ in length.
    for typ, ip, port in zip(types, ips, ports):
        entry = [typ, ip, port]
        pro_ips.append(entry)
        # Print the entry just added; indexing pro_ips by the loop counter
        # would show entries that were already in the pool before this call.
        print(entry)

def trsts(pro):
    """Try one proxy by fetching a test URL through it and report the result.

    On success the response body is printed (icanhazip.com is presumably an
    IP echo service, so this would be the address the server saw -- confirm).
    On any failure within the 5 second timeout, "ip无效" is printed instead.

    Args:
        pro: proxies mapping passed straight to ``requests.get``, e.g.
            ``{"http": "http://1.2.3.4:8080"}``.
    """
    url = "http://icanhazip.com/"
    try:
        response = requests.get(url, headers=headers, proxies=pro, timeout=5)
    except Exception:
        # Best-effort check: any request failure just marks the proxy as
        # unusable. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        print("ip无效")
    else:
        print(response.text)

if __name__ == '__main__':
    # Script entry point: scrape the three listing pages into pro_ips, then
    # check every collected proxy concurrently, one thread per proxy.
    proxys = []
    parse1(url1)
    print("----------------")
    parse2(url2)
    print("----------------")
    parse3(url3)
    # Iterate over every collected entry, the last one included.
    for typ, ip, port in pro_ips:
        typ = typ.lower()
        # requests expects a mapping of URL scheme -> proxy URL.
        # NOTE(review): trsts fetches an http:// URL, so an entry keyed
        # "https" is likely not routed through the proxy at all -- confirm.
        pro = {typ: '%s://%s:%s' % (typ, ip, port)}
        proxys.append(pro)
        print(pro)
        # args must be a 1-tuple: with args=(pro) Thread would unpack the
        # dict itself and hand trsts its key string instead of the mapping.
        t = threading.Thread(target=trsts, args=(pro,))
        t.start()
        threads.append(t)
    # Wait for all checks to finish before announcing completion.
    for t in threads:
        t.join()
    print("结束")