1. Using a proxy in Selenium

import random
from selenium import webdriver

# Proxy pool; if a proxy stops working, remove it from proxy_arr and try another
proxy_arr = [
    # '--proxy-server=https://220.173.37.128:7890',
    '--proxy-server=http://14.20.235.129:34100',
]
chrome_options = webdriver.ChromeOptions()
proxy = random.choice(proxy_arr)  # pick one proxy at random
print(proxy)
chrome_options.add_argument(proxy)  # add the proxy argument
driver = webdriver.Chrome(options=chrome_options)  # launch Chrome with the proxy applied
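Once the driver is up, it helps to verify that traffic really goes through the proxy and to handle the failure case mentioned in the comment above. A minimal sketch, assuming the driver, proxy and proxy_arr variables from the snippet above; httpbin.org/ip is only an example IP-echo endpoint:

from selenium.common.exceptions import WebDriverException

try:
    driver.get('http://httpbin.org/ip')  # the page body should show the proxy's IP, not your own
    print(driver.page_source)
except WebDriverException:
    proxy_arr.remove(proxy)  # drop the failed proxy so the next random choice avoids it
finally:
    driver.quit()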
2. Using a proxy in the Scrapy framework

# Downloader middleware that attaches a free proxy IP to each request
import random

class ProxyMiddleware(object):
    def process_request(self, request, spider):
        # The proxy list can be kept in settings.py or fetched from a proxy provider's web API
        # Free proxies expire quickly; a "111 connection refused" error means the proxy is dead,
        # so replace it with another one and try again
        proxies = ['https://39.103.175.42:3128', 'https://116.232.145.42:8118']
        request.meta['proxy'] = random.choice(proxies)  # meta['proxy'] expects a single URL, not a list
        return None  # returning None (or nothing) lets the request continue through the chain
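For the middleware to take effect it also has to be registered in settings.py. A minimal sketch, assuming the class above lives in middlewares.py of a project named myproject (both names are placeholders for your own project layout):

# settings.py
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.ProxyMiddleware': 543,  # 543 is a common priority slot for custom downloader middleware
}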
3. Using a proxy in a plain urllib crawler

import random
import urllib.request

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/557.35 (KHTML, like Gecko) Chrome/93.0.4477.82 Safari/537.36 Edg/93.0.961.51'
}
# Pool of free HTTP proxies; each entry maps the scheme to "host:port"
proxies_pool = [
    {'http': '119.23.186.31:8089'},
    {'http': '49.232.185.65:7890'},
    {'http': '39.103.157.77:7890'}
]

# Wrapped in a helper function so the trailing return is valid
def fetch(url):
    proxies = random.choice(proxies_pool)  # pick one proxy dict at random
    request = urllib.request.Request(url=url, headers=headers)
    handler = urllib.request.ProxyHandler(proxies=proxies)  # route the request through the proxy
    opener = urllib.request.build_opener(handler)
    response = opener.open(request)
    return response
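A quick way to confirm the proxy is actually being used is to request an IP-echo service and print the body. The sketch below assumes the fetch helper defined above; httpbin.org/ip is only an example endpoint:

if __name__ == '__main__':
    resp = fetch('http://httpbin.org/ip')  # the echoed IP should be the proxy's, not yours
    print(resp.read().decode('utf-8'))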
Next: learning the Scrapy framework.