Scrapy 自带的代理中间件不好用,可以自定义一个下载器中间件来设置代理。
class ProxyMiddleware(object):
    """Downloader middleware that sends each request through a random proxy.

    One entry of ``PROXIES`` is picked at random per request and written to
    ``request.meta['proxy']``. A ``Proxy-Authorization`` header is added only
    when the chosen entry carries credentials.
    """

    # Candidate proxies. ``ip_port`` is "host:port"; ``user_pass`` is
    # "user:password", or an empty string when the proxy needs no login.
    # Kept at class level so the list is built once and can be overridden
    # by a subclass or on an instance.
    PROXIES = [
        {'ip_port': '111.11.228.75:80', 'user_pass': ''},
        {'ip_port': '120.198.243.22:80', 'user_pass': ''},
        {'ip_port': '111.8.60.9:8123', 'user_pass': ''},
        {'ip_port': '101.71.27.120:80', 'user_pass': ''},
        {'ip_port': '122.96.59.104:80', 'user_pass': ''},
        {'ip_port': '122.224.249.122:8088', 'user_pass': ''},
    ]

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy to ``request``.

        The method name must not be changed: the framework looks the hook up
        by this exact name.

        Args:
            request: Outgoing request; its ``meta`` and ``headers`` mappings
                are modified in place.
            spider: The spider issuing the request (unused).

        Returns:
            None, so that the request continues through the remaining
            middlewares.
        """
        proxy = random.choice(self.PROXIES)  # pick a proxy at random

        # Native string URL: the same value as before on Python 2, and not
        # bytes on Python 3.
        request.meta['proxy'] = "http://%s" % proxy['ip_port']

        user_pass = proxy.get('user_pass')
        if not user_pass:
            # No credentials (None or ''): do not send an empty
            # Proxy-Authorization header.
            print("**************ProxyMiddleware no pass************" + proxy['ip_port'])
            return

        # HTTP Basic credentials are base64("user:password"). b64encode,
        # unlike the removed encodestring, does not append a newline that
        # would corrupt the header value.
        if not isinstance(user_pass, bytes):
            user_pass = user_pass.encode('utf-8')
        request.headers['Proxy-Authorization'] = b'Basic ' + base64.b64encode(user_pass)
        print("**************ProxyMiddleware have pass************" + proxy['ip_port'])
最后在 settings.py 中启用该中间件:DOWNLOADER_MIDDLEWARES = {'step8_king.middlewares.ProxyMiddleware': 500,}