import random
import urllib.request
import time
# Crawl pages through a proxy server, which works around per-IP access limits.
#http://www.xicidaili.com/
def proxyServer(url, proxy_addr, headers, timeout=None):
    """Fetch *url* through the proxy at *proxy_addr* and return the body as text.

    Args:
        url: Address to fetch.
        proxy_addr: Proxy as ``"host:port"``; applied to http and https requests.
        headers: A single ``(name, value)`` header pair sent with the request.
        timeout: Optional socket timeout in seconds. ``None`` (the default)
            leaves urllib's default timeout behaviour in place.

    Returns:
        The response body decoded as UTF-8, with undecodable bytes dropped.

    Raises:
        urllib.error.URLError: If the request cannot be completed (an
            ``OSError`` subclass, as is ``urllib.error.HTTPError``).
    """
    # Register the proxy for both schemes: with only an 'http' entry, https://
    # URLs are fetched directly and never go through the proxy.
    proxy = urllib.request.ProxyHandler({'http': proxy_addr, 'https': proxy_addr})
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    opener.addheaders = [headers]
    # Kept so that later plain urllib.request.urlopen() calls still use this opener.
    urllib.request.install_opener(opener)

    if timeout is None:
        response = opener.open(url)
    else:
        response = opener.open(url, timeout=timeout)
    # Close the response deterministically instead of leaking the connection.
    with response:
        return response.read().decode('utf-8', 'ignore')
# Page that is fetched repeatedly.
url = 'https://blog.youkuaiyun.com/weixin_42141853/article/details/80784327'

# Candidate proxies, each written as "host:port".
# NOTE(review): '118.190.95.269001:9001' is not a valid IPv4 address
# (possibly meant '118.190.95.26:9001') -- confirm and correct the entry.
proxyLists = [
    '111.231.115.150:8888',
    '118.190.95.269001:9001',
    '175.0.72.205:61202',
    '60.216.177.152:8118',
    '101.236.35.98:8866',
]

# Candidate User-Agent header values.
agentLists = [
    "Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
    "Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0",
    "Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11",
]

# Initial random pick of proxy and User-Agent for this run.
proxy = random.choice(proxyLists)
headers = ('User-Agent', random.choice(agentLists))

for _ in range(1, 10000):
    # Pause between requests.
    time.sleep(2)
    try:
        data = proxyServer(url, proxy, headers)
    except OSError as err:
        # urllib.error.URLError/HTTPError and socket errors all derive from
        # OSError. One bad proxy must not abort the whole run: report it,
        # switch to another randomly chosen proxy and carry on.
        print('request via %s failed: %s' % (proxy, err))
        proxy = random.choice(proxyLists)
        continue
    print(len(data))
    print(type(data))
    print(data)