#coding:utf8
import httplib
from bs4 import BeautifulSoup
import re
import sys
# Python 2 only: `reload` is a builtin there. Together these two calls switch
# the interpreter-wide default string encoding to UTF-8 before any scraping runs.
reload(sys)
sys.setdefaultencoding('utf-8')
# Module-level accumulators shared by get_ip_list / get_port_list / main.
# They are appended to on every call and never cleared.
ip_L =[]
port_L = []
def urlList(last_page=10):
    """Build the list of listing-page paths to request.

    The first page is ``/proxy/index.shtml``; pages 2 through ``last_page``
    follow the ``/proxy/http_<n>.shtml`` pattern.

    Args:
        last_page: Number of the final page to include. Defaults to 10,
            which reproduces the previously hard-coded ``range(2, 11)``.

    Returns:
        A new list of URL path strings, index page first.
    """
    paths = ['/proxy/index.shtml']
    paths.extend(
        '/proxy/http_' + str(page) + '.shtml'
        for page in range(2, last_page + 1)
    )
    return paths
def get_ip_list(iplist):
    """Append the text of every element in ``iplist`` to the global ``ip_L``.

    Args:
        iplist: Iterable of objects exposing a ``.string`` attribute
            (``main`` passes the result of ``soup.find_all``).

    Returns:
        The module-level ``ip_L`` list itself (not a copy), which keeps
        growing across calls.
    """
    # Values are stored as-is, one per element, so positions stay aligned
    # with the entries collected by get_port_list for the same page.
    ip_L.extend(ip.string for ip in iplist)
    return ip_L
def get_port_list(portlist):
    """Append the text of every element in ``portlist`` to the global ``port_L``.

    Args:
        portlist: Iterable of objects exposing a ``.string`` attribute
            (``main`` passes the result of ``soup.find_all``).

    Returns:
        The module-level ``port_L`` list itself (not a copy), which keeps
        growing across calls.
    """
    # Values are stored as-is, one per element, so positions stay aligned
    # with the entries collected by get_ip_list for the same page.
    port_L.extend(port.string for port in portlist)
    return port_L
def main(url):
    """Fetch the proxy listing from host ``url`` and print ``ip:port`` lines.

    Opens one HTTP connection to ``url`` on port 80 (30 s timeout), requests
    the paths produced by ``urlList()``, parses each response with
    BeautifulSoup, and collects the text of ``<div class="ip">`` and
    ``<div class="port">`` elements into the module-level ``ip_L`` and
    ``port_L`` lists via ``get_ip_list`` / ``get_port_list``.

    Args:
        url: Host name to connect to (no scheme, no path).

    Returns:
        None. Output is written to stdout, one ``ip:port`` per line.
    """
    connect = httplib.HTTPConnection(url, 80, timeout=30)
    try:
        for path in urlList():
            connect.request('GET', path)
            response = connect.getresponse()
            page = response.read()
            soup = BeautifulSoup(page)
            # class_ is the keyword for matching on the tag's class attribute.
            get_ip_list(soup.find_all('div', class_='ip'))
            get_port_list(soup.find_all('div', class_='port'))

            # Pair the lists positionally. A dict keyed on IP would drop an
            # address listed with several ports and lose page order, so exact
            # duplicate pairs are filtered with a set instead.
            seen = set()
            for ip, port in zip(ip_L, port_L):
                # Beautiful Soup's .string can be None; concatenating it
                # would raise TypeError, so incomplete pairs are skipped.
                if ip is None or port is None:
                    continue
                if (ip, port) in seen:
                    continue
                seen.add((ip, port))
                # Single-argument parenthesised form works on Python 2 and 3.
                print(ip + ':' + port)

            # NOTE(review): indentation was lost in the source. This `break`
            # is read as belonging to the page loop, so only the first path
            # from urlList() is fetched. Confirm this is intended.
            break
    finally:
        # Always release the socket, including when a request or parse fails.
        connect.close()
# Script entry point: runs the scraper against www.cz88.net at module level,
# so this also executes if the file is imported.
main('www.cz88.net')
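# python抓取某代理网站代理IP及端口 (Scraping proxy IPs and ports from a proxy-listing website with Python)
# 最新推荐文章于 2024-01-16 11:15:34 发布 (web-page residue: "latest recommended article published 2024-01-16 11:15:34")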