"""Scrape free proxy IP/port pairs from kuaidaili.com into a text file.

Pages of ``http://www.kuaidaili.com/free/inha/<n>/`` are fetched one after
another; every IP/port pair found is appended to the output file as
``"<ip> <port>"`` until the number of entries requested by the user is reached.
"""
import os
import re
import time
import urllib.error
import urllib.request

_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:51.0) Gecko/20100101 Firefox/51.0'
_IP_PATTERN = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
_PORT_PATTERN = re.compile(r'"PORT">(\d*)</td>')

# Limits that keep the page loop from spinning forever.
_MAX_HTTP_RETRIES = 5       # consecutive HTTP errors tolerated for one page
_RETRY_DELAY_SECONDS = 1    # pause between retries of the same page
_MAX_EMPTY_PAGES = 3        # consecutive pages without any entry before stopping


def url_open(url):
    """Fetch *url* with a browser-like User-Agent and return the raw body bytes.

    Raises:
        urllib.error.HTTPError: when the server answers with an error status.
    """
    request = urllib.request.Request(url)
    request.add_header('User-Agent', _USER_AGENT)
    # The context manager closes the response even if read() fails.
    with urllib.request.urlopen(request) as response:
        return response.read()


def _parse_proxies(html):
    """Return an insertion-ordered ``{ip: port}`` mapping found in *html*.

    IPs and ports are matched independently and paired by position; a repeated
    IP keeps its first position and its last port.
    NOTE(review): any dotted quad in the page counts as an IP, so pairing can
    drift if the page contains other dotted numbers - confirm against the
    real markup.
    """
    addresses = _IP_PATTERN.findall(html)
    ports = _PORT_PATTERN.findall(html)
    return dict(zip(addresses, ports))


def ip(url, file, count, user_input):
    """Append the proxies listed on one page to *file*.

    Args:
        url: address of the page to scrape.
        file: path of the text file the entries are appended to.
        count: 1-based number of the next entry to be written.
        user_input: total number of entries the user asked for.

    Returns:
        The updated counter; it is greater than *user_input* once the
        requested number of entries has been written.
    """
    if count > user_input:
        # Nothing left to collect (also covers a request for 0 or fewer entries).
        return count

    # Undecodable bytes are replaced instead of aborting the page; the
    # ASCII IP/port text that the regexes look for is unaffected by that.
    html = url_open(url).decode('utf-8', errors='replace')
    proxies = _parse_proxies(html)
    if not proxies:
        return count

    # Open the output once per page rather than once per entry.
    with open(file, 'a') as f:
        for address, port in proxies.items():
            f.write(address + ' ' + port + '\n')
            print('第%s条' % count)
            count += 1
            if count > user_input:  # requested amount reached
                print('获取完成,请到%s文件夹下查看!' % file)
                break
    return count


def main():
    """Ask how many entries to download and walk the listing pages."""
    user_input = int(input('要下载多少条:'))
    file = r'D:\My Documents\Desktop\ips.txt'
    page = 1            # page number being scraped
    count = 1           # number of the next entry to write
    http_failures = 0   # consecutive HTTP errors on the current page
    empty_pages = 0     # consecutive pages that yielded nothing

    while count <= user_input:
        url = 'http://www.kuaidaili.com/free/inha/%s/' % page
        try:
            new_count = ip(url, file, count, user_input)
        except urllib.error.HTTPError:
            # The site is flaky (frequent 503s): retry the same page, but
            # with a pause and an upper bound instead of a tight endless loop.
            http_failures += 1
            if http_failures > _MAX_HTTP_RETRIES:
                raise
            time.sleep(_RETRY_DELAY_SECONDS)
            continue
        http_failures = 0

        if new_count == count:
            # No entry on this page; give up after a few in a row so the
            # loop cannot run forever past the last page.
            empty_pages += 1
            if empty_pages >= _MAX_EMPTY_PAGES:
                print('连续%s页未获取到数据,已停止' % empty_pages)
                break
        else:
            empty_pages = 0
        count = new_count
        page += 1


if __name__ == '__main__':
    main()
# IP及端口号的获取及保存【实用】
# 最新推荐文章于 2023-10-26 11:27:54 发布