IP及端口号的获取及保存【实用】

最新推荐文章于 2023-10-26 11:27:54 发布

SayLove丶

最新推荐文章于 2023-10-26 11:27:54 发布

阅读量635

点赞数

分类专栏： python3 爬虫文章标签： python 爬虫

本文链接：https://blog.youkuaiyun.com/qq_34776122/article/details/78432661

版权

python3 同时被 2 个专栏收录

16 篇文章

订阅专栏

爬虫

9 篇文章

订阅专栏

import urllib.request
import re
import os
def url_open(url):
    """Fetch *url* and return the raw, undecoded response body.

    The request carries a desktop Firefox ``User-Agent`` header instead of
    urllib's default one.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as ``bytes``; decoding is left to the caller.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` (this includes
            ``urllib.error.HTTPError`` for HTTP error statuses).
    """
    request = urllib.request.Request(url)
    request.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; rv:51.0) Gecko/20100101 Firefox/51.0')
    # The response wraps an open connection; the ``with`` block closes it
    # even if read() fails, instead of leaving that to garbage collection.
    with urllib.request.urlopen(request) as response:
        return response.read()

def ip(url,file,count,user_input):
    """Scrape IP/port pairs from one page and append them to *file*.

    The page is downloaded with ``url_open``. Every dotted-quad number found
    in it is paired, in order of appearance, with the digits captured by the
    ``"PORT">...</td>`` pattern, and each pair is appended to *file* as
    ``"<ip>  <port>"`` on its own line. A progress line is printed per record.

    Args:
        url: Address of the listing page to scrape.
        file: Path of the text file that records are appended to.
        count: 1-based number of the next record to be written.
        user_input: Total number of records wanted.

    Returns:
        The updated counter. It equals ``user_input + 1`` once the requested
        number of records has been written.
    """
    # Quota already reached (or nothing was requested): do not download or
    # write anything further.
    if count > user_input:
        return count

    # Download once and decode leniently. Undecodable bytes are replaced
    # rather than aborting: the IPs and ports are plain ASCII digits, so a
    # few replacement characters elsewhere in the page do not affect them.
    raw = url_open(url)
    try:
        html = raw.decode('utf-8')
    except UnicodeDecodeError:
        html = raw.decode('utf-8', errors='replace')

    addresses = re.findall(r'(\d+\.\d+\.\d+\.\d+)', html)  # IP addresses
    ports = re.findall(r'"PORT">(\d*)</td>', html)         # port numbers
    # zip() pairs the n-th address with the n-th port; building a dict also
    # collapses repeated addresses (the last port seen for an address wins).
    proxies = dict(zip(addresses, ports))
    if not proxies:
        # Nothing to record: return without touching the output file.
        return count

    # Open the output file once for the whole page rather than per record.
    with open(file, 'a+') as f:
        for address, port in proxies.items():
            f.write(address + '  ' + port + '\n')
            print('第%s条'%count)
            count += 1
            if count > user_input:  # requested number of records reached
                print('获取完成，请到%s文件夹下查看!'%file)
                break
    return count
if __name__ =='__main__':
    # Script entry point: ask how many records are wanted, then walk the
    # numbered listing pages until that many have been saved.
    #
    # Imported here so this block is self-contained: urllib.error is named
    # explicitly in the except clause below, and time is used for the pause
    # between retries.
    import time
    import urllib.error

    MAX_HTTP_RETRIES = 20     # consecutive HTTP failures tolerated before giving up
    RETRY_DELAY_SECONDS = 1   # pause before asking for the same page again

    user_input=int(input('要下载多少条：'))
    # Raw string: the backslashes are path separators, not escape sequences.
    file=r'D:\My Documents\Desktop\ips.txt'
    page = 1       # number of the listing page being fetched
    count = 1      # 1-based number of the next record to write
    failures = 0   # consecutive HTTP errors seen so far
    # ip() returns user_input + 1 once enough records were written, which
    # ends the loop; a request for zero (or fewer) records never enters it.
    while count <= user_input:
        url = 'http://www.kuaidaili.com/free/inha/%s/'%page
        try:
            count = ip(url,file,count,user_input)
        except TypeError:
            # The page content could not be processed; skip this page.
            pass
        except urllib.error.HTTPError:
            # The site is unreliable and often answers 503: retry the same
            # page after a short pause, but re-raise once the failures pile
            # up so a permanent error cannot loop forever.
            failures += 1
            if failures >= MAX_HTTP_RETRIES:
                raise
            time.sleep(RETRY_DELAY_SECONDS)
            continue
        failures = 0
        page += 1