python 批量检测URL

首先导入需要的库

  • requests:用于发送 HTTP 请求。
  • urlparse:用于解析 URL。
  • ThreadPoolExecutor、as_completed:用于并发处理 URL。

import requests
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

编写函数,发送 HTTP GET 请求

def get_status_code(url, timeout=10):
    """Send an HTTP GET request to *url* and report the outcome.

    Returns ``(url, status_code)`` on success, or ``(url, error_message)``
    when the request fails for any reason.

    ``verify=False`` deliberately skips TLS certificate verification so
    hosts with self-signed certificates can still be probed.
    """
    try:
        # A timeout keeps one unresponsive host from hanging its worker
        # thread forever (new keyword parameter, backward compatible).
        response = requests.get(url, verify=False, timeout=timeout)
        return url, response.status_code
    except requests.RequestException as e:
        return url, f"请求错误: {e}"

然后处理URL,确保URL包含HTTP协议信息,然后处理是否有重复的URL

def process_urls(file_path, max_threads=10):
    """Read URLs from *file_path*, normalise them and de-duplicate them.

    Each non-empty line is treated as one URL; lines without a scheme
    get ``http://`` prepended. (The collected URLs are probed
    concurrently in the code that follows.)
    """
    urls = set()  # a set silently drops duplicate URLs
    with open(file_path, 'r') as file:
        for line in file:
            url = line.strip()
            if url:
                # Bare host names have no scheme: prepend one.
                if not urlparse(url).scheme:
                    url = 'http://' + url
                # "host:8080" fools urlparse into seeing "host" as the
                # scheme, so catch that shape as well.
                if ':' in url and not url.startswith(('http://', 'https://')):
                    url = 'http://' + url
                urls.add(url)  # add to the set

然后并发的访问URL,并记录

    status_code_200_urls = []
    with ThreadPoolExecutor(max_threads) as executor:
        # One future per URL; keep a reverse map so each completed
        # future can be traced back to the URL it was probing.
        future_to_url = {executor.submit(get_status_code, url): url for url in urls}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                # get_status_code yields (url, status_code) on success
                # or (url, error_message_string) on failure.
                url, status_code = future.result()
                if status_code == 200:
                    status_code_200_urls.append(url)
                print(f"访问 {url} 的状态码是: {status_code}")
            except Exception as e:
                print(f"请求错误: {e}")

最后就将状态码为200的URL,保存在txt中

完整代码如下

import requests
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_status_code(url, timeout=10):
    """Send an HTTP GET request to *url* and report the outcome.

    Returns ``(url, status_code)`` on success, or ``(url, error_message)``
    when the request fails for any reason.

    ``verify=False`` deliberately skips TLS certificate verification so
    hosts with self-signed certificates can still be probed.
    """
    try:
        # A timeout keeps one unresponsive host from hanging its worker
        # thread forever (new keyword parameter, backward compatible).
        response = requests.get(url, verify=False, timeout=timeout)
        return url, response.status_code
    except requests.RequestException as e:
        return url, f"请求错误: {e}"

def process_urls(file_path, max_threads=10):
    """Read URLs from *file_path*, probe them concurrently and record
    the ones that answer with HTTP 200.

    Each non-empty line of the file is one URL; lines missing a scheme
    get ``http://`` prepended, and duplicates are dropped. The URLs
    that returned status 200 are appended to ``200_status_urls.txt``
    and also returned as a list (new, backward-compatible return value;
    the original returned None).
    """
    urls = set()  # a set silently drops duplicate URLs
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            url = line.strip()
            if not url:
                continue
            # Bare host names have no scheme: prepend one.
            if not urlparse(url).scheme:
                url = 'http://' + url
            # "host:8080" fools urlparse into seeing "host" as the
            # scheme, so catch that shape as well.
            if ':' in url and not url.startswith(('http://', 'https://')):
                url = 'http://' + url
            urls.add(url)

    status_code_200_urls = []
    with ThreadPoolExecutor(max_threads) as executor:
        # One future per URL; keep a reverse map so each completed
        # future can be traced back to the URL it was probing.
        future_to_url = {executor.submit(get_status_code, url): url for url in urls}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                # get_status_code yields (url, status_code) on success
                # or (url, error_message_string) on failure.
                url, status_code = future.result()
                if status_code == 200:
                    status_code_200_urls.append(url)
                print(f"访问 {url} 的状态码是: {status_code}")
            except Exception as e:
                print(f"请求错误: {e}")

    # Append the 200-status URLs to the result file.
    with open('200_status_urls.txt', 'a', encoding='utf-8') as file:
        for url in status_code_200_urls:
            file.write(url + '\n')

    print("\n状态码为 200 的 URL:")
    for url in status_code_200_urls:
        print(url)
    print(f"\n状态码为 200 的网址总数: {len(status_code_200_urls)}")
    return status_code_200_urls

# Script entry point: probe every URL listed in the input file.
# The __main__ guard lets the module be imported without side effects.
if __name__ == "__main__":
    # Path of the file holding one URL per line.
    file_path = 'ping1.txt'
    # Maximum number of worker threads (tune as needed).
    max_threads = 10
    process_urls(file_path, max_threads)
