首先导入需要的库:`requests` 用于发送 HTTP 请求;`urlparse` 用于解析 URL;`ThreadPoolExecutor` 和 `as_completed` 用于并发处理 URL。
import requests
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed
编写函数,发送 HTTP GET 请求并返回状态码:
def get_status_code(url, timeout=10):
    """Send an HTTP GET request to *url* and report the outcome.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without an
            explicit timeout ``requests`` may wait indefinitely, so a single
            unresponsive host could block a worker thread forever.

    Returns:
        A ``(url, result)`` tuple. ``result`` is the integer HTTP status code
        when the request completes, or a string describing the failure when
        ``requests.RequestException`` (which includes timeouts) is raised.
    """
    try:
        # NOTE: verify=False disables TLS certificate verification, so HTTPS
        # responses are not authenticated. Kept to preserve existing behavior.
        response = requests.get(url, verify=False, timeout=timeout)
    except requests.RequestException as e:
        return url, f"请求错误: {e}"
    return url, response.status_code
然后处理 URL:确保每个 URL 都包含协议(http:// 或 https://),并去除重复的 URL。
def process_urls(file_path, max_threads=10):
    """Read URLs from *file_path*, normalize them and drop duplicates.

    Args:
        file_path: Text file containing one URL (or bare host) per line.
            Blank lines are ignored.
        max_threads: Maximum number of worker threads for the later
            concurrent-request step.
    """
    urls = set()  # a set silently discards duplicate URLs
    # utf-8-sig strips a leading byte-order mark (if present) so that it does
    # not end up inside the first URL.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            url = line.strip()
            if not url:
                continue
            # Anything that does not already start with an HTTP(S) scheme
            # gets "http://" prepended. The check is case-insensitive so that
            # "HTTP://host" is not turned into "http://HTTP://host", and it
            # avoids urlparse(), which raises ValueError on malformed input
            # such as "http://[::1".
            if not url.lower().startswith(('http://', 'https://')):
                url = 'http://' + url
            urls.add(url)
然后并发地访问这些 URL,并记录状态码为 200 的 URL。
# Continuation of process_urls: `urls` and `max_threads` come from the
# preceding snippet; in the complete function this code sits inside its body.
status_code_200_urls = []
with ThreadPoolExecutor(max_threads) as executor:
    # Map each future back to the URL it was submitted for.
    future_to_url = {executor.submit(get_status_code, url): url for url in urls}
    # as_completed yields futures as they finish, not in submission order.
    for future in as_completed(future_to_url):
        url = future_to_url[future]
        try:
            # get_status_code returns (url, result); the URL is already known.
            _, status_code = future.result()
        except Exception as e:
            # Anything get_status_code did not handle itself ends up here.
            print(f"请求错误: {e}")
            continue
        if status_code == 200:
            status_code_200_urls.append(url)
        print(f"访问 {url} 的状态码是: {status_code}")
最后将状态码为 200 的 URL 保存到 txt 文件中。
完整代码如下:
import requests
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed
def get_status_code(url, timeout=10):
    """Send an HTTP GET request to *url* and report the outcome.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without an
            explicit timeout ``requests`` may wait indefinitely, so a single
            unresponsive host could block a worker thread forever.

    Returns:
        A ``(url, result)`` tuple. ``result`` is the integer HTTP status code
        when the request completes, or a string describing the failure when
        ``requests.RequestException`` (which includes timeouts) is raised.
    """
    try:
        # NOTE: verify=False disables TLS certificate verification, so HTTPS
        # responses are not authenticated. Kept to preserve existing behavior.
        response = requests.get(url, verify=False, timeout=timeout)
    except requests.RequestException as e:
        return url, f"请求错误: {e}"
    return url, response.status_code
def process_urls(file_path, max_threads=10, output_path='200_status_urls.txt'):
    """Check every URL listed in a file and record those answering HTTP 200.

    The URLs are normalized (``http://`` is prepended when no HTTP(S) scheme
    is present), de-duplicated, requested concurrently through
    ``get_status_code``, and the ones that return status 200 are appended to
    *output_path* and printed.

    Args:
        file_path: Text file containing one URL (or bare host) per line.
            Blank lines are ignored.
        max_threads: Maximum number of worker threads used for the requests.
        output_path: File to which the URLs with status 200 are appended,
            one per line. Defaults to the previously hard-coded name.

    Returns:
        The list of URLs whose status code was 200, in completion order.
    """
    urls = set()  # a set silently discards duplicate URLs
    # utf-8-sig strips a leading byte-order mark (if present) so that it does
    # not end up inside the first URL.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            url = line.strip()
            if not url:
                continue
            # Anything that does not already start with an HTTP(S) scheme
            # gets "http://" prepended. The check is case-insensitive so that
            # "HTTP://host" is not turned into "http://HTTP://host", and it
            # avoids urlparse(), which raises ValueError on malformed input
            # such as "http://[::1".
            if not url.lower().startswith(('http://', 'https://')):
                url = 'http://' + url
            urls.add(url)

    status_code_200_urls = []
    with ThreadPoolExecutor(max_threads) as executor:
        # Map each future back to the URL it was submitted for.
        future_to_url = {executor.submit(get_status_code, url): url for url in urls}
        # as_completed yields futures as they finish, not in submission order.
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                # get_status_code returns (url, result); the URL is already known.
                _, status_code = future.result()
            except Exception as e:
                # Anything get_status_code did not handle itself ends up here.
                print(f"请求错误: {e}")
                continue
            if status_code == 200:
                status_code_200_urls.append(url)
            print(f"访问 {url} 的状态码是: {status_code}")

    # Append (mode 'a') so that results from earlier runs are preserved.
    with open(output_path, 'a', encoding='utf-8') as file:
        file.writelines(f"{url}\n" for url in status_code_200_urls)

    print("\n状态码为 200 的 URL:")
    for url in status_code_200_urls:
        print(url)
    print(f"\n状态码为 200 的网址总数: {len(status_code_200_urls)}")
    return status_code_200_urls
# Path of the text file listing the URLs to check, one per line.
file_path = 'ping1.txt'
# Maximum number of worker threads (adjust as needed).
max_threads = 10

# Run the scan only when this file is executed as a script, so that importing
# the module does not trigger file access or network requests.
if __name__ == "__main__":
    process_urls(file_path, max_threads)