ThreadPoolExecutor是 Python 中用于管理线程池的强大工具,特别适合处理 I/O 密集型任务。下面通过几个具体示例来展示其用法。
核心用法示例
1. 基础任务提交
from concurrent.futures import ThreadPoolExecutor
import time


def task(name, duration):
    """Simulate a time-consuming task.

    Args:
        name: Display name used in the progress messages.
        duration: Seconds to sleep, simulating I/O or heavy computation.

    Returns:
        A completion message string.
    """
    print(f"任务 {name} 开始执行")
    time.sleep(duration)  # simulate blocking I/O or an expensive computation
    return f"任务 {name} 完成,耗时 {duration}秒"


# Guard the demo so importing this module has no side effects: the original
# ran the pool -- including a 2-second sleep -- at import time.
if __name__ == "__main__":
    # Create a thread pool with at most 10 worker threads.
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        # submit() returns immediately with a Future; the task runs in the pool.
        future = thread_pool.submit(task, "A", 2)
        # result() blocks until the task finishes, then returns its value.
        result = future.result()
        print(result)
2. 批量提交任务
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_data(item):
    """Double a single data item and return the result."""
    return item * 2


data_list = [1, 2, 3, 4, 5]

# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        # Submit one task per item; futures preserve submission order.
        futures = [thread_pool.submit(process_data, item) for item in data_list]
        # as_completed() yields futures in COMPLETION order, not submission order.
        for future in as_completed(futures):
            result = future.result()
            print(f"处理结果: {result}")
实际应用场景示例
3. 并发网络请求
# BUG FIX: as_completed is used below but was not imported in this snippet.
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests


def fetch_url(url):
    """Fetch *url* and return a (content length in bytes, url) tuple."""
    # BUG FIX: requests.get() without a timeout can block forever on a
    # stalled connection; always set one.
    response = requests.get(url, timeout=10)
    return len(response.content), url


urls = [
    "https://www.example.com",
    "https://www.google.com",
    "https://www.github.com"
]

# Guard: network requests should never fire as an import side effect.
if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        futures = [thread_pool.submit(fetch_url, url) for url in urls]
        for future in as_completed(futures):
            size, url = future.result()
            print(f"{url}: 大小 {size} 字节")
4. 文件批量处理
# BUG FIX: as_completed is used below but was not imported in this snippet.
from concurrent.futures import ThreadPoolExecutor, as_completed
import os


def process_file(filename):
    """Simulate processing a single file and return the processed content."""
    # BUG FIX: the original f-string had lost its placeholder; include the
    # filename so each result identifies which file it came from.
    processed_content = f"处理后的{filename}"
    return processed_content


file_list = ["file1.txt", "file2.txt", "file3.txt"]

# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        # Map each Future back to its originating filename so results can be
        # attributed even though they complete out of order.
        futures = {}
        for filename in file_list:
            future = thread_pool.submit(process_file, filename)
            futures[future] = filename
        for future in as_completed(futures):
            original_file = futures[future]
            result = future.result()
            print(f"文件 {original_file} 处理完成: {result}")
5.在 thread_pool.submit()中,形参不仅可以传递具体的值,还可以直接传递另一个函数的返回值。Python 会在调用 submit方法时先执行作为参数的函数,并将其返回值作为实际参数传递给目标任务。
from concurrent.futures import ThreadPoolExecutor
import time


def get_task_config(task_id):
    """Build and return the configuration dict for one task.

    In real code this might come from a file, a database, or an
    expensive computation.
    """
    config = {
        'name': f'Task-{task_id}',
        'duration': task_id * 2  # duration scales with the task id
    }
    print(f"为任务 {task_id} 生成配置: {config}")
    return config


def task(name, duration):
    """Simulated long-running task; sleeps *duration* seconds."""
    print(f"任务 {name} 开始执行,预计耗时 {duration} 秒")
    time.sleep(duration)
    return f"任务 {name} 完成,实际耗时 {duration}秒"


# Guard the demo: the original ran the pool (and a 2-second sleep) on import.
if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=3) as executor:
        # BUG FIX: the original called get_task_config(1) TWICE -- duplicate
        # work and duplicate prints -- just to read two keys. Call it once
        # and reuse the dict.
        config = get_task_config(1)
        future = executor.submit(task, config['name'], config['duration'])
        result = future.result()
        print(result)
关键特性说明
| 特性 | 说明 | 代码示例 |
|---|---|---|
| 非阻塞提交 | `submit()` 立即返回 `Future` 对象,不等待任务完成 | `future = pool.submit(task, arg)` |
| 结果获取 | `result()` 会阻塞,直到任务完成并返回结果 | `result = future.result()` |
| 状态查询 | 可检查任务状态 | `future.done()` / `future.running()` |
| 异常处理 | 任务异常在调用 `result()` 时重新抛出 | `try: future.result() except Exception: ...` |
完整工作流程示例
from concurrent.futures import ThreadPoolExecutor, wait


def complex_task(task_id, data):
    """Square *data* and return it paired with *task_id*."""
    print(f"任务 {task_id} 开始处理数据: {data}")
    return task_id, data ** 2


def main():
    """Fan the demo jobs out over a thread pool and collect their results."""
    job_specs = [
        ("task1", 10),
        ("task2", 20),
        ("task3", 30),
    ]
    # The with-statement guarantees the pool is shut down on exit.
    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = [
            pool.submit(complex_task, job_id, payload)
            for job_id, payload in job_specs
        ]
        wait(pending)  # block until every future has settled

        collected = []
        # Iterate in submission order so the output is deterministic.
        for fut in pending:
            try:
                task_id, result = fut.result()
            except Exception as e:
                print(f"任务执行出错: {e}")
            else:
                collected.append((task_id, result))
                print(f"任务 {task_id} 完成,结果: {result}")
        return collected


if __name__ == "__main__":
    results = main()
    print("所有任务执行完毕")
注意事项
- 资源管理:使用 `with` 语句确保线程池正确关闭
- 异常处理:始终在 `try-except` 块中调用 `future.result()`
- 线程安全:确保任务函数是线程安全的,避免共享资源竞争
- 任务数量:合理设置 `max_workers`;对 I/O 密集型任务通常可设为 CPU 核心数的 1-2 倍或更高
这种线程池模式能够显著提高 I/O 密集型应用的性能,同时保持代码的简洁性和可维护性。
11万+

被折叠的 条评论
为什么被折叠?



