Pipeline Parallelism

Serial example:
import time

def plus_one(x):
    time.sleep(0.5)
    return x + 1

def mul_two(x):
    time.sleep(0.5)
    return x * 2

if __name__ == '__main__':
    for i in range(5):
        start_time = time.time()
        output = mul_two(plus_one(i))
        print(f"{output}: {time.time() - start_time:.1f}s")
Output:
2: 1.0s
4: 1.0s
6: 1.0s
8: 1.0s
10: 1.0s
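Each item passes through both 0.5 s stages back to back, so the serial loop spends 1.0 s per item and 5.0 s in total. If the two stages are overlapped in a pipeline, only the first result pays the full 1.0 s; after that a result should appear once per slowest-stage time. A quick back-of-the-envelope check, using only the delays from the example above:

# Expected timings for the toy example: two 0.5 s stages, 5 items.
N = 5
stage_times = [0.5, 0.5]

serial_total = N * sum(stage_times)                               # each item pays every stage
pipelined_total = sum(stage_times) + (N - 1) * max(stage_times)   # pipeline fill + steady state

print(f"serial:    {serial_total:.1f}s")     # 5.0s
print(f"pipelined: {pipelined_total:.1f}s")  # 3.0s

These numbers match the measured outputs before and after pipelining below.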
Parallel example:
import time
import threading
from queue import Queue

def input_worker(data_provider, inq):
    for x in data_provider():
        inq.put(x)
    # All data has been produced: put the termination sentinel
    inq.put(None)

def worker(func, inq, outq):
    while True:
        x = inq.get()
        # Exit the loop on the termination sentinel
        if x is None:
            # Pass the sentinel on to the next queue
            outq.put(None)
            break
        outq.put(func(x))

class Pipeline:
    def __init__(self, data_provider, *funcs):
        self.queues = [Queue(1) for _ in range(len(funcs) + 1)]
        self.threads = [threading.Thread(target=input_worker, args=(data_provider, self.queues[0]))]
        for i, func in enumerate(funcs):
            self.threads.append(threading.Thread(target=worker, args=(func, self.queues[i], self.queues[i + 1])))
        for t in self.threads:
            t.start()

    def __next__(self):
        item = self.queues[-1].get()
        # Stop iterating on the termination sentinel
        if item is None:
            raise StopIteration
        return item

    def __iter__(self):
        return self

    def stop(self):
        # Waiting with join lets the worker threads exit cleanly
        for t in self.threads:
            t.join()

def data_provider():
    for i in range(5):
        yield i

def plus_one(x):
    time.sleep(0.5)
    return x + 1

def mul_two(x):
    time.sleep(0.5)
    return x * 2

if __name__ == '__main__':
    pipeline = Pipeline(data_provider, plus_one, mul_two)
    start_time = time.time()
    try:
        for output in pipeline:
            print(f"{output}: {time.time() - start_time:.1f}s")
            start_time = time.time()
    finally:
        pipeline.stop()
Output:
2: 1.0s
4: 0.5s
6: 0.5s
8: 0.5s
10: 0.5s
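Threads give a real speedup here because each stage spends its time in time.sleep, which releases the GIL; in the inference example below most of the time is likewise spent in C/C++ code (PIL decoding, the PyTorch forward pass) that releases it for long stretches. For stages that are pure-Python and CPU-bound, the GIL would serialize the threads again, and the same worker/queue pattern can be moved to processes instead. A minimal process-based sketch of a single stage (an illustration only, not part of the original code; unbounded queues keep it simple, and the worker function must live at module top level so it can be pickled on spawn-based platforms):

import time
from multiprocessing import Process, Queue

def plus_one(x):
    time.sleep(0.5)
    return x + 1

def worker(func, inq, outq):
    while True:
        x = inq.get()
        if x is None:        # termination sentinel
            outq.put(None)
            break
        outq.put(func(x))

if __name__ == '__main__':
    inq, outq = Queue(), Queue()
    p = Process(target=worker, args=(plus_one, inq, outq))
    p.start()
    for i in range(5):
        inq.put(i)
    inq.put(None)            # tell the worker to stop
    while (item := outq.get()) is not None:
        print(item)
    p.join()

The trade-off is that everything crossing a process boundary must be pickled, which matters for large images or tensors.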
Pipeline Inference

Serial inference:
import time
import torch
from torchvision import transforms, models
from PIL import Image
import os

# Image preprocessing pipeline
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Load the pretrained model
model = models.mobilenet_v2(pretrained=True)
model.eval()

# Move to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Image path generator
def image_provider(img_dir="images"):
    """Yield image file paths."""
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    # Iterate over all image files in the directory
    for filename in os.listdir(img_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            yield os.path.join(img_dir, filename)

# Pipeline stage 1: load the image
def load_image(img_path):
    """Load an image and return a PIL Image object."""
    try:
        img = Image.open(img_path).convert('RGB')
        print(f"Loaded image {img_path}")
        return (img_path, img)
    except Exception as e:
        print(f"Failed to load image {img_path}: {e}")
        return (img_path, None)

# Pipeline stage 2: preprocess the image
def preprocess_image(data):
    """Preprocess the image."""
    img_path, img = data
    if img is None:
        return (img_path, None)
    try:
        processed = preprocess(img)
        print(f"Preprocessed image {img_path}")
        return (img_path, processed)
    except Exception as e:
        print(f"Preprocessing failed {img_path}: {e}")
        return (img_path, None)

# Pipeline stage 3: model inference
def model_inference(data):
    """Run the model on the preprocessed tensor."""
    img_path, tensor = data
    if tensor is None:
        return (img_path, None)
    try:
        # Add a batch dimension and move to the device
        input_tensor = tensor.unsqueeze(0).to(device)
        # Inference
        with torch.no_grad():
            output = model(input_tensor)
        # Take the predicted class
        _, predicted_idx = torch.max(output, 1)
        print(f"Inference done {img_path}")
        return (img_path, predicted_idx.item())
    except Exception as e:
        print(f"Inference failed {img_path}: {e}")
        return (img_path, None)

if __name__ == '__main__':
    start_time = time.time()
    # Single-threaded flow: process each image in turn
    for img_path in image_provider():
        # Record the start time for this image
        img_start_time = time.time()
        # Load the image
        loaded_data = load_image(img_path)
        if loaded_data[1] is None:
            continue
        # Preprocess the image
        preprocessed_data = preprocess_image(loaded_data)
        if preprocessed_data[1] is None:
            continue
        # Model inference
        result = model_inference(preprocessed_data)
        print(f"Image: {os.path.basename(result[0])}, predicted class index: {result[1]}")
    print(f"All images processed, total time: {time.time() - start_time:.2f}s")
Program output:
......
Loaded image images\000000000643.jpg
Preprocessed image images\000000000643.jpg
Inference done images\000000000643.jpg
Image: 000000000643.jpg, predicted class index: 664
Loaded image images\000000000650.jpg
Preprocessed image images\000000000650.jpg
Inference done images\000000000650.jpg
Image: 000000000650.jpg, predicted class index: 281
All images processed, total time: 1.23s
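The gain from pipelining is bounded by the slowest stage, so before pipelining it is worth knowing how the 1.23 s splits across loading, preprocessing and inference. A small measurement sketch (not part of the original code), reusing image_provider and the three stage functions defined above:

import time

stage_totals = {"load": 0.0, "preprocess": 0.0, "inference": 0.0}

for img_path in image_provider():
    t0 = time.perf_counter()
    loaded = load_image(img_path)
    t1 = time.perf_counter()
    preprocessed = preprocess_image(loaded)
    t2 = time.perf_counter()
    model_inference(preprocessed)
    t3 = time.perf_counter()
    stage_totals["load"] += t1 - t0
    stage_totals["preprocess"] += t2 - t1
    stage_totals["inference"] += t3 - t2

for stage, total in stage_totals.items():
    print(f"{stage}: {total:.2f}s")

In steady state a pipeline delivers roughly one image per slowest-stage time, so the larger the gap between the slowest stage and the sum of all stages, the bigger the potential saving.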
Parallel inference:
import time
import torch
from torchvision import transforms, models
from PIL import Image
import os
import threading
from queue import Queue  # thread-safe queue.Queue

# Image preprocessing pipeline
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Load the pretrained model (shared by the threads; note that the move to the device is done in the main thread)
model = models.mobilenet_v2(pretrained=True)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Pipeline stage 1: load images
def load_image(img_dir="images"):
    for filename in os.listdir(img_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            img_path = os.path.join(img_dir, filename)
            try:
                img = Image.open(img_path).convert('RGB')
                print(f"Loaded image {img_path}")
                yield img_path, img
            except Exception as e:
                print(f"Failed to load image {img_path}: {e}")
                # Yield (path, None) so one bad image does not terminate the whole pipeline
                yield img_path, None
    # Termination sentinel once all files have been produced
    yield None

# Pipeline stage 2: preprocess the image
def preprocess_image(data):
    if data is None:
        return None
    img_path, img = data
    if img is None:
        return (img_path, None)
    try:
        processed = preprocess(img)
        print(f"Preprocessed image {img_path}")
        return (img_path, processed)
    except Exception as e:
        print(f"Preprocessing failed {img_path}: {e}")
        return (img_path, None)

# Pipeline stage 3: model inference (note: PyTorch's CUDA ops are thread-safe, but the inputs must be prepared correctly)
def model_inference(data):
    if data is None:
        return None
    img_path, tensor = data
    if tensor is None:
        return (img_path, None)
    try:
        input_tensor = tensor.unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_tensor)
        _, predicted_idx = torch.max(output, 1)
        print(f"Inference done {img_path}")
        return (img_path, predicted_idx.item())
    except Exception as e:
        print(f"Inference failed {img_path}: {e}")
        return (img_path, None)

def input_worker(data_provider, inq):
    # The generator itself yields the final None sentinel
    for x in data_provider():
        inq.put(x)

def worker(func, inq, outq):
    while True:
        x = inq.get()        # block until a task arrives
        if x is None:        # termination sentinel received
            outq.put(None)
            break
        outq.put(func(x))

class ThreadPipeline:
    def __init__(self, data_provider, *funcs):
        self.queues = [Queue(1) for _ in range(len(funcs) + 1)]  # thread-safe queues
        self.threads = [threading.Thread(
            target=input_worker,
            args=(data_provider, self.queues[0]),
            daemon=True  # daemon thread: ends automatically when the main thread exits
        )]
        # Create the worker threads
        for i, func in enumerate(funcs):
            self.threads.append(threading.Thread(
                target=worker,
                args=(func, self.queues[i], self.queues[i + 1]),
                daemon=True
            ))
        # Start all threads
        for t in self.threads:
            t.start()

    def __next__(self):
        item = self.queues[-1].get()
        if item is None:
            raise StopIteration
        return item

    def __iter__(self):
        return self

    def stop(self):
        # Wait for all threads to finish
        for t in self.threads:
            t.join()

if __name__ == '__main__':
    # Build the multi-threaded pipeline
    pipeline = ThreadPipeline(
        load_image,
        preprocess_image,
        model_inference
    )
    start_time = time.time()
    try:
        for result in pipeline:
            img_path, pred_idx = result
            print(f"Image: {os.path.basename(img_path)}, predicted class index: {pred_idx}")
        print(f"Elapsed: {time.time() - start_time:.2f}s")
    finally:
        pipeline.stop()
        print("All images processed, pipeline stopped")
Program output:
......
Preprocessed image images\000000000650.jpg
Inference done images\000000000641.jpg
Image: 000000000641.jpg, predicted class index: 874
Inference done images\000000000643.jpg
Image: 000000000643.jpg, predicted class index: 664
Inference done images\000000000650.jpg
Image: 000000000650.jpg, predicted class index: 281
Elapsed: 0.92s
All images processed, pipeline stopped
The images folder here contains pictures from coco128. As the results show, pipelining saves roughly a quarter of the total inference time. The pre- and post-processing in this example are fairly light; for models with heavier pre- and post-processing, pipelining brings an even larger efficiency gain.
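The try/finally in the main block exists only to guarantee that pipeline.stop() is always called. One way to package that pattern is to make the pipeline a context manager; the wrapper below is a hypothetical convenience sketch, not part of the code above:

class ManagedPipeline(ThreadPipeline):
    """Hypothetical wrapper: join the worker threads when the with-block exits."""
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.stop()
        return False  # never suppress exceptions

# Equivalent to the try/finally above:
# with ManagedPipeline(load_image, preprocess_image, model_inference) as pipeline:
#     for img_path, pred_idx in pipeline:
#         print(f"Image: {os.path.basename(img_path)}, predicted class index: {pred_idx}")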
