模型流水线推理

最新推荐文章于 2025-12-06 18:40:48 发布

原创最新推荐文章于 2025-12-06 18:40:48 发布 · 264 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#python #算法 #模型推理 #流水线并发

deep learning 同时被 2 个专栏收录

148 篇文章

订阅专栏

model deployment

70 篇文章

订阅专栏

流水线并行

串行示例：

import time


def plus_one(x, delay=0.5):
    """Return ``x + 1`` after pausing to simulate a slow processing stage.

    Args:
        x: Value to increment; anything that supports ``+ 1``.
        delay: Seconds to sleep before computing. Defaults to 0.5, the pause
            that used to be hard-coded, so existing one-argument calls behave
            as before.

    Returns:
        ``x + 1``.
    """
    time.sleep(delay)
    return x + 1


def mul_two(x, delay=0.5):
    """Return ``x * 2`` after pausing to simulate a slow processing stage.

    Args:
        x: Value to double; anything that supports ``* 2``.
        delay: Seconds to sleep before computing. Defaults to 0.5, the pause
            that used to be hard-coded, so existing one-argument calls behave
            as before.

    Returns:
        ``x * 2``.
    """
    time.sleep(delay)
    return x * 2


if __name__ == '__main__':
    # Push 0..4 through both stages one after another and report how long
    # each item took from start to finish.
    for n in range(5):
        start_time = time.time()
        incremented = plus_one(n)
        output = mul_two(incremented)
        print(f"{output}: {time.time() - start_time:.1f}s")

运行结果：

2: 1.0s
4: 1.0s
6: 1.0s
8: 1.0s
10: 1.0s

并行示例：

import time
import threading
from queue import Queue


def input_worker(data_provider, inq):
    """Feed every item produced by ``data_provider()`` into ``inq``.

    ``None`` is the end-of-stream marker understood by the stage workers.
    Exactly one marker is always delivered:

    * when the provider is exhausted, a marker is appended;
    * when the provider raises, a marker is still appended (``finally``)
      before the exception propagates, so downstream stages terminate
      instead of waiting forever;
    * when the provider itself yields ``None``, feeding stops there. The
      stages exit on that item, so anything put after it would never be
      consumed and would block on a bounded queue.

    Args:
        data_provider: Zero-argument callable returning an iterable of items.
        inq: Queue-like object written with ``put()``.
    """
    sentinel_sent = False
    try:
        for x in data_provider():
            inq.put(x)
            if x is None:
                # The provider produced the end marker itself; stop feeding.
                sentinel_sent = True
                break
    finally:
        if not sentinel_sent:
            inq.put(None)


def worker(func, inq, outq):
    """Run one pipeline stage: apply ``func`` to items from ``inq`` into ``outq``.

    ``None`` is the end-of-stream marker. When it arrives it is forwarded to
    ``outq`` and the function returns. Because the marker is ``None``, a
    result of ``None`` from ``func`` ends the stream for later stages too.

    If ``func`` raises, the marker is forwarded to ``outq`` immediately and
    the remaining input is read and discarded up to its own marker before the
    exception is re-raised. Otherwise the stage would vanish silently: the
    reader of ``outq`` would wait forever for a marker and the writer of a
    bounded ``inq`` would block on ``put``.

    Args:
        func: Callable taking one item and returning the processed item.
        inq: Queue-like object read with ``get()``.
        outq: Queue-like object written with ``put()``.

    Raises:
        Exception: Whatever ``func`` raised, after the cleanup above.
    """
    while True:
        x = inq.get()
        # End marker received: pass it on and finish.
        if x is None:
            outq.put(None)
            return
        try:
            result = func(x)
        except Exception:
            # Tell downstream there is nothing more to come ...
            outq.put(None)
            # ... and let upstream run to completion instead of blocking.
            while inq.get() is not None:
                pass
            raise
        outq.put(result)


class Pipeline:
    """Iterator over items pushed through a chain of threaded stages.

    One thread feeds ``data_provider()`` into the first queue and one thread
    per function moves items from its input queue to its output queue. All
    queues hold at most one item, so each stage can work on a different item
    at the same time. Iterating the pipeline yields what arrives on the last
    queue; ``None`` on that queue marks the end of the stream.

    Attributes:
        queues: ``len(funcs) + 1`` bounded queues; ``queues[-1]`` is the output.
        processes: The started ``threading.Thread`` objects (threads, despite
            the attribute name), feeder first.
    """

    def __init__(self, data_provider, *funcs):
        """Create the queues and start the feeder and one thread per stage.

        Args:
            data_provider: Zero-argument callable returning an iterable.
            *funcs: Stage callables, applied in the given order.
        """
        self.queues = [Queue(1) for _ in range(len(funcs) + 1)]
        self.processes = [threading.Thread(target=input_worker, args=(data_provider, self.queues[0]))]
        # Stage i reads from queue i and writes to queue i + 1.
        for func, inq, outq in zip(funcs, self.queues, self.queues[1:]):
            self.processes.append(threading.Thread(target=worker, args=(func, inq, outq)))
        # Set once the end marker has been taken off the output queue.
        self._exhausted = False
        for p in self.processes:
            p.start()

    def __next__(self):
        """Return the next result, blocking until one is available.

        Raises:
            StopIteration: Once the end marker has been received, and on every
                later call. The marker is sent only once, so reading the
                queue again after it would block forever.
        """
        if self._exhausted:
            raise StopIteration
        item = self.queues[-1].get()
        # End marker received: stop iterating.
        if item is None:
            self._exhausted = True
            raise StopIteration
        return item

    def __iter__(self):
        """Return ``self``; the pipeline is its own iterator."""
        return self

    def stop(self):
        """Wait for all threads to finish.

        If the output was not fully consumed (for example the caller left the
        loop early), the remaining results are read and discarded first. The
        queues are bounded, so the threads would otherwise stay blocked on
        ``put`` and ``join`` would never return.
        """
        while not self._exhausted:
            if self.queues[-1].get() is None:
                self._exhausted = True
        for p in self.processes:
            p.join()


def data_provider():
    """Yield the demo inputs 0, 1, 2, 3, 4 in that order."""
    yield from range(5)


def plus_one(x, delay=0.5):
    """Return ``x + 1`` after pausing to simulate a slow pipeline stage.

    Args:
        x: Value to increment; anything that supports ``+ 1``.
        delay: Seconds to sleep before computing. Defaults to 0.5, the pause
            that used to be hard-coded, so it can still be passed to the
            pipeline as a one-argument stage.

    Returns:
        ``x + 1``.
    """
    time.sleep(delay)
    return x + 1


def mul_two(x, delay=0.5):
    """Return ``x * 2`` after pausing to simulate a slow pipeline stage.

    Args:
        x: Value to double; anything that supports ``* 2``.
        delay: Seconds to sleep before computing. Defaults to 0.5, the pause
            that used to be hard-coded, so it can still be passed to the
            pipeline as a one-argument stage.

    Returns:
        ``x * 2``.
    """
    time.sleep(delay)
    return x * 2


if __name__ == '__main__':
    # Chain the two stages behind the data provider; the worker threads start
    # as soon as the pipeline object is constructed.
    stages = (plus_one, mul_two)
    pipeline = Pipeline(data_provider, *stages)

    # Report the gap between consecutive results rather than per-item latency.
    start_time = time.time()
    try:
        for output in pipeline:
            print(f"{output}: {time.time() - start_time:.1f}s")
            start_time = time.time()
    finally:
        # Always wait for the worker threads, even if the loop body failed.
        pipeline.stop()

运行结果：

2: 1.0s
4: 0.5s
6: 0.5s
8: 0.5s
10: 0.5s

流水线推理

串行推理：

import time
import torch
from torchvision import transforms, models
from PIL import Image
import os

# Image preprocessing: resize, centre-crop, convert to a tensor, then
# normalise each channel with the fixed mean/std below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained MobileNetV2, move it to the chosen device and switch it
# to evaluation mode.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing.
model = models.mobilenet_v2(pretrained=True).to(device)
model.eval()

# Image path generator
def image_provider(img_dir="images"):
    """Yield the path of every image entry directly inside ``img_dir``.

    The directory is created if it does not exist yet, in which case nothing
    is yielded. Entries are matched by extension (case-insensitive) and
    produced in sorted name order so repeated runs process the images in the
    same sequence; ``os.listdir`` on its own returns them in arbitrary order.

    Args:
        img_dir: Directory to scan; sub-directories are not descended into.

    Yields:
        ``os.path.join(img_dir, filename)`` for each matching entry.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(img_dir, exist_ok=True)

    for filename in sorted(os.listdir(img_dir)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            yield os.path.join(img_dir, filename)

# Pipeline stage 1: load the image
def load_image(img_path):
    """Open ``img_path`` and return it converted to RGB.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the converted copy exists, rather than whenever the source image object
    happens to be garbage-collected.

    Args:
        img_path: Path of the image file to read.

    Returns:
        ``(img_path, image)`` on success, or ``(img_path, None)`` if opening
        or converting failed. The failure is printed, not raised.
    """
    try:
        with Image.open(img_path) as src:
            # convert() returns a new image, so it stays usable after the
            # source file is closed.
            img = src.convert('RGB')
        print(f"加载图像成功 {img_path}")
        return (img_path, img)
    except Exception as e:
        # Stage boundary: report and hand a None payload to later stages.
        print(f"加载图像失败 {img_path}: {e}")
        return (img_path, None)

# Pipeline stage 2: preprocess the image
def preprocess_image(data):
    """Apply the module-level ``preprocess`` transform to a loaded image.

    Args:
        data: ``(img_path, image)`` pair; ``image`` may be ``None`` when an
            earlier stage failed.

    Returns:
        ``(img_path, processed)`` on success, or ``(img_path, None)`` when
        there was no image or the transform raised. Failures are printed,
        not raised.
    """
    img_path, image = data
    tensor = None
    if image is not None:
        try:
            tensor = preprocess(image)
            print(f"预处理图像成功 {img_path}")
        except Exception as e:
            print(f"预处理失败 {img_path}: {e}")
            tensor = None
    return (img_path, tensor)

# Pipeline stage 3: model inference
def model_inference(data):
    """Run the module-level ``model`` on one preprocessed tensor.

    Args:
        data: ``(img_path, tensor)`` pair; ``tensor`` may be ``None`` when an
            earlier stage failed.

    Returns:
        ``(img_path, index)`` where ``index`` is the position of the largest
        model output along dimension 1, or ``(img_path, None)`` when there
        was no tensor or inference raised. Failures are printed, not raised.
    """
    img_path, tensor = data
    prediction = None
    if tensor is not None:
        try:
            # Add a leading batch dimension and move the input to the device.
            batch = tensor.unsqueeze(0).to(device)

            # Gradients are not needed for inference.
            with torch.no_grad():
                logits = model(batch)

            # Index of the largest output along dimension 1.
            best = torch.max(logits, 1).indices
            print(f"推理成功 {img_path}")
            prediction = best.item()
        except Exception as e:
            print(f"推理失败 {img_path}: {e}")
            prediction = None
    return (img_path, prediction)

if __name__ == '__main__':
    start_time = time.time()

    # Single-threaded flow: take each image through load -> preprocess ->
    # inference before starting the next one.
    for img_path in image_provider():
        # Load the image; skip it if loading failed.
        loaded_data = load_image(img_path)
        if loaded_data[1] is None:
            continue

        # Preprocess the image; skip it if preprocessing failed.
        preprocessed_data = preprocess_image(loaded_data)
        if preprocessed_data[1] is None:
            continue

        # Run the model and report the predicted class index.
        result = model_inference(preprocessed_data)
        print(f"图像: {os.path.basename(result[0])}, 预测类别索引: {result[1]}")

    print(f"所有图像处理完成，总耗时: {time.time() - start_time:.2f}s")

程序输出：

......
加载图像成功 images\000000000643.jpg
预处理图像成功 images\000000000643.jpg
推理成功 images\000000000643.jpg
图像: 000000000643.jpg, 预测类别索引: 664
加载图像成功 images\000000000650.jpg
预处理图像成功 images\000000000650.jpg
推理成功 images\000000000650.jpg
图像: 000000000650.jpg, 预测类别索引: 281
所有图像处理完成，总耗时: 1.23s

并行推理：

import time
import torch
from torchvision import transforms, models
from PIL import Image
import os
import threading
from queue import Queue  # 使用线程安全的queue.Queue

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image preprocessing: resize, centre-crop, convert to a tensor, then
# normalise each channel with the fixed mean/std below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the pretrained model once, at import time on the main thread; the
# inference stage in this script uses this single module-level instance.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing.
model = models.mobilenet_v2(pretrained=True)
model = model.eval().to(device)

# Pipeline stage 1: load images
def load_image(img_dir="images"):
    """Yield every image in ``img_dir`` as an RGB image, then an end marker.

    Entries are matched by extension (case-insensitive) and visited in sorted
    name order so repeated runs produce the same sequence.

    An image that cannot be opened is reported and yielded as
    ``(img_path, None)``, which the later stages pass through untouched. It
    must not be yielded as a bare ``None``: that value is the end-of-stream
    marker, so one unreadable file would end the whole pipeline and leave the
    feeder thread blocked on the bounded queue.

    Args:
        img_dir: Directory to scan; sub-directories are not descended into.

    Yields:
        ``(img_path, image)`` per matching entry (``image`` is ``None`` on
        failure), followed by a single ``None`` as the end-of-stream marker.
    """
    for filename in sorted(os.listdir(img_dir)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            img_path = os.path.join(img_dir, filename)
            try:
                # Close the file handle as soon as the converted copy exists.
                with Image.open(img_path) as src:
                    img = src.convert('RGB')
                print(f"加载图像成功 {img_path}")
            except Exception as e:
                print(f"加载图像失败 {img_path}: {e}")
                img = None
            yield img_path, img
    # End-of-stream marker consumed by the worker threads.
    yield None

# Pipeline stage 2: preprocess the image
def preprocess_image(data):
    """Apply the module-level ``preprocess`` transform to a loaded image.

    Args:
        data: ``(img_path, image)`` pair, or ``None``. ``image`` may itself
            be ``None`` when loading failed.

    Returns:
        ``None`` when ``data`` is ``None``; otherwise ``(img_path, processed)``
        on success or ``(img_path, None)`` when there was no image or the
        transform raised. Failures are printed, not raised.
    """
    if data is None:
        return None

    img_path, image = data
    tensor = None
    if image is not None:
        try:
            tensor = preprocess(image)
            print(f"预处理图像成功 {img_path}")
        except Exception as e:
            print(f"预处理失败 {img_path}: {e}")
            tensor = None
    return (img_path, tensor)

# Pipeline stage 3: model inference
# NOTE(review): the original author's comment asserts that PyTorch CUDA calls
# are thread-safe; in this script the stage is driven by one worker thread.
def model_inference(data):
    """Run the module-level ``model`` on one preprocessed tensor.

    Args:
        data: ``(img_path, tensor)`` pair, or ``None``. ``tensor`` may itself
            be ``None`` when an earlier stage failed.

    Returns:
        ``None`` when ``data`` is ``None``; otherwise ``(img_path, index)``
        where ``index`` is the position of the largest model output along
        dimension 1, or ``(img_path, None)`` when there was no tensor or
        inference raised. Failures are printed, not raised.
    """
    if data is None:
        return None

    img_path, tensor = data
    prediction = None
    if tensor is not None:
        try:
            # Add a leading batch dimension and move the input to the device.
            batch = tensor.unsqueeze(0).to(device)
            with torch.no_grad():
                logits = model(batch)
            best = torch.max(logits, 1).indices
            print(f"推理成功 {img_path}")
            prediction = best.item()
        except Exception as e:
            print(f"推理失败 {img_path}: {e}")
            prediction = None
    return (img_path, prediction)

def input_worker(data_provider, inq):
    """Feed every item produced by ``data_provider()`` into ``inq``.

    ``None`` is the end-of-stream marker understood by the stage workers.
    Exactly one marker is always delivered:

    * when the provider yields ``None``, feeding stops there. The stages exit
      on that item, so anything put after it would never be consumed and
      would block on a bounded queue;
    * when the provider finishes without yielding ``None``, or raises part
      way through, a marker is appended (``finally``) so downstream stages
      terminate instead of waiting forever. An exception still propagates.

    Args:
        data_provider: Zero-argument callable returning an iterable of items.
        inq: Queue-like object written with ``put()``.
    """
    sentinel_sent = False
    try:
        for x in data_provider():
            inq.put(x)
            if x is None:
                sentinel_sent = True
                break
    finally:
        if not sentinel_sent:
            inq.put(None)

def worker(func, inq, outq):
    """Run one pipeline stage: apply ``func`` to items from ``inq`` into ``outq``.

    ``None`` is the end-of-stream marker. When it arrives it is forwarded to
    ``outq`` and the function returns. Because the marker is ``None``, a
    result of ``None`` from ``func`` ends the stream for later stages too.

    If ``func`` raises, the marker is forwarded to ``outq`` immediately and
    the remaining input is read and discarded up to its own marker before the
    exception is re-raised. Otherwise the stage would vanish silently: the
    reader of ``outq`` would wait forever for a marker and the writer of a
    bounded ``inq`` would block on ``put``.

    Args:
        func: Callable taking one item and returning the processed item.
        inq: Queue-like object read with ``get()``.
        outq: Queue-like object written with ``put()``.

    Raises:
        Exception: Whatever ``func`` raised, after the cleanup above.
    """
    while True:
        x = inq.get()  # blocks until an item is available
        if x is None:  # end marker: pass it on and finish
            outq.put(None)
            return
        try:
            result = func(x)
        except Exception:
            # Tell downstream there is nothing more to come ...
            outq.put(None)
            # ... and let upstream run to completion instead of blocking.
            while inq.get() is not None:
                pass
            raise
        outq.put(result)

class ThreadPipeline:
    """Iterator over items pushed through a chain of daemon-thread stages.

    One thread feeds ``data_provider()`` into the first queue and one thread
    per function moves items from its input queue to its output queue. All
    queues hold at most one item, so each stage can work on a different item
    at the same time. Iterating the pipeline yields what arrives on the last
    queue; ``None`` on that queue marks the end of the stream.

    Attributes:
        queues: ``len(funcs) + 1`` bounded queues; ``queues[-1]`` is the output.
        threads: The started daemon threads, feeder first.
    """

    def __init__(self, data_provider, *funcs):
        """Create the queues and start the feeder and one thread per stage.

        Args:
            data_provider: Zero-argument callable returning an iterable.
            *funcs: Stage callables, applied in the given order.
        """
        self.queues = [Queue(1) for _ in range(len(funcs) + 1)]
        self.threads = [threading.Thread(
            target=input_worker,
            args=(data_provider, self.queues[0]),
            daemon=True,  # do not keep the interpreter alive on exit
        )]

        # Stage i reads from queue i and writes to queue i + 1.
        for func, inq, outq in zip(funcs, self.queues, self.queues[1:]):
            self.threads.append(threading.Thread(
                target=worker,
                args=(func, inq, outq),
                daemon=True,
            ))

        # Set once the end marker has been taken off the output queue.
        self._exhausted = False

        for t in self.threads:
            t.start()

    def __next__(self):
        """Return the next result, blocking until one is available.

        Raises:
            StopIteration: Once the end marker has been received, and on every
                later call. The marker is sent only once, so reading the
                queue again after it would block forever.
        """
        if self._exhausted:
            raise StopIteration
        item = self.queues[-1].get()
        if item is None:
            self._exhausted = True
            raise StopIteration
        return item

    def __iter__(self):
        """Return ``self``; the pipeline is its own iterator."""
        return self

    def stop(self):
        """Wait for all threads to finish.

        If the output was not fully consumed (for example the caller left the
        loop early), the remaining results are read and discarded first. The
        queues are bounded, so the threads would otherwise stay blocked on
        ``put`` and ``join`` would never return.
        """
        while not self._exhausted:
            if self.queues[-1].get() is None:
                self._exhausted = True
        for t in self.threads:
            t.join()

if __name__ == '__main__':
    # Build the threaded pipeline: load_image is the data source, followed by
    # the preprocessing and inference stages. Threads start on construction.
    stages = (preprocess_image, model_inference)
    pipeline = ThreadPipeline(load_image, *stages)

    start_time = time.time()
    try:
        # Each result is an (img_path, pred_idx) pair.
        for img_path, pred_idx in pipeline:
            print(f"图像: {os.path.basename(img_path)}, 预测类别索引: {pred_idx}")
        print(f"耗时: {time.time() - start_time:.2f}s")
    finally:
        # Always wait for the worker threads, even if the loop body failed.
        pipeline.stop()
        print("所有图像处理完成，流水线已停止")

程序输出：

......
预处理图像成功 images\000000000650.jpg
推理成功 images\000000000641.jpg
图像: 000000000641.jpg, 预测类别索引: 874
推理成功 images\000000000643.jpg
图像: 000000000643.jpg, 预测类别索引: 664
推理成功 images\000000000650.jpg
图像: 000000000650.jpg, 预测类别索引: 281
耗时: 0.92s
所有图像处理完成，流水线已停止

这里的 images 文件夹内为 coco128 数据集的图片。可以看到，流水线处理可以节省约 1/4 的推理耗时（总耗时由 1.23s 降至 0.92s）；而且本例模型的前后处理较简单，对于前后处理更复杂的模型，采用流水线处理可以更大幅度地提升效率。