Deploying the DEIMv2 model with ONNX Runtime and TensorRT


ONNX Runtime deployment

The structure of the exported DEIMv2 ONNX model is as follows:
[Figure: ONNX graph of the DEIMv2 model]
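
If the visualization is not at hand, the same information can be read directly from the ONNX file with onnxruntime: the graph has two inputs (images, orig_target_sizes) and three outputs (labels, boxes, scores), which is exactly what the inference script below relies on. A minimal sketch:

import onnxruntime

# Print the names, shapes and dtypes of the graph's inputs and outputs.
session = onnxruntime.InferenceSession("deimv2_hgnetv2_atto_coco.onnx", providers=['CPUExecutionProvider'])
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)

The complete CPU inference and visualization script: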

import numpy as np
import onnxruntime
from PIL import Image, ImageDraw


def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
    """Resizes an image while maintaining aspect ratio and pads it."""
    original_width, original_height = image.size
    ratio = min(size / original_width, size / original_height)
    new_width = int(original_width * ratio)
    new_height = int(original_height * ratio)
    image = image.resize((new_width, new_height), interpolation)

    # Create a new image with the desired size and paste the resized image onto it
    new_image = Image.new("RGB", (size, size))
    new_image.paste(image, ((size - new_width) // 2, (size - new_height) // 2))
    return new_image, ratio, (size - new_width) // 2, (size - new_height) // 2


def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.4):
    result_images = []
    for i, im in enumerate(images):
        draw = ImageDraw.Draw(im)
        scr = scores[i]
        lab = labels[i][scr > thrh]
        box = boxes[i][scr > thrh]
        scr = scr[scr > thrh]

        ratio = ratios[i]
        pad_w, pad_h = paddings[i]

        for lbl, bb in zip(lab, box):
            # Adjust bounding boxes according to the resizing and padding
            bb = [
                (bb[0] - pad_w) / ratio,
                (bb[1] - pad_h) / ratio,
                (bb[2] - pad_w) / ratio,
                (bb[3] - pad_h) / ratio,
            ]
            draw.rectangle(bb, outline='red')
            draw.text((bb[0], bb[1]), text=str(lbl), fill='blue')

        result_images.append(im)
    return result_images

onnx_session = onnxruntime.InferenceSession("deimv2_hgnetv2_atto_coco.onnx", providers=['CPUExecutionProvider'])
img = Image.open('bus.jpg')
resized_im_pil, ratio, pad_w, pad_h = resize_with_aspect_ratio(img, size=320)

inputs = {}
inputs["images"] = np.expand_dims(np.array(resized_im_pil) / 255.0, axis=0).astype(np.float32).transpose(0, 3, 1, 2)
inputs["orig_target_sizes"] = np.array([[resized_im_pil.size[1], resized_im_pil.size[0]]])
outputs = onnx_session.run(None, inputs)

labels, boxes, scores = outputs

result_images = draw([img], labels, boxes, scores, [ratio], [(pad_w, pad_h)])
result_images[0].save('onnx_result.jpg')
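
The script above runs on the CPU execution provider. If an NVIDIA GPU and the onnxruntime-gpu package are available, only the session creation needs to change (a hedged sketch; the rest of the pipeline is identical):

import onnxruntime

# Prefer the CUDA execution provider and fall back to CPU if it cannot be loaded.
onnx_session = onnxruntime.InferenceSession(
    "deimv2_hgnetv2_atto_coco.onnx",
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
print(onnx_session.get_providers())  # shows which providers were actually enabled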

TensorRT deployment
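
The inference script below assumes a serialized engine file deimv2_hgnetv2_atto_coco.engine already exists. It can be built from the ONNX file with trtexec (trtexec --onnx=deimv2_hgnetv2_atto_coco.onnx --saveEngine=deimv2_hgnetv2_atto_coco.engine), or with the TensorRT Python builder API. A minimal sketch for TensorRT 8.x, assuming static input shapes:

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

# Parse the exported ONNX graph.
with open("deimv2_hgnetv2_atto_coco.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError("Failed to parse the ONNX model")

config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB workspace
# config.set_flag(trt.BuilderFlag.FP16)  # optionally enable FP16

# Serialize the engine and write it to disk.
serialized_engine = builder.build_serialized_network(network, config)
with open("deimv2_hgnetv2_atto_coco.engine", "wb") as f:
    f.write(serialized_engine)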

import numpy as np
import tensorrt as trt
import common
from PIL import Image, ImageDraw


def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
    """Resizes an image while maintaining aspect ratio and pads it."""
    original_width, original_height = image.size
    ratio = min(size / original_width, size / original_height)
    new_width = int(original_width * ratio)
    new_height = int(original_height * ratio)
    image = image.resize((new_width, new_height), interpolation)

    # Create a new image with the desired size and paste the resized image onto it
    new_image = Image.new("RGB", (size, size))
    new_image.paste(image, ((size - new_width) // 2, (size - new_height) // 2))
    return new_image, ratio, (size - new_width) // 2, (size - new_height) // 2


def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.4):
    result_images = []
    for i, im in enumerate(images):
        draw = ImageDraw.Draw(im)
        scr = scores[i]
        lab = labels[i][scr > thrh]
        box = boxes[i][scr > thrh]
        scr = scr[scr > thrh]

        ratio = ratios[i]
        pad_w, pad_h = paddings[i]

        for lbl, bb in zip(lab, box):
            # Adjust bounding boxes according to the resizing and padding
            bb = [
                (bb[0] - pad_w) / ratio,
                (bb[1] - pad_h) / ratio,
                (bb[2] - pad_w) / ratio,
                (bb[3] - pad_h) / ratio,
            ]
            draw.rectangle(bb, outline='red')
            draw.text((bb[0], bb[1]), text=str(lbl), fill='blue')

        result_images.append(im)
    return result_images


logger = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(logger, "")
with open("deimv2_hgnetv2_atto_coco.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()
inputs, outputs, bindings, stream = common.allocate_buffers(engine)

img = Image.open('bus.jpg')
resized_im_pil, ratio, pad_w, pad_h = resize_with_aspect_ratio(img, size=320)

images = np.expand_dims(np.array(resized_im_pil) / 255.0, axis=0).astype(np.float32).transpose(0, 3, 1, 2)
orig_target_sizes = np.array([[resized_im_pil.size[1], resized_im_pil.size[0]]])
np.copyto(inputs[0].host, images.ravel())
np.copyto(inputs[1].host, orig_target_sizes.ravel())

output = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
labels, boxes, scores = output[0].reshape(1,300), output[1].reshape(1,300,4), output[2].reshape(1,300)

result_images = draw([img], labels, boxes, scores, [ratio], [(pad_w, pad_h)])
result_images[0].save('result.jpg')
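
Once inference is finished, the pinned host memory, device memory, and CUDA stream allocated by common.allocate_buffers can be released with the helper defined in common.py below:

# Free host/device buffers and destroy the CUDA stream.
common.free_buffers(inputs, outputs, stream)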

common.py (the NVIDIA TensorRT sample helper module used above):

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import ctypes
from typing import Optional, List

import numpy as np
import tensorrt as trt
from cuda import cuda, cudart

try:
    # Sometimes python does not understand FileNotFoundError
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

def check_cuda_err(err):
    # Distinguish driver-API (CUresult) and runtime-API (cudaError_t) status codes.
    if isinstance(err, cuda.CUresult):
        if err != cuda.CUresult.CUDA_SUCCESS:
            raise RuntimeError("Cuda Error: {}".format(err))
    elif isinstance(err, cudart.cudaError_t):
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError("Cuda Runtime Error: {}".format(err))
    else:
        raise RuntimeError("Unknown error type: {}".format(err))

def cuda_call(call):
    err, res = call[0], call[1:]
    check_cuda_err(err)
    if len(res) == 1:
        res = res[0]
    return res

def GiB(val):
    return val * 1 << 30


def add_help(description):
    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args, _ = parser.parse_known_args()


def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""):
    """
    Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.
    """

    # Standard command-line arguments for all samples.
    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data")
    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-d",
        "--datadir",
        help="Location of the TensorRT sample data directory, and any additional data directories.",
        action="append",
        default=[kDEFAULT_DATA_ROOT],
    )
    args, _ = parser.parse_known_args()

    def get_data_path(data_dir):
        # If the subfolder exists, append it to the path, otherwise use the provided path as-is.
        data_path = os.path.join(data_dir, subfolder)
        if not os.path.exists(data_path):
            if data_dir != kDEFAULT_DATA_ROOT:
                print("WARNING: " + data_path + " does not exist. Trying " + data_dir + " instead.")
            data_path = data_dir
        # Make sure data directory exists.
        if not (os.path.exists(data_path)) and data_dir != kDEFAULT_DATA_ROOT:
            print(
                "WARNING: {:} does not exist. Please provide the correct data path with the -d option.".format(
                    data_path
                )
            )
        return data_path

    data_paths = [get_data_path(data_dir) for data_dir in args.datadir]
    return data_paths, locate_files(data_paths, find_files, err_msg)


def locate_files(data_paths, filenames, err_msg=""):
    """
    Locates the specified files in the specified data directories.
    If a file exists in multiple data directories, the first directory is used.

    Args:
        data_paths (List[str]): The data directories.
        filename (List[str]): The names of the files to find.

    Returns:
        List[str]: The absolute paths of the files.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    found_files = [None] * len(filenames)
    for data_path in data_paths:
        # Find all requested files.
        for index, (found, filename) in enumerate(zip(found_files, filenames)):
            if not found:
                file_path = os.path.abspath(os.path.join(data_path, filename))
                if os.path.exists(file_path):
                    found_files[index] = file_path

    # Check that all files were found
    for f, filename in zip(found_files, filenames):
        if not f or not os.path.exists(f):
            raise FileNotFoundError(
                "Could not find {:}. Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg)
            )
    return found_files


class HostDeviceMem:
    """Pair of host and device memory, where the host memory is wrapped in a numpy array"""
    def __init__(self, size: int, dtype: np.dtype):
        nbytes = size * dtype.itemsize
        host_mem = cuda_call(cudart.cudaMallocHost(nbytes))
        pointer_type = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))

        self._host = np.ctypeslib.as_array(ctypes.cast(host_mem, pointer_type), (size,))
        self._device = cuda_call(cudart.cudaMalloc(nbytes))
        self._nbytes = nbytes

    @property
    def host(self) -> np.ndarray:
        return self._host

    @host.setter
    def host(self, arr: np.ndarray):
        if arr.size > self.host.size:
            raise ValueError(
                f"Tried to fit an array of size {arr.size} into host memory of size {self.host.size}"
            )
        np.copyto(self.host[:arr.size], arr.flat, casting='safe')

    @property
    def device(self) -> int:
        return self._device

    @property
    def nbytes(self) -> int:
        return self._nbytes

    def __str__(self):
        return f"Host:\n{self.host}\nDevice:\n{self.device}\nSize:\n{self.nbytes}\n"

    def __repr__(self):
        return self.__str__()

    def free(self):
        cuda_call(cudart.cudaFree(self.device))
        cuda_call(cudart.cudaFreeHost(self.host.ctypes.data))


# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
# If engine uses dynamic shapes, specify a profile to find the maximum input & output size.
def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        shape = engine.get_tensor_shape(binding) if profile_idx is None else engine.get_tensor_profile_shape(binding, profile_idx)[-1]
        shape_valid = np.all([s >= 0 for s in shape])
        if not shape_valid and profile_idx is None:
            raise ValueError(f"Binding {binding} has dynamic shape, " +\
                "but no profile was specified.")
        size = trt.volume(shape)
        if engine.has_implicit_batch_dimension:
            size *= engine.max_batch_size
        dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(binding)))

        # Allocate host and device buffers
        bindingMemory = HostDeviceMem(size, dtype)

        # Append the device buffer to device bindings.
        bindings.append(int(bindingMemory.device))

        # Append to the appropriate list.
        if engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT:
            inputs.append(bindingMemory)
        else:
            outputs.append(bindingMemory)
    return inputs, outputs, bindings, stream


# Frees the resources allocated in allocate_buffers
def free_buffers(inputs: List[HostDeviceMem], outputs: List[HostDeviceMem], stream: cudart.cudaStream_t):
    for mem in inputs + outputs:
        mem.free()
    cuda_call(cudart.cudaStreamDestroy(stream))


# Wrapper for cudaMemcpy which infers copy size and does error checking
def memcpy_host_to_device(device_ptr: int, host_arr: np.ndarray):
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(cudart.cudaMemcpy(device_ptr, host_arr, nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice))


# Wrapper for cudaMemcpy which infers copy size and does error checking
def memcpy_device_to_host(host_arr: np.ndarray, device_ptr: int):
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(cudart.cudaMemcpy(host_arr, device_ptr, nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))


def _do_inference_base(inputs, outputs, stream, execute_async):
    # Transfer input data to the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
    [cuda_call(cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, kind, stream)) for inp in inputs]
    # Run inference.
    execute_async()
    # Transfer predictions back from the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
    [cuda_call(cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, kind, stream)) for out in outputs]
    # Synchronize the stream
    cuda_call(cudart.cudaStreamSynchronize(stream))
    # Return only the host outputs.
    return [out.host for out in outputs]


def do_inference(context, engine, bindings, inputs, outputs, stream):
    def execute_async_func():
        context.execute_async_v3(stream_handle=stream)
    # Setup context tensor address.
    num_io = engine.num_io_tensors
    for i in range(num_io):
        context.set_tensor_address(engine.get_tensor_name(i), bindings[i])
    return _do_inference_base(inputs, outputs, stream, execute_async_func)
