使用CV_CUDA对图像进行Crop和Resize

最新推荐文章于 2025-04-17 11:13:30 发布

nudt_qxx

最新推荐文章于 2025-04-17 11:13:30 发布

阅读量984

点赞数 1

文章标签： c++ opencv 计算机视觉

本文链接：https://blog.youkuaiyun.com/xiangxianghehe/article/details/134288326

版权

本文对比了在 C++ 中使用 CV-CUDA 算子与 OpenCV 原生 API 进行图像裁剪和缩放的性能。在作者的测试中，直接调用 OpenCV 原生 API 反而比使用 CV-CUDA 快得多。作者提供了代码示例并测量了两种方法的时间消耗。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

可能是我的使用方式不对：在下面的测试中，直接调用 C++ 的 OpenCV API 比使用 CV-CUDA 快很多。

/*
 * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <common/NvDecoder.h>
#include <common/TestUtils.h>
#include <cuda_runtime_api.h>
#include <cvcuda/OpCustomCrop.hpp>
#include <cvcuda/OpResize.hpp>
#include <getopt.h>
#include <opencv2/opencv.hpp>
#include <nvcv/Image.hpp>
#include <nvcv/Tensor.hpp>

#include <chrono>
#include <cmath>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
using namespace chrono;
/**
 * @brief Crop and Resize sample app.
 *
 * The Crop and Resize is a simple pipeline which demonstrates usage of
 * CVCuda Tensor along with a few operators.
 *
 * Input Batch Tensor -> Crop -> Resize -> WriteImage
 */

/**
 * @brief Utility to show usage of sample app
 *
 * Prints a one-line synopsis of the supported command line options to
 * standard output.
 **/
void showUsage()
{
    // The -i placeholder previously lacked its closing '>' which made the
    // synopsis read as if "-b" were part of the image path description.
    std::cout << "usage: ./nvcv_cropandresize_app -i <image file path or image directory> -b <batch size>"
              << std::endl;
}

/**
 * @brief Utility to parse the command line arguments
 *
 * Recognised options: -h/--help, -i/--imagePath <path>, -b/--batch <n>.
 *
 * @param argc       Argument count as received by main.
 * @param argv       Argument vector as received by main.
 * @param imagePath  In/out. Keeps its incoming value unless -i is given.
 *                   After option parsing the path must be openable for
 *                   reading, otherwise the call fails.
 * @param batchSize  In/out. Keeps its incoming value unless -b is given.
 *                   The -b argument must be a whole number in [1, UINT32_MAX].
 * @return 0 on success; -1 when help was requested, an option is missing its
 *         argument, the batch size is invalid, or the image path cannot be
 *         opened.
 **/
int ParseArgs(int argc, char *argv[], std::string &imagePath, uint32_t &batchSize)
{
    static struct option long_options[] = {
        {     "help",       no_argument, 0, 'h'},
        {"imagePath", required_argument, 0, 'i'},
        {    "batch", required_argument, 0, 'b'},
        {          0,                 0, 0,   0}
    };

    int long_index = 0;
    int opt        = 0;
    // The leading ':' in the option string makes getopt_long return ':' for an
    // option that is missing its required argument (and '?' only for unknown
    // options). Without it the ':' case below can never be reached.
    while ((opt = getopt_long(argc, argv, ":hi:b:", long_options, &long_index)) != -1)
    {
        switch (opt)
        {
        case 'h':
            showUsage();
            return -1;
        case 'i':
            imagePath = optarg;
            break;
        case 'b':
        {
            // Parse into a wide signed type first so that negative values and
            // trailing garbage are rejected instead of silently wrapping into
            // a huge uint32_t or throwing out of this function.
            const std::string text(optarg);
            long long         parsed   = 0;
            std::size_t       consumed = 0;
            try
            {
                parsed = std::stoll(text, &consumed);
            }
            catch (const std::exception &)
            {
                consumed = 0;
            }
            if (consumed == 0 || consumed != text.size() || parsed < 1
                || parsed > static_cast<long long>(UINT32_MAX))
            {
                showUsage();
                std::cerr << "Invalid batch size '" << text << "'\n";
                return -1;
            }
            batchSize = static_cast<uint32_t>(parsed);
            break;
        }
        case ':':
            // Option given without its required argument.
            showUsage();
            return -1;
        case '?':
            // Unknown options are still ignored; the leading ':' silences
            // getopt's own diagnostic, so emit an equivalent warning here.
            std::cerr << "Warning: ignoring unrecognized option";
            if (optopt != 0)
            {
                std::cerr << " '-" << static_cast<char>(optopt) << "'";
            }
            std::cerr << "\n";
            break;
        default:
            break;
        }
    }

    // Fail early if the (possibly default) image path cannot be opened.
    std::ifstream imageFile(imagePath);
    if (!imageFile.good())
    {
        showUsage();
        std::cerr << "Image path '" + imagePath + "' does not exist\n";
        return -1;
    }
    return 0;
}

int main(int argc, char *argv[])
{
    // Default parameters
    std::string imagePath = "test.jpg";
    uint32_t    batchSize = 1;
    cv::Mat imgMat = cv::imread(imagePath);

    // Parse the command line paramaters to override the default parameters
    int retval = ParseArgs(argc, argv, imagePath, batchSize);
    if (retval != 0)
    {
        return retval;
    }

    // NvJpeg is used to decode the images to the color format required.
    // Since we need a contiguous buffer for batched input, a buffer is
    // preallocated based on the  maximum image dimensions and  batch size
    // for NvJpeg to write into.

    // Note : The maximum input image dimensions needs to be updated in case
    // of testing with different test images

    int maxImageWidth  = 1920;
    int maxImageHeight = 1080;
    int maxChannels    = 3;

    // tag: Create the cuda stream
    cudaStream_t stream;
    CHECK_CUDA_ERROR(cudaStreamCreate(&stream));

    // tag: Allocate input tensor
    // Allocating memory for RGBI input image batch of uint8_t data type
    // without padding since NvDecode utility currently doesnt support
    // Padded buffers.

    nvcv::TensorDataStridedCuda::Buffer inBuf;
    inBuf.strides[3] = sizeof(uint8_t);
    inBuf.strides[2] = maxChannels * inBuf.strides[3];
    inBuf.strides[1] = maxImageWidth * inBuf.strides[2];
    inBuf.strides[0] = maxImageHeight * inBuf.strides[1];
    CHECK_CUDA_ERROR(cudaMallocAsync(&inBuf.basePtr, batchSize * inBuf.strides[0], stream));

    // tag: Tensor Requirements
    // Calculate the requirements for the RGBI uint8_t Tensor which include
    // pitch bytes, alignment, shape  and tensor layout
    nvcv::Tensor::Requirements inReqs
        = nvcv::Tensor::CalcRequirements(batchSize, {maxImageWidth, maxImageHeight}, nvcv::FMT_RGB8);

    // Create a tensor buffer to store the data pointer and pitch bytes for each plane
    nvcv::TensorDataStridedCuda inData(nvcv::TensorShape{inReqs.shape, inReqs.rank, inReqs.layout},
                                       nvcv::DataType{inReqs.dtype}, inBuf);

    // TensorWrapData allows for interoperation of external tensor representations with CVCUDA Tensor.
    nvcv::Tensor inTensor = nvcv::TensorWrapData(inData);

    // tag: Image Loading
    // NvJpeg is used to load the images to create a batched input device buffer.
    uint8_t             *gpuInput = reinterpret_cast<uint8_t *>(inBuf.basePtr);
    CHECK_CUDA_ERROR(cudaMemcpyAsync(gpuInput, imgMat.data, inBuf.strides[0], cudaMemcpyHostToDevice));
    // The total images is set to the same value as batch size for testing
    uint32_t             totalImages = batchSize;
    // Format in which the decoded output will be saved
    //nvjpegOutputFormat_t outputFormat = NVJPEG_OUTPUT_RGBI;

    //NvDecode(imagePath, batchSize, totalImages, outputFormat, gpuInput);

    // tag: The input buffer is now ready to be used by the operators

    // Set parameters for Crop and Resize
    // ROI dimensions to crop in the input image
    int cropX      = 150;
    int cropY      = 50;
    int cropWidth  = 800;
    int cropHeight = 1000;

    // Set the resize dimensions
    int resizeWidth  = 1600;
    int resizeHeight = 2000;

    //  Initialize the CVCUDA ROI struct
    NVCVRectI crpRect = {cropX, cropY, cropWidth, cropHeight};

    cv::Rect Rect(cropX, cropY, cropWidth, cropHeight);

    auto t1=std::chrono::steady_clock::now();
    // 裁剪图像
    cv::Mat cropImg = imgMat(Rect);

    // 调整图像大小
    cv::resize(cropImg, cropImg, cv::Size(resizeWidth, resizeHeight));
    auto t2=std::chrono::steady_clock::now();
    double dr_ms=std::chrono::duration<double,std::milli>(t2-t1).count();
    std::cout << "opencv costs: " <<  dr_ms << "ms" << std::endl;

    // tag: Allocate Tensors for Crop and Resize
    // Create a CVCUDA Tensor based on the crop window size.
    nvcv::Tensor cropTensor(batchSize, {cropWidth, cropHeight}, nvcv::FMT_RGB8);
    // Create a CVCUDA Tensor based on resize dimensions
    nvcv::Tensor resizedTensor(batchSize, {resizeWidth, resizeHeight}, nvcv::FMT_RGB8);




    // tag: Initialize operators for Crop and Resize
    cvcuda::CustomCrop cropOp;
    cvcuda::Resize     resizeOp;

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
    // tag: Executes the CustomCrop operation on the given cuda stream
    cropOp(stream, inTensor, cropTensor, crpRect);

    // Resize operator can now be enqueued into the same stream
    resizeOp(stream, cropTensor, resizedTensor, NVCV_INTERP_LINEAR);

    // tag: Profile section

    cudaEventRecord(stop);
    cudaEventSynchronize(stop);
    float operatorms = 0;
    cudaEventElapsedTime(&operatorms, start, stop);
    std::cout << "Time for Crop and Resize : " << operatorms << " ms" << std::endl;


    // tag: Copy the buffer to CPU and write resized image into .bmp file
    WriteRGBITensor(resizedTensor, stream);

    // tag: Clean up
    CHECK_CUDA_ERROR(cudaStreamDestroy(stream));

    // tag: End of Sample
}

输出

opencv costs: 3.16336ms
Time for Crop and Resize : 200.148 ms
Writing to ./cvcudatest_0.jpg 4800 1600 2000