[CUDA] 读取一张 raw 图像（NV12 格式），转换为 RGB 格式并保存为 JPG 图像，同时进行耗时统计。

最新推荐文章于 2025-01-25 20:44:59 发布

AndreLe

最新推荐文章于 2025-01-25 20:44:59 发布

阅读量565

点赞数 5

文章标签： opencv gpu算力 图像处理 计算机视觉 c++

本文链接：https://blog.youkuaiyun.com/AndreLe/article/details/140300489

版权

本文先在主机端读取 NV12 格式的 raw 图像，再利用 CUDA 核函数将其转换为 RGB 格式，最后通过 OpenCV 保存为本地 JPG 图像，并统计核函数的执行耗时。

话不多说，直接上代码：

/*****************************************
Copyright (c) 2024 Yang Li
@filename:yuv_rgb.cu
@datetime:2024.07.09
@author:LY
@e-mail:1454594112@qq.com
@blog:https://blog.youkuaiyun.com/AndreLe
*****************************************/


#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
 
using namespace std;
using namespace cv;
 

/**
 * Loads one NV12 frame from a raw binary file into a newly allocated buffer.
 *
 * Exactly width * height * 3 / 2 bytes are read from the start of the file;
 * any trailing bytes beyond the first frame are ignored.
 *
 * @param filePath path of the raw file to read
 * @param width    frame width in pixels, must be positive
 * @param height   frame height in pixels, must be positive
 * @return pointer to the frame bytes, or NULL if the size is invalid, the
 *         file cannot be opened, or it holds less than one full frame.
 *         The storage is allocated with new char[]; the caller owns it and
 *         must release it with delete[].
 */
u_char * get_NV12_buffer(const std::string &filePath, int width, int height) 
{
    // Reject sizes that would otherwise turn into a negative allocation.
    if (width <= 0 || height <= 0) {
        std::cerr << "Error: Invalid NV12 frame size " << width << "x" << height << std::endl;
        return NULL;
    }

    // Open positioned at the end so tellg() reports the file size.
    std::ifstream file(filePath, std::ios::binary | std::ios::ate);

    if (!file.is_open()) {
        std::cerr << "Error: Unable to open NV12 file " << filePath << std::endl;
        return NULL;
    }

    const std::streamsize fileSize = file.tellg();

    // Widen before multiplying so large frames cannot overflow int.
    const std::streamsize size = static_cast<std::streamsize>(width) * height * 3 / 2;

    // Also covers tellg() failure, which reports -1.
    if (fileSize < size) {
        std::cerr << "Error: NV12 file " << filePath << " holds " << fileSize
                  << " bytes, expected at least " << size << std::endl;
        return NULL;
    }

    file.seekg(0, std::ios::beg);

    char *buffer = new char[size];

    if (!file.read(buffer, size)) {
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        delete[] buffer;    // do not leak the frame buffer on a failed read
        return NULL;
    }
    std::cout << "length=" << size << std::endl;
    return (u_char *)buffer;
}
 

/**
 * Clamps a channel value to [0, 255] and truncates it to a byte.
 *
 * Without the clamp, saturated colours fall outside the range of u_char and
 * the narrowing conversion produces wrapped or undefined results.
 */
__device__ __forceinline__ u_char saturateToByte(float value)
{
    return static_cast<u_char>(fminf(fmaxf(value, 0.0f), 255.0f));
}

/**
 * Converts an NV12 frame to packed 8-bit RGB, one thread per pixel.
 *
 * Launch layout: 2D grid of 2D blocks; thread (x, y) handles pixel (x, y).
 * The grid may overshoot the image, out-of-range threads simply exit.
 * No shared memory is used.
 *
 * Input layout as indexed here: width * height luma bytes, followed by
 * chroma rows of `width` bytes each, shared by two image rows, with U at
 * even offsets and V at the following odd offsets.
 * NOTE(review): the chroma indexing assumes even width and height - confirm
 * against the producer of the raw file.
 *
 * @param nv12data device pointer to the NV12 frame (width * height * 3 / 2 bytes)
 * @param rgbdata  device pointer to the output, 3 bytes per pixel in R, G, B order
 * @param width    frame width in pixels
 * @param height   frame height in pixels
 */
__global__ void imageYUV2RGB(u_char* nv12data, u_char* rgbdata,int width,int height) 
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;    // pixel column
    const int y = threadIdx.y + blockIdx.y * blockDim.y;    // pixel row

    if (x >= width || y >= height)
    {
        return;
    }

    const u_char *ybase = nv12data;
    const u_char *uvbase = nv12data + static_cast<size_t>(width) * height;

    // size_t keeps index * 3 from overflowing int on very large frames.
    const size_t index = static_cast<size_t>(y) * width + x;
    // Each 2x2 pixel quad shares one interleaved U/V pair.
    const size_t uvIndex = static_cast<size_t>(y / 2) * width + (x / 2) * 2;

    const float Y = ybase[index];
    const float U = uvbase[uvIndex] - 128.0f;
    const float V = uvbase[uvIndex + 1] - 128.0f;

    // Float literals keep the arithmetic in single precision on the device.
    rgbdata[index * 3]     = saturateToByte(Y + 1.402f * V);                    // R
    rgbdata[index * 3 + 1] = saturateToByte(Y - 0.34413f * U - 0.71414f * V);   // G
    rgbdata[index * 3 + 2] = saturateToByte(Y + 1.772f * U);                    // B
}
 // Rounds globalSize up to the nearest multiple of groupSize.
 // A value that is already a multiple is returned unchanged.
 size_t RoundUp(int groupSize, int globalSize)
{
    // How far globalSize sits past the previous multiple of groupSize.
    const int remainder = globalSize % groupSize;
    // Amount needed to reach the next multiple (nothing if already aligned).
    const int padding = (remainder == 0) ? 0 : groupSize - remainder;
    return globalSize + padding;
}


/**
 * Loads "test.raw" as a 3840x2176 NV12 frame, converts it to RGB on the GPU,
 * prints the kernel execution time and writes the result to "output.jpg".
 *
 * @return 0 on success, 1 if loading, any CUDA call, or saving fails.
 */
int main() {

    const int img_w = 3840;
    const int img_h = 2176;
    const int blockDim1D = 32;    // threads per block along each axis

    u_char *nv12 = get_NV12_buffer("test.raw", img_w, img_h);
    if (nv12 == NULL) {
        return 1;    // get_NV12_buffer has already reported the reason
    }

    const size_t length = static_cast<size_t>(img_w) * img_h;
    const size_t yuvMemSize = length * sizeof(u_char) * 3 / 2;
    const size_t rgbMemSize = length * sizeof(u_char) * 3;

    u_char *yuv_device = NULL;
    u_char *rgb_device = NULL;

    // Reports a failed CUDA call; returns true when the call succeeded.
    auto cudaOk = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            std::cerr << "CUDA error during " << what << ": "
                      << cudaGetErrorString(status) << std::endl;
            return false;
        }
        return true;
    };

    // Releases everything acquired so far; cudaFree(NULL) is a no-op.
    auto cleanup = [&]() {
        cudaFree(yuv_device);
        cudaFree(rgb_device);
        // The host frame buffer is allocated as char[] by get_NV12_buffer.
        delete[] reinterpret_cast<char *>(nv12);
    };

    if (!cudaOk(cudaMalloc((void **)&yuv_device, yuvMemSize), "cudaMalloc(yuv)") ||
        !cudaOk(cudaMalloc((void **)&rgb_device, rgbMemSize), "cudaMalloc(rgb)") ||
        !cudaOk(cudaMemcpy(yuv_device, nv12, yuvMemSize, cudaMemcpyHostToDevice),
                "cudaMemcpy(host to device)")) {
        cleanup();
        return 1;
    }

    // The grid holds the NUMBER OF BLOCKS per axis: the padded image size
    // divided by the block size, not the padded image size itself.
    dim3 threadsPerBlock(blockDim1D, blockDim1D);
    dim3 blocksPerGrid(static_cast<unsigned int>(RoundUp(blockDim1D, img_w) / blockDim1D),
                       static_cast<unsigned int>(RoundUp(blockDim1D, img_h) / blockDim1D));

    auto start = std::chrono::high_resolution_clock::now();

    imageYUV2RGB<<< blocksPerGrid, threadsPerBlock >>>(yuv_device, rgb_device, img_w, img_h);
    const cudaError_t launchStatus = cudaGetLastError();    // invalid launch configuration
    // Wait for the kernel to finish; otherwise only the launch overhead on
    // the CPU side would be measured.
    const cudaError_t syncStatus = cudaDeviceSynchronize();

    auto stop = std::chrono::high_resolution_clock::now();

    if (!cudaOk(launchStatus, "kernel launch") ||
        !cudaOk(syncStatus, "kernel execution")) {
        cleanup();
        return 1;
    }

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    std::cout << "Time taken by : " << duration.count() / 1000.0 << " ms" << std::endl;

    cv::Mat rgb_host = cv::Mat::zeros(img_h, img_w, CV_8UC3);
    if (!cudaOk(cudaMemcpy(rgb_host.data, rgb_device, rgbMemSize, cudaMemcpyDeviceToHost),
                "cudaMemcpy(device to host)")) {
        cleanup();
        return 1;
    }

    // Save the image: imwrite expects BGR channel order, the kernel emits RGB.
    cv::Mat dst_Image(img_h, img_w, CV_8UC3);
    cv::cvtColor(rgb_host, dst_Image, cv::COLOR_RGB2BGR);
    const bool saved = cv::imwrite("output.jpg", dst_Image);
    if (!saved) {
        std::cerr << "Error: Unable to write output.jpg" << std::endl;
    }

    cleanup();

    return saved ? 0 : 1;
}