本文先在主机端读取NV12格式的raw图像,再用cuda核函数将其转换为rgb格式,最后利用opencv编码为JPG图像并保存到本地。
话不多说,直接上代码:
/*****************************************
Copyright (c) 2024 Yang Li
@filename:yuv_rgb.cu
@datetime:2024.07.09
@author:LY
@e-mail:1454594112@qq.com
@blog:https://blog.youkuaiyun.com/AndreLe
*****************************************/
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace std;
using namespace cv;
/**
 * Loads one NV12 frame from a raw file into a newly allocated host buffer.
 *
 * @param filePath  Path of the raw file to read.
 * @param width     Frame width in pixels; must be positive.
 * @param height    Frame height in pixels; must be positive.
 * @return Pointer to width * height * 3 / 2 bytes read from the start of the
 *         file, or NULL on failure (a message is written to std::cerr).
 *         The storage is allocated with `new char[]` and is owned by the
 *         caller, who should release it with
 *         `delete[] reinterpret_cast<char *>(ptr)`.
 */
u_char * get_NV12_buffer(const std::string &filePath, int width, int height)
{
    if (width <= 0 || height <= 0) {
        std::cerr << "Error: Invalid NV12 dimensions " << width << "x" << height << std::endl;
        return NULL;
    }

    // Opened positioned at the end so that tellg() reports the file length.
    std::ifstream file(filePath, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        std::cerr << "Error: Unable to open NV12 file " << filePath << std::endl;
        return NULL;
    }
    const std::streamsize fileSize = file.tellg();
    file.seekg(0, std::ios::beg);

    // Frame size is floor(pixels * 3 / 2); computed in 64 bits as
    // pixels + pixels / 2 so that large frames cannot overflow.
    const long long pixels = static_cast<long long>(width) * height;
    const long long size = pixels + pixels / 2;

    // Reject short files before allocating anything.
    if (static_cast<long long>(fileSize) < size) {
        std::cerr << "Error: NV12 file " << filePath << " is too small: "
                  << fileSize << " bytes, need " << size << std::endl;
        return NULL;
    }

    char *buffer = new char[static_cast<size_t>(size)];
    if (!file.read(buffer, static_cast<std::streamsize>(size))) {
        delete[] buffer;  // do not leak the buffer on a failed read
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        return NULL;
    }
    std::cout << "length=" << size << std::endl;
    return reinterpret_cast<u_char *>(buffer);
}
/**
 * Saturates a channel value to [0, 255] and narrows it to one byte.
 * In-range values are truncated toward zero by the final cast.
 */
__device__ __forceinline__ u_char saturateToByte(float value)
{
    return static_cast<u_char>(fminf(fmaxf(value, 0.0f), 255.0f));
}

/**
 * Converts an NV12 frame to interleaved 8-bit RGB, one thread per pixel.
 *
 * Input layout as read by this kernel: `width * height` luma bytes followed
 * by interleaved U,V byte pairs, one pair shared by each 2x2 pixel block.
 * Chroma rows are addressed with a stride of `width` bytes, so `width` is
 * expected to be even.
 *
 * Launch layout: a 2D grid of 2D blocks that covers at least
 * width x height threads; surplus threads return immediately. No shared
 * memory is used.
 *
 * @param nv12data  Device pointer to the NV12 frame (width * height * 3 / 2 bytes).
 * @param rgbdata   Device pointer to the output (width * height * 3 bytes, R,G,B order).
 * @param width     Frame width in pixels.
 * @param height    Frame height in pixels.
 */
__global__ void imageYUV2RGB(u_char* nv12data, u_char* rgbdata,int width,int height)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x; // pixel column
    const int y = threadIdx.y + blockIdx.y * blockDim.y; // pixel row
    if (x >= width || y >= height)
    {
        return;
    }

    // Offsets are computed in size_t so that large frames cannot overflow int.
    const size_t pixel = static_cast<size_t>(y) * width + x;
    const size_t chroma = static_cast<size_t>(y / 2) * width + static_cast<size_t>(x / 2) * 2;
    const u_char *uvPlane = nv12data + static_cast<size_t>(width) * height;

    const float Y = nv12data[pixel];
    const float U = uvPlane[chroma] - 128.0f;
    const float V = uvPlane[chroma + 1] - 128.0f;

    // Single-precision literals keep the arithmetic out of FP64. Each result
    // is saturated because converting an out-of-range floating-point value to
    // u_char is undefined and shows up as wrapped, wrongly coloured pixels.
    u_char *out = rgbdata + pixel * 3;
    out[0] = saturateToByte(Y + 1.402f * V);                  // R
    out[1] = saturateToByte(Y - 0.34413f * U - 0.71414f * V); // G
    out[2] = saturateToByte(Y + 1.772f * U);                  // B
}
/**
 * Rounds `globalSize` up to the nearest multiple of `groupSize`.
 *
 * @param groupSize   Granularity to round to; used as a divisor, so it must
 *                    not be zero.
 * @param globalSize  Value to round up.
 * @return `globalSize` unchanged when it is already a multiple of
 *         `groupSize`, otherwise the next larger multiple.
 */
size_t RoundUp(int groupSize, int globalSize)
{
    const int remainder = globalSize % groupSize;
    // Amount needed to reach the next multiple; nothing when already aligned.
    const int padding = (remainder != 0) ? groupSize - remainder : 0;
    return globalSize + padding;
}
/**
 * Reports a failed CUDA runtime call on std::cerr.
 *
 * @param status  Status returned by the runtime call.
 * @param what    Short description of the call, used in the message.
 * @return true when `status` is cudaSuccess, false otherwise.
 */
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << std::endl;
    return false;
}

/**
 * Reads "test.raw" as a 3840x2176 NV12 frame, converts it to RGB on the GPU,
 * prints the kernel time and writes the result to "output.jpg".
 *
 * @return 0 on success, 1 if reading, any CUDA call, or writing fails.
 */
int main() {
    const int img_w = 3840;
    const int img_h = 2176;
    const int kTile = 32;  // threads per block along each axis

    u_char *nv12 = get_NV12_buffer("test.raw", img_w, img_h);
    if (nv12 == NULL) {
        return 1;  // get_NV12_buffer has already written the reason to stderr
    }

    // Byte counts are kept in size_t so that larger frames cannot overflow int.
    const size_t length = static_cast<size_t>(img_w) * img_h;
    const size_t yuvMemSize = length * sizeof(u_char) * 3 / 2;
    const size_t rgbMemSize = length * sizeof(u_char) * 3;

    u_char *yuv_device = NULL;
    u_char *rgb_device = NULL;
    int exitCode = 1;

    // Single-pass block: any failure breaks out to the shared cleanup below.
    do {
        if (!cudaSucceeded(cudaMalloc((void **)&yuv_device, yuvMemSize), "cudaMalloc(yuv)")) break;
        if (!cudaSucceeded(cudaMalloc((void **)&rgb_device, rgbMemSize), "cudaMalloc(rgb)")) break;
        if (!cudaSucceeded(cudaMemcpy(yuv_device, nv12, yuvMemSize, cudaMemcpyHostToDevice),
                           "cudaMemcpy(host to device)")) break;

        // RoundUp yields a padded pixel count; dividing by the tile size turns
        // it into the number of blocks needed to cover the image.
        const dim3 threadsPerBlock(kTile, kTile);
        const dim3 blocksPerGrid(static_cast<unsigned int>(RoundUp(kTile, img_w) / kTile),
                                 static_cast<unsigned int>(RoundUp(kTile, img_h) / kTile));

        auto start = std::chrono::high_resolution_clock::now();
        imageYUV2RGB<<< blocksPerGrid,threadsPerBlock >>> (yuv_device, rgb_device,img_w,img_h);
        const cudaError_t launchStatus = cudaGetLastError();
        // Wait for the GPU; otherwise only the launch overhead would be timed.
        const cudaError_t syncStatus = cudaDeviceSynchronize();
        auto stop = std::chrono::high_resolution_clock::now();
        if (!cudaSucceeded(launchStatus, "imageYUV2RGB launch")) break;
        if (!cudaSucceeded(syncStatus, "imageYUV2RGB execution")) break;

        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
        std::cout << "Time taken by : " << duration.count() /1000.0<< " ms" << std::endl;

        Mat rgb_host = Mat::zeros(img_h, img_w, CV_8UC3);
        if (!cudaSucceeded(cudaMemcpy(rgb_host.data, rgb_device, rgbMemSize, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device to host)")) break;

        // Save the image: the kernel emits R,G,B order, imwrite expects B,G,R.
        cv::Mat dst_Image(img_h, img_w, CV_8UC3);
        cv::cvtColor(rgb_host, dst_Image,cv::COLOR_RGB2BGR);
        if (!cv::imwrite("output.jpg", dst_Image)) {
            std::cerr << "Error: Unable to write output.jpg" << std::endl;
            break;
        }
        exitCode = 0;
    } while (false);

    // free (cudaFree accepts NULL, so partially completed runs are fine)
    cudaFree(yuv_device);
    cudaFree(rgb_device);
    // The host buffer was allocated as char[] by get_NV12_buffer.
    delete[] reinterpret_cast<char *>(nv12);
    return exitCode;
}