NVIDIA CUDA Video Decoder(H.264)

本文介绍了如何在CUDA环境下配置并使用NVIDIA的CUDA Video Decoder进行H.264视频解码。内容包括设置CUDA开发环境、所需的库和头文件、关键数据结构如CUcontext和CUvideodecoder,以及初始化、调用API和释放资源的流程。解码后的NV12原始数据可以进行渲染显示或进一步的像素格式转换。
部署运行你感兴趣的模型镜像


配置好CUDA开发环境,同时确保合适的显卡支持


开发时需要的 Lib 和 Header File

<span style="font-size:14px;">#ifdef  _DEBUG
#pragma comment(lib, "cutil32D.lib")
#else
#pragma comment(lib, "cutil32.lib")
#endif
#pragma comment(lib, "nvcuvid.lib")</span>


动态链接时需要 nvcuvid.dll 和 cutil32.dll,因此运行程序前请确保这两个 dll 文件所在的路径已正确配置(例如与可执行文件位于同一目录,或位于系统 PATH 中)。

<span style="font-size:14px;">#include "cuviddec.h"
#include "cutil.h"
#include "cutil_inline_drvapi.h"
#include "cuda_runtime_api.h"
#include "cutil_inline_runtime.h"
#include "nvcuvid.h"</span>

一些参数的注释:

CUcontext                     m_cuContext; // 类似于 Windows 的 Handle(句柄)

CUvideoctxlock            m_CtxLock; // 类似于 CriticalSection(Windows 关键段)

CUvideodecoder m_CUDADecoder; // Decoder 解码器

CUvideoparser m_hParser; // Frame Parser 分析每帧frame使用

CUDAPassStruct m_PassData; // 传递给Callback Function 的使用者自定义资料结构


调用的API

初始化:

<span style="font-size:14px;">// CUDA Test: initialise the driver API, create a context and a context lock,
// then create the H.264 decoder. Every failure path jumps to the EF label,
// which is defined outside this snippet.
CUresult cudaRes = CUDA_SUCCESS;
CUdevice device = 0;    // CUdevice is an integer device handle, not a pointer, so 0 rather than NULL
int iCount = 0;

cudaRes = cuInit(0);
if(cudaRes != CUDA_SUCCESS)
    goto EF;
// Check for a min spec of Compute 1.1 capability before running
if (!cutilDrvCudaCapabilities(1,1))
{
    cutilExit(0, NULL);
    goto EF;
}
// Device 0 is used below, so at least one device has to be reported.
cudaRes = cuDeviceGetCount(&iCount);
if(cudaRes != CUDA_SUCCESS || iCount < 1)
    goto EF;
cudaRes = cuDeviceGet(&device, 0);
if(cudaRes != CUDA_SUCCESS)
    goto EF;
cudaRes = cuCtxCreate(&m_cuContext, 0, device);
if(cudaRes != CUDA_SUCCESS)
    goto EF;
cudaRes = cuvidCtxLockCreate(&m_CtxLock, m_cuContext);
if(cudaRes != CUDA_SUCCESS)
    goto EF;
// Declared without an initializer and cleared with memset.
// NOTE(review): if EF lives in this same scope, an initializer here would make
// the earlier `goto EF` jumps ill-formed in C++ - confirm before changing.
CUVIDDECODECREATEINFO dci;
// Fill the decoder-create-info struct from the given video-format struct.
memset(&dci, 0, sizeof(CUVIDDECODECREATEINFO));
// Create video decoder
dci.CodecType  = cudaVideoCodec_H264;
dci.ulWidth  = iW;    // coded video width
dci.ulHeight = iH;    // coded video height
dci.ulNumDecodeSurfaces = 2;
// Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes)
while (dci.ulNumDecodeSurfaces * dci.ulWidth * dci.ulHeight > 16*1024*1024)
{
    dci.ulNumDecodeSurfaces--;
}
dci.ChromaFormat = cudaVideoChromaFormat_420;     // pixel format of the input video
dci.OutputFormat  = cudaVideoSurfaceFormat_NV12;   // pixel format of the decoded output (the article states only NV12 was supported at the time)
dci.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;

// No scaling
dci.ulTargetWidth = dci.ulWidth;    // output width after decode
dci.ulTargetHeight = dci.ulHeight;  // output height after decode
dci.ulNumOutputSurfaces = 2;        // per the article, the header comment says only 2 are supported - TODO confirm
dci.ulCreationFlags = cudaVideoCreate_Default;    // see the header file for the meaning of the flags
dci.vidLock = NULL;   // the context lock created above is intentionally not attached (original: //ctx)
cudaRes = cuvidCreateDecoder(&m_CUDADecoder, &dci);     // create the H.264 decoder
if(cudaRes != CUDA_SUCCESS)
    goto EF;</span>
<span style="font-size:14px;">
//传递的使用者结构//
m_PassData.CUDADecoder = m_CUDADecoder;
m_PassData.iWidth = dci.ulWidth;
m_PassData.iHeight = dci.ulHeight;
m_PassData.pFrameBuffer = m_pFrameBuffer;
m_PassData.puiFrameSize = &m_uiFrameSize;
m_PassData.pLocker = &m_Locker;


//建立 Parser //
CUVIDPARSERPARAMS oVideoParserParameters;  
memset(&oVideoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));
oVideoParserParameters.CodecType = cudaVideoCodec_H264;
oVideoParserParameters.ulMaxNumDecodeSurfaces = dci.ulNumDecodeSurfaces;
oVideoParserParameters.ulMaxDisplayDelay = 4; // this flag is needed so the parser will push frames out to the decoder as quickly as it can//
oVideoParserParameters.pUserData = &m_PassData;   //传递使用者自定义的结构//
oVideoParserParameters.pfnSequenceCallback  = NULL;    // Called before decoding frames and/or whenever there is a format change//
oVideoParserParameters.pfnDecodePicture = HandlePictureDecode;    // Called when a picture is ready to be decoded (decode order)//
oVideoParserParameters.pfnDisplayPicture = HandlePictureDisplay;   // Called whenever a picture is ready to be displayed (display order)//
cudaRes = cuvidCreateVideoParser(&m_hParser, &oVideoParserParameters);
if(cudaRes != CUDA_SUCCESS) 
    goto EF;</span>


使用的顺序和 Callback Function(回调函数)

提供给 Parser使用的两个 Callback function:

<span style="font-size:14px;">static int CUDAAPI HandlePictureDecode(void * pUserData, CUVIDPICPARAMS * pPicParams);</span>      //解析完可以回调cuvidDecodePicture

<span style="font-size:14px;">static int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO * pPicParams);</span> //解码完成后回调的地方


使用Parser是为了取得CUVIDPICPARAMS资料后传递给 Decode API 使用

<span style="font-size:14px;">// Parser callback: forwards the parsed picture parameters to the decoder.
//
// pUserData   - the CUDAPassStruct registered with the parser; supplies the decoder handle.
// pPicParams  - picture parameters produced by the parser for this picture.
//
// Returns 1 when cuvidDecodePicture succeeds, 0 on failure or invalid arguments
// (NVCUVID parser callbacks report failure with 0 and success with >= 1).
int CUDAAPI HandlePictureDecode(void * pUserData, CUVIDPICPARAMS * pPicParams)
{
    CUDAPassStruct* pa = reinterpret_cast<CUDAPassStruct *>(pUserData);
    if (pa == NULL || pPicParams == NULL)
        return 0;

    // Report decode failures to the parser instead of unconditionally claiming success.
    const CUresult cudaRes = cuvidDecodePicture(pa->CUDADecoder, pPicParams);
    return (cudaRes == CUDA_SUCCESS) ? 1 : 0;
}

// Parser callback: maps the decoded picture, copies it to the host frame buffer
// and publishes the number of bytes copied.
//
// pUserData   - the CUDAPassStruct registered with the parser; supplies the decoder
//               handle, frame height, destination buffer and size output.
// pPicParams  - display info from the parser; picture_index selects the frame to map.
//
// The copy size is pitch * height * 3/2, i.e. rows are copied with the decoder's
// pitch, not the visible width.
// NOTE(review): the capacity of pFrameBuffer is not visible here - the caller must
// size it for the pitched frame; confirm against the allocation site.
//
// Returns 1 when map, copy and unmap all succeed, 0 otherwise
// (NVCUVID parser callbacks report failure with 0 and success with >= 1).
int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO * pPicParams)
{
    CUDAPassStruct* pa = reinterpret_cast<CUDAPassStruct *>(pUserData);
    if (pa == NULL || pPicParams == NULL)
        return 0;
    if (pa->pFrameBuffer == NULL || pa->puiFrameSize == NULL)
        return 0;

    CUVIDPROCPARAMS oVideoProcessingParameters;
    memset(&oVideoProcessingParameters, 0, sizeof(CUVIDPROCPARAMS));
    CUdeviceptr pDecodedFrame = 0;
    unsigned int nDecodedPitch = 0;

    CUresult oResult = cuvidMapVideoFrame(pa->CUDADecoder, pPicParams->picture_index, &pDecodedFrame, &nDecodedPitch, &oVideoProcessingParameters);
    if (oResult != CUDA_SUCCESS)
        return 0;   // nothing was mapped, so there is nothing to copy or unmap

    const unsigned int nv12_size = nDecodedPitch * (pa->iHeight + pa->iHeight/2);  // 12bpp
    oResult = cuMemcpyDtoH(pa->pFrameBuffer, pDecodedFrame, nv12_size);
    const bool bCopied = (oResult == CUDA_SUCCESS);
    if (bCopied)
        *pa->puiFrameSize = nv12_size;   // publish the size only for a complete frame

    // The frame was mapped successfully, so it is always unmapped, even if the copy failed.
    oResult = cuvidUnmapVideoFrame(pa->CUDADecoder, pDecodedFrame);
    return (bCopied && oResult == CUDA_SUCCESS) ? 1 : 0;
}</span>


释放


<span style="font-size:14px;">CUresult cudaRes = CUDA_SUCCESS;
cudaRes = cuvidCtxLockDestroy(m_CtxLock);
cudaRes = cuvidDestroyDecoder(m_CUDADecoder);
cudaRes = cuvidDestroyVideoParser(m_hParser);
cudaRes = cuCtxDestroy(m_cuContext);
m_CtxLock = 0;
m_CUDADecoder = 0;
m_hParser = 0;
m_cuContext = 0;</span>

外部处理

由于目前输出的Pixel Format 为 NV12,在取得解码后的 Raw Data后有两种办法:

(1)用NV12的Render显示

(2)再转换一次 PixelFormat





参考:https://hauhan.wordpress.com/2010/07/17/nvidia-cuda-h-264-decode-%E5%BF%83%E5%BE%97/


您可能感兴趣的与本文相关的镜像

PyTorch 2.9

PyTorch 2.9

PyTorch
Cuda

PyTorch 是一个开源的 Python 机器学习库,基于 Torch 库,底层由 C++ 实现,应用于人工智能领域,如计算机视觉和自然语言处理

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值