下面是nvcodec解码案例,可以参考:
#include "nvcuvid.h"
#include <cuda_runtime.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "utils/demuxer.h"
#define START_CODE_ANNEXB 0x00000001
#define START_CODE_SIZE 4
// Descriptor for one chunk of compressed bitstream.
// NOTE(review): no code visible in this file reads or writes this type.
typedef struct packet_t {
uint8_t* data;  // start of the packet bytes
int size;  // presumably the number of valid bytes at `data` - TODO confirm
int64_t pts;  // presentation timestamp; the time base is not established here
} packet;
// State shared between main() and the nvcuvid parser callbacks of this test
// program. main() zero-fills the whole structure before use, so every handle
// starts out as NULL/0.
typedef struct nvcuvid_test_ctx_t {
CUvideodecoder cudecoder;  // decoder handle, created in handle_video_sequence()
CUvideoparser cuparser;  // parser handle, created in main()
CUVIDPARSERPARAMS cuparseinfo;  // parser creation parameters, filled in main()
CUVIDDECODECAPS caps8;  // capability query result written by handle_video_sequence()
CUVIDDECODECAPS caps10;  // not used by the code visible in this file
CUVIDDECODECAPS caps12;  // not used by the code visible in this file
CUcontext cu_ctx;  // CUDA context created in cu_init()
CUvideoctxlock cu_ctx_lock;  // context lock created in cu_init()
FFmpegDemuxer* demuxer;  // packet source, created in main()
uint64_t frame_count;  // number of packets handed to the parser so far
const char* in_file_name;  // input path (argv[1]); not owned
void* host_data;  // 1 MiB heap buffer used by demux() to assemble one packet
void* tmp_data;  // 1024-byte heap buffer holding the start code carried over to the next packet
int tmp_data_size;  // number of valid bytes in tmp_data (0, 3 or 4)
int coded_width;  // coded_width reported by the sequence callback
int coded_height;  // coded_height reported by the sequence callback
int out_width;  // decoder target width; set to coded_width when the decoder is created
int out_height;  // decoder target height; set to coded_height when the decoder is created
cudaVideoCodec codec_type;  // preset to H.264 in main(), overwritten by the sequence callback
int bit_depth_minus8; // 0-8bit, 2-10bit, 4-12bit
int decode_surface;  // min_num_decode_surfaces reported by the sequence callback
unsigned char progressive_sequence;  // progressive_sequence flag from the sequence callback
unsigned int bit_rate;  // bitrate reported by the sequence callback
cudaVideoSurfaceFormat out_pix_fmt;  // decoder output format; set to NV12 in main()
cudaVideoChromaFormat in_pix_fmt;  // chroma format reported by the sequence callback
cudaVideoDeinterlaceMode deinter_lace_mode;  // not used by the code visible in this file
// Extra cropping applied on top of the stream's display area when the decoder
// is reconfigured.
struct {
int left;
int top;
int right;
int bottom;
} crop;
// Not used by the code visible in this file.
struct {
int width;
int height;
} resize;
// Frame rate reported by the sequence callback, as numerator/denominator.
struct {
unsigned int num;
unsigned int den;
} framerate;
int eos;  // not used by the code visible in this file
int nb_surfaces;  // not used by the code visible in this file
int internal_error;  // last status stored by a callback; 0 means success
int need_resize;  // not used by the code visible in this file
int decoder_flushing;  // set to 1 once an end-of-stream packet was produced; ends the main loop
int in_file_size;  // input file size obtained with ftell() in open_file()
int read_size;  // running total of payload bytes produced by the demux functions
FILE* in_file;  // input file opened by open_file()
FILE* out_file;  // "<input>.yuv" output file opened by open_file()
} nvcuvid_test_ctx;
// Prints the fields of a decoder capability record to stdout, one per line.
//
// caps: record to dump; a NULL pointer is reported and ignored.
//
// Every argument is cast to the type its conversion specifier expects, and
// nMaxMBCount is printed exactly once.
static void print_caps(CUVIDDECODECAPS* caps) {
    if (caps == NULL) {
        printf("caps is null\n");
        return;
    }
    printf("eCodecType: %d\n", (int)caps->eCodecType);
    printf("eChromaFormat: %d\n", (int)caps->eChromaFormat);
    printf("nBitDepthMinus8: %u\n", (unsigned int)caps->nBitDepthMinus8);
    printf("bIsSupported: %u\n", (unsigned int)caps->bIsSupported);
    printf("nNumNVDECs: %u\n", (unsigned int)caps->nNumNVDECs);
    printf("nMaxWidth: %u\n", (unsigned int)caps->nMaxWidth);
    printf("nMaxHeight: %u\n", (unsigned int)caps->nMaxHeight);
    printf("nMaxMBCount: %u\n", (unsigned int)caps->nMaxMBCount);
    printf("nMinWidth: %u\n", (unsigned int)caps->nMinWidth);
    printf("nMinHeight: %u\n", (unsigned int)caps->nMinHeight);
}
// Parser sequence callback: called with the stream format when the parser
// meets a sequence header. Queries the decoder capabilities for that format,
// caches the stream parameters in the context and then either reconfigures
// the existing decoder or creates the first one.
//
// opaque: the nvcuvid_test_ctx registered as the parser's pUserData.
// format: stream format reported by the parser.
//
// Returns 0 on failure (ctx->internal_error holds the status). On success
// returns the number of decode surfaces when it is greater than 1, otherwise
// 1; per the nvcuvid sequence-callback contract a value above 1 overrides the
// DPB size the parser was created with (main() creates it with a single
// surface).
static int handle_video_sequence(void* opaque, CUVIDEOFORMAT* format) {
    nvcuvid_test_ctx* ctx = (nvcuvid_test_ctx*)opaque;
    CUVIDDECODECAPS* caps = NULL;
    CUVIDDECODECREATEINFO cuinfo;
    CUVIDRECONFIGUREDECODERINFO cf;
    int surfaces = 1;

    if (ctx == NULL || format == NULL) {
        printf("handle_video_sequence: invalid arguments\n");
        return 0;
    }
    ctx->internal_error = 0;

    // Capability query: only the three input fields are set, the rest of the
    // record (including reserved fields) starts out zeroed.
    caps = &ctx->caps8;
    memset(caps, 0, sizeof(*caps));
    caps->eCodecType = format->codec;
    caps->eChromaFormat = format->chroma_format;
    caps->nBitDepthMinus8 =
        format->bit_depth_luma_minus8;  // 0-8bit, 2-10bit, 4-12bit
    ctx->internal_error = cuvidGetDecoderCaps(caps);
    if (ctx->internal_error != 0) {
        printf("cuvidGetDecoderCaps failed,ret=%d\n", ctx->internal_error);
        return 0;
    }
    print_caps(caps);
    if (!caps->bIsSupported) {
        printf("Codec not supported on this GPU\n");
        // The query itself succeeded, so record a failure explicitly;
        // otherwise the callback would report success.
        ctx->internal_error = -1;
        return 0;
    }

    // Cache the stream parameters.
    ctx->bit_rate = format->bitrate;
    ctx->codec_type = format->codec;
    ctx->coded_width = format->coded_width;
    ctx->coded_height = format->coded_height;
    ctx->in_pix_fmt = format->chroma_format;
    ctx->framerate.num = format->frame_rate.numerator;
    ctx->framerate.den = format->frame_rate.denominator;
    ctx->bit_depth_minus8 = format->bit_depth_luma_minus8;
    ctx->progressive_sequence = format->progressive_sequence;
    ctx->decode_surface = format->min_num_decode_surfaces;
    printf(
        "bitrate:%d,codec:%d,coded_width:%d,coded_height:%d,chroma_format:%d,"
        "frame_rate:%d/%d,bit_depth_luma_minus8:%d,progressive_sequence:%d,"
        "min_num_decode_surfaces:%d\n",
        ctx->bit_rate, ctx->codec_type, ctx->coded_width, ctx->coded_height,
        ctx->in_pix_fmt, ctx->framerate.num, ctx->framerate.den,
        ctx->bit_depth_minus8, ctx->progressive_sequence, ctx->decode_surface);
    surfaces = ctx->decode_surface > 1 ? ctx->decode_surface : 1;

    // Reject streams larger than the reported hardware limits.
    if ((unsigned int)ctx->coded_width > caps->nMaxWidth ||
        (unsigned int)ctx->coded_height > caps->nMaxHeight) {
        printf("Resolution %dx%d exceeds the supported maximum %ux%u\n",
               ctx->coded_width, ctx->coded_height,
               (unsigned int)caps->nMaxWidth, (unsigned int)caps->nMaxHeight);
        ctx->internal_error = -1;
        return 0;
    }
    if ((unsigned int)(ctx->coded_width >> 4) *
            (unsigned int)(ctx->coded_height >> 4) >
        caps->nMaxMBCount) {
        printf("Macroblock count exceeds the supported maximum %u\n",
               (unsigned int)caps->nMaxMBCount);
        ctx->internal_error = -1;
        return 0;
    }

    // A decoder already exists: reconfigure it in place and stop here.
    // Creating another decoder afterwards would overwrite and leak the handle.
    if (ctx->cudecoder) {
        printf("Re-initializing decoder\n");
        memset(&cf, 0, sizeof(cf));
        cf.ulWidth = ctx->coded_width;
        cf.ulHeight = ctx->coded_height;
        // The target size reuses the current out_width/out_height.
        cf.ulTargetWidth = ctx->out_width;
        cf.ulTargetHeight = ctx->out_height;
        cf.display_area.left = format->display_area.left + ctx->crop.left;
        cf.display_area.top = format->display_area.top + ctx->crop.top;
        cf.display_area.right = format->display_area.right - ctx->crop.right;
        cf.display_area.bottom =
            format->display_area.bottom - ctx->crop.bottom;
        cf.ulNumDecodeSurfaces = ctx->decode_surface;
        ctx->internal_error = cuvidReconfigureDecoder(ctx->cudecoder, &cf);
        if (ctx->internal_error != 0) {
            printf("cuvidReconfigureDecoder failed\n");
            return 0;
        }
        printf("Re-initialized decoder\n");
        return surfaces;
    }

    // First sequence header: create the decoder.
    memset(&cuinfo, 0, sizeof(CUVIDDECODECREATEINFO));
    cuinfo.CodecType = (cudaVideoCodec)ctx->codec_type;
    cuinfo.ChromaFormat = ctx->in_pix_fmt;
    cuinfo.OutputFormat = ctx->out_pix_fmt;
    cuinfo.bitDepthMinus8 = ctx->bit_depth_minus8;
    if (ctx->progressive_sequence)
        cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    cuinfo.ulNumOutputSurfaces = 2;
    cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    cuinfo.ulNumDecodeSurfaces = ctx->decode_surface;
    cuinfo.ulWidth = ctx->coded_width;
    cuinfo.ulHeight = ctx->coded_height;
    // The numeric arguments are cast to int so they match the %d specifiers.
    printf(
        "codec:%d,chroma_format:%d,width:%d,height:%d,output_format:%d,"
        "num_decode_surfaces:%d,num_output_surfaces:%d,creation_flags:%d,"
        "vidLock:%p,bitDepthMinus8:%d\n",
        (int)cuinfo.CodecType, (int)cuinfo.ChromaFormat, (int)cuinfo.ulWidth,
        (int)cuinfo.ulHeight, (int)cuinfo.OutputFormat,
        (int)cuinfo.ulNumDecodeSurfaces, (int)cuinfo.ulNumOutputSurfaces,
        (int)cuinfo.ulCreationFlags, (void*)cuinfo.vidLock,
        (int)cuinfo.bitDepthMinus8);
    printf("createdecoder,ulNumDecodeSurfaces:%d\n",
           (int)cuinfo.ulNumDecodeSurfaces);

    // No cropping or scaling: the output is the full coded frame.
    ctx->out_width = ctx->coded_width;
    ctx->out_height = ctx->coded_height;
    printf("out_width:%d,out_height:%d\n", ctx->out_width, ctx->out_height);
    cuinfo.ulTargetWidth = ctx->out_width;
    cuinfo.ulTargetHeight = ctx->out_height;
    cuinfo.ulMaxWidth = cuinfo.ulTargetWidth;
    cuinfo.ulMaxHeight = cuinfo.ulTargetHeight;
    printf("max resolution:width:%d,height:%d\n", (int)cuinfo.ulMaxWidth,
           (int)cuinfo.ulMaxHeight);

    ctx->internal_error = cuvidCreateDecoder(&ctx->cudecoder, &cuinfo);
    // CUresult error codes are positive, so compare against 0 rather than
    // testing for a negative value.
    if (ctx->internal_error != 0) {
        printf("cuvidCreateDecoder failed\n");
        return 0;
    }
    printf("cuvidCreateDecoder success, ret:%d\n", ctx->internal_error);
    return surfaces;
}
static int handle_picture_display(void* opaque, CUVIDPARSERDISPINFO* dispinfo) {
int ret = 0;
int bpp = 1;
void* dst_frame = NULL;
int dst_size = 0;
nvcuvid_test_ctx* ctx = NULL;
CUVIDGETDECODESTATUS s;
printf("<<<handle_picture_display\n");
ctx = (nvcuvid_test_ctx*)opaque;
ctx->internal_error = 0;
if (ctx->bit_depth_minus8 > 0) {
bpp = 2;
}
if (!dispinfo) {
printf("dispinfo is null\n");
return 0;
}
CUVIDPROCPARAMS map_info = {0};
map_info.progressive_frame = dispinfo->progressive_frame;
map_info.second_field = dispinfo->repeat_first_field + 1;
map_info.top_field_first = dispinfo->top_field_first;
map_info.unpaired_field = dispinfo->repeat_first_field < 0;
map_info.output_stream = NULL;
CUdeviceptr map_frame = 0;
unsigned int map_pitch = 0; // for 8 bit pitch=1, for 10/12 bit pitch=2
ret = cuvidMapVideoFrame(ctx->cudecoder, dispinfo->picture_index,
&map_frame, &map_pitch, &map_info);
if (ret != 0) {
printf("cuvidMapVideoFrame failed\n");
ctx->internal_error = ret;
goto err;
}
memset(&s, 0, sizeof(s));
ret = cuvidGetDecodeStatus(ctx->cudecoder, dispinfo->picture_index, &s);
if (ret != 0) {
printf("cuvidGetDecodeStatus failed\n");
ctx->internal_error = ret;
goto err;
}
if (s.decodeStatus == cuvidDecodeStatus_Error ||
s.decodeStatus == cuvidDecodeStatus_Error_Concealed) {
printf("Decode Error occurred for picture.\n");
exit(1);
}
printf("cuvidMapVideoFrame success,pitch:%u\n", map_pitch);
dst_frame = NULL;
// only for nv12
dst_size = ctx->out_width * ctx->out_height * bpp * 1.5;
dst_frame = (uint8_t*)malloc(dst_size);
// cpy to host
ret = cudaMemcpy(dst_frame, (void*)map_frame, dst_size,
cudaMemcpyDeviceToHost);
if (ret != 0) {
printf("cuMemcpyDtoH failed\n");
ctx->internal_error = ret;
goto err;
}
fwrite(dst_frame, 1, dst_size, ctx->out_file);
ctx->internal_error = cuvidUnmapVideoFrame(ctx->cudecoder, map_frame);
if (ctx->internal_error != 0) {
printf("cuvidUnmapVideoFrame failed\n");
goto err;
}
printf("cuvidUnmapVideoFrame success\n");
err:
return (0 == ctx->internal_error);
// Parser decode callback: submits one picture to the hardware decoder with
// the test context's CUDA context made current for the duration of the call.
//
// opaque:  the nvcuvid_test_ctx registered as the parser's pUserData.
// picpara: picture parameters produced by the parser.
//
// Returns 1 on success, 0 on failure (ctx->internal_error holds the status).
static int handle_picture_decode(void* opaque, CUVIDPICPARAMS* picpara) {
    nvcuvid_test_ctx* ctx = (nvcuvid_test_ctx*)opaque;
    int ret = 0;

    if (ctx == NULL) {
        printf("ctx is null\n");
        return 0;
    }
    ret = cuCtxPushCurrent(ctx->cu_ctx);
    if (ret != 0) {
        printf("cuCtxPushCurrent failed,ret=%d\n", ret);
        ctx->internal_error = ret;
        return 0;
    }
    ctx->internal_error = cuvidDecodePicture(ctx->cudecoder, picpara);
    // CUresult error codes are positive, so any non-zero value is a failure.
    if (ctx->internal_error != 0) {
        printf("cuvidDecodePicture failed\n");
    } else {
        printf("cuvidDecodePicture success,ret=%d.\n", ctx->internal_error);
    }
    // Pop on every path so the context stack stays balanced.
    cuCtxPopCurrent(NULL);
    return (0 == ctx->internal_error);
}
// Tests whether an Annex-B start code ends at data[pos].
//
// ctx:  unused; kept for interface compatibility.
// data: buffer holding at least pos + 1 bytes.
// pos:  index of the last byte to examine.
//
// Returns 2 for the 4-byte code 00 00 00 01, 1 for the 3-byte code 00 00 01,
// and 0 otherwise. Bytes before the start of the buffer are never read: with
// pos < 2 no start code can end at pos, and with pos == 2 only the 3-byte
// form is possible.
static int start_code_is_annexb(nvcuvid_test_ctx * ctx, uint8_t * data,
                                int pos) {
    (void)ctx;
    if (data == NULL || pos < 2) {
        return 0;
    }
    if (data[pos] != 1 || data[pos - 1] != 0 || data[pos - 2] != 0) {
        return 0;
    }
    // The longer form must be tested first: every 4-byte code also ends with
    // the 3-byte pattern.
    if (pos >= 3 && data[pos - 3] == 0) {
        return 2;
    }
    return 1;
}
// Verifies that the input file starts with the 4-byte Annex-B start code
// 00 00 00 01 and keeps those bytes in ctx->tmp_data, so the next demux()
// call starts its packet with them.
//
// Returns 0 on success, -1 when the buffer cannot be allocated, the file is
// shorter than a start code, or the first four bytes are not 00 00 00 01.
static int check_format(nvcuvid_test_ctx * ctx) {
    size_t got = 0;
    uint8_t* d = NULL;

    if (!ctx->tmp_data) {
        ctx->tmp_data = malloc(1024);
        if (!ctx->tmp_data) {
            printf("malloc tmp data failed\n");
            return -1;
        }
    }
    d = (uint8_t*)ctx->tmp_data;
    // Read the first 4 bytes.
    got = fread(d, 1, START_CODE_SIZE, ctx->in_file);
    if (got != (size_t)START_CODE_SIZE) {
        printf("read file failed\n");
        return -1;
    }
    // The first start code has to be the 4-byte form.
    if (start_code_is_annexb(ctx, d, START_CODE_SIZE - 1) != 2) {
        printf("file format error\n");
        printf("start code:%02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
        return -1;
    }
    ctx->tmp_data_size = START_CODE_SIZE;
    printf("start code:%02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
    return 0;
}
// Opens the input file named by ctx->in_file_name, records its size, opens
// "<input>.yuv" for the decoded output and finally validates the input's
// leading start code via check_format().
//
// Returns -1 when either file cannot be opened, otherwise whatever
// check_format() returns.
static int open_file(nvcuvid_test_ctx * ctx) {
    char yuv_path[1024] = {0};
    FILE* src = fopen(ctx->in_file_name, "rb");

    ctx->in_file = src;
    if (src == NULL) {
        printf("open input file:%s failed\n", ctx->in_file_name);
        return -1;
    }

    // Measure the file by seeking to its end, then rewind for reading.
    fseek(src, 0, SEEK_END);
    ctx->in_file_size = ftell(src);
    fseek(src, 0, SEEK_SET);
    printf("input file size:%d\n", ctx->in_file_size);

    snprintf(yuv_path, sizeof(yuv_path), "%s.yuv", ctx->in_file_name);
    ctx->out_file = fopen(yuv_path, "wb+");
    if (ctx->out_file == NULL) {
        printf("open output file:%s failed\n", yuv_path);
        return -1;
    }
    return check_format(ctx);
}
// Prints "data size:<all_size>" followed by the first print_size bytes of
// data as two-digit hex, with a space after every second byte and a line
// break after every 64th byte, then a final newline.
static void print_hex(uint8_t * data, int all_size, int print_size) {
    int idx = 0;

    printf("data size:%d\n", all_size);
    while (idx < print_size) {
        // Separate pairs of bytes, but not before the very first byte.
        if (idx > 0 && (idx & 1) == 0) {
            printf(" ");
        }
        printf("%02x", data[idx]);
        ++idx;
        // idx now equals the number of bytes printed so far.
        if (idx % 64 == 0) {
            printf("\n");
        }
    }
    printf("\n");
}
// Prints the raw NAL header byte and a symbolic name for its type, then
// returns the type, i.e. the low five bits of the byte.
static int nal_type(uint8_t data) {
    // Types that get a dedicated name; everything else prints as unknown.
    static const struct {
        int id;
        const char* label;
    } kKnown[] = {
        {1, "NAL_TYPE_SLICE"}, {5, "NAL_TYPE_IDR"}, {6, "NAL_TYPE_SEI"},
        {7, "NAL_TYPE_SPS"},   {8, "NAL_TYPE_PPS"}, {9, "NAL_TYPE_AUD"},
        {12, "NAL_TYPE_EOSEQ"},
    };
    const int type = data & 0x1f;
    const char* label = "NAL_TYPE_UNKNOWN";
    unsigned int k;

    printf("nal type:0x%02x\n", data);
    for (k = 0; k < sizeof(kKnown) / sizeof(kKnown[0]); ++k) {
        if (kKnown[k].id == type) {
            label = kKnown[k].label;
            break;
        }
    }
    printf("%s\n", label);
    return type;
}
// Reads the next Annex-B NAL unit from ctx->in_file into ctx->host_data and
// describes it in pkt. The start code that terminates the unit is carried
// over in ctx->tmp_data and prefixed to the next unit. Units whose NAL type
// is 9 (access unit delimiter) are dropped and the next unit is read instead.
// When no more data is available pkt becomes an end-of-stream packet and
// ctx->decoder_flushing is set.
//
// Returns 0 on success, -1 when a buffer cannot be allocated or a unit does
// not fit into the 1 MiB host buffer.
static int demux(nvcuvid_test_ctx * ctx, CUVIDSOURCEDATAPACKET * pkt) {
    const int kHostBufSize = 1024 * 1024;
    int again = 0;
    int size = 0;
    int pos = 0;
    int c = 0;
    int code = 0;
    uint8_t* data = NULL;

    if (!ctx->host_data) {
        ctx->host_data = (uint8_t*)malloc(kHostBufSize);
        if (!ctx->host_data) {
            printf("malloc host data failed\n");
            return -1;
        }
        printf("malloc host data,size:1024*1024\n");
    }
    // The carry-over buffer normally comes from check_format(); create it
    // here if that function was not run.
    if (!ctx->tmp_data) {
        ctx->tmp_data = malloc(1024);
        if (!ctx->tmp_data) {
            printf("malloc tmp data failed\n");
            return -1;
        }
        ctx->tmp_data_size = 0;
    }
    do {
        data = (uint8_t*)ctx->host_data;
        again = 0;
        size = 0;
        // Start the packet with the start code left over from the last call.
        if (ctx->tmp_data_size > 0) {
            memcpy(data, ctx->tmp_data, ctx->tmp_data_size);
            size = ctx->tmp_data_size;
        }
        pos = size;  // index of the NAL header byte
        ctx->tmp_data_size = 0;
        while (1) {
            c = fgetc(ctx->in_file);
            // EOF is returned for end of file and for read errors alike;
            // stop in both cases instead of spinning on a failed stream.
            if (c == EOF) {
                break;
            }
            if (size >= kHostBufSize) {
                printf("nal unit exceeds host buffer size:%d\n", kHostBufSize);
                return -1;
            }
            data[size++] = (uint8_t)c;
            // A start code ending at the byte just read closes the current
            // unit; move it to tmp_data for the next packet.
            code = start_code_is_annexb(ctx, data, size - 1);
            if (code == 1 || code == 2) {
                const int code_len = (code == 1) ? 3 : 4;
                ctx->tmp_data_size = code_len;
                memcpy(ctx->tmp_data, data + size - code_len, code_len);
                size -= code_len;
                break;
            }
        }
        if (size > 0) {
            pkt->payload_size = size;
            pkt->timestamp = 0;
            pkt->payload = (const unsigned char*)ctx->host_data;
            print_hex((uint8_t*)pkt->payload, size, size > 32 ? 32 : size);
            // Inspect the NAL type only when a header byte follows the start
            // code inside this packet.
            if (pos < size && nal_type(data[pos]) == 9) {
                again = 1;
                printf("sei/aud data, read size:%d, discard.\n", size);
                printf("---------------------------------\n");
            }
        } else {
            size = 0;
            pkt->flags = CUVID_PKT_ENDOFSTREAM | CUVID_PKT_NOTIFY_EOS;
            ctx->decoder_flushing = 1;
            pkt->payload = NULL;
            pkt->payload_size = 0;
            pkt->timestamp = 0;
            printf("end of stream, read size:%d\n", ctx->read_size);
        }
        ctx->read_size += size;
    } while (again);
    return 0;
}
// Fetches the next packet from the FFmpeg demuxer and describes it in pkt.
// The payload pointer refers to memory handed out by the demuxer; nothing is
// copied. When the demuxer delivers no data, pkt becomes an end-of-stream
// packet and ctx->decoder_flushing is set.
//
// Returns 0 on success, -1 when no demuxer has been created.
static int demux_ffmpeg(nvcuvid_test_ctx * ctx,
                        CUVIDSOURCEDATAPACKET * pkt) {
    uint8_t* data = NULL;
    int size = 0;
    int64_t pts = 0;

    if (!ctx->demuxer) {
        printf("demuxer is not created\n");
        return -1;
    }
    // NOTE(review): the return value of Demux() is not inspected; a missing
    // or empty packet is detected through size and data instead.
    ctx->demuxer->Demux(&data, &size, &pts);
    if (size > 0 && data != NULL) {
        ctx->read_size += size;
        pkt->payload_size = size;
        pkt->timestamp = pts;
        pkt->flags |= CUVID_PKT_TIMESTAMP;
        pkt->payload = (const unsigned char*)data;
    } else {
        pkt->flags = CUVID_PKT_ENDOFSTREAM;
        ctx->decoder_flushing = 1;
        pkt->payload = NULL;
        pkt->payload_size = 0;
        pkt->timestamp = 0;
        printf("end of stream, read size:%d\n", ctx->read_size);
    }
    return 0;
}
// Initializes the CUDA driver API, selects device 0, creates a CUDA context
// on it and creates the nvcuvid context lock for that context. The results
// are stored in ctx->cu_ctx and ctx->cu_ctx_lock; releasing them is left to
// the caller.
//
// Returns 0 on success and -1 on the first failing step.
static int cu_init(nvcuvid_test_ctx * ctx) {
    const int gpu = 0;
    int ret = 0;
    int nGpu = 0;
    void* test = NULL;
    CUdevice cu_device = 0;
    char gpu_name[80] = {0};

    ret = cuInit(0);
    if (ret != 0) {
        printf("cuInit failed\n");
        return -1;
    }
    // Make sure the requested device exists before using it.
    ret = cuDeviceGetCount(&nGpu);
    if (ret != 0 || nGpu <= gpu) {
        printf("no usable CUDA device,ret=%d,count=%d\n", ret, nGpu);
        return -1;
    }
    printf("cuDeviceGetCount:%d\n", nGpu);
    ret = cuDeviceGet(&cu_device, gpu);
    if (ret != 0) {
        printf("cuDeviceGet failed\n");
        return -1;
    }
    // The name is informational only; a failure here is not fatal.
    if (cuDeviceGetName(gpu_name, sizeof(gpu_name), cu_device) == 0) {
        printf("GPU in use:%s\n", gpu_name);
    }
    // Smoke test of the runtime API, which is used later for the
    // device-to-host copies.
    ret = cudaMalloc(&test, 1024);
    if (ret != 0) {
        printf("cudaMalloc failed\n");
        return -1;
    }
    cudaFree(test);
    ret = cuCtxCreate(&ctx->cu_ctx, 0, cu_device);
    if (ret != 0) {
        printf("createCudaContext failed\n");
        return -1;
    }
    ret = cuvidCtxLockCreate(&ctx->cu_ctx_lock, ctx->cu_ctx);
    if (ret != 0) {
        printf("cuvidCtxLockCreate failed\n");
        return -1;
    }
    return 0;
}
// Entry point of the nvcuvid decode test: opens the input and output files,
// initializes CUDA, creates the video parser and feeds it packets from the
// FFmpeg demuxer until the end of the stream. Decoded frames are written by
// the display callback.
//
// Usage: <program> <input_file>
// Returns 0 when the whole stream was parsed, -1 on any failure.
int main(int argc, char* argv[]) {
    int ret = 0;
    int exit_code = -1;
    nvcuvid_test_ctx* ctx = NULL;

    if (argc < 2) {
        printf("Usage: %s <input_file>\n", argv[0]);
        return -1;
    }
    // Zero-filled allocation, so every handle and pointer starts out as NULL.
    ctx = (nvcuvid_test_ctx*)calloc(1, sizeof(nvcuvid_test_ctx));
    if (ctx == NULL) {
        printf("alloc context failed\n");
        return -1;
    }
    ctx->in_file_name = argv[1];
    ret = open_file(ctx);
    if (ret != 0) {
        printf("open file failed\n");
        goto error;
    }
    // 1. init cuda
    ret = cu_init(ctx);
    if (ret != 0) goto error;
    // 2. create parser
    ctx->out_pix_fmt = cudaVideoSurfaceFormat_NV12;
    ctx->codec_type = cudaVideoCodec_H264;
    memset(&ctx->cuparseinfo, 0, sizeof(CUVIDPARSERPARAMS));
    ctx->cuparseinfo.CodecType = ctx->codec_type;
    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = 1;
    ctx->cuparseinfo.ulClockRate = 1000;
    ctx->cuparseinfo.pUserData = ctx;
    ctx->cuparseinfo.ulMaxDisplayDelay = 1;
    ctx->cuparseinfo.pfnSequenceCallback = handle_video_sequence;
    ctx->cuparseinfo.pfnDecodePicture = handle_picture_decode;
    ctx->cuparseinfo.pfnDisplayPicture = handle_picture_display;
    ret = cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo);
    if (ret != 0) {
        printf("cuvidCreateVideoParser failed\n");
        goto error;
    }
    // A throwing constructor must not bypass the cleanup below.
    try {
        ctx->demuxer = new FFmpegDemuxer(ctx->in_file_name);
    } catch (...) {
        printf("create demuxer failed\n");
        ctx->demuxer = NULL;
        goto error;
    }
    // 3. decode: the end-of-stream packet is still handed to the parser in
    // the iteration that sets decoder_flushing.
    while (!ctx->decoder_flushing) {
        CUVIDSOURCEDATAPACKET pkt;
        memset(&pkt, 0, sizeof(CUVIDSOURCEDATAPACKET));
        ret = demux_ffmpeg(ctx, &pkt);
        if (ret != 0) {
            printf("demux packet failed\n");
            goto error;
        }
        print_hex((uint8_t*)pkt.payload, (int)pkt.payload_size,
                  pkt.payload_size > 32 ? 32 : (int)pkt.payload_size);
        ret = cuvidParseVideoData(ctx->cuparser, &pkt);
        if (ret != 0) {
            printf("cuvidParseVideoData failed\n");
            goto error;
        }
        ctx->frame_count++;
        printf("parse video pkt[%llu] success\n",
               (unsigned long long)ctx->frame_count);
        printf("-----------------------\n");
    }
    exit_code = 0;
error:
    // 4. destroy: parser and decoder first, then host resources, and the
    // context lock before the context it was created from.
    if (ctx->cuparser) cuvidDestroyVideoParser(ctx->cuparser);
    if (ctx->cudecoder) cuvidDestroyDecoder(ctx->cudecoder);
    if (ctx->in_file) fclose(ctx->in_file);
    if (ctx->out_file) fclose(ctx->out_file);
    free(ctx->host_data);
    free(ctx->tmp_data);
    delete ctx->demuxer;
    if (ctx->cu_ctx_lock) cuvidCtxLockDestroy(ctx->cu_ctx_lock);
    if (ctx->cu_ctx) cuCtxDestroy(ctx->cu_ctx);
    free(ctx);
    printf("main end.\n");
    return exit_code;
}
//---------------------------------------------------------------------------
// main.cpp
//
// cuviddec decode sample frontend
//
// Copyright 2008 NVIDIA Corporation. All rights reserved.
//---------------------------------------------------------------------------
#include <nvcuvid.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// #include <platform/NvSystemClock/NvSystemClock.h>
// #if defined WIN32 || defined _WIN32
// #include <windows.h>
// #include <d3d9.h>
// #include <cudad3d9.h>
// #elif defined NV_UNIX
// #include <unistd.h>
// #include <sys/time.h>
// #include <time.h>
// #include <platform/include/NvTypes.h>
// #include <codecs/include/nvutil.h>
// #endif
#define MAX_FRM_CNT 16
#define DISPLAY_DELAY 2 // Attempt to decode up to 2 frames ahead of display
#define USE_ASYNC_COPY 1
#define USE_FLOATING_CONTEXTS 1 // Use floating contexts
// Autolock for floating contexts
// Scope guard for the nvcuvid context lock. With USE_FLOATING_CONTEXTS
// enabled the constructor takes the lock and the destructor releases it;
// otherwise the guard only stores the handle. A NULL handle is accepted and
// makes the guard a no-op.
class CAutoCtxLock {
   private:
    CUvideoctxlock m_lock;

    // Not copyable: a copy would release the same lock a second time.
    CAutoCtxLock(const CAutoCtxLock&) = delete;
    CAutoCtxLock& operator=(const CAutoCtxLock&) = delete;

   public:
#if USE_FLOATING_CONTEXTS
    CAutoCtxLock(CUvideoctxlock lck) : m_lock(lck) {
        if (m_lock) cuvidCtxLock(m_lock, 0);
    }
    ~CAutoCtxLock() {
        if (m_lock) cuvidCtxUnlock(m_lock, 0);
    }
#else
    CAutoCtxLock(CUvideoctxlock lck) : m_lock(lck) {}
#endif
};
// Per-run decoder state shared between main() and the parser callbacks.
// main() zero-fills it before use.
typedef struct {
CUvideoparser cuParser;  // parser handle, created in main()
CUvideodecoder cuDecoder;  // decoder handle, (re)created in HandleVideoSequence()
CUstream cuStream;  // stream used for the asynchronous device-to-host copy
CUvideoctxlock cuCtxLock;  // lock handed to CAutoCtxLock around driver calls
CUVIDDECODECREATEINFO dci;  // parameters of the current decoder; compared with each new format
CUVIDPARSERDISPINFO DisplayQueue[DISPLAY_DELAY];  // not used by the code visible in this file
unsigned char* pRawNV12;  // host buffer from cuMemAllocHost() that receives the mapped frame
int raw_nv12_size;  // size in bytes recorded for pRawNV12
int pic_cnt;  // number of pictures passed through DisplayPicture()
FILE* fd_yuv;  // optional output file; NULL when no output path was given
} DecodeSession;
static int DisplayPicture(DecodeSession* state,
CUVIDPARSERDISPINFO* pPicParams);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Parser callbacks
//
// Called when the decoder encounters a video format change (or initial sequence
// header)
// Sequence callback: (re)creates the decoder when the codec, coded size or
// chroma format differs from the parameters of the current decoder.
// Returns 1 on success (including "nothing changed") and 0 when the decoder
// cannot be created.
static int CUDAAPI HandleVideoSequence(void* pvUserData,
                                       CUVIDEOFORMAT* pFormat) {
    DecodeSession* session = (DecodeSession*)pvUserData;
    CUVIDDECODECREATEINFO* info = &session->dci;

    // Nothing to do while the stream still matches the current decoder.
    const bool same_format = (pFormat->codec == info->CodecType) &&
                             (pFormat->coded_width == info->ulWidth) &&
                             (pFormat->coded_height == info->ulHeight) &&
                             (pFormat->chroma_format == info->ChromaFormat);
    if (same_format) {
        return 1;
    }

    CAutoCtxLock guard(session->cuCtxLock);
    if (session->cuDecoder) {
        cuvidDestroyDecoder(session->cuDecoder);
        session->cuDecoder = NULL;
    }

    memset(info, 0, sizeof(CUVIDDECODECREATEINFO));
    info->ulWidth = pFormat->coded_width;
    info->ulHeight = pFormat->coded_height;
    info->ulNumDecodeSurfaces = MAX_FRM_CNT;
    info->CodecType = pFormat->codec;
    info->ChromaFormat = pFormat->chroma_format;
    // Output is passed through at coded size as NV12, without deinterlacing.
    info->OutputFormat = cudaVideoSurfaceFormat_NV12;
    info->DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    info->ulTargetWidth = info->ulWidth;
    info->ulTargetHeight = info->ulHeight;
    info->ulNumOutputSurfaces = 1;

    if (cuvidCreateDecoder(&session->cuDecoder, info) != CUDA_SUCCESS) {
        printf("Failed to create video decoder\n");
        return 0;
    }
    return 1;
}
// Called by the video parser to decode a single picture
// Since the parser will deliver data as fast as it can, we need to make sure
// that the picture index we're attempting to use for decode is no longer used
// for display
// Decode callback: submits one picture to the decoder while holding the
// context lock. Returns 1 when the picture was accepted, 0 for an invalid
// picture index or a decoder error.
static int CUDAAPI HandlePictureDecode(void* pvUserData,
                                       CUVIDPICPARAMS* pPicParams) {
    DecodeSession* session = (DecodeSession*)pvUserData;
    CAutoCtxLock guard(session->cuCtxLock);

    // Should never happen.
    if (pPicParams->CurrPicIdx < 0) {
        printf("Invalid picture index\n");
        return 0;
    }

    const CUresult rc = cuvidDecodePicture(session->cuDecoder, pPicParams);
    if (rc == CUDA_SUCCESS) {
        return 1;
    }
    printf("cuvidDecodePicture: %d\n", rc);
    return 0;
}
// Called by the video parser to display a video frame (in the case of field
// pictures, there may be 2 decode calls per 1 display call, since two fields
// make up one frame)
// Display callback: logs the call and forwards the picture to
// DisplayPicture(). The result of DisplayPicture() is not propagated; the
// callback always reports success.
static int CUDAAPI HandlePictureDisplay(void* pvUserData,
                                        CUVIDPARSERDISPINFO* pPicParams) {
    printf("HandlePictureDisplay>>>>>\n");
    (void)DisplayPicture((DecodeSession*)pvUserData, pPicParams);
    return 1;
}
// Maps one decoded picture, copies the NV12 surface into the pinned host
// buffer and, when an output file is open, converts it to planar IYUV
// (Y plane, U plane, V plane) and appends it to the file.
//
// Returns 0 when the frame cannot be mapped, 1 otherwise. A failed copy or
// allocation is reported on stdout and the frame is not written, but the
// picture is still counted in state->pic_cnt.
static int DisplayPicture(DecodeSession* state,
                          CUVIDPARSERDISPINFO* pPicParams) {
    CAutoCtxLock lck(state->cuCtxLock);
    CUVIDPROCPARAMS vpp;
    CUdeviceptr devPtr = 0;
    CUresult result;
    unsigned int pitch = 0, w, h;
    int nv12_size;
    bool frame_ready = false;

    memset(&vpp, 0, sizeof(vpp));
    vpp.progressive_frame = pPicParams->progressive_frame;
    vpp.top_field_first = pPicParams->top_field_first;
    result = cuvidMapVideoFrame(state->cuDecoder, pPicParams->picture_index,
                                &devPtr, &pitch, &vpp);
    if (result != CUDA_SUCCESS) {
        printf("cuvidMapVideoFrame: %d\n", result);
        return 0;
    }
    w = state->dci.ulTargetWidth;
    h = state->dci.ulTargetHeight;
    nv12_size = pitch * (h + h / 2);  // 12bpp, rows padded to the pitch

    // (Re)allocate the host buffer when it is missing or too small.
    if ((!state->pRawNV12) || (nv12_size > state->raw_nv12_size)) {
        if (state->pRawNV12) {
            cuMemFreeHost(state->pRawNV12);
            state->pRawNV12 = NULL;
        }
        state->raw_nv12_size = 0;
        result = cuMemAllocHost((void**)&state->pRawNV12, nv12_size);
        if (result != CUDA_SUCCESS) {
            printf("cuMemAllocHost failed to allocate %d bytes (%d)\n",
                   nv12_size, result);
            // Keep pointer and size consistent after a failed allocation.
            state->pRawNV12 = NULL;
        } else {
            state->raw_nv12_size = nv12_size;
        }
    }

    if (state->pRawNV12) {
#if USE_ASYNC_COPY
        result = cuMemcpyDtoHAsync(state->pRawNV12, devPtr, nv12_size,
                                   state->cuStream);
        if (result != CUDA_SUCCESS) {
            printf("cuMemcpyDtoHAsync: %d\n", result);
        } else {
            // Block until the copy has finished; this replaces polling the
            // stream with one-second sleeps.
            result = cuStreamSynchronize(state->cuStream);
            if (result != CUDA_SUCCESS)
                printf("cuStreamSynchronize: %d\n", result);
        }
#else
        result = cuMemcpyDtoH(state->pRawNV12, devPtr, nv12_size);
        if (result != CUDA_SUCCESS) printf("cuMemcpyDtoH: %d\n", result);
#endif
        frame_ready = (result == CUDA_SUCCESS);
    }
    cuvidUnmapVideoFrame(state->cuDecoder, devPtr);

    // Convert the output to standard IYUV and dump it to disk (note: very
    // inefficient). Frames whose copy failed are skipped rather than written
    // with stale data.
    if ((state->fd_yuv) && frame_ready) {
        const unsigned int cw = w / 2;  // chroma plane width
        const unsigned int ch = h / 2;  // chroma plane height
        const size_t luma_size = (size_t)w * h;
        const size_t chroma_size = (size_t)cw * ch;
        const size_t iyuv_size = luma_size + 2 * chroma_size;
        const unsigned char* p = state->pRawNV12;
        unsigned char* iyuv = new unsigned char[iyuv_size];
        unsigned int x, y;

        // Copy luma row by row, dropping the pitch padding.
        for (y = 0; y < h; y++) {
            memcpy(iyuv + (size_t)y * w, p + (size_t)y * pitch, w);
        }
        // De-interleave chroma (NV12 stores U,V,U,V,...). Rows are indexed
        // with y * cw so odd widths are handled as well.
        p += (size_t)h * pitch;
        for (y = 0; y < ch; y++) {
            for (x = 0; x < cw; x++) {
                iyuv[luma_size + (size_t)y * cw + x] =
                    p[(size_t)y * pitch + x * 2];
                iyuv[luma_size + chroma_size + (size_t)y * cw + x] =
                    p[(size_t)y * pitch + x * 2 + 1];
            }
        }
        fwrite(iyuv, 1, iyuv_size, state->fd_yuv);
        delete[] iyuv;  // allocated with new[], so it needs the array form
    }
    state->pic_cnt++;
    return 1;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// CUDA 2.0 initialization
//
#if defined WIN32 || defined _WIN32
IDirect3D9* g_pD3D = NULL;
IDirect3DDevice9* g_pD3Dev = NULL;
#endif
CUcontext g_cuContext = NULL;
CUdevice g_cuDevice = 0;
int g_cuInstanceCount = 0;
CUvideoctxlock g_cuCtxLock = NULL;
// Initializes the CUDA driver API and creates the process-wide context on
// the first device that accepts one. With USE_FLOATING_CONTEXTS enabled it
// also creates the nvcuvid context lock for that context. Later calls only
// increment the instance count and hand out the existing lock.
//
// pLock: receives the shared context lock (NULL when floating contexts are
//        disabled); may itself be NULL.
//
// The new context is left current on the calling thread.
// Returns true on success, false when CUDA cannot be initialized, no device
// is found, or no context or lock can be created.
static bool InitCuda(CUvideoctxlock* pLock) {
    CUresult err;
    int lAdapterCount = 0;
    const unsigned int ctx_flags = 0x00;

    if (g_cuInstanceCount != 0) {
        g_cuInstanceCount++;
        if (pLock) *pLock = g_cuCtxLock;
        return true;
    }
    err = cuInit(0);
    if (err != CUDA_SUCCESS) {
        printf("ERROR: cuInit failed (%d)\n", err);
        return false;
    }
    err = cuDeviceGetCount(&lAdapterCount);
    if ((err) || (lAdapterCount <= 0)) {
        printf("Failed to find a CUDA 2.0 device (%d)\n", err);
        return false;
    }
    // Use the first device on which a context can be created.
    for (g_cuDevice = 0; g_cuDevice < lAdapterCount; g_cuDevice++) {
        err = cuCtxCreate(&g_cuContext, ctx_flags, g_cuDevice);
        if (err == CUDA_SUCCESS) break;
    }
    if (err != CUDA_SUCCESS) {
        g_cuContext = NULL;
        printf("Failed to create CUDA context (%d)\n", err);
        return false;
    }
#if USE_FLOATING_CONTEXTS
    // Callers guard driver calls with CAutoCtxLock, so the lock must exist
    // before it is handed out.
    err = cuvidCtxLockCreate(&g_cuCtxLock, g_cuContext);
    if (err != CUDA_SUCCESS) {
        printf("Failed to create CUDA context lock (%d)\n", err);
        g_cuCtxLock = NULL;
        cuCtxDestroy(g_cuContext);
        g_cuContext = NULL;
        return false;
    }
#endif
    // Count this first user so that DeinitCuda() pairs with this call.
    g_cuInstanceCount = 1;
    if (pLock) *pLock = g_cuCtxLock;
    return true;
}
static bool DeinitCuda() {
if (g_cuInstanceCount > 0) {
if (--g_cuInstanceCount != 0) {
return true;
}
}
#if USE_FLOATING_CONTEXTS
if (g_cuCtxLock) {
cuvidCtxLockDestroy(g_cuCtxLock);
g_cuCtxLock = NULL;
}
#endif
if (g_cuContext) {
CUresult err = cuCtxDestroy(g_cuContext);
if (err != CUDA_SUCCESS)
printf("WARNING: cuCtxDestroy failed (%d)\n", err);
g_cuContext = NULL;
}
#if defined WIN32 || defined _WIN32
if (g_pD3Dev) {
g_pD3Dev->Release();
g_pD3Dev = NULL;
}
if (g_pD3D) {
g_pD3D->Release();
g_pD3D = NULL;
}
#endif
return true;
}
// Entry point of the cuh264dec sample: reads an elementary stream in 16 KiB
// chunks, feeds it to the nvcuvid parser and optionally writes the decoded
// frames as IYUV.
//
// Usage: cuh264dec input.264 [output.yuv] [mpeg1|mpeg2|mpeg4|vc1|h264|jpeg]
// The first argument is the input, the second the output file; a codec name
// is only recognized from the third argument on. The codec defaults to H.264.
//
// Returns 0 when the whole input was parsed without error, 1 otherwise.
int main(int argc, char* argv[]) {
    unsigned char io_buffer[16 * 1024];
    CUVIDPARSERPARAMS parserInitParams;
    DecodeSession state;
    CUresult result;
    char* arg_input = NULL;
    char* arg_output = NULL;
    FILE* fInput = NULL;
    int retval = 1;
    int i;
    bool decode_ok = true;
    cudaVideoCodec_enum codec = cudaVideoCodec_H264;

    if (argc < 2) {
        fprintf(stderr, "cuh264dec input.264 [output.yuv]\n");
        return retval;
    }
    for (i = 1; i < argc; i++) {
        if (!arg_input)
            arg_input = argv[i];
        else if (!arg_output)
            arg_output = argv[i];
        else if (!strcasecmp(argv[i], "mpeg1"))
            codec = cudaVideoCodec_MPEG1;
        else if (!strcasecmp(argv[i], "mpeg2"))
            codec = cudaVideoCodec_MPEG2;
        else if (!strcasecmp(argv[i], "mpeg4"))
            codec = cudaVideoCodec_MPEG4;
        else if (!strcasecmp(argv[i], "vc1"))
            codec = cudaVideoCodec_VC1;
        else if (!strcasecmp(argv[i], "h264"))
            codec = cudaVideoCodec_H264;
        else if (!strcasecmp(argv[i], "jpeg"))
            codec = cudaVideoCodec_JPEG;
        else {
            fprintf(stderr, "invalid parameter\n");
            return retval;
        }
    }
    if (!arg_input) {
        fprintf(stderr, "input file not specified\n");
        return retval;
    }
    memset(&state, 0, sizeof(state));
#if defined WIN32 || defined _WIN32
    timeBeginPeriod(1);
#endif
    // Initialize CUDA and obtain the context lock.
    if (!InitCuda(&state.cuCtxLock)) {
        printf("Failed to initialize CUDA 2.0\n");
        goto exit;
    }
    // Create video parser
    memset(&parserInitParams, 0, sizeof(parserInitParams));
    parserInitParams.CodecType = codec;
    parserInitParams.ulMaxNumDecodeSurfaces = MAX_FRM_CNT;
    parserInitParams.ulMaxDisplayDelay = DISPLAY_DELAY;
    parserInitParams.pUserData = &state;
    parserInitParams.pfnSequenceCallback = HandleVideoSequence;
    parserInitParams.pfnDecodePicture = HandlePictureDecode;
    parserInitParams.pfnDisplayPicture = HandlePictureDisplay;
    result = cuvidCreateVideoParser(&state.cuParser, &parserInitParams);
    if (result != CUDA_SUCCESS) {
        printf("Failed to create video parser (%d)\n", result);
        goto exit;
    }
    {
        CAutoCtxLock lck(state.cuCtxLock);
        result = cuStreamCreate(&state.cuStream, 0);
        if (result != CUDA_SUCCESS) {
            printf("cuStreamCreate failed (%d)\n", result);
            goto exit;
        }
    }
    // Open input file
    fInput = fopen(arg_input, "rb");
    if (fInput == NULL) {
        printf("Failed to open \"%s\"\n", arg_input);
        goto exit;
    }
    // Open output file
    if (arg_output) {
        state.fd_yuv = fopen(arg_output, "wb");
        if (state.fd_yuv == NULL) {
            printf("Failed to create \"%s\"\n", arg_output);
            goto exit;
        }
    }
    // Start decoding
    printf("Decoding...\n");
    for (;;) {
        CUVIDSOURCEDATAPACKET pkt;
        size_t len;

        memset(&pkt, 0, sizeof(pkt));
        len = fread(io_buffer, 1, sizeof(io_buffer), fInput);
        if (len == 0) {
            // No more input: flush the decoder.
            pkt.flags = CUVID_PKT_ENDOFSTREAM;
            pkt.payload_size = 0;
            pkt.payload = NULL;
            pkt.timestamp = 0;
            result = cuvidParseVideoData(state.cuParser, &pkt);
            if (result != CUDA_SUCCESS) {
                printf("cuvidParseVideoData (flush): %d\n", result);
                decode_ok = false;
            }
            break;
        }
        pkt.flags = 0;
        pkt.payload_size = len;
        pkt.payload = io_buffer;
        pkt.timestamp = 0;  // not using timestamps
        result = cuvidParseVideoData(state.cuParser, &pkt);
        if (result != CUDA_SUCCESS) {
            // Keep feeding data, but remember that the run was not clean.
            printf("cuvidParseVideoData: %d\n", result);
            decode_ok = false;
        }
    }
    printf("Processed %d frames\n", state.pic_cnt);
    if (decode_ok) {
        retval = 0;
    }
exit:
    if (fInput) {
        fclose(fInput);
        fInput = NULL;
    }
    if (state.fd_yuv) {
        fclose(state.fd_yuv);
        state.fd_yuv = NULL;
    }
    // Delete all created objects
    if (state.cuParser != NULL) {
        cuvidDestroyVideoParser(state.cuParser);
        state.cuParser = NULL;
    }
    if (state.cuDecoder != NULL) {
        CAutoCtxLock lck(state.cuCtxLock);
        cuvidDestroyDecoder(state.cuDecoder);
        state.cuDecoder = NULL;
    }
    if (state.cuStream != NULL) {
        CAutoCtxLock lck(state.cuCtxLock);
        cuStreamDestroy(state.cuStream);
        state.cuStream = NULL;
    }
    if (state.pRawNV12) {
        CAutoCtxLock lck(state.cuCtxLock);
        cuMemFreeHost(state.pRawNV12);
        state.pRawNV12 = NULL;
    }
    DeinitCuda();
#if defined WIN32 || defined _WIN32
    timeEndPeriod(1);
#endif
    return retval;
}
下面是 CMakeLists.txt(CMake 构建脚本):
cmake_minimum_required(VERSION 3.14)
project(nvcodec_test_case)
set(CMAKE_CXX_STANDARD 14)

# Select the build mode at configure time:
#   cmake .. -DCMAKE_BUILD_TYPE=Release   (or Debug)
if(CMAKE_BUILD_TYPE AND (CMAKE_BUILD_TYPE STREQUAL "Debug"))
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} $ENV{CXXFLAGS} -O0 -Wall -g")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} $ENV{CXXFLAGS} -O0 -Wall -g")
    message("Debug mode:${CMAKE_C_FLAGS_DEBUG}")
    message("Debug mode:${CMAKE_CXX_FLAGS_DEBUG}")
else()
    # The sources are C++, so the C++ flags have to be set as well.
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -O3 -fPIC")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -O3 -fPIC")
    message("Release mode:${CMAKE_C_FLAGS_RELEASE}")
    message("Release mode:${CMAKE_CXX_FLAGS_RELEASE}")
endif()

# Link shared libraries and executables with gold. CMAKE_STATIC_LINKER_FLAGS
# is left alone: those flags go to the archiver, which does not accept
# -fuse-ld.
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")

set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin/)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)

set(FFMPEG_LIBRARIES avformat avcodec avutil swscale avdevice swresample pthread m lzma bz2 z)

# nvcuvid and CUDA headers/libraries
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/nvcuvid/include /usr/local/cuda-12.4/targets/x86_64-linux/include/ )
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/nvcuvid/lib/x86_64 /usr/local/cuda-12.4/targets/x86_64-linux/lib/)
# FFmpeg headers/libraries
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/ffmpeg/include)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/ffmpeg/lib)

# Print the effective search paths to ease debugging of the configuration.
get_property(inc_dirs DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
foreach(dir ${inc_dirs})
    message(STATUS "include dir=${dir}")
endforeach()
get_property(lib_dirs DIRECTORY PROPERTY LINK_DIRECTORIES)
foreach(dir ${lib_dirs})
    message(STATUS "lib dir=${dir}")
endforeach()

link_directories(/opt/tops/lib)

set(UTILS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/demuxer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/log.cpp)

# cuh264dec sample
add_executable(cuh264dec src/cuh264dec.cpp ${UTILS_FILES})
target_include_directories(cuh264dec PUBLIC ${PROJECT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include)
target_link_libraries(cuh264dec PUBLIC nvcuvid cudart cuda ${FFMPEG_LIBRARIES})

# nvcuvid_test sample
add_executable(nvcuvid_test src/nvcuvid_test.cpp ${UTILS_FILES})
target_include_directories(nvcuvid_test PUBLIC ${PROJECT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include)
target_link_libraries(nvcuvid_test PUBLIC nvcuvid cudart cuda ${FFMPEG_LIBRARIES})