Passing image_t Directly to Get Detection Results

This post walks through an H264 decoding thread: `H264DecoderThread` connects to and decodes an RTSP stream, uses `sws_scale` for color-space conversion, and runs YOLO detection on the converted RGB/BGR image. It shows in detail how to convert from AVFrame to image_t, and measures how long the YOLO detection takes for each input path.
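The heart of the post is the conversion from the packed BGR bytes that `sws_scale` produces into the planar float `image_t` that Darknet's `detect_resized` expects. Here is a minimal standalone sketch of that repacking, assuming a Darknet-style `image_t` with `w`, `h`, `c` and a planar `float*` buffer (the helper name is mine, not from the original code); the full thread implementation below wires the same loop into the decode path.

// Minimal sketch of the core conversion: packed HWC bytes (e.g. BGR24 out
// of sws_scale) -> planar CHW floats in [0,1], as Darknet expects.
#include <cstdlib>

struct image_t { int h, w, c; float *data; };   // assumed Darknet-style layout

static image_t packed_bytes_to_image_t(const unsigned char *src, int w, int h, int c)
{
    image_t im;
    im.w = w; im.h = h; im.c = c;
    im.data = (float *)calloc((size_t)w * h * c, sizeof(float));
    const int step = w * c;                     // bytes per source row
    for (int y = 0; y < h; ++y)
        for (int k = 0; k < c; ++k)
            for (int x = 0; x < w; ++x)
                // channel plane k, row y, column x <- interleaved source pixel
                im.data[k*w*h + y*w + x] = src[y*step + x*c + k] / 255.0f;
    return im;
}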


#include "H264DecoderThread.h"
#include "Poco/Thread.h"  
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

using namespace std;
using Poco::Thread;

FFmpegErrorCode H264DecoderThread::connect()
{
    AVDictionary *opts = 0;
    av_dict_set(&opts, "rtsp_transport", "tcp", 0);   // force RTSP over TCP
    av_dict_set(&opts, "stimeout", "1000000", 0);     // socket timeout, in microseconds
    FFmpegErrorCode res = FFmpeg_NoError;

    printf("H264DecoderThread connect()\n");

    do 
    {
        if (avformat_open_input(&m_pFormatCtx, m_strMrl.c_str(), NULL, &opts) != 0)
        {
            res = FFmpeg_ConnectFail;
            break;
        }

        if (avformat_find_stream_info(m_pFormatCtx, NULL) < 0)
        {
            res = FFmpeg_FindStreamInfo;
            break;
        }

        m_nVideoStreamIndex = av_find_best_stream(m_pFormatCtx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
        if (m_nVideoStreamIndex < 0)
        {
            res = FFmpeg_FindBestStream;
            break;
        }
        AVStream* st = m_pFormatCtx->streams[m_nVideoStreamIndex];
        AVCodec *pCodec = avcodec_find_decoder(st->codec->codec_id);
        if (!pCodec || avcodec_open2(st->codec, pCodec, NULL) < 0)
        {
            res = FFmpeg_FindDecoder;
            break;
        }
    } while (false);
    if (res != FFmpeg_NoError)
    {
        avformat_close_input(&m_pFormatCtx);
    }
    else
    {
        m_bConnected = true;

        string names_file = "coco.names";
        string cfg_file = "yolov3.cfg";
        string weights_file = "yolov3.weights";

        pDetector = new Detector(cfg_file, weights_file);
        
        // (operator new throws on failure, so this check is belt-and-braces)
        if (pDetector == NULL) 
        {
            printf("error pDetector == NULL\n");
        }

        #if 0
        // full-resolution RGB output
        m_dst_pix_fmt = AV_PIX_FMT_RGB24;
        m_dst_w = 1280;
        m_dst_h = 720;
        m_dst_c = 3;
        #else
        // scale straight to the YOLO network input size, in BGR byte order
        m_dst_pix_fmt = AV_PIX_FMT_BGR24;
        m_dst_w = 416;
        m_dst_h = 416;
        m_dst_c = 3;
        #endif
        
    }
    av_dict_free(&opts);
    return res;
}
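Note that `connect()` opens the decoder through `st->codec`, the pre-3.x FFmpeg API that has since been deprecated. On newer FFmpeg the same step would look roughly like this sketch (an assumption about porting, not the code this post was built against; `open_video_decoder` is an illustrative name):

// Sketch of decoder setup on FFmpeg 3.x+, where AVStream::codec is
// deprecated in favor of AVStream::codecpar.
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
}

static AVCodecContext *open_video_decoder(AVFormatContext *fmt, int stream_index)
{
    AVStream *st = fmt->streams[stream_index];
    const AVCodec *codec = avcodec_find_decoder(st->codecpar->codec_id);
    if (!codec)
        return NULL;
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    if (!ctx)
        return NULL;
    // Copy the demuxer's stream parameters (width/height/pix_fmt/extradata)
    // into the fresh codec context, then open the decoder.
    if (avcodec_parameters_to_context(ctx, st->codecpar) < 0 ||
        avcodec_open2(ctx, codec, NULL) < 0) {
        avcodec_free_context(&ctx);
        return NULL;
    }
    return ctx;
}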

void H264DecoderThread::AddTask(PicInfo &picInfo)
{
    
    printf("H264DecoderThread AddTask()\n");
}


// Build an image_t header with no pixel buffer attached.
static image_t my_make_empty_image(int w, int h, int c)
{
    image_t out;
    out.data = 0;
    out.h = h;
    out.w = w;
    out.c = c;
    return out;
}

// Allocate a planar float image_t (CHW layout), zero-initialized.
static image_t my_make_image_custom(int w, int h, int c)
{
    image_t out = my_make_empty_image(w, h, c);
    out.data = (float *)calloc(h*w*c, sizeof(float));
    return out;
}
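`my_make_image_custom` calloc's a fresh float plane, and `run()` below builds one of these per decoded frame, so each `image_t` needs a matching release. A small counterpart helper (the name is mine; calling `free(im.data)` directly, as the loop below does, is equivalent):

// Counterpart to my_make_image_custom(): releases the calloc'd float plane.
static void my_free_image(image_t &im)
{
    free(im.data);
    im.data = 0;
}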

void H264DecoderThread::run()
{
    printf("H264DecoderThread run()\n");

    AVFrame *frame = (AVFrame *)av_mallocz(sizeof(*frame));  // cast needed in C++
    printf("H264DecoderThread run() sizeof(*frame) %zu\n", sizeof(*frame));
    printf("H264DecoderThread run() sizeof(AVFrame) %zu\n", sizeof(AVFrame));
    /*
    H264DecoderThread run() sizeof(*frame) 488
    H264DecoderThread run() sizeof(AVFrame) 488
    */
    AVCodecContext* avctx = m_pFormatCtx->streams[m_nVideoStreamIndex]->codec;
    // sws_scale will convert the decoded frames into the target pixel format
    /*
    enum AVPixelFormat {
        AV_PIX_FMT_NONE = -1,
        AV_PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
        AV_PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
        AV_PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
        AV_PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
        ...
    };
    */
    // prints 0, i.e. AV_PIX_FMT_YUV420P
    printf("H264DecoderThread::run() pix_fmt:  %d \n", avctx->pix_fmt);

    // The flags argument selects the scaling algorithm (only effective when
    // the input and output sizes differ); SWS_BILINEAR is used here.
    /*
    struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
                                      int dstW, int dstH, enum AVPixelFormat dstFormat,
                                      int flags,
                                      SwsFilter *srcFilter, SwsFilter *dstFilter,
                                      const double *param);
    srcW, srcH, srcFormat describe the input image (width, height, pixel format).
    dstW, dstH, dstFormat describe the output image (width, height, pixel format).
    flags selects the scaling algorithm (only effective when the input and
    output sizes differ).
    srcFilter and dstFilter define input/output image filters; pass NULL if
    no pre/post filtering is wanted.
    param passes extra parameters needed by certain scaling algorithms;
    NULL by default.
    The function returns an SwsContext describing the transformation.
    When every frame of a sequence gets the same treatment, sws_getContext
    only needs to be called once.

    sws_getContext(w, h, YV12, w,   h,   NV12, 0, NULL, NULL, NULL);  // YV12 -> NV12 color-space conversion
    sws_getContext(w, h, YV12, w/2, h/2, YV12, 0, NULL, NULL, NULL);  // shrink a YV12 image to 1/4 of the original
    sws_getContext(w, h, YV12, 2*w, 2*h, NV12, 0, NULL, NULL, NULL);  // enlarge a YV12 image 4x and convert to NV12
    */


    SwsContext * pSwsCtx = sws_getContext(avctx->width, avctx->height, avctx->pix_fmt, m_dst_w, m_dst_h, m_dst_pix_fmt, SWS_BILINEAR, NULL, NULL, NULL);
    AVPacket packet;
    // av_frame_alloc() allocates sizeof(AVFrame) (488 bytes here) and
    // default-initializes the fields
    AVFrame* pYUVFrame = av_frame_alloc();
    AVFrame* pRGBFrame = av_frame_alloc();
    // rgbBufSize = m_dst_w * m_dst_h * 3 (e.g. 1280 x 720 x 3 = 2764800,
    // or 416 x 416 x 3 = 519168 for the active 416x416 branch)
    int rgbBufSize = avpicture_get_size(m_dst_pix_fmt, m_dst_w, m_dst_h);
    printf("H264DecoderThread::run() rgbBufSize:  %d \n", rgbBufSize);
    /*
    typedef signed char             int8_t;
    typedef short int               int16_t;
    typedef int                     int32_t;
    typedef unsigned char           uint8_t;
    typedef unsigned short int      uint16_t;
    typedef unsigned int            uint32_t;
    */
    //H264DecoderThread::run() sizeof(uint8_t):  1
    //rgbBufSize*sizeof(uint8_t) = 1280 X 720 X 3 X 1 = 2764800
    uint8_t* rgbBuf = (uint8_t*)(av_malloc(rgbBufSize*sizeof(uint8_t)));
    printf("H264DecoderThread::run() sizeof(uint8_t):  %zu \n", sizeof(uint8_t));
    /*
    typedef struct AVPicture {
        attribute_deprecated
        uint8_t *data[AV_NUM_DATA_POINTERS];    ///< pointers to the image data planes
        attribute_deprecated
        int linesize[AV_NUM_DATA_POINTERS];     ///< number of bytes per line
    } AVPicture;

    avpicture_fill((AVPicture *) pFrameRGB, buffer, PIX_FMT_RGB565,
                   pCodecCtx->width, pCodecCtx->height);

    At this call, pFrameRGB and buffer are both already-allocated memory;
    pFrameRGB's data pointers get "attached" to buffer according to the
    RGB565 layout. In other words, this hangs buffer off pFrameRGB as its
    pixel storage.

    sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
              pCodecCtx->height, pFrameRGB->data, pFrameRGB->linesize);

    The converted data is written to pFrameRGB, and therefore lands in
    buffer as well.

    Note: the AVPicture struct is deprecated but kept unchanged so that the
    avpicture_fill interface stays compatible. Casting AVFrame* to
    AVPicture* works here because only the AVFrame data and linesize
    fields are used.

         * pointer to the picture/channel planes.
         * This might be different from the first allocated byte
         *
         * Some decoders access areas outside 0,0 - width,height, please
         * see avcodec_align_dimensions2(). Some filters and swscale can read
         * up to 16 bytes beyond the planes, if these filters are to be used,
         * then 16 extra bytes must be allocated.
         *
         * NOTE: Except for hwaccel formats, pointers not needed by the format
         * MUST be set to NULL.

    uint8_t *data[AV_NUM_DATA_POINTERS];

         * For video, size in bytes of each picture line.
         * For audio, size in bytes of each plane.
         *
         * For audio, only linesize[0] may be set. For planar audio, each channel
         * plane must be the same size.
         *
         * For video the linesizes should be multiples of the CPUs alignment
         * preference, this is 16 or 32 for modern desktop CPUs.
         * Some code requires such alignment other code can be slower without
         * correct alignment, for yet other it makes no difference.
         *
         * @note The linesize may be larger than the size of usable data -- there
         * may be extra padding present for performance reasons.

    int linesize[AV_NUM_DATA_POINTERS];


    int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
                       enum AVPixelFormat pix_fmt, int width, int height)
    {
        return av_image_fill_arrays(picture->data, picture->linesize,
                                    ptr, pix_fmt, width, height, 1);
    }

    int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4],
                             const uint8_t *src, enum AVPixelFormat pix_fmt,
                             int width, int height, int align)
    {
        int ret, i;

        // Check that width/height describe a plausible picture; they must
        // satisfy (int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8,
        // otherwise the image is rejected:
        //   av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
        ret = av_image_check_size(width, height, 0, NULL);
        if (ret < 0)
            return ret;

        // Fill linesizes according to the pixel format
        ret = av_image_fill_linesizes(dst_linesize, pix_fmt, width);
        if (ret < 0)
            return ret;

        for (i = 0; i < 4; i++)
            dst_linesize[i] = FFALIGN(dst_linesize[i], align);

        // Point the plane pointers into the supplied buffer
        return av_image_fill_pointers(dst_data, pix_fmt, height, (uint8_t *)src,
                                      dst_linesize);
    }
    */

    avpicture_fill((AVPicture *) pRGBFrame, (uint8_t *)rgbBuf, m_dst_pix_fmt, m_dst_w, m_dst_h);
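    // On newer FFmpeg, avpicture_fill() is deprecated; the equivalent call
    // (an assumption about porting, not what this post was built against) is:
    //   av_image_fill_arrays(pRGBFrame->data, pRGBFrame->linesize,
    //                        rgbBuf, m_dst_pix_fmt, m_dst_w, m_dst_h, 1);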
    av_read_play(m_pFormatCtx);//play RTSP
    Poco::Int64 nFrameNum = 0;


    while(!m_stop)
    {
        if (av_read_frame(m_pFormatCtx, &packet) != 0)
        {
            // read failed: mark disconnected, exit this thread, and start the
            // reconnect timer (which runs on another thread)
            m_bConnected = false;
            m_timer.stop();
            m_interval = 2;
            m_timer.setPeriodicInterval(m_interval * 1000);
            m_timer.start(Poco::TimerCallback<Connector>(m_connector, &Connector::onTimer));
            break;
        }

        if(packet.stream_index == m_nVideoStreamIndex)
        {//packet is video

            int got_picture = 0;
            int res = avcodec_decode_video2(avctx, pYUVFrame, &got_picture, &packet);

             //printf("res %d, %d\n", res, got_picture);

             if (res>=0 && got_picture)
            {
                ++nFrameNum;
                if (m_stop)
                {
                    break;
                }
                // Convert the decoded YUV420P frame into the target RGB/BGR format
                /*
                int sws_scale(struct SwsContext *c,
                              const uint8_t *const srcSlice[], const int srcStride[],
                              int srcSliceY, int srcSliceH,
                              uint8_t *const dst[], const int dstStride[]);
                c is the context returned by sws_getContext above.
                srcSlice / srcStride describe the input (per-plane data pointers
                and per-plane bytes-per-row). The stride is the offset to the
                start of the next row; stride and width may differ (stride here
                plays the same role as step in OpenCV), because:
                1. rows may be padded for alignment, giving stride = width + N;
                2. in packed formats the channels of one pixel are interleaved;
                   for RGB24 each pixel takes 3 consecutive bytes, so the next
                   row starts 3*width bytes later.
                srcSlice and srcStride have the same number of entries,
                determined by srcFormat:
                csp       planes     width          stride         height
                YUV420     3       w, w/2, w/2    s, s/2, s/2    h, h/2, h/2
                YUYV       1       w, w/2, w/2    2s, 0, 0       h, h, h
                NV12       2       w, w/2, w/2    s, s, 0        h, h/2
                RGB24      1       w, w,   w      3s, 0, 0       h, 0, 0
                srcSliceY and srcSliceH select the region to process: srcSliceY
                is the first row, srcSliceH the number of rows. With srcSliceY=0
                and srcSliceH=height, the whole image is processed in one call.
                This slicing exists for parallelism: e.g. two threads, one
                handling rows [0, h/2-1] and the other rows [h/2, h-1].
                dst / dstStride describe the output (per-plane data pointers and
                per-plane bytes-per-row).
                Note: stride is only the step size, not the output itself; the
                actual converted pixels end up in the buffer behind
                pRGBFrame->data, i.e. rgbBuf here.
                */

                sws_scale(pSwsCtx, pYUVFrame->data, pYUVFrame->linesize, 0, avctx->height, pRGBFrame->data, pRGBFrame->linesize);

                // Pass a cv::Mat directly and print detection results
                if(1){
                    
                    struct timeval tv, tv1;
                    
                    gettimeofday(&tv,NULL);
                    IplImage* pImg = cvCreateImage(cvSize(m_dst_w , m_dst_h), 8, 3);
                    memcpy(pImg->imageData, rgbBuf, rgbBufSize);
                    
                    cv::Mat src(pImg);
                    //imwrite("./ttCV_3311RGB2BGR416.jpg", src); 
                    auto box_list = pDetector->detect(src);
                    gettimeofday(&tv1,NULL);
                    auto diff = tv1.tv_sec*1000 + tv1.tv_usec/1000 - (tv.tv_sec*1000 + tv.tv_usec/1000);
                    cout << "inputMat diff : " << diff << endl;
                    
                    for(auto& b: box_list)
                    {
                        cout << "input Mat;  Object type: " << b.obj_id << ", x: " << b.x << ", y: " << b.y << endl;
                    }
                    cvReleaseImage(&pImg);   // release the per-frame IplImage (leaked in the original)
                }

                // Pass an image_t directly and print detection results
                if(1){
                    struct timeval tv, tv1;

                    gettimeofday(&tv,NULL);
                    int w = m_dst_w;
                    int h = m_dst_h;
                    int c = m_dst_c;
                    image_t im = my_make_image_custom(w, h, c);
                    unsigned char *datayolo = rgbBuf; //(unsigned char *)pRGBFrame->data;
                    int step = m_dst_w * m_dst_c;

                    // Repack interleaved HWC bytes into planar CHW floats in [0,1]
                    for (int y = 0; y < h; ++y) {
                        for (int k = 0; k < c; ++k) {
                            for (int x = 0; x < w; ++x) {
                                im.data[k*w*h + y*w + x] = datayolo[y*step + x*c + k] / 255.0f;
                            }
                        }
                    }

                    //auto box_list = pDetector->detect_resized(im, 1280, 720);
                    auto box_list1 = pDetector->detect_resized(im, 416, 416);
                    gettimeofday(&tv1,NULL);
                    auto diff = tv1.tv_sec*1000 + tv1.tv_usec/1000 - (tv.tv_sec*1000 + tv.tv_usec/1000);
                    cout << "inputimage_t diff : " << diff << endl;

                    for(auto& b: box_list1)
                    {
                        cout << "inputimage_t;   Object type: " << b.obj_id << ", x: " << b.x << ", y: " << b.y << endl;
                    }
                    free(im.data);   // release the per-frame image_t buffer (leaked in the original)
                }


                #if 0
                exec_time = ((double)cv::getTickCount() - exec_time)*1000. / cv::getTickFrequency();
                cout << "exec_time1 : " << exec_time << endl;


                IplImage* pImg = cvCreateImage(cvSize(avctx->width, avctx->height), 8, 3);
                memcpy(pImg->imageData, rgbBuf, rgbBufSize);

                exec_time = ((double)cv::getTickCount() - exec_time)*1000. / cv::getTickFrequency();
                cout << "exec_time2 : " << exec_time << endl;

                
                cvCvtColor(pImg, pImg, CV_RGB2BGR);
                ImageFrame data(pImg, "1");
                data.frameNum = nFrameNum;

                exec_time = ((double)cv::getTickCount() - exec_time)*1000. / cv::getTickFrequency();
                cout << "exec_time3 : " << exec_time << endl;


                cv::Mat src;
                cv::cvtColor(cv::Mat(pImg), src, CV_BGR2RGB);
                printf("sH264DecoderThread A\n");

                exec_time = ((double)cv::getTickCount() - exec_time)*1000. / cv::getTickFrequency();
                cout << "exec_time4 : " << exec_time << endl;


                static int Static_nCount = 0;
                printf("avcodec_decode_video2 Count %d,  Interval %d\n", Static_nCount, m_nDetectionInterval);

                //if(Static_nCount % m_nDetectionInterval == 0)
                {
                    //m_frameCallback(data);
                }

                Static_nCount++;
                
                #endif
            }
             
        }
        
        av_free_packet(&packet);
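        // (On newer FFmpeg, av_free_packet() is deprecated; av_packet_unref(&packet)
        // is the drop-in replacement.)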
    }
    av_read_pause(m_pFormatCtx);
    av_frame_free(&pYUVFrame);
    av_frame_free(&pRGBFrame);
    av_free(rgbBuf);
    sws_freeContext(pSwsCtx);
    avcodec_close(avctx);
    //avformat_close_input(&m_pFormatCtx);
}
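`run()` decodes with `avcodec_decode_video2`, which matches the FFmpeg version this code targets but is deprecated on 3.x+ in favor of the send/receive pair. Here is a hedged sketch of what the decode step becomes under the newer API (`decode_packet` is an illustrative name; one packet may yield zero or more frames, hence the inner loop):

extern "C" {
#include <libavcodec/avcodec.h>
}

// Sketch of the decode step on FFmpeg 3.x+: avcodec_decode_video2() is
// replaced by the avcodec_send_packet()/avcodec_receive_frame() pair.
static int decode_packet(AVCodecContext *avctx, AVPacket *pkt, AVFrame *frame)
{
    int ret = avcodec_send_packet(avctx, pkt);
    if (ret < 0)
        return ret;                     // real error feeding the packet
    while ((ret = avcodec_receive_frame(avctx, frame)) >= 0) {
        // ... hand `frame` to sws_scale + detection, as run() does above ...
        av_frame_unref(frame);
    }
    // AVERROR(EAGAIN) just means "send more input"; it is not a failure.
    return (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) ? 0 : ret;
}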

void H264DecoderThread::start(const std::function<frameCBfunc> &cb)
{
    printf("H264DecoderThread start()\n");

    if (!m_pFormatCtx)
    {
        
        printf("m_pFormatCtx == Null\n");
        return;
    }

    
    printf("H264DecoderThread start() b\n");
    m_connector.m_do = std::bind(&H264DecoderThread::reconnect, this);
    m_frameCallback = cb;
    m_thread.start(*this);
}

void H264DecoderThread::exit()
{
    m_stop = true;
    m_thread.join();
}

FFmpegErrorCode H264DecoderThread::init(const std::string &strMrl, Poco::Logger* pLogger, int nDetectionInterval)
{
    printf("H264DecoderThread init()\n");
    m_nDetectionInterval = nDetectionInterval;
    av_register_all();
    avformat_network_init();
    m_strMrl = strMrl;
    FFmpegErrorCode res = connect();
    return res;
}

void H264DecoderThread::reconnect()
{
    std::cout <<"H264DecoderThread reconnect"  <<std::endl;

    if (connect() == FFmpeg_NoError)
    {
    
        std::cout <<"H264DecoderThread reconnect A"  <<std::endl;
        //stop the timer and start decoder thread
        m_timer.restart(0);
        m_thread.start(*this);
        if (m_connectCallback)
        {
            m_connectCallback(true);
        }
    }
    else
    {
    
        std::cout <<"H264DecoderThread reconnect B"  <<std::endl;
        if (m_interval < 256)
        {
            // exponential backoff: 2s, 4s, 8s, ... capped at 256s between retries
            m_interval = (m_interval * 2) % 257;
        }
        m_timer.setPeriodicInterval(m_interval * 1000);
        if (m_disconnectCallback)
        {
            m_disconnectCallback(false);
        }
    }
}
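Both detection branches in `run()` time themselves with the same hand-rolled `gettimeofday` millisecond arithmetic. A `std::chrono` helper (my addition, not part of the original code) would factor that out portably:

// A portable replacement for the gettimeofday() deltas used in run():
// milliseconds between two steady_clock time points.
#include <chrono>

static long long elapsed_ms(std::chrono::steady_clock::time_point t0,
                            std::chrono::steady_clock::time_point t1)
{
    return std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
}

// Usage:
//   auto t0 = std::chrono::steady_clock::now();
//   auto box_list = pDetector->detect(src);
//   cout << "detect took " << elapsed_ms(t0, std::chrono::steady_clock::now())
//        << " ms" << endl;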
 
