draw_2_boxes.cpp

本文提供了一个Windows应用程序的编程实例,展示了如何使用C语言创建窗口、加载资源、处理消息等基本操作,并介绍了针对不同Windows版本的注册类方法。此外,还涉及了调用API函数、实现绘图等功能。

  name="google_ads_frame" marginwidth="0" marginheight="0" src="http://pagead2.googlesyndication.com/pagead/ads?client=ca-pub-5572165936844014&dt=1194442938015&lmt=1194190197&format=336x280_as&output=html&correlator=1194442937843&url=file%3A%2F%2F%2FC%3A%2FDocuments%2520and%2520Settings%2Flhh1%2F%E6%A1%8C%E9%9D%A2%2FCLanguage.htm&color_bg=FFFFFF&color_text=000000&color_link=000000&color_url=FFFFFF&color_border=FFFFFF&ad_type=text&ga_vid=583001034.1194442938&ga_sid=1194442938&ga_hid=1942779085&flash=9&u_h=768&u_w=1024&u_ah=740&u_aw=1024&u_cd=32&u_tz=480&u_java=true" frameborder="0" width="336" scrolling="no" height="280" allowtransparency="allowtransparency"> 
#include <windows.h> 
#include "Draw_2_Boxes.h"


#if defined (WIN32)
 #define IS_WIN32 TRUE
#else
 #define IS_WIN32 FALSE
#endif

#define IS_NT      IS_WIN32 && (BOOL)(GetVersion() < 0x80000000)
#define IS_WIN32S  IS_WIN32 && (BOOL)(!(IS_NT) && (LOBYTE(LOWORD(GetVersion()))<4))
#define IS_WIN95   (BOOL)(!(IS_NT) && !(IS_WIN32S)) && IS_WIN32

HINSTANCE hInst;   // current instance

LPCTSTR lpszAppName  = "MyApp";
LPCTSTR lpszTitle    = "My Application";

BOOL RegisterWin95( CONST WNDCLASS* lpwc );

int APIENTRY WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance,
                      LPTSTR lpCmdLine, int nCmdShow)
{
   MSG      msg;
   HWND     hWnd;
   WNDCLASS wc;

   // Register the main application window class.
   //............................................
   wc.style         = CS_HREDRAW | CS_VREDRAW;
   wc.lpfnWndProc   = (WNDPROC)WndProc;      
   wc.cbClsExtra    = 0;                     
   wc.cbWndExtra    = 0;                     
   wc.hInstance     = hInstance;             
   wc.hIcon         = LoadIcon( hInstance, lpszAppName );
   wc.hCursor       = LoadCursor(NULL, IDC_ARROW);
   wc.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
   wc.lpszMenuName  = lpszAppName;             
   wc.lpszClassName = lpszAppName;             

   if ( IS_WIN95 )
   {
      if ( !RegisterWin95( &wc ) )
         return( FALSE );
   }
   else if ( !RegisterClass( &wc ) )
      return( FALSE );

   hInst = hInstance;

   // Create the main application window.
   //....................................
   hWnd = CreateWindow( lpszAppName,
                        lpszTitle,   
                        WS_OVERLAPPEDWINDOW,
                        CW_USEDEFAULT, 0,
                        CW_USEDEFAULT, 0, 
                        NULL,             
                        NULL,             
                        hInstance,        
                        NULL              
                      );

   if ( !hWnd )
      return( FALSE );

   ShowWindow( hWnd, nCmdShow );
   UpdateWindow( hWnd );        

   while( GetMessage( &msg, NULL, 0, 0) )  
   {
      TranslateMessage( &msg );
      DispatchMessage( &msg ); 
   }

   return( msg.wParam );
}


BOOL RegisterWin95( CONST WNDCLASS* lpwc )
{
   WNDCLASSEX wcex;

   wcex.style         = lpwc->style;
   wcex.lpfnWndProc   = lpwc->lpfnWndProc;
   wcex.cbClsExtra    = lpwc->cbClsExtra;
   wcex.cbWndExtra    = lpwc->cbWndExtra;
   wcex.hInstance     = lpwc->hInstance;
   wcex.hIcon         = lpwc->hIcon;
   wcex.hCursor       = lpwc->hCursor;
   wcex.hbrBackground = lpwc->hbrBackground;
   wcex.lpszMenuName  = lpwc->lpszMenuName;
   wcex.lpszClassName = lpwc->lpszClassName;

   // Added elements for Windows 95.
   //...............................
   wcex.cbSize = sizeof(WNDCLASSEX);
   wcex.hIconSm = LoadImage(wcex.hInstance, lpwc->lpszClassName,
                            IMAGE_ICON, 16, 16,
                            LR_DEFAULTCOLOR );
   
   return RegisterClassEx( &wcex );
}

LRESULT CALLBACK WndProc( HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam )
{
static HPALETTE hPalette;
static int      nColorData;

   switch( uMsg )
   {
      case WM_CREATE  :
              {
                 HANDLE       hRes, hPal;
                 LPBITMAPINFO lpBi;
                 LPLOGPALETTE lpPal;
                 int          i;

                 // Load the bitmap.
                 //.................
                 hRes = LoadResource( hInst,
                            FindResource( hInst, "testdib", RT_BITMAP ) );

                 lpBi = (LPBITMAPINFO)LockResource( hRes );

                 // Find out how many colors we need.
                 //..................................
                 if ( lpBi->bmiHeader.biClrUsed != 0 )
                    nColorData = lpBi->bmiHeader.biClrUsed;
                 else
                    switch( lpBi->bmiHeader.biBitCount )
                    {
                       case 1  : nColorData = 2;   break; // Monochrome
                       case 4  : nColorData = 16;  break; // VGA
                       case 8  : nColorData = 256; break; // SVGA
                       case 24 : nColorData = 0;   break; // True Color
                    }

                 // Allocate memory for color palette.
                 //...................................
                 hPal = GlobalAlloc( GHND, sizeof(LOGPALETTE)+
                                        (nColorData * sizeof(PALETTEENTRY)) );
                 lpPal = (LPLOGPALETTE)GlobalLock( hPal );

                 lpPal->palVersion    = 0x300;     
                 lpPal->palNumEntries = nColorData;

                 // Load each color into the palette.
                 //..................................
                 for ( i = 0; i < nColorData; i++ )
                 {
                    lpPal->palPalEntry[i].peRed   = lpBi->bmiColors[i].rgbRed;
                    lpPal->palPalEntry[i].peGreen =
                                                  lpBi->bmiColors[i].rgbGreen;
                    lpPal->palPalEntry[i].peBlue  =
                                                  lpBi->bmiColors[i].rgbBlue;
                 }

                 // Create the Palette.
                 //....................
                 hPalette = CreatePalette( lpPal );

                 GlobalUnlock( hPal );
                 GlobalFree( hPal );
              }
              break;

      case WM_PAINT :
              {
                 PAINTSTRUCT  ps;
                 LPBITMAPINFO lpBi;
                 HANDLE       hRes;
                 LPTSTR       lpBits;

                 BeginPaint( hWnd, &ps );

                 // Select palette and realize it.
                 //...............................
                 SelectPalette( ps.hdc, hPalette, FALSE );
                 RealizePalette( ps.hdc );

                 // Load the bitmap.
                 //.................
                 hRes = LoadResource( hInst,
                            FindResource( hInst, "testdib", RT_BITMAP ) );

                 lpBi = (LPBITMAPINFO)LockResource( hRes );

                 // Locate the bitmap data.
                 //........................
                 lpBits =  (LPSTR) lpBi;
                 lpBits +=  lpBi->bmiHeader.biSize +
                            ( nColorData*sizeof(RGBQUAD) );

                 // Paint the bitmap normal size.
                 //..............................
                 SetDIBitsToDevice( ps.hdc, 10, 10,
                                            lpBi->bmiHeader.biWidth,
                                            lpBi->bmiHeader.biHeight,
                                            0, 0,
                                            0, lpBi->bmiHeader.biHeight,
                                            lpBits, lpBi, DIB_RGB_COLORS );

                 // Paint the bitmap 200% size.
                 //............................
                 StretchDIBits( ps.hdc, 40+lpBi->bmiHeader.biWidth, 10,
                                        lpBi->bmiHeader.biWidth*2,
                                        lpBi->bmiHeader.biHeight*2,
                                        0, 0,
                                        lpBi->bmiHeader.biWidth,
                                        lpBi->bmiHeader.biHeight,
                                        lpBits, lpBi, DIB_RGB_COLORS,
                                        SRCCOPY );

                 EndPaint( hWnd, &ps );
              }
              break;

      case WM_COMMAND :
              switch( LOWORD( wParam ) )
              {
                 case IDM_ABOUT :
                        DialogBox( hInst, "AboutBox", hWnd, (DLGPROC)About );
                        break;

                 case IDM_EXIT :
                        DestroyWindow( hWnd );
                        break;
              }
              break;
     
      case WM_DESTROY :
              DeleteObject( hPalette );
              PostQuitMessage(0);
              break;

      default :
            return( DefWindowProc( hWnd, uMsg, wParam, lParam ) );
   }

   return( 0L );
}


LRESULT CALLBACK About( HWND hDlg,          
                        UINT message,       
                        WPARAM wParam,      
                        LPARAM lParam)
{
   switch (message)
   {
       case WM_INITDIALOG:
               return (TRUE);

       case WM_COMMAND:                             
               if (   LOWORD(wParam) == IDOK        
                   || LOWORD(wParam) == IDCANCEL)   
               {
                       EndDialog(hDlg, TRUE);       
                       return (TRUE);
               }
               break;
   }

   return (FALSE);
}

>------ 已启动生成: 项目: opencv_imgproc_SSE4_1, 配置: Release x64 ------ 2>------ 已启动生成: 项目: opencv_imgproc_AVX512_SKX, 配置: Release x64 ------ 3>------ 已启动生成: 项目: opencv_imgproc_AVX2, 配置: Release x64 ------ 4>------ 已启动生成: 项目: opencv_imgproc_AVX, 配置: Release x64 ------ 5>------ 已启动生成: 项目: opencv_features2d_SSE4_1, 配置: Release x64 ------ 6>------ 已启动生成: 项目: opencv_features2d_AVX512_SKX, 配置: Release x64 ------ 7>------ 已启动生成: 项目: opencv_features2d_AVX2, 配置: Release x64 ------ 8>------ 已启动生成: 项目: opencv_dnn_AVX512_SKX, 配置: Release x64 ------ 9>------ 已启动生成: 项目: opencv_dnn_AVX2, 配置: Release x64 ------ 10>------ 已启动生成: 项目: opencv_dnn_AVX, 配置: Release x64 ------ 11>------ 已启动生成: 项目: opencv_cudev, 配置: Release x64 ------ 12>------ 已启动生成: 项目: opencv_core_SSE4_2, 配置: Release x64 ------ 13>------ 已启动生成: 项目: opencv_core_SSE4_1, 配置: Release x64 ------ 14>------ 已启动生成: 项目: opencv_core_AVX512_SKX, 配置: Release x64 ------ 15>------ 已启动生成: 项目: opencv_core_AVX2, 配置: Release x64 ------ 16>------ 已启动生成: 项目: opencv_core_AVX, 配置: Release x64 ------ 1>accum.sse4_1.cpp 1>box_filter.sse4_1.cpp 1>color_hsv.sse4_1.cpp 1>color_rgb.sse4_1.cpp 1>color_yuv.sse4_1.cpp 1>filter.sse4_1.cpp 1>median_blur.sse4_1.cpp 1>morph.sse4_1.cpp 1>smooth.sse4_1.cpp 1>imgwarp.sse4_1.cpp 1>resize.sse4_1.cpp 2>sumpixels.avx512_skx.cpp 5>sift.sse4_1.cpp 6>sift.avx512_skx.cpp 3>accum.avx2.cpp 3>bilateral_filter.avx2.cpp 3>box_filter.avx2.cpp 3>color_hsv.avx2.cpp 3>color_rgb.avx2.cpp 3>color_yuv.avx2.cpp 3>filter.avx2.cpp 3>median_blur.avx2.cpp 3>morph.avx2.cpp 3>smooth.avx2.cpp 3>sumpixels.avx2.cpp 3>imgwarp.avx2.cpp 3>resize.avx2.cpp 8>layers_common.avx512_skx.cpp 9>layers_common.avx2.cpp 4>accum.avx.cpp 4>corner.avx.cpp 10>conv_block.avx.cpp 10>conv_depthwise.avx.cpp 10>conv_winograd_f63.avx.cpp 10>fast_gemm_kernels.avx.cpp 10>layers_common.avx.cpp 7>sift.avx2.cpp 7>fast.avx2.cpp 14>matmul.avx512_skx.cpp 13>arithm.sse4_1.cpp 13>matmul.sse4_1.cpp 15>arithm.avx2.cpp 15>convert.avx2.cpp 15>convert_scale.avx2.cpp 12>stat.sse4_2.cpp 15>count_non_zero.avx2.cpp 15>has_non_zero.avx2.cpp 15>mathfuncs_core.avx2.cpp 15>matmul.avx2.cpp 15>mean.avx2.cpp 15>merge.avx2.cpp 15>split.avx2.cpp 15>stat.avx2.cpp 15>sum.avx2.cpp 11>stub.cpp 6>opencv_features2d_AVX512_SKX.vcxproj -> E:\opencv-build\build\modules\features2d\opencv_features2d_AVX512_SKX.dir\Release\opencv_features2d_AVX512_SKX.lib 17>------ 已启动生成: 项目: opencv_calib3d_AVX2, 配置: Release x64 ------ 16>mathfuncs_core.avx.cpp 5>opencv_features2d_SSE4_1.vcxproj -> E:\opencv-build\build\modules\features2d\opencv_features2d_SSE4_1.dir\Release\opencv_features2d_SSE4_1.lib 9>layers_common.avx2.cpp 17>undistort.avx2.cpp 4>opencv_imgproc_AVX.vcxproj -> E:\opencv-build\build\modules\imgproc\opencv_imgproc_AVX.dir\Release\opencv_imgproc_AVX.lib 18>------ 已启动生成: 项目: gen_opencv_python_source, 配置: Release x64 ------ 2>opencv_imgproc_AVX512_SKX.vcxproj -> E:\opencv-build\build\modules\imgproc\opencv_imgproc_AVX512_SKX.dir\Release\opencv_imgproc_AVX512_SKX.lib 11> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudev4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudev4110.exp 8>layers_common.avx512_skx.cpp 10>opencv_dnn_AVX.vcxproj -> E:\opencv-build\build\modules\dnn\opencv_dnn_AVX.dir\Release\opencv_dnn_AVX.lib 7>opencv_features2d_AVX2.vcxproj -> E:\opencv-build\build\modules\features2d\opencv_features2d_AVX2.dir\Release\opencv_features2d_AVX2.lib 11>opencv_cudev.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudev4110.dll 3>opencv_imgproc_AVX2.vcxproj -> E:\opencv-build\build\modules\imgproc\opencv_imgproc_AVX2.dir\Release\opencv_imgproc_AVX2.lib 12>opencv_core_SSE4_2.vcxproj -> E:\opencv-build\build\modules\core\opencv_core_SSE4_2.dir\Release\opencv_core_SSE4_2.lib 1>opencv_imgproc_SSE4_1.vcxproj -> E:\opencv-build\build\modules\imgproc\opencv_imgproc_SSE4_1.dir\Release\opencv_imgproc_SSE4_1.lib 13>opencv_core_SSE4_1.vcxproj -> E:\opencv-build\build\modules\core\opencv_core_SSE4_1.dir\Release\opencv_core_SSE4_1.lib 15>opencv_core_AVX2.vcxproj -> E:\opencv-build\build\modules\core\opencv_core_AVX2.dir\Release\opencv_core_AVX2.lib 16>opencv_core_AVX.vcxproj -> E:\opencv-build\build\modules\core\opencv_core_AVX.dir\Release\opencv_core_AVX.lib 9>conv_block.avx2.cpp 9>conv_depthwise.avx2.cpp 9>conv_winograd_f63.avx2.cpp 9>fast_gemm_kernels.avx2.cpp 17>opencv_calib3d_AVX2.vcxproj -> E:\opencv-build\build\modules\calib3d\opencv_calib3d_AVX2.dir\Release\opencv_calib3d_AVX2.lib 14>opencv_core_AVX512_SKX.vcxproj -> E:\opencv-build\build\modules\core\opencv_core_AVX512_SKX.dir\Release\opencv_core_AVX512_SKX.lib 19>------ 已启动生成: 项目: opencv_core, 配置: Release x64 ------ 8>opencv_dnn_AVX512_SKX.vcxproj -> E:\opencv-build\build\modules\dnn\opencv_dnn_AVX512_SKX.dir\Release\opencv_dnn_AVX512_SKX.lib 19>cmake_pch.cxx 9>opencv_dnn_AVX2.vcxproj -> E:\opencv-build\build\modules\dnn\opencv_dnn_AVX2.dir\Release\opencv_dnn_AVX2.lib 19>opencl_kernels_core.cpp 19>algorithm.cpp 19>arithm.cpp 19>arithm.dispatch.cpp 19>array.cpp 19>async.cpp 19>batch_distance.cpp 19>bindings_utils.cpp 19>buffer_area.cpp 19>channels.cpp 19>check.cpp 19>command_line_parser.cpp 19>conjugate_gradient.cpp 19>convert.dispatch.cpp 19>convert_c.cpp 19>convert_scale.dispatch.cpp 19>copy.cpp 19>count_non_zero.dispatch.cpp 19>cuda_gpu_mat.cpp 19>cuda_gpu_mat_nd.cpp 19>cuda_host_mem.cpp 19>cuda_info.cpp 19>cuda_stream.cpp 19>datastructs.cpp 19>directx.cpp 19>downhill_simplex.cpp 19>dxt.cpp 19>gl_core_3_1.cpp 19>glob.cpp 19>hal_internal.cpp 19>has_non_zero.dispatch.cpp 19>kmeans.cpp 19>lapack.cpp 19>lda.cpp 19>logger.cpp 19>lpsolver.cpp 19>D:\Visual Studio\VC\Tools\MSVC\14.43.34808\include\xutility(506,82): warning C4267: “参数”: 从“size_t”转换到“const unsigned int”,可能丢失数据 19>(编译源文件“../../../opencv/modules/core/src/cuda_stream.cpp”) 19> D:\Visual Studio\VC\Tools\MSVC\14.43.34808\include\xutility(506,82): 19> 模板实例化上下文(最早的实例化上下文)为 19> E:\opencv-build\opencv\modules\core\src\cuda_stream.cpp(468,13): 19> 查看对正在编译的函数 模板 实例化“cv::Ptr<cv::cuda::Stream::Impl> cv::makePtr<cv::cuda::Stream::Impl,size_t>(const size_t &)”的引用 19> E:\opencv-build\opencv\modules\core\include\opencv2\core\cvstd_wrapper.hpp(146,27): 19> 查看对正在编译的函数 模板 实例化“std::shared_ptr<T> std::make_shared<_Tp,const size_t&>(const size_t &)”的引用 19> with 19> [ 19> T=cv::cuda::Stream::Impl, 19> _Tp=cv::cuda::Stream::Impl 19> ] 19> D:\Visual Studio\VC\Tools\MSVC\14.43.34808\include\memory(2903,46): 19> 查看对正在编译的函数 模板 实例化“std::_Ref_count_obj2<_Ty>::_Ref_count_obj2<const size_t&>(const size_t &)”的引用 19> with 19> [ 19> _Ty=cv::cuda::Stream::Impl 19> ] 19> D:\Visual Studio\VC\Tools\MSVC\14.43.34808\include\memory(2092,18): 19> 查看对正在编译的函数 模板 实例化“void std::_Construct_in_place<_Ty,const size_t&>(_Ty &,const size_t &) noexcept(false)”的引用 19> with 19> [ 19> _Ty=cv::cuda::Stream::Impl 19> ] 19>lut.cpp 19>mathfuncs.cpp 19>mathfuncs_core.dispatch.cpp 19>matmul.dispatch.cpp 19>matrix.cpp 19>matrix_c.cpp 19>matrix_decomp.cpp 19>matrix_expressions.cpp 19>matrix_iterator.cpp 19>matrix_operations.cpp 19>matrix_sparse.cpp 19>matrix_transform.cpp 19>matrix_wrap.cpp 19>mean.dispatch.cpp 19>merge.dispatch.cpp 19>minmax.cpp 19>norm.cpp 19>ocl.cpp 19>opencl_clblas.cpp 19>opencl_clfft.cpp 19>opencl_core.cpp 19>opengl.cpp 19>out.cpp 19>ovx.cpp 19>parallel_openmp.cpp 19>parallel_tbb.cpp 19>parallel_impl.cpp 19>pca.cpp 19>persistence.cpp 19>persistence_base64_encoding.cpp 19>persistence_json.cpp 19>persistence_types.cpp 19>persistence_xml.cpp 19>persistence_yml.cpp 19>rand.cpp 19>softfloat.cpp 19>split.dispatch.cpp 19>stat.dispatch.cpp 19>stat_c.cpp 19>stl.cpp 19>sum.dispatch.cpp 19>system.cpp 19>tables.cpp 19>trace.cpp 19>types.cpp 19>umatrix.cpp 19>datafile.cpp 19>filesystem.cpp 19>logtagconfigparser.cpp 19>logtagmanager.cpp 19>samples.cpp 19>va_intel.cpp 19>alloc.cpp 19>parallel.cpp 19>parallel.cpp 19> 正在创建库 E:/opencv-build/build/lib/Release/opencv_core4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_core4110.exp 19>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 19>opencv_core.vcxproj -> E:\opencv-build\build\bin\Release\opencv_core4110.dll 19>已完成生成项目“opencv_core.vcxproj”的操作。 20>------ 已启动生成: 项目: opencv_version_win32, 配置: Release x64 ------ 21>------ 已启动生成: 项目: opencv_version, 配置: Release x64 ------ 22>------ 已启动生成: 项目: opencv_signal, 配置: Release x64 ------ 23>------ 已启动生成: 项目: opencv_ml, 配置: Release x64 ------ 24>------ 已启动生成: 项目: opencv_imgproc, 配置: Release x64 ------ 25>------ 已启动生成: 项目: opencv_flann, 配置: Release x64 ------ 26>------ 已启动生成: 项目: opencv_cudaarithm, 配置: Release x64 ------ 20>opencv_version.cpp 22>cmake_pch.cxx 23>cmake_pch.cxx 25>cmake_pch.cxx 21>opencv_version.cpp 26>cmake_pch.cxx 24>cmake_pch.cxx 22>opencv_signal_main.cpp 22>signal_resample.cpp 23>opencv_ml_main.cpp 23>ann_mlp.cpp 23>boost.cpp 23>data.cpp 23>em.cpp 23>gbt.cpp 23>inner_functions.cpp 23>kdtree.cpp 23>knearest.cpp 23>lr.cpp 23>nbayes.cpp 23>rtrees.cpp 23>svm.cpp 23>svmsgd.cpp 23>testset.cpp 23>tree.cpp 21>opencv_version.vcxproj -> E:\opencv-build\build\bin\Release\opencv_version.exe 24>opencl_kernels_imgproc.cpp 24>opencv_imgproc_main.cpp 24>accum.cpp 24>accum.dispatch.cpp 24>approx.cpp 24>bilateral_filter.dispatch.cpp 24>blend.cpp 24>box_filter.dispatch.cpp 24>canny.cpp 20>opencv_version_win32.vcxproj -> E:\opencv-build\build\bin\Release\opencv_version_win32.exe 24>clahe.cpp 24>color.cpp 24>color_hsv.dispatch.cpp 24>color_lab.cpp 24>color_rgb.dispatch.cpp 24>color_yuv.dispatch.cpp 24>colormap.cpp 24>connectedcomponents.cpp 24>contours.cpp 24>contours_approx.cpp 24>contours_common.cpp 24>contours_link.cpp 25>opencv_flann_main.cpp 24>contours_new.cpp 24>convhull.cpp 25>flann.cpp 24>corner.cpp 25>miniflann.cpp 24>cornersubpix.cpp 22> 正在创建库 E:/opencv-build/build/lib/Release/opencv_signal4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_signal4110.exp 26>opencv_cudaarithm_main.cpp 24>demosaicing.cpp 26>arithm.cpp 24>deriv.cpp 26>core.cpp 24>distransform.cpp 24>drawing.cpp 24>emd.cpp 24>emd_new.cpp 24>featureselect.cpp 26>element_operations.cpp 24>filter.dispatch.cpp 26>lut.cpp 26>reductions.cpp 24>floodfill.cpp 24>gabor.cpp 24>generalized_hough.cpp 24>geometry.cpp 24>grabcut.cpp 24>hershey_fonts.cpp 24>histogram.cpp 24>hough.cpp 24>imgwarp.cpp 24>intelligent_scissors.cpp 24>intersection.cpp 24>linefit.cpp 24>lsd.cpp 24>main.cpp 23> 正在创建库 E:/opencv-build/build/lib/Release/opencv_ml4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_ml4110.exp 24>matchcontours.cpp 24>median_blur.dispatch.cpp 24>min_enclosing_triangle.cpp 24>moments.cpp 24>morph.dispatch.cpp 24>phasecorr.cpp 24>pyramids.cpp 24>resize.cpp 24>rotcalipers.cpp 24>samplers.cpp 24>segmentation.cpp 24>shapedescr.cpp 24>smooth.dispatch.cpp 24>spatialgradient.cpp 24>stackblur.cpp 22>opencv_signal.vcxproj -> E:\opencv-build\build\bin\Release\opencv_signal4110.dll 24>subdivision2d.cpp 24>sumpixels.dispatch.cpp 24>tables.cpp 24>templmatch.cpp 24>thresh.cpp 24>utils.cpp 23>opencv_ml.vcxproj -> E:\opencv-build\build\bin\Release\opencv_ml4110.dll 26> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudaarithm4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudaarithm4110.exp 26>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 26>opencv_cudaarithm.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudaarithm4110.dll 26>已完成生成项目“opencv_cudaarithm.vcxproj”的操作。 25> 正在创建库 E:/opencv-build/build/lib/Release/opencv_flann4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_flann4110.exp 25>opencv_flann.vcxproj -> E:\opencv-build\build\bin\Release\opencv_flann4110.dll 27>------ 已启动生成: 项目: opencv_surface_matching, 配置: Release x64 ------ 27>cmake_pch.cxx 24> 正在创建库 E:/opencv-build/build/lib/Release/opencv_imgproc4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_imgproc4110.exp 24>opencv_imgproc.vcxproj -> E:\opencv-build\build\bin\Release\opencv_imgproc4110.dll 28>------ 已启动生成: 项目: opencv_reg, 配置: Release x64 ------ 29>------ 已启动生成: 项目: opencv_quality, 配置: Release x64 ------ 30>------ 已启动生成: 项目: opencv_plot, 配置: Release x64 ------ 31>------ 已启动生成: 项目: opencv_phase_unwrapping, 配置: Release x64 ------ 32>------ 已启动生成: 项目: opencv_intensity_transform, 配置: Release x64 ------ 33>------ 已启动生成: 项目: opencv_imgcodecs, 配置: Release x64 ------ 34>------ 已启动生成: 项目: opencv_img_hash, 配置: Release x64 ------ 35>------ 已启动生成: 项目: opencv_hfs, 配置: Release x64 ------ 36>------ 已启动生成: 项目: opencv_fuzzy, 配置: Release x64 ------ 37>------ 已启动生成: 项目: opencv_features2d, 配置: Release x64 ------ 38>------ 已启动生成: 项目: opencv_dnn, 配置: Release x64 ------ 39>------ 已启动生成: 项目: opencv_cudawarping, 配置: Release x64 ------ 40>------ 已启动生成: 项目: opencv_cudafilters, 配置: Release x64 ------ 31>cmake_pch.cxx 30>cmake_pch.cxx 29>cmake_pch.cxx 32>cmake_pch.cxx 28>map.cpp 28>mapaffine.cpp 28>mapper.cpp 28>mappergradaffine.cpp 28>mappergradeuclid.cpp 28>mappergradproj.cpp 28>mappergradshift.cpp 28>mappergradsimilar.cpp 28>mapperpyramid.cpp 28>mapprojec.cpp 28>mapshift.cpp 34>cmake_pch.cxx 36>cmake_pch.cxx 27>opencv_surface_matching_main.cpp 27>icp.cpp 40>cmake_pch.cxx 27>pose_3d.cpp 27>ppf_helpers.cpp 27>ppf_match_3d.cpp 35>cmake_pch.cxx 27>t_hash_int.cpp 38>cmake_pch.cxx 39>cmake_pch.cxx 29>opencv_quality_main.cpp 29>qualitybrisque.cpp 29>qualitygmsd.cpp 34>opencv_img_hash_main.cpp 32>opencv_intensity_transform_main.cpp 31>opencv_phase_unwrapping_main.cpp 30>opencv_plot_main.cpp 29>qualitymse.cpp 29>qualityssim.cpp 34>average_hash.cpp 34>block_mean_hash.cpp 34>color_moment_hash.cpp 31>histogramphaseunwrapping.cpp 32>bimef.cpp 34>img_hash_base.cpp 32>intensity_transform.cpp 30>plot.cpp 34>marr_hildreth_hash.cpp 34>phash.cpp 35>opencv_hfs_main.cpp 34>radial_variance_hash.cpp 35>hfs.cpp 35>hfs_core.cpp 35>magnitude.cpp 36>opencv_fuzzy_main.cpp 36>fuzzy_F0_math.cpp 36>fuzzy_F1_math.cpp 36>fuzzy_image.cpp 35>merge.cpp 35>gslic_engine.cpp 35>slic.cpp 33>cmake_pch.cxx 40>opencv_cudafilters_main.cpp 40>filtering.cpp 27> 正在创建库 E:/opencv-build/build/lib/Release/opencv_surface_matching4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_surface_matching4110.exp 39>opencv_cudawarping_main.cpp 38>opencl_kernels_dnn.cpp 28> 正在创建库 E:/opencv-build/build/lib/Release/opencv_reg4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_reg4110.exp 39>pyramids.cpp 39>remap.cpp 39>resize.cpp 39>warp.cpp 38>opencv_dnn_main.cpp 38>opencv-caffe.pb.cc 38>opencv-onnx.pb.cc 38>attr_value.pb.cc 38>function.pb.cc 38>graph.pb.cc 38>op_def.pb.cc 38>tensor.pb.cc 38>tensor_shape.pb.cc 38>types.pb.cc 38>versions.pb.cc 38>caffe_importer.cpp 38>caffe_io.cpp 38>caffe_shrinker.cpp 38>darknet_importer.cpp 38>darknet_io.cpp 31> 正在创建库 E:/opencv-build/build/lib/Release/opencv_phase_unwrapping4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_phase_unwrapping4110.exp 32> 正在创建库 E:/opencv-build/build/lib/Release/opencv_intensity_transform4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_intensity_transform4110.exp 29> 正在创建库 E:/opencv-build/build/lib/Release/opencv_quality4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_quality4110.exp 27>opencv_surface_matching.vcxproj -> E:\opencv-build\build\bin\Release\opencv_surface_matching4110.dll 38>debug_utils.cpp 28>opencv_reg.vcxproj -> E:\opencv-build\build\bin\Release\opencv_reg4110.dll 30> 正在创建库 E:/opencv-build/build/lib/Release/opencv_plot4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_plot4110.exp 38>dnn.cpp 38>dnn_params.cpp 38>dnn_read.cpp 38>dnn_utils.cpp 32>opencv_intensity_transform.vcxproj -> E:\opencv-build\build\bin\Release\opencv_intensity_transform4110.dll 38>graph_simplifier.cpp 31>opencv_phase_unwrapping.vcxproj -> E:\opencv-build\build\bin\Release\opencv_phase_unwrapping4110.dll 38>halide_scheduler.cpp 38>ie_ngraph.cpp 38>init.cpp 35> 正在创建库 E:/opencv-build/build/lib/Release/opencv_hfs4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_hfs4110.exp 35>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 30>opencv_plot.vcxproj -> E:\opencv-build\build\bin\Release\opencv_plot4110.dll 34> 正在创建库 E:/opencv-build/build/lib/Release/opencv_img_hash4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_img_hash4110.exp 38>layers_rvp052.cpp 38>quantization_utils.cpp 38>layer.cpp 38>layer_factory.cpp 29>opencv_quality.vcxproj -> E:\opencv-build\build\bin\Release\opencv_quality4110.dll 38>accum_layer.cpp 38>arg_layer.cpp 38>attention_layer.cpp 36> 正在创建库 E:/opencv-build/build/lib/Release/opencv_fuzzy4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_fuzzy4110.exp 38>blank_layer.cpp 38>concat_layer.cpp 38>const_layer.cpp 38>correlation_layer.cpp 38>conv_depthwise.cpp 38>conv_winograd_f63.cpp 38>conv_winograd_f63.dispatch.cpp 38>convolution.cpp 38>fast_gemm.cpp 38>fast_norm.cpp 38>softmax.cpp 38>crop_and_resize_layer.cpp 38>cumsum_layer.cpp 38>depth_space_ops_layer.cpp 38>detection_output_layer.cpp 34>opencv_img_hash.vcxproj -> E:\opencv-build\build\bin\Release\opencv_img_hash4110.dll 38>einsum_layer.cpp 38>expand_layer.cpp 33>opencv_imgcodecs_main.cpp 35>opencv_hfs.vcxproj -> E:\opencv-build\build\bin\Release\opencv_hfs4110.dll 39> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudawarping4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudawarping4110.exp 33>bitstrm.cpp 33>exif.cpp 33>grfmt_avif.cpp 39>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 38>flatten_layer.cpp 33>grfmt_base.cpp 38>flow_warp_layer.cpp 38>gather_elements_layer.cpp 33>grfmt_bmp.cpp 33>grfmt_exr.cpp 33>grfmt_gdal.cpp 33>grfmt_gdcm.cpp 33>grfmt_gif.cpp 33>grfmt_hdr.cpp 33>grfmt_jpeg.cpp 38>gather_layer.cpp 38>gemm_layer.cpp 33>grfmt_jpeg2000.cpp 33>grfmt_jpeg2000_openjpeg.cpp 40> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudafilters4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudafilters4110.exp 33>grfmt_jpegxl.cpp 40>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 38>group_norm_layer.cpp 33>grfmt_pam.cpp 38>instance_norm_layer.cpp 33>grfmt_pfm.cpp 33>grfmt_png.cpp 33>grfmt_pxm.cpp 33>grfmt_spng.cpp 36>opencv_fuzzy.vcxproj -> E:\opencv-build\build\bin\Release\opencv_fuzzy4110.dll 33>grfmt_sunras.cpp 33>grfmt_tiff.cpp 33>grfmt_webp.cpp 38>layer_norm.cpp 38>layers_common.cpp 33>loadsave.cpp 33>rgbe.cpp 33>utils.cpp 38>lrn_layer.cpp 38>matmul_layer.cpp 38>max_unpooling_layer.cpp 38>mvn_layer.cpp 38>nary_eltwise_layers.cpp 38>normalize_bbox_layer.cpp 38>not_implemented_layer.cpp 38>padding_layer.cpp 38>permute_layer.cpp 38>prior_box_layer.cpp 39>opencv_cudawarping.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudawarping4110.dll 39>已完成生成项目“opencv_cudawarping.vcxproj”的操作。 35>已完成生成项目“opencv_hfs.vcxproj”的操作。 38>proposal_layer.cpp 38>recurrent_layers.cpp 38>reduce_layer.cpp 38>region_layer.cpp 38>reorg_layer.cpp 38>reshape_layer.cpp 38>resize_layer.cpp 38>scatterND_layer.cpp 38>scatter_layer.cpp 40>opencv_cudafilters.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudafilters4110.dll 38>shuffle_channel_layer.cpp 38>slice_layer.cpp 33>LINK : fatal error LNK1181: 无法打开输入文件“E:\Anaconda\Library\bin\avif.dll” 38>split_layer.cpp 40>已完成生成项目“opencv_cudafilters.vcxproj”的操作。 38>tile_layer.cpp 33>已完成生成项目“opencv_imgcodecs.vcxproj”的操作 - 失败。 41>------ 已启动生成: 项目: opencv_videoio, 配置: Release x64 ------ 42>------ 已启动生成: 项目: opencv_cudaimgproc, 配置: Release x64 ------ 38>topk_layer.cpp 38>legacy_backend.cpp 38>model.cpp 38>net.cpp 38>net_cann.cpp 37>cmake_pch.cxx 38>net_impl_backend.cpp 38>net_impl.cpp 38>net_impl_fuse.cpp 38>net_openvino.cpp 38>net_quantization.cpp 38>nms.cpp 38>common.cpp 38>math_functions.cpp 38>ocl4dnn_conv_spatial.cpp 38>ocl4dnn_inner_product.cpp 38>ocl4dnn_lrn.cpp 38>ocl4dnn_pool.cpp 38>ocl4dnn_softmax.cpp 38>onnx_graph_simplifier.cpp 38>onnx_importer.cpp 41>cmake_pch.cxx 38>op_cann.cpp 38>op_cuda.cpp 38>op_halide.cpp 38>op_inf_engine.cpp 38>op_timvx.cpp 38>op_vkcom.cpp 38>op_webnn.cpp 38>registry.cpp 38>tf_graph_simplifier.cpp 38>tf_importer.cpp 42>cmake_pch.cxx 38>tf_io.cpp 38>tflite_importer.cpp 38>THDiskFile.cpp 38>THFile.cpp 38>THGeneral.cpp 38>torch_importer.cpp 38>conv_1x1_fast_spv.cpp 38>conv_depthwise_3x3_spv.cpp 38>conv_depthwise_spv.cpp 38>conv_implicit_gemm_spv.cpp 38>gemm_spv.cpp 38>nary_eltwise_binary_forward_spv.cpp 38>spv_shader.cpp 38>buffer.cpp 38>command.cpp 38>context.cpp 38>fence.cpp 38>internal.cpp 37>opencl_kernels_features2d.cpp 37>opencv_features2d_main.cpp 37>affine_feature.cpp 38>op_base.cpp 38>op_conv.cpp 37>agast.cpp 37>agast_score.cpp 37>akaze.cpp 37>bagofwords.cpp 37>blobdetector.cpp 37>brisk.cpp 37>draw.cpp 37>dynamic.cpp 38>op_matmul.cpp 38>op_naryEltwise.cpp 38>pipeline.cpp 38>tensor.cpp 37>evaluation.cpp 37>fast.cpp 37>fast_score.cpp 38>vk_functions.cpp 37>feature2d.cpp 37>gftt.cpp 38>vk_loader.cpp 37>kaze.cpp 37>AKAZEFeatures.cpp 37>KAZEFeatures.cpp 37>fed.cpp 37>nldiffusion_functions.cpp 37>keypoint.cpp 37>main.cpp 37>matchers.cpp 37>mser.cpp 37>orb.cpp 37>sift.dispatch.cpp 42>opencv_cudaimgproc_main.cpp 42>bilateral_filter.cpp 42>blend.cpp 42>canny.cpp 42>color.cpp 42>connectedcomponents.cpp 42>corners.cpp 42>generalized_hough.cpp 42>gftt.cpp 42>histogram.cpp 42>hough_circles.cpp 42>hough_lines.cpp 42>hough_segments.cpp 42>match_template.cpp 42>mean_shift.cpp 42>moments.cpp 42>mssegmentation.cpp 41>opencv_videoio_main.cpp 41>backend_static.cpp 41>cap.cpp 41>cap_dshow.cpp 41>cap_images.cpp 41>cap_mjpeg_decoder.cpp 41>cap_mjpeg_encoder.cpp 41>cap_msmf.cpp 41>obsensor_stream_channel_msmf.cpp 41>obsensor_uvc_stream_channel.cpp 41>cap_obsensor_capture.cpp 41>container_avi.cpp 41>videoio_c.cpp 41>videoio_registry.cpp 38>backend.cpp 42> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudaimgproc4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudaimgproc4110.exp 42>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 42>opencv_cudaimgproc.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudaimgproc4110.dll 42>已完成生成项目“opencv_cudaimgproc.vcxproj”的操作。 43>------ 已启动生成: 项目: opencv_photo, 配置: Release x64 ------ 37> 正在创建库 E:/opencv-build/build/lib/Release/opencv_features2d4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_features2d4110.exp 43>cmake_pch.cxx 41>backend_plugin.cpp 37>opencv_features2d.vcxproj -> E:\opencv-build\build\bin\Release\opencv_features2d4110.dll 44>------ 已启动生成: 项目: opencv_saliency, 配置: Release x64 ------ 45>------ 已启动生成: 项目: opencv_line_descriptor, 配置: Release x64 ------ 46>------ 已启动生成: 项目: opencv_cudafeatures2d, 配置: Release x64 ------ 47>------ 已启动生成: 项目: opencv_calib3d, 配置: Release x64 ------ 38>batch_norm_layer.cpp 44>cmake_pch.cxx 45>cmake_pch.cxx 46>cmake_pch.cxx 47>cmake_pch.cxx 38>convolution_layer.cpp 41>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_imgcodecs4110.lib” 41>已完成生成项目“opencv_videoio.vcxproj”的操作 - 失败。 48>------ 已启动生成: 项目: opencv_highgui, 配置: Release x64 ------ 49>------ 已启动生成: 项目: opencv_cudacodec, 配置: Release x64 ------ 43>opencl_kernels_photo.cpp 43>opencv_photo_main.cpp 43>align.cpp 43>calibrate.cpp 43>contrast_preserve.cpp 43>denoise_tvl1.cpp 43>denoising.cpp 43>denoising.cuda.cpp 43>hdr_common.cpp 43>inpaint.cpp 43>merge.cpp 43>npr.cpp 43>seamless_cloning.cpp 43>seamless_cloning_impl.cpp 43>tonemap.cpp 48>cmake_pch.cxx 49>cmake_pch.cxx 44>opencv_saliency_main.cpp 44>CmFile.cpp 44>CmShow.cpp 44>FilterTIG.cpp 44>ValStructVec.cpp 44>objectnessBING.cpp 44>motionSaliency.cpp 44>motionSaliencyBinWangApr2014.cpp 44>objectness.cpp 44>saliency.cpp 44>staticSaliency.cpp 44>staticSaliencyFineGrained.cpp 44>staticSaliencySpectralResidual.cpp 47>opencl_kernels_calib3d.cpp 47>opencv_calib3d_main.cpp 47>ap3p.cpp 47>calibinit.cpp 47>calibration.cpp 47>calibration_base.cpp 45>opencv_line_descriptor_main.cpp 47>calibration_handeye.cpp 45>LSDDetector.cpp 45>binary_descriptor.cpp 47>checkchessboard.cpp 47>chessboard.cpp 47>circlesgrid.cpp 45>binary_descriptor_matcher.cpp 47>compat_ptsetreg.cpp 47>dls.cpp 47>epnp.cpp 47>fisheye.cpp 47>five-point.cpp 45>draw.cpp 47>fundam.cpp 47>homography_decomp.cpp 47>ippe.cpp 47>levmarq.cpp 46>opencv_cudafeatures2d_main.cpp 46>brute_force_matcher.cpp 46>fast.cpp 47>main.cpp 46>feature2d_async.cpp 47>p3p.cpp 46>orb.cpp 47>polynom_solver.cpp 38>elementwise_layers.cpp 47>ptsetreg.cpp 47>quadsubpix.cpp 47>rho.cpp 47>solvepnp.cpp 47>sqpnp.cpp 47>stereo_geom.cpp 47>stereobm.cpp 47>stereosgbm.cpp 47>triangulate.cpp 47>undistort.dispatch.cpp 47>upnp.cpp 47>bundle.cpp 47>degeneracy.cpp 47>dls_solver.cpp 47>essential_solver.cpp 47>estimator.cpp 47>fundamental_solver.cpp 45> 正在创建库 E:/opencv-build/build/lib/Release/opencv_line_descriptor4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_line_descriptor4110.exp 44> 正在创建库 E:/opencv-build/build/lib/Release/opencv_saliency4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_saliency4110.exp 47>gamma_values.cpp 47>homography_solver.cpp 47>local_optimization.cpp 47>pnp_solver.cpp 46> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudafeatures2d4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudafeatures2d4110.exp 47>quality.cpp 38>eltwise_layer.cpp 47>ransac_solvers.cpp 47>sampler.cpp 46>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 43> 正在创建库 E:/opencv-build/build/lib/Release/opencv_photo4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_photo4110.exp 47>termination.cpp 43>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 47>utils.cpp 49>E:\opencv-build\opencv_contrib\modules\cudacodec\src\video_decoder.hpp(107,118): error C2065: “cudaVideoSurfaceFormat_YUV444”: 未声明的标识符 49>(编译源文件“CMakeFiles/opencv_cudacodec.dir/cmake_pch.cxx”) 49>E:\opencv-build\opencv_contrib\modules\cudacodec\src\video_decoder.hpp(107,19): error C2737: “type”: 必须初始化 const 对象 49>(编译源文件“CMakeFiles/opencv_cudacodec.dir/cmake_pch.cxx”) 49>已完成生成项目“opencv_cudacodec.vcxproj”的操作 - 失败。 45>opencv_line_descriptor.vcxproj -> E:\opencv-build\build\bin\Release\opencv_line_descriptor4110.dll 44>opencv_saliency.vcxproj -> E:\opencv-build\build\bin\Release\opencv_saliency4110.dll 46>opencv_cudafeatures2d.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudafeatures2d4110.dll 43>opencv_photo.vcxproj -> E:\opencv-build\build\bin\Release\opencv_photo4110.dll 48>opencv_highgui_main.cpp 48>backend.cpp 48>roiSelector.cpp 48>window.cpp 48>window_w32.cpp 43>已完成生成项目“opencv_photo.vcxproj”的操作。 50>------ 已启动生成: 项目: opencv_xphoto, 配置: Release x64 ------ 46>已完成生成项目“opencv_cudafeatures2d.vcxproj”的操作。 50>bm3d_image_denoising.cpp 50>dct_image_denoising.cpp 50>grayworld_white_balance.cpp 50>inpainting.cpp 50>learning_based_color_balance.cpp 50>oilpainting.cpp 38>fully_connected_layer.cpp 50>simple_color_balance.cpp 50>tonemap.cpp 48>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_videoio4110.lib” 48>已完成生成项目“opencv_highgui.vcxproj”的操作 - 失败。 51>------ 已启动生成: 项目: opencv_visualisation, 配置: Release x64 ------ 52>------ 已启动生成: 项目: opencv_ts, 配置: Release x64 ------ 53>------ 已启动生成: 项目: opencv_bioinspired, 配置: Release x64 ------ 54>------ 已启动生成: 项目: opencv_annotation, 配置: Release x64 ------ 51>opencv_visualisation.cpp 54>opencv_annotation.cpp 52>cmake_pch.cxx 53>cmake_pch.cxx 38>pooling_layer.cpp 38>scale_layer.cpp 53>opencl_kernels_bioinspired.cpp 53>opencv_bioinspired_main.cpp 53>basicretinafilter.cpp 53>imagelogpolprojection.cpp 53>magnoretinafilter.cpp 53>parvoretinafilter.cpp 53>retina.cpp 53>retina_ocl.cpp 53>retinacolor.cpp 53>retinafasttonemapping.cpp 53>retinafilter.cpp 53>transientareassegmentationmodule.cpp 54>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 54>已完成生成项目“opencv_annotation.vcxproj”的操作 - 失败。 52>cuda_perf.cpp 52>cuda_test.cpp 52>ocl_perf.cpp 52>ocl_test.cpp 52>ts.cpp 52>ts_arrtest.cpp 52>ts_func.cpp 52>ts_gtest.cpp 52>ts_perf.cpp 52>ts_tags.cpp 38>softmax_layer.cpp 53>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 51>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 53>已完成生成项目“opencv_bioinspired.vcxproj”的操作 - 失败。 51>已完成生成项目“opencv_visualisation.vcxproj”的操作 - 失败。 38>batch_norm_layer.cpp 50> 正在创建库 E:/opencv-build/build/lib/Release/opencv_xphoto4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_xphoto4110.exp 50>opencv_xphoto.vcxproj -> E:\opencv-build\build\bin\Release\opencv_xphoto4110.dll 38>convolution_layer.cpp 52>opencv_ts.vcxproj -> E:\opencv-build\build\lib\Release\opencv_ts4110.lib 38>elementwise_layers.cpp 38>eltwise_layer.cpp 38>fully_connected_layer.cpp 38>pooling_layer.cpp 47> 正在创建库 E:/opencv-build/build/lib/Release/opencv_calib3d4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_calib3d4110.exp 47>opencv_calib3d.vcxproj -> E:\opencv-build\build\bin\Release\opencv_calib3d4110.dll 55>------ 已启动生成: 项目: opencv_structured_light, 配置: Release x64 ------ 56>------ 已启动生成: 项目: opencv_shape, 配置: Release x64 ------ 57>------ 已启动生成: 项目: opencv_rgbd, 配置: Release x64 ------ 58>------ 已启动生成: 项目: opencv_rapid, 配置: Release x64 ------ 59>------ 已启动生成: 项目: opencv_cudastereo, 配置: Release x64 ------ 60>------ 已启动生成: 项目: opencv_ccalib, 配置: Release x64 ------ 55>cmake_pch.cxx 56>cmake_pch.cxx 57>cmake_pch.cxx 58>cmake_pch.cxx 60>cmake_pch.cxx 59>cmake_pch.cxx 38>scale_layer.cpp 58>opencv_rapid_main.cpp 55>opencv_structured_light_main.cpp 58>histogram.cpp 58>rapid.cpp 55>graycodepattern.cpp 55>sinusoidalpattern.cpp 56>opencv_shape_main.cpp 56>aff_trans.cpp 56>emdL1.cpp 56>haus_dis.cpp 56>hist_cost.cpp 56>sc_dis.cpp 60>opencv_ccalib_main.cpp 56>tps_trans.cpp 60>ccalib.cpp 60>multicalib.cpp 60>omnidir.cpp 60>randpattern.cpp 59>opencv_cudastereo_main.cpp 57>opencl_kernels_rgbd.cpp 59>disparity_bilateral_filter.cpp 57>opencv_rgbd_main.cpp 59>stereobm.cpp 57>colored_kinfu.cpp 57>colored_tsdf.cpp 57>depth_cleaner.cpp 57>depth_registration.cpp 57>depth_to_3d.cpp 57>dqb.cpp 57>dynafu.cpp 57>dynafu_tsdf.cpp 59>stereobp.cpp 59>stereocsbp.cpp 57>fast_icp.cpp 59>stereosgm.cpp 57>hash_tsdf.cpp 59>util.cpp 57>kinfu.cpp 57>kinfu_frame.cpp 57>large_kinfu.cpp 57>linemod.cpp 57>nonrigid_icp.cpp 57>normal.cpp 57>odometry.cpp 57>plane.cpp 57>pose_graph.cpp 57>tsdf.cpp 57>tsdf_functions.cpp 57>utils.cpp 57>volume.cpp 57>warpfield.cpp 38>softmax_layer.cpp 58> 正在创建库 E:/opencv-build/build/lib/Release/opencv_rapid4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_rapid4110.exp 55> 正在创建库 E:/opencv-build/build/lib/Release/opencv_structured_light4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_structured_light4110.exp 56> 正在创建库 E:/opencv-build/build/lib/Release/opencv_shape4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_shape4110.exp 59> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudastereo4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudastereo4110.exp 59>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 55>opencv_structured_light.vcxproj -> E:\opencv-build\build\bin\Release\opencv_structured_light4110.dll 58>opencv_rapid.vcxproj -> E:\opencv-build\build\bin\Release\opencv_rapid4110.dll 56>opencv_shape.vcxproj -> E:\opencv-build\build\bin\Release\opencv_shape4110.dll 61>------ 已启动生成: 项目: opencv_xfeatures2d, 配置: Release x64 ------ 59>opencv_cudastereo.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudastereo4110.dll 59>已完成生成项目“opencv_cudastereo.vcxproj”的操作。 60>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 60>已完成生成项目“opencv_ccalib.vcxproj”的操作 - 失败。 61>cmake_pch.cxx 61>opencl_kernels_xfeatures2d.cpp 61>opencv_xfeatures2d_main.cpp 61>affine_feature2d.cpp 61>beblid.cpp 61>brief.cpp 61>daisy.cpp 61>ellipticKeyPoint.cpp 61>fast.cpp 61>freak.cpp 61>gms.cpp 61>harris_lapace_detector.cpp 61>latch.cpp 61>Match.cpp 61>Point.cpp 61>PointPair.cpp 61>lucid.cpp 61>msd.cpp 61>pct_signatures.cpp 61>grayscale_bitmap.cpp 61>pct_clusterizer.cpp 61>pct_sampler.cpp 61>pct_signatures_sqfd.cpp 61>stardetector.cpp 61>surf.cpp 61>surf.cuda.cpp 61>surf.ocl.cpp 61>tbmr.cpp 61>xfeatures2d_init.cpp 57> 正在创建库 E:/opencv-build/build/lib/Release/opencv_rgbd4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_rgbd4110.exp 38> 正在创建库 E:/opencv-build/build/lib/Release/opencv_dnn4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_dnn4110.exp 38>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 57>opencv_rgbd.vcxproj -> E:\opencv-build\build\bin\Release\opencv_rgbd4110.dll 38>opencv_dnn.vcxproj -> E:\opencv-build\build\bin\Release\opencv_dnn4110.dll 38>已完成生成项目“opencv_dnn.vcxproj”的操作。 62>------ 已启动生成: 项目: opencv_video, 配置: Release x64 ------ 63>------ 已启动生成: 项目: opencv_text, 配置: Release x64 ------ 64>------ 已启动生成: 项目: opencv_objdetect, 配置: Release x64 ------ 65>------ 已启动生成: 项目: opencv_model_diagnostics, 配置: Release x64 ------ 66>------ 已启动生成: 项目: opencv_mcc, 配置: Release x64 ------ 67>------ 已启动生成: 项目: opencv_dnn_superres, 配置: Release x64 ------ 68>------ 已启动生成: 项目: opencv_dnn_objdetect, 配置: Release x64 ------ 63>cmake_pch.cxx 62>cmake_pch.cxx 65>model_diagnostics.cpp 64>cmake_pch.cxx 66>cmake_pch.cxx 67>cmake_pch.cxx 68>cmake_pch.cxx 63>opencv_text_main.cpp 63>erfilter.cpp 63>ocr_beamsearch_decoder.cpp 63>ocr_hmm_decoder.cpp 63>ocr_holistic.cpp 63>ocr_tesseract.cpp 63>text_detectorCNN.cpp 63>text_detector_swt.cpp 62>opencl_kernels_video.cpp 64>opencl_kernels_objdetect.cpp 62>opencv_video_main.cpp 62>bgfg_KNN.cpp 62>bgfg_gaussmix2.cpp 64>opencv_objdetect_main.cpp 64>apriltag_quad_thresh.cpp 62>camshift.cpp 64>zmaxheap.cpp 64>aruco_board.cpp 64>aruco_detector.cpp 64>aruco_dictionary.cpp 62>dis_flow.cpp 64>aruco_utils.cpp 64>charuco_detector.cpp 62>ecc.cpp 62>kalman.cpp 68>opencv_dnn_objdetect_main.cpp 62>lkpyramid.cpp 62>optflowgf.cpp 62>optical_flow_io.cpp 64>barcode.cpp 64>abs_decoder.cpp 62>tracker_feature.cpp 64>hybrid_binarizer.cpp 64>super_scale.cpp 64>utils.cpp 64>ean13_decoder.cpp 62>tracker_feature_set.cpp 64>ean8_decoder.cpp 64>upcean_decoder.cpp 62>tracker_mil_model.cpp 68>core_detect.cpp 62>tracker_mil_state.cpp 62>tracker_model.cpp 64>bardetect.cpp 62>tracker_sampler.cpp 62>tracker_sampler_algorithm.cpp 62>tracker_state_estimator.cpp 62>tracking_feature.cpp 64>cascadedetect.cpp 62>tracking_online_mil.cpp 64>cascadedetect_convert.cpp 64>detection_based_tracker.cpp 62>tracker.cpp 64>face_detect.cpp 62>tracker_dasiamrpn.cpp 64>face_recognize.cpp 62>tracker_goturn.cpp 64>graphical_code_detector.cpp 64>hog.cpp 62>tracker_mil.cpp 67>opencv_dnn_superres_main.cpp 64>main.cpp 64>qrcode.cpp 64>qrcode_encoder.cpp 62>tracker_nano.cpp 62>tracker_vit.cpp 62>variational_refinement.cpp 67>dnn_superres.cpp 66>opencv_mcc_main.cpp 66>bound_min.cpp 66>ccm.cpp 66>charts.cpp 66>checker_detector.cpp 66>checker_model.cpp 66>color.cpp 66>colorspace.cpp 66>common.cpp 66>debug.cpp 66>distance.cpp 66>graph_cluster.cpp 66>io.cpp 66>linearize.cpp 66>mcc.cpp 66>operations.cpp 66>utils.cpp 66>wiener_filter.cpp 65>opencv_model_diagnostics.vcxproj -> E:\opencv-build\build\bin\Release\opencv_model_diagnostics.exe 68>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 68>已完成生成项目“opencv_dnn_objdetect.vcxproj”的操作 - 失败。 67> 正在创建库 E:/opencv-build/build/lib/Release/opencv_dnn_superres4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_dnn_superres4110.exp 67>opencv_dnn_superres.vcxproj -> E:\opencv-build\build\bin\Release\opencv_dnn_superres4110.dll 63> 正在创建库 E:/opencv-build/build/lib/Release/opencv_text4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_text4110.exp 62> 正在创建库 E:/opencv-build/build/lib/Release/opencv_video4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_video4110.exp 63>opencv_text.vcxproj -> E:\opencv-build\build\bin\Release\opencv_text4110.dll 69>------ 已启动生成: 项目: opencv_datasets, 配置: Release x64 ------ 62>opencv_video.vcxproj -> E:\opencv-build\build\bin\Release\opencv_video4110.dll 70>------ 已启动生成: 项目: opencv_ximgproc, 配置: Release x64 ------ 71>------ 已启动生成: 项目: opencv_cudabgsegm, 配置: Release x64 ------ 72>------ 已启动生成: 项目: opencv_bgsegm, 配置: Release x64 ------ 69>ar_hmdb.cpp 71>cmake_pch.cxx 69>ar_sports.cpp 69>dataset.cpp 69>fr_adience.cpp 72>cmake_pch.cxx 69>fr_lfw.cpp 69>gr_chalearn.cpp 69>gr_skig.cpp 69>hpe_humaneva.cpp 69>hpe_parse.cpp 70>cmake_pch.cxx 69>ir_affine.cpp 69>ir_robot.cpp 69>is_bsds.cpp 69>is_weizmann.cpp 69>msm_epfl.cpp 69>msm_middlebury.cpp 69>or_imagenet.cpp 66> 正在创建库 E:/opencv-build/build/lib/Release/opencv_mcc4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_mcc4110.exp 69>or_mnist.cpp 66>opencv_mcc.vcxproj -> E:\opencv-build\build\bin\Release\opencv_mcc4110.dll 69>or_pascal.cpp 69>or_sun.cpp 69>pd_caltech.cpp 69>pd_inria.cpp 69>slam_kitti.cpp 69>slam_tumindoor.cpp 69>sr_bsds.cpp 69>sr_div2k.cpp 69>sr_general100.cpp 69>tr_chars.cpp 69>tr_icdar.cpp 69>tr_svt.cpp 64> 正在创建库 E:/opencv-build/build/lib/Release/opencv_objdetect4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_objdetect4110.exp 69>track_alov.cpp 69>track_vot.cpp 69>util.cpp 71>opencv_cudabgsegm_main.cpp 71>mog.cpp 71>mog2.cpp 64>opencv_objdetect.vcxproj -> E:\opencv-build\build\bin\Release\opencv_objdetect4110.dll 73>------ 已启动生成: 项目: opencv_xobjdetect, 配置: Release x64 ------ 74>------ 已启动生成: 项目: opencv_wechat_qrcode, 配置: Release x64 ------ 75>------ 已启动生成: 项目: opencv_interactive-calibration, 配置: Release x64 ------ 76>------ 已启动生成: 项目: opencv_face, 配置: Release x64 ------ 77>------ 已启动生成: 项目: opencv_cudalegacy, 配置: Release x64 ------ 78>------ 已启动生成: 项目: opencv_aruco, 配置: Release x64 ------ 70>opencl_kernels_ximgproc.cpp 70>opencv_ximgproc_main.cpp 70>adaptive_manifold_filter_n.cpp 70>anisodiff.cpp 70>bilateral_texture_filter.cpp 70>brightedges.cpp 70>deriche_filter.cpp 70>disparity_filters.cpp 70>domain_transform.cpp 70>dtfilter_cpu.cpp 70>edge_drawing.cpp 70>edgeaware_filters_common.cpp 70>edgeboxes.cpp 70>edgepreserving_filter.cpp 70>estimated_covariance.cpp 70>fast_hough_transform.cpp 70>fast_line_detector.cpp 70>fbs_filter.cpp 70>fgs_filter.cpp 70>find_ellipses.cpp 70>fourier_descriptors.cpp 70>graphsegmentation.cpp 70>guided_filter.cpp 72>opencv_bgsegm_main.cpp 72>bgfg_gaussmix.cpp 72>bgfg_gmg.cpp 72>bgfg_gsoc.cpp 72>bgfg_subcnt.cpp 70>joint_bilateral_filter.cpp 76>cmake_pch.cxx 70>l0_smooth.cpp 70>lsc.cpp 70>niblack_thresholding.cpp 70>paillou_filter.cpp 75>calibController.cpp 70>peilin.cpp 70>quaternion.cpp 70>radon_transform.cpp 75>calibPipeline.cpp 75>frameProcessor.cpp 72>synthetic_seq.cpp 73>cmake_pch.cxx 75>main.cpp 70>ridgedetectionfilter.cpp 75>parametersController.cpp 70>rolling_guidance_filter.cpp 70>scansegment.cpp 70>seeds.cpp 70>run_length_morphology.cpp 70>selectivesearchsegmentation.cpp 70>slic.cpp 75>rotationConverters.cpp 74>cmake_pch.cxx 70>sparse_match_interpolators.cpp 70>structured_edge_detection.cpp 78>cmake_pch.cxx 70>thinning.cpp 70>weighted_median_filter.cpp 77>cmake_pch.cxx 71> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudabgsegm4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudabgsegm4110.exp 71>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 61>boostdesc.cpp 71>opencv_cudabgsegm.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudabgsegm4110.dll 74>opencv_wechat_qrcode_main.cpp 69>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_imgcodecs4110.lib” 74>binarizermgr.cpp 74>decodermgr.cpp 74>align.cpp 74>ssd_detector.cpp 74>imgsource.cpp 74>super_scale.cpp 74>wechat_qrcode.cpp 74>binarizer.cpp 74>binarybitmap.cpp 74>adaptive_threshold_mean_binarizer.cpp 74>fast_window_binarizer.cpp 74>global_histogram_binarizer.cpp 74>hybrid_binarizer.cpp 74>simple_adaptive_binarizer.cpp 74>bitarray.cpp 70>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_imgcodecs4110.lib” 72> 正在创建库 E:/opencv-build/build/lib/Release/opencv_bgsegm4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_bgsegm4110.exp 74>bitmatrix.cpp 74>bitsource.cpp 74>bytematrix.cpp 74>characterseteci.cpp 74>decoder_result.cpp 69>已完成生成项目“opencv_datasets.vcxproj”的操作 - 失败。 79>------ 已启动生成: 项目: opencv_tracking, 配置: Release x64 ------ 70>已完成生成项目“opencv_ximgproc.vcxproj”的操作 - 失败。 71>已完成生成项目“opencv_cudabgsegm.vcxproj”的操作。 80>------ 已启动生成: 项目: opencv_optflow, 配置: Release x64 ------ 74>detector_result.cpp 74>greyscale_luminance_source.cpp 74>greyscale_rotated_luminance_source.cpp 74>grid_sampler.cpp 74>imagecut.cpp 74>kmeans.cpp 74>perspective_transform.cpp 74>genericgf.cpp 74>genericgfpoly.cpp 74>reed_solomon_decoder.cpp 74>str.cpp 74>stringutils.cpp 74>unicomblock.cpp 74>errorhandler.cpp 74>luminance_source.cpp 74>bitmatrixparser.cpp 61>logos.cpp 74>datablock.cpp 78>opencv_aruco_main.cpp 74>datamask.cpp 74>decoded_bit_stream_parser.cpp 78>aruco.cpp 74>decoder.cpp 78>aruco_calib.cpp 74>mode.cpp 78>charuco.cpp 74>alignment_pattern.cpp 74>alignment_pattern_finder.cpp 76>opencv_face_main.cpp 74>detector.cpp 76>bif.cpp 74>finder_pattern.cpp 74>finder_pattern_finder.cpp 76>eigen_faces.cpp 74>finder_pattern_info.cpp 74>pattern_result.cpp 76>face_alignment.cpp 74>error_correction_level.cpp 74>format_information.cpp 76>face_basic.cpp 76>facemark.cpp 76>facemarkAAM.cpp 76>facemarkLBF.cpp 76>facerec.cpp 76>fisher_faces.cpp 76>getlandmarks.cpp 74>qrcode_reader.cpp 74>version.cpp 76>lbph_faces.cpp 76>mace.cpp 76>predict_collector.cpp 74>reader.cpp 74>result.cpp 76>regtree.cpp 74>resultpoint.cpp 80>cmake_pch.cxx 76>trainFacemark.cpp 72>opencv_bgsegm.vcxproj -> E:\opencv-build\build\bin\Release\opencv_bgsegm4110.dll 75>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_highgui4110.lib” 73>opencv_xobjdetect_main.cpp 75>已完成生成项目“opencv_interactive-calibration.vcxproj”的操作 - 失败。 73>feature_evaluator.cpp 73>lbpfeatures.cpp 73>waldboost.cpp 79>cmake_pch.cxx 73>wbdetector.cpp 61>Logos.cpp 77>opencv_cudalegacy_main.cpp 77>NCV.cpp 77>bm.cpp 77>bm_fast.cpp 77>calib3d.cpp 77>fgd.cpp 77>gmg.cpp 77>graphcuts.cpp 77>image_pyramid.cpp 77>interpolate_frames.cpp 77>needle_map.cpp 61>vgg.cpp 78> 正在创建库 E:/opencv-build/build/lib/Release/opencv_aruco4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_aruco4110.exp 73>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_imgcodecs4110.lib” 73>已完成生成项目“opencv_xobjdetect.vcxproj”的操作 - 失败。 81>------ 已启动生成: 项目: opencv_waldboost_detector, 配置: Release x64 ------ 78>opencv_aruco.vcxproj -> E:\opencv-build\build\bin\Release\opencv_aruco4110.dll 81>waldboost_detector.cpp 77> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudalegacy4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudalegacy4110.exp 80>opencl_kernels_optflow.cpp 80>opencv_optflow_main.cpp 80>deepflow.cpp 80>interfaces.cpp 80>motempl.cpp 80>pcaflow.cpp 80>geo_interpolation.cpp 80>rlof_localflow.cpp 80>rlofflow.cpp 77>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 80>simpleflow.cpp 80>sparse_matching_gpc.cpp 80>sparsetodenseflow.cpp 80>tvl1flow.cpp 76> 正在创建库 E:/opencv-build/build/lib/Release/opencv_face4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_face4110.exp 74> 正在创建库 E:/opencv-build/build/lib/Release/opencv_wechat_qrcode4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_wechat_qrcode4110.exp 77>opencv_cudalegacy.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudalegacy4110.dll 77>已完成生成项目“opencv_cudalegacy.vcxproj”的操作。 82>------ 已启动生成: 项目: opencv_cudaobjdetect, 配置: Release x64 ------ 79>opencl_kernels_tracking.cpp 79>opencv_tracking_main.cpp 79>augmented_unscented_kalman.cpp 79>feature.cpp 79>featureColorName.cpp 79>gtrUtils.cpp 79>kuhn_munkres.cpp 79>mosseTracker.cpp 79>multiTracker.cpp 79>multiTracker_alt.cpp 79>onlineBoosting.cpp 79>tldDataset.cpp 79>tldDetector.cpp 79>tldEnsembleClassifier.cpp 79>tldModel.cpp 79>tldTracker.cpp 79>tldUtils.cpp 79>tracker.cpp 74>opencv_wechat_qrcode.vcxproj -> E:\opencv-build\build\bin\Release\opencv_wechat_qrcode4110.dll 76>opencv_face.vcxproj -> E:\opencv-build\build\bin\Release\opencv_face4110.dll 79>trackerBoosting.cpp 79>trackerBoostingModel.cpp 79>trackerCSRT.cpp 79>trackerCSRTScaleEstimation.cpp 79>trackerCSRTSegmentation.cpp 79>trackerCSRTUtils.cpp 79>trackerFeature.cpp 81>LINK : fatal error LNK1181: 无法打开输入文件“..\..\..\..\lib\Release\opencv_highgui4110.lib” 79>trackerFeatureSet.cpp 79>trackerKCF.cpp 79>trackerMIL_legacy.cpp 79>trackerMedianFlow.cpp 79>trackerSampler.cpp 81>已完成生成项目“opencv_waldboost_detector.vcxproj”的操作 - 失败。 79>trackerSamplerAlgorithm.cpp 79>trackerStateEstimator.cpp 79>tracking_by_matching.cpp 79>tracking_utils.cpp 79>twist.cpp 79>unscented_kalman.cpp 61> 正在创建库 E:/opencv-build/build/lib/Release/opencv_xfeatures2d4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_xfeatures2d4110.exp 61>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 80>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_ximgproc4110.lib” 80>已完成生成项目“opencv_optflow.vcxproj”的操作 - 失败。 83>------ 已启动生成: 项目: opencv_cudaoptflow, 配置: Release x64 ------ 61>opencv_xfeatures2d.vcxproj -> E:\opencv-build\build\bin\Release\opencv_xfeatures2d4110.dll 61>已完成生成项目“opencv_xfeatures2d.vcxproj”的操作。 84>------ 已启动生成: 项目: opencv_stitching, 配置: Release x64 ------ 82>cmake_pch.cxx 83>cmake_pch.cxx 84>cmake_pch.cxx 79>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_datasets4110.lib” 79>已完成生成项目“opencv_tracking.vcxproj”的操作 - 失败。 85>------ 已启动生成: 项目: opencv_stereo, 配置: Release x64 ------ 85>cmake_pch.cxx 83>brox.cpp 83>farneback.cpp 83>nvidiaOpticalFlow.cpp 83>pyrlk.cpp 83>tvl1flow.cpp 83>opencv_cudaoptflow_main.cpp 83>E:\opencv-build\opencv_contrib\modules\cudaoptflow\src\nvidiaOpticalFlow.cpp(52,10): error C1083: 无法打开包括文件: “nvOpticalFlowCuda.h”: No such file or directory 83>(编译源文件“../../../opencv_contrib/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp”) 82>opencv_cudaobjdetect_main.cpp 82>cascadeclassifier.cpp 82>hog.cpp 83>已完成生成项目“opencv_cudaoptflow.vcxproj”的操作 - 失败。 86>------ 已启动生成: 项目: opencv_videostab, 配置: Release x64 ------ 87>------ 已启动生成: 项目: opencv_superres, 配置: Release x64 ------ 85>opencv_stereo_main.cpp 85>descriptor.cpp 85>quasi_dense_stereo.cpp 85>stereo_binary_bm.cpp 85>stereo_binary_sgbm.cpp 86>cmake_pch.cxx 87>cmake_pch.cxx 84>opencl_kernels_stitching.cpp 84>opencv_stitching_main.cpp 84>autocalib.cpp 84>blenders.cpp 84>camera.cpp 84>exposure_compensate.cpp 84>matchers.cpp 84>motion_estimators.cpp 84>seam_finders.cpp 84>stitcher.cpp 84>timelapsers.cpp 84>util.cpp 84>warpers.cpp 84>warpers_cuda.cpp 85>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_tracking4110.lib” 85>已完成生成项目“opencv_stereo.vcxproj”的操作 - 失败。 82> 正在创建库 E:/opencv-build/build/lib/Release/opencv_cudaobjdetect4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_cudaobjdetect4110.exp 82>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 82>opencv_cudaobjdetect.vcxproj -> E:\opencv-build\build\bin\Release\opencv_cudaobjdetect4110.dll 82>已完成生成项目“opencv_cudaobjdetect.vcxproj”的操作。 86>opencv_videostab_main.cpp 86>deblurring.cpp 86>fast_marching.cpp 86>frame_source.cpp 86>global_motion.cpp 86>inpainting.cpp 86>log.cpp 86>motion_stabilizing.cpp 86>optical_flow.cpp 86>outlier_rejection.cpp 86>stabilizer.cpp 86>wobble_suppression.cpp 84> 正在创建库 E:/opencv-build/build/lib/Release/opencv_stitching4110.lib 和对象 E:/opencv-build/build/lib/Release/opencv_stitching4110.exp 84>LINK : warning LNK4098: 默认库“LIBCMT”与其他库的使用冲突;请使用 /NODEFAULTLIB:library 87>opencl_kernels_superres.cpp 87>opencv_superres_main.cpp 87>btv_l1.cpp 87>btv_l1_cuda.cpp 87>frame_source.cpp 87>input_array_utility.cpp 87>optical_flow.cpp 87>super_resolution.cpp 84>opencv_stitching.vcxproj -> E:\opencv-build\build\bin\Release\opencv_stitching4110.dll 84>已完成生成项目“opencv_stitching.vcxproj”的操作。 87>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_cudacodec4110.lib” 87>已完成生成项目“opencv_superres.vcxproj”的操作 - 失败。 86>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_videoio4110.lib” 86>已完成生成项目“opencv_videostab.vcxproj”的操作 - 失败。 88>------ 已启动生成: 项目: opencv_python3, 配置: Release x64 ------ 88>LINK : fatal error LNK1181: 无法打开输入文件“..\..\lib\Release\opencv_xobjdetect4110.lib” 88>已完成生成项目“opencv_python3.vcxproj”的操作 - 失败。 89>------ 已启动生成: 项目: INSTALL, 配置: Release x64 ------ 89>1> 89>-- Install configuration: "Release" 89>CMake Error at cmake_install.cmake:36 (file): 89> file INSTALL cannot find 89> "E:/opencv-build/build/3rdparty/ippicv/ippicv_win/icv/readme.htm": No 89> error. 89> 89> 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: 命令“setlocal 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: D:\CMake\bin\cmake.exe -DBUILD_TYPE=Release -P cmake_install.cmake 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: if %errorlevel% neq 0 goto :cmEnd 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: :cmEnd 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: :cmErrorLevel 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: exit /b %1 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: :cmDone 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: if %errorlevel% neq 0 goto :VCEnd 89>D:\Visual Studio\MSBuild\Microsoft\VC\v170\Microsoft.CppCommon.targets(166,5): error MSB3073: :VCEnd”已退出,代码为 1。 89>已完成生成项目“INSTALL.vcxproj”的操作 - 失败。 ========== 生成: 67 成功,22 失败,15 最新,0 已跳过 ========== ========== 生成 于 22:55 完成,耗时 03:12.593 分钟 ==========
05-13
import argparse import os import sys import numpy as np import json import torch from PIL import Image sys.path.append(os.path.join(os.getcwd(), "GroundingDINO")) sys.path.append(os.path.join(os.getcwd(), "segment_anything")) # Grounding DINO import GroundingDINO.groundingdino.datasets.transforms as T from GroundingDINO.groundingdino.models import build_model from GroundingDINO.groundingdino.util.slconfig import SLConfig from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap # segment anything from segment_anything import sam_model_registry import cv2 import numpy as np import matplotlib.pyplot as plt def load_image(image_path): # load image image_pil = Image.open(image_path).convert("RGB") # load image transform = T.Compose( [ T.RandomResize([800], max_size=1333), T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ] ) image, _ = transform(image_pil, None) # 3, h, w return image_pil, image def load_model(model_config_path, model_checkpoint_path, bert_base_uncased_path, device): args = SLConfig.fromfile(model_config_path) args.device = device args.bert_base_uncased_path = bert_base_uncased_path model = build_model(args) checkpoint = torch.load(model_checkpoint_path, map_location="cpu") load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False) print(load_res) _ = model.eval() return model def get_grounding_output(model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"): caption = caption.lower() caption = caption.strip() if not caption.endswith("."): caption = caption + "." model = model.to(device) image = image.to(device) with torch.no_grad(): outputs = model(image[None], captions=[caption]) logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256) boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4) logits.shape[0] # filter output logits_filt = logits.clone() boxes_filt = boxes.clone() filt_mask = logits_filt.max(dim=1)[0] > box_threshold logits_filt = logits_filt[filt_mask] # num_filt, 256 boxes_filt = boxes_filt[filt_mask] # num_filt, 4 logits_filt.shape[0] # get phrase tokenlizer = model.tokenizer tokenized = tokenlizer(caption) # build pred pred_phrases = [] for logit, box in zip(logits_filt, boxes_filt): pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer) if with_logits: pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})") else: pred_phrases.append(pred_phrase) return boxes_filt, pred_phrases def show_mask(mask, ax, random_color=False): if random_color: color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) else: color = np.array([30/255, 144/255, 255/255, 0.6]) h, w = mask.shape[-2:] mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) ax.imshow(mask_image) def show_box(box, ax, label): x0, y0 = box[0], box[1] w, h = box[2] - box[0], box[3] - box[1] ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) ax.text(x0, y0, label) def save_mask_data(output_dir, mask_list, box_list, label_list): value = 0 # 0 for background mask_img = torch.zeros(mask_list.shape[-2:]) for idx, mask in enumerate(mask_list): mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1 plt.figure(figsize=(10, 10)) plt.imshow(mask_img.numpy()) plt.axis('off') plt.savefig(os.path.join(output_dir, 'mask.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0) json_data = [{ 'value': value, 'label': 'background' }] for label, box in zip(label_list, box_list): value += 1 name, logit = label.split('(') logit = logit[:-1] # the last is ')' json_data.append({ 'value': value, 'label': name, 'logit': float(logit), 'box': box.numpy().tolist(), }) with open(os.path.join(output_dir, 'mask.json'), 'w') as f: json.dump(json_data, f) if __name__ == "__main__": parser = argparse.ArgumentParser("Grounded-Segment-Anything Demo", add_help=True) parser.add_argument("--config", type=str, default="./GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py", help="path to config file") parser.add_argument( "--grounded_checkpoint", type=str, default="./groundingdino_swint_ogc.pth", help="path to checkpoint file" ) parser.add_argument( "--sam_version", type=str, default="vit_h", required=False, help="SAM ViT version: vit_b / vit_l / vit_h" ) parser.add_argument( "--sam_checkpoint", type=str, default="./sam_vit_h_4b8939.pth", help="path to sam checkpoint file" ) parser.add_argument( "--sam_hq_checkpoint", type=str, default=None, help="path to sam-hq checkpoint file" ) parser.add_argument( "--use_sam_hq", action="store_true", help="using sam-hq for prediction" ) parser.add_argument("--input_image", type=str, required=True, help="path to image file") parser.add_argument("--text_prompt", type=str, required=True, help="text prompt") parser.add_argument( "--output_dir", "-o", type=str, default="./outputs", help="output directory" ) parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold") parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold") parser.add_argument("--device", type=str, default="cpu", help="running on cpu only!, default=False") parser.add_argument("--bert_base_uncased_path", type=str, required=False, help="bert_base_uncased model path, default=False") args = parser.parse_args() # cfg config_file = args.config # change the path of the model config file grounded_checkpoint = args.grounded_checkpoint # change the path of the model sam_version = args.sam_version sam_checkpoint = args.sam_checkpoint sam_hq_checkpoint = args.sam_hq_checkpoint use_sam_hq = args.use_sam_hq image_path = args.input_image text_prompt = args.text_prompt output_dir = args.output_dir box_threshold = args.box_threshold text_threshold = args.text_threshold device = args.device bert_base_uncased_path = args.bert_base_uncased_path # make dir os.makedirs(output_dir, exist_ok=True) # load image image_pil, image = load_image(image_path) # load model model = load_model(config_file, grounded_checkpoint, bert_base_uncased_path, device=device) # visualize raw image image_pil.save(os.path.join(output_dir, "raw_image.jpg")) # run grounding dino model boxes_filt, pred_phrases = get_grounding_output( model, image, text_prompt, box_threshold, text_threshold, device=device ) # initialize SAM if use_sam_hq: predictor = SamPredictor(sam_model_registry[sam_version](checkpoint=sam_hq_checkpoint).to(device)) else: predictor = SamPredictor(sam_model_registry[sam_version](checkpoint=sam_checkpoint).to(device)) image = cv2.imread(image_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) predictor.set_image(image) size = image_pil.size H, W = size[1], size[0] for i in range(boxes_filt.size(0)): boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H]) boxes_filt[i][:2] -= boxes_filt[i][2:] / 2 boxes_filt[i][2:] += boxes_filt[i][:2] boxes_filt = boxes_filt.cpu() transformed_boxes = predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device) masks, _, _ = predictor.predict_torch( point_coords = None, point_labels = None, boxes = transformed_boxes.to(device), multimask_output = False, ) # draw output image plt.figure(figsize=(10, 10)) plt.imshow(image) for mask in masks: show_mask(mask.cpu().numpy(), plt.gca(), random_color=True) for box, label in zip(boxes_filt, pred_phrases): show_box(box.numpy(), plt.gca(), label) plt.axis('off') plt.savefig( os.path.join(output_dir, "grounded_sam_output.jpg"), bbox_inches="tight", dpi=300, pad_inches=0.0 ) save_mask_data(output_dir, masks, boxes_filt, pred_phrases) 运行报错 C:\Users\29386\.conda\envs\grounded_sam\python.exe C:\Users\29386\segment-anything\Grounded-Segment-Anything\grounded_sam_demo.py --config GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py --grounded_checkpoint weights/groundingdino_swint_ogc.pth --sam_checkpoint weights/sam_vit_h_4b8939.pth --input_image assets/demo1.jpg --output_dir outputs --text_prompt cat C:\Users\29386\.conda\envs\grounded_sam\lib\site-packages\timm\models\layers\__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) C:\Users\29386\.conda\envs\grounded_sam\lib\site-packages\torch\functional.py:513: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3610.) return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined] final text_encoder_type: bert-base-uncased C:\Users\29386\segment-anything\Grounded-Segment-Anything\grounded_sam_demo.py:47: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(model_checkpoint_path, map_location="cpu") Traceback (most recent call last): File "C:\Users\29386\segment-anything\Grounded-Segment-Anything\grounded_sam_demo.py", line 187, in <module> model = load_model(config_file, grounded_checkpoint, bert_base_uncased_path, device=device) File "C:\Users\29386\segment-anything\Grounded-Segment-Anything\grounded_sam_demo.py", line 47, in load_model checkpoint = torch.load(model_checkpoint_path, map_location="cpu") File "C:\Users\29386\.conda\envs\grounded_sam\lib\site-packages\torch\serialization.py", line 1072, in load with _open_zipfile_reader(opened_file) as opened_zipfile: File "C:\Users\29386\.conda\envs\grounded_sam\lib\site-packages\torch\serialization.py", line 480, in __init__ super().__init__(torch._C.PyTorchFileReader(name_or_buffer)) RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory
最新发布
07-25
出现下面的错误“root@imx8mpevk:/mnt/ourput# python3 yolov8_tflite-nnstreamer.py --model /mnt/ourput/11best.onnx --img /mnt/ourput/test_6_17.mp4 -e /usr/lib/libvx_delegate.so 模型输入形状: [1, 3, 640, 640] ====== VPUDEC: 4.9.1 build on Aug 8 2024 05:28:21. ====== wrapper: 3.0.0 (VPUWRAPPER_ARM64_LINUX Build on Jul 24 2024 01:56:50) vpulib: 1.1.1 firmware: 1.1.1.1158257229 [ WARN:0@3.678] global cap_gstreamer.cpp:1750 open OpenCV | GStreamer warning: frame count is estimated by duration and fps 预处理后图像形状: (1, 3, 640, 640) Traceback (most recent call last): File "/mnt/ourput/yolov8_tflite-nnstreamer.py", line 347, in <module> detector.main() File "/mnt/ourput/yolov8_tflite-nnstreamer.py", line 282, in main output_image = self.postprocess(self.img.copy(), output) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/mnt/ourput/yolov8_tflite-nnstreamer.py", line 217, in postprocess indices = cv2.dnn.NMSBoxes( ^^^^^^^^^^^^^^^^^ TypeError: Can't parse 'bboxes'. Sequence item with index 0 has a wrong type”,是什么原因导致的?帮我修改代码“# Ultralytics YOLO 🚀, AGPL-3.0 license import argparse import re import yaml import time import cv2 import numpy as np from pathlib import Path import onnxruntime as ort import gi import cairo import subprocess # 用于获取GPU内存信息 # 初始化GStreamer gi.require_version("Gst", "1.0") gi.require_foreign("cairo") from gi.repository import Gst, GObject, GLib # 定义全局变量:模型输入图像尺寸 img_width = 640 img_height = 640 def yaml_load(file="data.yaml", append_filename=False): """加载YAML配置文件""" assert Path(file).suffix in {".yaml", ".yml"}, f"非YAML文件: {file}" with open(file, errors="ignore", encoding="utf-8") as f: s = f.read() if not s.isprintable(): s = re.sub(r"[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+", "", s) data = yaml.safe_load(s) or {} if append_filename: data["yaml_file"] = str(file) return data class LetterBox: def __init__(self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32): """图像预处理:调整尺寸并添加边框""" self.new_shape = new_shape self.auto = auto self.scaleFill = scaleFill self.scaleup = scaleup self.stride = stride self.center = center def __call__(self, labels=None, image=None): """对图像进行尺寸调整和边框填充""" if labels is None: labels = {} img = labels.get("img") if image is None else image shape = img.shape[:2] # 原始图像高宽 new_shape = labels.pop("rect_shape", self.new_shape) if isinstance(new_shape, int): new_shape = (new_shape, new_shape) # 计算缩放比例 r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) if not self.scaleup: # 仅缩小,不放大 r = min(r, 1.0) # 计算填充尺寸 ratio = r, r new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r))) dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] if self.auto: # 最小矩形填充 dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) elif self.scaleFill: # 拉伸填充 dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # 中心填充 if self.center: dw /= 2 dh /= 2 # 调整图像尺寸 if shape[::-1] != new_unpad: img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) # 添加边框 top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) if len(labels): labels = self._update_labels(labels, ratio, dw, dh) labels["img"] = img labels["resized_shape"] = new_shape return labels else: return img def _update_labels(self, labels, ratio, padw, padh): """更新标签坐标""" labels["instances"].convert_bbox(format="xyxy") labels["instances"].denormalize(*labels["img"].shape[:2][::-1]) labels["instances"].scale(*ratio) labels["instances"].add_padding(padw, padh) return labels class Yolov8ONNX: def __init__(self, onnx_model, input_image, confidence_thres, iou_thres, ext_delegate=None): """初始化YOLOv8模型(基于ONNX Runtime)""" self.onnx_model = onnx_model self.input_image = input_image self.confidence_thres = confidence_thres self.iou_thres = iou_thres # 加载类别名称 self.classes = yaml_load("SSDD.yaml")["names"] # 生成类别颜色 self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 初始化ONNX Runtime会话 self.session = ort.InferenceSession( self.onnx_model, providers=['CPUExecutionProvider'] # 可根据硬件改为VPUExecutionProvider ) self.input_name = self.session.get_inputs()[0].name self.output_name = self.session.get_outputs()[0].name self.input_shape = self.session.get_inputs()[0].shape print(f"模型输入形状: {self.input_shape}") # FPS计算相关 self.fps = 0 self.prev_time = 0 self.frame_count = 0 # 记录处理时间 self.process_times = [] def draw_detections(self, img, box, score, class_id): """在图像上绘制检测结果""" x1, y1, x2, y2 = box # 左上角和右下角坐标 color = self.color_palette[class_id].astype(int).tolist() # 绘制边界框 cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) # 绘制标签 label = f"{self.classes[class_id]}: {score:.2f}" (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) label_x, label_y = x1, y1 - 10 if y1 - 10 > label_height else y1 + 10 # 绘制标签背景 cv2.rectangle(img, (int(label_x), int(label_y - label_height)), (int(label_x + label_width), int(label_y + label_height)), color, cv2.FILLED) # 绘制标签文本 cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA) def get_gpu_memory_usage(self): """获取GPU内存使用情况(适用于i.MX 8M Plus)""" try: # 使用系统命令获取GPU内存信息(需要特定工具支持) # 对于i.MX 8M Plus,可能需要使用专用工具或接口 # 这里使用示例命令,实际可能需要调整 result = subprocess.run( ["cat", "/sys/kernel/debug/vmallocinfo"], capture_output=True, text=True ) # 解析输出获取GPU内存使用情况(简化示例) gpu_mem = "N/A" return f"GPU Mem: {gpu_mem}" except: return "GPU Mem: N/A" def preprocess(self): """图像预处理:调整尺寸、归一化、维度转换""" self.img_height, self.img_width = self.img.shape[:2] # 尺寸调整和边框填充 letterbox = LetterBox(new_shape=[img_width, img_height], auto=False, stride=32) image = letterbox(image=self.img) image = [image] image = np.stack(image) # 添加batch维度 # BGR转RGB并调整为NCHW格式 image = image[..., ::-1].transpose((0, 3, 1, 2)) image = np.ascontiguousarray(image) image = image.astype(np.float32) return image / 255.0 # 归一化 def postprocess(self, input_image, output): """后处理:解析输出、转换边界框格式、应用NMS""" try: output = output[0] # 移除batch维度 # 确保输出是numpy数组 if not isinstance(output, np.ndarray): output = np.array(output) # 检查输出维度 if output.ndim < 2: output = np.expand_dims(output, 0) # 提取边界框、置信度、类别ID boxes_centroid = output[..., :4].astype(np.float32) # 假设格式为[x, y, w, h] scores = np.max(output[..., 4:], axis=1).astype(np.float32) class_ids = np.argmax(output[..., 4:], axis=1).astype(np.int32) # 转换边界框格式:中心点+宽高 -> 左上角+右下角 if boxes_centroid.shape[1] >= 4: boxes_xyxy = np.zeros_like(boxes_centroid) boxes_xyxy[..., 0] = boxes_centroid[..., 0] - boxes_centroid[..., 2] / 2 # x1 boxes_xyxy[..., 1] = boxes_centroid[..., 1] - boxes_centroid[..., 3] / 2 # y1 boxes_xyxy[..., 2] = boxes_centroid[..., 0] + boxes_centroid[..., 2] / 2 # x2 boxes_xyxy[..., 3] = boxes_centroid[..., 1] + boxes_centroid[..., 3] / 2 # y2 else: print("错误:边界框数据格式不正确") return input_image # 应用NMS indices = cv2.dnn.NMSBoxes( bboxes=boxes_xyxy, scores=scores, score_threshold=self.confidence_thres, nms_threshold=self.iou_thres ) # 确保indices是一维数组 if len(indices) > 0: indices = indices.flatten() else: indices = [] # 绘制检测结果 for i in indices: score = scores[i] class_id = class_ids[i] if score > self.confidence_thres: box = boxes_xyxy[i].copy() # 映射回原始图像尺寸 gain = min(img_width / self.img_width, img_height / self.img_height) pad = (round((img_width - self.img_width * gain) / 2 - 0.1), round((img_height - self.img_height * gain) / 2 - 0.1)) # 转换回原始图像坐标 box[0] = (box[0] - pad[0]) / gain # x1 box[1] = (box[1] - pad[1]) / gain # y1 box[2] = (box[2] - pad[0]) / gain # x2 box[3] = (box[3] - pad[1]) / gain # y2 # 绘制边界框和标签 self.draw_detections(input_image, box, score, class_id) except IndexError as e: print(f"索引错误: {e}") print(f"输出形状: {output.shape}") print(f"边界框形状: {boxes_centroid.shape if 'boxes_centroid' in locals() else 'N/A'}") return input_image def main(self): """主函数:执行推理流程并显示FPS和GPU内存""" # 初始化GStreamer管道 pipeline = f'filesrc location={self.input_image} ! qtdemux ! h264parse ! vpudec ! videoconvert ! video/x-raw format=RGB ! appsink' cap = cv2.VideoCapture(pipeline) while cap.isOpened(): ret, self.img = cap.read() if not ret: break # 记录开始时间 start_time = time.time() # 预处理 img_data = self.preprocess() print(f"预处理后图像形状: {img_data.shape}") # 执行推理 inference_start = time.time() output = self.session.run([self.output_name], {self.input_name: img_data}) inference_time = time.time() - inference_start self.process_times.append(inference_time) # 后处理 output_image = self.postprocess(self.img.copy(), output) # 计算FPS self.frame_count += 1 current_time = time.time() if current_time - self.prev_time >= 1.0: self.fps = self.frame_count / (current_time - self.prev_time) self.prev_time = current_time self.frame_count = 0 # 获取GPU内存使用情况 gpu_info = self.get_gpu_memory_usage() # 在图像上绘制FPS和GPU内存信息 cv2.putText( output_image, f"FPS: {self.fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2 ) cv2.putText( output_image, gpu_info, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2 ) # 显示结果 cv2.imshow("YOLOv8 Detection (with FPS and GPU Mem)", output_image) if cv2.waitKey(1) & 0xFF == ord('q'): break # 打印推理时间 print(f"Inference time: {inference_time * 1000:.1f} ms") print(f"Average FPS: {self.fps:.2f}") cap.release() cv2.destroyAllWindows() if __name__ == "__main__": # 命令行参数 parser = argparse.ArgumentParser(description="YOLOv8 ONNX推理,显示FPS和GPU内存") parser.add_argument("--model", type=str, default="yolov8n.onnx", help="ONNX模型路径") parser.add_argument("--img", type=str, default="./test.mp4", help="输入视频/图像路径") parser.add_argument("--conf-thres", type=float, default=0.5, help="置信度阈值") parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU阈值") parser.add_argument("-e", "--ext_delegate", help="外部加速库路径") args = parser.parse_args() # 初始化检测器 detector = Yolov8ONNX( onnx_model=args.model, input_image=args.img, confidence_thres=args.conf_thres, iou_thres=args.iou_thres, ext_delegate=args.ext_delegate ) # 运行检测 detector.main()”
06-19
import traceback import cv2 import json import os import sys import time import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from albumentations import ( Compose, Resize, Normalize, HorizontalFlip, VerticalFlip, Rotate, OneOf, RandomBrightnessContrast, GaussNoise, ElasticTransform, RandomGamma, HueSaturationValue, CoarseDropout, Perspective, KeypointParams, CLAHE, MotionBlur, ISONoise,Lambda ) from albumentations.pytorch import ToTensorV2 from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, StepLR, ReduceLROnPlateau from torch.utils.data import Dataset, DataLoader from torchvision.models import resnet18, ResNet18_Weights from sklearn.metrics import precision_score, recall_score, f1_score import matplotlib.pyplot as plt from tqdm import tqdm # 添加进度条库 class EnhancedTrainingLogger: """增强的训练日志记录器,跟踪多种损失指标并实时可视化""" def __init__(self): self.total_losses = [] self.bin_losses = [] self.thresh_losses = [] self.db_losses = [] self.timestamps = [] self.start_time = time.time() self.lr_history = [] self.val_metrics = {'precision': [], 'recall': [], 'f1': []} # 实时可视化设置 plt.ion() # 开启交互模式 self.fig, self.axs = plt.subplots(2, 2, figsize=(15, 10)) self.fig.suptitle('Training Progress', fontsize=16) # 初始化图表 self.loss_line, = self.axs[0, 0].plot([], [], 'r-', label='Total Loss') self.bin_line, = self.axs[0, 0].plot([], [], 'g-', label='Binary Loss') self.thresh_line, = self.axs[0, 0].plot([], [], 'b-', label='Threshold Loss') self.db_line, = self.axs[0, 0].plot([], [], 'm-', label='DB Loss') self.axs[0, 0].set_title('Training Loss Components') self.axs[0, 0].set_xlabel('Batch') self.axs[0, 0].set_ylabel('Loss') self.axs[0, 0].legend() self.axs[0, 0].grid(True) self.lr_line, = self.axs[0, 1].plot([], [], 'c-') self.axs[0, 1].set_title('Learning Rate Schedule') self.axs[0, 1].set_xlabel('Batch') self.axs[0, 1].set_ylabel('Learning Rate') self.axs[0, 1].grid(True) self.precision_line, = self.axs[1, 0].plot([], [], 'r-', label='Precision') self.recall_line, = self.axs[1, 0].plot([], [], 'g-', label='Recall') self.f1_line, = self.axs[1, 0].plot([], [], 'b-', label='F1 Score') self.axs[1, 0].set_title('Validation Metrics') self.axs[1, 0].set_xlabel('Epoch') self.axs[1, 0].set_ylabel('Score') self.axs[1, 0].legend() self.axs[1, 0].grid(True) # 添加文本区域显示当前指标 self.metrics_text = self.axs[1, 1].text(0.5, 0.5, "", horizontalalignment='center', verticalalignment='center', transform=self.axs[1, 1].transAxes, fontsize=12) self.axs[1, 1].axis('off') # 关闭坐标轴 plt.tight_layout() plt.subplots_adjust(top=0.9) plt.draw() plt.pause(0.1) def on_batch_end(self, batch_idx, total_loss, bin_loss, thresh_loss, db_loss, lr=None): elapsed = time.time() - self.start_time self.total_losses.append(total_loss) self.bin_losses.append(bin_loss) self.thresh_losses.append(thresh_loss) self.db_losses.append(db_loss) self.timestamps.append(elapsed) if lr is not None: self.lr_history.append(lr) # 更新实时图表 self.update_plots(batch_idx) # 每10个batch打印详细日志 if batch_idx % 10 == 0: avg_total = np.mean(self.total_losses[-10:]) if len(self.total_losses) >= 10 else total_loss avg_bin = np.mean(self.bin_losses[-10:]) if len(self.bin_losses) >= 10 else bin_loss avg_thresh = np.mean(self.thresh_losses[-10:]) if len(self.thresh_losses) >= 10 else thresh_loss avg_db = np.mean(self.db_losses[-10:]) if len(self.db_losses) >= 10 else db_loss # 更新文本区域 metrics_text = ( f"Batch: {batch_idx}\n" f"Total Loss: {total_loss:.4f} (Avg10: {avg_total:.4f})\n" f"Binary Loss: {bin_loss:.4f} (Avg10: {avg_bin:.4f})\n" f"Threshold Loss: {thresh_loss:.4f} (Avg10: {avg_thresh:.4f})\n" f"DB Loss: {db_loss:.4f} (Avg10: {avg_db:.4f})\n" f"Learning Rate: {lr:.2e}\n" f"Time: {int(elapsed // 3600):02d}:{int((elapsed % 3600) // 60):02d}:{int(elapsed % 60):02d}" ) self.metrics_text.set_text(metrics_text) # 刷新图表 plt.draw() plt.pause(0.01) def update_plots(self, batch_idx): # 更新损失图表 x_data = np.arange(len(self.total_losses)) self.loss_line.set_data(x_data, self.total_losses) self.bin_line.set_data(x_data, self.bin_losses) self.thresh_line.set_data(x_data, self.thresh_losses) self.db_line.set_data(x_data, self.db_losses) # 自动调整Y轴范围 all_losses = self.total_losses + self.bin_losses + self.thresh_losses + self.db_losses if all_losses: min_loss = min(all_losses) * 0.9 max_loss = max(all_losses) * 1.1 self.axs[0, 0].set_ylim(min_loss, max_loss) # 更新学习率图表 if self.lr_history: self.lr_line.set_data(np.arange(len(self.lr_history)), self.lr_history) self.axs[0, 1].set_ylim(min(self.lr_history) * 0.9, max(self.lr_history) * 1.1) # 更新验证指标图表 if self.val_metrics['precision']: x_epochs = np.arange(len(self.val_metrics['precision'])) self.precision_line.set_data(x_epochs, self.val_metrics['precision']) self.recall_line.set_data(x_epochs, self.val_metrics['recall']) self.f1_line.set_data(x_epochs, self.val_metrics['f1']) # 自动调整Y轴范围 all_metrics = self.val_metrics['precision'] + self.val_metrics['recall'] + self.val_metrics['f1'] if all_metrics: min_metric = min(all_metrics) * 0.9 max_metric = max(all_metrics) * 1.1 self.axs[1, 0].set_ylim(min_metric, max_metric) # 调整X轴范围 self.axs[0, 0].set_xlim(0, max(1, len(self.total_losses))) self.axs[0, 1].set_xlim(0, max(1, len(self.lr_history))) if self.val_metrics['precision']: self.axs[1, 0].set_xlim(0, max(1, len(self.val_metrics['precision']))) def on_epoch_end(self, epoch, optimizer=None): # 添加空列表检查 total_min = min(self.total_losses) if self.total_losses else 0.0 total_max = max(self.total_losses) if self.total_losses else 0.0 total_avg = np.mean(self.total_losses) if self.total_losses else 0.0 bin_min = min(self.bin_losses) if self.bin_losses else 0.0 bin_avg = np.mean(self.bin_losses) if self.bin_losses else 0.0 thresh_min = min(self.thresh_losses) if self.thresh_losses else 0.0 thresh_avg = np.mean(self.thresh_losses) if self.thresh_losses else 0.0 db_min = min(self.db_losses) if self.db_losses else 0.0 db_avg = np.mean(self.db_losses) if self.db_losses else 0.0 # 生成详细的损失报告 report = ( f"\n{'=' * 70}\n" f"EPOCH {epoch + 1} SUMMARY:\n" f" - Total Loss: Min={total_min:.6f}, Max={total_max:.6f}, Avg={total_avg:.6f}\n" f" - Binary Loss: Min={bin_min:.6f}, Avg={bin_avg:.6f}\n" f" - Threshold Loss: Min={thresh_min:.6f}, Avg={thresh_avg:.6f}\n" f" - DB Loss: Min={db_min:.6f}, Avg={db_avg:.6f}\n" ) if self.val_metrics['precision']: report += ( f" - Val Metrics: Precision={self.val_metrics['precision'][-1]:.4f}, " f"Recall={self.val_metrics['recall'][-1]:.4f}, F1={self.val_metrics['f1'][-1]:.4f}\n" ) if optimizer: report += f" - Learning Rate: {optimizer.param_groups[0]['lr']:.6e}\n" report += f"{'=' * 70}" print(report) # 保存CSV日志 with open(f'training_log_epoch_{epoch + 1}.csv', 'w') as f: f.write("Timestamp,Total_Loss,Bin_Loss,Thresh_Loss,DB_Loss,Learning_Rate\n") for i, t in enumerate(self.timestamps): lr_val = self.lr_history[i] if i < len(self.lr_history) else 0 f.write( f"{t:.2f},{self.total_losses[i]:.6f},{self.bin_losses[i]:.6f},{self.thresh_losses[i]:.6f},{self.db_losses[i]:.6f},{lr_val:.6e}\n") # 重置记录(保留最后一个批次的值) self.total_losses = [self.total_losses[-1]] if self.total_losses else [] self.bin_losses = [self.bin_losses[-1]] if self.bin_losses else [] self.thresh_losses = [self.thresh_losses[-1]] if self.thresh_losses else [] self.db_losses = [self.db_losses[-1]] if self.db_losses else [] self.timestamps = [self.timestamps[-1]] if self.timestamps else [] self.lr_history = [self.lr_history[-1]] if self.lr_history else [] # 更新图表 self.update_plots(0) plt.draw() plt.pause(0.1) def on_train_end(self): """训练结束后生成图表并保存""" plt.ioff() # 关闭交互模式 # 保存最终图表 plt.savefig('training_summary.png') plt.close() # 生成详细的训练报告图 self.generate_detailed_report() def generate_detailed_report(self): """生成详细的训练报告图表""" fig, axs = plt.subplots(3, 1, figsize=(12, 15)) # 损失图表 axs[0].plot(self.total_losses, label='Total Loss') axs[0].plot(self.bin_losses, label='Binary Loss') axs[0].plot(self.thresh_losses, label='Threshold Loss') axs[0].plot(self.db_losses, label='DB Loss') axs[0].set_title('Training Loss Components') axs[0].set_xlabel('Batch') axs[0].set_ylabel('Loss') axs[0].legend() axs[0].grid(True) # 学习率图表 axs[1].plot(self.lr_history) axs[1].set_title('Learning Rate Schedule') axs[1].set_xlabel('Batch') axs[1].set_ylabel('Learning Rate') axs[1].grid(True) # 验证指标图表 if self.val_metrics['precision']: axs[2].plot(self.val_metrics['precision'], 'o-', label='Precision') axs[2].plot(self.val_metrics['recall'], 'o-', label='Recall') axs[2].plot(self.val_metrics['f1'], 'o-', label='F1 Score') axs[2].set_title('Validation Metrics') axs[2].set_xlabel('Epoch') axs[2].set_ylabel('Score') axs[2].legend() axs[2].grid(True) # 标记最佳F1分数 best_f1_idx = np.argmax(self.val_metrics['f1']) best_f1 = self.val_metrics['f1'][best_f1_idx] axs[2].plot(best_f1_idx, best_f1, 'ro', markersize=8) axs[2].annotate(f'Best F1: {best_f1:.4f}', xy=(best_f1_idx, best_f1), xytext=(best_f1_idx + 0.5, best_f1 - 0.05), arrowprops=dict(facecolor='black', shrink=0.05)) plt.tight_layout() plt.savefig('training_detailed_report.png') plt.close() # 在类外部定义全局函数 # 在类外部定义全局函数 def suppress_water_meter_glare(img, **kwargs): """水表专用反光抑制(忽略额外参数)""" lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB) l, a, b = cv2.split(lab) # 动态计算CLAHE参数 l_mean = np.mean(l) clip_limit = 2.0 + (l_mean / 40) # 亮度越高,clipLimit越大 clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8)) l_clahe = clahe.apply(l) # 选择性增强暗部区域 _, mask = cv2.threshold(l, 100, 255, cv2.THRESH_BINARY_INV) blended = cv2.addWeighted(l, 0.7, l_clahe, 0.3, 0) l_final = np.where(mask > 0, blended, l) lab = cv2.merge((l_final, a, b)) return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB) # ---------------------------- # 1. 数据集加载与预处理 (优化浮点坐标处理) # ---------------------------- class WaterMeterDataset(Dataset): """水表数字区域检测数据集 - 优化浮点坐标处理""" # ... (初始化代码保持不变) ... def __init__(self, image_dir, label_dir, input_size=(640, 640), augment=True): self.image_dir = image_dir self.label_dir = label_dir self.input_size = input_size self.augment = augment self.image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))] # 基础预处理流程 self.base_transform = Compose([ Resize(height=input_size[0], width=input_size[1]), Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), ToTensorV2() ]) # 简化但有效的数据增强 self.augmentation = Compose([ # 水表专用增强 OneOf([ # 模拟不同角度拍摄 Perspective(scale=(0.05, 0.1), p=0.3), # 模拟水表玻璃反光 RandomGamma(gamma_limit=(80, 120), p=0.2), # 模拟水表污渍 CoarseDropout(max_holes=5, max_height=20, max_width=20, fill_value=0, p=0.2) ], p=0.8), # 水表反光抑制 Lambda(name='glare_reduction', image=suppress_water_meter_glare), Lambda(name='water_meter_aug', image=water_meter_specific_aug, p=0.7), OneOf([ HorizontalFlip(p=0.3), VerticalFlip(p=0.2), Rotate(limit=15, p=0.5) ], p=0.7), OneOf([ RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5), CLAHE(clip_limit=2.0, p=0.3), GaussNoise(std_range=(0.15, 0.4), # 优化后范围 mean_range=(0, 0), per_channel=True, p=0.3), ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5)) ], p=0.7) ], p=0.8, keypoint_params=KeypointParams(format='xyas')) if augment else None def __len__(self): return len(self.image_files) def __getitem__(self, idx): img_name = self.image_files[idx] img_path = os.path.join(self.image_dir, img_name) # 加载图像 image = cv2.imread(img_path) if image is None: print(f"错误: 无法读取图像 {img_path}") return self[(idx + 1) % len(self)] # 跳过错误图像 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # 应用反光抑制 if np.random.rand() > 0.5: lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB) l, a, b = cv2.split(lab) clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l = clahe.apply(l) lab = cv2.merge([l, a, b]) image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB) # 解析标注 base_name = os.path.splitext(img_name)[0] label_path = os.path.join(self.label_dir, base_name + '.json') try: with open(label_path) as f: label_data = json.load(f) polygons = [] orig_h, orig_w = image.shape[:2] # 获取标注时的图像尺寸(如果存在) json_h = label_data.get('imageHeight', orig_h) json_w = label_data.get('imageWidth', orig_w) # 计算缩放比例(处理不同尺寸的标注) scale_x = orig_w / json_w scale_y = orig_h / json_h for shape in label_data['shapes']: if shape['shape_type'] == 'polygon': # 直接使用浮点坐标,避免整数转换 poly = np.array(shape['points'], dtype=np.float32) # 应用缩放比例 poly[:, 0] = poly[:, 0] * scale_x poly[:, 1] = poly[:, 1] * scale_y # 裁剪到实际图像范围内 poly[:, 0] = np.clip(poly[:, 0], 0, orig_w - 1) poly[:, 1] = np.clip(poly[:, 1], 0, orig_h - 1) polygons.append(poly) # 生成目标前验证标注有效性 if len(polygons) == 0: print(f"警告: {img_name} 无有效标注,使用随机样本替代") return self[np.random.randint(0, len(self))] # === 调试可视化 === if idx < 5: debug_img = image.copy() for poly in polygons: int_poly = poly.astype(np.int32).reshape(-1, 1, 2) cv2.polylines(debug_img, [int_poly], True, (0, 255, 0), 3) debug_info = f"Size: {orig_w}x{orig_h} | Polys: {len(polygons)}" cv2.putText(debug_img, debug_info, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) debug_path = f"debug_{base_name}.jpg" cv2.imwrite(debug_path, cv2.cvtColor(debug_img, cv2.COLOR_RGB2BGR)) print(f"保存调试图像: {debug_path}") # 应用数据增强 keypoints = [] for poly in polygons: for point in poly: # 保留浮点精度 keypoints.append((point[0], point[1], 0, 0)) if self.augment and self.augmentation: poly_lengths = [len(poly) for poly in polygons] # 应用增强 augmented = self.augmentation(image=image, keypoints=keypoints) image = augmented['image'] keypoints = augmented['keypoints'] # 正确重组多边形 polygons = [] start_idx = 0 for poly_len in poly_lengths: end_idx = start_idx + poly_len if end_idx <= len(keypoints): poly_points = keypoints[start_idx:end_idx] new_poly = np.array([[p[0], p[1]] for p in poly_points], dtype=np.float32) polygons.append(new_poly) start_idx = end_idx # 对所有多边形进行边界裁剪 for poly in polygons: poly[:, 0] = np.clip(poly[:, 0], 0, image.shape[1] - 1) poly[:, 1] = np.clip(poly[:, 1], 0, image.shape[0] - 1) except (FileNotFoundError, json.JSONDecodeError) as e: print(f"警告: 无法加载标注文件 {label_path} - {str(e)}") polygons = [] # 记录数据增强后的图像尺寸 aug_h, aug_w = image.shape[:2] # 基础预处理(包含Resize) processed = self.base_transform(image=image) image_tensor = processed['image'] # 将多边形坐标缩放到input_size scale_x = self.input_size[1] / aug_w scale_y = self.input_size[0] / aug_h scaled_polygons = [] for poly in polygons: scaled_poly = poly.copy() scaled_poly[:, 0] = scaled_poly[:, 0] * scale_x scaled_poly[:, 1] = scaled_poly[:, 1] * scale_y scaled_poly[:, 0] = np.clip(scaled_poly[:, 0], 0, self.input_size[1] - 1) scaled_poly[:, 1] = np.clip(scaled_poly[:, 1], 0, self.input_size[0] - 1) scaled_polygons.append(scaled_poly) # 生成目标(使用input_size尺寸) binary_target = self.generate_binary_target(scaled_polygons, (self.input_size[0], self.input_size[1])) threshold_target = self.generate_threshold_target(scaled_polygons, (self.input_size[0], self.input_size[1])) return image_tensor, binary_target, threshold_target def generate_threshold_target(self, polygons, img_shape, ratio=0.4): """生成阈值目标图(优化浮点坐标处理)""" # 定义输出尺寸(特征图尺寸) output_size = (self.input_size[0] // 8, self.input_size[1] // 8) # 创建全尺寸距离图 full_size_map = np.zeros(img_shape[:2], dtype=np.float32) for poly in polygons: if len(poly) < 3: continue # 确保坐标在图像范围内 poly[:, 0] = np.clip(poly[:, 0], 0, img_shape[1] - 1) poly[:, 1] = np.clip(poly[:, 1], 0, img_shape[0] - 1) # 计算最大距离(防止除零错误) area = cv2.contourArea(poly) perimeter = cv2.arcLength(poly, True) if perimeter < 1e-3 or area < 10: continue max_dist = area * (1 - ratio ** 2) / max(perimeter, 1e-3) # 创建浮点精度的多边形掩码 mask = np.zeros(img_shape[:2], dtype=np.uint8) int_poly = poly.reshape((-1, 1, 2)).astype(np.int32) cv2.fillPoly(mask, [int_poly], 255) # 计算距离变换并更新全尺寸图 dist = cv2.distanceTransform(mask, cv2.DIST_L2, 3) normalized = np.clip(dist / max(max_dist, 1e-6), 0, 1) full_size_map = np.maximum(full_size_map, normalized) # 下采样到特征图尺寸 dist_map = cv2.resize(full_size_map, output_size, interpolation=cv2.INTER_LINEAR) # 空目标检查 if np.max(dist_map) < 1e-6: return torch.zeros((1, *output_size), dtype=torch.float32) return torch.from_numpy(dist_map).unsqueeze(0).float() def generate_binary_target(self, polygons, img_shape): """生成二值化目标图(优化浮点坐标处理)""" # 直接在目标尺寸上创建 output_size = (self.input_size[0] // 8, self.input_size[1] // 8) binary_map = np.zeros(output_size, dtype=np.float32) # 计算缩放比例 (原始图像->特征图) scale_x = output_size[1] / img_shape[1] scale_y = output_size[0] / img_shape[0] for poly in polygons: if len(poly) > 2: # 缩放多边形到特征图尺寸(保持浮点精度) scaled_poly = poly.copy() scaled_poly[:, 0] = scaled_poly[:, 0] * scale_x scaled_poly[:, 1] = scaled_poly[:, 1] * scale_y # 使用浮点坐标填充(更精确) int_poly = scaled_poly.reshape((-1, 1, 2)).astype(np.float32) # 创建临时画布进行填充 temp_canvas = np.zeros(output_size, dtype=np.uint8) cv2.fillPoly(temp_canvas, [int_poly.astype(np.int32)], 1) binary_map = np.maximum(binary_map, temp_canvas.astype(np.float32)) return torch.from_numpy(binary_map).unsqueeze(0).float() # ---------------------------- # 2. DBNet模型定义 (增强版) # ---------------------------- class DBNet(nn.Module): """基于ResNet18的DBNet文本检测模型""" def __init__(self, pretrained=True): super(DBNet, self).__init__() base_model = resnet18(weights=ResNet18_Weights.DEFAULT) # 提取中间特征层 self.conv1 = base_model.conv1 self.bn1 = base_model.bn1 self.relu = base_model.relu self.maxpool = base_model.maxpool self.layer1 = base_model.layer1 self.layer2 = base_model.layer2 self.layer3 = base_model.layer3 self.layer4 = base_model.layer4 # 特征融合层 self.fusion_conv = nn.Sequential( nn.Conv2d(512, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.Conv2d(256, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True), nn.Conv2d(128, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True) ) # 检测头 self.db_head = DBHead(64) def forward(self, x): # 骨干网络前向传播 x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) # 特征融合 fused = self.fusion_conv(x) # 检测头 binary_map, thresh_map = self.db_head(fused) return binary_map, thresh_map class DBHead(nn.Module): """DBNet检测头,包含注意力机制和残差连接""" def __init__(self, in_channels): super(DBHead, self).__init__() # 修改DBHead的残差块 self.res_block = nn.Sequential( nn.Conv2d(in_channels, in_channels, 3, padding=1), nn.BatchNorm2d(in_channels), nn.LeakyReLU(0.2, inplace=True), # 使用LeakyReLU防止梯度消失 nn.Conv2d(in_channels, in_channels, 3, padding=1), nn.BatchNorm2d(in_channels) ) # 添加空间注意力机制 self.spatial_attn = nn.Sequential( nn.Conv2d(in_channels, 1, kernel_size=3, padding=1), nn.Sigmoid() ) # 通道注意力机制 self.attention = nn.Sequential( nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, in_channels // 8, 1), nn.ReLU(inplace=True), nn.Conv2d(in_channels // 8, in_channels, 1), nn.Sigmoid() ) # 二值化分支 self.binarize = nn.Sequential( nn.Conv2d(in_channels, in_channels // 2, 3, padding=1), nn.BatchNorm2d(in_channels // 2), nn.ReLU(inplace=True), nn.Conv2d(in_channels // 2, in_channels // 2, 3, padding=1), nn.BatchNorm2d(in_channels // 2), nn.ReLU(inplace=True), nn.ConvTranspose2d(in_channels // 2, in_channels // 4, 4, stride=2, padding=1), nn.BatchNorm2d(in_channels // 4), nn.ReLU(inplace=True), nn.ConvTranspose2d(in_channels // 4, 1, 4, stride=2, padding=1), nn.Sigmoid() ) # 阈值分支 self.thresh = nn.Sequential( nn.Conv2d(in_channels, in_channels // 2, 3, padding=1), nn.BatchNorm2d(in_channels // 2), nn.ReLU(inplace=True), nn.Conv2d(in_channels // 2, in_channels // 4, 3, padding=1), nn.BatchNorm2d(in_channels // 4), nn.ReLU(inplace=True), nn.ConvTranspose2d(in_channels // 4, in_channels // 8, 4, stride=2, padding=1), nn.BatchNorm2d(in_channels // 8), nn.ReLU(inplace=True), nn.ConvTranspose2d(in_channels // 8, 1, 4, stride=2, padding=1), nn.Sigmoid() ) def forward(self, x): # 残差连接 residual = x x = self.res_block(x) + residual # 空间注意力 attn_map = self.spatial_attn(x) x = x * attn_map binary_map = self.binarize(x) thresh_map = self.thresh(x) return binary_map, thresh_map # ---------------------------- # 3. 损失函数定义 (增强版) # ---------------------------- class DBLoss(nn.Module): """重构后的 DBNet 损失函数,符合原始论文设计[1,2,4](@ref)""" def __init__(self, alpha=1.0, beta=10.0, k=50, ohem_ratio=3.0): super(DBLoss, self).__init__() self.alpha = alpha # 概率图损失权重 self.beta = beta # 阈值图损失权重 self.k = k # 可微二值化参数[1](@ref) self.ohem_ratio = ohem_ratio def forward(self, preds, targets): binary_pred, thresh_pred = preds binary_target, thresh_target = targets # 1. 概率图损失(二值图损失)使用带 OHEM 的 Dice Loss[2](@ref) prob_loss = self.dice_loss_with_ohem(binary_pred, binary_target) # 2. 阈值图损失使用 L1 Loss[4](@ref) thresh_loss = F.l1_loss(thresh_pred, thresh_target, reduction='mean') # 3. 可微二值化计算[1](@ref) with torch.no_grad(): # 计算近似二值图 B = 1 / (1 + exp(-k(P - T))) binary_map = torch.sigmoid(self.k * (binary_pred - thresh_pred)) # 4. 二值图损失使用 Dice Loss bin_loss = self.dice_loss(binary_map, binary_target) # 5. 组合损失:L = L_s + α × L_t + β × L_b total_loss = prob_loss + self.alpha * thresh_loss + self.beta * bin_loss return total_loss, prob_loss, thresh_loss, bin_loss def dice_loss(self, pred, target): """标准 Dice Loss 实现""" smooth = 1.0 intersection = (pred * target).sum() union = pred.sum() + target.sum() return 1 - (2. * intersection + smooth) / (union + smooth) def dice_loss_with_ohem(self, pred, target): """带 OHEM 的 Dice Loss 实现[2](@ref)""" # 计算每个像素的损失 loss_map = 1 - (2 * pred * target + 1) / (pred + target + 1) # 应用 OHEM 采样 pos_mask = (target > 0.5).float() neg_mask = 1 - pos_mask # 计算正负样本数量 n_pos = pos_mask.sum().item() n_neg = min(int(n_pos * self.ohem_ratio), neg_mask.sum().item()) if n_neg == 0: return self.dice_loss(pred, target) # 选择最难负样本 neg_loss = loss_map * neg_mask neg_loss = neg_loss.view(-1) topk_neg_loss, _ = torch.topk(neg_loss, n_neg) # 组合正负样本损失 pos_loss = (loss_map * pos_mask).sum() total_loss = (pos_loss + topk_neg_loss.sum()) / (n_pos + n_neg + 1e-6) return total_loss # ---------------------------- # 辅助函数 (保持不变) # ---------------------------- def calculate_metrics(pred, target, threshold=0.5): """计算精确度、召回率和F1分数""" pred_bin = (pred > threshold).float() target_bin = (target > 0.5).float() pred_flat = pred_bin.view(-1).cpu().numpy() target_flat = target_bin.view(-1).cpu().numpy() # 避免全零情况 if np.sum(target_flat) == 0: return 0.0, 0.0, 0.0 precision = precision_score(target_flat, pred_flat, zero_division=0) recall = recall_score(target_flat, pred_flat, zero_division=0) f1 = f1_score(target_flat, pred_flat, zero_division=0) return precision, recall, f1 # ... (保持不变) ... def validate_model(model, dataloader, device): """验证模型性能""" model.eval() total_precision = 0.0 total_recall = 0.0 total_f1 = 0.0 num_batches = 0 with torch.no_grad(): for images, binary_targets, _ in dataloader: images = images.to(device) binary_targets = binary_targets.to(device) binary_preds, _ = model(images) precision, recall, f1 = calculate_metrics(binary_preds, binary_targets) total_precision += precision total_recall += recall total_f1 += f1 num_batches += 1 avg_precision = total_precision / num_batches avg_recall = total_recall / num_batches avg_f1 = total_f1 / num_batches return avg_precision, avg_recall, avg_f1 # 2. 动态损失权重校准 - 修改DBLoss类 class AdaptiveDBLoss(DBLoss): def __init__(self, alpha=1.0, beta=5.0, gamma=2.0, adapt_step=100): super().__init__(alpha, beta, gamma) self.adapt_step = adapt_step self.beta_history = [] def forward(self, preds, targets, step): # 动态调整β系数 if step % self.adapt_step == 0 and len(self.beta_history) > 10: db_median = np.median(self.beta_history[-10:]) self.beta = max(1.0, min(db_median * 0.8, 10.0)) total_loss, bin_loss, thresh_loss, db_loss = super().forward(preds, targets) # 记录当前β值的表现 self.beta_history.append(db_loss.item()) return total_loss, bin_loss, thresh_loss, db_loss # 3. 模型架构增强 - 替换原始DBHead class EnhancedDBHead(DBHead): def __init__(self, in_channels): super().__init__(in_channels) # 增加通道容量 self.res_block = nn.Sequential( nn.Conv2d(in_channels, in_channels * 2, 3, padding=1), nn.GroupNorm(8, in_channels * 2), nn.GELU(), nn.Conv2d(in_channels * 2, in_channels, 3, padding=1), nn.GroupNorm(8, in_channels) ) # 深度可分离卷积增强特征 self.depthwise = nn.Sequential( nn.Conv2d(in_channels, in_channels, 3, padding=1, groups=in_channels), nn.Conv2d(in_channels, in_channels * 4, 1), nn.GELU() ) # 自门控注意力机制 self.gate_attn = nn.Sequential( nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, in_channels // 4, 1), nn.GELU(), nn.Conv2d(in_channels // 4, in_channels, 1), nn.Sigmoid() ) def forward(self, x): residual = x x = self.res_block(x) + residual # 深度特征提取 depth_feat = self.depthwise(x) # 门控特征融合 gate = self.gate_attn(depth_feat) x = x * gate + depth_feat # 原始输出 return super().forward(x) # ---------------------------- # 4. 训练函数 (增强版,添加进度条) # ---------------------------- def enhanced_train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=200, checkpoint_path='dbnet_checkpoint.pth', lr_init=5e-5): # 初始化 start_epoch = 0 best_loss = float('inf') best_f1 = 0.0 logger = EnhancedTrainingLogger() # 学习率调度器 (CosineAnnealingWarmRestarts) scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True) # 混合精度训练 scaler = torch.cuda.amp.GradScaler() # 检查点恢复机制 if os.path.exists(checkpoint_path): print(f"发现检查点文件 {checkpoint_path}, 尝试恢复训练...") checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) start_epoch = checkpoint['epoch'] + 1 best_loss = checkpoint['best_loss'] logger = checkpoint['logger'] print(f"成功恢复训练状态: 从第 {start_epoch} 轮开始, 最佳损失: {best_loss:.6f}") if not logger.total_losses: # 检查日志是否为空 logger = EnhancedTrainingLogger() # 创建新的日志记录器 model.train() optimizer.param_groups[0]['lr'] = lr_init try: for epoch in range(start_epoch, epochs): epoch_total_loss = 0.0 epoch_bin_loss = 0.0 epoch_thresh_loss = 0.0 epoch_db_loss = 0.0 epoch_start = time.time() # 使用tqdm添加进度条 pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}/{epochs}", unit="batch") for batch_idx, (images, binary_targets, thresh_targets) in pbar: images = images.to(device) binary_targets = binary_targets.to(device) thresh_targets = thresh_targets.to(device) # 混合精度训练 with torch.cuda.amp.autocast(): binary_preds, thresh_preds = model(images) total_loss, bin_loss, thresh_loss, db_loss = criterion( (binary_preds, thresh_preds), (binary_targets, thresh_targets) ) # 记录损失 epoch_total_loss += total_loss.item() epoch_bin_loss += bin_loss.item() epoch_thresh_loss += thresh_loss.item() epoch_db_loss += db_loss.item() # 记录日志 current_lr = optimizer.param_groups[0]['lr'] logger.on_batch_end( batch_idx, total_loss.item(), bin_loss.item(), thresh_loss.item(), db_loss.item(), current_lr ) # 更新进度条描述 pbar.set_postfix({ 'Loss': f"{total_loss.item():.4f}", 'Bin': f"{bin_loss.item():.4f}", 'Thresh': f"{thresh_loss.item():.4f}", 'DB': f"{db_loss.item():.4f}", 'LR': f"{current_lr:.2e}" }) # 反向传播 optimizer.zero_grad() scaler.scale(total_loss).backward() # 梯度裁剪 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) scaler.step(optimizer) scaler.update() # 更新学习率 scheduler.step(epoch + batch_idx / len(train_loader)) # 每100个batch保存一次紧急检查点 if batch_idx % 100 == 0: checkpoint = { 'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'best_loss': best_loss, 'logger': logger, 'scheduler_state': scheduler.state_dict() } torch.save(checkpoint, checkpoint_path) # 计算平均损失 num_batches = len(train_loader) avg_total_loss = epoch_total_loss / num_batches avg_bin_loss = epoch_bin_loss / num_batches avg_thresh_loss = epoch_thresh_loss / num_batches avg_db_loss = epoch_db_loss / num_batches # 验证模型 precision, recall, f1 = validate_model(model, val_loader, device) logger.val_metrics['precision'].append(precision) logger.val_metrics['recall'].append(recall) logger.val_metrics['f1'].append(f1) epoch_time = time.time() - epoch_start print(f"Epoch [{epoch + 1}/{epochs}] completed in {epoch_time:.2f}s") print( f" - Avg Loss: {avg_total_loss:.6f} (Bin:{avg_bin_loss:.6f}, Thresh:{avg_thresh_loss:.6f}, DB:{avg_db_loss:.6f})") print(f" - Val Metrics: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}") logger.on_epoch_end(epoch, optimizer) # 保存最佳模型 if f1 > best_f1 or (f1 == best_f1 and avg_total_loss < best_loss): best_f1 = f1 best_loss = avg_total_loss torch.save({ 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': avg_total_loss, 'f1': best_f1 }, 'dbnet_best.pth') print(f"🔥 发现新的最佳模型! F1: {best_f1:.4f}, 损失: {best_loss:.6f}") # 保存常规检查点 checkpoint = { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'best_loss': best_loss, 'logger': logger, 'scheduler_state': scheduler.state_dict() } torch.save(checkpoint, checkpoint_path) except KeyboardInterrupt: print("\n训练被用户中断!") except Exception as e: print(f"\n❌ 训练中断! 原因: {str(e)}") traceback.print_exc() finally: print("训练完成! 保存最终模型...") torch.save(model.state_dict(), 'dbnet_final.pth') logger.on_train_end() return model # ---------------------------- # 5. 推理与区域裁剪 (增强版) # ---------------------------- def enhanced_detect_text_regions(image, model, device, threshold=0.3): # 预处理 orig_h, orig_w = image.shape[:2] input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) input_img = cv2.resize(input_img, (640, 640)) input_img = input_img.astype(np.float32) / 255.0 input_img = (input_img - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] input_tensor = torch.from_numpy(input_img).permute(2, 0, 1).unsqueeze(0).to(device) input_tensor = input_tensor.to(torch.float32) # 推理 with torch.no_grad(): binary_map, _ = model(input_tensor) # 后处理 binary_map = binary_map.squeeze().cpu().numpy() binary_output = (binary_map > threshold).astype(np.uint8) * 255 # 形态学操作增强 kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) binary_output = cv2.morphologyEx(binary_output, cv2.MORPH_CLOSE, kernel) # 查找轮廓 contours, _ = cv2.findContours(binary_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) return contours, orig_h, orig_w # 返回检测到的文本区域轮廓 # ... (保持不变) ... def perspective_transform(image, contour): """对检测到的文本区域进行透视变换校正""" # 多边形逼近轮廓 epsilon = 0.02 * cv2.arcLength(contour, True) approx = cv2.approxPolyDP(contour, epsilon, True) # 确保是四边形 if len(approx) != 4: # 使用最小外接矩形 rect = cv2.minAreaRect(contour) box = cv2.boxPoints(rect) approx = np.int0(box) # 获取四边形顶点并排序 (左上, 右上, 右下, 左下) pts = approx.reshape(4, 2) rect_pts = np.zeros((4, 2), dtype="float32") # 计算顶点和 s = pts.sum(axis=1) rect_pts[0] = pts[np.argmin(s)] # 左上 rect_pts[2] = pts[np.argmax(s)] # 右下 # 计算顶点差 diff = np.diff(pts, axis=1) rect_pts[1] = pts[np.argmin(diff)] # 右上 rect_pts[3] = pts[np.argmax(diff)] # 左下 # 计算目标矩形尺寸 (tl, tr, br, bl) = rect_pts widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) maxHeight = max(int(heightA), int(heightB)) # 目标点坐标 dst = np.array([ [0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32") # 计算透视变换矩阵并应用 M = cv2.getPerspectiveTransform(rect_pts, dst) warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) return warped def crop_text_regions(image, contours, orig_h, orig_w): """裁剪检测到的文本区域并进行透视校正""" cropped_regions = [] # 计算缩放比例 (从640x640到原始尺寸) scale_x = orig_w / 640.0 scale_y = orig_h / 640.0 for contour in contours: # 过滤小区域 if cv2.contourArea(contour) < 100: continue # 缩放轮廓到原始图像尺寸 scaled_contour = contour.copy() scaled_contour[:, :, 0] = scaled_contour[:, :, 0] * scale_x scaled_contour[:, :, 1] = scaled_contour[:, :, 1] * scale_y # 获取轮廓边界框 x, y, w, h = cv2.boundingRect(scaled_contour) # 扩展边界框 (增加10%的边距) margin_x = int(w * 0.1) margin_y = int(h * 0.1) x = max(0, x - margin_x) y = max(0, y - margin_y) w = min(orig_w - x, w + 2 * margin_x) h = min(orig_h - y, h + 2 * margin_y) # 裁剪区域 roi = image[y:y + h, x:x + w] # 对裁剪区域进行透视校正 try: # 调整轮廓坐标到ROI坐标系 roi_contour = scaled_contour.copy() roi_contour[:, :, 0] -= x roi_contour[:, :, 1] -= y # 应用透视变换 warped_roi = perspective_transform(roi, roi_contour) # 确保最小尺寸 if warped_roi.shape[0] > 10 and warped_roi.shape[1] > 10: cropped_regions.append(warped_roi) except Exception as e: # 如果透视变换失败,使用原始ROI print(f"透视变换失败: {str(e)},使用原始ROI") cropped_regions.append(roi) return cropped_regions # ---------------------------- # 7. 模型加载与推理接口 (新增功能) # ---------------------------- def load_trained_model(model_path, device='cuda'): """加载训练好的模型""" model = DBNet(pretrained=False).to(device) checkpoint = torch.load(model_path, map_location=device) model.load_state_dict(checkpoint['model_state_dict']) model.eval() return model # 5. 水表图像增强改进 def water_meter_specific_aug(image, **kwargs): """水表专用增强链""" # 抑制高频反光 kernel_size = int(min(image.shape[:2]) * 0.01) if kernel_size % 2 == 0: kernel_size += 1 blurred = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0) # 自适应直方图均衡 lab = cv2.cvtColor(blurred, cv2.COLOR_RGB2LAB) l, a, b = cv2.split(lab) clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l_eq = clahe.apply(l) # 色偏校正 a_balanced = cv2.normalize(a, None, 0, 255, cv2.NORM_MINMAX) b_balanced = cv2.normalize(b, None, 0, 255, cv2.NORM_MINMAX) return cv2.cvtColor(cv2.merge([l_eq, a_balanced, b_balanced]), cv2.COLOR_LAB2RGB) def suppress_glare(image): """减少图像反光区域的影响[1](@ref)""" lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB) l, a, b = cv2.split(lab) # 对亮度通道进行CLAHE均衡化 clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l_clahe = clahe.apply(l) # 合并通道 lab_clahe = cv2.merge((l_clahe, a, b)) return cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2RGB) def detect_and_crop(image_path, model, device='cuda', output_dir='cropped_regions'): """使用训练好的模型检测并裁剪水表数字区域""" # 创建输出目录 os.makedirs(output_dir, exist_ok=True) # 读取图像 image = cv2.imread(image_path) if image is None: print(f"错误: 无法读取图像 {image_path}") return [] # 应用反光抑制 image = suppress_glare(image) # 检测文本区域 contours, orig_h, orig_w = enhanced_detect_text_regions(image, model, device) # 裁剪文本区域 cropped_regions = crop_text_regions(image, contours, orig_h, orig_w) # 保存结果 base_name = os.path.splitext(os.path.basename(image_path))[0] for i, region in enumerate(cropped_regions): output_path = os.path.join(output_dir, f'{base_name}_region_{i}.jpg') cv2.imwrite(output_path, region) print(f"成功裁剪 {len(cropped_regions)} 个文本区域到 {output_dir}") return cropped_regions # ---------------------------- # 8. 主程序 (优化版) # ---------------------------- if __name__ == "__main__": # 优化参数 INPUT_SIZE = (512, 512) # 减小输入尺寸适配水表 # 配置参数 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' DATA_DIR = 'images_train' LABEL_DIR = 'labels_train' VAL_DATA_DIR = 'images_val' VAL_LABEL_DIR = 'labels_val' BATCH_SIZE = 16 EPOCHS = 100 LR = 1e-4 CHECKPOINT_PATH = 'dbnet_checkpoint.pth' TRAINED_MODEL_PATH = 'dbnet_best.pth' # 模式选择: 'train' 或 'inference' MODE = 'train' if MODE == 'train': # 1. 准备数据集 print("准备训练数据集...") train_dataset = WaterMeterDataset( image_dir=DATA_DIR, label_dir=LABEL_DIR, input_size=INPUT_SIZE, augment=True ) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True) print("准备验证数据集...") val_dataset = WaterMeterDataset( image_dir=VAL_DATA_DIR, label_dir=VAL_LABEL_DIR, input_size=INPUT_SIZE, augment=False ) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2) # 2. 初始化模型 print("初始化模型...") model = DBNet(pretrained=True).to(DEVICE) # 3. 损失函数和优化器 # 初始化时使用自适应损失 criterion = DBLoss(alpha=1.0, beta=8.0) # 使用更先进的优化器 # 1. 强化学习率调度机制 - 更新优化器配置 optimizer = optim.AdamW( # 替换原始Adam model.parameters(), lr=3e-4, # 适当提升基础学习率 weight_decay=1e-4 ) # 4. 训练模型 print("开始训练...") model = enhanced_train_model( model, train_loader, val_loader, criterion, optimizer, DEVICE, epochs=EPOCHS, checkpoint_path=CHECKPOINT_PATH, lr_init=LR ) print(f"✅ 训练完成! 最佳模型已保存到 {TRAINED_MODEL_PATH}") elif MODE == 'inference': # 加载训练好的模型 print(f"加载训练好的模型: {TRAINED_MODEL_PATH}") model = load_trained_model(TRAINED_MODEL_PATH, DEVICE) # 处理单个图像 test_image_path = 'test_images/test_1.jpg' print(f"处理测试图像: {test_image_path}") detect_and_crop(test_image_path, model, DEVICE) # 处理整个目录 input_dir = 'test_images' output_dir = 'cropped_results' print(f"批量处理目录: {input_dir}") for img_file in os.listdir(input_dir): if img_file.lower().endswith(('.jpg', '.png', '.jpeg')): img_path = os.path.join(input_dir, img_file) print(f"处理图像: {img_file}") detect_and_crop(img_path, model, DEVICE, output_dir)
06-07
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值