OpenCV视频追踪实例之akaze_tracking

最新推荐文章于 2025-11-15 00:38:16 发布

原创最新推荐文章于 2025-11-15 00:38:16 发布 · 4.3k 阅读

30 ·

CC 4.0 BY-SA版权

OpenCV 专栏收录该内容

32 篇文章

订阅专栏

使用AKAZE算法进行视频追踪(对比ORB算法)

简述orb与akaze算法
David. Lowe提出的SIFT算法由于卓越的性能，在过去二十多年几乎应用于目标追踪、图像匹配等相关计算机视觉领域。2012年Pablo F. Alcantarilla所提出的KAZE算法在匹配鲁棒性与速度上要优于SIFT算法；但是，KAZE算法的时间依然较高：因此原作者在13年提出Accelerated-KAZE(AKAZE)算法，在保证匹配鲁棒性的同时减少算法耗时。对比当前主要的快速ORB匹配算法，实验结果表明akaze算法从匹配性能与算法耗时都优于orb算法。

akaze算法较kaze算法改进主要在于两个方面：
一、在构建非线性尺度空间时候，kaze算法采取AOS算法进行求解扩散方程，虽然稳定但是求解耗时较高，akaze算法采取快速显示扩散(FED)算法进行快速求解。附：kaze算法尺度空间不是金字塔模式，akaze算法采取金字塔方式，在提取特征点会进一步加速。
二、kaze算法采用MSURF描述符，MSURF描述符求取过程是进行特征点局部梯度求取，耗时较高。akaze算法采用的是LDB描述子(二值描述子)，速度快，同时作者对LDB描述子进一步改进M-LDB描述子(更好的支持旋转的鲁棒性)。

orb算法主要是通过改进FAsT角点算法提取特征点与BRIEF二值描述子进行描述局部特征，最后进行匹配。orb算法的主要优点：速度快，性能能够满足实时性匹配要求，特别对于移动端应用。

下面为orb与akaze算法进行视频追踪的性能比较代码
主函数cpp如下(工程随后附下载地址链接)

/***********************************************************************
*            akaze_tracking.cpp : 定义控制台应用程序的入口点。
*            akaze算法对比orb算法的视频追踪demo
*            操作说明: 
*                    运行程序  视频流正常播放中....
*                    按下Enter键 用鼠标选定ROI(感兴趣区域)
*                    再次按下Enter键视频流播放...观察匹配结果
************************************************************************/
#include "stdafx.h"
#include <opencv2/features2d.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>  
#include <vector>
#include <iostream>
#include <iomanip>

#include "stats.h" // 数据结构定义头文件
#include "utils.h" // 绘制和打印功能头文件

using namespace std;
using namespace cv;
// akaze算法进行追踪的相关参数阈值设置
const double akaze_thresh = 3e-4; // AKAZE 检测阈值设置为1000个关键点
const double ransac_thresh = 2.5f; // RANSAC内联点阈值大小
const double nn_match_ratio = 0.8f; // 最近邻匹配阈值
const int bb_min_inliers = 10; // 设置画矩形框的最少匹配点对
const int stats_update_period = 10; // 屏幕上的统计信息每10帧更新一次

namespace example {
    class Tracker
    {
    public:
        // 构造函数入口
        Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) :
            detector(_detector),
            matcher(_matcher)
        {}

        void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
        Mat process(const Mat frame, Stats& stats);
        Ptr<Feature2D> getDetector() {
            return detector;
        }
    protected:
        Ptr<Feature2D> detector;
        Ptr<DescriptorMatcher> matcher;
        Mat first_frame, first_desc;  // 第一帧ROI区域图像   描述符矩阵
        vector<KeyPoint> first_kp;    // 特征点数据
        vector<Point2f> object_bb;    // ROI对应的四个顶点坐标
    };

    void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
    {
        cv::Point *ptMask = new cv::Point[bb.size()];
        const Point* ptContain = { &ptMask[0] };
        int iSize = static_cast<int>(bb.size());
        for (size_t i = 0; i<bb.size(); i++) {
            ptMask[i].x = static_cast<int>(bb[i].x);
            ptMask[i].y = static_cast<int>(bb[i].y);
        }
        first_frame = frame.clone();
        cv::Mat matMask = cv::Mat::zeros(frame.size(), CV_8UC1);
        // fillPoly()函数功能可以用来填充复杂多边形轮廓的区域
        cv::fillPoly(matMask, &ptContain, &iSize, 1, cv::Scalar::all(255));
        // 检测特征点和描述符
        detector->detectAndCompute(first_frame, matMask, first_kp, first_desc);
        stats.keypoints = (int)first_kp.size();
        // 绘制选定ROI区域的矩形框
        drawBoundingBox(first_frame, bb);
        putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
        object_bb = bb;
        delete[] ptMask;
    }
    // 进行视频播放抽帧进行匹配过程函数......
    Mat Tracker::process(const Mat frame, Stats& stats)
    {
        TickMeter tm;
        vector<KeyPoint> kp;
        Mat desc;

        tm.start();  // 计时开始
        detector->detectAndCompute(frame, noArray(), kp, desc); // 检测特征点与计算特征点的局部描述符
        stats.keypoints = (int)kp.size();

        vector< vector<DMatch> > matches;
        vector<KeyPoint> matched1, matched2;
        matcher->knnMatch(first_desc, desc, matches, 2);   // knnMatch进行匹配
        for (unsigned i = 0; i < matches.size(); i++) 
        {   
            // nn_match_ratio距离阈值进行匹配
            if (matches[i][0].distance < nn_match_ratio * matches[i][1].distance) 
            {
                matched1.push_back(first_kp[matches[i][0].queryIdx]);
                matched2.push_back(kp[matches[i][0].trainIdx]);
            }
        }
        stats.matches = (int)matched1.size();

        Mat inlier_mask, homography;
        vector<KeyPoint> inliers1, inliers2;
        vector<DMatch> inlier_matches;
        if (matched1.size() >= 4) {
            homography = findHomography(Points(matched1), Points(matched2),
                                        RANSAC, ransac_thresh, inlier_mask); // RANSAC算法进行提纯估计单应矩阵
        }
        tm.stop();  // 匹配结束
        stats.fps = 1. / tm.getTimeSec();
        // 若匹配点对少于4对或者单应矩阵为空
        if (matched1.size() < 4 || homography.empty()) {
            Mat res;
            hconcat(first_frame, frame, res);
            stats.inliers = 0;
            stats.ratio = 0;
            return res;
        }
        for (unsigned i = 0; i < matched1.size(); i++) {
            if (inlier_mask.at<uchar>(i)) {
                int new_i = static_cast<int>(inliers1.size());
                inliers1.push_back(matched1[i]);
                inliers2.push_back(matched2[i]);
                inlier_matches.push_back(DMatch(new_i, new_i, 0));
            }
        }
        // 匹配率
        stats.inliers = (int)inliers1.size();
        stats.ratio = stats.inliers * 1.0 / stats.matches;

        vector<Point2f> new_bb;
        // 根据匹配估计的单应矩阵 + ROI坐标计算对应视频流帧对应的ROI坐标
        perspectiveTransform(object_bb, new_bb, homography);
        Mat frame_with_bb = frame.clone();
        if (stats.inliers >= bb_min_inliers) 
        {   // 若匹配点数大于设置的内联点数绘制矩形框
            drawBoundingBox(frame_with_bb, new_bb);
        }
        Mat res;
        // 匹配点对绘制
        drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
                    inlier_matches, res, Scalar(255, 0, 0), Scalar(255, 0, 0));
        return res;
    }
}


int main(int argc, char **argv)
{
    // 视频输入路径参数
    string input_path = "..\\akaze_tracking\\video\\Megamind.avi";
    string video_name = input_path;

    VideoCapture video_in;
    // 视频路径判断是否正确打开
    if ((isdigit(input_path[0]) && input_path.size() == 1))
    {
        int camera_no = input_path[0] - '0';
        video_in.open(camera_no);
    }
    else {
        video_in.open(video_name);
    }

    if (!video_in.isOpened()) {
        cerr << "Couldn't open " << video_name << endl;
        system("pause");
        return 1;
    }
    // 匹配相关变量数据结构: 内联点、匹配点等
    Stats stats, akaze_stats, orb_stats;
    // AKAZE算法初始化
    Ptr<AKAZE> akaze = AKAZE::create();
    akaze->setThreshold(akaze_thresh);
    // ORB算法初始化
    Ptr<ORB> orb = ORB::create();
    // 匹配方式暴力汉明距离
    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
    // Tracker类对象 akaze_tracker 与 orb_tracker    
    // Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher)
    example::Tracker akaze_tracker(akaze, matcher);
    example::Tracker orb_tracker(orb, matcher);

    Mat frame;
    namedWindow(video_name, WINDOW_NORMAL);
    cout << "\nPress any key to stop the video and select a bounding box" << endl;

    while (waitKey(1) < 1)
    {
        video_in >> frame;
        cv::resizeWindow(video_name, frame.size());
        imshow(video_name, frame);
    }
    // 选取当前视频某一帧ROI区域进行获取数据
    vector<Point2f> bb;
    cv::Rect uBox = cv::selectROI(video_name, frame);
    // bb 为视频ROOI区域的坐标信息
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x + uBox.width), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x + uBox.width), static_cast<float>(uBox.y + uBox.height)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y + uBox.height)));

    akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
    orb_tracker.setFirstFrame(frame, bb, "ORB", stats);

    Stats akaze_draw_stats, orb_draw_stats;
    Mat akaze_res, orb_res, res_frame;
    int i = 0;
    for (;;) 
    {
        i++;
        // stats_update_period视频帧更新时间
        bool update_stats = (i % stats_update_period == 0);
        video_in >> frame;
        // 视频流播放结束(抽帧失败)
        if (frame.empty())
            break;
        // akaze_res 绘制匹配输出的结果
        akaze_res = akaze_tracker.process(frame, stats);
        akaze_stats += stats;
        if (update_stats) {
            akaze_draw_stats = stats;
        }
        // orb视频抽帧追踪
        orb->setMaxFeatures(stats.keypoints);
        orb_res = orb_tracker.process(frame, stats);
        orb_stats += stats;
        if (update_stats) {
            orb_draw_stats = stats;
        }
        // 绘制匹配点
        drawStatistics(akaze_res, akaze_draw_stats);
        drawStatistics(orb_res, orb_draw_stats);
        vconcat(akaze_res, orb_res, res_frame);
        cv::imshow(video_name, res_frame);
        if (waitKey(1) == 27) 
            break; //quit on ESC button
    }
    akaze_stats /= i - 1;
    orb_stats /= i - 1;
    printStatistics("AKAZE", akaze_stats);
    printStatistics("ORB", orb_stats);

    system("pause");
    return 0;
}

orb与akaze算法对比实验结果图
一、首先运行程序，视频流播放窗口弹出，按下Enter键，通过鼠标选择ROI(感兴趣区域)

二、选择ROI区域后，再次按下Enter键让程序进行匹配追踪

其中：左边的为选取的ROI区域，右边为视频播放进行抽帧进行匹配(上：akaze 下：orb)。

三、最终视频播放结束，orb与akaze算法性能比较……

本机配置：windows10 64位操作系统 + visual studio2015 x64+ opencv3.3.1