一、meanshift原理
meanshift是个迭代的算法,每次迭代会往概率密度大的方向移动,所以MeanShift算法得用在概率密度图上,而反向投影图就是一个典型的颜色概率密度图。把meanshift用来进行目标跟踪,其过程如下:
(1)初始化:我们检测到图像中目标区域后,统计得到该区域HSV颜色空间H分量的直方图。得到的直方图横坐标为0-180的H分量取值(OpenCV中8位图像的H分量取值范围是0-180,而不是0-255),纵坐标为目标图像区域中取该H分量值的像素个数。
(2)反向投影:利用(1)的直方图计算整幅图像的概率分布。实际计算中不必计算整幅图像,只需计算比搜索窗口大一些的范围,这样可以缩小计算量。
(3)利用meanshift算法迭代找到当前颜色概率分布图的重心,这个重心也就是当前帧中目标的位置了。
跟踪过程的动态图如下:
opencv中meanshift源码如下:
// cv::meanShift (OpenCV source): iteratively shifts `window` toward the
// centroid (center of mass) of the probability / back-projection image,
// until the shift is smaller than epsilon or the iteration budget runs out.
// Returns the number of iterations performed; `window` holds the final rect.
int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
{
CV_INSTRUMENT_REGION();
Size size;
int cn;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
// Access the probability image through Mat or UMat, whichever backs it.
if (isUMat)
umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size();
else
mat = _probImage.getMat(), cn = mat.channels(), size = mat.size();
Rect cur_rect = window;
CV_Assert( cn == 1 ); // the back-projection must be single-channel
if( window.height <= 0 || window.width <= 0 )
CV_Error( Error::StsBadArg, "Input window has non-positive sizes" );
// Clip the initial window to the image bounds.
window = window & Rect(0, 0, size.width, size.height);
// Termination criteria: epsilon is squared (shift is compared as dx*dx+dy*dy),
// iteration count defaults to 100 when MAX_ITER is not requested.
double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.;
eps = cvRound(eps*eps);
int i, niters = (criteria.type & TermCriteria::MAX_ITER) ? std::max(criteria.maxCount, 1) : 100;
for( i = 0; i < niters; i++ )
{
cur_rect = cur_rect & Rect(0, 0, size.width, size.height);
// If the window drifted completely off the image, restart from the center.
if( cur_rect == Rect() )
{
cur_rect.x = size.width/2;
cur_rect.y = size.height/2;
}
cur_rect.width = std::max(cur_rect.width, 1);
cur_rect.height = std::max(cur_rect.height, 1);
// Compute the raw moments of the probability mass inside the window.
Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect));
// Zero total mass: no probability under the window, nothing to track.
if( fabs(m.m00) < DBL_EPSILON )
break;
// Offset from the window center to the center of mass (m10/m00, m01/m00).
int dx = cvRound( m.m10/m.m00 - window.width*0.5 );
int dy = cvRound( m.m01/m.m00 - window.height*0.5 );
// Proposed top-left corner, clamped so the window stays inside the image.
int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width);
int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height);
dx = nx - cur_rect.x;
dy = ny - cur_rect.y;
cur_rect.x = nx;
cur_rect.y = ny;
// Converged: the window center coincides with the center of mass
// within the (squared) epsilon tolerance.
if( dx*dx + dy*dy < eps )
break;
}
window = cur_rect;
return i;
}
二、camshift原理
camshift跟踪算法里采用的也是meanshift,只不过多了目标跟踪框的估计部分。camshift的步骤如下:
1. 采用meanshift算法进行计算,得到跟踪框;
2.对跟踪框进行扩大,上下左右各扩大TOLERANCE=10个像素,得到一个扩大的跟踪区域;
3.对这个扩大的跟踪区域采用中心矩,二阶矩来计算目标框的旋转方向和长宽。这里面的计算公式可以参考camshift的论文《Computer Vision Face Tracking for Use in a Perceptual User Interface》。
跟踪过程的动态过程如下图:
opencv中camshift的源码如下:
// cv::CamShift (OpenCV source): first runs meanShift to locate the target,
// then enlarges the converged window by TOLERANCE pixels on each side and
// uses second-order central moments of that region to estimate the target's
// orientation, length and width (per Bradski's CamShift paper).
// Returns the fitted rotated rectangle; `window` is updated in place to an
// axis-aligned search window for the next frame.
cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
TermCriteria criteria )
{
CV_INSTRUMENT_REGION();
const int TOLERANCE = 10;
Size size;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
if (isUMat)
umat = _probImage.getUMat(), size = umat.size();
else
mat = _probImage.getMat(), size = mat.size();
// Step 1: locate the mode of the probability image with plain meanShift.
meanShift( _probImage, window, criteria );
// Step 2: grow the window by TOLERANCE on every side, clipped to the image,
// so the moment computation sees a margin around the target.
window.x -= TOLERANCE;
if( window.x < 0 )
window.x = 0;
window.y -= TOLERANCE;
if( window.y < 0 )
window.y = 0;
window.width += 2 * TOLERANCE;
if( window.x + window.width > size.width )
window.width = size.width - window.x;
window.height += 2 * TOLERANCE;
if( window.y + window.height > size.height )
window.height = size.height - window.y;
// Step 3: moments of the enlarged region around the new center of mass.
Moments m = isUMat ? moments(umat(window)) : moments(mat(window));
double m00 = m.m00, m10 = m.m10, m01 = m.m01;
double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02;
// No probability mass under the window: report an empty result.
if( fabs(m00) < DBL_EPSILON )
return RotatedRect();
double inv_m00 = 1. / m00;
// Center of mass in image coordinates.
int xc = cvRound( m10 * inv_m00 + window.x );
int yc = cvRound( m01 * inv_m00 + window.y );
// Normalized second-order central moments.
double a = mu20 * inv_m00, b = mu11 * inv_m00, c = mu02 * inv_m00;
// Calculating width & height
double square = std::sqrt( 4 * b * b + (a - c) * (a - c) );
// Orientation of the principal axis of the probability distribution.
double theta = atan2( 2 * b, a - c + square );
// Project the central moments onto the principal axes to get the
// variances along (rotate_a) and across (rotate_c) the orientation.
double cs = cos( theta );
double sn = sin( theta );
double rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;
double rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;
rotate_a = std::max(0.0, rotate_a); // avoid negative result due calculation numeric errors
rotate_c = std::max(0.0, rotate_c); // avoid negative result due calculation numeric errors
// Length/width are 4x the standard deviation along each principal axis.
double length = std::sqrt( rotate_a * inv_m00 ) * 4;
double width = std::sqrt( rotate_c * inv_m00 ) * 4;
// In case, when theta is 0 or 1.57... the Length & Width may be exchanged
if( length < width )
{
std::swap( length, width );
std::swap( cs, sn );
theta = CV_PI*0.5 - theta;
}
// Saving results: rebuild `window` as an axis-aligned box around the
// rotated rectangle, clipped to the image, for the next tracking call.
int _xc = cvRound( xc );
int _yc = cvRound( yc );
int t0 = cvRound( fabs( length * cs ));
int t1 = cvRound( fabs( width * sn ));
t0 = MAX( t0, t1 ) + 2; // +2: small safety margin around the projection
window.width = MIN( t0, (size.width - _xc) * 2 );
t0 = cvRound( fabs( length * sn ));
t1 = cvRound( fabs( width * cs ));
t0 = MAX( t0, t1 ) + 2;
window.height = MIN( t0, (size.height - _yc) * 2 );
window.x = MAX( 0, _xc - window.width / 2 );
window.y = MAX( 0, _yc - window.height / 2 );
window.width = MIN( size.width - window.x, window.width );
window.height = MIN( size.height - window.y, window.height );
// Package the rotated rectangle; angle is normalized into [0, 180).
RotatedRect box;
box.size.height = (float)length;
box.size.width = (float)width;
box.angle = (float)((CV_PI*0.5+theta)*180./CV_PI);
while(box.angle < 0)
box.angle += 360;
while(box.angle >= 360)
box.angle -= 360;
if(box.angle >= 180)
box.angle -= 180;
box.center = Point2f( window.x + window.width*0.5f, window.y + window.height*0.5f);
return box;
}
三、跟踪效果
1.只采用H分量的跟踪效果:由于单个H分量的区分度不高,背景里存在很多H分量与目标相近的区域,导致目标跟丢。
2.采用HSV三个分量的颜色特征的跟踪效果,跟踪效果比单个h颜色的要好,但遇到相同颜色的目标会跟丢。需要加入其他方法辅助。
四、算法优缺点和改进思路
1.缺乏必要的模板更新,可以尝试加入模板更新策略。
2.依赖于颜色信息,直方图特征在目标颜色特征描述方面略显匮乏,缺少空间信息,跟踪特点单一,可以尝试用其他特征图来跟踪,比如LBP特征直方图或者hog特征直方图,还可以加入运动估计来避免跟到其他相似目标。
3.尺度估计不准确,由于采用的颜色直方图反向投影不能完整的描述目标的信息导致camshift算法计算出的目标大小不准确。
4.需要加入目标被遮挡或者跟丢的判断决策。
五、跟踪测试源码demo
#include <iostream>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
using namespace cv;
using namespace std;
// Color feature used to build the tracking histogram.
// In the demo below: GRAY tracks on the Hue channel only, BGR tracks on the
// raw B,G,R channels, HSV tracks on all three HSV channels.
enum featureType
{
    GRAY,  // hue channel only (1-D histogram)
    BGR,   // raw BGR channels (3-D histogram)
    HSV    // full HSV channels (3-D histogram)
};
int main(int argc, char** argv)
{
string filename = "D:\\opencv\\opencv\\sources\\samples\\data\\vtest.avi";
//设置跟踪选择的特征
featureType type = BGR;
VideoCapture capture(filename);
if (!capture.isOpened()) {
//error in opening the video input
cerr << "Unable to open file!" << endl;
return 0;
}
Mat frame, roi, h_roi, mask;
//采用第一张图像进行初始化
capture >> frame;
//设置跟踪框
Rect track_window(255, 219, 26, 87);
//提取跟踪目标
roi = frame(track_window);
//根据选择特征类别进行类型转换
if (type == GRAY || type == HSV)
cvtColor(roi, h_roi, COLOR_BGR2HSV);
else
h_roi = roi;
float range_[] = { 0, 180 };
float range_1[] = { 0, 255 };
const float* range[] = { range_ ,range_1,range_1 };
Mat roi_hist;
int histSize[] = { 180, 255, 255};//每个维度直方图的bin的数目,这里第一维有180个bin,第二维有255个bin,第三维有255个bin
int channels[] = { 0, 1, 2};//选择1张图像的0,1,2通道的图像
//计算图像直方图,如果是bgr和hsv则计算三个通道的直方图,如果是gray就只计算h通道的直方图
int dims = 3;
if (type == GRAY)
dims = 1;
calcHist(&h_roi, 1, channels, cv::Mat(), roi_hist, dims, histSize, range);
normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX);
// 设置meanshif算法迭代的终止条件,条件为迭代了10次就终止,或者计算出来的位置前后相差1个像素就终止迭代,表示已经找到目标位置。
TermCriteria term_crit(TermCriteria::EPS | TermCriteria::COUNT, 10, 1);
cv::Mat showBack;
while (true) {
Mat hsv, dst;
capture >> frame;
if (frame.empty())
break;
cvtColor(frame, hsv, COLOR_BGR2HSV);
calcBackProject(&hsv, 1, channels, roi_hist, dst, range);
cvtColor(dst, showBack, COLOR_GRAY2BGR);
rectangle(showBack, track_window, 255, 2);
imshow("backProject", showBack);
#if 1
// 采用camshift算法
RotatedRect rot_rect = CamShift(dst, track_window, term_crit);
// 把跟踪框画在图像上
Point2f points[4];
rot_rect.points(points);
for (int i = 0; i < 4; i++)
line(frame, points[i], points[(i + 1) % 4], 255, 2);
imshow("img2", frame);
#else
//采用meanshif算法跟踪
meanShift(dst, track_window, term_crit);
// 把跟踪框画在图像上
rectangle(frame, track_window, 255, 2);
imshow("img2", frame);
#endif
int keyboard = waitKey(10);
if (keyboard == 's')
{
//按下s键重新选择跟踪目标
track_window = selectROI(frame, true);
roi = frame(track_window);
if (type == GRAY || type == HSV)
cvtColor(roi, h_roi, COLOR_BGR2HSV);
else
h_roi = roi;
calcHist(&h_roi, 1, channels, mask, roi_hist, dims, histSize, range);
normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX);
}
if (keyboard == 'q' || keyboard == 27)
break;
}
}