//
// Created by Xiang on 2017/12/19.
//
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <iostream>
#include <functional>
#include <chrono>
#include <Eigen/Core>
#include <Eigen/Dense>
using namespace std;
using namespace cv;
string file_1 = "/home/slambook2/ch8/LK1.png"; // first image
string file_2 = "/home/slambook2/ch8/LK2.png"; // second image
/// Optical flow tracker and interface
class OpticalFlowTracker {
public:
OpticalFlowTracker(
const Mat &img1_,
const Mat &img2_,
const vector<KeyPoint> &kp1_,
vector<KeyPoint> &kp2_,
vector<bool> &success_,
bool inverse_ = true, bool has_initial_ = false) :
img1(img1_), img2(img2_), kp1(kp1_), kp2(kp2_), success(success_), inverse(inverse_),
has_initial(has_initial_) {}
void calculateOpticalFlow(const Range &range);
private:
const Mat &img1;
const Mat &img2;
const vector<KeyPoint> &kp1;
vector<KeyPoint> &kp2;
vector<bool> &success;
bool inverse = true;
bool has_initial = false;
};
/**
* single level optical flow
* @param [in] img1 the first image
* @param [in] img2 the second image
* @param [in] kp1 keypoints in img1
 * @param [in|out] kp2 keypoints in img2; when has_initial_guess is set, the current values in kp2 are used as the initial guess
* @param [out] success true if a keypoint is tracked successfully
* @param [in] inverse use inverse formulation?
*/
void OpticalFlowSingleLevel(
const Mat &img1,
const Mat &img2,
const vector<KeyPoint> &kp1,
vector<KeyPoint> &kp2,
vector<bool> &success,
bool inverse = false,
bool has_initial_guess = false
);
/**
 * multi-level optical flow; the pyramid scale is set to 2 by default
 * the image pyramids will be created inside the function
 * @param [in] img1 the first image
 * @param [in] img2 the second image
* @param [in] kp1 keypoints in img1
* @param [out] kp2 keypoints in img2
* @param [out] success true if a keypoint is tracked successfully
* @param [in] inverse set true to enable inverse formulation
*/
void OpticalFlowMultiLevel(
const Mat &img1,
const Mat &img2,
const vector<KeyPoint> &kp1,
vector<KeyPoint> &kp2,
vector<bool> &success,
bool inverse = false
);
/**
 * get a grayscale value from the reference image (bilinear interpolation)
* @param img
* @param x
* @param y
* @return the interpolated value of this pixel
*/
inline float GetPixelValue(const cv::Mat &img, float x, float y) {
// boundary check
if (x < 0) x = 0;
if (y < 0) y = 0;
if (x >= img.cols - 1) x = img.cols - 2;
if (y >= img.rows - 1) y = img.rows - 2;
    float xx = x - floor(x);
    float yy = y - floor(y);
    int x_a1 = std::min(img.cols - 1, int(x) + 1);
    int y_a1 = std::min(img.rows - 1, int(y) + 1);
    // int(x), int(y) floor the coordinates to the top-left neighbor, while x_a1, y_a1
    // point to the next pixel (clamped at the border). The four neighboring intensities
    // are blended with weights formed from the fractional parts xx and yy.
    return (1 - xx) * (1 - yy) * img.at<uchar>(int(y), int(x))  // top-left pixel
           + xx * (1 - yy) * img.at<uchar>(int(y), x_a1)        // top-right pixel
           + (1 - xx) * yy * img.at<uchar>(y_a1, int(x))        // bottom-left pixel
           + xx * yy * img.at<uchar>(y_a1, x_a1);               // bottom-right pixel
}
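// A quick worked example (illustrative numbers, not part of the original code): for
// x = 1.5, y = 2.25 we get xx = 0.5, yy = 0.25, so the interpolated value is
//   0.375 * I(2, 1) + 0.375 * I(2, 2) + 0.125 * I(3, 1) + 0.125 * I(3, 2)
// (row, column order); the four weights always sum to 1.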
int main(int argc, char **argv) {
// images, note they are CV_8UC1, not CV_8UC3
    Mat img1 = imread(file_1, cv::IMREAD_GRAYSCALE);
    Mat img2 = imread(file_2, cv::IMREAD_GRAYSCALE);
// key points, using GFTT here.
vector<KeyPoint> kp1;
Ptr<GFTTDetector> detector = GFTTDetector::create(500, 0.01, 20); // maximum 500 keypoints
detector->detect(img1, kp1);
    // now let's track these keypoints in the second image
    // first use single-level LK on the validation image
vector<KeyPoint> kp2_single;
vector<bool> success_single;
    OpticalFlowSingleLevel(img1, img2, kp1, kp2_single, success_single);  // no initial guess here: has_initial is false, so each keypoint starts with dx = dy = 0
// then test multi-level LK
vector<KeyPoint> kp2_multi;
vector<bool> success_multi;
chrono::steady_clock::time_point t1 = chrono::steady_clock::now();
    OpticalFlowMultiLevel(img1, img2, kp1, kp2_multi, success_multi, true);  // the multi-level tracker seeds the top-level kp2 from the top-level kp1, so the initial dx, dy there are zero
chrono::steady_clock::time_point t2 = chrono::steady_clock::now();
auto time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "optical flow by gauss-newton: " << time_used.count() << endl;
// use opencv's flow for validation
vector<Point2f> pt1, pt2;
for (auto &kp: kp1) pt1.push_back(kp.pt);
vector<uchar> status;
vector<float> error;
t1 = chrono::steady_clock::now();
cv::calcOpticalFlowPyrLK(img1, img2, pt1, pt2, status, error);
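    // Note on the call above (per the OpenCV docs, stated here as a reference point,
    // not part of the original comments): with default arguments calcOpticalFlowPyrLK
    // uses a 21x21 search window and up to 3 additional pyramid levels (maxLevel = 3),
    // which makes it a fair baseline for the multi-level version above.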
t2 = chrono::steady_clock::now();
time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "optical flow by opencv: " << time_used.count() << endl;
// plot the differences of those functions
Mat img2_single;
    cv::cvtColor(img2, img2_single, cv::COLOR_GRAY2BGR);  // convert img2 from grayscale to BGR so colored overlays can be drawn
    for (size_t i = 0; i < kp2_single.size(); i++) {
        if (success_single[i]) {  // draw only keypoints that were tracked successfully
            cv::circle(img2_single, kp2_single[i].pt, 2, cv::Scalar(0, 250, 0), 2);  // green circle (BGR (0, 250, 0)) of radius 2 at the tracked position, thickness 2
            cv::line(img2_single, kp1[i].pt, kp2_single[i].pt, cv::Scalar(0, 250, 0));  // line from the keypoint's position in img1 to its tracked position in img2; the circle end marks the current frame
}
}
Mat img2_multi;
    cv::cvtColor(img2, img2_multi, cv::COLOR_GRAY2BGR);
    for (size_t i = 0; i < kp2_multi.size(); i++) {
        if (success_multi[i]) {
cv::circle(img2_multi, kp2_multi[i].pt, 2, cv::Scalar(0, 250, 0), 2);
cv::line(img2_multi, kp1[i].pt, kp2_multi[i].pt, cv::Scalar(0, 250, 0));
}
}
Mat img2_CV;
    cv::cvtColor(img2, img2_CV, cv::COLOR_GRAY2BGR);
    for (size_t i = 0; i < pt2.size(); i++) {
if (status[i]) {
cv::circle(img2_CV, pt2[i], 2, cv::Scalar(0, 250, 0), 2);
cv::line(img2_CV, pt1[i], pt2[i], cv::Scalar(0, 250, 0));
}
}
cv::imshow("tracked single level", img2_single);
cv::imshow("tracked multi level", img2_multi);
cv::imshow("tracked by opencv", img2_CV);
cv::waitKey(0);
return 0;
}
void OpticalFlowSingleLevel(
const Mat &img1,
const Mat &img2,
const vector<KeyPoint> &kp1,
vector<KeyPoint> &kp2,
vector<bool> &success,
bool inverse, bool has_initial) {
    kp2.resize(kp1.size());  // resize kp2 to match kp1, making room for the tracked keypoints in the second image
success.resize(kp1.size());
OpticalFlowTracker tracker(img1, img2, kp1, kp2, success, inverse, has_initial);
    parallel_for_(Range(0, kp1.size()),  // parallelize over keypoints with OpenCV's parallel_for_; Range(0, kp1.size()) covers all keypoint indices
std::bind(&OpticalFlowTracker::calculateOpticalFlow, &tracker, placeholders::_1));
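    // An equivalent call (a sketch; assumes OpenCV >= 3.3, where parallel_for_ accepts
    // a std::function) replaces std::bind with a lambda:
    //   parallel_for_(Range(0, kp1.size()),
    //                 [&tracker](const Range &range) { tracker.calculateOpticalFlow(range); });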
}
void OpticalFlowTracker::calculateOpticalFlow(const Range &range) {
// parameters
    int half_patch_size = 4;  // half of the patch size; defines the window around each keypoint
int iterations = 10;
for (size_t i = range.start; i < range.end; i++) {
auto kp = kp1[i];
double dx = 0, dy = 0; // dx,dy need to be estimated
if (has_initial) {
            dx = kp2[i].pt.x - kp.pt.x;  // use the current kp2 estimate as the initial displacement; here kp2 holds a prior guess (e.g. propagated from a coarser pyramid level), not independently detected corners
            dy = kp2[i].pt.y - kp.pt.y;
}
double cost = 0, lastCost = 0;
bool succ = true; // indicate if this point succeeded
// Gauss-Newton iterations
        Eigen::Matrix2d H = Eigen::Matrix2d::Zero();  // Hessian
        Eigen::Vector2d b = Eigen::Vector2d::Zero();  // bias
        Eigen::Vector2d J;                            // Jacobian
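        // The per-patch least-squares problem (restating what the loop below computes):
        //   min_{dx,dy}  sum_{(x,y) in patch} [ I1(x,y) - I2(x+dx, y+dy) ]^2
        // With residual e = I1 - I2 and J = -grad(I2) at the shifted location,
        // Gauss-Newton accumulates H = sum J J^T and b = -sum e J, then solves
        // H * update = b for the increment (dx, dy) += update.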
for (int iter = 0; iter < iterations; iter++) {
            if (inverse == false) {  // forward mode: both H and b are rebuilt every iteration; in inverse mode H stays fixed, so only b is reset
H = Eigen::Matrix2d::Zero();
b = Eigen::Vector2d::Zero();
} else {
// only reset b
b = Eigen::Vector2d::Zero();
}
cost = 0;
// compute cost and jacobian
for (int x = -half_patch_size; x < half_patch_size; x++)
for (int y = -half_patch_size; y < half_patch_size; y++) {
                    double error = GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y) -
                                   GetPixelValue(img2, kp.pt.x + x + dx, kp.pt.y + y + dy);
                    // Jacobian
if (inverse == false) {
J = -1.0 * Eigen::Vector2d(
                        0.5 * (GetPixelValue(img2, kp.pt.x + dx + x + 1, kp.pt.y + dy + y) -  // central difference of I2 w.r.t. dx, dy, from bilinear-interpolated samples
GetPixelValue(img2, kp.pt.x + dx + x - 1, kp.pt.y + dy + y)),
0.5 * (GetPixelValue(img2, kp.pt.x + dx + x, kp.pt.y + dy + y + 1) -
GetPixelValue(img2, kp.pt.x + dx + x, kp.pt.y + dy + y - 1))
);
} else if (iter == 0) {
// in inverse mode, J keeps same for all iterations
// NOTE this J does not change when dx, dy is updated, so we can store it and only compute error
J = -1.0 * Eigen::Vector2d(
                        0.5 * (GetPixelValue(img1, kp.pt.x + x + 1, kp.pt.y + y) -  // central difference of I1 at the fixed template location (not I2)
GetPixelValue(img1, kp.pt.x + x - 1, kp.pt.y + y)),
0.5 * (GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y + 1) -
GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y - 1))
);
}
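                    // Inverse trick (cf. Baker & Matthews, "Lucas-Kanade 20 Years On"):
                    // using grad(I1) at the fixed template instead of grad(I2) makes J,
                    // and hence H = sum J J^T, constant across iterations, so H is
                    // accumulated only on the first pass (see the check below).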
// compute H, b and set cost;
                    b += -error * J;  // error is a scalar
cost += error * error;
if (inverse == false || iter == 0) {
// also update H
H += J * J.transpose();
}
}
// compute update
Eigen::Vector2d update = H.ldlt().solve(b);
if (std::isnan(update[0])) {
                // may happen when the patch is uniformly black or white and H is singular
cout << "update is nan" << endl;
succ = false;
break;
}
if (iter > 0 && cost > lastCost) {
break;
}
// update dx, dy
dx += update[0];
dy += update[1];
lastCost = cost;
succ = true;
if (update.norm() < 1e-2) {
// converge
break;
}
}
success[i] = succ;
// set kp2
        kp2[i].pt = kp.pt + Point2f(dx, dy);  // tracked position in img2: the initial estimate plus the accumulated Gauss-Newton increments
}
}
void OpticalFlowMultiLevel(
const Mat &img1,
const Mat &img2,
    const vector<KeyPoint> &kp1,  // kp1 is given; the corresponding kp2 is to be estimated
    vector<KeyPoint> &kp2,        // kp2 starts empty; the top-level initial guess is kp1 scaled down by the pyramid factor
vector<bool> &success,
bool inverse) {
// parameters
    int pyramids = 4;            // number of pyramid levels
    double pyramid_scale = 0.5;  // scale factor between adjacent levels
    double scales[] = {1.0, 0.5, 0.25, 0.125};
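    // Note: scales[i] is just pow(pyramid_scale, i); the explicit array only works
    // because pyramids is hard-coded to 4. A generic variant (a sketch) would be:
    //   std::vector<double> scales(pyramids);
    //   for (int i = 0; i < pyramids; i++) scales[i] = std::pow(pyramid_scale, i);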
// create pyramids
chrono::steady_clock::time_point t1 = chrono::steady_clock::now();
vector<Mat> pyr1, pyr2; // image pyramids
for (int i = 0; i < pyramids; i++) {
if (i == 0) {
pyr1.push_back(img1);
pyr2.push_back(img2);
        } else {  // from the second level on, each level is the previous one scaled down by pyramid_scale, so pyr1 and pyr2 form matching coarse-to-fine image pyramids
Mat img1_pyr, img2_pyr;
            cv::resize(pyr1[i - 1], img1_pyr,  // scale the previous level down and push the result as the next pyramid level
cv::Size(pyr1[i - 1].cols * pyramid_scale, pyr1[i - 1].rows * pyramid_scale));
cv::resize(pyr2[i - 1], img2_pyr,
cv::Size(pyr2[i - 1].cols * pyramid_scale, pyr2[i - 1].rows * pyramid_scale));
pyr1.push_back(img1_pyr);
pyr2.push_back(img2_pyr);
}
}
chrono::steady_clock::time_point t2 = chrono::steady_clock::now();
auto time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "build pyramid time: " << time_used.count() << endl;
// coarse-to-fine LK tracking in pyramids
    vector<KeyPoint> kp1_pyr, kp2_pyr;  // keypoints at the top (coarsest) level
    for (auto &kp: kp1) {  // kp is a single keypoint from the kp1 array
        auto kp_top = kp;
        kp_top.pt *= scales[pyramids - 1];  // pixel coordinates times the top-level scale give the coordinates at the coarsest level
        kp1_pyr.push_back(kp_top);  // store the scaled keypoint for the top level
        kp2_pyr.push_back(kp_top);  // seed kp2_pyr with the same coordinates, so in the single-level solver the initial dx, dy are zero
    }
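    // A concrete example (illustrative numbers, not from the original): with 4 levels
    // and pyramid_scale = 0.5, a keypoint at (400, 240) in the full-resolution image
    // starts at (400, 240) * 0.125 = (50, 30) on the coarsest level; after each level
    // is solved, the estimate is divided by pyramid_scale, i.e. doubled, to seed the
    // next finer level.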
    for (int level = pyramids - 1; level >= 0; level--) {  // level 0 is the finest; tracking runs from the coarsest level down
// from coarse to fine
success.clear();
t1 = chrono::steady_clock::now();
        OpticalFlowSingleLevel(pyr1[level], pyr2[level], kp1_pyr, kp2_pyr, success, inverse, true);  // at the top level kp2_pyr was copied from kp1_pyr, so each keypoint starts with dx, dy = 0
        // after the single-level tracker returns, kp2_pyr holds the refined positions; scaling them up seeds the next finer level, where their difference from kp1_pyr becomes the new initial dx, dy
t2 = chrono::steady_clock::now();
auto time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "track pyr " << level << " cost time: " << time_used.count() << endl;
if (level > 0) {
            for (auto &kp: kp1_pyr)  // scale the keypoints up (divide by pyramid_scale) to obtain the initial guesses for the next finer level
                kp.pt /= pyramid_scale;
for (auto &kp: kp2_pyr)
kp.pt /= pyramid_scale;
}
}
    for (auto &kp: kp2_pyr)
        kp2.push_back(kp);  // copy the finest-level result into kp2; these are the tracked matches for the keypoints in kp1
}
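// Build sketch (an assumption; the actual slambook2 ch8 project builds with CMake):
//   g++ -O2 optical_flow.cpp -o optical_flow \
//       `pkg-config --cflags --libs opencv4` -I/usr/include/eigen3
// The Eigen include path and the pkg-config module name (opencv4 vs opencv) depend on
// the local installation.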