Recently, I moved into behavior recognition for construction scenes. Technically, this application breaks down into two parts: video tracking and action recognition. After a week of intensive literature reading, it turned out to be a much deeper pit than expected. On the tracking side, what shows up most often in recent top-conference papers is single-object tracking, whereas what we need to solve is multi-object tracking; among recent work, DeepSORT is the most practical option. Real construction sites add huge challenges of their own: varying camera distance, occlusion, workers in identical uniforms, people overlapping one another, and large differences in how pronounced a violating action is. On the action-recognition side there is no shortage of recent papers; for practicality we have settled on the ECO model for now. Extracting the spatio-temporal tube data of a single tracked worker is yet another hard problem. Either way, this hard nut has to be cracked.
Training an action-recognition model usually means capturing roughly 2-3 second clips of a given action with a camera and labeling these short videos for training. At test time, we first have to track a particular target in the video and then extract that target's spatio-temporal tube data; when extracting this data, the background also matters.
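To make the tube-extraction step concrete, here is a minimal sketch in the same OpenCV/C++ style as the capture code below, cropping one tracked target out of a frame sequence. The per-frame `boxes` are assumed to come from the tracker, and the names `padBox` and `extractTube` are mine rather than any library's; each box is padded so that some background context around the worker is kept, as noted above:
#include "opencv2/opencv.hpp"
#include <vector>

using namespace cv;
using namespace std;

// Grow a tracked box by `pad` pixels on every side and clip it to the frame,
// so the crop keeps some background context around the worker.
static Rect padBox(const Rect& box, int pad, const Size& frameSize) {
    Rect padded(box.x - pad, box.y - pad, box.width + 2 * pad, box.height + 2 * pad);
    return padded & Rect(Point(0, 0), frameSize);
}

// Crop one tracked target out of an aligned frame/box sequence and resize the
// crops to a fixed size; the result is the per-target "tube" fed to the classifier.
// `frames` and `boxes` must be aligned frame-by-frame (boxes come from the tracker).
vector<Mat> extractTube(const vector<Mat>& frames, const vector<Rect>& boxes,
                        int pad = 16, Size outSize = Size(224, 224)) {
    vector<Mat> tube;
    for (size_t i = 0; i < frames.size() && i < boxes.size(); ++i) {
        Rect roi = padBox(boxes[i], pad, frames[i].size());
        if (roi.area() <= 0)
            continue;  // box fell completely outside the frame
        Mat crop;
        resize(frames[i](roi), crop, outSize);
        tube.push_back(crop);
    }
    return tube;
}
The resulting crops can then be saved to disk or fed to the action model, following the same per-frame naming pattern used by the capture program below.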
Next came data collection. Together with four undergraduates from the university, I started a ten-day data-collection exercise. To improve efficiency, a laptop drove the only three webcams we had, capturing data from different angles. We varied lighting, scene, worker height, how exaggerated the action was, and shooting angle, and recorded action videos under different combinations of these factors. Thanks to the four undergraduates, we collected 640 short clips over these ten days, plus a demo video for testing.
The capture code, built on OpenCV, is as follows:
#include "opencv2/opencv.hpp"
#include <iostream>
#include <iomanip>
#include <windows.h>
#define KEY_DOWN(VK_NONAME) ((GetAsyncKeyState(VK_NONAME) & 0x8000) ? 1:0) //必要
using namespace std;
using namespace cv;
int main() {
// Create a VideoCapture object and use camera to capture the video
VideoCapture cap1(0);
VideoCapture cap2(1);
VideoCapture cap3(2);
// Check if camera opened successfully
if (!cap1.isOpened() || !cap2.isOpened() || !cap3.isOpened())
{
cout << "Error opening video stream" << endl;
return -1;
}
// Default resolution of the frame is obtained.The default resolution is system dependent.
//int frame_width = cap1.get(CV_CAP_PROP_FRAME_WIDTH);
//int frame_height = cap1.get(CV_CAP_PROP_FRAME_HEIGHT);
string filename;
string cam_num1 = "cam1";
string cam_num2 = "cam2";
string cam_num3 = "cam3";
while (1){
//filename << "smoking32"; //smoking27 wear_hat27
//cam_num1 << "cam7";
//cam_num2 << "cam8";
//cam_num3 << "cam9";
if (!cap1.isOpened() || !cap2.isOpened() || !cap3.isOpened())
{
cout << "Error opening video stream" << endl;
break;
}
//cout << "please input action and cam_num." << endl << "For example:" << endl << "smoking32" << endl << "cam7" << endl << "cam8" << endl << "cam9" << endl;
//cin >> filename >> cam_num1 >> cam_num2 >> cam_num3;
cout << "please input action and cam_num." << endl << "For example:" << endl << "smoking32" << endl << endl;
cout << "Now, please input (input 'q' will exit):" << endl;
cin >> filename;
if (filename == "q")
break;
// Define the codec and create VideoWriter object.The output is stored in 'outcpp.avi' file.
VideoWriter video1("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num1 + "\\" + filename + cam_num1 + ".avi", CV_FOURCC('D', 'I', 'V', 'X'), 30, Size(640, 480));
VideoWriter video2("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num2 + "\\" + filename + cam_num2 + ".avi", CV_FOURCC('D', 'I', 'V', 'X'), 30, Size(640, 480));
VideoWriter video3("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num3 + "\\" + filename + cam_num3 + ".avi", CV_FOURCC('D', 'I', 'V', 'X'), 30, Size(640, 480));
long currentFrame = 1;
while (1)
{
Mat frame1;
Mat frame2;
Mat frame3;
// Capture frame-by-frame
cap1 >> frame1;
cap2 >> frame2;
cap3 >> frame3;
// If the frame is empty, break immediately
if (frame1.empty() || frame2.empty() || frame3.empty())
break;
stringstream str1;
str1 << "img_" << setw(4) << setfill('0') << currentFrame << ".jpg";
cout << "正在处理第" << currentFrame << "帧" << endl;
printf("\n");
// 设置每1帧获取一次帧
if (currentFrame % 1 == 0) {
// 将帧转成图片输出
imwrite("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num1 + "\\" + str1.str(), frame1);
imwrite("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num2 + "\\" + str1.str(), frame2);
imwrite("C:\\个人\\dataset\\ASD\\" + filename + "\\" + cam_num3 + "\\" + str1.str(), frame3);
}
currentFrame++;
// Write the frame into the file 'outcpp.avi'
video1.write(frame1);
video2.write(frame2);
video3.write(frame3);
// Display the resulting frame
imshow("cam1", frame1);
imshow("cam2", frame2);
imshow("cam3", frame3);
waitKey(5);
if (KEY_DOWN(32))
break;
if (KEY_DOWN(27))
{
video1.release();
video2.release();
video3.release();
cap1.release();
cap2.release();
cap3.release();
// Closes all the windows
destroyAllWindows();
return 0;
}
}
// When everything done, release the video capture and write object
video1.release();
video2.release();
video3.release();
}
cap1.release();
cap2.release();
cap3.release();
// Closes all the windows
destroyAllWindows();
return 0;
}
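A note on using the program above: the action name typed at the prompt (for example smoking32) becomes both the output folder name and, later, the label attached to the clip for training; pressing the space bar ends the current clip and returns to the prompt for the next action, while Esc quits. The per-action folders for the three cameras have to be created in advance, because OpenCV's VideoWriter and imwrite will not create missing directories.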
To sum up, this post went over the technical challenges of behavior recognition in construction scenes, covering both video tracking and action recognition. For multi-object tracking, DeepSORT is currently the most practical choice, while the ECO model was chosen for action recognition. Data collection had to deal with lighting, scene changes, and the variability of the workers' actions; in the end, 640 labeled short clips were collected as training data for the model.