主要思路
每个 thread 负责一个 rtsp 流。主线程里定义 WrapData 类型的消息队列(WrapData 同时包含 frame 和对应的 rtsp 信息),并把该队列传到每个线程中;线程通过 opencv 获取 frame,将 frame 和 rtsp 信息包装成 WrapData 后 push 到队列中。主函数中检测到队列非空(长度大于 0)时,对 .front() 进行推理,并在推理结束后 .pop()。
一、超参数和常量
// Post-processing thresholds; both are forwarded to nms() after inference.
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
// Images per inference call; also used to size the host and device buffers.
#define BATCH_SIZE 1

// Network input geometry, taken from the Yolo layer definition.
static const int INPUT_W = Yolo::INPUT_W;
static const int INPUT_H = Yolo::INPUT_H;

// Number of floats in the output blob for one image: room for
// MAX_OUTPUT_BBOX_COUNT detections expressed in floats, plus one extra float.
// Assumes the yolo layer never emits more than MAX_OUTPUT_BBOX_COUNT boxes
// with conf >= 0.1.
static const int OUTPUT_SIZE = 1 + Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float);

// Names of the engine's input and output bindings.
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";

// Logger instance handed to the TensorRT runtime.
static Logger gLogger;

// Label text drawn for each detection, indexed by the detection's class id.
const char* class_name[4] = {"1", "2", "3", "4"};
二、简易的WrapData类
// One captured frame together with the stream it came from.
struct WrapData {
    // The grabbed image.
    Mat img;
    // Address of the RTSP stream that produced `img`.
    string channel;
};
三、opencv获取rtsp流
// Grabs frames from one RTSP stream and appends them to a shared queue.
//
// Opens `rtsp_address` with cv::VideoCapture and keeps reading until an empty
// frame is returned, pushing one WrapData (frame + source address) per frame.
//
// Parameters:
//   rtsp_address - address passed to VideoCapture::open; also stored in
//                  WrapData::channel so the consumer can tell streams apart.
//   Wrap_images  - queue that receives one element per grabbed frame.
// Returns:
//   -1 if the stream could not be opened, 1 once the read loop has ended
//   because an empty frame was grabbed.
//
// NOTE(review): std::queue is not synchronized and this function takes no
// lock. When several capture threads and a consumer share one queue, every
// push()/front()/pop() is a data race; the queue also grows without bound if
// the consumer is slower than the cameras. Fixing that needs a lock (or a
// bounded thread-safe queue) shared with the consumer.
int capture_show(string& rtsp_address, queue<WrapData>& Wrap_images){
    VideoCapture cap;
    cap.open(rtsp_address);
    if (!cap.isOpened()) {
        cerr << "ERROR! Unable to open camera\n";
        return -1;
    }

    Mat frame;
    for (;;)
    {
        // Wait for a new frame from the camera and store it into 'frame'.
        cap.read(frame);
        // An empty frame means the grab failed; stop capturing.
        if (frame.empty()) {
            cerr << "ERROR! blank frame grabbed\n";
            break;
        }

        WrapData Wrap_img;
        Wrap_img.channel = rtsp_address;
        // Deep copy on purpose: assigning a cv::Mat only shares the pixel
        // buffer, and the next read() may write into that same buffer, which
        // would silently overwrite frames that are still waiting in the queue.
        Wrap_img.img = frame.clone();
        Wrap_images.push(Wrap_img);
    }
    cap.release();
    return 1;
}
四、.engine 推理部分
// Runs one batch through the TensorRT execution context.
//
// Copies `input` to buffers[0], enqueues inference, copies buffers[1] back to
// `output`, and blocks until the stream has finished.
//
// Parameters:
//   context   - execution context used to enqueue the inference.
//   stream    - CUDA stream on which the copies and the inference are issued.
//   buffers   - device buffers: [0] is the input binding, [1] the output binding.
//   input     - host buffer of batchSize * 3 * INPUT_H * INPUT_W floats.
//   output    - host buffer of batchSize * OUTPUT_SIZE floats.
//   batchSize - number of images in the batch.
//
// If enqueue() reports failure, an error is logged and `output` is left
// untouched (it still holds whatever the caller had in it).
void doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* input, float* output, int batchSize) {
    const size_t inputBytes = (size_t)batchSize * 3 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = (size_t)batchSize * OUTPUT_SIZE * sizeof(float);

    // DMA the input batch to the device.
    CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, inputBytes, cudaMemcpyHostToDevice, stream));

    // Infer on the batch asynchronously; enqueue() returns false on failure.
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        std::cerr << "doInference: IExecutionContext::enqueue failed" << std::endl;
        // Drain the pending upload so the caller can safely reuse `input`.
        CUDA_CHECK(cudaStreamSynchronize(stream));
        return;
    }

    // DMA the result back to the host and wait for everything to complete.
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], outputBytes, cudaMemcpyDeviceToHost, stream));
    CUDA_CHECK(cudaStreamSynchronize(stream));
}
五、主函数流程
int main()
{
queue<WrapData> Wrap_images;
Mat image;
string imgname;
string rtsp1 = "rtsp://admin:a12345678@192.168.1.2:554/Streaming/Channels/302";
string rtsp2 = "rtsp://admin:a12345678@192.168.1.2:554/Streaming/Channels/202";
// std::thread thread_size(printsize,std::ref(images));
std::string engine_name = "last.engine";
// deserialize the .engine and run inference
std::ifstream file(engine_name, std::ios::binary);
if (!file.good()) {
std::cerr << "read " << engine_name << " error!" << std::endl;
return -1;
}
char *trtModelStream = nullptr;
size_t size = 0;
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
// prepare input data ---------------------------
static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
printf("batch_size: %d --- data_size: %d!!!\n",BATCH_SIZE,BATCH_SIZE * 3 * INPUT_H * INPUT_W);
//for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
// data[i] = 1.0;
static float prob[BATCH_SIZE * OUTPUT_SIZE];
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
string window_name = "Yolov5 USB Camera";
namedWindow(window_name);
std::thread video1(capture_show, std::ref(rtsp1), std::ref(Wrap_images));
std::thread video2(capture_show, std::ref(rtsp2), std::ref(Wrap_images));
while(1)
{
if(time(NULL) % 5 == 0)
{}
if (Wrap_images.size()>0)
{
cout<<"ready to infer"<<endl;
cv::Mat pr_img = preprocess_img(Wrap_images.front().img, INPUT_W, INPUT_H); // letterbox BGR to RGB & resize
// TODO: remove these parameters, no batch inteference
int b=0, i=0;
int fcount = 1;
// This for loop is convert the cv::Mat into 1D Float array and pass into doInteference
for (int row = 0; row < INPUT_H; ++row) {
uchar* uc_pixel = pr_img.data + row * pr_img.step;
for (int col = 0; col < INPUT_W; ++col) {
data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
uc_pixel += 3;
++i;
}
}
// Run inference
auto start = std::chrono::system_clock::now();
doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
auto end = std::chrono::system_clock::now();
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
// std::cout<<"FPS: "<<int(1000/std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count())<<std::endl;
std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
}
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
//std::cout << res.size() << std::endl;
for (size_t j = 0; j < res.size(); j++) {
cv::Rect r = get_rect(pr_img, res[j].bbox);
cv::rectangle(pr_img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
// cv::putText(pr_img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
cv::putText(pr_img, class_name[(int)res[j].class_id], cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
}
imshow(window_name, pr_img);
}
Wrap_images.pop();
}
if (waitKey(10) == 27)
{
cout << "Esc key is pressed by user. Stoppig the video" << endl;
break;
}
}
cudaStreamDestroy(stream);
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
}