Modified yolov5.cpp (full file contents, ready to copy directly)
#include <iostream>
#include <chrono>
#include <cmath>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1
#define MAX_IMAGE_INPUT_SIZE_THRESH 3000 * 3000 // ensure this exceeds the maximum size of the input images!
// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yolo layer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes with conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
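// Output layout per batch item, as consumed by nms() later in this file:
// prob[0] holds the detection count, followed by the Yolo::Detection structs
// flattened as floats (hence the OUTPUT_SIZE formula above).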
static Logger gLogger;
static int get_width(int x, float gw, int divisor = 8) {
return int(ceil((x * gw) / divisor)) * divisor;
}
static int get_depth(int x, float gd) {
if (x == 1) return 1;
int r = round(x * gd);
if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) {
--r;
}
return std::max<int>(r, 1);
}
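// Worked examples for the scale helpers (yolov5s uses gd = 0.33, gw = 0.50):
//   get_width(64, 0.50)   = int(ceil(32.0 / 8)) * 8 = 32 channels
//   get_width(1024, 0.50) = 512 channels
//   get_depth(3, 0.33) = round(0.99) = 1 repeat; get_depth(9, 0.33) = round(2.97) = 3 repeats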
ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
INetworkDefinition* network = builder->createNetworkV2(0U);
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
assert(conv0);
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
/* ------ yolov5 head ------ */
auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
return engine;
}
ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
INetworkDefinition* network = builder->createNetworkV2(0U);
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
auto c3_10 = C3(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.10");
auto sppf11 = SPPF(network, weightMap, *c3_10->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.11");
/* ------ yolov5 head ------ */
auto conv12 = convBlock(network, weightMap, *sppf11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
auto upsample13 = network->addResize(*conv12->getOutput(0));
assert(upsample13);
upsample13->setResizeMode(ResizeMode::kNEAREST);
upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
auto cat14 = network->addConcatenation(inputTensors14, 2);
auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
auto upsample17 = network->addResize(*conv16->getOutput(0));
assert(upsample17);
upsample17->setResizeMode(ResizeMode::kNEAREST);
upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
auto cat18 = network->addConcatenation(inputTensors18, 2);
auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
auto upsample21 = network->addResize(*conv20->getOutput(0));
assert(upsample21);
upsample21->setResizeMode(ResizeMode::kNEAREST);
upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors21, 2);
auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
auto cat25 = network->addConcatenation(inputTensors25, 2);
auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
auto cat28 = network->addConcatenation(inputTensors28, 2);
auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
auto cat31 = network->addConcatenation(inputTensors31, 2);
auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
return engine;
}
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, bool& is_p6, float& gd, float& gw, std::string& wts_name) {
// Create builder
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine* engine = nullptr;
if (is_p6) {
engine = build_engine_p6(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
}
else {
engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
}
assert(engine != nullptr);
// Serialize the engine
(*modelStream) = engine->serialize();
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
}
void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, int batchSize) {
// infer on the batch asynchronously, and DMA output back to host
context.enqueue(batchSize, buffers, stream, nullptr);
CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
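// Note: doInference() synchronizes the stream before returning (cudaStreamSynchronize),
// so `output` is safe to read on the host immediately after the call.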
//bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bool& is_p6, float& gd, float& gw, std::string& img_dir) {
bool parse_args(std::string argv[], std::string& wts, std::string& engine, bool& is_p6, float& gd, float& gw, std::string& img_dir) {
//if (argc < 4) return false;
//if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
// wts = std::string(argv[2]);
// engine = std::string(argv[3]);
// auto net = std::string(argv[4]);
if (std::string(argv[1]) == "-s") {
wts = std::string(argv[2]);
engine = std::string(argv[3]);
auto net = std::string(argv[4]);
if (net[0] == 'n') {
gd = 0.33;
gw = 0.25;
}
else if (net[0] == 's') {
gd = 0.33;
gw = 0.50;
}
else if (net[0] == 'm') {
gd = 0.67;
gw = 0.75;
}
else if (net[0] == 'l') {
gd = 1.0;
gw = 1.0;
}
else if (net[0] == 'x') {
gd = 1.33;
gw = 1.25;
}
//else if (net[0] == 'c' && argc == 7) { // modified: argc no longer used
else if (net[0] == 'c') {
//gd = atof(argv[5]); // modified
gd = atof(argv[5].c_str());
//gw = atof(argv[6]); // modified
gw = atof(argv[6].c_str());
}
else {
return false;
}
if (net.size() == 2 && net[1] == '6') {
is_p6 = true;
}
}
//else if (std::string(argv[1]) == "-d" && argc == 4) { // modified
else if (std::string(argv[1]) == "-d") {
//engine = std::string(argv[2]); // modified
engine = std::string(argv[3]);
//img_dir = std::string(argv[3]); // commented out
}
else {
return false;
}
return true;
}
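// Illustrative parse[] layouts for the two modes (example values, mirroring the
// hard-coded array in main() below):
//   build an engine: { "./yolov5", "-s", "yolov5s.wts", "yolov5s.engine", "s" }
//   run inference:   { "./yolov5", "-d", "(unused)", "yolov5s.engine" }
// Note that in "-d" mode this modified parser reads the engine path from argv[3], not argv[2].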
int main(int argc, char** argv) {
cudaSetDevice(DEVICE);
std::string wts_name = "";
std::string engine_name = "";
bool is_p6 = false;
float gd = 0.0f, gw = 0.0f;
std::string img_dir;
std::string parse[10];
parse[0] = "./yolov5"; // unused
parse[1] = "-d"; // -s: build engine, -d: run engine
parse[2] = "yolov5s.wts"; // path to the .wts file, used only with -s
parse[3] = "yolov5s.engine"; // path to the .engine file, used only with -d
parse[4] = "s"; // model type [n/s/m/l/x/n6/s6/m6/l6/x6 or c/c6 gd gw]
if (!parse_args(parse, wts_name, engine_name, is_p6, gd, gw, img_dir)) {
/*std::cerr << "arguments not right!" << std::endl;
std::cerr << "./yolov5 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6 or c/c6 gd gw] // serialize model to plan file" << std::endl;
std::cerr << "./yolov5 -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
return -1;*/
std::cerr << "arguments not right!" << std::endl;
return -1;
}
// create a model using the API directly and serialize it to a stream
if (!wts_name.empty()) {
IHostMemory* modelStream{ nullptr };
APIToModel(BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
assert(modelStream != nullptr);
std::ofstream p(engine_name, std::ios::binary);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
return 0;
}
// deserialize the .engine and run inference
std::ifstream file(engine_name, std::ios::binary);
if (!file.good()) {
std::cerr << "read " << engine_name << " error!" << std::endl;
return -1;
}
char* trtModelStream = nullptr;
size_t size = 0;
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
//std::vector<std::string> file_names;
//if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
// std::cerr << "read_files_in_dir failed." << std::endl;
// return -1;
//}
static float prob[BATCH_SIZE * OUTPUT_SIZE];
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 2);
float* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
uint8_t* img_host = nullptr;
uint8_t* img_device = nullptr;
// prepare input data cache in pinned memory
CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
// prepare input data cache in device memory
CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
//int fcount = 0;
//std::vector<cv::Mat> imgs_buffer(BATCH_SIZE);
//for (int f = 0; f < (int)file_names.size(); f++) {
// fcount++;
// if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
// //auto start = std::chrono::system_clock::now();
// float* buffer_idx = (float*)buffers[inputIndex];
// for (int b = 0; b < fcount; b++) {
// cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
// if (img.empty()) continue;
// imgs_buffer[b] = img;
// size_t size_image = img.cols * img.rows * 3;
// size_t size_image_dst = INPUT_H * INPUT_W * 3;
// //copy data to pinned memory
// memcpy(img_host, img.data, size_image);
// //copy data to device memory
// CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
// preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);
// buffer_idx += size_image_dst;
// }
//
// video stream input: pass the stream URL or a file path
cv::VideoCapture capture("rtsp://admin:hk123456@192.168.18.211/Streaming/Channels/1");
// camera input: 0 = built-in camera, 1 = USB camera
//cv::VideoCapture capture(0);
cv::Mat frame;
while (cv::waitKey(1) != 27)
{
capture >> frame;
cv::Mat img = frame;
//std::cout << "imagesizeaaaaaaaaa" << img.size(); // 读图像的尺寸大小
if (img.empty())
{
std::cout << "No Video input\n" << std::endl;
return -1;
}
std::vector<cv::Mat> imgs_buffer(BATCH_SIZE);
float* buffer_idx = (float*)buffers[inputIndex];
imgs_buffer[0] = img;
size_t size_image = img.cols * img.rows * 3;
size_t size_image_dst = INPUT_H * INPUT_W * 3;
//copy data to pinned memory
memcpy(img_host, img.data, size_image);
//copy data to device memory
CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);
buffer_idx += size_image_dst;
// Run inference
// everything above `start` is frame capture and preprocessing
auto start = std::chrono::system_clock::now();
doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
auto end = std::chrono::system_clock::now();
std::cout << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
//std::vector<std::vector<Yolo::Detection>> batch_res(fcount); // modified: fixed batch of 1
std::vector<std::vector<Yolo::Detection>> batch_res(1);
/*for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
}
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
cv::Mat img = imgs_buffer[b];
for (size_t j = 0; j < res.size(); j++) {
cv::Rect r = get_rect(img, res[j].bbox);
cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
}
cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
}
fcount = 0;*/ // modified: replaced by the single-frame handling below
auto& res = batch_res[0];
nms(res, &prob[0], CONF_THRESH, NMS_THRESH);
std::cout << res.size() << std::endl;
auto& res_pre = batch_res[0];
cv::Mat img_pre = imgs_buffer[0];
std::cout << res_pre.size() << std::endl;
// draw boxes
if (res_pre.size() > 0)
for (size_t j = 0; j < res_pre.size(); j++) {
cv::Rect r = get_rect(img_pre, res_pre[j].bbox);
cv::rectangle(img_pre, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
cv::putText(img_pre, std::to_string((int)res_pre[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
// crude FPS estimate (drawn on img_pre, which shares its pixel buffer with img)
cv::putText(img_pre, "FPS:" + std::to_string(1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()), cv::Point(img_pre.cols * 0.02, img_pre.rows * 0.05), cv::FONT_HERSHEY_PLAIN, 2, cv::Scalar(0, 255, 0), 2, 8);
}
cv::namedWindow("Inference", cv::WINDOW_FREERATIO);
cv::imshow("Inference", img_pre);
}
// Release stream and buffers
cudaStreamDestroy(stream);
CUDA_CHECK(cudaFree(img_device));
CUDA_CHECK(cudaFreeHost(img_host));
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
// Print histogram of the output distribution
//std::cout << "\nOutput:\n\n";
//for (unsigned int i = 0; i < OUTPUT_SIZE; i++)
//{
// std::cout << prob[i] << ", ";
// if (i % 10 == 0) std::cout << std::endl;
//}
//std::cout << std::endl;
return 0;
}
============================ 2023-04-25 Update ===============================
What's new:
Added socket communication: detection only runs when a trigger message is received; with no message, nothing is detected
Added multi-threaded detection, so several Hikvision cameras can be processed at the same time
Added a thread lock (mutex) to prevent resource contention when multiple threads access the socket data
Added region logic: an object is only reported when it appears inside a predefined area
Added reconnect logic for the case where OpenCV loses the camera stream and fails to reconnect
Added socket error handling: on a 10054 connection error, return an empty string and keep the program running
(Error 10054: the connection was forcibly closed by the remote peer)
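Before the full file, here is a minimal, self-contained sketch of the hand-off pattern the update relies on: one receiver thread pushes messages into a bounded deque guarded by a mutex, and the detection threads read the newest entry. The names push_message and latest_message are illustrative only; the real file below uses str_socket_1 and an inline lock_guard in thread1.

#include <deque>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>

static std::mutex g_mutex;              // guards g_queue
static std::deque<std::string> g_queue; // newest message at the front

// Producer side: push a message and keep at most 5 entries, like str_socket_1 below.
void push_message(const std::string& msg) {
    std::lock_guard<std::mutex> lock(g_mutex);
    g_queue.push_front(msg);
    if (g_queue.size() > 5) g_queue.pop_back();
}

// Consumer side: read the newest message, or "" if none has arrived yet.
std::string latest_message() {
    std::lock_guard<std::mutex> lock(g_mutex);
    return g_queue.empty() ? std::string() : g_queue.front();
}

int main() {
    std::thread producer([] {
        for (int i = 0; i < 3; ++i) push_message("msg " + std::to_string(i));
    });
    producer.join();
    std::cout << latest_message() << std::endl; // prints "msg 2"
    return 0;
}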
#include <iostream>
#include <chrono>
#include <cmath>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
#include <ctime>
#include <thread>
#include <mutex> // needed for std::mutex / std::lock_guard used below
#include <deque> // needed for the std::deque message queue used below
// Winsock dependencies for the socket communication
#include <winsock2.h>
#include <sstream>
#include <iomanip>
#pragma comment(lib, "Ws2_32.lib")
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.4 // NMS (non-maximum suppression) IoU threshold
#define CONF_THRESH 0.5 // confidence threshold
#define BATCH_SIZE 1 // batch size
#define MAX_IMAGE_INPUT_SIZE_THRESH 3000 * 3000 // ensure this exceeds the maximum size of the input images!
// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yolo layer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes with conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;
static int get_width(int x, float gw, int divisor = 8) {
return int(ceil((x * gw) / divisor)) * divisor;
}
static int get_depth(int x, float gd) {
if (x == 1) return 1;
int r = round(x * gd);
if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) {
--r;
}
return std::max<int>(r, 1);
}
ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
INetworkDefinition* network = builder->createNetworkV2(0U);
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
assert(conv0);
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
/* ------ yolov5 head ------ */
auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
return engine;
}
ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
INetworkDefinition* network = builder->createNetworkV2(0U);
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
auto c3_10 = C3(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.10");
auto sppf11 = SPPF(network, weightMap, *c3_10->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.11");
/* ------ yolov5 head ------ */
auto conv12 = convBlock(network, weightMap, *sppf11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
auto upsample13 = network->addResize(*conv12->getOutput(0));
assert(upsample13);
upsample13->setResizeMode(ResizeMode::kNEAREST);
upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
auto cat14 = network->addConcatenation(inputTensors14, 2);
auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
auto upsample17 = network->addResize(*conv16->getOutput(0));
assert(upsample17);
upsample17->setResizeMode(ResizeMode::kNEAREST);
upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
auto cat18 = network->addConcatenation(inputTensors18, 2);
auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
auto upsample21 = network->addResize(*conv20->getOutput(0));
assert(upsample21);
upsample21->setResizeMode(ResizeMode::kNEAREST);
upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors21, 2);
auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
auto cat25 = network->addConcatenation(inputTensors25, 2);
auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
auto cat28 = network->addConcatenation(inputTensors28, 2);
auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
auto cat31 = network->addConcatenation(inputTensors31, 2);
auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
return engine;
}
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, bool& is_p6, float& gd, float& gw, std::string& wts_name) {
// Create builder
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine* engine = nullptr;
if (is_p6) {
engine = build_engine_p6(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
}
else {
engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
}
assert(engine != nullptr);
// Serialize the engine
(*modelStream) = engine->serialize();
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
}
void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, int batchSize) {
// infer on the batch asynchronously, and DMA output back to host
context.enqueue(batchSize, buffers, stream, nullptr);
CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
//bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bool& is_p6, float& gd, float& gw, std::string& img_dir) {
bool parse_args(std::string argv[], std::string& wts, std::string& engine, bool& is_p6, float& gd, float& gw, std::string& img_dir) {
//if (argc < 4) return false;
//if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
// wts = std::string(argv[2]);
// engine = std::string(argv[3]);
// auto net = std::string(argv[4]);
if (std::string(argv[1]) == "-s") {
wts = std::string(argv[2]);
engine = std::string(argv[3]);
auto net = std::string(argv[4]);
if (net[0] == 'n') {
gd = 0.33;
gw = 0.25;
}
else if (net[0] == 's') {
gd = 0.33;
gw = 0.50;
}
else if (net[0] == 'm') {
gd = 0.67;
gw = 0.75;
}
else if (net[0] == 'l') {
gd = 1.0;
gw = 1.0;
}
else if (net[0] == 'x') {
gd = 1.33;
gw = 1.25;
}
//else if (net[0] == 'c' && argc == 7) { // modified: argc no longer used
else if (net[0] == 'c') {
//gd = atof(argv[5]); // modified
gd = atof(argv[5].c_str());
//gw = atof(argv[6]); // modified
gw = atof(argv[6].c_str());
}
else {
return false;
}
if (net.size() == 2 && net[1] == '6') {
is_p6 = true;
}
}
//else if (std::string(argv[1]) == "-d" && argc == 4) { // modified
else if (std::string(argv[1]) == "-d") {
//engine = std::string(argv[2]); // modified
engine = std::string(argv[3]);
//img_dir = std::string(argv[3]); // commented out
}
else {
return false;
}
return true;
}
// open a socket connection and receive one message from the server
std::string receiveFromServer(const char* ip, int ip_id)
{
// initialize the Winsock library
WSADATA wsaData;
int err = WSAStartup(MAKEWORD(2, 2), &wsaData);
if (err != 0)
{
std::cout << "WSAStartup failed with error: " << err << std::endl;
}
// create the socket
SOCKET sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sock == INVALID_SOCKET)
{
std::cout << "socket failed with error: " << WSAGetLastError() << std::endl;
WSACleanup();
}
// set the server address and port
sockaddr_in serverAddr;
serverAddr.sin_family = AF_INET;
serverAddr.sin_addr.s_addr = inet_addr(ip); // server IP address, e.g. 192.168.110.161 or 172.18.18.252
serverAddr.sin_port = htons(ip_id); // server port, e.g. 9001 or 4001
// connect to the server
int ret;
while (true) {
ret = connect(sock, (SOCKADDR*)&serverAddr, sizeof(serverAddr));
if (ret == SOCKET_ERROR)
{
std::cout << "connect failed with error: " << WSAGetLastError() << std::endl;
Sleep(1000); // wait one second and retry the connection
}
else {
break;
}
}
// receive data from the server
//BYTE recvData[32]; // original: BYTE receive buffer
unsigned char recvData[32]; // changed: raw bytes, combined into 16-bit words below
ret = recv(sock, (char*)recvData, 32, 0); // buffer size reduced from 1024 to 32 for the 16-bit protocol
if (ret == SOCKET_ERROR)
{
int error_code = WSAGetLastError();
std::cout << "recv failed with error: " << error_code << std::endl;
closesocket(sock);
WSACleanup();
return ""; // on errors such as 10054 (connection forcibly closed by the peer), return an empty string and keep running
}
else
{
std::stringstream ss;
for (int i = 0; i < ret; i += 2)
{
uint16_t value = (uint16_t)(recvData[i] << 8) | recvData[i + 1]; // big-endian: high byte first
//std::cout << std::hex << std::setfill('0') << std::setw(4) << value << " ";
ss << std::hex << std::setfill('0') << std::setw(4) << value << " "; // append the 4-digit hex word to ss
}
// close the socket and clean up Winsock
closesocket(sock);
WSACleanup();
return ss.str();
}
}
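// Example of the pairing above (illustrative bytes, not from a real device):
// recvData = { 0x02, 0x31, 0x41, 0x03 } with ret = 4 produces the 16-bit words
// 0x0231 and 0x4103, so the function returns "0231 4103 " (note the trailing space).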
// Extract the payload: a valid frame starts with hex 02 and ends with hex 03, with 16-bit hex data in between
std::string extractSubstring(std::string str) {
std::string str_sub;
if (str.size() % 2 == 0) // even length: strip the trailing three characters
{
str_sub = str.substr(0, str.size() - 3);
}
else // odd length: strip only the trailing character
{
str_sub = str.substr(0, str.size() - 1);
//std::cout << "str_sub:::::::" << str_sub;
}
if (str_sub.substr(0, 2) == "02" && str_sub.substr(str_sub.length() - 2) == "03") {
return str_sub;
}
return ""; // no valid frame markers: return an empty string (the original fell off the end here, which is undefined behavior)
}
bool start_detection_1 = false; // flag: whether detection should start
std::mutex data_mutex; // lock protecting the shared message queue
std::deque<std::string> data_queue; // message queue (newest at the front)
void str_socket_1(std::string ip, int ip_id) {
while (true) {
char* char_ip = &ip[0];
std::string str_1 = receiveFromServer(char_ip, ip_id);
std::string str_sub_1 = extractSubstring(str_1);
std::cout << "str_sub_1:::::::::" << str_sub_1 << "\n";
{
std::lock_guard<std::mutex> lock(data_mutex);
data_queue.push_front(str_sub_1);
if (data_queue.size() > 5) {
data_queue.pop_back(); // drop the oldest entry at the back of the queue
}
}
}
}
//void thread1(cv::VideoCapture capture, const std::string& window_name,const std::string& save_path) {
void thread1(const std::string& rtsp, const std::string& window_name,const std::string& save_path) {
cudaSetDevice(DEVICE);
std::string wts_name = "";
std::string engine_name = "";
bool is_p6 = false;
float gd = 0.0f, gw = 0.0f;
std::string img_dir;
std::string parse[10];
parse[0] = "./yolov5"; // unused
parse[1] = "-d"; // -s: build engine, -d: run engine
parse[2] = "yolov5s.wts"; // path to the .wts file, used only with -s
parse[3] = "./yolov5s.engine"; // path to the .engine file, used only with -d
parse[4] = "s"; // model type [n/s/m/l/x/n6/s6/m6/l6/x6 or c/c6 gd gw]
if (!parse_args(parse, wts_name, engine_name, is_p6, gd, gw, img_dir)) {
/*std::cerr << "arguments not right!" << std::endl;
std::cerr << "./yolov5 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6 or c/c6 gd gw] // serialize model to plan file" << std::endl;
std::cerr << "./yolov5 -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
return -1;*/
std::cerr << "arguments not right!" << std::endl;
//return -1;
}
// create a model using the API directly and serialize it to a stream
if (!wts_name.empty()) {
IHostMemory* modelStream{ nullptr };
APIToModel(BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
assert(modelStream != nullptr);
std::ofstream p(engine_name, std::ios::binary);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
//return -1;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
//return 0;
}
// deserialize the .engine and run inference
std::ifstream file(engine_name, std::ios::binary);
if (!file.good()) {
std::cerr << "read " << engine_name << " error!" << std::endl;
//return -1;
}
char* trtModelStream = nullptr;
size_t size = 0;
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
float prob[BATCH_SIZE * OUTPUT_SIZE]; // modified: not static here, since thread1 runs in two threads at once and a shared static buffer would race
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 2);
float* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
uint8_t* img_host = nullptr;
uint8_t* img_device = nullptr;
// prepare input data cache in pinned memory
CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
// prepare input data cache in device memory
CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
// video stream input: pass the stream URL or a file path
//cv::VideoCapture capture("rtsp://admin:hk123456@192.168.18.211/Streaming/Channels/3");
//cv::VideoCapture capture("./sample.mp4");
// camera input: 0 = built-in camera, 1 = USB camera
//cv::VideoCapture capture(0);
cv::VideoCapture capture(rtsp);
cv::Mat frame;
while (cv::waitKey(1) != 27)
{
capture >> frame;
cv::Mat img = frame;
if (img.empty()) // an empty frame means the stream dropped
{
std::cout << "No video input, reconnecting..." << std::endl; // report the lost camera, then reconnect
capture.release(); // modified: reopen the existing capture; the original constructed a new local VideoCapture that was destroyed immediately, so it never actually reconnected
capture.open(rtsp);
continue;
}
// fetch the latest socket message (shared with the receiver thread)
std::string received_data;
{
std::lock_guard<std::mutex> lock(data_mutex);
if (!data_queue.empty()) {
received_data = data_queue.front(); // newest message in the queue
}
else {
received_data = "12346"; // placeholder used when no message has arrived yet
}
}
std::vector<cv::Mat> imgs_buffer(BATCH_SIZE);
float* buffer_idx = (float*)buffers[inputIndex];
imgs_buffer[0] = img;
size_t size_image = img.cols * img.rows * 3;
size_t size_image_dst = INPUT_H * INPUT_W * 3;
//copy data to pinned memory
memcpy(img_host, img.data, size_image);
//copy data to device memory
CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);
buffer_idx += size_image_dst;
// Run inference
// everything above `start` is frame capture and preprocessing
auto start = std::chrono::system_clock::now();
doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
auto end = std::chrono::system_clock::now();
std::cout << "inference time2: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
//std::vector<std::vector<Yolo::Detection>> batch_res(fcount); // modified: fixed batch of 1
std::vector<std::vector<Yolo::Detection>> batch_res(1);
auto& res = batch_res[0];
nms(res, &prob[0], CONF_THRESH, NMS_THRESH);
std::cout << res.size() << std::endl;
auto& res_pre = batch_res[0];
cv::Mat img_pres = imgs_buffer[0];
cv::Mat img_pre2;
cv::resize(img_pres, img_pre2, cv::Size(1920, 1080)); // resize for display (img_pre2 is a deep copy, separate from img)
//std::cout << "img_pre" << img_pre.size();
std::cout << "res_pre.size()::" << res_pre.size() << std::endl;
// draw boxes
if (res_pre.size() > 0)
{
for (size_t j = 0; j < res_pre.size(); j++) {
cv::Rect r = get_rect(img_pre2, res_pre[j].bbox);
// center of the detection box
int x_center = r.x + r.width / 2;
int y_center = r.y + r.height / 2;
// detection region, chosen by hand for this camera view
int left_top_x = 840;
int left_top_y = 0;
int right_bottom_x = 1536;
int right_bottom_y = 1393;
cv::rectangle(img_pre2, cv::Point(left_top_x, left_top_y), cv::Point(right_bottom_x, right_bottom_y), cv::Scalar(0x27, 0xC1, 0x36), 1);
// (r.x, r.y) is the top-left corner of the box;
// (r.x + r.width, r.y + r.height) is the bottom-right corner.
// A detection only counts when the object is inside the predefined region,
// so compare the box center against the region's corners:
// left_top_x < center_x < right_bottom_x (and similarly for y; the check below only uses x).
// Post-processing starts here:
// report a detection only when a vehicle-trigger message was received AND the box center falls inside the region
std::cout << "x_center:::" << x_center << "\n";
std::cout << "y_center:::" << y_center << "\n";
std::cout << "left_top_x:::" << left_top_x << "\n";
std::cout << "right_bottom_x:::" << right_bottom_x << "\n";
if (left_top_x < x_center && x_center < right_bottom_x && !received_data.empty() && received_data.size() > 20)
{
//std::cout << "xxxxxxxxxxxxxxxxxx:" << data_queue.size(); //打印一下接收的列表内元素个数
//cv::rectangle(img_pre2, cv::Point(letf_top_x, letf_top_y), cv::Point(right_bottom_x, right_bottom_y), cv::Scalar(0, 0, 255), 20);
cv::rectangle(img_pre2, r, cv::Scalar(0x27, 0xC1, 0x36), 2); //画框颜色
cv::putText(img_pre2, std::to_string((int)res_pre[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2); //在图片上显示处理内容
//粗糙的方式,计算FPS
cv::putText(img, "FPS:" + std::to_string(1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()), cv::Point(img.cols * 0.02, img.rows * 0.05), cv::FONT_HERSHEY_PLAIN, 2, cv::Scalar(0, 255, 0), 2, 8);
// get the current time, down to milliseconds
auto now = std::chrono::system_clock::now();
// millisecond remainder: ms-resolution epoch count minus the whole seconds
uint64_t dis_millseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count()
- std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count() * 1000;
time_t tt = std::chrono::system_clock::to_time_t(now);
auto time_tm = localtime(&tt);
char strTime[25] = { 0 };
sprintf(strTime, "%d%02d%02d%02d%02d%02d%03d", time_tm->tm_year + 1900,
time_tm->tm_mon + 1, time_tm->tm_mday, time_tm->tm_hour,
time_tm->tm_min, time_tm->tm_sec, (int)dis_millseconds);
std::string strtime = strTime; // timestamp string, e.g. "20230425235959123"
std::cout << "object_name::" << std::to_string((int)res_pre[j].class_id) << "\n";
//cv::imwrite(save_path + strtime + ".jpg", img_pre2); // optionally save the annotated frame
}
}
cv::namedWindow(window_name, cv::WINDOW_FREERATIO);
cv::imshow(window_name, img_pre2);
}
else {
cv::imshow(window_name, img_pre2);
}
}
// Release stream and buffers
cudaStreamDestroy(stream);
CUDA_CHECK(cudaFree(img_device));
CUDA_CHECK(cudaFreeHost(img_host));
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
}
int main(int argc, char** argv) {
std::string rtsp1 = "rtsp://admin:hk123456@192.168.18.211/Streaming/Channels/1";
std::string rtsp2 = "rtsp://admin:hk123456@192.168.18.211/Streaming/Channels/3";
// start receiver thread 1
std::thread task01(str_socket_1, "172.18.18.252", 4001); // server IP (192.168.110.161 / 172.18.18.252) and port (9001 / 4001)
// start image-processing thread 1
std::thread task02(thread1, rtsp1, "Inference1", "E:/aaaa/");
// start image-processing thread 2
std::thread task03(thread1, rtsp2, "Inference2", "E:/bbbb/");
task01.join();
task02.join();
task03.join();
//task04.join();
return 0;
}