在 Windows 上使用 C++ 进行 Mask R-CNN 预测

本文档详细记录了如何在Windows环境中,将预训练的Keras Mask R-CNN模型转换为.pb文件,并使用C++进行预测的过程,包括keras训练代码的链接和Windows调用代码的步骤。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

我自己做下记录

keras 训练代码 

https://github.com/matterport/Mask_RCNN

1. 将 Keras 模型转换为 .pb 文件

import os

import tensorflow as tf
from keras import backend as K
from tensorflow.python.framework import graph_util

# Export script: freezes the Keras model wrapped by `model` into a single
# TensorFlow GraphDef (.pb) file whose outputs carry stable "output_*" names.
#
# NOTE(review): `model` is not defined in this snippet; it is expected to be an
# already-constructed object exposing a `keras_model` attribute with its
# weights loaded -- confirm against the training/inference code.
model_keras = model.keras_model
# All new operations will be in test mode from now on.
K.set_learning_phase(0)

# Create output layer with customized names. The order must follow the order
# of `model_keras.outputs`, because names are paired with outputs by index.
pred_node_names = ["detections", "mrcnn_class", "mrcnn_bbox", "mrcnn_mask",
                   "rois", "rpn_class", "rpn_bbox"]
pred_node_names = ["output_" + name for name in pred_node_names]
# The identity ops are created only for their side effect of adding named
# nodes to the graph; the returned tensors are not used afterwards.
pred = [tf.identity(model_keras.outputs[i], name=node_name)
        for i, node_name in enumerate(pred_node_names)]

sess = K.get_session()

# Get the object detection graph, with variables folded into constants so the
# result is self-contained.
od_graph_def = graph_util.convert_variables_to_constants(sess,
                                                         sess.graph.as_graph_def(),
                                                         pred_node_names)

# Make sure the output directory exists before writing into it.
model_dirpath = os.path.dirname("model/")
if not os.path.exists(model_dirpath):
    os.makedirs(model_dirpath)
filename = 'seg_model.pb'
pb_filepath = os.path.join(model_dirpath, filename)
print('Saving frozen graph {} ...'.format(os.path.basename(pb_filepath)))

frozen_graph_path = pb_filepath
with tf.gfile.GFile(frozen_graph_path, 'wb') as f:
    f.write(od_graph_def.SerializeToString())

 

 

2. Windows 下的 C++ 调用代码

#include "pch.h"
#include <iostream>
#include <tchar.h>
#define COMPILER_MSVC
#define NOMINMA

//#include "stdafx.h"
#include <iostream>

//#include <Eigen\\Dense>
#include "tensorflow/core/public/session.h"
#include "tensorflow/cc/ops/standard_ops.h"

using namespace tensorflow;

#define COMPILER_MSVC
#define NOMINMAX
#define _SCL_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS

#include <fstream>
#include <utility>
#include <vector>
#include <iostream>
#include <sstream>
#include <string>


#include <tensorflow/cc/ops/array_ops.h>
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include<vector>
using namespace cv;
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;

using namespace std;

// ensure TensorFlow C++ build OK
//int main() {
//    printf("Hello World from Tensorflow C libnrary version %s\n", TF_Version());
//    tensorflow::Session* session = tensorflow::NewSession(tensorflow::SessionOptions());
//    return 0;
//}


struct maskBox {
    float fScore;
    int x1;
    int x2;
    int y1;
    int y2;
    int area;

    vector<cv::Point> vecContourPt;
    int iClass;
};

//升序排列
bool cmpScore(maskBox lsh, maskBox rsh) {
    if (lsh.fScore < rsh.fScore)
        return true;
    else
        return false;
}

void nms(vector<maskBox> &boundingBox_, const float overlap_threshold, string modelname = "Union") {

    if (boundingBox_.empty()) {
        return;
    }
    //对各个候选框根据score的大小进行升序排列
    sort(boundingBox_.begin(), boundingBox_.end(), cmpScore);
    float IOU = 0;
    float maxX = 0;
    float maxY = 0;
    float minX = 0;
    float minY = 0;
    vector<int> vPick;
    int nPick = 0;
    multimap<float, int> vScores;   //存放升序排列后的score和对应的序号
    const int num_boxes = boundingBox_.size();
    vPick.resize(num_boxes);
    for (int i = 0; i < num_boxes; ++i) {
        vScores.insert(pair<float, int>(boundingBox_[i].fScore, i));
    }
    while (vScores.size() > 0) {
        int last = vScores.rbegin()->second;  //反向迭代器,获得vScores序列的最后那个序列号
        vPick[nPick] = last;
        nPick += 1;

        auto iter = vScores.end();
        iter--;
        vScores.erase(iter);

        for (multimap<float, int>::iterator it = vScores.begin(); it != vScores.end();) {
            int it_idx = it->second;
            maxX = max(boundingBox_.at(it_idx).x1, boundingBox_.at(last).x1);
            maxY = max(boundingBox_.at(it_idx).y1, boundingBox_.at(last).y1);
            minX = min(boundingBox_.at(it_idx).x2, boundingBox_.at(last).x2);
            minY = min(boundingBox_.at(it_idx).y2, boundingBox_.at(last).y2);
            //转换成了两个边界框相交区域的边长
            maxX = ((minX - maxX + 1) > 0) ? (minX - maxX + 1) : 0;
            maxY = ((minY - maxY + 1) > 0) ? (minY - maxY + 1) : 0;
            //求交并比IOU

            IOU = (maxX * maxY) / (boundingBox_.at(it_idx).area + boundingBox_.at(last).area - IOU);
            if (IOU > overlap_threshold) {
                it = vScores.erase(it++);    //删除交并比大于阈值的候选框,erase返回删除元素的下一个元素
            }
            else {
                it++;
            }
        
        }
        

    }

    vPick.resize(nPick);
    vector<maskBox> tmp_;
    tmp_.resize(nPick);
    for (int i = 0; i < nPick; i++) {
        tmp_[i] = boundingBox_[vPick[i]];
    }
    boundingBox_ = tmp_;

    
}

int main(int argc, char* argv[])
{

    cv::Mat inputMat;
     inputMat = cv::imread("F:\\data\\segdata\\test\\16378\\16378.jpg", CV_LOAD_IMAGE_COLOR);
//    cvtColor(inputMat, inputMat, CV_BGR2GRAY);
    int TF_MASKRCNN_IMG_WIDTHHEIGHT = 768;
    cv::Scalar TF_MASKRCNN_MEAN_PIXEL(123.7, 116.8, 103.9);
//    float TF_MASKRCNN_IMAGE_METADATA[38] = { 0, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, 0, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT,1, 0, 0 };
    float TF_MASKRCNN_IMAGE_METADATA[38] = { 0, inputMat.rows, inputMat.cols, 3, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, 17, 0, TF_MASKRCNN_IMG_WIDTHHEIGHT,TF_MASKRCNN_IMG_WIDTHHEIGHT, 0.627, 0 };

    cv::Mat dest = cv::Mat(inputMat.size(), CV_8UC3);
    dest = inputMat.clone();

    //Resizr to square with max dim, so we can resize it to 256x256
    int largestDim = inputMat.size().height > inputMat.size().width ? inputMat.size().height : inputMat.size().width;
    cv::Mat squareInputMat(cv::Size(largestDim, largestDim), CV_8UC3);
    int leftBorder = (largestDim - inputMat.size().width) / 2;
    int topBorder = (largestDim - inputMat.size().height) / 2;
    cv::copyMakeBorder(inputMat, squareInputMat, topBorder, largestDim - (inputMat.size().height + topBorder), leftBorder, largestDim - (inputMat.size().width + leftBorder), cv::BORDER_CONSTANT, cv::Scalar(0));
    cv::Mat resizedInputMat(cv::Size(TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT), CV_8UC3);
    cv::resize(squareInputMat, resizedInputMat, resizedInputMat.size(), 0, 0);

    cv::Mat dst = resizedInputMat.clone();
    // Need to "mold_image" like in mask rcnn
    cv::Mat moldedInput(resizedInputMat.size(), CV_32FC3);
    resizedInputMat.convertTo(moldedInput, CV_32FC3);
    cv::subtract(moldedInput, TF_MASKRCNN_MEAN_PIXEL, moldedInput);
    


    tensorflow::Tensor inputTensor(tensorflow::DT_FLOAT, { 1, moldedInput.size().height, moldedInput.size().width, 3 }); // single image instance with 3 channels
    float_t *p = inputTensor.flat<float_t>().data();
    cv::Mat inputTensorMat(moldedInput.size(), CV_32FC3, p);
    moldedInput.convertTo(inputTensorMat, CV_32FC3);

    int TF_MASKRCNN_IMAGE_METADATA_LENGTH = 38;

    // Copy the TF_MASKRCNN_IMAGE_METADATA data into a tensor
    tensorflow::Tensor inputMetadataTensor(tensorflow::DT_FLOAT, { 1, TF_MASKRCNN_IMAGE_METADATA_LENGTH });
    auto inputMetadataTensorMap = inputMetadataTensor.tensor<float, 2>();
    for (int i = 0; i < TF_MASKRCNN_IMAGE_METADATA_LENGTH; ++i) {
        inputMetadataTensorMap(0, i) = TF_MASKRCNN_IMAGE_METADATA[i];
    }

    // for specific 1920x1280 images
    auto input_anchors = tensorflow::Tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ 1,147312,4 }));
    auto anchors_API = input_anchors.tensor<float, 3>();
    //input_anchors.flat<float_t>()(0, 0, 0) = 1.111111;
    string fileName = "F:\\gc\\maskrcnntest2017\\maskrcnntest\\x64\\Release\\model\\anchors.txt";
    fstream in;
    in.open(fileName.c_str(), ios::in);
    if (!in.is_open()) {
        cout << "Can not find " << fileName << endl;
        system("pause");
    }
    string buff;
    int i = 0; //line i
    while (getline(in, buff)) {
        vector<float> nums;
        // string->char *
        char *s_input = (char *)buff.c_str();
        const char * split = ",";
        char *p2 = strtok(s_input, split);
        double a;
        while (p2 != NULL) {
            // char * -> int
            a = atof(p2);
            //cout << a << endl;
            nums.push_back(a);
            p2 = strtok(NULL, split);
        }//end while
        for (int b = 0; b < nums.size(); b++) {
            anchors_API(0, i, b) = nums[b];
        }//end for
        i++;
    }//end while
    in.close();


    string root_dir = "";
    string graph = "F:\\gc\\maskrcnntest2017\\maskrcnntest\\x64\\Release\\model\\seg_model.pb";
    // First we load and initialize the model.

    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    tensorflow::GraphDef graph_def;
    tensorflow::SessionOptions options;
    std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));

    Status load_graph_status =
        ReadBinaryProto(tensorflow::Env::Default(), graph_path, &graph_def);
    //for (int n = 0; n < graph_def.node_size(); ++n) {
    //    graph_def.mutable_node(n)->clear_device();
    //}

    //tfSession.reset(tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_CHECK_OK(session->Create(graph_def));
    //Status session_create_status = session->Create(graph_def);

    //Status load_graph_status = LoadGraph(graph_path, &session);
    if (!load_graph_status.ok()) {
        LOG(ERROR) << "LoadGraph ERROR!!!!" << load_graph_status;
        cout << load_graph_status << endl;
        return -1;
    }

    // Actually run the image through the model.
    std::vector<Tensor> outputs;
    tensorflow::Status run_status = session->Run({ { "input_image", inputTensor },{ "input_image_meta", inputMetadataTensor },{ "input_anchors",input_anchors } },
    { "output_detections", "output_mrcnn_class", "output_mrcnn_bbox", "output_mrcnn_mask",
        "output_rois", "output_rpn_class", "output_rpn_bbox" },
        {},
        &outputs);
    if (!run_status.ok()) {
        LOG(ERROR) << "Running model failed: " << run_status;
        return -1;
    }

    //if (outputs[3].shape().dims() != 5 || outputs[3].shape().dim_size(4) != 2)
    //{
    //    throw std::runtime_error("Expected mask dimensions to be [1,100,28,28,2] but got: " + outputs[3].shape().DebugString());
    //}
    vector<maskBox> vecBox;
    auto detectionsMap = outputs[0].tensor<float, 3>();
    auto mask = outputs[3].tensor<float, 5>();
    for (int i = 0; i < outputs[3].shape().dim_size(1); ++i)
    {

        auto y1 = detectionsMap(0, i, 0) * TF_MASKRCNN_IMG_WIDTHHEIGHT;
        float x1 = detectionsMap(0, i, 1) * TF_MASKRCNN_IMG_WIDTHHEIGHT;
        auto y2 = detectionsMap(0, i, 2) * TF_MASKRCNN_IMG_WIDTHHEIGHT;
        float x2 = detectionsMap(0, i, 3) * TF_MASKRCNN_IMG_WIDTHHEIGHT;
        auto scoreAtI = detectionsMap(0, i, 5); // detectionsMap(0, i, 1) 0.8862123; detectionsMap(0, i, 3) 0.91774625

 
        auto detectedClass = detectionsMap(0, i, 4);

        cout << x1 << " " << x2 << " " << y1 << " " << y2 << " " << scoreAtI << endl;
        maskBox stMaskBox;
        stMaskBox.fScore = scoreAtI;
        stMaskBox.iClass = detectedClass;
        auto walala = detectionsMap(0, i, 6);
        auto maskHeight = (y2 - y1), maskWidth = (x2 - x1);

        if (maskHeight != 0 && maskWidth != 0) {
            // Pointer arithmetic
            const int i0 = 0, /* size0 = (int)outputs[3].shape().dim_size(1), */ i1 = i,
                size1 = (int)outputs[3].shape().dim_size(1),
                h = (int)outputs[3].shape().dim_size(2),
                w = (int)outputs[3].shape().dim_size(3);
    
            int iClassNum = (int)outputs[3].shape().dim_size(4);
        //    int pointerLocationOfI = (i0*size1 + i1)*size2;
            int pointerLocationOfI = h * w * iClassNum * i;
            float_t *maskPointer = outputs[3].flat<float_t>().data();

            // The shape of the detection is [28,28,2], where the last index is the class of interest.
            // We'll extract index 1 because it's the toilet seat.
            cv::Mat initialMask(cv::Size(h, w), CV_32FC(iClassNum), &maskPointer[pointerLocationOfI]); // CV_32FC2 because I know size4 is 2
            cv::Mat detectedMask(initialMask.size(), CV_32FC1);
            cv::extractChannel(initialMask, detectedMask, (int)detectedClass);

            // Convert to B&W
            cv::Mat binaryMask(detectedMask.size(), CV_8UC1);
            cv::threshold(detectedMask, binaryMask, 0.5, 255, cv::THRESH_BINARY);

            // First scale and offset in relation to TF_MASKRCNN_IMG_WIDTHHEIGHT
            cv::Mat scaledDetectionMat(maskHeight, maskWidth, CV_8UC1);
            cv::resize(binaryMask, scaledDetectionMat, scaledDetectionMat.size(), 0, 0);

            vector<vector<cv::Point>> contours;
            scaledDetectionMat.convertTo(scaledDetectionMat, CV_8UC1);
            findContours(scaledDetectionMat, contours, CV_RETR_TREE, CHAIN_APPROX_NONE);

            int iMaxArea = 0;
            int iNum = 0;
            for (int c = 0; c < contours.size(); c++)
            {
                if (contours[c].size() == 0) continue;
                double area = contourArea(contours[c]);
                //        printf("area:%f \n", area);
                if (iMaxArea > area)
                {
                    iNum = c;
                }
            }
            cv::Mat scaledOffsetMat(moldedInput.size(), CV_8UC1, cv::Scalar(0));
            scaledDetectionMat.copyTo(scaledOffsetMat(cv::Rect(x1, y1, maskWidth, maskHeight)));

            cvtColor(scaledDetectionMat, scaledDetectionMat, CV_GRAY2BGR);
            int ilen = contours[iNum].size();
            for (int k = 0; k < ilen; k++)
            {
                Point pt = contours[iNum][k];
                Point org(x1, y1);
                pt = org+pt;
                contours[iNum][k] = pt;
            }


            //Scalar color(rand() / 255, rand() / 255, rand() / 255, rand() / 255);
            //drawContours(dst, contours, iNum, color);
            //Rect rect(x1, y1, x2 - x1, y2 - y1);
            //rectangle(dst, rect, color, 1);
        //    string strText = to_string(stBox.iClass) + string(" ") + to_string(stBox.fScore);
        //    putText(dst, strText, Point(stBox.x1, stBox.y1), 1, 1, color);


            stMaskBox.x1 = x1;
            stMaskBox.x2 = x2;
            stMaskBox.y1 = y1;
            stMaskBox.y2 = y2;
            stMaskBox.area = (x2 - x1)*(y2 - y1);
            stMaskBox.vecContourPt = contours[iNum];
            vecBox.push_back(stMaskBox);
        }

        /**/

    }

    nms(vecBox, 0.3, "Union");

    for (int i = 0; i < vecBox.size(); i++)
    {
        maskBox stBox;
        stBox = vecBox[i];

        vector<vector<cv::Point>> contours;
        contours.push_back(stBox.vecContourPt);
        Scalar color(rand() / 255, rand() / 255, rand() / 255, rand() / 255);
        drawContours(dst, contours, 0, color);
        Rect rect(stBox.x1, stBox.y1, stBox.x2-stBox.x1, stBox.y2-stBox.y1);


        rectangle(dst, rect, color, 1);
        string strText = to_string(stBox.iClass) + string(" ") + to_string(stBox.fScore);
        putText(dst, strText, Point(stBox.x1, stBox.y1), 2, 0.5, color);
    }
    cv::imshow("Detection Result", dst);
    cv::waitKey(0);
    //cv::imwrite("C:\\", dest);
    return 0;
}

3.

评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值