Using a Trained Mask R-CNN for Non-Real-Time Video Detection

This post describes video object detection and instance segmentation with Mask R-CNN: by modifying code from the open-source Mask_RCNN project, every frame of a video is run through detection and segmentation offline, and the processed frames are then stitched back into a video. It covers model configuration, weight loading, rendering instance masks, and the video-processing pipeline.


Continuing from the previous post, this builds on https://github.com/matterport/Mask_RCNN/blob/master/samples/shapes/train_shapes.ipynb

Replace the detection part of the code with:

# Inference config: single GPU, one image per detect() call
class InferenceConfig(ShapesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)
import cv2
import numpy as np
# Generate N random colors; the fixed seed keeps colors stable between calls
def random_colors(N):
    np.random.seed(1)
    colors = [tuple(255 * np.random.rand(3)) for _ in range(N)]
    return colors

def apply_mask(image, mask, color, alpha=0.5):
    """Blend `color` into `image` wherever `mask` is 1, with opacity `alpha`."""
    for n, c in enumerate(color):
        image[:, :, n] = np.where(
            mask == 1,
            image[:, :, n] * (1 - alpha) + alpha * c,
            image[:, :, n]
        )
    return image


def display_instances(image, boxes, masks, ids, names, scores):
    """
        take the image and results and apply the mask, box, and label
    """
    n_instances = boxes.shape[0]
    colors = random_colors(n_instances)

    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
    else:
        assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    for i, color in enumerate(colors):
        if not np.any(boxes[i]):
            continue

        y1, x1, y2, x2 = boxes[i]
        label = names[ids[i]]
        score = scores[i] if scores is not None else None
        caption = '{} {:.2f}'.format(label, score) if score else label
        mask = masks[:, :, i]

        image = apply_mask(image, mask, color)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        image = cv2.putText(
            image, caption, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2
        )

    return image

This code replaces the corresponding functions in https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/visualize.py. In hindsight, reading visualize.py and editing it directly would have been enough, but someone online had already written a ready-made version, so I adapted that instead, inelegant as it is.
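
Before turning it loose on a video, it is worth sanity-checking the replacement on a single frame. A minimal sketch, assuming model, dataset_val, and ROOT_DIR are still in scope from the notebook above, and that a test image named test.jpg (a placeholder name) sits in the root directory:

# Quick single-image check of display_instances (test.jpg is a placeholder)
img = cv2.imread(os.path.join(ROOT_DIR, 'test.jpg'))
r = model.detect([img], verbose=0)[0]          # one image in, one result dict out
out = display_instances(img, r['rois'], r['masks'],
                        r['class_ids'], dataset_val.class_names, r['scores'])
cv2.imwrite(os.path.join(ROOT_DIR, 'test_out.jpg'), out)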

Video processing

ROOT_DIR = os.path.abspath("../")                        # project root directory
video_path = os.path.join(ROOT_DIR, 'test_video.mp4')    # input video file
capture = cv2.VideoCapture(video_path)
VIDEO_SAVE_DIR = os.path.join(ROOT_DIR, 'save_pic')      # folder for processed frames
os.makedirs(VIDEO_SAVE_DIR, exist_ok=True)               # create it if missing
print(video_path)
print(VIDEO_SAVE_DIR)
frames = []
frame_count = 0
# these 2 lines can be removed if you don't have a 1080p camera.
#capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
#capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
batch_size = 1   # must equal inference_config.BATCH_SIZE (GPU_COUNT * IMAGES_PER_GPU)
while True:
    ret, frame = capture.read()
    # Bail out when the video file ends
    if not ret:
        print("1")
        break

    # Save each frame of the video to a list
    frame_count += 1
    frames.append(frame)
    print('frame_count :{0}'.format(frame_count))
    if len(frames) == batch_size:
        results = model.detect(frames, verbose=0)
        print('Predicted')
        for i, item in enumerate(zip(frames, results)):
            frame = item[0]
            r = item[1]
            frame = display_instances(frame, r['rois'], r['masks'], r['class_ids'], dataset_val.class_names, r['scores'])
            name = '{0}.jpg'.format(frame_count + i - batch_size)
            name = os.path.join(VIDEO_SAVE_DIR, name)
            cv2.imwrite(name, frame)
            print('writing to file:{0}'.format(name))
        # Clear the frames array to start the next batch
        frames = []

capture.release()

Note: don't render these frames inline in the notebook. As the images accumulate, the notebook file grows until it crashes; write them to disk instead.
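
One more caveat about batch_size in the loop above: matterport's model.detect requires the number of images handed to it to equal config.BATCH_SIZE (GPU_COUNT * IMAGES_PER_GPU), and any tail frames that don't fill a final batch are silently skipped by this loop. With batch_size = 1 neither issue bites, but if you raise it for throughput, the inference config must change in lockstep. A sketch, with 3 as an arbitrary example and a made-up class name:

# Hypothetical throughput variant: hand model.detect 3 frames per call.
# IMAGES_PER_GPU must match the loop's batch_size or detect() will refuse the batch.
class BatchedInferenceConfig(ShapesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 3

# ... rebuild the model with this config, then set:
batch_size = 3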

At the end you get a folder holding every processed frame.
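
A quick way to confirm no frames were dropped is to compare the count the container reports against the number of files written; note that CAP_PROP_FRAME_COUNT is only an estimate for some codecs:

# Compare reported frame count with the number of saved .jpg files
cap = cv2.VideoCapture(video_path)
expected = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))   # approximate for some codecs
cap.release()
written = len([f for f in os.listdir(VIDEO_SAVE_DIR) if f.endswith('.jpg')])
print('expected {} frames, wrote {}'.format(expected, written))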

Merging the frames into a video

def make_video(outvid, images=None, fps=30, size=None,
               is_color=True, format="FMP4"):
    """
    Create a video from a list of images.
 
    @param      outvid      output video
    @param      images      list of images to use in the video
    @param      fps         frame per second
    @param      size        size of each frame
    @param      is_color    color
    @param      format      see http://www.fourcc.org/codecs.php
    @return                 see http://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html
 
    The function relies on http://opencv-python-tutroals.readthedocs.org/en/latest/.
    By default, the video will have the size of the first image.
    It will resize every image to this size before adding them to the video.
    """
    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize
    fourcc = VideoWriter_fourcc(*format)
    vid = None
    for image in images:
        print(image)
        if not os.path.exists(image):
            raise FileNotFoundError(image)
        img = imread(image)
        if vid is None:
            if size is None:
                size = img.shape[1], img.shape[0]
            vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
        # Resize whenever the frame differs from the target size in either dimension
        if size[0] != img.shape[1] or size[1] != img.shape[0]:
            img = resize(img, size)
        vid.write(img)
    if vid is not None:
        vid.release()   # finalize the container so players can read the file
    return vid
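
Whether the "FMP4" FourCC is actually available depends on how your OpenCV build was compiled; if the writer silently produces an empty file, 'mp4v' is a common fallback (or 'XVID' together with an .avi output name). When calling make_video below, you would pass it like this:

# Fallback if FMP4 is missing from your OpenCV build
make_video(outvid, images, fps=30, format="mp4v")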

import glob
import os

# Directory of images to run detection on
root_dir = os.path.abspath("../")
images_dir = os.path.join(root_dir, "save_pic")
images = list(glob.iglob(os.path.join(images_dir, '*.*')))

# Sort the images by integer frame index (files are named '<index>.jpg')
images = sorted(images, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))

outvid = os.path.join(root_dir, "detect_out.mp4")
make_video(outvid, images, fps=30)
print('make video success')
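
As a last sanity check, the assembled file should open and report the expected frame count and fps:

# Confirm the output video is readable and report its basic properties
check = cv2.VideoCapture(outvid)
print('frames: {}, fps: {}'.format(
    int(check.get(cv2.CAP_PROP_FRAME_COUNT)),
    check.get(cv2.CAP_PROP_FPS)))
check.release()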

June 25, Haining
PS: getting MATLAB, OpenCV, and Python to talk to each other is one pitfall after another QAQ, and you can't even Google your way out.
