Preface
In crowded scenes, where each person covers only a few pixels of the image or video and occlusion is severe, the stock detector often falls short, so we usually need to retrain our own faster rcnn weights. How to train them is covered in my earlier post 【detectron2 faster rcnn 训练自己的数据集】. This post explains how to plug those trained faster rcnn weights into slowfast.
To run slowfast detection on your own videos, you can also refer to my earlier post:
【SlowFast复现】SlowFast Networks for Video Recognition复现代码 使用自己的视频进行demo检测
1. Add your own trained weights
The weights we trained in 【detectron2 faster rcnn 训练自己的数据集】 were fine-tuned from model_final_b275ba.pkl, which is listed in detectron2's MODEL_ZOO.md under faster rcnn, R50-FPN 1x.
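For reference, here is a minimal sketch of that fine-tuning setup, assuming the person-only recipe from the earlier post; the dataset name "my_person_train" and the output directory are hypothetical placeholders:
# A minimal sketch, assuming the person-only fine-tuning recipe from the
# earlier detectron2 post; "my_person_train" and OUTPUT_DIR are hypothetical.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
)
# Start from the R50-FPN 1x checkpoint (model_final_b275ba.pkl in MODEL_ZOO.md).
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
)
cfg.DATASETS.TRAIN = ("my_person_train",)  # hypothetical registered dataset
cfg.DATASETS.TEST = ()
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # person only
cfg.OUTPUT_DIR = "./output"          # model_final.pth is written here

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()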
Open /SlowFast-master/demo/AVA/SLOWFAST_32x2_R101_50_50s4.yaml.
The config is as follows:
# Demo config for the 80-class AVA action detection demo, but with
# self-trained detectron2 faster rcnn weights.
TRAIN:
  ENABLE: False
  DATASET: ava
  BATCH_SIZE: 16
  EVAL_PERIOD: 1
  CHECKPOINT_PERIOD: 1
  AUTO_RESUME: True
  CHECKPOINT_FILE_PATH: '/home/lxn/0yangfan/Slowfast2/SlowFast-master/configs/AVA/c2/SLOWFAST_32x2_R101_50_50.pkl'  # path to the pretrained SlowFast model
  CHECKPOINT_TYPE: pytorch
DATA:
  NUM_FRAMES: 32
  SAMPLING_RATE: 2
  TRAIN_JITTER_SCALES: [256, 320]
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 256
  INPUT_CHANNEL_NUM: [3, 3]
DETECTION:
  ENABLE: True
  ALIGNED: False
AVA:
  BGR: False
  DETECTION_SCORE_THRESH: 0.8
  TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"]
SLOWFAST:
  ALPHA: 4
  BETA_INV: 8
  FUSION_CONV_CHANNEL_RATIO: 2
  FUSION_KERNEL_SZ: 5
RESNET:
  ZERO_INIT_FINAL_BN: True
  WIDTH_PER_GROUP: 64
  NUM_GROUPS: 1
  DEPTH: 101
  TRANS_FUNC: bottleneck_transform
  STRIDE_1X1: False
  NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]]
  SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]]
  SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]]
NONLOCAL:
  LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]]
  GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]]
  INSTANTIATION: dot_product
  POOL: [[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]]
BN:
  USE_PRECISE_STATS: False
  NUM_BATCHES_PRECISE: 200
SOLVER:
  MOMENTUM: 0.9
  WEIGHT_DECAY: 1e-7
  OPTIMIZING_METHOD: sgd
MODEL:
  NUM_CLASSES: 80
  ARCH: slowfast
  MODEL_NAME: SlowFast
  LOSS_FUNC: bce
  DROPOUT_RATE: 0.5
  HEAD_ACT: sigmoid
TEST:
  ENABLE: False
  DATASET: ava
  BATCH_SIZE: 8
DATA_LOADER:
  NUM_WORKERS: 2
  PIN_MEMORY: True
NUM_GPUS: 1
NUM_SHARDS: 1
RNG_SEED: 0
OUTPUT_DIR: .
# TENSORBOARD:
#   MODEL_VIS:
#     TOPK: 2
DEMO:
  ENABLE: True
  LABEL_FILE_PATH: "/home/lxn/0yangfan/Slowfast2/SlowFast-master/demo/AVA/ava.json"
  INPUT_VIDEO: "/home/lxn/0yangfan/Slowfast2/SlowFast-master/videoInAndOut/videoIn/class020.mp4"
  OUTPUT_FILE: "/home/lxn/0yangfan/Slowfast2/SlowFast-master/videoInAndOut/videoOut/class020.mp4"
  DETECTRON2_CFG: "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
  DETECTRON2_WEIGHTS: "/home/lxn/0yangfan/detectron2_repo/demo/output/model_final.pth"
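Before running the demo, it is worth checking that every absolute path in the config actually exists. A small sketch, assuming PyYAML is installed and the script is run from /SlowFast-master:
# A small sketch to sanity-check the paths in the demo YAML; assumes PyYAML
# is installed and that this runs from the /SlowFast-master directory.
import os
import yaml

with open("demo/AVA/SLOWFAST_32x2_R101_50_50s4.yaml") as f:
    cfg = yaml.safe_load(f)

paths = {
    "TRAIN.CHECKPOINT_FILE_PATH": cfg["TRAIN"]["CHECKPOINT_FILE_PATH"],
    "DEMO.LABEL_FILE_PATH": cfg["DEMO"]["LABEL_FILE_PATH"],
    "DEMO.INPUT_VIDEO": cfg["DEMO"]["INPUT_VIDEO"],
    "DEMO.DETECTRON2_WEIGHTS": cfg["DEMO"]["DETECTRON2_WEIGHTS"],
    # OUTPUT_FILE does not exist yet; check that its directory does.
    "DEMO.OUTPUT_FILE (dir)": os.path.dirname(cfg["DEMO"]["OUTPUT_FILE"]),
}
for name, p in paths.items():
    print("OK  " if os.path.exists(p) else "MISS", name, p)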
2. Modify predictor.py
A few lines need to be added to /SlowFast-master/slowfast/visualization/predictor.py, inside Detectron2Predictor.__init__. In particular, NUM_CLASSES must match the number of classes the detector was trained with (here 1, person only); otherwise detectron2 cannot load the trained box-head weights:
#################################################
# The following three settings are only needed when using
# self-trained weights; comment them out when using the
# official default weights.
self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
self.cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
########################################
The full code is as follows:
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import queue

import cv2
import torch
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

import slowfast.utils.checkpoint as cu
from slowfast.datasets import cv2_transform
from slowfast.models import build_model
from slowfast.utils import logging
from slowfast.visualization.utils import process_cv2_inputs

logger = logging.get_logger(__name__)


class Predictor:
    """
    Action Predictor for action recognition.
    """

    def __init__(self, cfg, gpu_id=None):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
            gpu_id (Optional[int]): GPU id.
        """
        if cfg.NUM_GPUS:
            self.gpu_id = (
                torch.cuda.current_device() if gpu_id is None else gpu_id
            )

        # Build the video model and print model statistics.
        self.model = build_model(cfg, gpu_id=gpu_id)
        self.model.eval()
        self.cfg = cfg

        if cfg.DETECTION.ENABLE:
            self.object_detector = Detectron2Predictor(cfg, gpu_id=self.gpu_id)

        logger.info("Start loading model weights.")
        cu.load_test_checkpoint(cfg, self.model)
        logger.info("Finish loading model weights")

    def __call__(self, task):
        """
        Returns the prediction results for the current task.
        Args:
            task (TaskInfo object): task object that contains
                the necessary information for action prediction. (e.g. frames, boxes)
        Returns:
            task (TaskInfo object): the same task info object but filled with
                prediction values (a tensor) and the corresponding boxes for
                action detection task.
        """
        if self.cfg.DETECTION.ENABLE:
            task = self.object_detector(task)

        frames, bboxes = task.frames, task.bboxes
        if bboxes is not None:
            bboxes = cv2_transform.scale_boxes(
                self.cfg.DATA.TEST_CROP_SIZE,
                bboxes,
                task.img_height,
                task.img_width,
            )
        if self.cfg.DEMO.INPUT_FORMAT == "BGR":
            frames = [
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames
            ]

        frames = [
            cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
            for frame in frames
        ]
        inputs = process_cv2_inputs(frames, self.cfg)
        if bboxes is not None:
            index_pad = torch.full(
                size=(bboxes.shape[0], 1),
                fill_value=float(0),
                device=bboxes.device,
            )

            # Pad frame index for each box.
            bboxes = torch.cat([index_pad, bboxes], axis=1)
        if self.cfg.NUM_GPUS > 0:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(
                        device=torch.device(self.gpu_id), non_blocking=True
                    )
            else:
                inputs = inputs.cuda(
                    device=torch.device(self.gpu_id), non_blocking=True
                )
        if self.cfg.DETECTION.ENABLE and not bboxes.shape[0]:
            preds = torch.tensor([])
        else:
            preds = self.model(inputs, bboxes)

        if self.cfg.NUM_GPUS:
            preds = preds.cpu()
            if bboxes is not None:
                bboxes = bboxes.detach().cpu()

        preds = preds.detach()
        task.add_action_preds(preds)
        if bboxes is not None:
            task.add_bboxes(bboxes[:, 1:])

        return task


class ActionPredictor:
    """
    Synchronous Action Prediction and Visualization pipeline with AsyncVis.
    """

    def __init__(self, cfg, async_vis=None, gpu_id=None):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
            async_vis (AsyncVis object): asynchronous visualizer.
            gpu_id (Optional[int]): GPU id.
        """
        self.predictor = Predictor(cfg=cfg, gpu_id=gpu_id)
        self.async_vis = async_vis

    def put(self, task):
        """
        Make prediction and put the results in `async_vis` task queue.
        Args:
            task (TaskInfo object): task object that contains
                the necessary information for action prediction. (e.g. frames, boxes)
        """
        task = self.predictor(task)
        self.async_vis.get_indices_ls.append(task.id)
        self.async_vis.put(task)

    def get(self):
        """
        Get the visualized clips if any.
        """
        try:
            task = self.async_vis.get()
        except (queue.Empty, IndexError):
            raise IndexError("Results are not available yet.")

        return task


class Detectron2Predictor:
    """
    Wrapper around Detectron2 to return the required predicted bounding boxes
    as a ndarray.
    """

    def __init__(self, cfg, gpu_id=None):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
            gpu_id (Optional[int]): GPU id.
        """
        self.cfg = get_cfg()
        self.cfg.merge_from_file(
            model_zoo.get_config_file(cfg.DEMO.DETECTRON2_CFG)
        )
        # Detection score threshold for faster rcnn. The official code reads
        # it from cfg.DEMO.DETECTRON2_THRESH (default 0.9); here it is
        # hard-coded to 0.7.
        # self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = cfg.DEMO.DETECTRON2_THRESH
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
        self.cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_WEIGHTS
        self.cfg.INPUT.FORMAT = cfg.DEMO.INPUT_FORMAT
        if cfg.NUM_GPUS and gpu_id is None:
            gpu_id = torch.cuda.current_device()
        self.cfg.MODEL.DEVICE = (
            "cuda:{}".format(gpu_id) if cfg.NUM_GPUS > 0 else "cpu"
        )

        #################################################
        # The following three settings are only needed when using
        # self-trained weights; comment them out when using the
        # official default weights.
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
        self.cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
        ########################################

        logger.info("Initialized Detectron2 Object Detection Model.")

        self.predictor = DefaultPredictor(self.cfg)

    def __call__(self, task):
        """
        Return bounding boxes predictions as a tensor.
        Args:
            task (TaskInfo object): task object that contains
                the necessary information for action prediction. (e.g. frames)
        Returns:
            task (TaskInfo object): the same task info object but filled with
                prediction values (a tensor) and the corresponding boxes for
                action detection task.
        """
        middle_frame = task.frames[len(task.frames) // 2]
        outputs = self.predictor(middle_frame)
        # Keep only human instances (class 0; with a single-class self-trained
        # model this keeps all detections).
        mask = outputs["instances"].pred_classes == 0
        # print("\n\n-------outputs--------")
        # print(outputs["instances"])
        # print("---------outputs------\n\n")
        pred_boxes = outputs["instances"].pred_boxes.tensor[mask]
        task.add_bboxes(pred_boxes)

        return task
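Before wiring the detector into slowfast, you can also sanity-check the self-trained weights on a single frame. A minimal sketch, assuming the same config and weight paths as above; "test_frame.jpg" is a hypothetical test image:
# A minimal sketch to test the self-trained detector on its own, assuming the
# same config/weight paths as above; "test_frame.jpg" is hypothetical.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
)
cfg.MODEL.WEIGHTS = "/home/lxn/0yangfan/detectron2_repo/demo/output/model_final.pth"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # person only, matching training
predictor = DefaultPredictor(cfg)

frame = cv2.imread("test_frame.jpg")  # hypothetical test image (BGR)
outputs = predictor(frame)
instances = outputs["instances"]
# With a single-class model, pred_classes == 0 keeps every detection.
boxes = instances.pred_boxes.tensor[instances.pred_classes == 0]
print(boxes)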
3. Run
From the /SlowFast-master directory, run:
python tools/run_net.py --cfg demo/AVA/SLOWFAST_32x2_R101_50_50s4.yaml
Result: