利用TensorFlow Object Detection API实现摄像头实时物体检测

最新推荐文章于 2025-05-14 09:22:16 发布

原创最新推荐文章于 2025-05-14 09:22:16 发布 · 2.3k 阅读

13 ·

CC 4.0 BY-SA版权

Deep Learning 专栏收录该内容

33 篇文章

订阅专栏

本文介绍了一个使用TensorFlow Object Detection API进行实时摄像头对象检测的Python代码示例。代码首先通过摄像头捕获实时视频流，然后利用预训练的SSD-MobileNet模型进行对象检测，并将检测到的对象框和类别标签实时显示在视频画面上。

部署运行你感兴趣的模型镜像

Object Detection API 的配置参考另一篇文章：https://blog.youkuaiyun.com/ytusdc/article/details/89883274

首先编写一段测试代码，实时捕获并显示摄像头画面，以验证摄像头能否被成功调用（按 q 键退出）：

import cv2
import numpy as np
 
# Camera smoke test: open capture device 0 and display its frames in a
# window until the user presses 'q' or a frame can no longer be read.
cap = cv2.VideoCapture(0)
try:
    while True:
        # `ret` is False when no frame could be grabbed; in that case `frame`
        # is not a valid image and must not be passed to imshow.
        ret, frame = cap.read()
        if not ret:
            print('Failed to read a frame from the camera')
            break
        # Show the frame in the window titled "capture".
        cv2.imshow("capture", frame)
        # Poll the keyboard briefly; leave the loop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if an error occurred.
    cap.release()
    cv2.destroyAllWindows()

最后给出完整代码，实现基于摄像头的实时物体检测：

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
# Appending ".." puts the parent directory on the module search path, which
# the later `object_detection.utils` imports rely on.
sys.path.append("..")
#from object_detection.utils import ops as utils_ops

# Fail fast with a clear message when the installed TensorFlow is older
# than 1.9.0.
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
import cv2

# Open video capture device 0; the detection loop below reads its frames from it.
cap = cv2.VideoCapture(0)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Directory that contains the model folder and the `data` folder used below.
# NOTE: the path is hard-coded for the author's machine; adjust it (or use
# the commented-out os.getcwd() variant) for your own checkout.
#CWD_PATH = os.getcwd()
CWD_PATH= "/home/vision/Project/tf-models/research/object_detection"
# Frozen inference graph file of the ssd_mobilenet_v1_coco_2017_11_17 model.
PATH_TO_CKPT = os.path.join(CWD_PATH, 'ssd_mobilenet_v1_coco_2017_11_17', 'frozen_inference_graph.pb')

# Label map file used to attach the correct label to each detected box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'data', 'mscoco_label_map.pbtxt')

# Passed as `max_num_classes` when the label map is converted to categories.
NUM_CLASSES = 90
# Build a dedicated graph and populate it from the frozen model file.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized graph first, then parse it once the file is closed.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as graph_file:
        serialized_graph = graph_file.read()
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # Imported with an empty name prefix; tensors are later looked up by
    # names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')

# Load the label map and turn it into the category index that the
# visualization helper receives.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)
# Real-time detection: read frames from `cap`, run the detection graph on each
# one, draw the resulting boxes/labels on the frame, and display it until the
# user presses 'q' or the camera stops delivering frames.
try:
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # The graph tensors never change between frames, so look them up
            # once instead of on every loop iteration.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score is the confidence for the corresponding detection; it
            # is shown on the result image together with the class label.
            scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
            classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
            fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

            while True:
                time_start = time.time()
                ret, image_np = cap.read()
                # `ret` is False when no frame could be grabbed; stop instead
                # of feeding an invalid frame to the model.
                if not ret:
                    print('Failed to read a frame from the camera')
                    break
                # OpenCV delivers frames in BGR channel order, while the
                # reference Object Detection API pipeline feeds RGB images to
                # the model, so convert the copy used for inference.
                image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_rgb, axis=0)
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    fetches,
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection, drawn in place
                # on the original (BGR) frame that is displayed below.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np, np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores), category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
                time_end = time.time()
                print('time cost',(time_end-time_start)*1000,'ms')
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
finally:
    # Always free the camera and close the window, even if an error occurred.
    cap.release()
    cv2.destroyAllWindows()

参考：https://blog.youkuaiyun.com/ytusdc/article/details/89883274

您可能感兴趣的与本文相关的镜像