import cv2
import numpy as np
import pyzbar.pyzbar as pyzbar
import mediapipe as mp
import pytesseract
import webbrowser
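# Suggested setup (assumed package names): pip install opencv-python numpy pyzbar mediapipe pytesseract.
# pyzbar may additionally need the ZBar shared library on Linux/macOS, and pytesseract needs a
# system install of the Tesseract OCR engine; if Tesseract is not on PATH, it can be pointed to
# explicitly, e.g. pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# (hypothetical path, adjust to the local install).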
# Initialize the MediaPipe solutions (hand tracking and face detection)
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=4)
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
# Color range definitions (HSV color space)
COLOR_RANGES = {
    "red": ([0, 100, 100], [10, 255, 255]),
    "green": ([40, 40, 40], [80, 255, 255]),
    "blue": ([100, 100, 50], [140, 255, 255]),
    "yellow": ([20, 100, 100], [40, 255, 255]),
    "black": ([0, 0, 0], [180, 255, 30]),
    "white": ([0, 0, 200], [180, 30, 255])
}
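# Note: OpenCV stores hue in [0, 179], so red wraps around the top of the range and the
# "red" entry above only covers low hues. A second entry such as
# "red2": ([170, 100, 100], [179, 255, 255]) could be added (illustrative values) to
# catch the wrap-around reds.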
def detect_color(hsv_img):
    # Sample a small patch around the frame center and compare its mean HSV value
    # against each predefined range
    center_y, center_x = hsv_img.shape[0] // 2, hsv_img.shape[1] // 2
    radius = 5
    roi = hsv_img[center_y - radius:center_y + radius,
                  center_x - radius:center_x + radius]
    mean_hsv = np.mean(roi, axis=(0, 1))
    for color, (lower, upper) in COLOR_RANGES.items():
        if (mean_hsv >= np.array(lower)).all() and (mean_hsv <= np.array(upper)).all():
            return color
    return "unknown"
def detect_shape(cnt):
    # Classify a contour by the vertex count of its polygonal approximation
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
    if len(approx) == 3:
        return "triangle"
    elif len(approx) == 4:
        x, y, w, h = cv2.boundingRect(approx)
        aspect_ratio = w / float(h)
        return "square" if 0.95 <= aspect_ratio <= 1.05 else "rectangle"
    elif len(approx) == 5:
        return "pentagon"
    elif 6 <= len(approx) <= 8:
        return "circle"
    return "polygon"
def detect_gesture(hand_landmarks):
    # Count extended fingers by comparing fingertip landmarks with lower joints
    tip_ids = [4, 8, 12, 16, 20]
    fingers = []
    # Thumb: compared along x, which only works for one hand orientation
    if hand_landmarks.landmark[tip_ids[0]].x < hand_landmarks.landmark[tip_ids[0] - 1].x:
        fingers.append(1)
    else:
        fingers.append(0)
    # Other four fingers: tip above the PIP joint means the finger is extended
    for idx in range(1, 5):
        if hand_landmarks.landmark[tip_ids[idx]].y < hand_landmarks.landmark[tip_ids[idx] - 2].y:
            fingers.append(1)
        else:
            fingers.append(0)
    count = fingers.count(1)
    if count == 0:
        return "fist"
    elif count == 5:
        return "open hand"
    return f"finger_{count}"
cap = cv2.VideoCapture(0)
# UI layout parameters
INFO_PANEL_WIDTH = 300
TEXT_COLOR = (255, 255, 255)
PANEL_COLOR = (50, 50, 50)
last_qr = None  # remember the last opened QR payload so the browser is not reopened every frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Create the side information panel
    info_panel = np.zeros((frame.shape[0], INFO_PANEL_WIDTH, 3), dtype=np.uint8)
    info_panel[:] = PANEL_COLOR
    # QR code detection: open the decoded URL once, not on every frame it stays visible
    decoded = pyzbar.decode(frame)
    if decoded:
        url = decoded[0].data.decode()
        if url != last_qr:
            webbrowser.open(url)
            last_qr = url
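    # pyzbar also decodes ordinary barcodes, so the payload is not guaranteed to be a URL;
    # a scheme check (e.g. url.startswith("http")) before opening the browser would be a
    # sensible extra guard.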
    # Color recognition at the frame center
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    color = detect_color(hsv)
    # Shape detection from edge contours
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    detected_shapes = []
    for cnt in contours:
        if cv2.contourArea(cnt) < 1000:  # skip small, noisy contours
            continue
        shape = detect_shape(cnt)
        x, y, w, h = cv2.boundingRect(cnt)
        detected_shapes.append(shape)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, shape, (x, y + h + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    # Gesture recognition with MediaPipe Hands
    gesture = "none"
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            gesture = detect_gesture(hand_landmarks)
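    # With max_num_hands=4 several hands may be detected, but only the gesture of the last
    # hand in the loop ends up on the info panel; collecting gestures into a list would be
    # needed to display all of them.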
    # Face detection with MediaPipe Face Detection
    face_count = 0
    rgb_face = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results_faces = face_detection.process(rgb_face)
    if results_faces.detections:
        face_count = len(results_faces.detections)
        for detection in results_faces.detections:
            mp_drawing.draw_detection(frame, detection)
    # Text recognition (OCR) with Tesseract
    gray_text = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    data = pytesseract.image_to_data(gray_text, output_type=pytesseract.Output.DICT)
    detected_text = []
    for i in range(len(data['text'])):
        # conf may come back as a string or a float depending on the pytesseract version
        if float(data['conf'][i]) > 40:
            x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
            text = data['text'][i].strip()
            if text:
                detected_text.append(text)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                cv2.putText(frame, text, (x, y + h + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
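    # Running Tesseract on every frame is expensive; throttling the OCR step (for example,
    # only every Nth frame) would keep the preview responsive.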
    # Update the information panel
    y_offset = 20
    cv2.putText(info_panel, f"Color: {color}", (10, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR, 1)
    y_offset += 40
    cv2.putText(info_panel, f"Gesture: {gesture}", (10, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR, 1)
    y_offset += 40
    cv2.putText(info_panel, f"Faces: {face_count}", (10, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR, 1)
    y_offset += 40
    cv2.putText(info_panel, "Shapes detected:", (10, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR, 1)
    y_offset += 30
    for shape in set(detected_shapes):
        cv2.putText(info_panel, f"- {shape}", (10, y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, TEXT_COLOR, 1)
        y_offset += 25
    y_offset += 20
    cv2.putText(info_panel, "Text detected:", (10, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR, 1)
    y_offset += 30
    for text in set(detected_text):
        cv2.putText(info_panel, f"- {text}", (10, y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, TEXT_COLOR, 1)
        y_offset += 25
    # Combine the main frame and the information panel side by side
    combined = np.hstack((frame, info_panel))
    cv2.imshow('Multi Detection System', combined)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
Python program: use the computer's webcam to simultaneously recognize the color at the center of the frame, QR codes, and any text strings, gestures, faces, and object shapes present in the view.