numpy.minimum()和cv2.bitwise_and()

这篇博客介绍了NumPy的`numpy.minimum()`函数和OpenCV的`cv2.bitwise_and()`函数。`numpy.minimum()`对两个数组逐元素比较并取较小值,返回类型为numpy.ndarray。而`cv2.bitwise_and()`对两个数组(图像)逐元素进行按位与操作,适用于图像处理中像素级别的操作(例如配合掩码提取感兴趣区域),在Python中其返回值同样是numpy.ndarray。这两个函数在数据分析和图像处理领域有广泛应用。

numpy.minimum(x1, x2)的输入与返回值的数据类型均为numpy.ndarray(输入也可以是能转换为数组的对象,如列表或标量)

numpy.minimum(x1, x2, /, out=None, *, where=True, casting='same_kind', order='K', dtype=None, subok=True[, signature, extobj]) = <ufunc 'minimum'>

在这里插入图片描述
cv2.bitwise_and(src1, src2)的输入与返回值在Python中同样是numpy.ndarray(即OpenCV文档中的array/Mat)
在这里插入图片描述

参考资料
python numpy minimum用法及代码示例

#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Computer Camera Cuboid Detection System - Traditional Algorithm Approach Features: 1. 支持红、蓝、绿三种颜色物块识别 2. 优化红色识别,减少橙色干扰 3. 增大了最大轮廓识别面积 4. 实时桌面预览性能统计 """ import cv2 import time import numpy as np import sys import logging # Configure logging system logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.StreamHandler(sys.stdout), logging.FileHandler('cuboid_detection.log') ] ) logger = logging.getLogger(__name__) class CuboidDetectionSystem: def __init__(self, camera_index=0): # Camera configuration self.camera_index = camera_index self.cap = None # Detection parameters self.min_area = 1000 # Minimum contour area to consider self.max_area = 150000 # Increased maximum contour area (was 50000) self.aspect_ratio_min = 0.3 # Minimum aspect ratio for rectangles self.aspect_ratio_max = 3.0 # Maximum aspect ratio for rectangles self.circularity_thresh = 0.6 # Circularity threshold for cylinders # Color detection parameters (in HSV space) # Red - optimized to reduce orange interference self.red_lower1 = np.array([0, 150, 100]) # Increased saturation and value self.red_upper1 = np.array([10, 255, 255]) self.red_lower2 = np.array([170, 150, 100]) # Increased saturation and value self.red_upper2 = np.array([180, 255, 255]) # Blue self.blue_lower = np.array([100, 120, 70]) self.blue_upper = np.array([130, 255, 255]) # Green - added for green objects self.green_lower = np.array([35, 80, 60]) # Green range self.green_upper = np.array([85, 255, 255]) # Performance tracking self.fps_history = [] self.detection_history = [] self.last_detection_time = 0 # Initialize camera self.open_camera() logger.info("Cuboid Detection System (Traditional Algorithm) initialized successfully") def open_camera(self): """Open computer's external camera""" try: self.cap = cv2.VideoCapture(self.camera_index) if not self.cap.isOpened(): # Try common alternative indices for idx in [2, 1, 0]: self.cap 
= cv2.VideoCapture(idx) if self.cap.isOpened(): self.camera_index = idx break if not self.cap.isOpened(): logger.error("Unable to open any camera!") return False # Set camera resolution self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) logger.info(f"Camera opened successfully at index {self.camera_index}") return True except Exception as e: logger.error(f"Camera initialization failed: {str(e)}") return False def preprocess_frame(self, frame): """Preprocess frame for contour detection""" # Convert to HSV color space hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # Blur to reduce noise hsv = cv2.GaussianBlur(hsv, (5, 5), 0) # Create masks for red, blue and green red_mask1 = cv2.inRange(hsv, self.red_lower1, self.red_upper1) red_mask2 = cv2.inRange(hsv, self.red_lower2, self.red_upper2) red_mask = cv2.bitwise_or(red_mask1, red_mask2) blue_mask = cv2.inRange(hsv, self.blue_lower, self.blue_upper) green_mask = cv2.inRange(hsv, self.green_lower, self.green_upper) # Combine masks color_mask = cv2.bitwise_or(red_mask, blue_mask) color_mask = cv2.bitwise_or(color_mask, green_mask) # Apply morphological operations to clean up the mask kernel = np.ones((7, 7), np.uint8) color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, kernel) color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, kernel) # Additional dilation to fill gaps color_mask = cv2.dilate(color_mask, kernel, iterations=1) return color_mask, red_mask, blue_mask, green_mask def detect_shapes(self, mask, frame): """Detect rectangular and circular shapes in the mask""" contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) detected_objects = [] for contour in contours: # Filter by area - max_area increased to 150000 area = cv2.contourArea(contour) if area < self.min_area or area > self.max_area: continue # Approximate the contour to a polygon peri = cv2.arcLength(contour, True) approx = cv2.approxPolyDP(contour, 0.04 * peri, True) # Calculate 
shape properties x, y, w, h = cv2.boundingRect(approx) aspect_ratio = float(w) / h circularity = 4 * np.pi * area / (peri * peri) if peri > 0 else 0 # Detect rectangles (books, boxes) if len(approx) == 4 and self.aspect_ratio_min < aspect_ratio < self.aspect_ratio_max: # Calculate the angles between consecutive edges vectors = [] for i in range(4): pt1 = approx[i][0] pt2 = approx[(i + 1) % 4][0] vectors.append(np.array(pt2) - np.array(pt1)) # Calculate angles between consecutive vectors angles = [] for i in range(4): v1 = vectors[i] v2 = vectors[(i + 1) % 4] cos_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-5) angle = np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi angles.append(angle) # Check if angles are close to 90 degrees (rectangle) if all(70 < angle < 110 for angle in angles): detected_objects.append({ 'type': 'rectangle', 'contour': contour, 'approx': approx, 'center': (x + w // 2, y + h // 2), 'box': (x, y, w, h), 'area': area }) # Detect circles/cylinders elif circularity > self.circularity_thresh: detected_objects.append({ 'type': 'cylinder', 'contour': contour, 'approx': approx, 'center': (x + w // 2, y + h // 2), 'box': (x, y, w, h), 'area': area }) return detected_objects def detect_colors(self, frame, detected_objects, red_mask, blue_mask, green_mask): """Determine the color of detected objects""" results = [] for obj in detected_objects: x, y, w, h = obj['box'] # Create mask for the object region obj_mask = np.zeros(frame.shape[:2], dtype=np.uint8) cv2.drawContours(obj_mask, [obj['contour']], -1, 255, -1) # Extract the object region from color masks obj_red = cv2.bitwise_and(red_mask, red_mask, mask=obj_mask) obj_blue = cv2.bitwise_and(blue_mask, blue_mask, mask=obj_mask) obj_green = cv2.bitwise_and(green_mask, green_mask, mask=obj_mask) # Count red, blue and green pixels in the object region red_pixels = cv2.countNonZero(obj_red) blue_pixels = cv2.countNonZero(obj_blue) green_pixels = cv2.countNonZero(obj_green) 
total_pixels = cv2.countNonZero(obj_mask) # Determine dominant color color = "unknown" if total_pixels > 0: red_ratio = red_pixels / total_pixels blue_ratio = blue_pixels / total_pixels green_ratio = green_pixels / total_pixels # Require at least 40% dominance for color classification if red_ratio > 0.4 and red_ratio > blue_ratio and red_ratio > green_ratio: color = "red" elif blue_ratio > 0.4 and blue_ratio > red_ratio and blue_ratio > green_ratio: color = "blue" elif green_ratio > 0.4 and green_ratio > red_ratio and green_ratio > blue_ratio: color = "green" # Add to results results.append({ 'type': obj['type'], 'color': color, 'center': obj['center'], 'box': obj['box'], 'contour': obj['contour'], 'timestamp': time.time() }) return results def detect_cuboids(self, frame): """Detect cuboid objects using traditional computer vision techniques""" # Step 1: Preprocess frame to create color mask color_mask, red_mask, blue_mask, green_mask = self.preprocess_frame(frame) # Step 2: Detect shapes detected_objects = self.detect_shapes(color_mask, frame) # Step 3: Detect colors of the shapes results = self.detect_colors(frame, detected_objects, red_mask, blue_mask, green_mask) return results, color_mask def run(self): """Main loop for desktop preview and detection""" logger.info("Starting main detection loop") window_name = "Cuboid Detection (Traditional Algorithm)" cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) cv2.resizeWindow(window_name, 1200, 800) # Create mask window cv2.namedWindow("Color Mask", cv2.WINDOW_NORMAL) cv2.resizeWindow("Color Mask", 600, 400) frame_count = 0 start_time = time.time() try: while True: # Start timer for FPS calculation frame_start = time.time() # Get frame from camera ret, frame = self.cap.read() if not ret: logger.warning("Failed to capture frame") time.sleep(0.1) continue frame_count += 1 # Detect cuboids results, color_mask = self.detect_cuboids(frame) # Draw results on frame for result in results: color_type = result['color'] obj_type = 
result['type'] x, y, w, h = result['box'] center_x, center_y = result['center'] # Determine drawing color based on detected color if color_type == "red": color = (0, 0, 255) # Red in BGR elif color_type == "blue": color = (255, 0, 0) # Blue in BGR elif color_type == "green": color = (0, 255, 0) # Green in BGR else: color = (0, 255, 255) # Yellow for unknown # Draw bounding box cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2) # Draw contour cv2.drawContours(frame, [result['contour']], -1, color, 2) # Draw center point cv2.circle(frame, (center_x, center_y), 5, color, -1) # Draw label label = f"{color_type} {obj_type}" cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) # Calculate FPS current_time = time.time() elapsed_time = current_time - start_time fps = frame_count / elapsed_time if elapsed_time > 0 else 0 # Update FPS history (keep last 30 values) self.fps_history.append(fps) if len(self.fps_history) > 30: self.fps_history.pop(0) avg_fps = sum(self.fps_history) / len(self.fps_history) if self.fps_history else 0 # Update detection history detection_status = 1 if results else 0 self.detection_history.append(detection_status) if len(self.detection_history) > 30: self.detection_history.pop(0) detection_rate = sum(self.detection_history) / len(self.detection_history) * 100 # Display performance stats stats_y = 30 cv2.putText(frame, f"FPS: {avg_fps:.1f}", (10, stats_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) cv2.putText(frame, f"Detection Rate: {detection_rate:.1f}%", (10, stats_y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) cv2.putText(frame, f"Objects: {len(results)}", (10, stats_y + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 255), 2) # Display algorithm info cv2.putText(frame, "Algorithm: Traditional CV (Contour + Color Analysis)", (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1) # Show frames cv2.imshow(window_name, frame) cv2.imshow("Color Mask", color_mask) # Process keyboard 
input key = cv2.waitKey(1) & 0xFF if key == ord('q') or key == 27: # 'q' or ESC break elif key == ord('c'): # Capture screenshot timestamp = time.strftime("%Y%m%d-%H%M%S") filename = f"capture_{timestamp}.png" cv2.imwrite(filename, frame) logger.info(f"Screenshot saved as {filename}") elif key == ord('r'): # Reset detection history self.detection_history = [] self.fps_history = [] start_time = time.time() frame_count = 0 logger.info("Reset performance counters") elif key == ord('d'): # Toggle debug mode cv2.imshow("Red Mask", self.red_mask) cv2.imshow("Green Mask", self.green_mask) cv2.imshow("Blue Mask", self.blue_mask) logger.info("Debug masks displayed") # Calculate processing time frame_time = time.time() - frame_start target_frame_time = 1 / 30 # Target 30 FPS if frame_time < target_frame_time: time.sleep(target_frame_time - frame_time) except Exception as e: logger.error(f"Main loop exception: {str(e)}", exc_info=True) finally: self.cleanup() def cleanup(self): """Clean up resources""" # Release camera if self.cap and self.cap.isOpened(): self.cap.release() logger.info("Camera resources released") # Close windows cv2.destroyAllWindows() logger.info("Program exited safely") def print_controls(): """Print program controls""" print("=" * 70) print("Computer Camera Cuboid Detection System - Traditional Algorithm") print("=" * 70) print("Detects cuboid objects (books, boxes, etc.) 
using computer vision techniques") print("Supports RED, BLUE and GREEN objects") print("Desktop window shows real-time preview with bounding boxes") print("Second window shows the color mask used for detection") print("=" * 70) print("Controls:") print(" q or ESC - Quit program") print(" c - Capture screenshot") print(" r - Reset performance counters") print(" d - Show debug masks (red, green, blue)") print("=" * 70) print("Detection parameters:") print(" - Red, blue and green color detection in HSV space") print(" - Rectangle detection based on polygon approximation and angle analysis") print(" - Cylinder detection based on circularity metric") print(f" - Max contour area: 150000 (was 50000)") print("=" * 70) if __name__ == "__main__": print_controls() # Default camera index (0 is usually the built-in or first external camera) camera_index = 0 # Allow specifying camera index from command line if len(sys.argv) > 1: try: camera_index = int(sys.argv[1]) print(f"Using camera index: {camera_index}") except ValueError: print(f"Invalid camera index: {sys.argv[1]}, using default (0)") try: detector = CuboidDetectionSystem(camera_index=camera_index) detector.run() except Exception as e: logger.error(f"System startup failed: {str(e)}", exc_info=True) print(f"System startup failed: {str(e)}") 现在就只是简单的将这个桌面运行的代码,完整的转化为在vnc上运行的代码就可以了,因为这个代码的效果是很不错的,我现在只想看看它在vnc上运行时机器狗的识别效果如何!!!! 给出完整的转换代码!!!
08-09
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 机器狗 VNC 实时立方体检测 依赖: sudo apt update && sudo apt install python3-pip -y pip3 install picamera2 opencv-python numpy """ import cv2 import numpy as np import time import json import argparse import logging from picamera2 import Picamera2 from libcamera import controls logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(), logging.FileHandler('dog_cuboid.log')] ) logger = logging.getLogger(__name__) class DogCuboidDetector: def __init__(self, cfg): self.cfg = cfg self.picam2 = None self.init_camera() self.build_windows() # ---------- 摄像头 ---------- def init_camera(self): try: self.picam2 = Picamera2() config = self.picam2.create_preview_configuration( main={"size": self.cfg["resolution"], "format": "RGB888"}, controls={"FrameRate": self.cfg["fps"]} ) self.picam2.configure(config) self.picam2.start() time.sleep(1) logger.info(f"Camera {self.cfg['resolution']}@{self.cfg['fps']} started") except Exception as e: logger.error(f"Camera init failed: {e}") raise # ---------- 窗口 ---------- def build_windows(self): cv2.namedWindow("Live", cv2.WINDOW_NORMAL) cv2.namedWindow("Mask", cv2.WINDOW_NORMAL) cv2.resizeWindow("Live", *self.cfg["window_size"]) cv2.resizeWindow("Mask", *self.cfg["mask_size"]) # ---------- 颜色阈值 ---------- def make_masks(self, hsv): masks = {} for color, (low, high) in self.cfg["hsv_ranges"].items(): masks[color] = cv2.inRange(hsv, np.array(low), np.array(high)) # 合并 mask_all = np.zeros_like(masks["red"]) for m in masks.values(): mask_all = cv2.bitwise_or(mask_all, m) # 形态学 kernel = np.ones((3, 3), np.uint8) mask_all = cv2.morphologyEx(mask_all, cv2.MORPH_OPEN, kernel) mask_all = cv2.morphologyEx(mask_all, cv2.MORPH_CLOSE, kernel) return mask_all, masks # ---------- 形状检测 ---------- def detect_shapes(self, mask, frame): contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) objs = [] for cnt in contours: area = 
cv2.contourArea(cnt) if area < self.cfg["min_area"] or area > self.cfg["max_area"]: continue peri = cv2.arcLength(cnt, True) approx = cv2.approxPolyDP(cnt, 0.04 * peri, True) x, y, w, h = cv2.boundingRect(approx) aspect = float(w) / h circ = 4 * np.pi * area / (peri * peri + 1e-6) # 矩形 if len(approx) == 4 and 0.3 < aspect < 3: objs.append({"type": "rectangle", "cnt": cnt, "box": (x, y, w, h)}) # 圆柱 elif circ > 0.65: objs.append({"type": "cylinder", "cnt": cnt, "box": (x, y, w, h)}) return objs # ---------- 主循环 ---------- def run(self): logger.info("Start detection loop") fps = 0 frame_idx = 0 t0 = time.time() while True: frame = self.picam2.capture_array() hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV) mask_all, masks = self.make_masks(hsv) objs = self.detect_shapes(mask_all, frame) # 绘制 vis = frame.copy() for o in objs: x, y, w, h = o["box"] cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2) cv2.drawContours(vis, [o["cnt"]], -1, (255, 0, 0), 1) # FPS frame_idx += 1 fps = frame_idx / (time.time() - t0) cv2.putText(vis, f"FPS:{fps:.1f} Objects:{len(objs)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2) # 显示 cv2.imshow("Live", vis) cv2.imshow("Mask", mask_all) key = cv2.waitKey(1) & 0xFF if key in (ord('q'), 27): break elif key == ord('c'): fname = f"capture_{int(time.time())}.jpg" cv2.imwrite(fname, vis) logger.info(f"Saved {fname}") self.cleanup() def cleanup(self): cv2.destroyAllWindows() if self.picam2: self.picam2.stop() logger.info("Stopped") # ---------- 配置 ---------- DEFAULT_CFG = { "resolution": [640, 480], "fps": 15, "window_size": [800, 600], "mask_size": [400, 300], "min_area": 800, "max_area": 60000, "hsv_ranges": { "red": [[0, 100, 60], [10, 255, 255]], "red2": [[170, 100, 60], [180, 255, 255]], "blue": [[100, 80, 40], [130, 255, 255]], "green": [[35, 60, 40], [85, 255, 255]] } } # ---------- 入口 ---------- if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--cfg", default="", help="JSON config file") 
args = parser.parse_args() cfg = DEFAULT_CFG.copy() if args.cfg and os.path.isfile(args.cfg): cfg.update(json.load(open(args.cfg))) try: DogCuboidDetector(cfg).run() except KeyboardInterrupt: logger.info("User quit") except Exception as e: logger.exception("Fatal error", exc_info=e) 参考这个代码,这个代码的效果很差,我不知道是不是用了形状优先的思路完成的。 #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Computer Camera Cuboid Detection System - Traditional Algorithm Approach Features: 1. 支持红、蓝、绿三种颜色物块识别 2. 优化红色识别,减少橙色干扰 3. 增大了最大轮廓识别面积 4. 实时桌面预览性能统计 """ import cv2 import time import numpy as np import sys import logging # Configure logging system logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.StreamHandler(sys.stdout), logging.FileHandler('cuboid_detection.log') ] ) logger = logging.getLogger(__name__) class CuboidDetectionSystem: def __init__(self, camera_index=0): # Camera configuration self.camera_index = camera_index self.cap = None # Detection parameters self.min_area = 1000 # Minimum contour area to consider self.max_area = 150000 # Increased maximum contour area (was 50000) self.aspect_ratio_min = 0.3 # Minimum aspect ratio for rectangles self.aspect_ratio_max = 3.0 # Maximum aspect ratio for rectangles self.circularity_thresh = 0.6 # Circularity threshold for cylinders # Color detection parameters (in HSV space) # Red - optimized to reduce orange interference self.red_lower1 = np.array([0, 150, 100]) # Increased saturation and value self.red_upper1 = np.array([10, 255, 255]) self.red_lower2 = np.array([170, 150, 100]) # Increased saturation and value self.red_upper2 = np.array([180, 255, 255]) # Blue self.blue_lower = np.array([100, 120, 70]) self.blue_upper = np.array([130, 255, 255]) # Green - added for green objects self.green_lower = np.array([35, 80, 60]) # Green range self.green_upper = np.array([85, 255, 255]) # Performance tracking self.fps_history = [] self.detection_history = [] self.last_detection_time = 0 # 
Initialize camera self.open_camera() logger.info("Cuboid Detection System (Traditional Algorithm) initialized successfully") def open_camera(self): """Open computer's external camera""" try: self.cap = cv2.VideoCapture(self.camera_index) if not self.cap.isOpened(): # Try common alternative indices for idx in [2, 1, 0]: self.cap = cv2.VideoCapture(idx) if self.cap.isOpened(): self.camera_index = idx break if not self.cap.isOpened(): logger.error("Unable to open any camera!") return False # Set camera resolution self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) logger.info(f"Camera opened successfully at index {self.camera_index}") return True except Exception as e: logger.error(f"Camera initialization failed: {str(e)}") return False def preprocess_frame(self, frame): """Preprocess frame for contour detection""" # Convert to HSV color space hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # Blur to reduce noise hsv = cv2.GaussianBlur(hsv, (5, 5), 0) # Create masks for red, blue and green red_mask1 = cv2.inRange(hsv, self.red_lower1, self.red_upper1) red_mask2 = cv2.inRange(hsv, self.red_lower2, self.red_upper2) red_mask = cv2.bitwise_or(red_mask1, red_mask2) blue_mask = cv2.inRange(hsv, self.blue_lower, self.blue_upper) green_mask = cv2.inRange(hsv, self.green_lower, self.green_upper) # Combine masks color_mask = cv2.bitwise_or(red_mask, blue_mask) color_mask = cv2.bitwise_or(color_mask, green_mask) # Apply morphological operations to clean up the mask kernel = np.ones((7, 7), np.uint8) color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, kernel) color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, kernel) # Additional dilation to fill gaps color_mask = cv2.dilate(color_mask, kernel, iterations=1) return color_mask, red_mask, blue_mask, green_mask def detect_shapes(self, mask, frame): """Detect rectangular and circular shapes in the mask""" contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 
detected_objects = [] for contour in contours: # Filter by area - max_area increased to 150000 area = cv2.contourArea(contour) if area < self.min_area or area > self.max_area: continue # Approximate the contour to a polygon peri = cv2.arcLength(contour, True) approx = cv2.approxPolyDP(contour, 0.04 * peri, True) # Calculate shape properties x, y, w, h = cv2.boundingRect(approx) aspect_ratio = float(w) / h circularity = 4 * np.pi * area / (peri * peri) if peri > 0 else 0 # Detect rectangles (books, boxes) if len(approx) == 4 and self.aspect_ratio_min < aspect_ratio < self.aspect_ratio_max: # Calculate the angles between consecutive edges vectors = [] for i in range(4): pt1 = approx[i][0] pt2 = approx[(i + 1) % 4][0] vectors.append(np.array(pt2) - np.array(pt1)) # Calculate angles between consecutive vectors angles = [] for i in range(4): v1 = vectors[i] v2 = vectors[(i + 1) % 4] cos_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-5) angle = np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi angles.append(angle) # Check if angles are close to 90 degrees (rectangle) if all(70 < angle < 110 for angle in angles): detected_objects.append({ 'type': 'rectangle', 'contour': contour, 'approx': approx, 'center': (x + w // 2, y + h // 2), 'box': (x, y, w, h), 'area': area }) # Detect circles/cylinders elif circularity > self.circularity_thresh: detected_objects.append({ 'type': 'cylinder', 'contour': contour, 'approx': approx, 'center': (x + w // 2, y + h // 2), 'box': (x, y, w, h), 'area': area }) return detected_objects def detect_colors(self, frame, detected_objects, red_mask, blue_mask, green_mask): """Determine the color of detected objects""" results = [] for obj in detected_objects: x, y, w, h = obj['box'] # Create mask for the object region obj_mask = np.zeros(frame.shape[:2], dtype=np.uint8) cv2.drawContours(obj_mask, [obj['contour']], -1, 255, -1) # Extract the object region from color masks obj_red = cv2.bitwise_and(red_mask, red_mask, 
mask=obj_mask) obj_blue = cv2.bitwise_and(blue_mask, blue_mask, mask=obj_mask) obj_green = cv2.bitwise_and(green_mask, green_mask, mask=obj_mask) # Count red, blue and green pixels in the object region red_pixels = cv2.countNonZero(obj_red) blue_pixels = cv2.countNonZero(obj_blue) green_pixels = cv2.countNonZero(obj_green) total_pixels = cv2.countNonZero(obj_mask) # Determine dominant color color = "unknown" if total_pixels > 0: red_ratio = red_pixels / total_pixels blue_ratio = blue_pixels / total_pixels green_ratio = green_pixels / total_pixels # Require at least 40% dominance for color classification if red_ratio > 0.4 and red_ratio > blue_ratio and red_ratio > green_ratio: color = "red" elif blue_ratio > 0.4 and blue_ratio > red_ratio and blue_ratio > green_ratio: color = "blue" elif green_ratio > 0.4 and green_ratio > red_ratio and green_ratio > blue_ratio: color = "green" # Add to results results.append({ 'type': obj['type'], 'color': color, 'center': obj['center'], 'box': obj['box'], 'contour': obj['contour'], 'timestamp': time.time() }) return results def detect_cuboids(self, frame): """Detect cuboid objects using traditional computer vision techniques""" # Step 1: Preprocess frame to create color mask color_mask, red_mask, blue_mask, green_mask = self.preprocess_frame(frame) # Step 2: Detect shapes detected_objects = self.detect_shapes(color_mask, frame) # Step 3: Detect colors of the shapes results = self.detect_colors(frame, detected_objects, red_mask, blue_mask, green_mask) return results, color_mask def run(self): """Main loop for desktop preview and detection""" logger.info("Starting main detection loop") window_name = "Cuboid Detection (Traditional Algorithm)" cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) cv2.resizeWindow(window_name, 1200, 800) # Create mask window cv2.namedWindow("Color Mask", cv2.WINDOW_NORMAL) cv2.resizeWindow("Color Mask", 600, 400) frame_count = 0 start_time = time.time() try: while True: # Start timer for FPS calculation 
frame_start = time.time() # Get frame from camera ret, frame = self.cap.read() if not ret: logger.warning("Failed to capture frame") time.sleep(0.1) continue frame_count += 1 # Detect cuboids results, color_mask = self.detect_cuboids(frame) # Draw results on frame for result in results: color_type = result['color'] obj_type = result['type'] x, y, w, h = result['box'] center_x, center_y = result['center'] # Determine drawing color based on detected color if color_type == "red": color = (0, 0, 255) # Red in BGR elif color_type == "blue": color = (255, 0, 0) # Blue in BGR elif color_type == "green": color = (0, 255, 0) # Green in BGR else: color = (0, 255, 255) # Yellow for unknown # Draw bounding box cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2) # Draw contour cv2.drawContours(frame, [result['contour']], -1, color, 2) # Draw center point cv2.circle(frame, (center_x, center_y), 5, color, -1) # Draw label label = f"{color_type} {obj_type}" cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) # Calculate FPS current_time = time.time() elapsed_time = current_time - start_time fps = frame_count / elapsed_time if elapsed_time > 0 else 0 # Update FPS history (keep last 30 values) self.fps_history.append(fps) if len(self.fps_history) > 30: self.fps_history.pop(0) avg_fps = sum(self.fps_history) / len(self.fps_history) if self.fps_history else 0 # Update detection history detection_status = 1 if results else 0 self.detection_history.append(detection_status) if len(self.detection_history) > 30: self.detection_history.pop(0) detection_rate = sum(self.detection_history) / len(self.detection_history) * 100 # Display performance stats stats_y = 30 cv2.putText(frame, f"FPS: {avg_fps:.1f}", (10, stats_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) cv2.putText(frame, f"Detection Rate: {detection_rate:.1f}%", (10, stats_y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) cv2.putText(frame, f"Objects: {len(results)}", (10, stats_y + 60), 
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 255), 2) # Display algorithm info cv2.putText(frame, "Algorithm: Traditional CV (Contour + Color Analysis)", (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1) # Show frames cv2.imshow(window_name, frame) cv2.imshow("Color Mask", color_mask) # Process keyboard input key = cv2.waitKey(1) & 0xFF if key == ord('q') or key == 27: # 'q' or ESC break elif key == ord('c'): # Capture screenshot timestamp = time.strftime("%Y%m%d-%H%M%S") filename = f"capture_{timestamp}.png" cv2.imwrite(filename, frame) logger.info(f"Screenshot saved as {filename}") elif key == ord('r'): # Reset detection history self.detection_history = [] self.fps_history = [] start_time = time.time() frame_count = 0 logger.info("Reset performance counters") elif key == ord('d'): # Toggle debug mode cv2.imshow("Red Mask", self.red_mask) cv2.imshow("Green Mask", self.green_mask) cv2.imshow("Blue Mask", self.blue_mask) logger.info("Debug masks displayed") # Calculate processing time frame_time = time.time() - frame_start target_frame_time = 1 / 30 # Target 30 FPS if frame_time < target_frame_time: time.sleep(target_frame_time - frame_time) except Exception as e: logger.error(f"Main loop exception: {str(e)}", exc_info=True) finally: self.cleanup() def cleanup(self): """Clean up resources""" # Release camera if self.cap and self.cap.isOpened(): self.cap.release() logger.info("Camera resources released") # Close windows cv2.destroyAllWindows() logger.info("Program exited safely") def print_controls(): """Print program controls""" print("=" * 70) print("Computer Camera Cuboid Detection System - Traditional Algorithm") print("=" * 70) print("Detects cuboid objects (books, boxes, etc.) 
using computer vision techniques") print("Supports RED, BLUE and GREEN objects") print("Desktop window shows real-time preview with bounding boxes") print("Second window shows the color mask used for detection") print("=" * 70) print("Controls:") print(" q or ESC - Quit program") print(" c - Capture screenshot") print(" r - Reset performance counters") print(" d - Show debug masks (red, green, blue)") print("=" * 70) print("Detection parameters:") print(" - Red, blue and green color detection in HSV space") print(" - Rectangle detection based on polygon approximation and angle analysis") print(" - Cylinder detection based on circularity metric") print(f" - Max contour area: 150000 (was 50000)") print("=" * 70) if __name__ == "__main__": print_controls() # Default camera index (0 is usually the built-in or first external camera) camera_index = 0 # Allow specifying camera index from command line if len(sys.argv) > 1: try: camera_index = int(sys.argv[1]) print(f"Using camera index: {camera_index}") except ValueError: print(f"Invalid camera index: {sys.argv[1]}, using default (0)") try: detector = CuboidDetectionSystem(camera_index=camera_index) detector.run() except Exception as e: logger.error(f"System startup failed: {str(e)}", exc_info=True) print(f"System startup failed: {str(e)}") 比如这个代码就形状优先的完成摄像头的识别,识别效果就很好。 请你结合这两个代码,修改实现在vnc上完成机器狗识别物块!!!给出完整代码
08-09
import cv2
import numpy as np
import pytesseract

# Load the photo and bring it to a fixed size before edge detection.
img = cv2.imread('number1.jpg', cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("number1.jpg could not be read")
img = cv2.resize(img, (600, 400))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edge = cv2.Canny(gray, 30, 200)

# Keep only the five largest contours as plate candidates.
contours, _ = cv2.findContours(edge.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

# Must start as None (not []): the "nothing found" check below tests
# `is None`. With [] the check never fired, the mask stayed all-zero and
# np.min() failed with "zero-size array to reduction operation minimum".
screenCnt = None
for c in contours:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        # First four-vertex polygon is taken as the plate outline.
        screenCnt = approx
        break

if screenCnt is None:
    print("没有检测到车牌轮廓")
else:
    # Build a mask with the plate quadrilateral filled in. The contour has
    # to be wrapped in a list: drawContours expects a list of contours, and
    # passing the bare point array makes it treat every point as its own
    # contour, so only a single point would be drawn.
    mask = np.zeros(gray.shape, np.uint8)
    cv2.drawContours(mask, [screenCnt], 0, 255, -1)
    image = cv2.bitwise_and(img, img, mask=mask)

    # np.where returns (row indices, column indices) of the filled pixels;
    # their extremes give the plate's bounding box.
    rows, cols = np.where(mask == 255)
    top, left = np.min(rows), np.min(cols)
    bottom, right = np.max(rows), np.max(cols)
    Cropped = gray[top: bottom + 1, left: right + 1]

    # Recognise the characters on the cropped plate. Surrounding whitespace
    # is stripped so it is not rendered as junk glyphs by putText.
    text = pytesseract.image_to_string(Cropped, config='--psm 11').strip()

    # Outline the plate and write the recognised text just above it
    # (putText takes (x, y), i.e. (column, row)).
    cv2.drawContours(img, [screenCnt], -1, (0, 255, 0), 2)
    cv2.putText(img, text, (int(left), int(top) - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Show the (possibly annotated) image whether or not a plate was found.
cv2.imshow('recognition result', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Original report that accompanied this script (before the fix above):
# "An error occurred, please help me fix it."
#
# Traceback (most recent call last):
#   File "D:\xiangmu\PythonProject\1\CHEPY.py", line 33, in <module>
#     (topx, topy) = (np.min(x), np.min(y))
#                     ~~~~~~^^^
#   File "D:\xiangmu\PythonProject\1\.venv\Lib\site-packages\numpy\_core\fromnumeric.py", line 3302, in min
#     return _wrapreduction(a, np.minimum, 'min', axis, None, out, keepdims=keepdims, initial=initial, where=where)
#   File "D:\xiangmu\PythonProject\1\.venv\Lib\site-packages\numpy\_core\fromnumeric.py", line 86, in _wrapreduction
#     return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
#            ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# ValueError: zero-size array to reduction operation minimum which has no identity
#
# Process finished with exit code 1
最新发布
11-10
import copy import os import cv2 # from Cython.Compiler.Naming import obj_base_cname from shapely.creation import polygons from ultralytics import YOLO import numpy as np from paddleocr import PaddleOCR from rapidfuzz import process import Levenshtein import json from collections import defaultdict from shapely.geometry import Polygon, MultiPolygon, GeometryCollection,LineString import pandas as pd from shapely.errors import GEOSException ocr = PaddleOCR( use_angle_cls=True, lang='en' ) # indexc=0.6 def binarize_image(image): """ 对输入的图像进行自适应阈值二值化处理 :param image: 输入的图像 :return: 二值化后的图像 """ binary_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 37, 3) return binary_image def sharpen_image(image, sharpen_strength=1.0): """ 对输入的图像进行锐化处理,可控制锐化程度 :param image: 输入的图像 :param sharpen_strength: 锐化程度,默认值为 1.0 :return: 锐化后的图像 """ kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]]) sharpened = cv2.filter2D(image, -1, kernel * sharpen_strength) return sharpened def binarizeAndSharpenImage(image, sharpen_strength=1.0, do_sharpen=True): """ 对指定路径的图像进行二值化锐化处理 :param input_image_path: 输入图像的路径 :param output_image_path: 输出图像的路径 :param sharpen_strength: 锐化程度,默认值为 1.0 :param do_sharpen: 是否进行锐化处理,默认值为 True """ # 读取图片 # image = cv2.imread(input_image_path, cv2.IMREAD_GRAYSCALE) # 二值化处理 binary_image = binarize_image(image) if do_sharpen: # 锐化处理 final_image = sharpen_image(binary_image, sharpen_strength) else: final_image = binary_image return final_image def image2OCR(im, conduits, features, path, index=0): ol = [] for i, (point, center, angle, box, tag, points) in enumerate(conduits): mask = np.zeros(im.shape[:2], dtype=np.uint8) cv2.fillPoly(mask, [np.array(points, dtype=np.int32)], 255) res = cv2.bitwise_and(im, im, mask=mask) if angle > 20: angle += 270 res = rotateImage(res, angle) sa = res.copy() # 找到非黑色区域的边界 gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) # 转为灰度图 _, binary = cv2.threshold(gray, 3, 255, cv2.THRESH_BINARY) # 
将非黑色部分设为白色 # 计算非黑色区域的边界框 x, y, w, h = cv2.boundingRect(binary) if w > 0 and h > 0: # 裁剪非黑色部分 res = gray[y:y + h, x:x + w] # cv2.imshow("ocr", res) # cv2.waitKey(0) sa = sa[y:y + h, x:x + w] # 放大三倍 height, width = res.shape if height > width: res = rotateImage(res, 90) sa = rotateImage(sa, 90) # 增强对比度(直方图均衡化) res = binarizeAndSharpenImage(res) conduits[i].append(res) if res is not None: if tag == "empty": ocrImage, match = np.ones((50, 700, 3), dtype=im.dtype) * 255, "empty" ol.append(match) continue else: sa = cv2.cvtColor(res, cv2.COLOR_GRAY2BGR) # cv2.imwrite(f'{i}.jpg', res) ocrImage, match = im2OCR(res, features, ol) devices = {} for key in features: if key == "set": continue for match in ol: if match in features[key].values(): devices[key] = devices.get(key, 0) + 1 if not devices: return None, None, None device = max(devices, key=devices.get) deviceList = features[device]["set"] # ol = [item for item in ol if item in deviceList] used_indices = set() # 用于记录已使用的索引 matches = [] for match in ol: if match in deviceList: # 找到第一个未使用的索引 for idx, item in enumerate(deviceList): if item == match and idx not in used_indices: matches.append([match, idx]) used_indices.add(idx) # 标记该索引为已使用 break else: # 如果找不到未使用的索引,返回默认值 99 matches.append([match, 99]) else: matches.append([match, 99]) counters = [(i, value[1]) for i, value in enumerate(matches) if (value[1] != 99 and value[0] != "empty")] if len(counters) < 2: return None, None, None #有效计数器数量检查 revs = [counters[i][1] - counters[i - 1][1] for i in range(1, len(counters))] rev = 0 for item in revs: if item > 0: rev += 1 else: rev -= 1 rev = True if rev > 0 else False # if rev == 0: return None, None, None s = 1 if rev else -1 # 查找最大连续段的起始索引长度 max_start = None max_length = 0 current_start = None current_length = 0 max_end = 0 prev = max_end for i, (index, value) in enumerate(counters): step = counters[i - 1][1] + (counters[i][1] - counters[i - 1][1]) v = value if i == 0 or counters[i - 1][1] + (counters[i][1] - counters[i - 
1][1]) == value: # 判断是否连续 if current_start is None: current_start = index current_length += 1 prev = max_end max_end = index if current_length > max_length: max_length = current_length max_start = current_start else: current_start = index current_length = 1 # # 检查最后一段是否是最长的 # if current_length > max_length: # max_length = current_length # max_start = current_start if max_length < 2: return None, None, None #连续段长度要求 # length = len(features[device]["set"]) / 2 min_required = max(3, len(features[device]["set"]) / 8) #最小匹配数量 1/2->1/8 ,最少3个匹配 counter, pos = matches[1][1], 1 # counter, pos = matches[prev][1], prev # if len(matches) < length: return None, None, None if len(matches) < min_required: print(f"匹配数量不足: {len(matches)} < {min_required}") return None, None, None return matches, device, [rev, counter, pos] def image2Cls(image, model): # 图像分类工具 results = model.predict(image, verbose=False, save=False) cls = results[0].probs.top1 names = [value for key, value in model.names.items()] return names[int(cls)] def rotateImage(image, angle, border_value=(0, 0, 0)): # 旋转图像 输出旋转后的图像 """ 随机旋转图像。 参数: image: np.ndarray,输入图像(灰度或彩色均可)。 max_angle: float,最大旋转角度(度)。实际旋转角度在 [-max_angle, +max_angle] 随机选取。 border_value: tuple/int,旋转后边界填充值。灰度图可传单个值,彩色图可传三元组。 返回: rotated: np.ndarray,旋转后的图像。 angle: float,实际旋转角度(度)。 """ h, w = image.shape[:2] center = (w / 2, h / 2) # 计算旋转矩阵 M = cv2.getRotationMatrix2D(center, angle, 1.0) # 计算旋转后图像的新尺寸,避免裁剪 cos = abs(M[0, 0]) sin = abs(M[0, 1]) new_w = int(h * sin + w * cos) new_h = int(h * cos + w * sin) # 调整平移部分,使图像居中 M[0, 2] += new_w / 2 - center[0] M[1, 2] += new_h / 2 - center[1] # 应用仿射变换 rotated = cv2.warpAffine( image, M, (new_w, new_h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=border_value ) return rotated # wrong_pic_count = int(indexc*10) def imageFlatten(image, model): # 图像方向校正工具 im = cv2.imread(image) name = image2Cls(im, model) if "180" in name: im = rotateImage(im, 180) if name == "090": im = rotateImage(im, 270) if 
name == "270": im = rotateImage(im, 90) return im, name def inverseTransformCoordinates(coords, angle, image_shape): """ 将变换后的坐标逆变换回原始图像坐标。 参数: coords: list of tuples,变换后的坐标列表 [(x1, y1), (x2, y2), ...]。 angle: int,图像旋转角度(顺时针方向)。 image_shape: tuple,图像的形状 (height, width)。 返回: 原始图像中的坐标列表。 """ h, w = image_shape[:2] nc = [] for xy in coords: if angle == 90: nc.append((int(xy[1]), int(w - xy[0]))) elif angle == 180: nc.append((int(w - xy[0]), int(h- xy[1]))) elif angle == 270: nc.append((int(h - xy[1]), int(xy[0]))) else: nc.append((int(xy[0]), int(xy[1]))) return nc def filter_objects_by_line(objects, image_shape, threshold=300): """ 根据图像方向(横版或竖版)选择聚类轴向,并保留数量最多的一行或一列。 :param objects: 对象列表,每个对象结构为 [point, center, ...],conduits :param image_shape: 图像的形状 (height, width),用于判断方向 :param threshold: 判断属于同一行/列的距离阈值(像素) :return: 过滤后的对象列表 """ if not objects: return [] # 提取所有对象的中心点坐标 centers = [obj[1] for obj in objects] # 判断图像方向,决定聚类轴向 height, width = image_shape[:2] use_x = height > width # 竖图 → 按列聚类(x) coord_index = 0 if use_x else 1 # 根据选定轴向进行聚类 clusters = defaultdict(list) for idx, (x, y) in enumerate(centers): # enumerate:获取中心点 val = x if use_x else y found = False for cluster_id, cluster_points in clusters.items(): comp_val = cluster_points[0][coord_index] if abs(val - comp_val) <= threshold: clusters[cluster_id].append((x, y)) found = True break if not found: clusters[len(clusters)] = [(x, y)] # 找出最大簇 largest_cluster = max(clusters.values(), key=len, default=[]) # 获取对应簇的索引 filtered_indices = [i for i, c in enumerate(centers) if c in largest_cluster] return [objects[i] for i in filtered_indices] def image2Align(image, orientedClsModel, SegModel, noseClsModel):# 对输入图像进行方向校正语义分割 img, tag = imageFlatten(image, orientedClsModel) # 图像方向校正 textImg = img.copy() results = SegModel.predict(img, verbose=False, save=True, show_conf=False, show_boxes=True, exist_ok=True, conf=0.65) # 语义分割 # img = results[0].plot(labels=False, boxes=False) names = [value for key, value in 
SegModel.names.items()] # cv2.imshow("img", results[0].plot(labels=False, boxes=False)) yList = [] contrast = 0 for res in results[0]: xy = [[item[0], item[1]] for mask in res.masks.xy for item in mask] yList.append(xy) ns = [names[int(ite)] for ite in results[0].boxes.cls.cpu().tolist()] zipList = [[xy, n] for xy, n in zip(yList, ns)] clsDict = {} for xy, key in zipList: if key in clsDict: clsDict[key].append(xy) else: clsDict[key] = [xy] return img, textImg, clsDict, results, int(tag) # img: 校正后的图像 textImg: 用于OCR的文本图像 clsDict: 分类结果字典 results: 分割结果 rotate: 旋转角度 def praseExcel(excelFile): # 从Excel文件中提取特征信息 features = { "set": [] } # 读取 Excel 文件 if excelFile.endswith((".xlsx", "xls")): # print("Extracting " + excelFile) geoDF = pd.read_excel(excelFile, sheet_name=0, header=0) # 这里可以指定 sheet 名称,如果不指定,默认读取第一个 sheet for index, row in geoDF.iterrows(): # serial = row.iloc[9] #新台账 # device = row.iloc[2] # code = row.iloc[10] serial = row.iloc[0] device = row.iloc[1] code = row.iloc[2] if type(code) is float: code = "empty" # if type(code) is float or code =="N/A" : code = "empty" #新台账 if device in features: features[device].update({serial: code}) features[device]["set"].append(code) else: features.update({device: {serial: code,"set": [code,]}}) features["set"].append(code) return features def im2OCR(gray, features, ol): # 识别图像中的文本,并将进行匹配 try: # 进行 OCR 识别 if len(gray.shape) == 2: # 如果是灰度图像 # 将灰度图转换为三通道 BGR 格式 img_bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) result = ocr.predict(img_bgr) else: result = ocr.predict(gray) # print(f"OCR result: {result[0]['rec_texts']}") # 检查结果是否为空 if not result or len(result) == 0 or len(result[0]) == 0 or not result[0]['rec_texts']: match = "Unidentified" else: # 提取识别到的文本 text = result[0]['rec_texts'][0].strip() # 替换字符 text = text.replace("D", "0") # 获取特征集中的代码 codes = features["set"] # 使用 fuzzywuzzy 进行模糊匹配 matchList = process.extract(text, codes, limit=16) best_code, best_dist = None, None # 遍历匹配列表,找到最佳匹配 for match, conf, _ in matchList: # 
计算 Levenshtein 距离 d = Levenshtein.distance(text, match) if best_dist is None or d < best_dist: best_code, best_dist = match, d match, conf = best_code, best_dist # 如果距离大于 2,则认为未识别 # if conf is not None and conf > 2: # match = "Unidentified" except IndexError as e: print(f"An error occurred: {e}") # 捕获索引越界错误 match = "Unidentified" except Exception as e: # 捕获其他异常 print(f"An unexpected error occurred: {e}") match = "Unidentified" # 如果匹配结果为 None,则认为未识别 if match is None: match = "Unidentified" # 将匹配结果添加到列表中 ol.append(match) return None, match def processPoints(points, polygons, items, tag=None): points = np.array(points, dtype=np.float32) pg = Polygon(points) for i, (polygon, t) in enumerate(polygons): # 计算交集面积 try: intersection = pg.intersection(polygon) except GEOSException as e: print(f"跳过多边形,因为它存在拓扑问题: {e}") # 可以记录日志或进行其他处理 continue if not intersection.is_empty: try: # 根据面积判断哪个多边形更大 if pg.area > polygon.area: # pg 减去交集 pg = pg.difference(intersection) if isinstance(pg, (MultiPolygon, GeometryCollection)): # 获取所有部分,并找到面积最大的 Polygon pg = max(pg.geoms, key=lambda p: p.area) else: # 当前 polygon 减去交集 polygons[i][0] = polygon.difference(intersection) if isinstance(polygons[i][0], (MultiPolygon, GeometryCollection)): # 获取所有部分,并找到面积最大的 Polygon polygons[i][0] = max(polygons[i][0].geoms, key=lambda p: p.area) except: continue polygons.append([pg, tag]) # items.append([point, center, angle, box, tag, points]) def conduit(conduitList,img_shape): # import matplotlib.pyplot as plt conduits, polygons = [], [] for points, tag in conduitList: processPoints(points, polygons, conduits, tag) # conduits.append(points) # fig, ax = plt.subplots(figsize=(10, 10)) for polygon, tag in polygons: # x, y = polygon.exterior.xy # ax.plot(x, y, label="Sub-Polygon") # ax.fill(x, y, alpha=0.4) coordinates = [[float(x), float(y)] for x, y in list(polygon.exterior.coords)] rect = cv2.minAreaRect(np.array(coordinates, dtype=np.float32)) center, size, angle = rect box = cv2.boxPoints(rect) # 获取矩形的四个角点 
box = [(item[0], item[1]) for item in np.int32(box)] boxes = sorted(box, key=lambda box: box[0], reverse=True) point = [int((boxes[0][0] + boxes[1][0]) / 2), int((boxes[0][1] + boxes[1][1]) / 2)]# 按 x 值排序 conduits.append([point, center, angle, box, tag, coordinates]) conduits.sort(key=lambda x: x[0][1], reverse=True) conduits = filter_objects_by_line(conduits,img_shape, 500) #进行列聚类 # ax.set_title("title") # ax.set_xlabel("X Coordinates") # ax.set_ylabel("Y Coordinates") # plt.legend() # plt.axis("equal") # plt.show() # print("conduits:", len(conduits), "\tmatches:", len(matches)) return conduits def nut(clsDict): nuts, polygons = [], [] if "Nut" not in clsDict: return [] for points in clsDict["Nut"]: processPoints(points, polygons, nuts) for polygon, tag in polygons: coordinates = [[float(x), float(y)] for x, y in list(polygon.exterior.coords)] rect = cv2.minAreaRect(np.array(coordinates, dtype=np.float32)) center, size, angle = rect box = cv2.boxPoints(rect) # 获取矩形的四个角点 box = [(item[0], item[1]) for item in np.int32(box)] boxes = sorted(box, key=lambda box: box[0], reverse=True) point = [int((boxes[0][0] + boxes[1][0]) / 2), int((boxes[0][1] + boxes[1][1]) / 2)] # 按 x 值排序 nuts.append([point, center, angle, box, tag, coordinates]) nuts.sort(key=lambda x: x[0][1], reverse=True) return nuts def find_nearest_coordinate_index(target_coord, coord_array): """ 找到给定二维坐标在数组中最近的坐标索引。 参数: target_coord (tuple): 给定的二维坐标 (x, y)。 coord_array (list of tuple): 包含二维坐标的数组 [(x1, y1), (x2, y2), ...]。 返回: int: 最近坐标的索引值。 """ if not coord_array: raise ValueError("坐标数组为空,无法计算最近坐标。") # 转换为NumPy数组以便向量化计算 coord_array_np = np.array(coord_array) # 计算欧几里得距离 distances = np.sqrt((coord_array_np[:, 0] - target_coord[0]) ** 2 + (coord_array_np[:, 1] - target_coord[1]) ** 2) # 找到最小距离的索引 nearest_index = np.argmin(distances) return nearest_index def nose(conduits, clsDict): if "Nose" not in clsDict: return [] conduitBoxes = [sorted(item[3], key=lambda x: x[0], reverse=True) for item in conduits] 
conduitBoxes = [[(box[0][0] + box[1][0]) / 2, (box[0][1] + box[1][1]) / 2] for box in conduitBoxes] noses, polygons = [], [] for points in clsDict["Nose"]: processPoints(points, polygons, noses) for polygon, tag in polygons: coordinates = [[float(x), float(y)] for x, y in list(polygon.exterior.coords)] if len(coordinates) == 0: continue rect = cv2.minAreaRect(np.array(coordinates, dtype=np.float32)) center, size, angle = rect box = cv2.boxPoints(rect) # 获取矩形的四个角点 box = [(item[0], item[1]) for item in np.int32(box)] boxes = sorted(box, key=lambda box: box[0], reverse=True) point = [int((boxes[0][0] + boxes[1][0]) / 2), int((boxes[0][1] + boxes[1][1]) / 2)] # 按 x 值排序 noses.append([point, center, angle, box, tag, coordinates]) noses.sort(key=lambda x: x[0][1], reverse=True) for i, (_, _, _, box, _, _) in enumerate(noses): box.sort(key=lambda x: x[0]) leftTop = [(box[0][0] + box[1][0]) / 2, (box[0][1] + box[1][1]) / 2] belong = find_nearest_coordinate_index(leftTop, conduitBoxes) noses[i] = [noses[i], belong] return noses def progress(img, conduits, noses, nuts, matches, paras, devicesParas, response, noseClsModel, detModel, chModel): def ocrLength(image): image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) ocrRes = chModel.predict(image, verbose=False)[0] obb = ocrRes.obb.cls.tolist() return len(obb) def processConduit(lal, right, cPos, coords, idx): OK[idx][1] = 0 occ = " ".join([lal, f"#{cPos + 1:02d}", right]) if occ in occur: return occur.append(occ) insertResponse(device, lal, coords, f"#{cPos + 1:02d}", right, response) def checkConduit(mch, idx): if mch in halfDevices: for j, item in enumerate(corrects): if item[0] == mch: if matches[j][0] == corrects[idx][0] or matches[j][0] == "Unidentified": _, _, _, _, _, pts, _ = conduits[j] pts = inverseTransformCoordinates(pts, rotate, img.shape) processConduit("EXP_CONNECTOR_LINE_POSITION", c, cp, pts, j) return False return True else: return True outList = [] features, device, rotate = devicesParas devices = 
features[device]["set"] deviceLenght = len(devices) / 2 conduitBoxes = [sorted(item[3], key=lambda x: x[0], reverse=True) for item in conduits] conduitBoxes = [[(box[0][0] + box[1][0]) / 2, (box[0][1] + box[1][1]) / 2] for box in conduitBoxes] if deviceLenght < len(conduits): xList = [item[3][0][0] for item in nuts] if len(xList) < len(conduits): for i in range(len(conduits) - len(xList)): xList.append(xList[-1]) popLength = len(conduits) - deviceLenght while popLength > 0: conduitsXList = [item[3][1][0] for item in conduits] dist = [nut - conduit for conduit, nut in zip(conduitsXList, xList)] maxDist = dist.index(max(dist)) outList.append([conduits.pop(maxDist), maxDist]) matches.pop(maxDist) popLength -= 1 rev, counter, pos = paras corrects = copy.deepcopy(matches) for i, item in enumerate(corrects): offset = (i - pos) if rev else pos - i p = counter + offset if p < len(devices): corrects[i] = [devices[p], p] else: corrects[i] = ["ERR", p] OK = [[correct[0], 1] for correct in corrects] halfDevices = [it[0] for it in OK] for i in range(len(corrects)): # if matches[i][0] != "Unidentified": continue # matches[i] = corrects[i] print(f"#{corrects[i][1] + 1}: ", matches[i][0], "\t------->", corrects[i][0]) for out in outList: for match, correct in zip(matches, corrects): if match[0] == "empty" and correct[0] != "empty": out.append(correct[1]) occur = [] for i, cdu in enumerate(conduits): point, center, angle, box, tag, points, crop = cdu m, mp = matches[i] c, cp = corrects[i] points = inverseTransformCoordinates(points, rotate, img.shape) if m == "Unidentified" and c == "empty": processConduit("EXP_CONNECTOR_LINE_POSITION", c, cp, points, i) elif m == "Unidentified" and c != "empty": processConduit("WARN_CONFIRM", c, cp, points, i) elif m == "empty" and c != "empty": processConduit("EXP_CONNECTOR_LINE_EMPTY", c, cp, points, i) elif m != c and m not in halfDevices: processConduit("WARN_CONFIRM", c, cp, points, i) elif m != c and m in halfDevices: lenMatch = 
ocrLength(crop) if lenMatch > len(m): processConduit("WARN_CONFIRM", c, cp, points, i) continue processConduit("EXP_CONNECTOR_LINE_POSITION", c, cp, points, i) unchange = checkConduit(m, i) if not unchange: continue response["result"].pop(-1) occur.pop(-1) processConduit("WARN_CONFIRM", c, cp, points, i) # if len(noses) > 0: # for (point, center, angle, box, tag, points), belong in noses: # mask = np.zeros(img.shape[:2], dtype=np.uint8) # # 填充多边形区域为白色 # cv2.fillPoly(mask, [np.array(points, dtype=np.int32)], 255) # res = cv2.bitwise_and(img, img, mask=mask) # if belong > deviceLenght - 1: continue # correct = corrects[belong] # points = inverseTransformCoordinates(box, rotate, img.shape) # res = cv2.bitwise_and(img, img, mask=mask) # gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) # 转为灰度图 # _, binary = cv2.threshold(gray, 3, 255, cv2.THRESH_BINARY) # 将非黑色部分设为白色 # # 计算非黑色区域的边界框 # x, y, w, h = cv2.boundingRect(binary) # if w > 0 and h > 0: # # 裁剪非黑色部分 # res = res[y:y + h, x:x + w] # status = image2Cls(res, noseClsModel) # if status == "ls": # OK[belong][1] = 0 # occ = " ".join(["EXP_CONNECTOR_LINE_UNSEATED", f"#{correct[1] + 1:02d}", correct[0]]) # if occ in occur: continue # occur.append(occ) # insertResponse(device, "EXP_CONNECTOR_LINE_UNSEATED", points, f"#{correct[1] + 1:02d}", correct[0], response) ress = detModel.predict(img, save=True, verbose=False, exist_ok=True) names = list(detModel.names.values()) if ress[0] is not None: for res in ress[0]: clss = [int(item) for item in res.boxes.cls.tolist()] boxes = res.boxes.xyxy.tolist() for c, b in zip(clss, boxes): label = names[c] x_min, y_min, x_max, y_max = b # 矩形的四个顶点 box = [ [x_min, y_min], # 左上角 [x_max, y_min], # 右上角 [x_max, y_max], # 右下角 [x_min, y_max], # 左下角 ] if label == "Unconnected": belong = find_nearest_coordinate_index([x_min, (y_min + y_max) / 2], conduitBoxes) points = inverseTransformCoordinates(box, rotate, img.shape) if belong > deviceLenght - 1: for out in outList: if int(out[1]) == int(belong): 
OK[out[2]][1] = 0 occ = " ".join(["EXP_CONNECTOR_LINE_UNSEATED", f"#{out[2] + 1:02d}", devices[out[2]]]) if occ in occur: continue occur.append(occ) insertResponse(device, "EXP_CONNECTOR_LINE_UNSEATED", points, f"#{out[2] + 1:02d}", devices[out[2]], response) else: correct = corrects[belong] OK[belong][1] = 0 occ = " ".join(["EXP_CONNECTOR_LINE_UNSEATED", f"#{correct[1] + 1:02d}", correct[0]]) if occ in occur: continue occur.append(occ) insertResponse(device, "EXP_CONNECTOR_LINE_UNSEATED", points, f"#{correct[1] + 1:02d}", correct[0], response) # TODO for i, ok in enumerate(OK): if ok[1] == 1: correct = corrects[i] point, center, angle, box, tag, points, _ = conduits[i] points = inverseTransformCoordinates(box, rotate, img.shape) insertResponse(device, "OK", points, f"#{correct[1] + 1:02d}", correct[0], response) print("done") def insertResponse(device, tp, roi, port, lan, response): response["result"].append({ "device": device, "type": tp, "roi": roi, "port": port, "lan": lan, }) def drawResults(response, image, rotate): img = cv2.imread(image) org = img.shape[:2] scale = 3 if org[1] > 2000 else 1 org = (org[1] - 400, 100) if response is None: cv2.putText(img, "WARN", org, cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 255), 5) else: results = response["result"] flag = 0 # 存储已绘制的矩形区域 rectangles = [] for result in results: roi = result["roi"] polygon = Polygon(roi) box = list(polygon.minimum_rotated_rectangle.exterior.coords) contour = np.array(box, dtype=np.int32).reshape((-1, 1, 2)) if result["type"] == "OK": color = (0, 255, 0) cv2.drawContours(img, [contour], -1, color, 3) elif result["type"] == "WARN_CONFIRM": color = (0, 255, 255) flag += 1 print(result["type"], result["port"], result["lan"]) cv2.drawContours(img, [contour], -1, color, 3) drawText(img, box, scale, result["lan"], rectangles, color, rotate) elif result["type"] == "EXP_CONNECTOR_LINE_POSITION": color = (0, 0, 255) flag += 1 print(result["type"], result["port"],result["lan"]) cv2.drawContours(img, 
[contour], -1, color, 3) drawText(img, box, scale, result["lan"], rectangles, color, rotate) elif result["type"] == "EXP_CONNECTOR_LINE_EMPTY": color = (0, 0, 255) flag += 1 print(result["type"], result["port"],result["lan"]) cv2.drawContours(img, [contour], -1, color, 3) drawText(img, box, scale, result["lan"], rectangles, color, rotate) else: color = (0, 0, 255) flag += 1 print(result["type"], result["port"], result["lan"]) cv2.drawContours(img, [contour], -1, color, 3) drawText(img, box, scale, result["port"] + " " + result["type"], rectangles, color, rotate) if flag == 0: cv2.putText(img, "OK", org, cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 5) if img.shape[1] > 2000: img = cv2.resize(img, dsize=(0, 0), fx=0.3, fy=0.3, interpolation=cv2.INTER_AREA) elif 2000 > img.shape[1] > 1000: img = cv2.resize(img, dsize=(0, 0), fx=0.6, fy=0.6, interpolation=cv2.INTER_AREA) # cv2.imwrite(output, img) # cv2.imshow("result", img) # cv2.waitKey(0) return img def drawText(img, box, scale, label, rectangles, bgColor, rotate): coord = min(box, key=lambda x: x[1]) if rotate < 180 else max(box, key=lambda x: x[1]) coord = (int(coord[0]), int(coord[1])) cv2.circle(img, (int(coord[0]), int(coord[1])), 5 * scale, bgColor, -1) if scale < 2: if int(rotate) < 180: top_left = [int(coord[0]) + 20 * scale, int(coord[1] - 105)] bottom_right = [int(coord[0]) + 50 + 12 * len(label) * scale, int(coord[1] - 65)] else: top_left = [int(coord[0]) + 20 * scale, int(coord[1] + 65)] bottom_right = [int(coord[0]) + 50 + 12 * len(label) * scale, int(coord[1] + 105)] else: if int(rotate) < 180: top_left = [int(coord[0]) + 20 * scale, int(coord[1] - 145)] bottom_right = [int(coord[0]) + 130 + 12 * len(label) * scale, int(coord[1] - 65)] else: top_left = [int(coord[0]) + 20 * scale, int(coord[1] + 65)] bottom_right = [int(coord[0]) + 130 + 12 * len(label) * scale, int(coord[1] + 145)] # 确保矩形不与其他已绘制矩形重叠 while any( not ( top_left[0] > rect[1][0] or # 当前矩形在其他矩形的右边 bottom_right[0] < rect[0][0] or # 当前矩形在其他矩形的左边 
top_left[1] > rect[1][1] or # 当前矩形在其他矩形的下边 bottom_right[1] < rect[0][1] # 当前矩形在其他矩形的上边 ) for rect in rectangles ): if rotate < 180: top_left[1] -= 20 # 向上移动 bottom_right[1] -= 20 else: top_left[1] += 20 # 向下移动 bottom_right[1] += 20 # 记录当前矩形 rectangles.append((top_left, bottom_right)) labelPos = (top_left[0], bottom_right[1]) cv2.line(img, coord, labelPos, bgColor, 2) # 绘制矩形标签 cv2.rectangle(img, tuple(top_left), tuple(bottom_right), bgColor, -1) # 绘制文字 font = cv2.FONT_HERSHEY_SIMPLEX # 字体类型 font_scale = 0.6 * scale # 字体大小 font_thickness = 1 * scale # 字体线条厚度 text_color = (0, 0, 0) # 黑色文字 text_size = cv2.getTextSize(label, font, font_scale, font_thickness)[0] text_width, text_height = text_size rect_center_x = (top_left[0] + bottom_right[0]) // 2 rect_center_y = (top_left[1] + bottom_right[1]) // 2 # 计算文字的左上角坐标(使文字中心与矩形中心对齐) text_x = rect_center_x - text_width // 2 text_y = rect_center_y + text_height // 2 cv2.putText(img, label, (text_x, text_y), font, font_scale, text_color, font_thickness) def terminal_process(path, point): SegModel = YOLO("models/terminal_seg_7_15_v3.pt") orientedClsModel = YOLO("models/orientedCls.pt") noseClsModel = YOLO("models/noseCls.pt") detModel = YOLO("models/det.pt") chModel = YOLO("models/ch.pt") outarea = YOLO("models/outarea.pt") features = praseExcel("dataset/excel/logbook-0.xlsx") index = 0 response = { "result": [] } if path.endswith((".jpg", ".jpeg", ".png")): im = cv2.imread(path) img, textImg, clsDict, results, rotate = image2Align(path, orientedClsModel, SegModel, noseClsModel) img_shape = img.shape # cv2.imwrite(os.path.join(path.replace(".jpg", "_0.jpg")), img) emptyList = [[item, "empty"] for item in clsDict["Empty"]] if "Empty" in clsDict else [] conduitList = [[item, "Conduit"] for item in clsDict["Conduit"]] if "Conduit" in clsDict else [] nuts = nut(clsDict) conduits = conduit(emptyList + conduitList,img_shape) noses = nose(conduits, clsDict) matches, device, paras = image2OCR(img, conduits, features, 
os.path.basename(path)) if matches is not None: progress(img, conduits, noses, nuts, matches, paras, [features, device, rotate], response, noseClsModel, detModel, chModel) else: return None, None return response, rotate def run(path,output_folder,SegModel,orientedClsModel,noseClsModel,detModel,chModel,features,index): for file in sorted(os.listdir(path)): if file.endswith((".jpg", ".jpeg", ".png")): p = os.path.join(path, file) # p_shape = cv2.imread(p).shape o = os.path.join(output_folder, file) # p = "//home/master/KR0418/12-PingGui/result/raw/26.jpg" print(p) r, rot = terminal_process(p, "", SegModel, orientedClsModel, noseClsModel, detModel, chModel, features, index) drawResults(r, p, o, rot) # print(f"\n本次测试已结束,测试图片共{len(os.listdir(path))}张,错误图片共{wrong_pic_count}张,测试准确率为:{(len(os.listdir(path))-wrong_pic_count)/len(os.listdir(path))}\n") if __name__ == '__main__': image_filepath = '/home/pbl/code/KR0530/12-PingGui_v5/12-PingGui/04b03bafc7984fdb93190dadb59a039e.jpg' r, rot = terminal_process(image_filepath, '') # print(r) EXP_CONNECTOR_LINE_EMPTY det_img= drawResults(r, image_filepath, rot) cv2.imwrite('/home/pbl/code/KR0530/12-PingGui_v5/12-PingGui/output1.jpg', det_img) 在这个程序中,被检测出有错误的地方是怎么被画成红框的,是怎么在图像上显示错误的
09-05
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值