image sensor 关于像素的误解

本文揭示了CMOS图像传感器中的两个常见误解:每个像素感光单元仅能感应单一颜色,而非RGB三色;每个像素感光单元的R、G、B信号并非直接为该像素使用,而是通过周围单元的数据进行分析计算。文章详细解释了这些概念,旨在帮助读者更准确地理解CMOS图像传感器的工作原理。

CMOS Image Sensor一些概念误区:


误解1: 130万像素(130W Pixel)的传感器有130万个感光单元,每个感光单元都能同时感应R、G、B三个值;

正解: 130W个像素感光单元,每个感光单元只能感应单一颜色(RGB中某种色彩),不能同时感应R,G,B三个值;

(PS: Foveon X3感光元件能同时感应RGB三个值)


误解2: 感光单元的R/G/B值只为单一像素使用

正解: 每个像素感光单元只能感应单一颜色,比如只能感应R;还原该像素所需的G、B值是根据周围感光单元的G、B值分析计算而来的,即通过色彩插值和特效处理来还原每个像素。

import os
import time
import ujson
import nncase_runtime as nn
import ulab.numpy as np
from media.sensor import *
from media.display import *
from media.media import *

# Display mode configuration.
display_mode = "lcd"  # "lcd" or "hdmi"
if display_mode == "lcd":
    DISPLAY_WIDTH = ALIGN_UP(800, 16)
    DISPLAY_HEIGHT = 480
else:
    DISPLAY_WIDTH = ALIGN_UP(1920, 16)
    DISPLAY_HEIGHT = 1080

# Size of the frames used for red detection and digit classification.
OUT_RGB888P_WIDTH = ALIGN_UP(1280, 16)
OUT_RGB888P_HEIGHT = 720

NUM_CLASSIFY_PATH = "/sdcard/num_classify/"
CONFIG_PATH = NUM_CLASSIFY_PATH + "deploy_config.json"

# Colour thresholds passed to find_blobs() for red detection.
RED_THRESHOLDS = [
    (30, 100, 15, 127, 15, 127),
    (30, 100, -127, -15, -127, -15),
]

# Regions of interest as (x, y, w, h), expressed in the
# OUT_RGB888P_WIDTH x OUT_RGB888P_HEIGHT detection frame.
BOXES = {
    "B": (490, 0, 300, 720),    # blue box
    "R": (100, 210, 300, 300),  # red box
    "Y": (880, 210, 300, 300),  # yellow box
}

# Outline colour used when drawing each box on the OSD layer.
COLORS = {
    "B": (0, 0, 255),
    "R": (255, 0, 0),
    "Y": (255, 255, 0),
}


class ScopedTiming:
    """Context manager that prints how long its ``with`` body took, in ms.

    Nothing is measured or printed when ``enable_profile`` is false.
    """

    def __init__(self, info="", enable_profile=True):
        self.info = info
        self.enable_profile = enable_profile

    def __enter__(self):
        if self.enable_profile:
            self.start_time = time.time_ns()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self.enable_profile:
            elapsed_time = time.time_ns() - self.start_time
            print(f"{self.info} took {elapsed_time / 1000000:.2f} ms")


def read_deploy_config(config_path):
    """Load the JSON deployment configuration at ``config_path`` and return it."""
    with open(config_path, 'r') as json_file:
        return ujson.load(json_file)


def softmax(x):
    """Return the softmax of ``x``; the maximum is subtracted first for stability."""
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x)


def sigmoid(x):
    """Return the logistic sigmoid of ``x``."""
    return 1 / (1 + np.exp(-x))


def _scale_x(x):
    """Map an x coordinate/width from the detection frame to the display."""
    return x * DISPLAY_WIDTH // OUT_RGB888P_WIDTH


def _scale_y(y):
    """Map a y coordinate/height from the detection frame to the display."""
    return y * DISPLAY_HEIGHT // OUT_RGB888P_HEIGHT


def _box_to_display(box):
    """Convert an (x, y, w, h) box from detection-frame to display coordinates."""
    return (_scale_x(box[0]), _scale_y(box[1]), _scale_x(box[2]), _scale_y(box[3]))


def main():
    """Run red-blob detection and digit classification, drawing results on the OSD.

    Channel 0 feeds the display, channel 1 (RGB565) is searched for red blobs
    inside each entry of ``BOXES``, and channel 2 (planar RGB888) is resized by
    AI2D and classified by the kmodel named in the deploy config. The loop runs
    until interrupted; resources are released in the ``finally`` clause.
    """
    # Read the digit-classification configuration.
    deploy_conf = read_deploy_config(CONFIG_PATH)
    kmodel_name = deploy_conf["kmodel_path"]
    labels = deploy_conf["categories"]
    confidence_threshold = deploy_conf["confidence_threshold"]
    img_size = deploy_conf["img_size"]
    num_classes = deploy_conf["num_classes"]

    # Initialise the KPU and load the model.
    kpu = nn.kpu()
    kpu.load_kmodel(NUM_CLASSIFY_PATH + kmodel_name)

    # Initialise AI2D preprocessing: resize the camera frame to the model input.
    ai2d = nn.ai2d()
    ai2d.set_dtype(
        nn.ai2d_format.NCHW_FMT,
        nn.ai2d_format.NCHW_FMT,
        np.uint8,
        np.uint8,
    )
    ai2d.set_resize_param(True, nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
    ai2d_builder = ai2d.build(
        [1, 3, OUT_RGB888P_HEIGHT, OUT_RGB888P_WIDTH],
        [1, 3, img_size[0], img_size[1]],
    )

    # Initialise the sensor.
    sensor = Sensor()
    sensor.reset()
    sensor.set_hmirror(False)
    sensor.set_vflip(False)

    # Configure the three output channels.
    # Channel 0: display.
    sensor.set_framesize(width=DISPLAY_WIDTH, height=DISPLAY_HEIGHT)
    sensor.set_pixformat(PIXEL_FORMAT_YUV_SEMIPLANAR_420, chn=CAM_CHN_ID_0)
    # Channel 1: red detection.
    sensor.set_framesize(width=OUT_RGB888P_WIDTH, height=OUT_RGB888P_HEIGHT, chn=CAM_CHN_ID_1)
    sensor.set_pixformat(PIXEL_FORMAT_RGB_565, chn=CAM_CHN_ID_1)
    # Channel 2: digit classification.
    sensor.set_framesize(width=OUT_RGB888P_WIDTH, height=OUT_RGB888P_HEIGHT, chn=CAM_CHN_ID_2)
    sensor.set_pixformat(PIXEL_FORMAT_RGB_888_PLANAR, chn=CAM_CHN_ID_2)

    # Bind channel 0 to the video layer of the display.
    sensor_bind_info = sensor.bind_info(x=0, y=0, chn=CAM_CHN_ID_0)
    if display_mode == "lcd":
        Display.init(Display.ST7701, to_ide=True)
    else:
        Display.init(Display.LT9611, to_ide=True)
    Display.bind_layer(**sensor_bind_info, layer=Display.LAYER_VIDEO1)

    # OSD layer used to draw the results on top of the video.
    osd_img = image.Image(DISPLAY_WIDTH, DISPLAY_HEIGHT, image.ARGB8888)

    # The OSD is display-sized while BOXES are in detection-frame coordinates,
    # so convert them once up front; drawing them unscaled puts parts of the
    # boxes off-screen whenever the two sizes differ.
    # NOTE(review): assumes channels 0 and 1 cover the same field of view, so a
    # plain per-axis scale lines the overlay up with the video - confirm on device.
    display_boxes = {name: _box_to_display(box) for name, box in BOXES.items()}

    # Initialise the media pipeline and start the sensor.
    MediaManager.init()
    sensor.run()

    # Pre-allocated tensor that receives the AI2D output / feeds the KPU.
    data = np.ones((1, 3, img_size[0], img_size[1]), dtype=np.uint8)
    ai2d_output_tensor = nn.from_numpy(data)

    # Main loop.
    clock = time.clock()
    try:
        while True:
            clock.tick()
            os.exitpoint()

            # === Red target detection ===
            rgb565_img = sensor.snapshot(chn=CAM_CHN_ID_1)
            if rgb565_img == -1:
                continue

            red_in_blue = None
            red_count = 0
            for name, box in BOXES.items():
                blobs = rgb565_img.find_blobs(
                    RED_THRESHOLDS, False, box,
                    x_stride=5, y_stride=5,
                    pixels_threshold=1000, area_threshold=1000,
                    merge=True, margin=10
                )
                if not blobs:
                    continue
                red_count += 1
                if name != "B":
                    continue
                # Inside the blue box, search again with lower size thresholds
                # and report the centroid of the largest red blob.
                red_blobs = rgb565_img.find_blobs(
                    RED_THRESHOLDS, False, box,
                    x_stride=5, y_stride=5,
                    pixels_threshold=100, area_threshold=100,
                    merge=True, margin=10
                )
                if red_blobs:
                    largest_blob = max(red_blobs, key=lambda b: b.pixels())
                    red_in_blue = (largest_blob.cx(), largest_blob.cy())

            # === Digit classification ===
            rgb888p_img = sensor.snapshot(chn=CAM_CHN_ID_2)
            if rgb888p_img == -1:
                continue

            cls_idx = -1
            score = 0.0
            digit_label = "N/A"
            if rgb888p_img.format() == image.RGBP888:
                ai2d_input = rgb888p_img.to_numpy_ref()
                ai2d_input_tensor = nn.from_numpy(ai2d_input)

                # Preprocess and run inference.
                ai2d_builder.run(ai2d_input_tensor, ai2d_output_tensor)
                kpu.set_input_tensor(0, ai2d_output_tensor)
                kpu.run()

                # Collect every model output as a numpy array.
                results = []
                for i in range(kpu.outputs_size()):
                    output_data = kpu.get_output_tensor(i)
                    result = output_data.to_numpy()
                    results.append(result)

                # Interpret the classification result.
                if num_classes > 2:
                    softmax_res = softmax(results[0][0])
                    cls_idx = np.argmax(softmax_res)
                    if softmax_res[cls_idx] > confidence_threshold:
                        score = softmax_res[cls_idx]
                        digit_label = labels[cls_idx]
                else:
                    sigmoid_res = sigmoid(results[0][0][0])
                    if sigmoid_res > confidence_threshold:
                        cls_idx = 1
                        score = sigmoid_res
                        digit_label = labels[1]
                    else:
                        cls_idx = 0
                        score = 1 - sigmoid_res
                        digit_label = labels[0]

            # === Console output (simplified) ===
            print(f"识别数字: {digit_label}")
            print(f"红色目标数量: {red_count}")
            if red_in_blue:
                print(f"蓝色框内红色坐标: ({red_in_blue[0]}, {red_in_blue[1]})")
            else:
                print("蓝色框内红色坐标: 无")

            # === On-screen display ===
            osd_img.clear()

            # Draw the boxes, in display coordinates.
            for name, box in display_boxes.items():
                osd_img.draw_rectangle(box[0], box[1], box[2], box[3],
                                       color=COLORS[name], thickness=2)

            # Mark the red target; the text keeps the detection-frame
            # coordinates, only the drawing position is scaled.
            if red_in_blue:
                osd_img.draw_cross(_scale_x(red_in_blue[0]), _scale_y(red_in_blue[1]),
                                   color=(255, 0, 0), size=8, thickness=2)
                osd_img.draw_string_advanced(
                    display_boxes["B"][0] + 5,
                    display_boxes["B"][1] + 5,
                    10,
                    f"Red: {red_in_blue[0]},{red_in_blue[1]}",
                    color=(255, 0, 0),
                    bg_color=(0, 0, 0)
                )

            # Show the number of boxes that contain red.
            osd_img.draw_string_advanced(10, 10, 10, f"Red Count: {red_count}",
                                         color=(255, 255, 0), bg_color=(0, 0, 0))

            # Show the digit classification result.
            if cls_idx >= 0:
                osd_img.draw_string_advanced(
                    10, 40, 10,
                    f"Digit: {digit_label} ({score:.2f})",
                    color=(0, 255, 0),
                    bg_color=(0, 0, 0)
                )

            # Show the frame rate.
            osd_img.draw_string_advanced(550, 10, 10, f"FPS: {clock.fps():.1f}",
                                         color=(255, 255, 0), bg_color=(0, 0, 0))

            Display.show_image(osd_img, 0, 0, Display.LAYER_OSD3)

    except KeyboardInterrupt:
        print("程序已停止")
    except Exception as e:
        # Top-level boundary: report the error, then fall through to cleanup.
        print(f"错误: {e}")
    finally:
        # Release resources.
        sensor.stop()
        Display.deinit()
        MediaManager.deinit()
        del ai2d_output_tensor
        nn.shrink_memory_pool()
        print("资源已释放")


if __name__ == "__main__":
    main()

# Request that followed this script in the original post (not implemented here):
# move the entire displayed picture - the captured camera image together with
# the OSD overlay - from its original left-hand position to the centre of the
# screen.
07-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值