视觉随笔1

最新推荐文章于 2025-08-05 17:20:40 发布

原创最新推荐文章于 2025-08-05 17:20:40 发布 · 275 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python #opencv #计算机视觉

计算机视觉专栏收录该内容

3 篇文章

订阅专栏

该代码段涉及将多个气球图片填充到2048*2048的黑色背景中，保持至少20像素的间隔。当填充溢出时，会保存多张图片。同时，它从提供的annotations.json文件中获取气球的掩模坐标信息，生成新的JSON标注文件，并验证新文件的掩模标注信息的正确性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

需求：

在尺寸为 2048 * 2048 的黑色背景图片中，依次填充 images 中的气球图片，气球图片的间隔至少大于 20 个像素，填充完成并保存，如果填充溢出，可保存多张图片。
annotations.json 中对应每一个气球的标注的掩膜坐标信息，请根据 annotations中的格式生成新的 json 标注文件，并确保新生成的 json 标注文件里的掩膜坐标信息对应需求 1 中保存的图片。
请验证需求 2 中新生成 json 文件掩膜标注信息的正确性。

第一题

# -*- coding: utf-8 -*-

"""
需求：
    1、在尺寸为 2048 * 2048 的黑色背景图片中，依次填充 images 中的气球图片，气球图
       片的间隔至少大于 20 个像素，填充完成并保存，如果填充溢出，可保存多张图片。


    2、annotations.json 中对应每一个气球的标注的掩膜坐标信息，请根据 annotations
       中的格式生成新的 json 标注文件，并确保新生成的 json 标注文件里的掩膜坐标信息对
       应需求 1 中保存的图片。
    3、请验证需求 2 中新生成 json 文件掩膜标注信息的正确性。
"""



def merge(images, annotations_file):
    """Placeholder for the balloon-merging routine; it currently does nothing.

    Args:
        images: Balloon images.
        annotations_file: Balloon annotation information.

    Returns:
        None. No implementation is provided in this stub.
    """
    return None

import json

import cv2
import numpy as np



if __name__ == '__main__':
    # Script entry point: cut every annotated balloon out of its source image,
    # pack the cut-outs row by row onto 2048x2048 black canvases, and shift
    # each polygon into the coordinates of the canvas it was pasted on.
    image_dir = r"D:\hzh\pycharmProject\coding\images"

    annotation_path = r"D:\hzh\pycharmProject\coding\annotations.json"

    # NOTE(review): out_dir is not referenced in this part of the script;
    # full canvases are written to the current working directory below.
    out_dir = r"D:\hzh\pycharmProject\\out"

    canvas_size = 2048
    # The task asks for more than 20 px between balloons, so leave 21 px
    # between the pasted image rectangles (horizontally and vertically).
    gap = 21

    # jpg_name -> {"img", "img_new", "img_mask", "points_list"}
    all_point_list_dict = {}

    # Read the annotation text and close the file before the image work starts.
    with open(annotation_path, encoding="utf-8") as f:
        content = f.read()
    print(content)
    # Convert the JSON string into a dict keyed by image file name.
    all_dict = json.loads(content)
    print(all_dict)

    for jpg_name, value in all_dict.items():
        # Pair the parallel x / y lists into mutable [x, y] points; they are
        # shifted in place later when the image is placed on a canvas.
        point_list = [
            [x, y]
            for x, y in zip(value["all_points_x"], value["all_points_y"])
        ]

        # Original author's note: OpenCV cannot read from paths containing
        # Chinese characters, so the raw bytes are read with numpy first and
        # then decoded by OpenCV.
        img_np = np.fromfile(image_dir + "/" + jpg_name, dtype=np.uint8)
        img = cv2.imdecode(img_np, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals an undecodable buffer by returning None.
            raise ValueError(f"cannot decode image: {jpg_name}")

        # fillPoly needs 32-bit integer vertices; the platform default integer
        # dtype may be 64-bit, so request int32 explicitly.
        points = np.array(point_list, dtype=np.int32)

        # Three-channel mask: white inside the balloon polygon, black outside.
        img_mask = np.zeros(img.shape, np.uint8)
        img_mask = cv2.fillPoly(img_mask, [points], color=[255, 255, 255])

        # Copy only the pixels under the mask onto a black image of equal size.
        img_new = np.zeros(img.shape, np.uint8)
        img_new[img_mask == 255] = img[img_mask == 255]

        all_point_list_dict[jpg_name] = {
            "img": img,
            "img_new": img_new,
            "img_mask": img_mask,
            "points_list": point_list,
        }

    # Black three-channel canvas that the cut-outs are pasted onto.
    big_img = np.zeros((canvas_size, canvas_size, 3), np.uint8)

    print(all_point_list_dict)
    current_h = 0  # top edge of the current row on the canvas
    current_w = 0  # left edge of the next free slot in the current row
    row_h = 0      # height of the tallest image placed in the current row
    index = 0      # number used in the file name of the next saved canvas
    # NOTE(review): a canvas is only written from inside this loop when the
    # next image no longer fits; confirm that the canvas still in progress
    # when the loop ends is saved by the code that follows.
    for jpg_name, value in all_point_list_dict.items():
        points_list = value["points_list"]
        img_mask = value["img_mask"]
        img_new = value["img_new"]
        img = value["img"]
        img_h = img.shape[0]
        img_w = img.shape[1]

        print(img_new.shape)

        print(f"current_w:{current_w},current_h:{current_h}")
        print(f"img_w:{img_w},img_h:{img_h}")

        if img_w > canvas_size or img_h > canvas_size:
            # Such an image can never be pasted; fail with a clear message
            # instead of a shape-mismatch error from the slice assignment.
            raise ValueError(
                f"{jpg_name} ({img_w}x{img_h}) is larger than the "
                f"{canvas_size}x{canvas_size} canvas"
            )

        # Start a new row when the image does not fit in the remaining width.
        # Advance by the tallest image of the finished row plus the gap so the
        # new row cannot overlap the previous one.
        if current_w + img_w > canvas_size:
            current_h = current_h + row_h + gap
            current_w = 0
            row_h = 0

        # Canvas is full: save it and continue on a fresh black canvas.
        if current_h + img_h > canvas_size:
            cv2.imwrite(f"./{index}.jpg", big_img)
            cv2.imshow("", big_img)
            cv2.waitKey(0)

            current_h = 0
            current_w = 0
            row_h = 0
            index += 1
            big_img = np.zeros((canvas_size, canvas_size, 3), np.uint8)

        big_img[current_h:current_h + img_h, current_w:current_w + img_w] = img_new

        # Move the polygon into canvas coordinates: x shifts by the paste
        # column and y by the paste row. This must run before current_w is
        # advanced for the next image.
        for each in points_list:
            print(each)
            each[0], each[1] = each[0] + current_w, each[1] + current_h

        print("test")
        row_h = max(row_h, img_h)
        current_w = current_w + img_w + gap