yolov8检测掩码目标，并与背景融合

本文链接：https://blog.youkuaiyun.com/qq_40357993/article/details/146037356

yolov8检测掩码目标，并与背景融合

剪切代码如下：

import os
from ultralytics import YOLO
import numpy as np
import cv2

# Cut the first detected person out of every .jpg in `input_dir` and save it
# as a PNG with a transparent background in `output_dir`.

# Load the YOLOv8 segmentation model
model = YOLO("/home/zm/PycharmProjects/yolo/ultralytics-main/yolov8m-seg.pt")

# Define input and output directories
input_dir = '/media/ZM/ZM/数据/ok/person_ok_clothes_helmet（train）/train_down/'
output_dir = '/media/ZM/ZM/数据/ok/seg_result/'

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Loop through all files in the input directory
for img_name in os.listdir(input_dir):
    # Only process jpg images
    if not img_name.endswith('.jpg'):
        continue

    # Construct the full file path
    img_path = os.path.join(input_dir, img_name)

    # cv2.imread returns None (no exception) for missing/corrupt files
    orig_img = cv2.imread(img_path)
    if orig_img is None:
        print(f"Skipped (cannot read image): {img_name}")
        continue

    # Resize the image to 640x448; OpenCV takes the size as (width, height)
    orig_img_resized = cv2.resize(orig_img, (640, 448))

    # Run inference on the image file, keeping only class 0
    results = model(source=img_path, save=False, device='0', project=output_dir,classes=[0])

    # `masks` is None when nothing was segmented; skip instead of crashing
    masks = results[0].masks
    if masks is None or masks.data.shape[0] == 0:
        print(f"Skipped (no mask detected): {img_name}")
        continue

    # Only the first returned mask is used
    mask = masks.data[0].cpu().numpy().astype(np.bool_)

    # Bring the mask to the same 640x448 size as the resized image.
    # NOTE(review): this assumes the mask covers the same field of view as
    # the directly resized image (no letterbox padding) - confirm.
    mask_resized = cv2.resize(mask.astype(np.uint8), (640, 448), interpolation=cv2.INTER_NEAREST).astype(np.bool_)

    # Keep the pixels under the mask, black everywhere else
    masked_image = np.zeros_like(orig_img_resized)
    masked_image[mask_resized] = orig_img_resized[mask_resized]

    # Add an alpha channel: opaque under the mask, transparent elsewhere
    alpha_channel = np.ones(mask_resized.shape, dtype=orig_img.dtype) * 255
    masked_image_rgba = np.dstack((masked_image, alpha_channel))
    masked_image_rgba[~mask_resized] = (0, 0, 0, 0)

    # Save the cut-out with a transparent background as PNG
    output_path_rgba = os.path.join(output_dir, f"masked_{img_name.replace('.jpg', '_rgba.png')}")
    cv2.imwrite(output_path_rgba, masked_image_rgba)

    # Alternative: save the cut-out on a black background instead
    # output_path_black_bg = os.path.join(output_dir, f"masked_{img_name}")
    # cv2.imwrite(output_path_black_bg, masked_image)

    print(f"Processed and saved: {img_name}")

融合代码如下：

import cv2
import numpy as np
import os
import random


def add_alpha_channel(img):
    """Return a 4-channel copy of a 3-channel image with a constant alpha of 255."""
    # Unpacking into exactly three names requires a 3-channel input
    blue, green, red = cv2.split(img)
    # Alpha plane with the same shape and dtype as the colour planes
    opaque = np.ones(blue.shape, dtype=blue.dtype) * 255
    return cv2.merge((blue, green, red, opaque))


def resize_image_aspect_ratio(img, target_width, target_height):
    """Resize an image, keeping its aspect ratio, so it fits the size limits.

    The result is never wider than 500 px or taller than 1080 px, and it is
    additionally capped at ``target_width`` x ``target_height`` so that it
    fits inside the image it will be pasted onto. Images already within the
    limits are not enlarged.

    Args:
        img: Image array whose first two dimensions are (height, width).
        target_width: Width of the destination image; ignored when falsy or
            not positive.
        target_height: Height of the destination image; ignored when falsy
            or not positive.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    h, w = img.shape[:2]
    aspect_ratio = w / h

    # Fixed upper bounds, tightened by the destination size when it is given
    max_width = 500
    max_height = 1080
    if target_width and target_width > 0:
        max_width = min(max_width, target_width)
    if target_height and target_height > 0:
        max_height = min(max_height, target_height)

    # Constrain the longer side first and derive the other from the ratio
    if w > h:
        new_width = min(max_width, w)
        new_height = int(new_width / aspect_ratio)
    else:
        new_height = min(max_height, h)
        new_width = int(new_height * aspect_ratio)

    # The derived side may still break its own limit; shrink again if so
    if new_width > max_width:
        new_width = max_width
        new_height = int(new_width / aspect_ratio)

    if new_height > max_height:
        new_height = max_height
        new_width = int(new_height * aspect_ratio)

    # int() truncation can reach 0 for extreme ratios; cv2.resize needs >= 1
    new_width = max(1, new_width)
    new_height = max(1, new_height)

    # OpenCV takes the size as (width, height)
    return cv2.resize(img, (new_width, new_height))


def merge_img(jpg_img, png_img, y1, y2, x1, x2):
    """Alpha-blend an overlay image onto a background image.

    The overlay is placed with its top-left corner at (x1, y1) and its
    bottom-right corner at (x2, y2); parts that fall outside the background
    are clipped. If the placement lies completely outside the background,
    the background is returned unchanged.

    Args:
        jpg_img: Background image, 3 or 4 channels. A 3-channel background
            is first converted with ``add_alpha_channel``; a 4-channel
            background is modified in place.
        png_img: Overlay image. Channel 3 is used as the alpha value
            (0-255); an overlay with fewer than 4 channels is treated as
            fully opaque.
        y1, y2, x1, x2: Placement rectangle in background coordinates. It is
            expected to have the same size as ``png_img``.

    Returns:
        The 4-channel background with the overlay blended into its first
        three channels.
    """
    # Make sure the background has 4 channels
    if jpg_img.shape[2] == 3:
        jpg_img = add_alpha_channel(jpg_img)

    bg_h, bg_w = jpg_img.shape[:2]
    fg_h, fg_w = png_img.shape[:2]

    # Part of the overlay that remains after clipping to the background
    yy1 = max(0, -y1)
    xx1 = max(0, -x1)
    yy2 = fg_h - max(0, y2 - bg_h)
    xx2 = fg_w - max(0, x2 - bg_w)

    # Matching region in the background
    y1, x1 = max(0, y1), max(0, x1)
    y2, x2 = min(y2, bg_h), min(x2, bg_w)

    # Nothing overlaps: avoid negative slice bounds and shape mismatches
    if y1 >= y2 or x1 >= x2 or yy1 >= yy2 or xx1 >= xx2:
        return jpg_img

    overlay = png_img[yy1:yy2, xx1:xx2]

    # Overlay opacity scaled to 0-1; no alpha channel means fully opaque
    if png_img.shape[2] >= 4:
        alpha_png = overlay[:, :, 3] / 255.0
    else:
        alpha_png = np.ones(overlay.shape[:2], dtype=np.float64)
    alpha_jpg = 1 - alpha_png

    # Blend the three colour channels at once; the float result is cast back
    # to the background's dtype on assignment
    region = jpg_img[y1:y2, x1:x2, :3]
    region[...] = alpha_jpg[:, :, None] * region + alpha_png[:, :, None] * overlay[:, :, :3]

    return jpg_img


def random_overlay(jpg_img, png_images_dir, max_overlays=3):
    """Paste between 1 and ``max_overlays`` random PNG cut-outs onto an image.

    Args:
        jpg_img: Background image array.
        png_images_dir: Directory searched (non-recursively) for ``.png``
            overlay files.
        max_overlays: Upper bound on the number of overlays; the actual
            count is also limited by the number of PNG files available.

    Returns:
        The background with the overlays blended in. If the directory
        contains no ``.png`` file, the input image is returned unchanged.
    """
    # Collect the candidate overlay files
    png_files = [f for f in os.listdir(png_images_dir) if f.endswith('.png')]
    if not png_files:
        return jpg_img

    # random.sample raises if asked for more items than exist, so cap the count
    num_overlays = min(random.randint(1, max_overlays), len(png_files))
    selected_png_files = random.sample(png_files, num_overlays)

    for png_file in selected_png_files:
        png_path = os.path.join(png_images_dir, png_file)
        png_img = cv2.imread(png_path, cv2.IMREAD_UNCHANGED)

        # Skip unreadable files and images without an alpha channel, which
        # cannot be blended as transparent cut-outs
        if png_img is None or png_img.ndim != 3 or png_img.shape[2] != 4:
            continue

        # Scale the overlay down to its size limits
        png_img_resized = resize_image_aspect_ratio(png_img, jpg_img.shape[1], jpg_img.shape[0])

        # Pick a random top-left corner; clamp the range at 0 so an overlay
        # larger than the background does not make randint raise
        max_x1 = max(0, jpg_img.shape[1] - png_img_resized.shape[1])
        max_y1 = max(0, jpg_img.shape[0] - png_img_resized.shape[0])
        x1 = random.randint(0, max_x1)
        y1 = random.randint(0, max_y1)
        x2 = x1 + png_img_resized.shape[1]
        y2 = y1 + png_img_resized.shape[0]

        # Blend the overlay into the background
        jpg_img = merge_img(jpg_img, png_img_resized, y1, y2, x1, x2)

    return jpg_img


if __name__ == '__main__':
    # Paste random PNG cut-outs onto every background JPG and save the result.

    # Input folders
    img_jpg_dir = '/media/ZM/ZM/数据/ok/20250304现场数据/no_obj/2025-03-03'  # background JPG folder
    img_png_path = '/media/ZM/ZM/数据/ok/seg_result/wangluo'  # PNG cut-out folder

    # Collect the background images
    jpg_files = [f for f in os.listdir(img_jpg_dir) if f.endswith('.jpg')]

    # Output folder
    output_dir = '/media/ZM/ZM/数据/ok/result/wangluo1'
    os.makedirs(output_dir, exist_ok=True)

    # Overlay random cut-outs on each background image
    for jpg_file in jpg_files:
        img_jpg_path = os.path.join(img_jpg_dir, jpg_file)

        # cv2.imread returns None (no exception) for missing/corrupt files
        img_jpg = cv2.imread(img_jpg_path, cv2.IMREAD_UNCHANGED)
        if img_jpg is None:
            print(f"Skipped (cannot read image): {jpg_file}")
            continue

        # A grayscale JPG is loaded as a 2-D array; the blending code
        # indexes a channel axis, so expand it to 3 channels first
        if img_jpg.ndim == 2:
            img_jpg = cv2.cvtColor(img_jpg, cv2.COLOR_GRAY2BGR)

        # Blend in the random PNG overlays
        res_img = random_overlay(img_jpg, img_png_path)

        # Save the result image
        output_img_path = os.path.join(output_dir, f"result_5{jpg_file}")
        if not cv2.imwrite(output_img_path, res_img):
            print(f"Failed to save: {output_img_path}")
            continue

        print(f"Processed and saved: {jpg_file}")