1. Dataset and code structure
(Only the files marked in the red box are used.)
二、依赖包版本
关键包:tensorflow2.6 + python3.8 + keras2.6.0 + numpy1.20.0
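Besides the packages above, the script also imports OpenCV (cv2) and scikit-learn, so both need to be installed. A quick sanity check you can run before training (a minimal sketch; the expected versions in the comments simply mirror the list above):

import sys
import tensorflow as tf
import keras
import numpy as np
import cv2
import sklearn

print("Python:", sys.version.split()[0])    # expect 3.8.x
print("TensorFlow:", tf.__version__)        # expect 2.6.x
print("Keras:", keras.__version__)          # expect 2.6.0
print("NumPy:", np.__version__)             # expect 1.20.0
print("OpenCV:", cv2.__version__)
print("scikit-learn:", sklearn.__version__)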
3. Code
import os
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import tensorflow as tf
import cv2
import json
from tensorflow.keras import layers, models
import numpy as np
from tensorflow.keras.utils import to_categorical


# Step 3: process the stone-carving photos and generate an XML annotation file
def generate_xml(labels, img_size):
    # Create the root node of the XML file
    root = ET.Element("annotation")
    size = ET.SubElement(root, "size")
    width = ET.SubElement(size, "width")
    height = ET.SubElement(size, "height")
    width.text = str(img_size[0])
    height.text = str(img_size[1])
    for image_labels in labels:
        for label in image_labels:
            # Add an object element for every bounding box
            obj = ET.SubElement(root, "object")
            name = ET.SubElement(obj, "name")
            name.text = label['name']
            bndbox = ET.SubElement(obj, "bndbox")
            xmin = ET.SubElement(bndbox, "xmin")
            ymin = ET.SubElement(bndbox, "ymin")
            xmax = ET.SubElement(bndbox, "xmax")
            ymax = ET.SubElement(bndbox, "ymax")
            xmin.text = str(label['xmin'])
            ymin.text = str(label['ymin'])
            xmax.text = str(label['xmax'])
            ymax.text = str(label['ymax'])
    # Write a single annotations.xml containing all objects
    tree = ET.ElementTree(root)
    tree.write("annotations.xml")


# Step 5: train the crack object-detection model
def train_model(train_images, train_labels, input_shape=(224, 224, 3), num_classes=1, max_boxes=10):
    print(f"Train Images Shape: {train_images.shape}")
    print(f"Train Labels Shape: {train_labels.shape}")
    # If the labels carry a trailing singleton dimension, drop it
    if train_labels.ndim == 4 and train_labels.shape[-1] == 1:
        train_labels = np.squeeze(train_labels, axis=-1)
    model = tf.keras.models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # One row of (4 box coordinates + class score) per predicted box
        layers.Dense(max_boxes * (num_classes + 4), activation='linear'),
        layers.Reshape((max_boxes, num_classes + 4))
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    model.fit(train_images, train_labels, batch_size=64, epochs=500, validation_split=0.2)
    return model


# Step 3: automatically detect cracks on unannotated carvings
def detect_and_correct(model, unannotated_images):
    detections = model.predict(unannotated_images)
    corrected_images = []
    for img, detection in zip(unannotated_images, detections):
        # Manual correction of the detections could be added here
        corrected_images.append((img, detection))
    return corrected_images


# Step 4: train a new model with the corrected annotations
def retrain_model(corrected_images):
    corrected_labels = np.array([detection for _, detection in corrected_images])
    corrected_images = np.array([img for img, _ in corrected_images])
    return train_model(corrected_images, corrected_labels)


# Step 6: repeat training until all unannotated photos have been processed
def iterative_training(train_images, unannotated_images, initial_model):
    print("train_images:", train_images.shape)
    print("unannotated_images:", unannotated_images.shape)
    # Placeholder labels: one (max_boxes, 5) array per unannotated image
    unannotated_labels = np.zeros((unannotated_images.shape[0], 10, 5))
    print('unannotated_labels:', unannotated_labels.shape)
    while unannotated_images.shape[0] > 0:  # loop until no unannotated images remain
        # Dynamically adjust how many images to pick this round
        num_to_select = min(241, unannotated_images.shape[0])
        # Select a subset of the unannotated images for prediction
        selected_indices = np.random.choice(unannotated_images.shape[0], size=num_to_select, replace=False)
        selected_images = unannotated_images[selected_indices]
        selected_labels = unannotated_labels[selected_indices]
        # Predict with the current model
        predictions = initial_model.predict(selected_images)
        # Post-process the predictions (example only; adapt to the real use case)
        processed_predictions = process_predictions(predictions, 0.5)
        # Update the labels
        unannotated_labels[selected_indices] = processed_predictions
        # Retrain the model on the newly labelled subset
        initial_model.fit(selected_images, processed_predictions, batch_size=64, epochs=500)
        # Remove the now-labelled images from the unannotated pool
        unannotated_images = np.delete(unannotated_images, selected_indices, axis=0)
        unannotated_labels = np.delete(unannotated_labels, selected_indices, axis=0)
        # Report the state of the current iteration
        print(f"Remaining unannotated images: {unannotated_images.shape[0]}")
    return initial_model


# Step 7: validate the latest model
def validate_model(model, test_images, test_labels):
    results = model.evaluate(test_images, test_labels)
    print("Validation Results:", results)


# Step 1: load the custom dataset
def load_images_and_labels_from_json(image_dir, label_dir):
    images = []
    labels = []
    image_all = []
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        label_path = os.path.join(label_dir, filename.replace(".jpg", ".json"))
        # Read the image with OpenCV
        image = cv2.imread(img_path)
        if image is None:
            print(f"Warning: Could not read image {img_path}.")
            continue  # skip unreadable images
        image_all.append(image)
        # Skip the image if it has no label file (no object-detection annotation)
        if not os.path.exists(label_path):
            print(f"No label file for: {filename}, skipping...")
            continue
        # Parse the JSON file to get the annotations
        with open(label_path, 'r') as f:
            label_data = json.load(f)
        image_labels = []
        for item in label_data:
            # Make sure the image name matches
            if item['image'] != filename:
                continue
            for annotation in item['annotations']:
                name = annotation['label']
                coordinates = annotation['coordinates']
                xmin = int(coordinates['x'])
                ymin = int(coordinates['y'])
                xmax = int(coordinates['x'] + coordinates['width'])
                ymax = int(coordinates['y'] + coordinates['height'])
                image_labels.append({'name': name, 'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax})
        # Keep the image only if it has at least one label
        if image_labels:
            images.append(image)
            labels.append(image_labels)
    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return images, labels, image_all


# Step 2: convert the data into the format the model expects
def prepare_data(images, labels, input_shape=(224, 224), max_boxes=10):
    processed_images = []
    processed_labels = []
    for image, image_labels in zip(images, labels):
        # Resize the image to the input shape
        resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
        processed_images.append(resized_image)
        # Each label becomes a fixed-size row [xmin, ymin, xmax, ymax, class_id]
        label_array = []
        for label in image_labels:
            try:
                xmin = label['xmin'] / image.shape[1]
                ymin = label['ymin'] / image.shape[0]
                xmax = label['xmax'] / image.shape[1]
                ymax = label['ymax'] / image.shape[0]
                class_id = 0  # cracks are assumed to be class 0
                label_array.append([xmin, ymin, xmax, ymax, class_id])
            except KeyError:
                # Skip malformed labels
                print(f"Skipping label with missing data: {label}")
                continue
        # Pad with zeros up to max_boxes, or truncate if there are too many boxes
        if len(label_array) < max_boxes:
            label_array.extend([[0, 0, 0, 0, 0]] * (max_boxes - len(label_array)))
        else:
            label_array = label_array[:max_boxes]
        processed_labels.append(label_array)
    # Convert to NumPy arrays and normalize the image data
    processed_images = np.array(processed_images, dtype=np.float32) / 255.0
    processed_labels = np.array(processed_labels, dtype=np.float32)
    return processed_images, processed_labels


# Post-process the model predictions
def process_predictions(predictions, conf_threshold=0.5, iou_threshold=0.4, max_boxes=10):
    """
    Post-process the model predictions with thresholding and non-maximum suppression (NMS).
    :param predictions: model predictions of shape (batch_size, num_boxes, 5), where the
                        last dimension is [xmin, ymin, xmax, ymax, class_id]
    :param conf_threshold: confidence threshold; predictions below it are ignored
    :param iou_threshold: IoU threshold used for NMS
    :param max_boxes: maximum number of bounding boxes per image
    :return: the processed predictions
    """
    processed_predictions = []
    for pred in predictions:
        # Drop predictions whose confidence is below the threshold
        valid_indices = np.where(pred[:, -1] >= conf_threshold)[0]
        pred = pred[valid_indices]
        if len(pred) == 0:
            # No valid predictions: pad with zeros
            processed_predictions.append(np.zeros((max_boxes, 5)))
            continue
        # Non-maximum suppression (NMS)
        boxes = pred[:, :4]
        scores = pred[:, -1]
        indices = tf.image.non_max_suppression(boxes, scores, max_output_size=max_boxes,
                                               iou_threshold=iou_threshold)
        # Keep only the boxes that survived NMS
        nms_pred = pred[indices.numpy()]
        # Pad to the fixed size
        if nms_pred.shape[0] < max_boxes:
            padding = np.zeros((max_boxes - nms_pred.shape[0], 5))
            nms_pred = np.vstack((nms_pred, padding))
        processed_predictions.append(nms_pred)
    return np.array(processed_predictions)


# Step 9: generate labels for unannotated images with the trained model and save them as JSON files
def generate_json_labels(model, unannotated_images, image_names, output_dir="train_labels_new",
                         input_shape=(224, 224), conf_threshold=0.5):
    """
    Use the trained model to generate labels for unannotated images and save them as JSON files.
    :param model: the trained model
    :param unannotated_images: the unannotated image data
    :param image_names: file names of the unannotated images
    :param output_dir: output folder for the generated labels
    :param input_shape: input image size expected by the model
    :param conf_threshold: confidence threshold; predictions below it are ignored
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Preprocess the images (resize and normalize)
    processed_images = []
    for image in unannotated_images:
        resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
        processed_images.append(resized_image)
    processed_images = np.array(processed_images, dtype=np.float32)
    if processed_images.max() > 1.0:
        # Only rescale raw 0-255 images; the caller may already have normalized them
        processed_images /= 255.0
    # Run the model
    predictions = model.predict(processed_images)
    # Handle the predictions image by image
    for i, (image, prediction) in enumerate(zip(unannotated_images, predictions)):
        json_labels = []
        for pred in prediction:
            # Skip predictions whose confidence is below the threshold
            if pred[-1] < conf_threshold:
                continue
            # Convert normalized coordinates back to the image size
            xmin = int(pred[0] * image.shape[1])
            ymin = int(pred[1] * image.shape[0])
            xmax = int(pred[2] * image.shape[1])
            ymax = int(pred[3] * image.shape[0])
            label_name = "crack"  # the crack class is assumed to be named "crack"
            # Append the label entry
            json_labels.append({
                "image": image_names[i],
                "annotations": [{
                    "label": label_name,
                    "coordinates": {
                        "x": xmin,
                        "y": ymin,
                        "width": xmax - xmin,
                        "height": ymax - ymin
                    }
                }]
            })
        # Save the labels as a JSON file named after the image
        if json_labels:
            json_file_path = os.path.join(output_dir, f"{os.path.splitext(image_names[i])[0]}.json")
            with open(json_file_path, 'w') as json_file:
                json.dump(json_labels, json_file, indent=4)
            print(f"Generated JSON label for {image_names[i]}")


# Main program
if __name__ == "__main__":
    # Load the images and labels
    images, labels, image_all = load_images_and_labels_from_json("dataset2/train", "dataset2/train_labels")
    # Prepare the data
    train_images, train_labels = prepare_data(images, labels, input_shape=(224, 224))
    # Generate the XML file (if needed)
    generate_xml(labels, img_size=(640, 480))
    # Split the dataset
    train_images, test_images, train_labels, test_labels = train_test_split(
        train_images, train_labels, test_size=0.2
    )
    # Train the initial model
    initial_model = train_model(train_images, train_labels)

    # Prepare the image data from which the test split will be excluded
    input_shape = (224, 224)
    processed_images = []
    image_names = []  # file names of the unannotated images
    for image_path in os.listdir("dataset2/train"):
        img_path = os.path.join("dataset2/train", image_path)
        image = cv2.imread(img_path)
        if image is not None:
            resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
            processed_images.append(resized_image)
            image_names.append(image_path)  # record the file name
    processed_images = np.array(processed_images, dtype=np.float32) / 255.0  # normalize the image data
    num_test_images = len(test_images)
    unannotated_images = processed_images[:-num_test_images]
    print(unannotated_images.shape)
    # Iterative training on the images left after removing the test data
    iterative_training(processed_images, unannotated_images, initial_model)
    # Validate the model
    validate_model(initial_model, test_images, test_labels)
    # Generate and save JSON labels for the unannotated images
    generate_json_labels(initial_model, unannotated_images, image_names[:-num_test_images], output_dir="train_labels")
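The main program trains and validates initial_model but never writes it to disk. If the trained detector should be reused without retraining, a minimal sketch using the standard Keras save/load calls (the file name crack_detector.h5 is an assumption):

# Save the trained detector once iterative_training and validate_model have finished
initial_model.save("crack_detector.h5")

# Reload it later, e.g. to run generate_json_labels on a new batch of images
restored_model = tf.keras.models.load_model("crack_detector.h5")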