1. Dataset and code structure
(Only the files marked in the red box are used.)
二、依赖包版本
关键包:tensorflow2.6 + python3.8 + keras2.6.0 + numpy1.20.0
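Besides the packages above, the script also imports OpenCV (cv2) and scikit-learn, so both need to be installed. A quick sanity check you can run before training (a minimal sketch; the expected versions in the comments simply mirror the list above):

import sys
import tensorflow as tf
import keras
import numpy as np
import cv2
import sklearn

print("Python:", sys.version.split()[0])    # expect 3.8.x
print("TensorFlow:", tf.__version__)        # expect 2.6.x
print("Keras:", keras.__version__)          # expect 2.6.0
print("NumPy:", np.__version__)             # expect 1.20.0
print("OpenCV:", cv2.__version__)
print("scikit-learn:", sklearn.__version__)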
3. Code
import os
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import tensorflow as tf
import cv2
import json
from tensorflow.keras import layers, models
import numpy as np
from tensorflow.keras.utils import to_categorical


# Step 3: process the stone-carving photos and generate an XML annotation file
def generate_xml(labels, img_size):
    # Create the root node of the XML file
    root = ET.Element("annotation")
    size = ET.SubElement(root, "size")
    width = ET.SubElement(size, "width")
    height = ET.SubElement(size, "height")
    width.text = str(img_size[0])
    height.text = str(img_size[1])
    for image_labels in labels:
        for label in image_labels:
            # Add an object element for every bounding box
            obj = ET.SubElement(root, "object")
            name = ET.SubElement(obj, "name")
            name.text = label['name']
            bndbox = ET.SubElement(obj, "bndbox")
            xmin = ET.SubElement(bndbox, "xmin")
            ymin = ET.SubElement(bndbox, "ymin")
            xmax = ET.SubElement(bndbox, "xmax")
            ymax = ET.SubElement(bndbox, "ymax")
            xmin.text = str(label['xmin'])
            ymin.text = str(label['ymin'])
            xmax.text = str(label['xmax'])
            ymax.text = str(label['ymax'])
    # Write a single annotations.xml containing all objects
    tree = ET.ElementTree(root)
    tree.write("annotations.xml")


# Step 5: train the crack object-detection model
def train_model(train_images, train_labels, input_shape=(224, 224, 3), num_classes=1, max_boxes=10):
    print(f"Train Images Shape: {train_images.shape}")
    print(f"Train Labels Shape: {train_labels.shape}")
    # If the labels carry a trailing singleton dimension, drop it
    if train_labels.ndim == 4 and train_labels.shape[-1] == 1:
        train_labels = np.squeeze(train_labels, axis=-1)
    model = tf.keras.models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # One row of (4 box coordinates + class score) per predicted box
        layers.Dense(max_boxes * (num_classes + 4), activation='linear'),
        layers.Reshape((max_boxes, num_classes + 4))
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    model.fit(train_images, train_labels, batch_size=64, epochs=500, validation_split=0.2)
    return model


# Step 3: automatically detect cracks on unannotated carvings
def detect_and_correct(model, unannotated_images):
    detections = model.predict(unannotated_images)
    corrected_images = []
    for img, detection in zip(unannotated_images, detections):
        # Manual correction of the detections could be added here
        corrected_images.append((img, detection))
    return corrected_images


# Step 4: train a new model with the corrected annotations
def retrain_model(corrected_images):
    corrected_labels = np.array([detection for _, detection in corrected_images])
    corrected_images = np.array([img for img, _ in corrected_images])
    return train_model(corrected_images, corrected_labels)


# Step 6: repeat training until all unannotated photos have been processed
def iterative_training(train_images, unannotated_images, initial_model):
    print("train_images:", train_images.shape)
    print("unannotated_images:", unannotated_images.shape)
    # Placeholder labels: one (max_boxes, 5) array per unannotated image
    unannotated_labels = np.zeros((unannotated_images.shape[0], 10, 5))
    print('unannotated_labels:', unannotated_labels.shape)
    while unannotated_images.shape[0] > 0:  # loop until no unannotated images remain
        # Dynamically adjust how many images to pick this round
        num_to_select = min(241, unannotated_images.shape[0])
        # Select a subset of the unannotated images for prediction
        selected_indices = np.random.choice(unannotated_images.shape[0], size=num_to_select, replace=False)
        selected_images = unannotated_images[selected_indices]
        selected_labels = unannotated_labels[selected_indices]
        # Predict with the current model
        predictions = initial_model.predict(selected_images)
        # Post-process the predictions (example only; adapt to the real use case)
        processed_predictions = process_predictions(predictions, 0.5)
        # Update the labels
        unannotated_labels[selected_indices] = processed_predictions
        # Retrain the model on the newly labelled subset
        initial_model.fit(selected_images, processed_predictions, batch_size=64, epochs=500)
        # Remove the now-labelled images from the unannotated pool
        unannotated_images = np.delete(unannotated_images, selected_indices, axis=0)
        unannotated_labels = np.delete(unannotated_labels, selected_indices, axis=0)
        # Report the state of the current iteration
        print(f"Remaining unannotated images: {unannotated_images.shape[0]}")
    return initial_model


# Step 7: validate the latest model
def validate_model(model, test_images, test_labels):
    results = model.evaluate(test_images, test_labels)
    print("Validation Results:", results)


# Step 1: load the custom dataset
def load_images_and_labels_from_json(image_dir, label_dir):
    images = []
    labels = []
    image_all = []
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        label_path = os.path.join(label_dir, filename.replace(".jpg", ".json"))
        # Read the image with OpenCV
        image = cv2.imread(img_path)
        if image is None:
            print(f"Warning: Could not read image {img_path}.")
            continue  # skip unreadable images
        image_all.append(image)
        # Skip the image if it has no label file (no object-detection annotation)
        if not os.path.exists(label_path):
            print(f"No label file for: {filename}, skipping...")
            continue
        # Parse the JSON file to get the annotations
        with open(label_path, 'r') as f:
            label_data = json.load(f)
        image_labels = []
        for item in label_data:
            # Make sure the image name matches
            if item['image'] != filename:
                continue
            for annotation in item['annotations']:
                name = annotation['label']
                coordinates = annotation['coordinates']
                xmin = int(coordinates['x'])
                ymin = int(coordinates['y'])
                xmax = int(coordinates['x'] + coordinates['width'])
                ymax = int(coordinates['y'] + coordinates['height'])
                image_labels.append({'name': name, 'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax})
        # Keep the image only if it has at least one label
        if image_labels:
            images.append(image)
            labels.append(image_labels)
    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return images, labels, image_all


# Step 2: convert the data into the format the model expects
def prepare_data(images, labels, input_shape=(224, 224), max_boxes=10):
    processed_images = []
    processed_labels = []
    for image, image_labels in zip(images, labels):
        # Resize the image to the input shape
        resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
        processed_images.append(resized_image)
        # Each label becomes a fixed-size row [xmin, ymin, xmax, ymax, class_id]
        label_array = []
        for label in image_labels:
            try:
                xmin = label['xmin'] / image.shape[1]
                ymin = label['ymin'] / image.shape[0]
                xmax = label['xmax'] / image.shape[1]
                ymax = label['ymax'] / image.shape[0]
                class_id = 0  # cracks are assumed to be class 0
                label_array.append([xmin, ymin, xmax, ymax, class_id])
            except KeyError:
                # Skip malformed labels
                print(f"Skipping label with missing data: {label}")
                continue
        # Pad with zeros up to max_boxes, or truncate if there are too many boxes
        if len(label_array) < max_boxes:
            label_array.extend([[0, 0, 0, 0, 0]] * (max_boxes - len(label_array)))
        else:
            label_array = label_array[:max_boxes]
        processed_labels.append(label_array)
    # Convert to NumPy arrays and normalize the image data
    processed_images = np.array(processed_images, dtype=np.float32) / 255.0
    processed_labels = np.array(processed_labels, dtype=np.float32)
    return processed_images, processed_labels


# Post-process the model predictions
def process_predictions(predictions, conf_threshold=0.5, iou_threshold=0.4, max_boxes=10):
    """
    Post-process the model predictions with thresholding and non-maximum suppression (NMS).
    :param predictions: model predictions of shape (batch_size, num_boxes, 5), where the
                        last dimension is [xmin, ymin, xmax, ymax, class_id]
    :param conf_threshold: confidence threshold; predictions below it are ignored
    :param iou_threshold: IoU threshold used for NMS
    :param max_boxes: maximum number of bounding boxes per image
    :return: the processed predictions
    """
    processed_predictions = []
    for pred in predictions:
        # Drop predictions whose confidence is below the threshold
        valid_indices = np.where(pred[:, -1] >= conf_threshold)[0]
        pred = pred[valid_indices]
        if len(pred) == 0:
            # No valid predictions: pad with zeros
            processed_predictions.append(np.zeros((max_boxes, 5)))
            continue
        # Non-maximum suppression (NMS)
        boxes = pred[:, :4]
        scores = pred[:, -1]
        indices = tf.image.non_max_suppression(boxes, scores, max_output_size=max_boxes,
                                               iou_threshold=iou_threshold)
        # Keep only the boxes that survived NMS
        nms_pred = pred[indices.numpy()]
        # Pad to the fixed size
        if nms_pred.shape[0] < max_boxes:
            padding = np.zeros((max_boxes - nms_pred.shape[0], 5))
            nms_pred = np.vstack((nms_pred, padding))
        processed_predictions.append(nms_pred)
    return np.array(processed_predictions)


# Step 9: generate labels for unannotated images with the trained model and save them as JSON files
def generate_json_labels(model, unannotated_images, image_names, output_dir="train_labels_new",
                         input_shape=(224, 224), conf_threshold=0.5):
    """
    Use the trained model to generate labels for unannotated images and save them as JSON files.
    :param model: the trained model
    :param unannotated_images: the unannotated image data
    :param image_names: file names of the unannotated images
    :param output_dir: output folder for the generated labels
    :param input_shape: input image size expected by the model
    :param conf_threshold: confidence threshold; predictions below it are ignored
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Preprocess the images (resize and normalize)
    processed_images = []
    for image in unannotated_images:
        resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
        processed_images.append(resized_image)
    processed_images = np.array(processed_images, dtype=np.float32)
    if processed_images.max() > 1.0:
        # Only rescale raw 0-255 images; the caller may already have normalized them
        processed_images /= 255.0
    # Run the model
    predictions = model.predict(processed_images)
    # Handle the predictions image by image
    for i, (image, prediction) in enumerate(zip(unannotated_images, predictions)):
        json_labels = []
        for pred in prediction:
            # Skip predictions whose confidence is below the threshold
            if pred[-1] < conf_threshold:
                continue
            # Convert normalized coordinates back to the image size
            xmin = int(pred[0] * image.shape[1])
            ymin = int(pred[1] * image.shape[0])
            xmax = int(pred[2] * image.shape[1])
            ymax = int(pred[3] * image.shape[0])
            label_name = "crack"  # the crack class is assumed to be named "crack"
            # Append the label entry
            json_labels.append({
                "image": image_names[i],
                "annotations": [{
                    "label": label_name,
                    "coordinates": {
                        "x": xmin,
                        "y": ymin,
                        "width": xmax - xmin,
                        "height": ymax - ymin
                    }
                }]
            })
        # Save the labels as a JSON file named after the image
        if json_labels:
            json_file_path = os.path.join(output_dir, f"{os.path.splitext(image_names[i])[0]}.json")
            with open(json_file_path, 'w') as json_file:
                json.dump(json_labels, json_file, indent=4)
            print(f"Generated JSON label for {image_names[i]}")


# Main program
if __name__ == "__main__":
    # Load the images and labels
    images, labels, image_all = load_images_and_labels_from_json("dataset2/train", "dataset2/train_labels")
    # Prepare the data
    train_images, train_labels = prepare_data(images, labels, input_shape=(224, 224))
    # Generate the XML file (if needed)
    generate_xml(labels, img_size=(640, 480))
    # Split the dataset
    train_images, test_images, train_labels, test_labels = train_test_split(
        train_images, train_labels, test_size=0.2
    )
    # Train the initial model
    initial_model = train_model(train_images, train_labels)

    # Prepare the image data from which the test split will be excluded
    input_shape = (224, 224)
    processed_images = []
    image_names = []  # file names of the unannotated images
    for image_path in os.listdir("dataset2/train"):
        img_path = os.path.join("dataset2/train", image_path)
        image = cv2.imread(img_path)
        if image is not None:
            resized_image = cv2.resize(image, (input_shape[1], input_shape[0]))
            processed_images.append(resized_image)
            image_names.append(image_path)  # record the file name
    processed_images = np.array(processed_images, dtype=np.float32) / 255.0  # normalize the image data
    num_test_images = len(test_images)
    unannotated_images = processed_images[:-num_test_images]
    print(unannotated_images.shape)
    # Iterative training on the images left after removing the test data
    iterative_training(processed_images, unannotated_images, initial_model)
    # Validate the model
    validate_model(initial_model, test_images, test_labels)
    # Generate and save JSON labels for the unannotated images
    generate_json_labels(initial_model, unannotated_images, image_names[:-num_test_images], output_dir="train_labels")
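The main program trains and validates initial_model but never writes it to disk. If the trained detector should be reused without retraining, a minimal sketch using the standard Keras save/load calls (the file name crack_detector.h5 is an assumption):

# Save the trained detector once iterative_training and validate_model have finished
initial_model.save("crack_detector.h5")

# Reload it later, e.g. to run generate_json_labels on a new batch of images
restored_model = tf.keras.models.load_model("crack_detector.h5")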