def preprocess_target(target):
    """
    Preprocess the target annotations of one sample into the format the model
    expects, filtering out invalid bounding boxes.
    Args:
        target: annotations for one sample (a list of COCO-format dicts)
    Returns:
        The processed target dict, or None if no valid boxes remain.
    """
    # Collect valid bounding boxes and labels
    boxes = []
    labels = []
    for annotation in target:  # iterate over each annotation dict
        # COCO bboxes are [x_min, y_min, width, height]; convert to [x_min, y_min, x_max, y_max]
        x_min, y_min, width, height = annotation["bbox"]
        x_max = x_min + width
        y_max = y_min + height
        if width > 0 and height > 0:  # drop degenerate boxes
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(annotation["category_id"])
    if len(boxes) == 0:
        return None  # no valid boxes in this sample
    # Convert to PyTorch tensors
    processed_target = {
        "boxes": torch.tensor(boxes, dtype=torch.float32),
        "labels": torch.tensor(labels, dtype=torch.int64),
    }
    return processed_target
def collate_fn(batch):
    """
    Custom collate_fn that drops samples with no valid targets and keeps
    images/targets as lists (detection models expect variable-sized inputs).
    """
    images = []
    targets = []
    for img, target in batch:
        processed_target = preprocess_target(target)
        if processed_target is not None:  # skip samples with no valid boxes
            images.append(img)
            targets.append(processed_target)
    if len(images) == 0:
        raise ValueError("All samples in the batch have invalid targets!")
    return images, targets
import os
import torch
train_anno = '/kaggle/input/coco-2017-dataset/coco2017/annotations/instances_train2017.json'
print("Training annotations file:", train_anno)
print("File exists:", os.path.exists(train_anno))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import torch
import torchvision
from torchvision.transforms import ToTensor
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
# from torchvision.models.detection import fcos_resnet50_fpn
from pycocotools.coco import COCO
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from PIL import Image
import matplotlib.pyplot as plt
import os
# 1. Configure dataset paths
coco_root = "/kaggle/input/coco-2017-dataset/coco2017"  # replace with the root path of your COCO dataset
train_dir = os.path.join(coco_root, "train2017")
val_dir = os.path.join(coco_root, "val2017")
train_anno = os.path.join(coco_root, "annotations", "instances_train2017.json")
val_anno = os.path.join(coco_root, "annotations", "instances_val2017.json")
# 2. Define the dataset loading helper
def get_coco_data(root, annotation_file, split='train'):
    transform = ToTensor()  # only the basic ToTensor transform
    dataset = CocoDetection(root, annotation_file, transform=transform)
    return dataset
# 3. Create the DataLoader
def create_dataloader(dataset, batch_size=8, num_workers=4):
    # collate_fn is required here: detection targets contain a variable number of
    # boxes per image, so the default collate cannot stack them.
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            num_workers=num_workers, collate_fn=collate_fn)
    return dataloader
# Load the training dataset
train_dataset = get_coco_data(train_dir, train_anno, split='train')
train_dataloader = DataLoader(
train_dataset,
batch_size=8,
shuffle=True,
num_workers=4,
collate_fn=collate_fn
)
# Load the pretrained SSDLite model
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True)
model.eval()  # set to evaluation mode
model.to(device)
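# Minimal sanity-check sketch (not part of the original script, and purely an
# assumption): run the pretrained model once on a random image-sized tensor to
# confirm the eval-mode output format before training starts.
with torch.no_grad():
    dummy_input = [torch.rand(3, 320, 320).to(device)]  # hypothetical input; any 3xHxW tensor works
    dummy_outputs = model(dummy_input)
print(dummy_outputs[0].keys())  # dict_keys(['boxes', 'scores', 'labels'])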
from torch.utils.data import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Early stopping mechanism
class EarlyStopping:
def __init__(self, patience=5, delta=0.0):
self.patience = patience
self.delta = delta
self.best_loss = None
self.counter = 0
self.stop_training = False
def __call__(self, val_loss):
if self.best_loss is None or val_loss + self.delta < self.best_loss:
self.best_loss = val_loss
self.counter = 0
else:
self.counter += 1
if self.counter >= self.patience:
self.stop_training = True
def calculate_metrics(predictions, targets):
    # Extract the predicted and ground-truth class labels
    pred_labels = [item['labels'].cpu().numpy() for item in predictions]
    true_labels = [item['labels'].cpu().numpy() for item in targets]
    # Flatten into 1-D arrays
    # Note: this assumes predictions and targets align one-to-one; in general,
    # detections need to be matched to ground truth (e.g. by IoU) first.
    pred_labels = np.concatenate(pred_labels)
    true_labels = np.concatenate(true_labels)
    # Compute accuracy, precision, recall and F1 score
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, average='weighted')
    recall = recall_score(true_labels, pred_labels, average='weighted')
    f1 = f1_score(true_labels, pred_labels, average='weighted')
    return accuracy, precision, recall, f1
def plot_confusion_matrix(true_labels, pred_labels, class_names):
cm = confusion_matrix(true_labels, pred_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()
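# Hedged usage sketch (not part of the original script): plot_confusion_matrix expects
# label arrays that are already matched one-to-one; that matching is not done anywhere
# above, so the arrays and class names below are purely hypothetical illustrations.
example_true_labels = np.array([1, 1, 2, 3])  # hypothetical matched ground-truth class ids
example_pred_labels = np.array([1, 2, 2, 3])  # hypothetical matched predicted class ids
plot_confusion_matrix(example_true_labels, example_pred_labels,
                      class_names=["person", "bicycle", "car"])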
import os
import time
import torch
from torch.optim.lr_scheduler import StepLR
from torchmetrics.detection.mean_ap import MeanAveragePrecision
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Helper to extract the COCO category names
def get_coco_class_names(coco):
    return [coco.loadCats(cat_id)[0]["name"] for cat_id in coco.getCatIds()]
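# Hedged usage sketch (not part of the original script): the class-name helper can be
# driven from a pycocotools COCO object built from the annotation paths defined above.
coco_val = COCO(val_anno)  # val_anno is configured in the path-setup cell
coco_class_names = get_coco_class_names(coco_val)
print(len(coco_class_names), coco_class_names[:5])  # COCO 2017 has 80 categories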
def train_model_optimized(model, dataloader, device, optimizer, scheduler, num_epochs=10, early_stopping=None):
    model.train()
    # mAP computation is kept out of the training loop and deferred to the validation stage
    start_time = time.time()
    best_loss = float('inf')
    best_model_path = "/kaggle/working/best_model.pth"
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        epoch_start_time = time.time()
        batch_count = 0
        for batch_idx, (images, targets) in enumerate(dataloader):
            try:
                # Check that the batch is not empty
                if len(images) == 0:
                    print(f"Warning: Empty batch at {batch_idx}, skipping...")
                    continue
                # Move data to the device
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                # Forward and backward pass
                optimizer.zero_grad()
                loss_dict = model(images, targets)
                # Check that the losses are valid (no NaNs)
                if torch.isnan(loss_dict['bbox_regression']).any() or torch.isnan(loss_dict['classification']).any():
                    print(f"Warning: NaN loss detected at batch {batch_idx}, skipping...")
                    continue
                losses = sum(loss for loss in loss_dict.values())
                losses.backward()
                # Clip gradients to prevent exploding gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                # Accumulate the loss
                epoch_loss += losses.item()
                batch_count += 1
                # Print progress every 100 batches
                if batch_idx % 100 == 0:
                    batch_time = time.time() - epoch_start_time
                    avg_loss = epoch_loss / (batch_count if batch_count > 0 else 1)
                    print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx}/{len(dataloader)}], "
                          f"Avg Loss: {avg_loss:.4f}, Time: {batch_time:.2f}s")
            except Exception as e:
                print(f"Error in batch {batch_idx}: {e}")
                continue
        if batch_count == 0:
            print(f"Epoch {epoch+1} had no valid batches, skipping...")
            continue
        # Step the learning-rate scheduler after each epoch
        scheduler.step()
        avg_epoch_loss = epoch_loss / batch_count
        # Save the best model so far (based on average epoch loss)
        if avg_epoch_loss < best_loss:
            best_loss = avg_epoch_loss
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved with loss: {best_loss:.4f}")
        epoch_time = time.time() - epoch_start_time
        print(f"Epoch [{epoch+1}/{num_epochs}] Completed, Average Loss: {avg_epoch_loss:.4f}, Time: {epoch_time:.2f}s")
        # Early-stopping check
        if early_stopping:
            early_stopping(avg_epoch_loss)
            if early_stopping.stop_training:
                print("Early stopping triggered!")
                break
    total_time = time.time() - start_time
    print(f"Training completed in {total_time // 60:.0f}m {total_time % 60:.0f}s")
    print(f"Best model saved at {best_model_path} with loss: {best_loss:.4f}")
    return model
# Example: define the optimizer and learning-rate scheduler
import torch.optim as optim
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
model = ssdlite320_mobilenet_v3_large(pretrained=True)
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
# Start training
early_stopping = EarlyStopping(patience=5, delta=0.01)  # configure early stopping
train_model_optimized(model, train_dataloader, device, optimizer, lr_scheduler,
                      num_epochs=10, early_stopping=early_stopping)
def plot_lr_loss(lr_list, loss_list):
plt.figure(figsize=(10, 5))
plt.plot(lr_list, label="Learning Rate")
plt.plot(loss_list, label="Loss")
plt.xlabel("Iterations")
plt.ylabel("Value")
plt.legend()
plt.title("Learning Rate and Loss Curves")
plt.show()
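# Hedged usage sketch (not part of the original script): train_model_optimized does not
# record per-iteration values, so lr_history / loss_history are hypothetical lists that
# would have to be appended to inside the batch loop, e.g.:
#   lr_history.append(optimizer.param_groups[0]["lr"])
#   loss_history.append(losses.item())
lr_history = []
loss_history = []
plot_lr_loss(lr_history, loss_history)  # with the lists populated, this plots both curves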
from torchmetrics.detection import MeanAveragePrecision
def calculate_map(model, dataloader, device):
metric = MeanAveragePrecision()
model.eval()
for images, targets in dataloader:
images = [img.to(device) for img in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
with torch.no_grad():
preds = model(images)
metric.update(preds, targets)
map_score = metric.compute()
return map_score
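# Hedged evaluation sketch (not part of the original script): build a validation
# DataLoader with the same collate_fn and compute COCO-style mAP with calculate_map.
val_dataset = get_coco_data(val_dir, val_anno, split='val')
val_dataloader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)
map_results = calculate_map(model, val_dataloader, device)
print(map_results["map"], map_results["map_50"])  # overall mAP and mAP@IoU=0.5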
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.transforms as T
import torch
def detect_objects(model, image_path, device):
    """
    Run the trained detection model on an image and display the results.
    Args:
        model: the trained object detection model
        image_path: path to the image to run detection on
        device: device to use (CPU or GPU)
    """
    model.eval()  # set to evaluation mode
    # Load the image and preprocess it
    image = Image.open(image_path).convert("RGB")
    transform = T.Compose([
        T.ToTensor()  # convert to a tensor
    ])
    image_tensor = transform(image).unsqueeze(0).to(device)
    # Run detection
    with torch.no_grad():
        outputs = model(image_tensor)
    # Extract the results
    boxes = outputs[0]['boxes'].cpu().numpy()
    scores = outputs[0]['scores'].cpu().numpy()
    # Display threshold
    score_threshold = 0.5  # only show boxes with confidence above 0.5
    # Plot the results
    plt.figure(figsize=(12, 8))
    plt.imshow(image)
    ax = plt.gca()
    for box, score in zip(boxes, scores):
        if score > score_threshold:
            # Draw the detection box
            x_min, y_min, x_max, y_max = box
            rect = plt.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                fill=False,
                color="red",
                linewidth=2
            )
            ax.add_patch(rect)
            ax.text(
                x_min, y_min,
                f"{score:.2f}",
                bbox={"facecolor": "yellow", "alpha": 0.5},
                fontsize=10,
                color="black"
            )
    plt.axis("off")
    plt.show()
# Test the detection function
image_path = "/kaggle/input/coco-2017-dataset/coco2017/test2017/000000000890.jpg"  # replace with your own test image path
detect_objects(model, image_path, device)