import numpy as np
import torch
import torchvision
from torch.optim import lr_scheduler
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
import torch.optim as optim
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import os
import matplotlib.pyplot as plt
import cv2
from torch.utils.data import Dataset
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# ===================== 1. CocoDataset (fixed) =====================
class CocoDataset(Dataset):
def __init__(self, img_dir, ann_file, transform=None):
self.img_dir = img_dir
self.coco = COCO(ann_file)
self.img_ids = self.coco.getImgIds()
self.transform = transform
self.resize_size = (640, 640)
def resize_image(self, img, target):
orig_h, orig_w = img.shape[:2]
new_w, new_h = self.resize_size
        # Aspect-ratio-preserving resize plus padding, so small objects are not distorted
scale = min(new_w / orig_w, new_h / orig_h)
resize_w = int(orig_w * scale)
resize_h = int(orig_h * scale)
img = cv2.resize(img, (resize_w, resize_h))
        pad_w = (new_w - resize_w) // 2
        pad_h = (new_h - resize_h) // 2
        # Pad asymmetrically so odd leftovers still yield exactly new_w x new_h
        img = cv2.copyMakeBorder(img, pad_h, new_h - resize_h - pad_h,
                                 pad_w, new_w - resize_w - pad_w,
                                 cv2.BORDER_CONSTANT, value=0)
        # Adjust the annotation boxes for the scale and padding
if len(target['boxes']) > 0:
boxes = target['boxes'].numpy()
boxes[:, [0, 2]] *= scale
boxes[:, [1, 3]] *= scale
boxes[:, [0, 2]] += pad_w
boxes[:, [1, 3]] += pad_h
valid = (boxes[:, 2] - boxes[:, 0] > 1) & (boxes[:, 3] - boxes[:, 1] > 1)
valid &= (boxes[:, 0] >= 0) & (boxes[:, 1] >= 0) & (boxes[:, 2] <= new_w) & (boxes[:, 3] <= new_h)
boxes = boxes[valid]
target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
target['labels'] = target['labels'][valid]
target['height'] = torch.tensor(new_h)
target['width'] = torch.tensor(new_w)
return img, target
def __getitem__(self, idx):
img_id = self.img_ids[idx]
img_info = self.coco.loadImgs(img_id)[0]
img_path = f"{self.img_dir}/{img_info['file_name']}"
img = cv2.imread(img_path)
if img is None:
            raise FileNotFoundError(f"Image not found: {img_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
ann_ids = self.coco.getAnnIds(imgIds=img_id)
anns = self.coco.loadAnns(ann_ids)
boxes, labels = [], []
for ann in anns:
            if ann['category_id'] != 1:  # keep only category_id 1, the single foreground class
continue
x1, y1, w, h = ann['bbox']
if w <= 0 or h <= 0:
continue
x2 = x1 + w
y2 = y1 + h
boxes.append([x1, y1, x2, y2])
labels.append(1)
        boxes = torch.as_tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0, 4), dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64) if labels else torch.empty((0,), dtype=torch.int64)
target = {'boxes': boxes, 'labels': labels, 'image_id': torch.tensor([img_id])}
img, target = self.resize_image(img, target)
if self.transform:
img = self.transform(img)
return img, target
def __len__(self):
return len(self.img_ids)
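# Optional sanity check for CocoDataset (a minimal sketch; it is not called
# during training -- run it by hand to verify box/label alignment after the
# resize-and-pad step).
def sanity_check_sample(ds, idx=0):
    img, target = ds[idx]
    n_boxes = target['boxes'].shape[0]
    assert n_boxes == target['labels'].shape[0], "boxes and labels out of sync"
    assert (target['boxes'][:, 2:] >= target['boxes'][:, :2]).all(), "degenerate box"
    print(f"sample {idx}: image shape {tuple(img.shape)}, {n_boxes} boxes")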
# ===================== 2. Visualization (fixed de-normalization) =====================
def visualize_predictions(image, targets, outputs, idx=0):
img = image[idx].permute(1, 2, 0).cpu().numpy()
img = (img * np.array([0.229, 0.224, 0.225])) + np.array([0.485, 0.456, 0.406])
img = np.clip(img, 0, 1)
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.imshow(img)
    # Draw ground-truth boxes
gt_boxes = targets[idx]['boxes'].cpu()
for box in gt_boxes:
x1, y1, x2, y2 = box
rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color='green', linewidth=2)
ax.add_patch(rect)
    # Draw predicted boxes
if len(outputs) > idx:
pred = outputs[idx]
        keep = pred['scores'] > 0.1  # low threshold so more small objects are shown
pred_boxes = pred['boxes'][keep].cpu()
pred_labels = pred['labels'][keep].cpu()
pred_scores = pred['scores'][keep].cpu()
for box, label, score in zip(pred_boxes, pred_labels, pred_scores):
x1, y1, x2, y2 = box
rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color='red', linewidth=2)
ax.text(x1, y1, f'{label}:{score:.2f}', color='red', fontsize=12)
ax.add_patch(rect)
plt.title("Green=GT, Red=Pred")
plt.axis('off')
plt.show()
# ===================== 3. Main training (no checkpoint loading) =====================
os.makedirs('checkpoints', exist_ok=True)
# Data loading
transform = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
train_dataset = CocoDataset(
img_dir=r'D:\Yolov8\coco_dataset1\images\train',
ann_file=r'D:\Yolov8\coco_dataset1\annotations\train.json',
transform=transform
)
val_dataset = CocoDataset(
img_dir=r'D:\Yolov8\coco_dataset1\images\val',
ann_file=r'D:\Yolov8\coco_dataset1\annotations\val.json',
transform=transform
)
def collate_fn(batch):
return tuple(zip(*batch))
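# Note: each image in a detection batch carries a different number of boxes, so
# the batch cannot be stacked into single tensors; collate_fn regroups it into
# a tuple of images and a tuple of targets, which is the list-style input
# torchvision detection models expect.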
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
# Keyword arguments and a weights enum avoid the torchvision >= 0.13 deprecation warnings
backbone = resnet_fpn_backbone(backbone_name='resnet50', weights=torchvision.models.ResNet50_Weights.DEFAULT)
# Model setup (custom small-object anchors; only the backbone is pretrained)
num_classes = 2
# AnchorGenerator needs one sizes tuple and one aspect-ratios tuple per feature
# map. resnet_fpn_backbone returns five maps ('0'-'3' plus 'pool'), so a single
# tuple such as ((16, 32, 64),) trips the assertion shown in the log below.
anchor_sizes = ((16,), (32,), (64,), (128,), (256,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
# Key point: only the backbone loads pretrained weights; the RPN and detection heads are randomly initialized (no parameter conflicts)
model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    min_size=640,
    max_size=640,
    # The dataset transform already applies ImageNet normalization, so make the
    # model's internal transform a no-op to avoid normalizing twice.
    image_mean=[0.0, 0.0, 0.0],
    image_std=[1.0, 1.0, 1.0],
    box_nms_thresh=0.5,
    box_score_thresh=0.05,
)
# Redundant (FasterRCNN already built a two-class predictor above) but harmless;
# kept to make the head-replacement pattern explicit.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)
print(f"使用设备:{device}")
# Optimizer (a single learning rate for all parameters, suited to small objects)
optimizer = optim.SGD(
model.parameters(),
lr=1e-3,
momentum=0.9,
weight_decay=5e-4,
nesterov=True
)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)  # decay the learning rate every 10 epochs
# Training loop (raise num_epochs, e.g. to 20, for fuller small-object feature learning)
num_epochs = 5
for epoch in range(num_epochs):
model.train()
total_loss = 0.0
total_cls_loss = 0.0
total_reg_loss = 0.0
for batch_idx, (images, targets) in enumerate(train_loader):
images = [img.to(device) for img in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
optimizer.zero_grad()
losses.backward()
optimizer.step()
total_loss += losses.item()
total_cls_loss += loss_dict['loss_classifier'].item()
total_reg_loss += loss_dict['loss_box_reg'].item()
if (batch_idx + 1) % 10 == 0:
print(
f"Epoch [{epoch + 1}/{num_epochs}], "
f"Batch [{batch_idx + 1}/{len(train_loader)}], "
f"Loss: {losses.item():.4f}, "
f"Clf Loss: {loss_dict['loss_classifier'].item():.4f}, "
f"Reg Loss: {loss_dict['loss_box_reg'].item():.4f}"
)
avg_loss = total_loss / len(train_loader)
avg_cls = total_cls_loss / len(train_loader)
avg_reg = total_reg_loss / len(train_loader)
print(f"Epoch {epoch + 1} - Avg Loss: {avg_loss:.4f}, Cls: {avg_cls:.4f}, Reg: {avg_reg:.4f}")
scheduler.step()
    # Save a checkpoint (only this run's weights; nothing to conflict with)
torch.save(model.state_dict(), f'checkpoints/faster_rcnn_epoch_{epoch + 1}.pth')
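    # To resume training later (a sketch), rebuild the model with the same
    # backbone/anchor/num_classes configuration, then:
    #   model.load_state_dict(torch.load('checkpoints/faster_rcnn_epoch_5.pth'))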
# ===================== 4. Evaluation + visualization =====================
print("\n开始评估模型...")
model.eval()
results = []
# 可视化验证集第一个样本
print("可视化验证集预测结果...")
images, targets = next(iter(val_loader))
images = [img.to(device) for img in images]
with torch.no_grad():
outputs = model(images)
for output in outputs:
print("预测标签:", output['labels'].cpu().numpy())
print("预测置信度:", output['scores'].cpu().numpy())
print("预测框坐标:", output['boxes'].cpu().numpy())
visualize_predictions(images, targets, outputs, idx=0)
# Generate evaluation results
with torch.no_grad():
for images, targets in val_loader:
images = [img.to(device) for img in images]
outputs = model(images)
for output, target in zip(outputs, targets):
img_id = target['image_id'].item()
boxes = output['boxes'].cpu().numpy()
scores = output['scores'].cpu().numpy()
labels = output['labels'].cpu().numpy()
for box, score, label in zip(boxes, scores, labels):
                if score < 0.01:  # very low threshold, keep nearly all candidate boxes
continue
x1, y1, x2, y2 = box
w = x2 - x1
h = y2 - y1
if w <= 0 or h <= 0:
continue
results.append({
'image_id': img_id,
'category_id': int(label),
'bbox': [float(x1), float(y1), float(w), float(h)],
'score': float(score)
})
if len(results) == 0:
print("⚠️ 警告:无有效预测结果!")
else:
cocoGt = val_dataset.coco
if 'info' not in cocoGt.dataset:
cocoGt.dataset['info'] = {'version': '1.0'}
try:
cocoDt = cocoGt.loadRes(results)
cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
print("✅ 评估完成")
print(f"mAP@0.5:0.95 = {cocoEval.stats[0]:.4f}")
print(f"mAP@0.5 = {cocoEval.stats[1]:.4f}")
print(f"Recall@0.5 = {cocoEval.stats[6]:.4f}")
except Exception as e:
print("❌ 评估失败:", str(e))
C:\Users\YangGuang\.conda\envs\pytorch\python.exe D:\Yolov8\.github\Faster-Rcnn\faster-rcnn.py
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\_utils.py:135: UserWarning: Using 'backbone_name' as positional parameter(s) is deprecated since 0.13 and may be removed in the future. Please use keyword parameter(s) instead.
warnings.warn(
C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\cuda\__init__.py:235: UserWarning:
NVIDIA GeForce RTX 5070 Laptop GPU with CUDA capability sm_120 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90 compute_37.
If you want to use the NVIDIA GeForce RTX 5070 Laptop GPU GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
warnings.warn(
Using device: cuda
Traceback (most recent call last):
File "D:\Yolov8\.github\Faster-Rcnn\faster-rcnn.py", line 204, in <module>
loss_dict = model(images, targets)
^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\detection\generalized_rcnn.py", line 104, in forward
proposals, proposal_losses = self.rpn(images, features, targets)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\detection\rpn.py", line 362, in forward
anchors = self.anchor_generator(images, features)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\detection\anchor_utils.py", line 127, in forward
anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torchvision\models\detection\anchor_utils.py", line 88, in grid_anchors
torch._assert(
File "C:\Users\YangGuang\.conda\envs\pytorch\Lib\site-packages\torch\__init__.py", line 2041, in _assert
assert condition, message
^^^^^^^^^
AssertionError: Anchors should be Tuple[Tuple[int]] because each feature map could potentially have different sizes and aspect ratios. There needs to be a match between the number of feature maps passed and the number of sizes / aspect ratios specified.
Process finished with exit code 1
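The AssertionError that ends the run is the anchor/feature-map mismatch: resnet_fpn_backbone returns five feature maps ('0' through '3' plus 'pool'), so AnchorGenerator must receive one sizes tuple and one aspect-ratios tuple per map, which is why the script supplies five of each. Below is a minimal sketch isolating the rule (assuming torchvision >= 0.13); note that the generator constructs fine either way, and the assertion only fires when the RPN calls it at forward time:

import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator

backbone = resnet_fpn_backbone(backbone_name='resnet50', weights=None)
features = backbone(torch.rand(1, 3, 640, 640))
print(len(features))  # 5 feature maps: '0', '1', '2', '3', 'pool'

# One tuple for five maps -> trips the assertion inside the RPN forward pass
bad = AnchorGenerator(sizes=((16, 32, 64),), aspect_ratios=((0.5, 1.0, 2.0),))
# One (sizes, aspect_ratios) pair per feature map -> valid
good = AnchorGenerator(sizes=((16,), (32,), (64,), (128,), (256,)),
                       aspect_ratios=((0.5, 1.0, 2.0),) * 5)

The CUDA warning earlier in the log is a separate issue: this PyTorch build ships no kernels for the RTX 5070's sm_120 compute capability (torch.cuda.get_arch_list() shows what a given build supports), so even with the anchors fixed, GPU training requires a PyTorch build compiled with sm_120 support.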