# ===================================================================
# FULL IMPLEMENTATION: Lane Detection on TuSimple
# Model: ResNet50-DCN (offset-only deformable conv, no modulation mask) + FPN + MultiTaskHead
# Loss: Focal + Regress + Distance + Variance
# Dataset: TuSimple
# Eval Metrics: Acc, P, R, F1, FPR, FNR
# ===================================================================
import os
import json
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
from torchvision.ops import DeformConv2d
# ======================================
# 1. Deformable convolution module
# ======================================
class DeformableConv(nn.Module):
    """Deformable convolution with its own offset-prediction branch.

    A regular convolution predicts ``2 * kernel_size * kernel_size`` offset
    channels from the input, and ``DeformConv2d`` uses them to shift its
    sampling locations. No modulation mask is passed to ``DeformConv2d``.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: side length of the square kernel.
        stride: stride shared by the offset branch and the deformable conv.
        padding: zero padding shared by both convolutions.
        dilation: dilation shared by both convolutions. Defaults to 1, which
            reproduces the previous (undilated) behaviour.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1):
        super().__init__()
        # Both convolutions must share stride/padding/dilation so the offset
        # map has exactly the spatial size of the deformable conv's output.
        self.offset_conv = nn.Conv2d(
            in_channels,
            2 * kernel_size * kernel_size,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        self.dcn = DeformConv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        # Start with zero offsets so the layer initially behaves like a plain
        # convolution; random initial offsets make early training unstable.
        nn.init.zeros_(self.offset_conv.weight)
        nn.init.zeros_(self.offset_conv.bias)

    def forward(self, x):
        """Predict sampling offsets from ``x`` and apply the deformable conv."""
        offset = self.offset_conv(x)
        return self.dcn(x, offset)
# ======================================
# 2. Bottleneck with DCN support
# ======================================
class Bottleneck(nn.Module):
    """ResNet bottleneck (1x1 -> 3x3 -> 1x1) whose 3x3 conv may be deformable.

    Args:
        inplanes: number of input channels.
        planes: bottleneck width; the block outputs ``planes * expansion``.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to build the shortcut.
        dilation: dilation (and padding) of the regular 3x3 convolution.
        use_dcn: replace the 3x3 convolution with ``DeformableConv``.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1, use_dcn=False):
        super().__init__()
        self.use_dcn = use_dcn
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        if use_dcn:
            # The deformable conv samples an undilated 3x3 grid, so the padding
            # has to be 1. Passing ``padding=dilation`` enlarged the output by
            # 2 * (dilation - 1) pixels, which then had to be resized away.
            self.conv2 = DeformableConv(planes, planes, kernel_size=3, stride=stride, padding=1)
        else:
            self.conv2 = nn.Conv2d(
                planes,
                planes,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                bias=False,
            )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        # BN + ReLU follow the 3x3 conv for both the regular and the deformable
        # variant; the deformable branch used to skip them.
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Safety net: if the residual and shortcut still disagree spatially,
        # resize the residual to the shortcut instead of failing on the add.
        if out.shape[-2:] != identity.shape[-2:]:
            out = F.interpolate(out, size=identity.shape[-2:], mode='bilinear', align_corners=False)

        out = out + identity
        return self.relu(out)
# ======================================
# 3. Residual layer builder
# ======================================
def make_res_layer(block, inplanes, planes, blocks, stride=1, dilation=1, use_dcn=False):
    """Build one residual stage as an ``nn.Sequential`` of ``blocks`` blocks.

    Args:
        block: block factory exposing an ``expansion`` attribute.
        inplanes: channels entering the stage.
        planes: bottleneck width; the stage outputs ``planes * block.expansion``.
        blocks: number of blocks in the stage.
        stride: stride of the first block (later blocks use the block default).
        dilation: dilation of every block after the first.
        use_dcn: forwarded to every block after the first; the first block is
            always built with ``use_dcn=False``.

    Returns:
        nn.Sequential containing the stage.
    """
    out_channels = planes * block.expansion

    # A 1x1 projection shortcut is needed whenever the first block changes the
    # resolution or the channel count.
    shortcut = None
    if not (stride == 1 and inplanes == out_channels):
        shortcut = nn.Sequential(
            nn.Conv2d(inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    # The first block uses half the stage dilation (or 1 for undilated stages).
    head_dilation = dilation // 2 if dilation != 1 else 1
    stage = [block(inplanes, planes, stride, shortcut, dilation=head_dilation, use_dcn=False)]
    stage.extend(
        block(out_channels, planes, dilation=dilation, use_dcn=use_dcn)
        for _ in range(blocks - 1)
    )
    return nn.Sequential(*stage)
# ======================================
# 4. Modified ResNet50 backbone
# ======================================
class ModifiedResNet50(nn.Module):
    """ResNet-50 backbone whose last two stages are rebuilt as dilated DCN stages.

    The stem, ``layer1`` and ``layer2`` come from torchvision's ResNet-50.
    ``layer3`` and ``layer4`` are re-created with ``make_res_layer`` using
    stride 1 and dilation 2 / 4, so they are randomly initialised and keep the
    spatial resolution of ``layer2``.

    Args:
        pretrained: load ImageNet weights into the torchvision part when True.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        original = self._load_resnet50(pretrained)
        # Not read anywhere in this class; kept so code inspecting it still works.
        self.inplanes = 64
        self.conv1 = original.conv1
        self.bn1 = original.bn1
        self.relu = original.relu
        self.maxpool = original.maxpool
        self.layer1 = original.layer1  # out: 256 channels
        self.layer2 = original.layer2  # out: 512 channels
        # 256 * 4 = 1024 output channels
        self.layer3 = make_res_layer(Bottleneck, 512, 256, 6, stride=1, dilation=2, use_dcn=True)
        # 512 * 4 = 2048 output channels
        self.layer4 = make_res_layer(Bottleneck, 1024, 512, 3, stride=1, dilation=4, use_dcn=True)

    @staticmethod
    def _load_resnet50(pretrained):
        """Create torchvision's ResNet-50 without the deprecated ``pretrained`` flag."""
        weights_enum = getattr(models, 'ResNet50_Weights', None)
        if weights_enum is None:
            # torchvision < 0.13 only knows the boolean flag.
            return models.resnet50(pretrained=pretrained)
        # IMAGENET1K_V1 is what ``pretrained=True`` resolved to.
        return models.resnet50(weights=weights_enum.IMAGENET1K_V1 if pretrained else None)

    def forward(self, x):
        """Return ``(c3, c4, c5)``; all three are at 1/8 of the input resolution."""
        c1 = self.relu(self.bn1(self.conv1(x)))
        c1 = self.maxpool(c1)
        c2 = self.layer1(c1)  # H/4
        c3 = self.layer2(c2)  # H/8
        c4 = self.layer3(c3)  # H/8, dilation 2
        c5 = self.layer4(c4)  # H/8, dilation 4
        return c3, c4, c5
# ======================================
# 5. FPN Neck
# ======================================
class FPN(nn.Module):
    """Top-down feature pyramid that fuses three feature maps into one.

    Args:
        in_channels_list: channel counts of the three inputs ``(c3, c4, c5)``.
        out_channels: channel count of every pyramid level and of the output.
        upsample_factor: bilinear upsampling applied to the fused finest level
            before it is returned.
    """

    def __init__(self, in_channels_list=(512, 1024, 2048), out_channels=256, upsample_factor=4):
        super().__init__()
        in_channels_list = list(in_channels_list)
        self.upsample_factor = upsample_factor
        self.lateral_convs = nn.ModuleList([
            nn.Conv2d(in_c, out_channels, 1) for in_c in in_channels_list
        ])
        # One smoothing conv per level is kept so the parameter layout (and
        # therefore ``state_dict``) stays the same; only level 0 feeds the output.
        self.fpn_convs = nn.ModuleList([
            nn.Conv2d(out_channels, out_channels, 3, padding=1) for _ in in_channels_list
        ])

    def forward(self, inputs):
        """Fuse ``(c3, c4, c5)`` top-down and return the upsampled finest level.

        The coarser level is resized to the spatial size of the finer lateral
        map instead of being blindly upsampled by 2. That works both for a
        classic x2 pyramid and for a dilated backbone whose three maps share one
        resolution (where a fixed ``scale_factor=2`` raised a size mismatch).
        """
        c3, c4, c5 = inputs
        lat3 = self.lateral_convs[0](c3)
        lat4 = self.lateral_convs[1](c4)
        p5 = self.lateral_convs[2](c5)

        p4 = lat4 + F.interpolate(p5, size=lat4.shape[-2:], mode='nearest')
        p3 = lat3 + F.interpolate(p4, size=lat3.shape[-2:], mode='nearest')

        # Only the finest level is returned, so the smoothing convs of the
        # coarser levels are not evaluated (their results were discarded).
        p3 = self.fpn_convs[0](p3)
        return F.interpolate(p3, scale_factor=self.upsample_factor, mode='bilinear', align_corners=False)
# ======================================
# 6. Multi-task head
# ======================================
class MultiTaskHead(nn.Module):
    """Four parallel 1x1 convolution heads on a shared feature map.

    Args:
        in_channels: channels of the incoming feature map.
        num_classes: number of channels of the classification head.
    """

    def __init__(self, in_channels=256, num_classes=5):
        super().__init__()
        self.cls = nn.Conv2d(in_channels, num_classes, 1)
        self.offset = nn.Conv2d(in_channels, 2, 1)
        self.distance = nn.Conv2d(in_channels, 1, 1)
        self.variance = nn.Conv2d(in_channels, 2, 1)

    def forward(self, x):
        """Return a dict with keys ``cls``, ``offset``, ``distance``, ``variance``.

        Every value has the spatial size of ``x``. The 1x1 convolutions already
        preserve that size, so the former bilinear resize to the same size was
        a no-op and has been removed.
        """
        return {
            'cls': self.cls(x),
            'offset': self.offset(x),
            'distance': self.distance(x),
            'variance': self.variance(x),
        }
# ======================================
# 7. Main model
# ======================================
class LaneSegNet_MultiTask(nn.Module):
    """Backbone + FPN + multi-task head producing per-pixel lane predictions.

    Args:
        num_classes: number of channels of the classification output.
        pretrained: forwarded to the backbone.
    """

    def __init__(self, num_classes=5, pretrained=True):
        super().__init__()
        self.backbone = ModifiedResNet50(pretrained=pretrained)
        self.fpn = FPN(out_channels=256)
        self.head = MultiTaskHead(256, num_classes)

    def forward(self, x):
        """Return the head's output dict with every map at the resolution of ``x``.

        The backbone works at 1/8 resolution and the FPN upsamples by a fixed
        factor, so the head output is generally smaller than the input. The
        per-pixel targets are built at input resolution, so each prediction is
        resized bilinearly to ``x.shape[-2:]`` when its size differs.
        """
        feats = self.backbone(x)
        fpn_out = self.fpn(feats)
        outputs = self.head(fpn_out)

        target_size = x.shape[-2:]
        return {
            name: pred if pred.shape[-2:] == target_size
            else F.interpolate(pred, size=target_size, mode='bilinear', align_corners=False)
            for name, pred in outputs.items()
        }
# ======================================
# 8. TuSimple Dataset
# ======================================
class TuSimpleDataset(Dataset):
    """TuSimple lane dataset that yields an image plus dense per-pixel targets.

    Each item is a dict with:
        ``image``    - the transformed, resized RGB image;
        ``label``    - (H, W) int64 map, 0 for background and 1..num_classes-1
                       for the drawn lanes;
        ``offset``   - (2, H, W) float32 (dx, dy) to the nearest lane sample,
                       divided by 16;
        ``distance`` - (1, H, W) float32 distance to the nearest lane sample,
                       clipped to 100 and divided by 50.

    Args:
        root: dataset root directory.
        split: ``'train'`` reads the three ``train_set/label_data_*.json``
            files; any other value reads ``test_label.json``.
        img_size: target ``(height, width)``.
        transform: callable applied to the resized PIL image; defaults to
            ``ToTensor`` followed by ImageNet normalisation.
        num_classes: number of label values including background; at most
            ``num_classes - 1`` lanes are drawn into ``label``.
        lane_width: thickness in pixels of the lanes drawn into ``label``.

    Raises:
        FileNotFoundError: if a label file is missing.
    """

    _OFFSET_SCALE = 16.0
    _DISTANCE_CLIP = 100
    _DISTANCE_SCALE = 50.0

    def __init__(self, root, split='train', img_size=(384, 640), transform=None,
                 num_classes=5, lane_width=5):
        self.root = root
        self.img_size = img_size
        self.num_classes = num_classes
        self.lane_width = lane_width
        self.transform = transform or T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.image_paths = []
        self.lanes_data = []
        if split == 'train':
            label_files = [
                os.path.join(root, 'train_set', 'label_data_0313.json'),
                os.path.join(root, 'train_set', 'label_data_0531.json'),
                os.path.join(root, 'train_set', 'label_data_0601.json')
            ]
        else:
            label_files = [os.path.join(root, 'test_label.json')]
        for label_file in label_files:
            if not os.path.exists(label_file):
                raise FileNotFoundError(f"Label file not found: {label_file}")
            with open(label_file, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    # Malformed records are skipped (best effort), but only the
                    # errors a bad record can cause are swallowed.
                    try:
                        data = json.loads(line)
                        img_path = self._resolve_image_path(data['raw_file'], split)
                        usable = 'lanes' in data and 'h_samples' in data
                    except (ValueError, KeyError, TypeError, AttributeError):
                        continue
                    if img_path is not None and usable:
                        self.image_paths.append(img_path)
                        self.lanes_data.append(data)
        print(f"Loaded {len(self)} samples for {split} set.")

    def _resolve_image_path(self, raw_file, split):
        """Return the first existing location of ``raw_file``, or None.

        Label files store paths such as ``clips/...`` relative to the split
        directory, so both ``root/<split>_set/raw_file`` and ``root/raw_file``
        are tried. Previously the test split only looked at ``root/raw_file``
        and therefore found no images.
        """
        split_dir = 'train_set' if split == 'train' else 'test_set'
        direct = os.path.join(self.root, raw_file)
        if raw_file.startswith(split_dir):
            candidates = [direct]
        else:
            nested = os.path.join(self.root, split_dir, raw_file)
            # Keep the location each split preferred before as first choice.
            candidates = [nested, direct] if split == 'train' else [direct, nested]
        for candidate in candidates:
            if os.path.exists(candidate):
                return candidate
        return None

    def __len__(self):
        return len(self.image_paths)

    def _build_targets(self, lanes, h_samples, orig_h, orig_w):
        """Build the label, offset and distance maps for one annotation."""
        out_h, out_w = self.img_size
        ratio_h, ratio_w = out_h / orig_h, out_w / orig_w

        # cv2 drawing needs a small integer dtype; converted to int64 on return.
        label_map = np.zeros((out_h, out_w), dtype=np.uint8)
        offset_map = np.zeros((2, out_h, out_w), dtype=np.float32)
        distance_map = np.full((1, out_h, out_w), 1e6, dtype=np.float32)

        polylines = []
        for xs in lanes:
            # Negative x marks "no lane at this row" in the annotation.
            pts = [(int(x * ratio_w), int(y * ratio_h)) for x, y in zip(xs, h_samples) if x >= 0]
            pts = [(px, py) for px, py in pts if 0 <= px < out_w and 0 <= py < out_h]
            if len(pts) > 1:
                polylines.append(np.array(pts, dtype=np.int32))

        max_lane_id = self.num_classes - 1
        grid_x = np.arange(out_w, dtype=np.float32)
        grid_y = np.arange(out_h, dtype=np.float32)
        for lane_id, pts in enumerate(polylines, start=1):
            # The label map used to stay all-zero, so no lane pixel was ever
            # supervised. Lanes beyond the available class ids are not drawn
            # (they would produce out-of-range labels) but still contribute to
            # the offset/distance maps.
            if lane_id <= max_lane_id:
                cv2.polylines(label_map, [pts.reshape(-1, 1, 2)], False, int(lane_id),
                              thickness=self.lane_width)

            px = pts[:, 0].astype(np.float32)
            py = pts[:, 1].astype(np.float32)
            # Squared distance is separable in x and y, so only one (H, W, N)
            # array is materialised instead of an (H, W, N, 2) float64 tensor.
            dx = px[None, :] - grid_x[:, None]  # (W, N)
            dy = py[None, :] - grid_y[:, None]  # (H, N)
            sq_dists = dy[:, None, :] ** 2 + dx[None, :, :] ** 2  # (H, W, N)
            nearest_idx = sq_dists.argmin(axis=-1)
            min_dists = np.sqrt(sq_dists.min(axis=-1))

            # Keep, per pixel, the closest sample over all lanes seen so far.
            update_mask = min_dists < distance_map[0]
            rows, cols = np.nonzero(update_mask)
            hit = nearest_idx[rows, cols]
            offset_map[0, rows, cols] = dx[cols, hit]
            offset_map[1, rows, cols] = dy[rows, hit]
            distance_map[0][update_mask] = min_dists[update_mask]

        offset_map /= self._OFFSET_SCALE
        distance_map = np.clip(distance_map, 0, self._DISTANCE_CLIP) / self._DISTANCE_SCALE
        return label_map.astype(np.int64), offset_map, distance_map

    def __getitem__(self, idx):
        """Load sample ``idx``; unreadable images fall through to the next sample.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no image in the dataset can be read.
        """
        image = cv2.imread(self.image_paths[idx])
        probe = idx
        attempts = 1
        # Bounded replacement for the former unbounded recursion.
        while image is None:
            if attempts >= len(self):
                raise RuntimeError("No readable image found in the dataset")
            probe = (probe + 1) % len(self)
            image = cv2.imread(self.image_paths[probe])
            attempts += 1
        data = self.lanes_data[probe]

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        orig_h, orig_w = image.shape[:2]
        out_h, out_w = self.img_size
        # PIL expects (width, height).
        image_resized = Image.fromarray(image).resize((out_w, out_h), Image.BILINEAR)

        cls_map, offset_map, distance_map = self._build_targets(
            data['lanes'], data['h_samples'], orig_h, orig_w
        )

        return {
            'image': self.transform(image_resized),
            'label': torch.from_numpy(cls_map).long(),
            'offset': torch.from_numpy(offset_map).float(),
            'distance': torch.from_numpy(distance_map).float()
        }
# ======================================
# 9. Loss functions
# ======================================
class FocalLoss(nn.Module):
    """Multi-class focal loss on top of cross-entropy.

    Args:
        alpha: global scale factor of the loss.
        gamma: focusing exponent; 0 reduces to (scaled) cross-entropy.
        ignore_index: target value excluded from the loss. The default is
            PyTorch's ``-100``, so class 0 (background) IS supervised.
            Ignoring class 0, as was hard-coded before, leaves every
            background pixel without a training signal.
    """

    def __init__(self, alpha=1, gamma=2, ignore_index=-100):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index

    def forward(self, pred, target):
        """Return the mean focal loss over the non-ignored pixels.

        Args:
            pred: class logits with the class dimension at index 1.
            target: integer class indices with the spatial shape of ``pred``.
        """
        valid = target != self.ignore_index
        if not valid.any():
            return pred.new_zeros([])
        ce_loss = F.cross_entropy(pred, target, ignore_index=self.ignore_index, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        # Average over valid pixels only; including the zeros of ignored
        # pixels would shrink the loss with the share of ignored area.
        return focal_loss[valid].mean()
class RegressLoss(nn.Module):
    """Smooth-L1 regression loss evaluated only where ``mask`` is set."""

    def __init__(self):
        super().__init__()
        self.criterion = nn.SmoothL1Loss(reduction='mean')

    def forward(self, pred, target, mask):
        """Return the mean Smooth-L1 loss over masked elements.

        Args:
            pred: predicted values.
            target: reference values, same shape as ``pred``.
            mask: boolean selector, same shape as ``pred``.

        Returns:
            Scalar tensor; zero when the mask selects nothing.
        """
        if not bool(mask.any()):
            # Nothing selected: a mean over an empty set would be NaN.
            return pred.new_zeros(())
        selected_pred = pred[mask]
        selected_target = target[mask]
        return self.criterion(selected_pred, selected_target)
class DistanceLoss(nn.Module):
    """Mean squared error between the predicted and the reference distance map."""

    def __init__(self):
        super().__init__()
        self.criterion = nn.MSELoss(reduction='mean')

    def forward(self, pred, target):
        """Return the scalar MSE of ``pred`` against ``target``."""
        mse = self.criterion(pred, target)
        return mse
class VarianceLoss(nn.Module):
    """Uncertainty-weighted squared error using a predicted log-variance."""

    def __init__(self):
        super().__init__()

    def forward(self, pred_mean, pred_logvar, target):
        """Return ``mean(exp(-logvar) * (target - mean)^2 + logvar)``.

        Args:
            pred_mean: predicted values.
            pred_logvar: predicted log-variance, broadcastable to ``pred_mean``.
            target: reference values.
        """
        squared_error = (target - pred_mean) ** 2
        # exp(-logvar) down-weights the error where high variance is predicted;
        # the additive logvar term penalises predicting large variance everywhere.
        per_element = torch.exp(-pred_logvar) * squared_error + pred_logvar
        return per_element.mean()
# ======================================
# 10. Evaluation: compute Accuracy, P, R, F1, FPR, FNR
# ======================================
@torch.no_grad()
def compute_metrics(pred_cls, target, num_classes=5, ignore_index=0):
    """Compute pixel-level lane-vs-background metrics from class logits.

    A ``num_classes`` x ``num_classes`` confusion matrix (rows = target,
    columns = prediction) is built from all pixels whose target lies in
    ``[ignore_index, num_classes)``. Classes 1.. count as positives and class 0
    as the negative; a lane pixel assigned to the wrong lane class counts both
    as a false positive and as a false negative.

    Args:
        pred_cls: logits with the class dimension at index 1.
        target: integer class map.
        num_classes: number of classes including background.
        ignore_index: lower bound of the target values that are evaluated.

    Returns:
        dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``fpr``, ``fnr``.
    """
    predictions = pred_cls.argmax(dim=1).view(-1)
    truths = target.view(-1)
    in_range = (truths >= ignore_index) & (truths < num_classes)

    k = num_classes
    flat_index = k * truths[in_range] + predictions[in_range]
    confusion = torch.bincount(flat_index, minlength=k * k).reshape(k, k).float()

    lane_diag = confusion[1:, 1:].diag()
    predicted_per_class = confusion.sum(dim=0)[1:]
    actual_per_class = confusion.sum(dim=1)[1:]

    tp = lane_diag.sum().item()
    fp = (predicted_per_class - lane_diag).sum().item()
    fn = (actual_per_class - lane_diag).sum().item()
    tn = confusion[0, 0].item()

    eps = 1e-8  # keeps every ratio finite when a denominator is zero
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    return {
        'accuracy': (tp + tn) / (tp + tn + fp + fn + eps),
        'precision': precision,
        'recall': recall,
        'f1': 2 * precision * recall / (precision + recall + eps),
        'fpr': fp / (fp + tn + eps),
        'fnr': fn / (fn + tp + eps),
    }
@torch.no_grad()
def evaluate(model, dataloader, device):
    """Average the per-batch metrics of ``compute_metrics`` over a dataloader.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` again.

    Args:
        model: network returning a dict that contains ``'cls'`` logits.
        dataloader: iterable of batches with ``'image'`` and ``'label'`` tensors.
        device: device the batches are moved to.

    Returns:
        dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``fpr``,
        ``fnr``. Every value is NaN when the dataloader yields no batch (this
        used to raise ZeroDivisionError, e.g. when the split found no images).
    """
    model.eval()
    metric_names = ['accuracy', 'precision', 'recall', 'f1', 'fpr', 'fnr']
    total_metrics = dict.fromkeys(metric_names, 0.0)
    count = 0
    for data in dataloader:
        images = data['image'].to(device)
        labels = data['label'].to(device)
        outputs = model(images)
        metrics = compute_metrics(outputs['cls'], labels)
        for name in metric_names:
            total_metrics[name] += metrics[name]
        count += 1
    if count == 0:
        # Nothing was evaluated: the metrics are undefined rather than zero.
        return dict.fromkeys(metric_names, float('nan'))
    return {name: total / count for name, total in total_metrics.items()}
# ======================================
# 11. Main function
# ======================================
def main():
    """Train the lane network on TuSimple, validate each epoch, save the weights.

    The dataset is expected in a ``TUSimple`` directory next to this script and
    the trained ``state_dict`` is written to ``tusimple_lane_model.pth`` in the
    same directory.

    Raises:
        NotADirectoryError: if the dataset root does not exist.
        RuntimeError: if the training split contains no samples.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    current_dir = os.path.dirname(os.path.abspath(__file__))
    tusimple_root = os.path.join(current_dir, "TUSimple")
    if not os.path.exists(tusimple_root):
        raise NotADirectoryError(f"[Error] Dataset root not found: {tusimple_root}")

    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    train_dataset = TuSimpleDataset(tusimple_root, split='train', img_size=(384, 640), transform=transform)
    val_dataset = TuSimpleDataset(tusimple_root, split='test', img_size=(384, 640), transform=transform)
    if len(train_dataset) == 0:
        # Fail early with a clear message instead of a sampler error or a
        # division by zero when averaging the epoch loss.
        raise RuntimeError(f"[Error] No training samples found under: {tusimple_root}")

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)

    model = LaneSegNet_MultiTask(num_classes=5).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion_focal = FocalLoss().to(device)
    criterion_regress = RegressLoss().to(device)
    criterion_distance = DistanceLoss().to(device)
    criterion_variance = VarianceLoss().to(device)
    w_cls, w_reg, w_dist, w_var = 1.0, 1.0, 0.5, 0.3

    epochs = 50
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for i, data in enumerate(train_loader):
            images = data['image'].to(device)
            labels = data['label'].to(device)
            offsets = data['offset'].to(device)
            distances = data['distance'].to(device)
            # Offsets are regressed only on lane pixels (label > 0).
            masks = (labels > 0).unsqueeze(1).expand_as(offsets)

            optimizer.zero_grad()
            outputs = model(images)
            loss_cls = criterion_focal(outputs['cls'], labels)
            loss_reg = criterion_regress(outputs['offset'], offsets, masks)
            loss_dist = criterion_distance(outputs['distance'], distances)
            loss_var = criterion_variance(outputs['offset'], outputs['variance'], offsets)
            loss = w_cls * loss_cls + w_reg * loss_reg + w_dist * loss_dist + w_var * loss_var
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            if (i + 1) % 20 == 0:
                # Report against ``epochs`` rather than a hard-coded 50.
                print(f"Epoch [{epoch + 1}/{epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{epochs}] Average Loss: {avg_loss:.4f}")

        # Validation
        if len(val_dataset) == 0:
            # An empty validation split must not abort a finished training epoch.
            print("Validation skipped: no validation samples were loaded.")
            continue
        val_metrics = evaluate(model, val_loader, device)
        print(f"Val Acc: {val_metrics['accuracy']:.4f}, F1: {val_metrics['f1']:.4f}, "
              f"Prec: {val_metrics['precision']:.4f}, Rec: {val_metrics['recall']:.4f}, "
              f"FPR: {val_metrics['fpr']:.4f}, FNR: {val_metrics['fnr']:.4f}")

    save_path = os.path.join(current_dir, "tusimple_lane_model.pth")
    torch.save(model.state_dict(), save_path)
    print(f"Training completed. Model saved to {save_path}")


# Run training only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# ----------------------------------------------------------------------
# Console output of a failed run, kept for reference (not code).
# ----------------------------------------------------------------------
# D:\Anaconda\envs\crawler\python.exe D:\Pycharm\Py_Projects\TUsimle\1.02.py
# Using device: cuda
# Loaded 3626 samples for train set.
# Loaded 0 samples for test set.
# D:\Anaconda\envs\crawler\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
# warnings.warn(
# D:\Anaconda\envs\crawler\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.
# warnings.warn(msg)
# Traceback (most recent call last):
# File "D:\Pycharm\Py_Projects\TUsimle\1.02.py", line 476, in <module>
# main()
# File "D:\Pycharm\Py_Projects\TUsimle\1.02.py", line 445, in main
# outputs = model(images)
# File "D:\Anaconda\envs\crawler\lib\site-packages\torch\nn\modules\module.py", line 1773, in _wrapped_call_impl
# return self._call_impl(*args, **kwargs)
# File "D:\Anaconda\envs\crawler\lib\site-packages\torch\nn\modules\module.py", line 1784, in _call_impl
# return forward_call(*args, **kwargs)
# File "D:\Pycharm\Py_Projects\TUsimle\1.02.py", line 200, in forward
# fpn_out = self.fpn(feats)
# File "D:\Anaconda\envs\crawler\lib\site-packages\torch\nn\modules\module.py", line 1773, in _wrapped_call_impl
# return self._call_impl(*args, **kwargs)
# File "D:\Anaconda\envs\crawler\lib\site-packages\torch\nn\modules\module.py", line 1784, in _call_impl
# return forward_call(*args, **kwargs)
# File "D:\Pycharm\Py_Projects\TUsimle\1.02.py", line 156, in forward
# p4 = self.lateral_convs[1](c4) + F.interpolate(p5, scale_factor=2, mode='nearest')
# RuntimeError: The size of tensor a (80) must match the size of tensor b (160) at non-singleton dimension 3
# 进程已结束,退出代码为 1  (Process finished with exit code 1)
# 最新发布  (web-page residue: "Latest release")