TensorFlow (30): ResNet


import tensorflow as tf
from tensorflow import keras
# Define a BasicBlock: two 3x3 conv-BN layers with a shortcut connection, out = relu(F(x) + x)
class BasicBlock(keras.layers.Layer):
    def __init__(self, input_filters, filters, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = keras.layers.Conv2D(filters, [3, 3], strides=stride, padding="same")
        self.bn1 = keras.layers.BatchNormalization()
        self.relu = keras.layers.Activation("relu")
        self.conv2 = keras.layers.Conv2D(filters, [3, 3], strides=1, padding="same")
        self.bn2 = keras.layers.BatchNormalization()
        # Project the shortcut with a 1x1 conv + BN when the output shape changes;
        # otherwise pass the input through unchanged
        if stride != 1 or input_filters != filters:
            self.subsampling = keras.Sequential([
                keras.layers.Conv2D(filters, [1, 1], strides=stride, padding="same"),
                keras.layers.BatchNormalization()
            ])
        else:
            self.subsampling = lambda x: x
    
    def call(self, inputs, training=None):
        # Main path: conv-BN-ReLU -> conv-BN (pass training so BN uses batch
        # statistics during training and moving averages at inference)
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        # Shortcut path, element-wise add, then the final ReLU
        residual = self.subsampling(inputs)
        out = keras.layers.add([out, residual])
        outputs = self.relu(out)
        return outputs
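# Quick shape check (an illustrative sketch, not part of the original notes):
# with stride=2 and a channel change from 64 to 128, the 1x1 shortcut lets the
# residual add line up while the spatial size is halved
demo_block = BasicBlock(64, 128, stride=2)
demo_x = tf.random.normal([4, 32, 32, 64])
print(demo_block(demo_x).shape)  # expected: (4, 16, 16, 128)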
# Stack num_block BasicBlocks into one ResNet stage; only the first block applies the stride
def ResNetBlock(num_block, input_filters, filters, stride=1):
    resnet = keras.Sequential()
    resnet.add(BasicBlock(input_filters, filters, stride=stride))
    for i in range(1, num_block):
        resnet.add(BasicBlock(filters, filters, stride=1))
    return resnet
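# Illustrative usage check (assumed, not in the original notes): the stride is
# applied only by the first block in a stage, so a two-block stage with
# stride=2 downsamples exactly once
demo_stage = ResNetBlock(2, 64, 128, stride=2)
print(demo_stage(tf.random.normal([4, 32, 32, 64])).shape)  # expected: (4, 16, 16, 128)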
# Define the full ResNet model: stem -> 4 stages -> global average pool -> classifier
class ResNet(keras.Model):
    def __init__(self, n_classes=100, block_dim=[2, 2, 2, 2]):
        super(ResNet, self).__init__()
        # Stem: a single 3x3 conv (suited to small CIFAR images) plus BN and max pooling
        self.root = keras.Sequential([
            keras.layers.Conv2D(64, [3, 3], strides=[1, 1], padding="same"),
            keras.layers.BatchNormalization(),
            keras.layers.MaxPool2D(pool_size=(2, 2), strides=1)
        ])
        # Four stages; each halves the spatial size, and after the first the channels double
        self.layer1 = ResNetBlock(block_dim[0], 64, 64, stride=2)
        self.layer2 = ResNetBlock(block_dim[1], 64, 128, stride=2)
        self.layer3 = ResNetBlock(block_dim[2], 128, 256, stride=2)
        self.layer4 = ResNetBlock(block_dim[3], 256, 512, stride=2)
        # output: [b, h, w, 512] => [b, 512] => [b, n_classes] (raw logits, no softmax)
        self.avgpool = keras.layers.GlobalAvgPool2D()
        self.fc = keras.layers.Dense(n_classes)
        
    def call(self, inputs, training=None):
        out = self.root(inputs, training=training)
        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)
        out = self.avgpool(out)
        outputs = self.fc(out)
        return outputs
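# End-to-end shape check (an illustrative sketch): a CIFAR-100-sized batch
# should map to one logit per class
demo_model = ResNet(n_classes=100)
print(demo_model(tf.random.normal([4, 32, 32, 3])).shape)  # expected: (4, 100)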
# Build an 18-layer ResNet: 1 stem conv + 4 stages x 2 blocks x 2 convs + 1 dense = 18 weighted layers
def resnet():
    return ResNet(100, [2, 2, 2, 2])
# Preprocessing: scale pixels from [0, 255] to [-1, 1] and one-hot encode the labels
def preprocess(x, y):
    x = 2 * (tf.cast(x, dtype=tf.float32) / 255. - 0.5)
    y = tf.cast(y, dtype=tf.int32)
    y_onehot = tf.one_hot(y, depth=100)
    return x, y_onehot
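# Illustrative check of the preprocessing (not in the original notes): pixel 0
# maps to -1, 255 maps to +1, and a label becomes a 100-dimensional one-hot row
demo_px, demo_label = preprocess(tf.constant([0, 128, 255]), tf.constant([3]))
print(demo_px.numpy())    # approx [-1.0, 0.0039, 1.0]
print(demo_label.shape)   # (1, 100)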
# Load CIFAR-100 and build the tf.data pipelines
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
# Labels come as shape [n, 1]; squeeze to [n] so one_hot produces [n, 100]
y_train, y_test = tf.squeeze(y_train, axis=1), tf.squeeze(y_test, axis=1)
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_train = db_train.map(preprocess).shuffle(50000).batch(32)  # buffer covers the full 50k training set
db_test = db_test.map(preprocess).batch(32)  # no need to shuffle the test set
sample = next(iter(db_train))
print("batch shapes:", sample[0].shape, sample[1].shape)
# Build, compile, and train the model
res_net18 = resnet()
# The Dense head outputs raw logits (no softmax), so the loss needs from_logits=True
res_net18.compile(loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                  optimizer=keras.optimizers.Adam(learning_rate=1e-4),
                  metrics=["accuracy"])
res_net18.fit(db_train, epochs=10, validation_data=db_test, validation_freq=1)
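# An assumed follow-up (not in the original notes): evaluate on the test set
# and inspect a few predictions; tf.argmax turns logits into class indices
res_net18.evaluate(db_test)
demo_xb, demo_yb = next(iter(db_test))
print(tf.argmax(res_net18(demo_xb), axis=1)[:10])  # predicted classes for 10 samples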

This post is a study note based on 龙龙老师's course "深度学习与TensorFlow 2入门实战" (Deep Learning and TensorFlow 2: A Hands-on Introduction).

by CyrusMay 2022-04-18

