Keras Tutorial 03: ResNet on CIFAR-10 (Code): Variable Learning Rate, BatchNorm, and l2 Regularization

This article presents a Keras implementation of ResNet for image classification on the CIFAR-10 dataset. It walks through building the model and the training pipeline, including key techniques such as learning-rate scheduling and batch normalization.


import keras
from keras.layers import Dense, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten, Conv2D
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.regularizers import l2
from keras.datasets import cifar10
import numpy as np
import os
from keras.utils import plot_model


# 1. Hyperparameters
batch_size = 128
epochs = 200
data_augmentation = True
num_classes = 10
subtract_pix_mean = True  # subtract the per-pixel mean from every image
n = 3
version = 1
depth = n * 6 + 2  # ResNet-20 for n = 3

# 2. Data loading
model_type = "ResNet%dversionV%d" % (depth, version)
print(model_type)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
input_shape = x_train.shape[1:]  # image dimensions
x_train = x_train.astype('float32')/255
x_test = x_test.astype('float32')/255
# Center the images by subtracting the per-pixel mean of the training set
if subtract_pix_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
print('x_train shape:', x_train.shape)
print('Number of training samples:', x_train.shape[0])
print('Number of test samples:', x_test.shape[0])
print('y_train shape:', y_train.shape)
# One-hot encode the labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


"""
知识点:
1. 变学习率
(1) 定义一个函数,接受轮数为参数, 返回学习率
(2) opt = Adam(lr=lr_schedule(0))  # 定义变优化器,使其从0开始
(3) lr_scheduler = LearningRateScheduler(schedule=lr_schedule) 学习率定时器(通过回调函数)
    #####
    lr_reducer = ReduceLROnPlateau() 学习率减少器, 学不动的时候降低学习率
    #####
    在fit里面通过回调函数来做到变学习率    
2. 如何保存模型
    model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
    checkpoint = ModelCheckpoint()
    通过fit时候的回调函数保存模型    
3. 如何用tensorboard记录学习率
    TensorBoard 回调函数
4. fit_generator 输入 生成器
5. plot_model(model, to_file='models/model.png')  # 保存模型图片
6. Model模型怎么定义
7. he_normal 它从[-limt,limt]的均匀分布中抽取样本,其中limit=sqrt(6 / fan_in), 其中`fan_in`是权值张量中输入单位个数
8. pycharm远程同步代码,之后在ubuntu系统中训练
9. l2正则化使用
        conv = Conv2D(filters=num_filters, kernel_size=kernel_size,
                  strides=strides, padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))
10. BachNormal
    有两种方式,1.卷积+BatchNorm+Act 2. 卷积+Act+Batch
"""
# 3. Model definition
def lr_schedule(epoch):
    """Return the learning rate for the given epoch.

    The rate drops after epochs 80, 120, 160 and 180; the function is called
    once per epoch through the LearningRateScheduler callback.
    Args:
        epoch: epoch index

    Returns: the learning rate for this epoch

    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print("Learning rate:", lr)
    return lr
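
# Sanity-check the schedule at its breakpoints (a sketch; uncomment to run).
# With the thresholds above this prints 0.001, 1e-4, 1e-5, 1e-6, 5e-7:
# for e in (0, 81, 121, 161, 181):
#     lr_schedule(e)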


# First define a single layer, then build the whole network by stacking; this is split into two functions
def resnet_layer(inputs, num_filters=16, kernel_size=3, strides=1,
                 activation='relu', batch_normalization=True, conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    Args:
        inputs (tensor): image data or the output tensor of the previous layer
        num_filters (int): number of 2D convolution filters
        kernel_size (int): side length of the square convolution kernel
        strides (int): stride of the square convolution kernel
        activation (string): name of the activation function
        batch_normalization (bool): whether to apply batch normalization
        conv_first (bool): conv-bn-activation (True) or bn-activation-conv (False)

    Returns:
        x (tensor): tensor fed into the next layer
    """
    # Instantiate the convolution layer
    conv = Conv2D(filters=num_filters, kernel_size=kernel_size,
                  strides=strides, padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))
    x = inputs
    # Apply convolution, batch normalization and activation in the chosen order
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x
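
# Example usage (a sketch): the two stacking orders from knowledge point 10.
# y = resnet_layer(inputs=x)                    # Conv -> BN -> ReLU (ResNet v1 order)
# y = resnet_layer(inputs=x, conv_first=False)  # BN -> ReLU -> Conv (pre-activation, v2 order)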


def resnet_v1(input_shape, depth, num_classes=10):
    """Build a ResNet v1 model.

    Args:
        input_shape (tuple): shape of the input image tensor
        depth (int): total number of convolutional layers (6n + 2)
        num_classes (int): number of classes

    Returns:
        model (Model): Keras model instance
    """
    if (depth - 2) % 6 != 0:
        raise ValueError("depth should be 6n + 2 (e.g. 20, 32, 44)")
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    # 1. Start defining the model
    inputs = Input(shape=input_shape)  # returns a placeholder tensor of shape input_shape
    # 2. Define the computation graph
    x = resnet_layer(inputs=inputs)  # all other arguments keep their defaults
    for stack in range(3):
        for res_blocks in range(num_res_blocks):  # num_res_blocks = 3 for depth 20
            strides = 1
            if stack > 0 and res_blocks == 0:  # first block of a stack other than the first
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_blocks == 0:  # first block of a stack other than the first
                # linear 1x1 projection on the shortcut so its dims match y
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2  # filters per stack: 16 / 32 / 64
    # Add the classifier on top
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    # Final classification layer
    outputs = Dense(units=num_classes, activation='softmax',
                    kernel_initializer='he_normal')(y)
    # 3. Specify inputs and outputs to define the model
    model = Model(inputs=inputs, outputs=outputs)
    return model


model = resnet_v1(input_shape=input_shape, depth=depth)

# 4. Compile the model
opt = Adam(lr=lr_schedule(0))  # optimizer initialized with the epoch-0 learning rate
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Print a summary of the model
model.summary()
print(model_type)

# 5. Model-saving setup
save_dir = os.path.join(os.getcwd(), 'saved_models')  # directory for checkpoints
model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# 6. Prepare callbacks for checkpointing and learning-rate adjustment

# ModelCheckpoint saves the model each epoch; monitor is the quantity being watched
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True)

# Learning-rate scheduler: schedule is a function that takes the epoch index and returns a rate
lr_scheduler = LearningRateScheduler(schedule=lr_schedule)

# When learning stagnates, models often benefit from cutting the learning rate by a factor
# of 2-10. This callback watches a quantity and, if it shows no improvement for `patience`
# epochs, reduces the learning rate.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6,  # must sit below the scheduled rates or the reducer never fires
                               verbose=1)
tfck = TensorBoard(log_dir='logs/cifar10_ResNet')
# Gather the callbacks into a list
callbacks = [checkpoint, lr_reducer, lr_scheduler, tfck]
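
# Knowledge point 3, expanded: TensorBoard only records what shows up in the
# per-epoch `logs` dict. In Keras 2.x, ReduceLROnPlateau itself writes the
# current rate into logs['lr'] at epoch end, so with lr_reducer listed before
# tfck the learning rate already reaches TensorBoard. Without ReduceLROnPlateau
# you would need a tiny custom callback (a minimal sketch; the LrLogger name is
# ours, not a Keras API):
from keras import backend as K

class LrLogger(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs is not None:
            # expose the optimizer's current learning rate as a loggable metric
            logs['lr'] = K.get_value(self.model.optimizer.lr)

# It would then be registered ahead of the TensorBoard callback, e.g.:
# callbacks = [checkpoint, lr_scheduler, LrLogger(), tfck]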

# 7. Start training
if not data_augmentation:
    print('Not using data augmentation')
    hist = model.fit(x=x_train, y=y_train, batch_size=batch_size,
                  epochs=epochs, validation_data=(x_test, y_test),
                  shuffle=True, callbacks=callbacks)

    with open('cifar10_ResNet.txt', 'w') as f:
        f.write(str(hist.history))
else:
    print("Using data augmentation")
    datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset (already done above)
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # randomly flip images
        horizontal_flip=True,
        # randomly flip images
        vertical_flip=False)  # this only instantiates the augmenter; nothing is transformed yet
    datagen.fit(x=x_train)
    # fit() and flow() together supply the stream of augmented batches
    flow = datagen.flow(x=x_train, y=y_train, batch_size=batch_size)
    # For your own data you would write your own generator (see the sketch below)
    model.fit_generator(generator=flow, validation_data=(x_test, y_test),
                        epochs=epochs, verbose=1, workers=4, callbacks=callbacks)
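
# A minimal sketch of a hand-rolled generator for in-memory arrays (the name
# my_generator is ours, for illustration, not a Keras API). fit_generator only
# needs something that yields (x_batch, y_batch) tuples forever; with a plain
# Python generator you must also pass steps_per_epoch, since Keras cannot infer
# its length.
def my_generator(x, y, batch_size):
    num = len(x)
    while True:
        idx = np.random.permutation(num)  # reshuffle every epoch
        for start in range(0, num - batch_size + 1, batch_size):
            batch = idx[start:start + batch_size]
            yield x[batch], y[batch]

# model.fit_generator(my_generator(x_train, y_train, batch_size),
#                     steps_per_epoch=len(x_train) // batch_size,
#                     epochs=epochs, validation_data=(x_test, y_test))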

# Evaluate the model
scores = model.evaluate(x_test, y_test, verbose=1)
print("Test loss:", scores[0])
print("Test accuracy:", scores[1])
if not os.path.isdir('models'):  # plot_model fails if the target directory is missing
    os.makedirs('models')
plot_model(model, to_file='models/model.png')  # requires pydot and graphviz to be installed
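
# Knowledge point 2, closing the loop (a sketch; the epoch number 042 below is a
# placeholder, use whichever checkpoint ModelCheckpoint actually wrote):
# from keras.models import load_model
# best = load_model(os.path.join(save_dir, 'cifar10_%s_model.042.h5' % model_type))
# preds = best.predict(x_test[:10])
# print(preds.argmax(axis=1))  # predicted class indices for the first ten test images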