Excerpted from the TensorFlow edition (2.1.0) of Dive into Deep Learning (《动手学深度学习》)
Why image augmentation matters
- Large-scale datasets are a prerequisite for successfully applying deep neural networks.
- Image augmentation generates similar but distinct training examples by applying a series of random changes to the training images, thereby enlarging the training set.
- Another way to look at it: randomly altering the training examples reduces the model's reliance on particular attributes, which improves its ability to generalize. For example:
  - Cropping the image in different ways makes the object of interest appear in different positions, reducing the model's dependence on where the object appears.
  - Adjusting brightness, color, and other factors reduces the model's sensitivity to color.
import tensorflow as tf
import numpy as np
print(tf.__version__)
Read an image
from matplotlib import pyplot as plt
img = plt.imread('./hotdog/train/hotdog/0.png')
plt.imshow(img)
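For a PNG, plt.imread decodes the image into a float array scaled to [0, 1], which is the range the tf.image operations below expect. A quick sanity check of shape and dtype:

print(img.shape, img.dtype)  # e.g. (height, width, 3), float32, values in [0, 1]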
Define helper functions that apply a given transformation to the image and display the results:
def show_images(imgs, num_rows, num_cols, scale=2):  # display num_rows * num_cols images from imgs in a grid
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)  # hide the x-axis
            axes[i][j].axes.get_yaxis().set_visible(False)  # hide the y-axis
    return axes

def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):  # apply aug to img num_rows * num_cols times
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)
Flip left-right with probability 1/2
apply(img, tf.image.random_flip_left_right)
Flip up-down with probability 1/2
apply(img, tf.image.random_flip_up_down)
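The two flips can also be composed into a single transformation and passed to apply; each flip still fires independently with probability 1/2. A minimal sketch (the helper name compose_flips is ours):

def compose_flips(image):
    image = tf.image.random_flip_left_right(image)  # horizontal flip with probability 1/2
    image = tf.image.random_flip_up_down(image)     # vertical flip with probability 1/2
    return image

apply(img, compose_flips)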
Random cropping
The object of interest does not always appear in the center of the image. Pooling layers make a network less sensitive to the target's position; random cropping, which makes objects appear at different scales and positions, serves the same purpose.
Here we crop a random region 100 pixels wide and 100 pixels high (the crop size must be smaller than the original image). The tf.image.random_crop API does not offer MXNet's option of cropping a region whose area is 10%-100% of the original with an aspect ratio drawn from 0.5-2; a way to approximate that is sketched after the code below.
aug = tf.image.random_crop
num_rows = 2
num_cols = 4
scale = 1.5
crop_size = 100  # must be smaller than the original image's height and width
Y = [aug(img, (crop_size, crop_size, 3)) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
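To approximate MXNet's random resized crop (area 10%-100% of the image, aspect ratio in 0.5-2, then resize to a fixed size), one option is tf.image.sample_distorted_bounding_box followed by a slice and resize. A rough sketch under those assumptions (the helper name random_resized_crop is ours):

def random_resized_crop(image, size=(200, 200)):
    # sample a crop window covering 10%-100% of the image area
    # with an aspect ratio drawn from [0.5, 2]
    begin, crop_shape, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=tf.constant([[[0.0, 0.0, 1.0, 1.0]]]),  # the whole image
        min_object_covered=0.1,
        area_range=(0.1, 1.0),
        aspect_ratio_range=(0.5, 2.0))
    crop = tf.slice(image, begin, crop_shape)
    return tf.image.resize(crop, size)  # resize the crop to a fixed output size

apply(img, random_resized_crop)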
Color jittering
Four aspects can be varied: brightness, contrast, saturation, and hue.
Randomly vary the image's brightness. Note that tf.image.random_brightness adds a random offset drawn from [-max_delta, max_delta] to the pixel values, so with max_delta=0.5 on a [0, 1] image the brightness shifts by up to ±0.5; the MXNet version instead scales brightness to between 50% (1 − 0.5) and 150% (1 + 0.5) of the original. A multiplicative variant is sketched after the code below.
aug = tf.image.random_brightness
num_rows = 2
num_cols = 4
scale = 1.5
max_delta = 0.5
Y = [aug(img, max_delta) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
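If the goal is the multiplicative behavior described in the MXNet book (scale brightness to 50%-150% of the original), a simple sketch is to multiply by a random factor and clip back to the valid range (the helper name random_brightness_scale is ours):

def random_brightness_scale(image, lower=0.5, upper=1.5):
    factor = tf.random.uniform([], lower, upper)       # random scale in [0.5, 1.5)
    return tf.clip_by_value(image * factor, 0.0, 1.0)  # keep pixel values in [0, 1]

apply(img, random_brightness_scale)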
Hue variation
tf.image.random_hue rotates the hue by a random offset in [-max_delta, max_delta]; max_delta must lie in [0, 0.5].
aug = tf.image.random_hue
num_rows = 2
num_cols = 4
scale = 1.5
max_delta = 0.5
Y = [aug(img, max_delta) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
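The remaining two color factors listed above, contrast and saturation, have analogous ops; both take a multiplicative lower/upper range rather than a max_delta:

apply(img, lambda im: tf.image.random_contrast(im, lower=0.5, upper=1.5))    # contrast scaled by 0.5-1.5
apply(img, lambda im: tf.image.random_saturation(im, lower=0.5, upper=1.5))  # saturation scaled by 0.5-1.5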
Training a model with image augmentation
Train a ResNet-18 model on the CIFAR-10 dataset, using random left-right flipping as augmentation.
(x, y), (test_x, test_y) = tf.keras.datasets.cifar10.load_data()
print(x.shape, test_x.shape)
show_images(x[0:32], 4, 8, scale=0.8)  # show the first 32 training images
from tensorflow.keras import layers, activations

class Residual(tf.keras.Model):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(num_channels,
                                   padding='same',
                                   kernel_size=3,
                                   strides=strides)
        self.conv2 = layers.Conv2D(num_channels, kernel_size=3, padding='same')
        if use_1x1conv:
            # 1x1 convolution to match the shortcut's shape when downsampling
            self.conv3 = layers.Conv2D(num_channels,
                                       kernel_size=1,
                                       strides=strides)
        else:
            self.conv3 = None
        self.bn1 = layers.BatchNormalization()
        self.bn2 = layers.BatchNormalization()

    def call(self, X):
        Y = activations.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return activations.relu(Y + X)

class ResnetBlock(tf.keras.layers.Layer):
    def __init__(self, num_channels, num_residuals, first_block=False, **kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.listLayers = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                self.listLayers.append(Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                self.listLayers.append(Residual(num_channels))

    def call(self, X):
        for layer in self.listLayers:
            X = layer(X)
        return X

class ResNet(tf.keras.Model):
    def __init__(self, num_blocks, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.conv = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')
        self.bn = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.Activation('relu')
        self.mp = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')
        self.resnet_block1 = ResnetBlock(64, num_blocks[0], first_block=True)
        self.resnet_block2 = ResnetBlock(128, num_blocks[1])
        self.resnet_block3 = ResnetBlock(256, num_blocks[2])
        self.resnet_block4 = ResnetBlock(512, num_blocks[3])
        self.gap = tf.keras.layers.GlobalAvgPool2D()
        self.fc = tf.keras.layers.Dense(units=10, activation=tf.keras.activations.softmax)

    def call(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.mp(x)
        x = self.resnet_block1(x)
        x = self.resnet_block2(x)
        x = self.resnet_block3(x)
        x = self.resnet_block4(x)
        x = self.gap(x)
        x = self.fc(x)
        return x
net = ResNet([2,2,2,2])
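Before training, a quick forward pass on a dummy batch confirms the model builds and outputs one probability per CIFAR-10 class:

X = tf.random.uniform((1, 32, 32, 3))
print(net(X).shape)  # expected: (1, 10)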
x = np.array([tf.image.random_flip_left_right(i) for i in x])  # random left-right flip, applied once to the whole training set
net.compile(loss='sparse_categorical_crossentropy',
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy'])
history = net.fit(x, y,
                  batch_size=64,
                  epochs=5,
                  validation_split=0.2)
test_scores = net.evaluate(test_x, test_y, verbose=2)
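The flip above is applied once, offline, so every epoch sees the same flipped copies. For fresh augmentation in every epoch, the usual approach is a tf.data pipeline that maps the augmentation onto each example as it is drawn. A minimal sketch, starting from the raw arrays returned by load_data (the /255 normalization is our addition, not part of the original code):

def augment(image, label):
    image = tf.image.random_flip_left_right(image)  # re-sampled every time the example is drawn
    image = tf.cast(image, tf.float32) / 255.0      # assumed normalization to [0, 1]
    return image, label

train_ds = (tf.data.Dataset.from_tensor_slices((x, y))
            .shuffle(10000)
            .map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            .batch(64)
            .prefetch(tf.data.experimental.AUTOTUNE))
history = net.fit(train_ds, epochs=5)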