使用Street View House Numbers (SVHN)数据集训练DCGAN。
加载模块
%matplotlib inline
import pickle as pkl
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
import tensorflow as tf
获得数据
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
data_dir = 'data/'
if not isdir(data_dir):
raise Exception("Data directory doesn't exist!")
class DLProgress(tqdm):
last_block = 0
def hook(self, block_num=1, block_size=1, total_size=None):
self.total = total_size
self.update((block_num - self.last_block) * block_size)
self.last_block = block_num
if not isfile(data_dir + "train_32x32.mat"):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
urlretrieve(
'http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
data_dir + 'train_32x32.mat',
pbar.hook)
if not isfile(data_dir + "test_32x32.mat"):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
urlretrieve(
'http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
data_dir + 'test_32x32.mat',
pbar.hook)
这些SVHN文件是Matlab中常用的.mat文件。我们可以用scipy.io.loadmat加载它们。
trainset = loadmat(data_dir + 'train_32x32.mat')
testset = loadmat(data_dir + 'test_32x32.mat')
这里展示了一小部分图片。每一个都是32x32,带有3个颜色通道(RGB)。这些是我们将传递给辨别器的真实图像,以及生成器最终将伪造的图像。
idx = np.random.randint(0, trainset['X'].shape[3], size=36)
fig, axes = plt.subplots(6, 6, sharex=True, sharey=True, figsize=(5,5),)
for ii, ax in zip(idx, axes.flatten()):
ax.imshow(trainset['X'][:,:,:,ii], aspect='equal')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
plt.subplots_adjust(wspace=0, hspace=0)
在这里,需要做一些预处理,并将图像转换成一种形式,以便将批处理传递到网络。首先,我们需要将图像缩放到-1到1的范围,因为生成器的输出也在这个范围内。
def scale(x, feature_range=(-1, 1)):
# scale to (0, 1)
x = ((x - x.min())/(255 - x.min()))
# scale to feature_range
min, max = feature_range
x = x * (max - min) + min
return x
class Dataset:
def __init__(self, train, test, val_frac=0.5, shuffle=False, scale_func=None):
split_idx = int(len(test['y'])*(1 - val_frac))
self.test_x, self.valid_x = test['X'][:,:,:,:split_idx], test['X'][:,:,:,split_idx:]
self.test_y, self.valid_y = test['y'][:split_idx], test['y'][split_idx:]
self.train_x, self.train_y = train['X'], train['y']
self.train_x = np.rollaxis(self.train_x, 3)
self.valid_x = np.rollaxis(self.valid_x, 3)
self.test_x = np.rollaxis(self.test_x, 3)
if scale_func is None:
self.scaler = scale
else:
self.scaler = scale_func
self.shuffle = shuffle
def batches(self, batch_size):
if self.shuffle:
idx = np.arange(len(dataset.train_x))
np.random.shuffle(idx)
self.train_x = self.train_x[idx]
self.train_y = self.train_y[idx]
n_batches = len(self.train_y)//batch_size
for ii in range(0, len(self.train_y), batch_size):
x = self.train_x[ii:ii+batch_size]
y = self.train_y[ii:ii+batch_size]
yield self.scaler(x), y
网络输入
创建一些占位符。
def model_inputs(real_dim, z_dim):
inputs_real = tf.placeholder(tf.float32, (None, *real_dim), name='input_real')
inputs_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')
return inputs_real, inputs_z
生成器
输入将是噪声向量z。输出将是???ℎ双曲正切的输出,但这一次大小为32×32我们SVHN图像的大小。
这里的新功能是我们将使用卷积层来创建新的图像。第一层是一个完全连接的层,它被重新塑造成一个深而窄的层,类似于原始DCGAN文件中的4x4x1024。然后使用批处理规范化和Leaky ReLU激活。接下来是转置卷积,通常你要将深度减半,将上一层的宽度和高度翻倍。同样,我们使用批处理标准化和Leaky ReLU。对于每一层,一般的方案是卷积>批次归一化>Leaky ReLU。
像这样叠层直到最后得到形状为32x32x3的转置卷积层。
注意,这里的最后一层是64x64x3,而对于我们的SVHN数据集,我们只希望它是32x32x3。
def generator(z, output_dim, reuse=False, alpha=0.2, training=True):
with tf.variable_scope('generator', reuse=reuse):
# First fully connected layer
x1 = tf.layers.dense(z, 4*4*512)
# Reshape it to start the convolutional stack
x1 = tf.reshape(x1, (-1, 4, 4, 512))
x1 = tf.layers.batch_normalization(x1, training=training)
x1 = tf.maximum(alpha * x1, x1)
# 4x4x512 now
x2 = tf.layers.conv2d_transpose(x1, 256, 5, strides=2, padding='same')
x2 = tf.layers.batch_normalization(x2, training=training)
x2 = tf.maximum(alpha * x2, x2)
# 8x8x256 now
x3 = tf.layers.conv2d_transpose(x2, 128, 5, strides=2, padding='same')
x3 = tf.layers.batch_normalization(x3, training=training)
x3 = tf.maximum(alpha * x3, x3)
# 16x16x128 now
# Output layer
logits = tf.layers.conv2d_transpose(x3, output_dim, 5, strides=2, padding='same')
# 32x32x3 now
out = tf.tanh(logits)
return out
辨别器
这基本上就是一个卷积分类器就像之前构建的那样。鉴别器的输入是32x32x3张量/图像。需要几个卷积层,然后一个完全连接的输出层。与之前一样,我们需要一个sigmoid输出,您还需要返回logits。对于卷积层的深度,建议从第一层的16、32、64个过滤器开始,然后在添加层时将深度加倍。注意,在DCGAN的论文中,他们只使用没有maxpool层的strided convolutional layer来进行所有的down sampling。
还需要使用tf.layers.batch_normalization
的批次归一化。除第一个卷积层和输出层外,对每一层进行batch_normalization。同样,每一层应该看起来像卷积>批次规范>Leaky ReLU。
def discriminator(x, reuse=False, alpha=0.2):
with tf.variable_scope('discriminator', reuse=reuse):
# Input layer is 32x32x3
x1 = tf.layers.conv2d(x, 64, 5, strides=2, padding='same')
relu1 = tf.maximum(alpha * x1, x1)
# 16x16x64
x2 = tf.layers.conv2d(relu1, 128, 5, strides=2, padding='same')
bn2 = tf.layers.batch_normalization(x2, training=True)
relu2 = tf.maximum(alpha * bn2, bn2)
# 8x8x128
x3 = tf.layers.conv2d(relu2, 256, 5, strides=2, padding='same')
bn3 = tf.layers.batch_normalization(x3, training=True)
relu3 = tf.maximum(alpha * bn3, bn3)
# 4x4x256
# Flatten it
flat = tf.reshape(relu3, (-1, 4*4*256))
logits = tf.layers.dense(flat, 1)
out = tf.sigmoid(logits)
return out, logits
模型Loss
def model_loss(input_real, input_z, output_dim, alpha=0.2):
"""
Get the loss for the discriminator and generator
:param input_real: Images from the real dataset
:param input_z: Z input
:param out_channel_dim: The number of channels in the output image
:return: A tuple of (discriminator loss, generator loss)
"""
g_model = generator(input_z, output_dim, alpha=alpha)
d_model_real, d_logits_real = discriminator(input_real, alpha=alpha)
d_model_fake, d_logits_fake = discriminator(g_model, reuse=True, alpha=alpha)
d_loss_real = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_model_real)))
d_loss_fake = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_model_fake)))
g_loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_model_fake)))
d_loss = d_loss_real + d_loss_fake
return d_loss, g_loss
Optimizers
def model_opt(d_loss, g_loss, learning_rate, beta1):
"""
Get optimization operations
:param d_loss: Discriminator loss Tensor
:param g_loss: Generator loss Tensor
:param learning_rate: Learning Rate Placeholder
:param beta1: The exponential decay rate for the 1st moment in the optimizer
:return: A tuple of (discriminator training operation, generator training operation)
"""
# Get weights and bias to update
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if var.name.startswith('discriminator')]
g_vars = [var for var in t_vars if var.name.startswith('generator')]
# Optimize
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
d_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(d_loss, var_list=d_vars)
g_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(g_loss, var_list=g_vars)
return d_train_opt, g_train_opt
构建模型
在这里,我们可以使用我们定义的函数将模型构建为一个类。这将使在代码中移动网络变得更容易,因为图中的节点和操作打包在一个对象中。
class GAN:
def __init__(self, real_size, z_size, learning_rate, alpha=0.2, beta1=0.5):
tf.reset_default_graph()
self.input_real, self.input_z = model_inputs(real_size, z_size)
self.d_loss, self.g_loss = model_loss(self.input_real, self.input_z,
real_size[2], alpha=0.2)
self.d_opt, self.g_opt = model_opt(self.d_loss, self.g_loss, learning_rate, beta1)
这是一个显示生成图像的函数。
def view_samples(epoch, samples, nrows, ncols, figsize=(5,5)):
fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols,
sharey=True, sharex=True)
for ax, img in zip(axes.flatten(), samples[epoch]):
ax.axis('off')
img = ((img - img.min())*255 / (img.max() - img.min())).astype(np.uint8)
ax.set_adjustable('box-forced')
im = ax.imshow(img, aspect='equal')
plt.subplots_adjust(wspace=0, hspace=0)
return fig, axes
另一个我们可以用来训练网络的功能。注意,当调用generator创建要显示的示例时,我们将training设置为False。因此,批次归一化层将使用总体统计信息而不是批处理统计信息。还要注意,当我们运行生成器的优化器时,我们设置net.input_real占位符。生成器实际上并不使用它,但是由于我们在model_opt中创建
tf.control_dependencies的原因,如果没有它,我们会得到一个错误。我们在model_opt中创建的control_dependencies块。
def train(net, dataset, epochs, batch_size, print_every=10, show_every=100, figsize=(5,5)):
saver = tf.train.Saver()
sample_z = np.random.uniform(-1, 1, size=(72, z_size))
samples, losses = [], []
steps = 0
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for e in range(epochs):
for x, y in dataset.batches(batch_size):
steps += 1
# Sample random noise for G
batch_z = np.random.uniform(-1, 1, size=(batch_size, z_size))
# Run optimizers
_ = sess.run(net.d_opt, feed_dict={net.input_real: x, net.input_z: batch_z})
_ = sess.run(net.g_opt, feed_dict={net.input_z: batch_z, net.input_real: x})
if steps % print_every == 0:
# At the end of each epoch, get the losses and print them out
train_loss_d = net.d_loss.eval({net.input_z: batch_z, net.input_real: x})
train_loss_g = net.g_loss.eval({net.input_z: batch_z})
print("Epoch {}/{}...".format(e+1, epochs),
"Discriminator Loss: {:.4f}...".format(train_loss_d),
"Generator Loss: {:.4f}".format(train_loss_g))
# Save losses to view after training
losses.append((train_loss_d, train_loss_g))
if steps % show_every == 0:
gen_samples = sess.run(
generator(net.input_z, 3, reuse=True, training=False),
feed_dict={net.input_z: sample_z})
samples.append(gen_samples)
_ = view_samples(-1, samples, 6, 12, figsize=figsize)
plt.show()
saver.save(sess, './checkpoints/generator.ckpt')
with open('samples.pkl', 'wb') as f:
pkl.dump(samples, f)
return losses, samples
超参数
GANd对超参数很敏感。为了找到最佳的超参数,需要进行大量的实验,以便使生成器和辨别器不会互相压倒对方。
real_size = (32,32,3)
z_size = 100
learning_rate = 0.0002
batch_size = 128
epochs = 25
alpha = 0.2
beta1 = 0.5
# Create the network
net = GAN(real_size, z_size, learning_rate, alpha=alpha, beta1=beta1)
dataset = Dataset(trainset, testset)
losses, samples = train(net, dataset, epochs, batch_size, figsize=(10,5))
fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Discriminator', alpha=0.5)
plt.plot(losses.T[1], label='Generator', alpha=0.5)
plt.title("Training Losses")
plt.legend()
fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Discriminator', alpha=0.5)
plt.plot(losses.T[1], label='Generator', alpha=0.5)
plt.title("Training Losses")
plt.legend()
_ = view_samples(-1, samples, 6, 12, figsize=(10,5))