"""
Deep Convolutional GANs
本章中,将构建一个深度卷积生成对抗网络。简称:DCGAN。DCGAN论文发表于2015年,论文地址:[论文链接](https://arxiv.org/pdf/1511.06434.pdf).
我们将在[Street View House Numbers](http://ufldl.stanford.edu/housenumbers/) (SVHN)数据集基础上训练DCGAN。\
该数据集来源于谷歌街景中房屋门牌数字(RGB图片)。 SVHN相比MNIST,彩色的,且种类更丰富。
故此,我们需要一个更深且更强大的网络:使用卷积层。且非常有必要使用批归一化(batch normalization)。
相比之前 gan_mnist网络,唯一区别也在此(其他操作基本相同)。
"""
import pickle as pkl
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.io import loadmat
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
import matplotlib as mpl
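# matplotlib configuration: use the SimHei font (CJK-capable) and keep minus signs rendering correctly.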
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False
data_dir = '../gans_datas/svhn/'
if not isdir(data_dir):
    raise Exception("Data directory {} doesn't exist!".format(data_dir))
class DLProgress(tqdm):
last_block = 0
def hook(self, block_num=1, block_size=1, total_size=None):
self.total = total_size
self.update((block_num - self.last_block) * block_size)
self.last_block = block_num
if not isfile(data_dir + "train_32x32.mat"):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
urlretrieve(
'http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
data_dir + 'train_32x32.mat',
pbar.hook)
if not isfile(data_dir + "test_32x32.mat"):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Testing Set') as pbar:
urlretrieve(
'http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
data_dir + 'test_32x32.mat',
pbar.hook)
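# Each .mat file loads as a dict whose 'X' entry holds the images with shape
# (32, 32, 3, N) and whose 'y' entry holds the digit labels.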
trainset = loadmat(data_dir + 'train_32x32.mat')
testset = loadmat(data_dir + 'test_32x32.mat')
def show_svhn_images():
    """Display a random 6 x 6 grid of SVHN training images."""
idx = np.random.randint(0, trainset['X'].shape[3], size=36)
fig, axes = plt.subplots(6, 6, sharex=True, sharey=True, figsize=(5, 5), )
for ii, ax in zip(idx, axes.flatten()):
ax.imshow(trainset['X'][:, :, :, ii], aspect='equal')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
plt.subplots_adjust(wspace=0, hspace=0)
plt.show()
def scale(x, feature_range=(-1, 1)):
    """Rescale uint8 pixel values from [0, 255] into feature_range (default [-1, 1])."""
    x = (x - x.min()) / (255.0 - x.min())
    f_min, f_max = feature_range
    x = x * (f_max - f_min) + f_min
    return x
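# Quick illustrative check of scale(): uint8 pixels in [0, 255] map into [-1, 1],
# e.g. scale(np.array([0, 255], dtype=np.uint8)) gives [-1.0, 1.0].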
class Dataset:
    """Wraps the SVHN train/test matrices and yields scaled mini-batches."""
    def __init__(self, train, test, val_frac=0.5, shuffle=False, scale_func=None):
        # Split the test set into test and validation parts along the sample axis.
        split_idx = int(len(test['y']) * (1 - val_frac))
        self.test_x, self.valid_x = test['X'][:, :, :, :split_idx], test['X'][:, :, :, split_idx:]
        self.test_y, self.valid_y = test['y'][:split_idx], test['y'][split_idx:]
self.train_x, self.train_y = train['X'], train['y']
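        # Move the sample axis to the front: (32, 32, 3, N) -> (N, 32, 32, 3).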
self.train_x = np.rollaxis(self.train_x, axis=3, start=0)
self.test_x = np.rollaxis(self.test_x, axis=3, start=0)
self.valid_x = np.rollaxis(self.valid_x, axis=3, start=0)
print(self.train_x.shape)
if scale_func is None:
self.scalar = scale
else:
self.scalar = scale_func
self.shuffle = shuffle
def next_batch(self, batch_size):
if self.shuffle:
idx = np.arange(len(self.train_x))
np.random.shuffle(idx)
self.train_x = self.train_x[idx]
self.train_y = self.train_y[idx]
for ii in range(0, len(self.train_x), batch_size):
x = self.train_x[ii:ii+batch_size]
y = self.train_y[ii:ii+batch_size]
yield self.scalar(x), y
def model_inputs(real_dims, z_dims):
    """
    Create the graph inputs.
    :param real_dims: shape of the real images, e.g. [32, 32, 3]
    :param z_dims: dimensionality of the noise vector z
    :return: (inputs_real, inputs_z, bn_train)
    """
    inputs_real = tf.placeholder(tf.float32, [None, *real_dims], name='inputs_real')
    inputs_z = tf.placeholder(tf.float32, [None, z_dims], name='inputs_z')
    # Scalar flag that switches batch normalization between training and inference mode.
    bn_train = tf.placeholder_with_default(True, shape=(), name='bn_train')
    return inputs_real, inputs_z, bn_train
def generator(input_z, output_dims, reuse=False, alpha=0.2, bn_train=True):
    """
    Generator network: project z to a 4x4x512 feature map, then upsample with
    strided transposed convolutions (4x4 -> 8x8 -> 16x16 -> 32x32).
    :param input_z: noise tensor of shape [N, z_dims]
    :param output_dims: number of output channels (3 for RGB)
    :param reuse: whether to reuse the 'generator' variable scope
    :param alpha: leaky ReLU negative slope
    :param bn_train: whether batch normalization runs in training mode
    :return: generated images in [-1, 1], shape [N, 32, 32, output_dims]
    """
    with tf.variable_scope('generator', reuse=reuse):
        # Project and reshape the noise vector to a 4x4x512 feature map.
        x1 = tf.layers.dense(input_z, units=4*4*512)
        x1 = tf.reshape(x1, shape=[-1, 4, 4, 512])
        x1 = tf.layers.batch_normalization(x1, training=bn_train)
        x1 = tf.nn.leaky_relu(x1, alpha=alpha)
        # 4x4x512 -> 8x8x256
        x2 = tf.layers.conv2d_transpose(
            x1, filters=256, kernel_size=5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=bn_train)
        x2 = tf.nn.leaky_relu(x2, alpha=alpha)
        # 8x8x256 -> 16x16x128
        x3 = tf.layers.conv2d_transpose(
            x2, filters=128, kernel_size=5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=bn_train)
        x3 = tf.nn.leaky_relu(x3, alpha=alpha)
        # 16x16x128 -> 32x32xoutput_dims, squashed to [-1, 1] with tanh.
        logits = tf.layers.conv2d_transpose(
            x3, filters=output_dims, kernel_size=5, strides=2, padding='same')
        output = tf.nn.tanh(logits)
        return output
def discriminator(x, reuse=False, alpha=0.2):
    """
    Discriminator network: strided convolutions downsample
    32x32 -> 16x16 -> 8x8 -> 4x4, followed by a dense layer producing one real/fake logit.
    :param x: image tensor of shape [N, 32, 32, 3]
    :param reuse: whether to reuse the 'discriminator' variable scope
    :param alpha: leaky ReLU negative slope
    :return: (logits, predictions)
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        # 32x32x3 -> 16x16x64 (no batch norm on the first layer, as in the DCGAN paper).
        x1 = tf.layers.conv2d(x, 64, 5, strides=2, padding='same')
        x1 = tf.nn.leaky_relu(x1, alpha=alpha)
        # 16x16x64 -> 8x8x128
        x2 = tf.layers.conv2d(x1, 128, 5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=True)
        x2 = tf.nn.leaky_relu(x2, alpha=alpha)
        # 8x8x128 -> 4x4x256
        x3 = tf.layers.conv2d(x2, 256, 5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=True)
        x3 = tf.nn.leaky_relu(x3, alpha=alpha)
        flatten = tf.layers.flatten(x3)
        logits = tf.layers.dense(flatten, units=1)
        predictions = tf.nn.sigmoid(logits)
        return logits, predictions
def model_loss(input_real, input_z, output_dims, alpha=0.2, bn_train=True):
    """
    Build the discriminator and generator losses.
    :param input_real: tensor of real images
    :param input_z: tensor of noise vectors
    :param output_dims: number of channels of the generated images
    :param alpha: leaky ReLU negative slope
    :param bn_train: batch normalization mode flag passed to the generator
    :return: (d_loss, g_loss)
    """
fake_images = generator(input_z, output_dims, alpha=alpha, bn_train=bn_train)
d_logit_real, d_model_real = discriminator(input_real, reuse=False, alpha=alpha)
d_logit_fake, d_model_fake = discriminator(fake_images, reuse=True, alpha=alpha)
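    # One-sided label smoothing: real labels are 0.9 instead of 1.0 so the
    # discriminator does not become overconfident.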
d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
logits=d_logit_real, labels=tf.ones_like(d_logit_real)*0.9
))
d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
logits=d_logit_fake, labels=tf.zeros_like(d_logit_fake)
))
d_loss = d_loss_real + d_loss_fake
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
logits=d_logit_fake, labels=tf.ones_like(d_logit_fake)
))
return d_loss, g_loss
def model_optimizer(d_loss, g_loss, learning_rate, beta1):
    """
    Build the Adam optimizers for the discriminator and the generator.
    :param d_loss: discriminator loss
    :param g_loss: generator loss
    :param learning_rate: Adam learning rate
    :param beta1: exponential decay rate of Adam's first-moment estimates
    :return: (d_train_opt, g_train_opt, g_vars)
    """
vars_list = tf.trainable_variables()
d_vars = [var for var in vars_list if var.name.startswith('discriminator')]
g_vars = [var for var in vars_list if var.name.startswith('generator')]
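    # Batch-norm moving-average update ops live in UPDATE_OPS; make the train ops depend on them.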
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
d_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).\
minimize(d_loss, var_list=d_vars)
g_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).\
minimize(g_loss, var_list=g_vars)
return d_train_opt, g_train_opt, g_vars
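# Hyperparameters: image shape, latent vector size, learning rate, batch size,
# training epochs, leaky-ReLU slope and Adam's beta1.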
real_size = [32, 32, 3]
z_size = 50
lr = 2e-4
batch_size = 32
epochs = 20
alpha = 0.2
beta1 = 0.5
checkpoint_dir = './model/dcgan'
os.makedirs(checkpoint_dir, exist_ok=True)
def view_samples(epoch, samples, nrows, ncols, figsize=(5, 5)):
    """Plot a grid of generated samples taken from the given epoch index."""
    fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols,
                             sharey=True, sharex=True)
for ax, img in zip(axes.flatten(), samples[epoch]):
ax.axis('off')
img = ((img - img.min()) * 255 / (img.max() - img.min())).astype(np.uint8)
        ax.set_adjustable('box')
im = ax.imshow(img, aspect='equal')
plt.subplots_adjust(wspace=0, hspace=0)
plt.show()
return fig, axes
def train(dataset, figsize=(5, 5)):
    """Train the DCGAN and return the collected losses and generated samples."""
    tf.reset_default_graph()
    graph = tf.Graph()
    # Fixed noise vectors used to visualize the generator's progress (6 x 12 grid).
    sample_z = np.random.uniform(-1, 1, size=(72, z_size))
    samples, losses = [], []
    with graph.as_default():
        inputs_real, inputs_z, bn_train = model_inputs(real_size, z_size)
        d_loss, g_loss = model_loss(
            inputs_real, inputs_z, real_size[-1], alpha=alpha, bn_train=bn_train)
        d_train_opt, g_train_opt, g_vars = model_optimizer(d_loss, g_loss, lr, beta1)
        # Build the sampling op once (reusing the generator variables) instead of
        # rebuilding the generator inside the training loop.
        sample_op = generator(inputs_z, real_size[-1], reuse=True, alpha=alpha, bn_train=False)
        saver = tf.train.Saver(var_list=g_vars)
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        step = 1
        for e in range(1, epochs + 1):
            for x, y in dataset.next_batch(batch_size):
                batch_z = np.random.uniform(-1, 1, size=[batch_size, z_size])
                feed = {inputs_real: x, inputs_z: batch_z}
                # Two discriminator updates followed by one generator update per step.
                sess.run(d_train_opt, feed)
                sess.run(d_train_opt, feed)
                sess.run(g_train_opt, feed)
                if step % 3 == 0:
                    d_loss_, g_loss_ = sess.run([d_loss, g_loss], feed)
                    losses.append((d_loss_, g_loss_))
                    print('Epochs:{} - Step:{} - G_Loss:{} - D_Loss:{}'.format(
                        e, step, g_loss_, d_loss_
                    ))
                if step % 200 == 0:
                    save_files = os.path.join(checkpoint_dir, 'model.ckpt')
                    saver.save(sess, save_path=save_files)
                    print('model saved to path:{}'.format(save_files))
                if step % 100 == 0:
                    plt.ion()
                    gen_samples = sess.run(sample_op, feed_dict={inputs_z: sample_z})
                    samples.append(gen_samples)
                    _ = view_samples(-1, samples, 6, 12, figsize=figsize)
                    plt.pause(3)
                    plt.close()
                step += 1
    return losses, samples
def show_train_losses(losses):
    """Plot the discriminator and generator losses collected during training."""
    fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Discriminator', alpha=0.5)
plt.plot(losses.T[1], label='Generator', alpha=0.5)
plt.title("Training Losses")
plt.legend()
plt.show()
def generator_for_show():
    """Restore the saved generator weights and display a 4 x 4 grid of samples."""
    tf.reset_default_graph()
    _, inputs_z, bn_train = model_inputs(real_size, z_size)
    gen_out = generator(inputs_z, real_size[-1], reuse=False, alpha=alpha, bn_train=bn_train)
    g_vars = [var for var in tf.trainable_variables() if var.name.startswith('generator')]
    saver = tf.train.Saver(var_list=g_vars)
    with tf.Session() as sess:
        # The checkpoint only contains the generator's trainable variables, so
        # initialize everything first and then restore on top of it.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
        sample_z = np.random.uniform(-1, 1, size=(16, z_size))
        gen_samples = sess.run(gen_out, feed_dict={inputs_z: sample_z})
        _ = view_samples(-1, [gen_samples], 4, 4)
if __name__ == '__main__':
    data_set = Dataset(train=trainset, test=testset)
    losses, samples = train(dataset=data_set)
    show_train_losses(losses)
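    # To sample from a previously saved checkpoint without retraining, call
    # generator_for_show() here instead of train().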