代码实现参考:https://github.com/affinelayer/pix2pix-tensorflow,论文参考《Image-to-Image Translation with Conditional Adversarial Nets》。以下实现只在facades数据集上训练,运行代码前需要先下载该数据集。迭代了2000次epoch,效果如下(左边为输入图像,中间为生成的图像,右边为真实图像):
#coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import os
import glob
import random
import collections
import math
import time
# https://github.com/affinelayer/pix2pix-tensorflow
# --- Dataset and output locations -------------------------------------------
train_input_dir = "E:/testdata/facades/train/"  # training-set input directory
train_output_dir = "E:/testdata/facades/train_out/"  # training-run output directory
test_input_dir = "E:/testdata/facades/val/"  # test-set input directory
test_output_dir = "E:/testdata/facades/test_out/"  # test-run output directory
checkpoint = "E:/testdata/facades/train_out/"  # directory where results are saved

# --- Run control ------------------------------------------------------------
seed = None
max_steps = None  # number of training steps (0 to disable)
max_epochs = 200  # number of training epochs
progress_freq = 50  # display progress every progress_freq steps
trace_freq = 0  # trace execution every trace_freq steps
display_freq = 50  # write current training images every display_freq steps
save_freq = 500  # save model every save_freq steps, 0 to disable

# --- Model / data options ---------------------------------------------------
separable_conv = False  # use separable convolutions in the generator
aspect_ratio = 1.0  # aspect ratio of output images (width/height)
batch_size = 1  # number of images in batch
which_direction = "BtoA"  # one of "AtoB" or "BtoA"
ngf = 64  # number of generator filters in first conv layer
ndf = 64  # number of discriminator filters in first conv layer
scale_size = 286  # scale images to this size before cropping to 256x256
flip = True  # flip images horizontally
# NOTE(review): `flip` and `no_flip` are both True, which reads as
# contradictory; confirm which of the two the rest of the script consults.
no_flip = True  # don't flip images horizontally

# --- Optimisation -----------------------------------------------------------
lr = 0.0002  # initial learning rate for adam
beta1 = 0.5  # momentum term of adam
l1_weight = 100.0  # weight on L1 term for generator gradient
gan_weight = 1.0  # weight on GAN term for generator gradient

# --- Miscellaneous ----------------------------------------------------------
output_filetype = "png"  # format of the output images
# Very small constant; presumably added inside loss terms so they never hit
# exactly zero -- confirm against the loss definitions.
EPS = 1e-12
CROP_SIZE = 256  # size images are cropped to
# Named tuples that hold the loaded dataset and the constructed model.
Examples = collections.namedtuple(
    "Examples",
    ["paths", "inputs", "targets", "count", "steps_per_epoch"],
)
Model = collections.namedtuple(
    "Model",
    [
        "outputs",
        "predict_real",
        "predict_fake",
        "discrim_loss",
        "discrim_grads_and_vars",
        "gen_loss_GAN",
        "gen_loss_L1",
        "gen_grads_and_vars",
        "train",
    ],
)
def preprocess(image):
    """Rescale image values from the range [0, 1] to [-1, 1].

    The arithmetic is created inside the "preprocess" name scope.
    """
    with tf.name_scope("preprocess"):
        rescaled = image * 2 - 1
    return rescaled
def deprocess(image):
    """Rescale image values from the range [-1, 1] back to [0, 1].

    The arithmetic is created inside the "deprocess" name scope.
    """
    with tf.name_scope("deprocess"):
        restored = (image + 1) / 2
    return restored
def discrim_conv(batch_input, out_channels, stride):
    """Discriminator convolution: zero-pad by one pixel, then a 4x4 "valid" conv.

    Args:
        batch_input: 4-D input tensor; presumably laid out as
            [batch, height, width, channels], e.g. [batch, 256, 256, 6] for
            the first discriminator layer -- confirm against the caller.
        out_channels: number of filters produced by the convolution.
        stride: stride applied along both spatial axes.

    Returns:
        The tensor produced by the convolution.
    """
    # One [before, after] pair per axis of the input:
    #   axis 0 (batch)    -> [0, 0]: left untouched
    #   axis 1 (height)   -> [1, 1]: one row of zeros added top and bottom
    #   axis 2 (width)    -> [1, 1]: one column of zeros added left and right
    #   axis 3 (channels) -> [0, 0]: left untouched
    # e.g. [batch, 256, 256, 6] becomes [batch, 258, 258, 6].
    paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded_input = tf.pad(batch_input, paddings, mode="CONSTANT")
    kernel_init = tf.random_normal_initializer(0, 0.02)
    return tf.layers.conv2d(
        padded_input,
        out_channels,
        kernel_size=4,
        strides=(stride, stride),
        padding="valid",
        kernel_initializer=kernel_init,
    )
def gen_conv(batch_input, out_channels):
    """Generator down-sampling convolution (4x4 kernel, stride 2, "same" padding).

    Maps [batch, in_height, in_width, in_channels] to
    [batch, out_height, out_width, out_channels]; with stride 2 the spatial
    size of the output is half that of the input.

    The module-level flag ``separable_conv`` selects between a separable
    convolution and a regular one.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    shared_kwargs = dict(kernel_size=4, strides=(2, 2), padding="same")
    if not separable_conv:
        return tf.layers.conv2d(
            batch_input,
            out_channels,
            kernel_initializer=weight_init,
            **shared_kwargs
        )
    return tf.layers.separable_conv2d(
        batch_input,
        out_channels,
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init,
        **shared_kwargs
    )
def gen_deconv(batch_input, out_channels):
    """Generator up-sampling layer.

    Maps [batch, in_height, in_width, in_channels] to
    [batch, out_height, out_width, out_channels].

    When the module-level flag ``separable_conv`` is set, the input is first
    resized to twice its height and width with nearest-neighbour
    interpolation and then passed through a stride-1 separable convolution.
    Otherwise a stride-2 transposed convolution is used.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    if not separable_conv:
        return tf.layers.conv2d_transpose(
            batch_input,
            out_channels,
            kernel_size=4,
            strides=(2, 2),
            padding="same",
            kernel_initializer=weight_init,
        )
    # Unpacking requires a rank-4 static shape; only the two spatial sizes
    # are used.
    _batch, height, width, _channels = batch_input.shape
    upsampled = tf.image.resize_images(
        batch_input,
        [height * 2, width * 2],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )
    return tf.layers.separable_conv2d(
        upsampled,
        out_channels,
        kernel_size=4,
        strides=(1, 1),
        padding="same",
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init,
    )
def lrelu(x, a):
    """Leaky ReLU activation with leak factor ``a``.

    The result is built as the sum of two terms that together give ``x`` for
    positive inputs and ``a * x`` for negative inputs:

        leak:   a*x/2 - a*abs(x)/2
        linear: x/2 + abs(x)/2

    which regroups to ``0.5*(1 + a)*x + 0.5*(1 - a)*abs(x)``.
    """
    with tf.name_scope("lrelu"):
        # Without this identity the block appears to have two inputs when
        # the graph is visualised.
        x = tf.identity(x)
        signed_term = (0.5 * (1 + a)) * x
        magnitude_term = (0.5 * (1 - a)) * tf.abs(x)
        return signed_term + magnitude_term
def batchnorm(inputs):
    """Apply batch normalization over axis 3 of ``inputs``.

    ``training=True`` is hard-coded, so the layer is always built in
    training mode. Gamma is initialised from a normal distribution with
    mean 1.0 and standard deviation 0.02.
    """
    gamma_init = tf.random_normal_initializer(1.0, 0.02)
    normalized = tf.layers.batch_normalization(
        inputs,
        axis=3,
        epsilon=1e-5,
        momentum=0.1,
        training=True,
        gamma_initializer=gamma_init,
    )
    return normalized
# 检查图像的维度
def check_image(image):
assertion = tf.assert_equal(tf.shape(image)[-1], 3, message="image must have 3 color chan