TensorFlow实现Pix2Pix

    代码实现参考:https://github.com/affinelayer/pix2pix-tensorflow,论文参考《Image-to-Image Translation with Conditional Adversarial Nets》,以下实现只在facades数据集上训练,运行代码前需要先下载该数据集。迭代了2000次epoch,效果如下(左边为输入图像,中间为生成的图像,右边为真实图像):

#coding=utf-8

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import os
import glob
import random
import collections
import math
import time
# https://github.com/affinelayer/pix2pix-tensorflow

# ---------------------------------------------------------------------------
# Script configuration. These module-level values replace the command-line
# flags of the reference implementation; edit them before running.
# ---------------------------------------------------------------------------
train_input_dir="E:/testdata/facades/train/"       # training-set input directory
train_output_dir="E:/testdata/facades/train_out/"  # training-set output directory

test_input_dir="E:/testdata/facades/val/"          # test-set input directory
test_output_dir="E:/testdata/facades/test_out/"    # test-set output directory
checkpoint="E:/testdata/facades/train_out/"        # directory where results are saved

seed=None
# NOTE(review): the comment below says "0 to disable" but the value is None —
# confirm how the consumer of max_steps treats None.
max_steps=None     # number of training steps (0 to disable)
max_epochs=200     # number of training epochs

progress_freq=50   # display progress every progress_freq steps
trace_freq=0       # trace execution every trace_freq steps
display_freq=50     # write current training images every display_freq steps
save_freq=500     # save model every save_freq steps, 0 to disable

separable_conv=False    # use separable convolutions in the generator
aspect_ratio=1.0        # aspect ratio of output images (width/height)
batch_size=1            # number of images in batch
which_direction="BtoA"  # translation direction; one of "AtoB" or "BtoA"
ngf=64                  # number of generator filters in first conv layer
ndf=64                  # number of discriminator filters in first conv layer
scale_size=286          # scale images to this size before cropping to 256x256
# NOTE(review): flip and no_flip are both True, which is contradictory as
# written. Presumably only one of them is read by the loading code (not
# visible here) — confirm which one and drop or invert the other.
flip=True               # flip images horizontally
no_flip=True            # don't flip images horizontally

lr=0.0002        # initial learning rate for adam
beta1=0.5        # momentum term of adam
l1_weight=100.0  # weight on L1 term for generator gradient
gan_weight=1.0   # weight on GAN term for generator gradient

output_filetype="png"  # file format of the output images

EPS = 1e-12       # tiny constant; presumably guards loss terms against log(0) — usage not visible here
CROP_SIZE = 256   # size images are cropped to
# Named tuples: Examples holds the loaded dataset, Model holds the constructed model
Examples = collections.namedtuple(
    "Examples",
    ["paths", "inputs", "targets", "count", "steps_per_epoch"],
)
Model = collections.namedtuple(
    "Model",
    [
        "outputs",
        "predict_real",
        "predict_fake",
        "discrim_loss",
        "discrim_grads_and_vars",
        "gen_loss_GAN",
        "gen_loss_L1",
        "gen_grads_and_vars",
        "train",
    ],
)

# Image pre-processing: [0, 1] => [-1, 1]
def preprocess(image):
    """Rescale image values from the [0, 1] range to the [-1, 1] range.

    Computes ``image * 2 - 1`` inside the "preprocess" name scope.
    """
    with tf.name_scope("preprocess"):
        doubled = image * 2
        return doubled - 1

# Image post-processing: [-1, 1] => [0, 1]
def deprocess(image):
    """Rescale image values from the [-1, 1] range back to the [0, 1] range.

    Computes ``(image + 1) / 2`` inside the "deprocess" name scope.
    """
    with tf.name_scope("deprocess"):
        shifted = image + 1
        return shifted / 2


# Discriminator convolution; batch_input is e.g. [batch, 256, 256, 6]
def discrim_conv(batch_input, out_channels, stride):
    """Zero-pad the input by one element per side, then apply a 4x4 "valid" convolution.

    Args:
        batch_input: 4-D input tensor, e.g. [batch, 256, 256, 6].
        out_channels: number of filters of the convolution.
        stride: stride used along both convolved axes.

    Returns:
        The output tensor of ``tf.layers.conv2d``.
    """
    # Padding per axis (assuming the default channels_last layout
    # [batch, height, width, channels] — TODO confirm):
    #   [0, 0]  batch axis: not padded
    #   [1, 1]  height axis: one row of zeros added on each side
    #   [1, 1]  width axis: one column of zeros added on each side
    #   [0, 0]  channel axis: not padded
    # e.g. [batch, 256, 256, 6] => [batch, 258, 258, 6]
    paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded_input = tf.pad(batch_input, paddings, mode="CONSTANT")
    return tf.layers.conv2d(
        padded_input,
        out_channels,
        kernel_size=4,
        strides=(stride, stride),
        padding="valid",
        kernel_initializer=tf.random_normal_initializer(0, 0.02)
    )


# Generator convolution: 4x4 kernel, stride 2, so the output is half the input size
def gen_conv(batch_input, out_channels):
    """Apply the generator's stride-2 4x4 convolution with "same" padding.

    [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels]

    Uses a separable convolution when the module-level ``separable_conv``
    flag is truthy, otherwise a regular convolution. Weights are drawn
    from a normal initializer with mean 0 and stddev 0.02.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    if not separable_conv:
        return tf.layers.conv2d(
            batch_input,
            out_channels,
            kernel_size=4,
            strides=(2, 2),
            padding="same",
            kernel_initializer=weight_init
        )
    return tf.layers.separable_conv2d(
        batch_input,
        out_channels,
        kernel_size=4,
        strides=(2, 2),
        padding="same",
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init
    )

# Generator deconvolution (upsampling) layer
def gen_deconv(batch_input, out_channels):
    """Upsample the input by a factor of two along both spatial axes.

    [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels]

    When the module-level ``separable_conv`` flag is truthy, the input is
    resized to twice its size with nearest-neighbour interpolation and then
    passed through a stride-1 separable convolution; otherwise a stride-2
    transposed convolution is used. Both paths use a 4x4 kernel, "same"
    padding and a normal initializer with mean 0 and stddev 0.02.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    if not separable_conv:
        return tf.layers.conv2d_transpose(
            batch_input,
            out_channels,
            kernel_size=4,
            strides=(2, 2),
            padding="same",
            kernel_initializer=weight_init
        )

    # Unpacking requires a rank-4 static shape; only the two middle dims are used.
    _batch, in_h, in_w, _channels = batch_input.shape
    target_size = [in_h * 2, in_w * 2]
    resized_input = tf.image.resize_images(
        batch_input,
        target_size,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
    )
    return tf.layers.separable_conv2d(
        resized_input,
        out_channels,
        kernel_size=4,
        strides=(1, 1),
        padding="same",
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init
    )

# Leaky ReLU activation
def lrelu(x, a):
    """Leaky ReLU expressed without a conditional.

    Evaluates ``0.5 * (1 + a) * x + 0.5 * (1 - a) * |x|``, which equals
    ``x`` where ``x >= 0`` and ``a * x`` where ``x < 0``.

    Args:
        x: input tensor.
        a: slope applied to negative inputs.
    """
    with tf.name_scope("lrelu"):
        # Summing the two terms yields the leak part and the linear part:
        #   leak:   a*x/2 - a*abs(x)/2
        #   linear: x/2 + abs(x)/2
        # The absolute-value term cancels one or the other depending on sign.

        # Without this identity the block appears to have 2 inputs on the graph.
        x = tf.identity(x)
        signed_term = (0.5 * (1 + a)) * x
        magnitude_term = (0.5 * (1 - a)) * tf.abs(x)
        return signed_term + magnitude_term

# Batch normalization
def batchnorm(inputs):
    """Batch-normalize ``inputs`` along axis 3.

    The layer is always built with ``training=True``, epsilon 1e-5 and
    momentum 0.1; gamma is initialized from a normal distribution with
    mean 1.0 and stddev 0.02.
    """
    gamma_init = tf.random_normal_initializer(1.0, 0.02)
    return tf.layers.batch_normalization(
        inputs,
        axis=3,
        epsilon=1e-5,
        momentum=0.1,
        training=True,
        gamma_initializer=gamma_init
    )

# 检查图像的维度
def check_image(image):
    assertion = tf.assert_equal(tf.shape(image)[-1], 3, message="image must have 3 color chan
评论 17
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值