Auto-Encoder Example
本文具体数据集与源代码可从我的GitHub地址获取
https://github.com/liuzuoping/Deep_Learning_note
用 TensorFlow 构建一个编码器和解码器各含两层的 auto-encoder,先把图像压缩到低维隐层空间,再由隐层表示重构出原图。
Auto-Encoder 概览

参考文献:
- Gradient-based learning applied to document recognition. Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Proceedings of the IEEE, 86(11):2278-2324, November 1998.
MNIST 数据集概览
这个例子使用常见的 MNIST 手写数字数据集。之前已经介绍过,这些图片的尺寸已经过归一化,数字位于固定大小(28×28 像素)图片的中央,像素值范围为 0~1。为了简化,每张图片都被展平为一个含有 784 个特征的一维数组。
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
导入 MNIST 数据集
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset, using /tmp/data/ as the data directory.
# Labels are requested one-hot encoded, although the visible code only
# ever uses the images and discards the labels.
mnist = input_data.read_data_sets(
    "/tmp/data/",
    one_hot=True,
)
训练参数
# --- Optimisation schedule ---
batch_size = 256       # number of images fed per training step
num_steps = 30000      # total number of training steps
learning_rate = 0.01   # learning rate handed to the optimizer

# --- Reporting ---
display_step = 1000    # print the minibatch loss every this many steps
examples_to_show = 10  # NOTE(review): not referenced in the visible code
网络参数
# Layer widths, listed in the order data flows through the encoder.
num_input = 784     # flattened MNIST image: 28 * 28 pixels
num_hidden_1 = 256  # number of features in the first hidden layer
num_hidden_2 = 128  # number of features in the second hidden layer (latent dim)
设置placeholder
def _random_normal_variable(shape):
    """Return a tf.Variable whose initial value comes from tf.random_normal."""
    return tf.Variable(tf.random_normal(shape))


# Input batch of flattened images; the batch dimension is left unspecified.
X = tf.placeholder("float", shape=[None, num_input])

# Weight matrices, keyed by the layer that uses them. The encoder narrows
# num_input -> num_hidden_1 -> num_hidden_2; the decoder mirrors it back.
weights = {
    'encoder_h1': _random_normal_variable([num_input, num_hidden_1]),
    'encoder_h2': _random_normal_variable([num_hidden_1, num_hidden_2]),
    'decoder_h1': _random_normal_variable([num_hidden_2, num_hidden_1]),
    'decoder_h2': _random_normal_variable([num_hidden_1, num_input]),
}

# Bias vectors, one per layer, sized to that layer's output width.
biases = {
    'encoder_b1': _random_normal_variable([num_hidden_1]),
    'encoder_b2': _random_normal_variable([num_hidden_2]),
    'decoder_b1': _random_normal_variable([num_hidden_1]),
    'decoder_b2': _random_normal_variable([num_input]),
}
构建encoder与decoder
def encoder(x):
    """Encode a batch of inputs into the latent representation.

    Applies two fully connected layers with sigmoid activations, reading
    their parameters from the module-level ``weights`` and ``biases``
    dictionaries.

    Args:
        x: Tensor that is matrix-multiplied with ``weights['encoder_h1']``;
            expected to hold one flattened input per row.

    Returns:
        The output tensor of the second sigmoid layer.
    """
    # Encoder hidden layer #1: sigmoid(x . W1 + b1)
    layer_1 = tf.nn.sigmoid(
        tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    # Encoder hidden layer #2: sigmoid(layer_1 . W2 + b2)
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    return layer_2
# Build the decoder
def decoder(x):
    """Decode a batch of latent codes back into input space.

    Applies two fully connected layers with sigmoid activations, reading
    their parameters from the module-level ``weights`` and ``biases``
    dictionaries.

    Args:
        x: Tensor that is matrix-multiplied with ``weights['decoder_h1']``;
            expected to hold one latent code per row.

    Returns:
        The output tensor of the second sigmoid layer, i.e. the
        reconstruction.
    """
    # Decoder hidden layer #1: sigmoid(x . W1 + b1)
    layer_1 = tf.nn.sigmoid(
        tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    # Decoder hidden layer #2: sigmoid(layer_1 . W2 + b2)
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    return layer_2
# The autoencoder's target is its own input.
y_true = X

# Forward pass: compress X with the encoder, then reconstruct it.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
y_pred = decoder_op

# Loss: mean of the squared difference between input and reconstruction.
squared_error = tf.pow(y_true - y_pred, 2)
loss = tf.reduce_mean(squared_error)

# `optimizer` is the op returned by minimize(), not the optimizer object;
# running it performs one RMSProp update step on `loss`.
rmsprop = tf.train.RMSPropOptimizer(learning_rate)
optimizer = rmsprop.minimize(loss)

# Op that initialises the global variables. It is created last so that any
# variables added while building the training op are covered as well.
init = tf.global_variables_initializer()
开始训练,启动一个session
# Open a session and initialise the variables before training.
# The session is deliberately left open: the reconstruction code that
# follows reuses `sess` to run the trained model.
sess = tf.Session()
sess.run(init)

# Training
for i in range(1, num_steps + 1):
    # Fetch the next minibatch of images; the labels are discarded because
    # the network is trained to reproduce its own input.
    batch_x, _ = mnist.train.next_batch(batch_size)

    # Run one optimisation step and fetch the loss for this minibatch.
    _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})

    # Report the loss on the first step and then every `display_step` steps.
    if i % display_step == 0 or i == 1:
        print('Step %i: Minibatch Loss: %f' % (i, l))
测试
对测试集进行编码和解码,并可视化它们的重构
# Encode and decode test images, then show originals and reconstructions
# side by side as two n x n grids of 28x28 tiles.
n = 4
# np.empty leaves the canvases uninitialised; that is safe here because the
# loops below write every one of the n * n tiles before anything is drawn.
canvas_orig = np.empty((28 * n, 28 * n))
canvas_recon = np.empty((28 * n, 28 * n))
for i in range(n):
    # Next n images from the MNIST test set (labels are not needed).
    batch_x, _ = mnist.test.next_batch(n)
    # Run the images through the encoder and decoder.
    g = sess.run(decoder_op, feed_dict={X: batch_x})

    # Row i of each grid holds this batch: originals and reconstructions
    # are written to the same tile position on their respective canvases.
    rows = slice(i * 28, (i + 1) * 28)
    for j in range(n):
        cols = slice(j * 28, (j + 1) * 28)
        canvas_orig[rows, cols] = batch_x[j].reshape([28, 28])
        canvas_recon[rows, cols] = g[j].reshape([28, 28])

print("Original Images")
plt.figure(figsize=(n, n))
plt.imshow(canvas_orig, origin="upper", cmap="gray")
plt.show()

print("Reconstructed Images")
plt.figure(figsize=(n, n))
plt.imshow(canvas_recon, origin="upper", cmap="gray")
plt.show()