import numpy as np
import sklearn.preprocessing as prep
import tensorflow as tf
In [3]:
from tensorflow.examples.tutorials.mnist import input_data
In [20]:
def xavier_init(fan_in,fan_out,constant=1):
low = -constant*np.sqrt(6.0/(fan_in+fan_out))
high = constant*np.sqrt(6.0/(fan_in+fan_out))
return tf.random_uniform((fan_in,fan_out),
minval=low,maxval=high,
dtype=tf.float32)
In [30]:
class AdditiveGaussianNoiseAutoencoder(object):
def __init__(self,n_input,n_hidden,transfer_function=tf.nn.softplus,
optimizer = tf.train.AdamOptimizer,scale=0.1):
self.n_input = n_input
self.n_hidden = n_hidden
self.transfer = transfer_function
self.scale = tf.placeholder(tf.float32)
self.training_scale = scale
network_weights = self._initialize_weights()
self.weights = network_weights
self.x=tf.placeholder(tf.float32,[None,self.n_input])#x是噪声
self.hidden =self.transfer(tf.add(tf.matmul(
self.x+scale*tf.random_normal((n_input,)),
self.weights['w1']),self.weights['b1']))
self.reconstruction = tf.add(tf.matmul(self.hidden,
self.weights['w2']),self.weights['b2'])
self.cost =0.5*tf.reduce_sum(tf.pow(tf.subtract(
self.reconstruction,self.x),2.0))
self.optimizer=optimizer.minimize(self.cost)
init = tf.global_variables_initializer()#定义了去噪编码器的架构,并且初始化所有参数
self.sess =tf.Session()
self.sess.run(init)
def _initialize_weights(self):
all_weights = dict()
all_weights['w1']=tf.Variable(xavier_init(self.n_input,
self.n_hidden))
all_weights['b1']=tf.Variable(tf.zeros([self.n_hidden],
dtype = tf.float32))
all_weights['w2']=tf.Variable(tf.zeros([self.n_hidden,
self.n_input],dtype=tf.float32))#由于是编码器,所以这里输出层的个数和输入层维数是一样的
all_weights['b2']=tf.Variable(tf.zeros([self.n_input],
dtype=tf.float32))
return all_weights
def partial_fit(self,X):#执行训练过程
cost,opt = self.sess.run((self.cost,self.optimizer),
feed_dict={self.x:X,self.scale:self.training_scale})
return cost
def calc_total_cost(self,X):#这个过程不执行optimizer,那么不会训练,只是用来测试cost
return self.sess.run(self.cost,feed_dict={self.x:X,self.scale:self.training_scale})
def transform(self,X):
return self.sess.run(self.hidden,feed_dict ={self.x:X,
self.scale:self.training_scale})#这个函数用来输出hidden学习的特征
def generate(self,hidden=None):
if hidden is None:
hidden=np.random.normal(size=self.weights['b1'])
return self.sess.run(self.reconstruction,feed_dict={self.hidden:hidden})
def reconstruction(self,X):
return self.sess.run(self.reconstruction,feed_dict={self.x:X,self.scale:self.training_scale})
def getWeights(self):#获取隐含层权重
return self.sess.run(self.weights['w1'])
def getBiases(self):
return self.sess.run(self.weights['b1'])
In [6]:
mnist =input_data.read_data_sets('MNIST_data',one_hot=True)#读入数据
Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
In [25]:
def standard_scale(X_train,X_test):
preprocessor=prep.StandardScaler().fit(X_train)#对数据进行减均值除方差操作
X_train=preprocessor.transform(X_train)
X_teat=preprocessor.transform(X_test)
return X_train,X_test
def get_random_block_from_data(data,batch_size):#定义一个随机获取block数据的函数,随机得到batch_size个数据,不放回抽样
start_index=np.random.randint(0,len(data)-batch_size)
return data[start_index:(start_index+batch_size)]
In [9]:
X_train,X_test=standard_scale(mnist.train.images,mnist.test.images)
In [11]:
n_samples=int(mnist.train.num_examples)
training_epochs=20
batch_size=128
display_step=1
In [31]:
#创建一个AGN自编码器实例
autoencoder=AdditiveGaussianNoiseAutoencoder(n_input=784,
n_hidden=200,
transfer_function=tf.nn.softplus,
optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
scale=0.01)
In [32]:
for epoch in range(training_epochs):
avg_cost=0.
total_batch=int(n_samples/batch_size)
for i in range(total_batch):
batch_xs=get_random_block_from_data(X_train,batch_size)
cost=autoencoder.partial_fit(batch_xs)
avg_cost+=cost/n_samples*batch_size
if epoch%display_step==0:
print("Epoch:",'%04d'%(epoch+1),"cost=",
"{:.9f}".format(avg_cost))
Epoch: 0001 cost= 18236.463554545
Epoch: 0002 cost= 12616.789628409
Epoch: 0003 cost= 11127.080529545
Epoch: 0004 cost= 10196.701531818
Epoch: 0005 cost= 9599.919021023
Epoch: 0006 cost= 9168.061587500
Epoch: 0007 cost= 10017.257284091
Epoch: 0008 cost= 8463.642398295
Epoch: 0009 cost= 8802.881572159
Epoch: 0010 cost= 8055.374426136
Epoch: 0011 cost= 8735.796370455
Epoch: 0012 cost= 8992.533970455
Epoch: 0013 cost= 8351.100086932
Epoch: 0014 cost= 7891.109460795
Epoch: 0015 cost= 8248.965477841
Epoch: 0016 cost= 7675.395246023
Epoch: 0017 cost= 8532.023831818
Epoch: 0018 cost= 7955.170034091
Epoch: 0019 cost= 8332.180966477
Epoch: 0020 cost= 7899.204300568
In [33]:
#最后使用训练好的模型进行性能测试
print("Total cost:"+str(autoencoder.calc_total_cost(X_test)))
Total cost:74088.1