Deep Learning 初探（三）- CSDN博客

numpy 中矩阵与向量相加时会按广播规则把向量加到矩阵的每一行上（即每一列都加上向量中对应的分量），
故按类别实现常数项（偏置）的加和是合理的。

多层感知机的设计基本上是在logistic回归前，加一个预处理过程，
通过S形曲线做映射，将空间变得线性可分（这个目的一般是通过核方法实现的），
从隐藏层的处理上来讲，它是通过设定非线性变换的形式及隐藏单元的数量来实现
变换的；这种变换的效果并不能单纯从变换后的维度上直接看出，例子可以从
rbf（径向基）函数上窥得。

所以从抽象的角度来讲，多层感知机基本上是核方法，正如一般的岭回归与
核岭回归的区别。只不过这种核变换的得到是比较花费时间的，
因为要通过训练得到（多层）。相当于在无穷维空间中利用随机梯度
下降法找到一个比较好的核变换，这种变换的选择是具备指导意义的。

下面是单隐藏层的代码例子：

import os 
import sys 
import timeit 


import numpy
import theano 
import theano.tensor as T 


from logistic_sgd import LogisticRegression, load_data # Have designed before.


class HiddenLayer(object):
    """Fully connected layer computing ``activation(dot(input, W) + b)``.

    Attributes set by the constructor:
        input:  the symbolic input passed in.
        W, b:   Theano shared variables holding the weights and biases.
        output: symbolic output of the layer.
        params: ``[W, b]``, the parameters of this layer.
    """

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """Build the symbolic graph of the layer.

        Args:
            rng: random generator exposing ``uniform(low, high, size)``
                (e.g. ``numpy.random.RandomState``); only used when ``W``
                is not supplied.
            input: symbolic input fed to ``T.dot(input, W)``.
            n_in: number of input units (rows of ``W``).
            n_out: number of output units (columns of ``W``, length of ``b``).
            W: optional shared variable to reuse as weights. When ``None``,
                weights are drawn uniformly from
                ``[-sqrt(6 / (n_in + n_out)), sqrt(6 / (n_in + n_out))]``.
            b: optional shared variable to reuse as biases. When ``None``,
                biases start at zero.
            activation: callable applied to the linear output, or ``None``
                to leave the output linear.
        """
        self.input = input

        if W is None:
            bound = numpy.sqrt(6. / (n_in + n_out))
            initial_weights = numpy.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            # The sigmoid activation gets a 4x wider initialisation range.
            if activation == theano.tensor.nnet.sigmoid:
                initial_weights *= 4
            W = theano.shared(value=initial_weights, name='W', borrow=True)

        if b is None:
            initial_biases = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=initial_biases, name='b', borrow=True)

        self.W = W
        self.b = b

        pre_activation = T.dot(input, self.W) + self.b
        if activation is None:
            self.output = pre_activation
        else:
            self.output = activation(pre_activation)

        self.params = [self.W, self.b]


class MLP(object):
    """Single-hidden-layer perceptron: a tanh ``HiddenLayer`` feeding a
    ``LogisticRegression`` output layer.

    Attributes set by the constructor:
        hiddenLayer, logRegressionLayer: the two stacked layers.
        L1: sum of absolute values of both layers' weight matrices.
        L2_sqr: sum of squared entries of both layers' weight matrices.
        negative_log_likelihood, errors, y_pred: forwarded unchanged from
            the logistic regression layer.
        params: hidden-layer parameters followed by output-layer parameters.
        input: the symbolic input passed in.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Wire the hidden layer into the logistic regression layer.

        Args:
            rng: random generator forwarded to ``HiddenLayer`` for weight
                initialisation.
            input: symbolic input of the network.
            n_in: number of input units.
            n_hidden: number of hidden units.
            n_out: number of output units of the logistic regression layer.
        """
        self.input = input

        hidden = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        log_reg = LogisticRegression(
            input=hidden.output,
            n_in=n_hidden,
            n_out=n_out
        )
        self.hiddenLayer = hidden
        self.logRegressionLayer = log_reg

        # Regularisation terms are built from the weight matrices only;
        # the bias vectors are not included.
        self.L1 = abs(hidden.W).sum() + abs(log_reg.W).sum()
        self.L2_sqr = (hidden.W ** 2).sum() + (log_reg.W ** 2).sum()

        # Expose the output layer's cost, error and prediction directly on
        # the model.
        self.negative_log_likelihood = log_reg.negative_log_likelihood
        self.errors = log_reg.errors
        self.y_pred = log_reg.y_pred

        self.params = hidden.params + log_reg.params




def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=30,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train an MLP with minibatch SGD and early stopping, then predict.

    The model is built for 28 * 28 inputs and 10 output classes. After
    training, the predictions for the first 100 test examples are printed
    next to the true labels, together with the fraction that match.

    Written to run unchanged on Python 2 and Python 3: every ``print`` takes
    a single parenthesised argument, batch counts use floor division, and
    ``range`` replaces ``xrange``.

    Args:
        learning_rate: step size of the gradient descent updates.
        L1_reg: weight of the L1 regularisation term in the cost.
        L2_reg: weight of the squared-L2 regularisation term in the cost.
        n_epochs: maximum number of passes over the training set.
        dataset: dataset path handed to ``load_data``.
        batch_size: number of examples per minibatch.
        n_hidden: number of hidden units.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: batch counts must be integers because they drive
    # range() and the modulo test below (plain `/` yields floats on Python 3).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print("... building the model")

    index = T.iscalar()  # minibatch index
    x = T.matrix('x')    # input examples, one per row
    y = T.ivector('y')   # integer class labels

    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # Negative log-likelihood plus the two weighted regularisation terms.
    cost = (
        classifier.negative_log_likelihood(y) +
        L1_reg * classifier.L1 +
        L2_reg * classifier.L2_sqr
    )

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    gparams = [T.grad(cost, param) for param in classifier.params]

    # Plain SGD step for every parameter.
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print("... training")
    # Early-stopping state: training stops once `patience` iterations have
    # been performed; a significant validation improvement extends it.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # Global iteration counter across epochs (named `iteration` so
            # the builtin `iter` is not shadowed).
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                if this_validation_loss < best_validation_loss:
                    # Only a large enough improvement extends the patience.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(
                        (
                            'epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iteration:
                done_looping = True
                break

    print(
        ('Optimization complete. Best validation score of %f %% '
         'obtained at iteration %i, with test performance %f %%')
        %
        (
            best_validation_loss * 100., best_iter + 1, test_score * 100.,
        )
    )

    print("begin predict :")
    predict_model = theano.function(inputs=[classifier.input],
                                    outputs=classifier.y_pred)

    num = 100
    # Pull raw arrays out once instead of rebinding test_set_x and
    # evaluating test_set_y twice.
    test_inputs = test_set_x.get_value()
    true_labels = test_set_y.eval()[:num]
    predicted_values = predict_model(test_inputs[:num])

    print("The Ori values :")
    print(true_labels)
    # The message now reports the real number of examples (it used to say 10).
    print("Predicted values for the first %i examples in test set:" % num)
    print(predicted_values)
    print("equal :")
    print(
        float(numpy.sum(predicted_values == true_labels)) /
        predicted_values.shape[0]
    )




# Run the training/prediction demo only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    test_mlp()