numpy 中矩阵与向量相加时会按广播规则逐列对应相加(向量的第 j 个分量加到矩阵的第 j 列上),
故用它来实现常数项(偏置)的加和是合理的。
多层感知机的设计基本上是在logistic回归前,加一个预处理过程,
通过S形曲线做映射,将空间变得线性可分(这个目的一般是通过核方法实现的),
从隐藏层的处理上来讲,其是通过设定一些非线性变换及设定数量来实现
变换,这种变换并不能单纯从变换后的维度上直接看出,例子可以从
rbf(径向基)函数上窥得,
所以从抽象的角度来讲,多层感知机基本上是一种核方法,其与 logistic 回归的区别
类似于核岭回归与一般岭回归的区别。只不过这种核变换的得到是比较花费时间的,
因为要通过训练得到(多层)。相当于在无穷维空间中利用随机梯度
下降法找到一个比较好的核变换,这种变换的选择是具备指导意义的。
下面是单隐藏层的代码例子:
故用矩阵与向量的加和来实现常数项(偏置)的加和是合理的。
多层感知机的设计基本上是在logistic回归前,加一个预处理过程,
通过S形曲线做映射,将空间变得线性可分(这个目的一般是通过核方法实现的),
从隐藏层的处理上来讲,其是通过设定一些非线性变换及设定数量来实现
变换,这种变换并不能单纯从变换后的维度上直接看出,例子可以从
rbf(径向基)函数上窥得,
所以从抽象的角度来讲,多层感知机基本上是一种核方法,其与 logistic 回归的区别
类似于核岭回归与一般岭回归的区别。只不过这种核变换的得到是比较花费时间的,
因为要通过训练得到(多层)。相当于在无穷维空间中利用随机梯度
下降法找到一个比较好的核变换,这种变换的选择是具备指导意义的。
下面是单隐藏层的代码例子:
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
from logistic_sgd import LogisticRegression, load_data # Have designed before.
class HiddenLayer(object):
    """Fully connected layer computing ``activation(dot(input, W) + b)``.

    Attributes set by the constructor:
        input:  the expression passed in as ``input``.
        W:      shared weight variable (created here with shape
                ``(n_in, n_out)`` unless supplied by the caller).
        b:      shared bias variable (created here with shape ``(n_out,)``
                unless supplied by the caller).
        output: the layer's output expression.
        params: ``[W, b]``.
    """

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """Build the layer's parameters and its output expression.

        Args:
            rng: random generator exposing ``uniform(low, high, size)``;
                used only when ``W`` is not supplied.
            input: expression fed to the layer (presumably a symbolic
                matrix with ``n_in`` columns -- confirm against callers).
            n_in: number of input units (rows of ``W``).
            n_out: number of output units (columns of ``W``, length of ``b``).
            W: optional pre-built shared weights; when ``None`` they are
                drawn uniformly from ``[-bound, bound]`` with
                ``bound = sqrt(6 / (n_in + n_out))``.
            b: optional pre-built shared biases; when ``None`` they start
                at zero.
            activation: callable applied to the linear output, or ``None``
                to keep the layer linear.
        """
        self.input = input

        if W is None:
            bound = numpy.sqrt(6. / (n_in + n_out))
            initial_W = numpy.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            # Sigmoid units get a four times wider initial range.
            if activation == theano.tensor.nnet.sigmoid:
                initial_W *= 4
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if b is None:
            initial_b = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=initial_b, name='b', borrow=True)

        self.W = W
        self.b = b
        self.params = [self.W, self.b]

        pre_activation = T.dot(input, self.W) + self.b
        if activation is None:
            self.output = pre_activation
        else:
            self.output = activation(pre_activation)
class MLP(object):
    """Single-hidden-layer perceptron.

    A tanh ``HiddenLayer`` transforms the input and its output is fed to a
    ``LogisticRegression`` layer.

    Attributes set by the constructor:
        hiddenLayer:             the ``HiddenLayer`` instance.
        logRegressionLayer:      the ``LogisticRegression`` instance.
        L1:                      sum of absolute values of both weight matrices.
        L2_sqr:                  sum of squares of both weight matrices.
        negative_log_likelihood: forwarded from the logistic layer.
        errors:                  forwarded from the logistic layer.
        y_pred:                  forwarded from the logistic layer.
        params:                  hidden-layer params followed by the
                                 logistic-layer params.
        input:                   the expression passed in as ``input``.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Wire the two layers together.

        Args:
            rng: random generator forwarded to ``HiddenLayer``.
            input: expression fed to the hidden layer.
            n_in: number of input units.
            n_hidden: number of hidden units.
            n_out: number of output units of the logistic layer.
        """
        # Kept so callers can compile functions that take the input directly.
        self.input = input

        hidden = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        log_reg = LogisticRegression(
            input=hidden.output,
            n_in=n_hidden,
            n_out=n_out
        )
        self.hiddenLayer = hidden
        self.logRegressionLayer = log_reg

        # Regularisation terms cover the weight matrices only, not the biases.
        self.L1 = abs(hidden.W).sum() + abs(log_reg.W).sum()
        self.L2_sqr = (hidden.W ** 2).sum() + (log_reg.W ** 2).sum()

        # The network's loss, error and prediction are those of the top layer.
        self.negative_log_likelihood = log_reg.negative_log_likelihood
        self.errors = log_reg.errors
        self.y_pred = log_reg.y_pred

        self.params = hidden.params + log_reg.params
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=30,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train an ``MLP`` with minibatch SGD and early stopping, then predict.

    The data returned by ``load_data(dataset)`` is used as three
    ``(x, y)`` pairs: training, validation and test. After training, the
    first 100 test examples are classified and the fraction of correct
    predictions is printed.

    Args:
        learning_rate: step size of the gradient updates.
        L1_reg: weight of the L1 regularisation term in the cost.
        L2_reg: weight of the squared-L2 regularisation term in the cost.
        n_epochs: maximum number of passes over the training set.
        dataset: dataset location handed to ``load_data``.
        batch_size: number of examples per minibatch.
        n_hidden: number of hidden units.

    Returns:
        None. Progress and results are printed to stdout; the elapsed
        training time is written to stderr.
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral on Python 3 as well;
    # a trailing partial batch is ignored, exactly as before.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print("... building the model")
    index = T.iscalar()   # minibatch index
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    def batch_givens(set_x, set_y):
        """Substitute x / y with the ``index``-th minibatch of a data set."""
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: set_x[start:stop], y: set_y[start:stop]}

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(test_set_x, test_set_y)
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y)
    )

    # Plain SGD: every parameter moves against its gradient of the cost.
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y)
    )

    print("... training")
    patience = 10000               # look at this many minibatches regardless
    patience_increase = 2          # wait this much longer on a new best
    improvement_threshold = 0.995  # relative improvement deemed significant
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Called for its side effect (the parameter updates).
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                if this_validation_loss < best_validation_loss:
                    # Extend patience only when the gain is significant.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((
                        'epoch %i, minibatch %i/%i, test error of'
                        ' best model %f %%'
                    ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        ('Optimization complete. Best validation score of %f %% '
         'obtained at iteration %i, with test performance %f %%')
        %
        (
            best_validation_loss * 100., best_iter + 1, test_score * 100.,
        )
    )
    # Report timing on stderr so stdout keeps its previous content.
    sys.stderr.write(
        'Training ran for %.2fm\n' % ((end_time - start_time) / 60.)
    )

    print("begin predict :")
    num = 100
    test_inputs = test_set_x.get_value()
    predict_model = theano.function(inputs=[classifier.input],
                                    outputs=classifier.y_pred)
    predicted_values = predict_model(test_inputs[:num])
    # Evaluate the label expression once and reuse it below.
    true_labels = test_set_y.eval()[:num]

    print("The Ori values :")
    print(true_labels)
    print("Predicted values for the first %i examples in test set:" % num)
    print(predicted_values)
    print("equal :")
    print(float(numpy.sum(predicted_values == true_labels))
          / predicted_values.shape[0])
# Script entry point: run the demo with its default hyper-parameters.
if __name__ == "__main__":
    test_mlp()