Several of Theano's operations can be checked against numpy: the rules they follow are the same, so a simple verification is easy to carry out.
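As a minimal sketch of such a check (the variable names here are illustrative), a compiled Theano matrix product can be compared against numpy.dot:

import numpy
import theano
import theano.tensor as T

a = T.matrix('a')
b = T.matrix('b')
f = theano.function([a, b], T.dot(a, b))   # compile the symbolic product

x = numpy.random.rand(3, 4).astype(theano.config.floatX)
y = numpy.random.rand(4, 2).astype(theano.config.floatX)
# the Theano result should agree with numpy up to floating-point tolerance
print numpy.allclose(f(x, y), numpy.dot(x, y))   # True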
The memory Theano uses at run time is managed separately from the memory Python uses. For a Theano shared variable, setting the parameter borrow to True shares the underlying buffer instead of making a copy, which saves space.
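A minimal sketch of the difference (variable names are illustrative); note that the aliasing under borrow = True is only guaranteed to be observable on the CPU backend:

import numpy
import theano

data = numpy.zeros(3, dtype = theano.config.floatX)
s_copy = theano.shared(data)                  # default borrow = False: Theano keeps its own copy
s_borrow = theano.shared(data, borrow = True) # may alias the numpy buffer directly

data[0] = 1.0
print s_copy.get_value()                # unchanged: [ 0.  0.  0.]
print s_borrow.get_value(borrow = True) # on CPU, reflects the change: [ 1.  0.  0.]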
Let us now walk through implementing Logistic Regression with Theano.
Class initialization, corresponding to the mathematical expressions:
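The model being initialized is multiclass logistic regression, whose class-membership probability and prediction are:

$$P(Y = i \mid x, W, b) = \mathrm{softmax}_i(Wx + b) = \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}$$

$$y_{pred} = \operatorname*{argmax}_i P(Y = i \mid x, W, b)$$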
self.W = theano.shared(value = numpy.zeros((n_in, n_out), dtype = theano.config.floatX),
                       name = 'W', borrow = True)
self.b = theano.shared(value = numpy.zeros((n_out, ), dtype = theano.config.floatX),
                       name = 'b', borrow = True)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
Note that softmax is the multiclass generalization of the logistic function: set the number of classes to 2 and divide numerator and denominator by the numerator's exponential, and the logistic (sigmoid) form falls out.
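Concretely, with two classes:

$$\mathrm{softmax}(z)_1 = \frac{e^{z_1}}{e^{z_0} + e^{z_1}} = \frac{1}{1 + e^{-(z_1 - z_0)}} = \sigma(z_1 - z_0)$$

which is exactly the logistic function applied to the score difference.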
Solving for the parameters by gradient descent on the loss function:
cost = classifier.negative_log_likelihood(y)
g_W = T.grad(cost = cost, wrt = classifier.W)
g_b = T.grad(cost = cost, wrt = classifier.b)
updates = [(classifier.W, classifier.W - learning_rate * g_W),
           (classifier.b, classifier.b - learning_rate * g_b)]
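T.grad builds the symbolic gradient of a scalar expression with respect to a variable. As a quick sanity check (names illustrative), the derivative of x ** 2 evaluates to 2x:

import theano
import theano.tensor as T

x = T.dscalar('x')
gx = T.grad(x ** 2, x)        # symbolic derivative, equivalent to 2 * x
f = theano.function([x], gx)
print f(3.0)                  # 6.0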
train_model = theano.function(
    inputs = [index],
    outputs = cost,
    updates = updates,
    givens = {
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
test_model and valid_model are defined almost identically to train_model above; only the outputs (classification error instead of cost) and the data bound through givens differ (test_set_* and valid_set_* respectively), and they carry no updates, since evaluation must not modify the parameters.
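For example, test_model, as it appears in the complete code below:

test_model = theano.function(
    inputs = [index],
    outputs = classifier.errors(y),
    givens = {
        x: test_set_x[index * batch_size: (index + 1) * batch_size],
        y: test_set_y[index * batch_size: (index + 1) * batch_size]
    }
)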
Now for the complete model code (for some background, see Deep Learning(一)).
First, an introduction to a few commonly used utility functions:
os.path.split("some path") splits "some path" into a (directory, filename) pair.
os.path.isfile("some file") returns whether "some file" names an existing regular file; a relative path is resolved against the current working directory, so it returns False when no such file exists there. (os.path.isdir does the same for directories; the current working directory itself can be obtained with os.getcwd().)
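A small illustration (the paths shown are hypothetical):

import os

head, tail = os.path.split("/tmp/data/mnist.pkl.gz")
print head   # '/tmp/data'
print tail   # 'mnist.pkl.gz'

print os.getcwd()                      # the current working directory
print os.path.isfile("mnist.pkl.gz")   # False unless the file exists here
print os.path.isdir("/tmp")            # True on most Unix systems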
The complete code:
import cPickle
import gzip
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        # initialize weights and bias to zeros; borrow = True avoids a copy
        self.W = theano.shared(value = numpy.zeros((n_in, n_out), dtype = theano.config.floatX),
                               name = 'W', borrow = True)
        self.b = theano.shared(value = numpy.zeros((n_out,), dtype = theano.config.floatX),
                               name = 'b', borrow = True)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
        self.params = [self.W, self.b]
        self.input = input

    def negative_log_likelihood(self, y):
        # T.arange(y.shape[0]) picks each row and y picks the correct column,
        # selecting log P(y_i | x_i) for every example i
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError("y should have the same shape as self.y_pred",
                            ('y', y.type, 'y_pred', self.y_pred.type))
        if y.dtype.startswith('int'):
            # fraction of minibatch examples whose prediction differs from y
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
def load_data(dataset):
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # look for the file under ../data relative to this script
        new_path = os.path.join(
            os.path.split(__file__)[0],
            '..',
            'data',
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print "Downloading data from %s" % origin
        urllib.urlretrieve(origin, dataset)
    print "... loading data"
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy, borrow = True):
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype = theano.config.floatX), borrow = borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype = theano.config.floatX), borrow = borrow)
        # labels are stored as floats (required on the GPU) and cast back to int
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return rval
def sgd_optimization_mnist(learning_rate = 0.13, n_epochs = 1000, dataset = 'mnist.pkl.gz', batch_size = 600):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    n_train_batches = train_set_x.get_value(borrow = True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow = True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow = True).shape[0] / batch_size

    print "... building the model"
    index = T.iscalar()   # minibatch index
    x = T.matrix('x')     # rasterized images
    y = T.ivector('y')    # labels as a 1D vector of ints
    classifier = LogisticRegression(input = x, n_in = 28 * 28, n_out = 10)
    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(
        inputs = [index],
        outputs = classifier.errors(y),
        givens = {
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    valid_model = theano.function(
        inputs = [index],
        outputs = classifier.errors(y),
        givens = {
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    g_W = T.grad(cost = cost, wrt = classifier.W)
    g_b = T.grad(cost = cost, wrt = classifier.b)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
    train_model = theano.function(
        inputs = [index],
        outputs = cost,
        updates = updates,
        givens = {
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    print "... train the model"
    # early-stopping parameters: examine at least this many minibatches,
    # and extend patience whenever a clearly better model is found
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print (
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )
                if this_validation_loss < best_validation_loss:
                    # a significant improvement: keep training for longer
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print (
                        ('    epoch %i, minibatch %i/%i, test error of'
                         ' best model %f %%') %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    # pickling requires binary mode
                    with open('best_model.pkl', 'wb') as f:
                        cPickle.dump(classifier, f)
            if patience <= iter:
                done_looping = True
                break
    end_time = timeit.default_timer()
    print (
        ('Optimization complete with best validation score of %f %%, '
         'with test performance %f %%') %
        (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time))
    print
def predict():
    # load the trained model and compile a prediction function on raw inputs
    classifier = cPickle.load(open('best_model.pkl', 'rb'))
    predict_model = theano.function(inputs = [classifier.input],
                                    outputs = classifier.y_pred)
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()
    num = 10
    predicted_values = predict_model(test_set_x[:num])
    print "The original values:"
    print test_set_y.eval()[:num]
    print "Predicted values for the first 10 examples in test set:"
    print predicted_values
    print "Fraction equal:"
    print float(numpy.sum(predicted_values == test_set_y.eval()[:num])) / predicted_values.shape[0]

if __name__ == '__main__':
    # run sgd_optimization_mnist() first to produce best_model.pkl
    #sgd_optimization_mnist()
    predict()