Several of Theano's operations can be checked against numpy: the rules they follow are the same, so a simple verification is easy to carry out.
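As a minimal sketch of such a check (the variable names here are illustrative), a compiled Theano matrix product can be compared against numpy.dot:

import numpy
import theano
import theano.tensor as T

a = T.matrix('a')
b = T.matrix('b')
f = theano.function([a, b], T.dot(a, b))   # compile the symbolic product

x = numpy.random.rand(3, 4).astype(theano.config.floatX)
y = numpy.random.rand(4, 2).astype(theano.config.floatX)
# the Theano result should agree with numpy up to floating-point tolerance
print numpy.allclose(f(x, y), numpy.dot(x, y))   # True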
The memory Theano uses at run time is managed separately from the memory Python uses. For a Theano shared variable, setting the parameter borrow to True shares the underlying buffer instead of making a copy, which saves space.
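A minimal sketch of the difference (variable names are illustrative); note that the aliasing under borrow = True is only guaranteed to be observable on the CPU backend:

import numpy
import theano

data = numpy.zeros(3, dtype = theano.config.floatX)
s_copy = theano.shared(data)                  # default borrow = False: Theano keeps its own copy
s_borrow = theano.shared(data, borrow = True) # may alias the numpy buffer directly

data[0] = 1.0
print s_copy.get_value()                # unchanged: [ 0.  0.  0.]
print s_borrow.get_value(borrow = True) # on CPU, reflects the change: [ 1.  0.  0.]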
Let us now walk through implementing Logistic Regression with Theano.
Class initialization, corresponding to the mathematical expressions:
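The model being initialized is multiclass logistic regression, whose class-membership probability and prediction are:

$$P(Y = i \mid x, W, b) = \mathrm{softmax}_i(Wx + b) = \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}$$

$$y_{pred} = \operatorname*{argmax}_i P(Y = i \mid x, W, b)$$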
self.W = theano.shared(value = numpy.zeros((n_in, n_out), dtype = theano.config.floatX),
                       name = 'W', borrow = True)
self.b = theano.shared(value = numpy.zeros((n_out, ), dtype = theano.config.floatX),
                       name = 'b', borrow = True)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
Note that softmax is the multiclass generalization of the logistic function: set the number of classes to 2 and divide numerator and denominator by the numerator's exponential, and the logistic (sigmoid) form falls out.
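Concretely, with two classes:

$$\mathrm{softmax}(z)_1 = \frac{e^{z_1}}{e^{z_0} + e^{z_1}} = \frac{1}{1 + e^{-(z_1 - z_0)}} = \sigma(z_1 - z_0)$$

which is exactly the logistic function applied to the score difference.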
Solving for the parameters by gradient descent on the loss function:
cost = classifier.negative_log_likelihood(y)
g_W = T.grad(cost = cost, wrt = classifier.W)
g_b = T.grad(cost = cost, wrt = classifier.b)
updates = [(classifier.W, classifier.W - learning_rate * g_W),
           (classifier.b, classifier.b - learning_rate * g_b)]
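T.grad builds the symbolic gradient of a scalar expression with respect to a variable. As a quick sanity check (names illustrative), the derivative of x ** 2 evaluates to 2x:

import theano
import theano.tensor as T

x = T.dscalar('x')
gx = T.grad(x ** 2, x)        # symbolic derivative, equivalent to 2 * x
f = theano.function([x], gx)
print f(3.0)                  # 6.0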
train_model = theano.function(
    inputs = [index],
    outputs = cost,
    updates = updates,
    givens = {
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
test_model and valid_model are defined almost identically to train_model above; only the outputs (classification error instead of cost) and the data bound through givens differ (test_set_* and valid_set_* respectively), and they carry no updates, since evaluation must not modify the parameters.
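For example, test_model, as it appears in the complete code below:

test_model = theano.function(
    inputs = [index],
    outputs = classifier.errors(y),
    givens = {
        x: test_set_x[index * batch_size: (index + 1) * batch_size],
        y: test_set_y[index * batch_size: (index + 1) * batch_size]
    }
)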
Now for the complete model code (for some background, see Deep Learning(一)).
First, an introduction to a few commonly used utility functions:
os.path.split("some path") splits "some path" into a (directory, filename) pair.
os.path.isfile("some file") returns whether "some file" names an existing regular file; a relative path is resolved against the current working directory, so it returns False when no such file exists there. (os.path.isdir does the same for directories; the current working directory itself can be obtained with os.getcwd().)
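A small illustration (the paths shown are hypothetical):

import os

head, tail = os.path.split("/tmp/data/mnist.pkl.gz")
print head   # '/tmp/data'
print tail   # 'mnist.pkl.gz'

print os.getcwd()                      # the current working directory
print os.path.isfile("mnist.pkl.gz")   # False unless the file exists here
print os.path.isdir("/tmp")            # True on most Unix systems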
The complete code:
import cPickle
import gzip
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        # initialize weights and bias to zeros; borrow = True avoids a copy
        self.W = theano.shared(value = numpy.zeros((n_in, n_out), dtype = theano.config.floatX),
                               name = 'W', borrow = True)
        self.b = theano.shared(value = numpy.zeros((n_out,), dtype = theano.config.floatX),
                               name = 'b', borrow = True)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
        self.params = [self.W, self.b]
        self.input = input

    def negative_log_likelihood(self, y):
        # T.arange(y.shape[0]) picks each row and y picks the correct column,
        # selecting log P(y_i | x_i) for every example i
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError("y should have the same shape as self.y_pred",
                            ('y', y.type, 'y_pred', self.y_pred.type))
        if y.dtype.startswith('int'):
            # fraction of minibatch examples whose prediction differs from y
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
def load_data(dataset):
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # look for the file under ../data relative to this script
        new_path = os.path.join(
            os.path.split(__file__)[0],
            '..',
            'data',
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print "Downloading data from %s" % origin
        urllib.urlretrieve(origin, dataset)
    print "... loading data"
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy, borrow = True):
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype = theano.config.floatX), borrow = borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype = theano.config.floatX), borrow = borrow)
        # labels are stored as floats (required on the GPU) and cast back to int
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return rval
def sgd_optimization_mnist(learning_rate = 0.13, n_epochs = 1000, dataset = 'mnist.pkl.gz', batch_size = 600):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    n_train_batches = train_set_x.get_value(borrow = True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow = True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow = True).shape[0] / batch_size

    print "... building the model"
    index = T.iscalar()   # minibatch index
    x = T.matrix('x')     # rasterized images
    y = T.ivector('y')    # labels as a 1D vector of ints
    classifier = LogisticRegression(input = x, n_in = 28 * 28, n_out = 10)
    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(
        inputs = [index],
        outputs = classifier.errors(y),
        givens = {
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    valid_model = theano.function(
        inputs = [index],
        outputs = classifier.errors(y),
        givens = {
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    g_W = T.grad(cost = cost, wrt = classifier.W)
    g_b = T.grad(cost = cost, wrt = classifier.b)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
    train_model = theano.function(
        inputs = [index],
        outputs = cost,
        updates = updates,
        givens = {
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    print "... train the model"
    # early-stopping parameters: examine at least this many minibatches,
    # and extend patience whenever a clearly better model is found
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print (
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )
                if this_validation_loss < best_validation_loss:
                    # a significant improvement: keep training for longer
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print (
                        ('    epoch %i, minibatch %i/%i, test error of'
                         ' best model %f %%') %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    # pickling requires binary mode
                    with open('best_model.pkl', 'wb') as f:
                        cPickle.dump(classifier, f)
            if patience <= iter:
                done_looping = True
                break
    end_time = timeit.default_timer()
    print (
        ('Optimization complete with best validation score of %f %%, '
         'with test performance %f %%') %
        (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time))
    print
def predict():
    # load the trained model and compile a prediction function on raw inputs
    classifier = cPickle.load(open('best_model.pkl', 'rb'))
    predict_model = theano.function(inputs = [classifier.input],
                                    outputs = classifier.y_pred)
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()
    num = 10
    predicted_values = predict_model(test_set_x[:num])
    print "The original values:"
    print test_set_y.eval()[:num]
    print "Predicted values for the first 10 examples in test set:"
    print predicted_values
    print "Fraction equal:"
    print float(numpy.sum(predicted_values == test_set_y.eval()[:num])) / predicted_values.shape[0]

if __name__ == '__main__':
    # run sgd_optimization_mnist() first to produce best_model.pkl
    #sgd_optimization_mnist()
    predict()