Deep Learning 初探(二)

对于theano中的若干运算形式也可以结合numpy进行检验,
相应的法则是相同的。简单的验证是可以完成的。
theano运行所使用的内存与python使用的内存是相互独立的,
一般的theano shared变量可以通过将参数borrow设定为True将内存进行
共享而非copy,这样能够节约空间。
下面看使用Theano完成Logistic Regression的过程:
类的初始化(对应于数学表达式):
# Weight matrix W: a Theano shared variable initialised to zeros, shape (n_in, n_out).
self.W = theano.shared(value = numpy.zeros((n_in, n_out), dtype = theano.config.floatX),
    name = 'W', borrow = True)
# Bias vector b: a Theano shared variable initialised to zeros, length n_out.
self.b = theano.shared(value = numpy.zeros((n_out, ), dtype = theano.config.floatX),
    name = 'b', borrow = True)
# Class probabilities: softmax applied to dot(input, W) + b.
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
# Predicted class: index of the largest probability along axis 1.
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)

这里要指出的是,softmax函数是logistic函数在多分类情形下的推广:将softmax函数的分类数置为2,
并将分子、分母同除以分子,即可得到logistic函数的形式,即 $\frac{e^{z_1}}{e^{z_1}+e^{z_2}} = \frac{1}{1+e^{-(z_1-z_2)}}$。

利用梯度完成损失函数对应参数的求解:
# Symbolic cost: the classifier's negative log-likelihood for the labels y.
cost = classifier.negative_log_likelihood(y)
# Symbolic gradients of the cost with respect to W and b.
g_W = T.grad(cost = cost, wrt = classifier.W)
g_b = T.grad(cost = cost, wrt = classifier.b)
# Gradient-descent step: each parameter is replaced by itself minus
# learning_rate times its gradient.
updates = [(classifier.W, classifier.W - learning_rate * g_W),
   (classifier.b, classifier.b - learning_rate * g_b)]
# Compiled function: takes a minibatch index, returns the cost and applies
# the updates; x and y are substituted by the index-th slice of the training set.
train_model = theano.function(
     inputs = [index],
     outputs = cost,
     updates = updates,
     givens = {
      x: train_set_x[index * batch_size: (index + 1) * batch_size],
      y: train_set_y[index * batch_size: (index + 1) * batch_size]
     }
    )
test_model与valid_model的定义与上面的train_model几乎相同:不同之处在于givens中所使用
的数据(分别为test_set_x/test_set_y与valid_set_x/valid_set_y),且二者输出的是classifier.errors(y)(分类错误率),不包含updates。
现在看完整的模型代码:(有些内容可参看Deep Learning(一))
对一些常用的功能代码进行介绍
os.path.split("some path") 将"some path"分解为(目录名, 文件名)的二元组并返回。
os.path.isfile("some file") 判断"some file"是否指向一个已存在的普通文件;
  相对路径相对于当前工作目录解析,文件不存在时返回False。(相应地还有os.path.isdir,
  用于判断路径是否为已存在的目录;当前工作目录可以通过os.getcwd()获得)
完整代码:
import cPickle 
import gzip 
import os 
import sys 
import timeit
import numpy 
import theano 
import theano.tensor as T 
class LogisticRegression(object):
    """Multi-class logistic regression (softmax) classifier built with Theano.

    The model computes ``softmax(dot(input, W) + b)`` and predicts, for every
    row, the class whose probability is largest.
    """

    def __init__(self, input, n_in, n_out):
        """Build the symbolic expressions of the classifier.

        :param input: symbolic variable fed to the model
                      (presumably one example per row -- confirm with caller)
        :param n_in: number of rows of W (input dimension)
        :param n_out: number of columns of W and length of b (number of classes)
        """
        # Parameters start at zero and live in Theano shared variables.
        self.W = theano.shared(
            value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='W',
            borrow=True
        )
        self.b = theano.shared(
            value=numpy.zeros((n_out,), dtype=theano.config.floatX),
            name='b',
            borrow=True
        )
        # Class-membership probabilities and the arg-max prediction along axis 1.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]
        # Kept so that a compiled prediction function can use it as its input.
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean negative log-likelihood of the labels ``y``.

        For every row i the log-probability at column ``y[i]`` is selected;
        the result is minus the mean of those values.

        :param y: symbolic vector of class indices, one per row of the input
        """
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the mean number of mismatches between ``y_pred`` and ``y``.

        :param y: symbolic vector of class indices
        :raises TypeError: if ``y`` and ``self.y_pred`` differ in ``ndim``
        :raises NotImplementedError: if the dtype of ``y`` does not start
                                     with ``'int'``
        """
        if y.ndim != self.y_pred.ndim:
            # The prediction lives on the instance; a bare ``y_pred`` here
            # would raise NameError instead of the intended TypeError.
            raise TypeError(
                "y should have the same shape as self.y_pred",
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if y.dtype.startswith('int'):
            # T.neq yields 1 where prediction and label differ, 0 otherwise.
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
def load_data(dataset):
    """Load a gzipped, pickled dataset and wrap it in Theano shared variables.

    :param dataset: path to the dataset file. A bare file name that is not
                    found in the current directory is looked up in ``../data``
                    relative to this script; ``mnist.pkl.gz`` is downloaded
                    when it is missing.
    :return: list ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]``
             where each x is a shared variable and each y is that split's
             shared labels cast to ``int32``.
    """
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Only a file name was given and it is not in the working directory:
        # fall back to the ``data`` directory next to this script's parent.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            '..',
            'data',
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        # NOTE(review): the archive is fetched over plain HTTP and then
        # unpickled below -- confirm the source is trusted.
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print("Downloading data from %s" % origin)
        urllib.urlretrieve(origin, dataset)

    print("... loading data")

    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted origin.
    f = gzip.open(dataset, 'rb')
    try:
        train_set, valid_set, test_set = cPickle.load(f)
    finally:
        # Close the archive even when unpickling fails.
        f.close()

    def shared_dataset(data_xy, borrow=True):
        """Put one (x, y) split into shared variables; y is cast to int32."""
        data_x, data_y = data_xy
        shared_x = theano.shared(
            numpy.asarray(data_x, dtype=theano.config.floatX),
            borrow=borrow
        )
        # Labels are stored as floatX and exposed through an int32 cast.
        shared_y = theano.shared(
            numpy.asarray(data_y, dtype=theano.config.floatX),
            borrow=borrow
        )
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    rval = [
        (train_set_x, train_set_y),
        (valid_set_x, valid_set_y),
        (test_set_x, test_set_y),
    ]
    return rval
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600):
    """Train a LogisticRegression model with minibatch gradient descent.

    The best model found on the validation set is pickled to
    ``best_model.pkl`` in the current directory; progress is printed.

    :param learning_rate: factor applied to the gradients in each update
    :param n_epochs: maximum number of passes over the training batches
    :param dataset: dataset path handed to ``load_data``
    :param batch_size: number of examples per minibatch
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: only whole minibatches are used.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print("... building the model")
    index = T.iscalar()  # minibatch index
    x = T.matrix('x')    # input rows
    y = T.ivector('y')   # integer labels
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    # Error on one minibatch of the test / validation split; no updates.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    valid_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [
        (classifier.W, classifier.W - learning_rate * g_W),
        (classifier.b, classifier.b - learning_rate * g_b),
    ]

    # Returns the cost of one training minibatch and applies the updates.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print("... train the model")
    # Early stopping: training ends once the iteration count reaches
    # ``patience``; a sufficiently large validation improvement raises it.
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            # Number of minibatches processed before this one, over all epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )
                if this_validation_loss < best_validation_loss:
                    # Extend patience only for a relative improvement beyond
                    # the threshold.
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        (' epoch %i, minibatch %i/%i, test error of'
                         ' best model %f %%') %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    # Text mode is kept on purpose: predict() opens this file
                    # with the default (text) mode, and the write and read
                    # modes have to stay consistent.
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)
            if patience <= iteration:
                done_looping = True
                break
    end_time = timeit.default_timer()
    print(
        ('Optimization complete with best validation score of %f %%,'
         'with test performance %f %%') %
        (best_validation_loss * 100., test_score * 100.)
    )
    print('The code run for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)))
    print("")
def predict():
    """Load the pickled model and print its predictions on the test set.

    Reads ``best_model.pkl`` from the current directory, predicts the first
    ``num`` test examples of ``mnist.pkl.gz`` and prints the true labels, the
    predictions and the fraction of them that agree.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load a
    # model file you produced yourself.
    # Default (text) mode matches the 'w' mode used when the file is written.
    with open('best_model.pkl') as f:
        classifier = cPickle.load(f)

    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred
    )

    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    num = 10
    predicted_values = predict_model(test_set_x[:num])
    # Evaluate the symbolic labels once and reuse the result.
    true_values = test_set_y.eval()[:num]

    print("The Ori values :")
    print(true_values)
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
    print("equal :")
    print(float(numpy.sum(predicted_values == true_values)) / predicted_values.shape[0])
if __name__ == '__main__':
 # Training is disabled here. predict() reads 'best_model.pkl', which
 # sgd_optimization_mnist() writes, so uncomment the next line to train first.
 #sgd_optimization_mnist()
 predict()






评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值