再贡献一个,和 tutorial 写的不太一样,我现在还在边学边写。这个结果可以达到 1.68% 的误差,但需要 200 多万次迭代,差不多 1000 个 epoch 了。
import theano, numpy, theano.tensor as T, gzip, cPickle, time
class HiddenLayer():
    """Fully connected layer with a tanh non-linearity, expressed in Theano.

    Arguments:
        n_in:  number of rows of the weight matrix (input width).
        n_out: number of columns of the weight matrix (output width).
        x:     symbolic input; the layer output is tanh(dot(x, w) + b).
        w:     optional weight variable.  When None, a new shared variable
               is drawn uniformly from [-s, s] with s = sqrt(6 / (n_in + n_out)).
        b:     optional bias variable.  When None, a new shared variable of
               n_out zeros is created.

    Attributes:
        w, b:   the weight and bias variables in use.
        out:    symbolic output of the layer.
        params: [w, b], the variables to be updated by training.
    """

    def __init__(self, n_in, n_out, x, w=None, b=None):
        float_type = theano.config.floatX

        if w is None:
            # Symmetric uniform range that shrinks as the layer gets wider.
            bound = numpy.sqrt(6.0 / (n_in + n_out))
            initial_w = numpy.random.uniform(-bound, bound, [n_in, n_out])
            w = theano.shared(numpy.asarray(initial_w, float_type))

        if b is None:
            initial_b = numpy.zeros(n_out)
            b = theano.shared(numpy.asarray(initial_b, float_type))

        self.w = w
        self.b = b

        pre_activation = T.dot(x, self.w) + self.b
        self.out = T.tanh(pre_activation)
        self.params = [self.w, self.b]

    def get_l1(self):
        """Return the symbolic L1 penalty: sum of absolute weights (bias excluded)."""
        magnitudes = T.abs_(self.w)
        return T.sum(magnitudes)

    def get_l2(self):
        """Return the symbolic squared-L2 penalty: sum of squared weights (bias excluded)."""
        squares = self.w ** 2
        return T.sum(squares)
class MLP():
    """Softmax output layer plus a training driver for a one-hidden-layer MLP.

    The instance holds only output-layer parameters: ``w`` with shape
    (n_hidden, n_out) and ``b`` with length n_out, both initialised to zero.
    ``n_in`` is stored for ``train``, which uses it as the width of the
    hidden layer's input.

    Arguments:
        n_in:     input width of the whole network (default 784).
        n_out:    number of classes (default 10).
        n_hidden: width of the hidden layer feeding this output layer
                  (default 500).
    """

    def __init__(self, n_in=784, n_out=10, n_hidden=500):
        self.n_in = n_in
        self.n_out = n_out
        self.n_hidden = n_hidden
        self.w = theano.shared(numpy.asarray(numpy.zeros([n_hidden, n_out]), theano.config.floatX))
        self.b = theano.shared(numpy.asarray(numpy.zeros(n_out), theano.config.floatX))
        self.params = [self.w, self.b]

    def p_y_given_x(self, x):
        """Return the symbolic class probabilities softmax(dot(x, w) + b)."""
        return T.nnet.softmax(T.dot(x, self.w) + self.b)

    def pred(self, x):
        """Return the symbolic index of the most probable class for each row of x."""
        return T.argmax(self.p_y_given_x(x), 1)

    def cost(self, x, y):
        """Return the mean negative log-probability assigned to the labels y."""
        p_y_given_x = self.p_y_given_x(x)
        # Pick, for every row, the probability of that row's own label.
        return -T.mean(T.log(p_y_given_x[T.arange(y.shape[0]), y]))

    def error(self, x, y):
        """Return the fraction of rows whose predicted class differs from y."""
        pred = self.pred(x)
        return T.mean(T.neq(pred, y))

    def get_l1(self):
        """Return the symbolic L1 penalty: sum of absolute weights (bias excluded)."""
        return T.sum(T.abs_(self.w))

    def get_l2(self):
        """Return the symbolic squared-L2 penalty: sum of squared weights (bias excluded)."""
        return T.sum(self.w**2)

    def train(self, dataset='mnist.pkl.gz', batch_size=20, learning_rate=0.01,
              l1_decay=0.0, l2_decay=0.0001, epochs=1000, patience=5000):
        """Fit a HiddenLayer followed by a softmax layer with minibatch SGD.

        A new HiddenLayer (n_in -> n_hidden) and a new output layer
        (n_hidden -> n_out) are created on every call; the parameters stored
        on ``self`` are not the ones being updated.  Progress and the final
        best errors are printed; nothing is returned.

        Arguments (all defaults reproduce the previously hard-coded values):
            dataset:       path of a gzip-compressed pickle holding
                           (train, validate, test), each an (x, y) pair.
            batch_size:    number of examples per minibatch.
            learning_rate: step size of the gradient descent update.
            l1_decay:      weight of the L1 penalty on both weight matrices.
            l2_decay:      weight of the squared-L2 penalty on both matrices.
            epochs:        maximum number of passes over the training set.
            patience:      minimum number of minibatch updates before early
                           stopping may trigger; extended on clear progress.
        """
        def load_data():
            def share_data(xy):
                x, y = xy
                x = theano.shared(numpy.asarray(x, theano.config.floatX))
                # Labels are stored as floatX and cast back to int32 for use
                # as indices (presumably so they can be kept with the floatX
                # data on a GPU, as in the Theano tutorials -- TODO confirm).
                y = theano.shared(numpy.asarray(y, theano.config.floatX))
                return [x, T.cast(y, 'int32')]

            # NOTE(review): unpickling can execute arbitrary code; only load
            # dataset files from a trusted source.
            f = gzip.open(dataset, 'rb')
            try:
                trainxy, validatexy, testxy = cPickle.load(f)
            finally:
                # Always release the file handle, even if unpickling fails.
                f.close()

            trainx, trainy = share_data(trainxy)
            validatex, validatey = share_data(validatexy)
            testx, testy = share_data(testxy)
            return [(trainx, trainy), (validatex, validatey), (testx, testy)]

        [(trainx, trainy), (validatex, validatey), (testx, testy)] = load_data()
        print('Load the data successfully...')

        # borrow=True avoids copying a whole dataset just to read its shape.
        # Trailing examples that do not fill a complete batch are ignored.
        train_batch = trainx.get_value(borrow=True).shape[0] // batch_size
        validate_batch = validatex.get_value(borrow=True).shape[0] // batch_size
        test_batch = testx.get_value(borrow=True).shape[0] // batch_size

        x = T.matrix('x', theano.config.floatX)
        y = T.ivector('y')
        n_in = self.n_in
        n_hidden = self.n_hidden
        n_out = self.n_out

        hl = HiddenLayer(n_in, n_hidden, x)
        # The output layer's weight matrix is sized by its n_hidden argument,
        # so it must be given the hidden width explicitly; relying on the
        # default (500) breaks every network with a different hidden width.
        mlp = MLP(n_in=n_hidden, n_out=n_out, n_hidden=n_hidden)

        cost = (mlp.cost(hl.out, y)
                + l1_decay * (mlp.get_l1() + hl.get_l1())
                + l2_decay * (mlp.get_l2() + hl.get_l2()))
        error = mlp.error(hl.out, y)
        params = hl.params + mlp.params
        grad_params = [T.grad(cost, param) for param in params]
        updates = [(param, param - learning_rate * grad)
                   for param, grad in zip(params, grad_params)]

        index = T.lscalar()

        def batch_givens(data_x, data_y):
            # Substitute the index-th minibatch of a dataset for x and y.
            start = index * batch_size
            stop = (index + 1) * batch_size
            return {x: data_x[start:stop], y: data_y[start:stop]}

        trainModel = theano.function([index], cost, updates=updates,
                                     givens=batch_givens(trainx, trainy))
        validateModel = theano.function([index], error,
                                        givens=batch_givens(validatex, validatey))
        testModel = theano.function([index], error,
                                    givens=batch_givens(testx, testy))

        # Validate at least once per epoch, and at least twice within the
        # initial patience window.
        frequency = min(patience // 2, train_batch)
        increase = 2
        best_validate_error = numpy.inf
        best_test_error = 0
        epoch = 1
        ite = 0
        stopping = False
        # `<=` so that exactly `epochs` passes are allowed; with epoch
        # starting at 1, `<` would stop one pass short.
        while (epoch <= epochs) and (not stopping):
            for i in xrange(train_batch):
                ite += 1
                this_cost = trainModel(i)
                if ite % frequency == 0:
                    this_validate_error = numpy.mean(
                        [validateModel(j) for j in xrange(validate_batch)])
                    print('ite:%d/%d, cost:%f, validate error:%f' % (ite, epoch, this_cost, this_validate_error))
                    if this_validate_error < best_validate_error:
                        # Only an improvement of more than 0.5% (relative)
                        # earns additional patience.
                        if this_validate_error < 0.995 * best_validate_error:
                            patience = max(patience, ite * increase)
                        best_validate_error = this_validate_error
                        # The test error is evaluated only for a new best
                        # validation score.
                        best_test_error = numpy.mean(
                            [testModel(j) for j in xrange(test_batch)])
                        print('ite:%d/%d, cost:%f, validate error:%f, test error:%f' % (ite, epoch, this_cost, this_validate_error, best_test_error))
                if patience <= ite:
                    # Patience exhausted: leave both loops.
                    stopping = True
                    break
            epoch += 1
        print('best validate error:%f, best test error:%f' % (best_validate_error, best_test_error))
if __name__ == '__main__':
    # Script entry point: train a default-sized network and report how long
    # the training call took, in whole minutes as measured by time.clock().
    network = MLP()
    tic = time.clock()
    network.train()
    toc = time.clock()
    elapsed_minutes = (toc - tic) / 60.0
    print('total consuming %d mins' % elapsed_minutes)