theano做的MLP

最新推荐文章于 2024-01-02 12:41:20 发布
原创最新推荐文章于 2024-01-02 12:41:20 发布 · 2.4k 阅读
CC 4.0 BY-SA版权
再贡献一个实现，和 tutorial 里的写法不太一样，目前是边学习边写的。这个结果可以达到 1.68% 的误差，需要 200 多万次迭代，接近 1000 个 epoch。
import theano, numpy, theano.tensor as T, gzip, cPickle, time


class HiddenLayer():
    """Fully connected layer with a tanh activation.

    Attributes:
        w: shared weight matrix of shape ``[n_in, n_out]``.
        b: shared bias vector of length ``n_out``.
        out: symbolic expression ``tanh(dot(x, w) + b)``.
        params: ``[w, b]``, the list of trainable shared variables.
    """

    def __init__(self, n_in, n_out, x, w=None, b=None):
        """Build the layer on top of the symbolic input ``x``.

        Args:
            n_in: number of input units.
            n_out: number of output units.
            x: symbolic input fed to the layer.
            w: optional pre-built shared weights; when omitted they are drawn
                uniformly from ``[-sqrt(6/(n_in+n_out)), sqrt(6/(n_in+n_out))]``.
            b: optional pre-built shared biases; when omitted they start at zero.
        """
        float_type = theano.config.floatX

        if w is None:
            bound = numpy.sqrt(6.0 / (n_in + n_out))
            initial_w = numpy.random.uniform(low=-bound, high=bound, size=[n_in, n_out])
            w = theano.shared(numpy.asarray(initial_w, dtype=float_type))
        if b is None:
            b = theano.shared(numpy.zeros(n_out, dtype=float_type))

        self.w = w
        self.b = b
        self.params = [self.w, self.b]

        pre_activation = T.dot(x, self.w) + self.b
        self.out = T.tanh(pre_activation)

    def get_l1(self):
        """Return the symbolic L1 norm of the weights (sum of absolute values)."""
        absolute_weights = T.abs_(self.w)
        return T.sum(absolute_weights)

    def get_l2(self):
        """Return the symbolic squared L2 norm of the weights (sum of squares)."""
        squared_weights = self.w ** 2
        return T.sum(squared_weights)
            
class MLP():
    """Softmax output layer plus a training driver for a one-hidden-layer MLP.

    The instance itself only owns the output-layer parameters: ``w`` of shape
    ``[n_hidden, n_out]`` and ``b`` of length ``n_out``, both initialised to
    zero. ``train`` builds a tanh ``HiddenLayer`` and a fresh output layer from
    the sizes stored on the instance and optimises them with minibatch SGD.
    """

    def __init__(self, n_in=784, n_out=10, n_hidden=500):
        """Store the layer sizes and create the zero-initialised output layer.

        Args:
            n_in: dimensionality of the raw input (used by ``train``).
            n_out: number of output classes.
            n_hidden: number of hidden units, i.e. the input size of this
                output layer.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.n_hidden = n_hidden
        self.w = theano.shared(numpy.asarray(numpy.zeros([n_hidden, n_out]), theano.config.floatX))
        self.b = theano.shared(numpy.asarray(numpy.zeros(n_out), theano.config.floatX))
        self.params = [self.w, self.b]

    def p_y_given_x(self, x):
        """Return the symbolic class probabilities ``softmax(dot(x, w) + b)``."""
        return T.nnet.softmax(T.dot(x, self.w) + self.b)

    def pred(self, x):
        """Return the symbolic index of the most probable class for each row."""
        return T.argmax(self.p_y_given_x(x), 1)

    def cost(self, x, y):
        """Return the mean negative log-likelihood of labels ``y`` given ``x``."""
        p_y_given_x = self.p_y_given_x(x)
        # Pick, for every row, the probability assigned to its true label.
        return -T.mean(T.log(p_y_given_x[T.arange(y.shape[0]), y]))

    def error(self, x, y):
        """Return the fraction of rows whose prediction differs from ``y``."""
        pred = self.pred(x)
        return T.mean(T.neq(pred, y))

    def get_l1(self):
        """Return the symbolic L1 norm of the weights (sum of absolute values)."""
        return T.sum(T.abs_(self.w))

    def get_l2(self):
        """Return the symbolic squared L2 norm of the weights (sum of squares)."""
        return T.sum(self.w**2)

    def train(self, dataset='mnist.pkl.gz', batch_size=20, learning_rate=0.01,
              l1_decay=0.0, l2_decay=0.0001, epochs=1000):
        """Train a hidden layer + softmax layer with SGD and early stopping.

        Progress and the final best validation/test errors are printed; nothing
        is returned. All arguments default to the values that used to be
        hard-coded, so ``train()`` behaves as before.

        Args:
            dataset: path of a gzipped pickle holding the
                ``(train, validation, test)`` splits, each an ``(x, y)`` pair.
            batch_size: number of examples per minibatch.
            learning_rate: SGD step size.
            l1_decay: weight of the L1 penalty on both weight matrices.
            l2_decay: weight of the squared-L2 penalty on both weight matrices.
            epochs: maximum number of passes over the training set.
        """
        def load_data():
            # NOTE(security): unpickling can execute arbitrary code, so only
            # point `dataset` at a file from a trusted source.
            f = gzip.open(dataset)
            try:
                trainxy, validatexy, testxy = cPickle.load(f)
            finally:
                # The original never closed the file handle.
                f.close()

            def share_data(xy):
                x, y = xy
                x = theano.shared(numpy.asarray(x, theano.config.floatX))
                y = theano.shared(numpy.asarray(y, theano.config.floatX))
                # Labels are stored as floatX and exposed as int32 so they
                # can be used as indices in the cost.
                return [x, T.cast(y, 'int32')]

            return [share_data(trainxy), share_data(validatexy), share_data(testxy)]

        [(trainx, trainy), (validatex, validatey), (testx, testy)] = load_data()
        print('Load the data successfully...')

        # borrow=True avoids copying a whole dataset just to read its length;
        # `//` keeps the batch counts integral regardless of division mode.
        train_batch = trainx.get_value(borrow=True).shape[0] // batch_size
        validate_batch = validatex.get_value(borrow=True).shape[0] // batch_size
        test_batch = testx.get_value(borrow=True).shape[0] // batch_size

        x = T.matrix('x', theano.config.floatX)
        y = T.ivector('y')
        n_in = self.n_in
        n_hidden = self.n_hidden
        n_out = self.n_out
        hl = HiddenLayer(n_in, n_hidden, x)

        # Forward all three sizes: the old call `MLP(n_hidden, n_out)` left the
        # n_hidden parameter at its default of 500, so the output weights had
        # the wrong shape whenever self.n_hidden != 500.
        mlp = MLP(n_in, n_out, n_hidden)
        cost = mlp.cost(hl.out, y) + l1_decay*(mlp.get_l1() + hl.get_l1()) + l2_decay*(mlp.get_l2() + hl.get_l2())
        error = mlp.error(hl.out, y)
        params = hl.params + mlp.params
        grad_params = [T.grad(cost, param) for param in params]
        updates = [(param, param - learning_rate*grad) for param, grad in zip(params, grad_params)]

        # Each compiled function takes a minibatch index and slices the
        # corresponding rows out of the shared dataset.
        index = T.lscalar()
        batch_start = index*batch_size
        batch_end = (index+1)*batch_size
        trainModel = theano.function([index], cost, updates=updates, givens={x: trainx[batch_start:batch_end], y: trainy[batch_start:batch_end]})
        validateModel = theano.function([index], error, givens={x: validatex[batch_start:batch_end], y: validatey[batch_start:batch_end]})
        testModel = theano.function([index], error, givens={x: testx[batch_start:batch_end], y: testy[batch_start:batch_end]})

        patience = 5000                              # minimum number of iterations to run
        frequency = min(patience // 2, train_batch)  # validate every `frequency` iterations
        increase = 2                                 # patience multiplier on a significant improvement
        best_validate_error = numpy.inf
        best_test_error = 0
        epoch = 1
        ite = 0
        stopping = False

        # `<=` so that exactly `epochs` passes are made; with epoch starting
        # at 1, the former `<` stopped one epoch short.
        while (epoch <= epochs) and (not stopping):
            for i in xrange(train_batch):
                ite += 1
                this_cost = trainModel(i)
                if ite % frequency == 0:
                    this_validate_error = numpy.mean([validateModel(j) for j in xrange(validate_batch)])
                    print('ite:%d/%d, cost:%f, validate error:%f' % (ite, epoch, this_cost, this_validate_error))
                    if this_validate_error < best_validate_error:
                        # Only a relative improvement of more than 0.5%
                        # extends the patience window.
                        if this_validate_error < 0.995*best_validate_error:
                            patience = max(patience, ite*increase)
                        best_validate_error = this_validate_error
                        best_test_error = numpy.mean([testModel(j) for j in xrange(test_batch)])
                        print('ite:%d/%d, cost:%f, validate error:%f, test error:%f' % (ite, epoch, this_cost, this_validate_error, best_test_error))
                    # Early stopping is only evaluated at validation points.
                    if patience <= ite:
                        stopping = True
                        break
            epoch += 1

        print('best validate error:%f, best test error:%f' % (best_validate_error, best_test_error))
        
if __name__=='__main__':
    # Train with the default sizes and report the elapsed time.
    mlp = MLP()
    # Use wall-clock time: time.clock() measures CPU time on Unix (which
    # under-reports the real duration) and no longer exists in Python 3.8+.
    start_time = time.time()
    mlp.train()
    end_time = time.time()
    print('total consuming %d mins' % ((end_time-start_time)/60.0))