Let us now discuss the overall CNN:
The overall structure of a convolutional neural network amounts to placing a number of convolution layers and MaxPooling layers in front of an MLP (multilayer perceptron) as a preprocessing stage that extracts edge-related information.
Some details of the complete code are discussed below:
The bounds used when initializing the weights are not discussed further here; treat them as given.
When MaxPooling is used, ignore_border=True is normally chosen: the incomplete border region (the pixels left over when the input size is not an exact multiple of the pool size) carries no useful information, so it is simply dropped.
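To make the effect of ignore_border concrete, here is a small, self-contained NumPy sketch (illustrative only; the helper max_pool_2x2 is made up for this example and is not the Theano implementation): with a 5x5 input and a 2x2 pool, ignore_border=True gives a 2x2 output because the leftover row and column are discarded, while ignore_border=False keeps a partial third row and column.

import numpy as np

def max_pool_2x2(a, ignore_border=True):
    # Toy 2x2 max pooling, only to show what ignore_border controls.
    h, w = a.shape
    if ignore_border:
        h, w = h - h % 2, w - w % 2   # drop the incomplete border region
        a = a[:h, :w]
    out = np.full(((h + 1) // 2, (w + 1) // 2), -np.inf)
    for i in range(0, h, 2):
        for j in range(0, w, 2):
            out[i // 2, j // 2] = a[i:i + 2, j:j + 2].max()
    return out

a = np.arange(25).reshape(5, 5)
print(max_pool_2x2(a, ignore_border=True).shape)   # (2, 2): border dropped
print(max_pool_2x2(a, ignore_border=False).shape)  # (3, 3): partial windows kept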
The numpy.ndarray.flatten method takes the flattening order as its argument. The default is C order, i.e. row-major flattening; with the "F" option it flattens in Fortran order, i.e. column-major. Passing an arbitrary non-zero number such as 1000 also flattens column-major, because older NumPy versions treat any truthy non-string value as Fortran order. Note, however, that layer1.output.flatten(2) in the code below calls Theano's flatten, whose argument is the number of dimensions to keep (here 2: batch x features), not an order.
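A quick check of the two flattening orders (plain NumPy, independent of the network code):

import numpy as np

a = np.array([[1, 2],
              [3, 4]])
print(a.flatten())     # default 'C' order, row-major: [1 2 3 4]
print(a.flatten('F'))  # Fortran order, column-major:  [1 3 2 4]

# By contrast, Theano's tensor flatten takes the number of output dimensions:
# layer1.output.flatten(2) below collapses a 4-D (batch, maps, h, w) tensor
# into a 2-D (batch, maps*h*w) matrix.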
First, the LogisticRegression and HiddenLayer classes from the earlier sections (together with load_data) have to be placed in the logistic_sgd module (logistic_sgd.py), since the complete code imports them from there.
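For reference, the interface that the code below actually relies on looks roughly like this (a skeleton only; the real bodies are the HiddenLayer, LogisticRegression and load_data code from the earlier sections):

# logistic_sgd.py -- skeleton of the interface used below (not a full implementation)

class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, activation=None):
        self.output = ...      # activation(input.dot(W) + b)
        self.params = [...]    # [W, b]

class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        self.y_pred = ...      # predicted class labels
        self.params = [...]    # [W, b]

    def negative_log_likelihood(self, y):
        ...

    def errors(self, y):
        ...

def load_data(dataset):
    # returns [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
    # as Theano shared variables
    ...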
The complete code:
#coding: utf-8
#from __future__ import division
import numpy
import theano
from theano.tensor.signal import conv
from theano.tensor.signal import downsample
from theano import tensor as T
from logistic_sgd import HiddenLayer, LogisticRegression, load_data
from theano.tensor.nnet import conv2d
import timeit
import os
class LeNetConvPoolLayer(object):
def __init__(self, rng, input, filter_shape, image_shape, poolsize = (2,2)):
        # The number of input feature maps (image_shape[1]) must match filter_shape[1]
assert image_shape[1] == filter_shape[1]
self.input = input
        # fan_in / fan_out determine the initialization bound W_bound (cf. w_bound in the earlier example)
fan_in = numpy.prod(filter_shape[1:])
fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:])) / numpy.prod(poolsize)
W_bound = numpy.sqrt(6. / (fan_in + fan_out))
self.W = theano.shared(
numpy.asarray(
rng.uniform(low = - W_bound, high = W_bound, size = filter_shape),
dtype = theano.config.floatX
),
borrow = True
)
b_values = numpy.zeros((filter_shape[0],), dtype = theano.config.floatX)
self.b = theano.shared(value = b_values, borrow = True)
conv_out = conv2d(
input = input,
filters = self.W,
filter_shape = filter_shape,
input_shape = image_shape
)
pooled_out = downsample.max_pool_2d(
input = conv_out,
ds = poolsize,
ignore_border = True
)
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
self.params = [self.W, self.b]
self.input = input
def test_mlp(learning_rate = 0.1, n_epochs = 10,
dataset = 'mnist.pkl.gz', batch_size = 500, nkerns=[20, 50]):
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
n_train_batches = train_set_x.get_value(borrow = True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow = True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow = True).shape[0] / batch_size
rng = numpy.random.RandomState(23455)
index = T.iscalar()
x = T.matrix('x')
y = T.ivector('y')
print "... building the model"
layer0_input = x.reshape((batch_size, 1, 28, 28))
layer0 = LeNetConvPoolLayer(
rng,
input = layer0_input,
image_shape = (batch_size, 1, 28, 28),
filter_shape = (nkerns[0], 1, 5, 5),
poolsize = (2, 2)
)
layer1 = LeNetConvPoolLayer(
rng,
input = layer0.output,
image_shape = (batch_size, nkerns[0], 12, 12),
filter_shape = (nkerns[1], nkerns[0], 5, 5),
poolsize = (2,2)
)
layer2_input = layer1.output.flatten(2)
layer2 = HiddenLayer(
rng,
input = layer2_input,
n_in = nkerns[1] * 4 * 4,
n_out = 500,
activation = T.tanh
)
layer3 = LogisticRegression(input = layer2.output, n_in = 500, n_out = 10)
cost = layer3.negative_log_likelihood(y)
test_model = theano.function(
[index],
layer3.errors(y),
givens = {
x: test_set_x[index * batch_size: (index + 1) * batch_size],
y: test_set_y[index * batch_size: (index + 1) * batch_size]
}
)
validate_model = theano.function(
[index],
layer3.errors(y),
givens = {
x: valid_set_x[index * batch_size: (index + 1) * batch_size],
y: valid_set_y[index * batch_size: (index + 1) * batch_size]
}
)
params = layer3.params + layer2.params + layer1.params + layer0.params
grads = T.grad(cost, params)
updates = [
(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)
]
train_model = theano.function(
[index],
cost,
updates = updates,
givens = {
x: train_set_x[index * batch_size: (1 + index) * batch_size],
y: train_set_y[index * batch_size: (1 + index) * batch_size]
}
)
print "... training"
patience = 10000
patience_increase = 2
improvement_threshold = 0.995
validation_frequency = min(n_train_batches, patience / 2)
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
this_validation_loss = numpy.mean(validation_losses)
print (
'epoch %i, minibatch %i/%i, validation error %f %%' %
(
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
)
if this_validation_loss < best_validation_loss:
if this_validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, iter * patience_increase)
best_validation_loss = this_validation_loss
best_iter = iter
test_losses = [test_model(i) for i in xrange(n_test_batches)]
test_score = numpy.mean(test_losses)
print ((
'epoch %i, minibatch %i/%i, test error of'
' best model %f %%'
) %
(epoch,
minibatch_index + 1,
n_train_batches,
test_score * 100.
)
)
if patience <= iter:
done_looping = True
break
end_time = timeit.default_timer()
print (
('Optimization complete. Best validation score of %f %% '
'obtained at iteration %i, with test performance %f %%')
%
(
best_validation_loss * 100., best_iter + 1, test_score * 100.,
)
)
print "begin predict :"
test_set_x = test_set_x.get_value()
num = 500
predict_model = theano.function(inputs = [layer0.input],
outputs = layer3.y_pred)
predicted_values = predict_model(test_set_x[:num].reshape((-1, 1, 28, 28)))
#print ("The Ori values :")
#print test_set_y.eval()[:num]
#print ("Predicted values for the first %i examples in test set:" % num)
#print predicted_values
print "equal :"
print float(numpy.sum(predicted_values == test_set_y.eval()[:num])) / predicted_values.shape[0]
if __name__ == "__main__":
test_mlp()
After training the convolutional network for 10 epochs, the accuracy on the first 500 test samples reaches 0.984.
Below is a TensorFlow version of a similar pipeline, reposted from
https://juejin.im/post/595f87f4f265da6c50300bcb
That article describes convolutional neural networks, and the detailed programming operations involved, quite carefully, and it comes with the corresponding illustrations; it is a classic write-up.
#coding: utf-8
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
# Convolutional Layer 1.
filter_size1 = 5
num_filters1 = 16
# Convolutional Layer 2.
filter_size2 = 5
num_filters2 = 36
# Fully-connected layer.
fc_size = 128
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST', one_hot=True)
data.test.cls = np.argmax(data.test.labels, axis = 1)
# MNIST images are 28 pixels in each dimension
img_size = 28
# Images are stored in one-dimensional arrays of this length
img_size_flat = img_size * img_size
# Tuple with height and width of images, used to reshape arrays
img_shape = (img_size, img_size)
# Number of color channels for the images: 1 channel for grey-scale
num_channels = 1
# Number of classes
num_classes = 10
def plot_images(images, cls_true, cls_pred=None):
assert len(images) == len(cls_true) == 9
fig, axes = plt.subplots(3, 3)
fig.subplots_adjust(hspace=0.3, wspace=0.3)
for i, ax in enumerate(axes.flat):
ax.imshow(images[i].reshape(img_shape), cmap = 'binary')
if cls_pred is None:
xlabel = "True: {0}".format(cls_true[i])
else:
xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])
ax.set_xlabel(xlabel)
ax.set_xticks([])
ax.set_yticks([])
plt.show()
images = data.test.images[0: 9]
cls_true = data.test.cls[0:9]
plot_images(images, cls_true)
def new_weights(shape):
return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def new_biases(length):
return tf.Variable(tf.constant(0.05, shape = [length]))
def new_conv_layer(input, num_input_channels, filter_size, num_filters, use_pooling = True):
shape = [filter_size, filter_size, num_input_channels, num_filters]
weights = new_weights(shape=shape)
biases = new_biases(length=num_filters)
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
layer += biases
if use_pooling:
layer = tf.nn.max_pool(value=layer, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding='SAME')
layer = tf.nn.relu(layer)
return layer, weights
def flatten_layer(layer):
layer_shape = layer.get_shape()
num_features = layer_shape[1:4].num_elements()
layer_flat = tf.reshape(layer, [-1, num_features])
return layer_flat, num_features
def new_fc_layer(input, num_inputs, num_outputs, use_relu = True):
weights = new_weights(shape=[num_inputs, num_outputs])
biases = new_biases(length=num_outputs)
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name = 'x')
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, 10], name = 'y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
layer_conv1, weights_conv1 = new_conv_layer(input = x_image, num_input_channels=num_channels, filter_size=filter_size1,\
num_filters=num_filters1, use_pooling=True)
layer_conv2, weights_conv2 = new_conv_layer(input = layer_conv1, num_input_channels=num_filters1, filter_size=filter_size2, \
num_filters=num_filters2, use_pooling=True)
layer_flat, num_features = flatten_layer(layer_conv2)
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=fc_size, use_relu=True)
layer_fc2 = new_fc_layer(layer_fc1, num_inputs=fc_size, num_outputs=num_classes, use_relu=False)
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2, labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session = tf.Session()
session.run(tf.global_variables_initializer())
train_batch_size = 64
total_iterations = 0
def optimize(num_iterations):
global total_iterations
start_time = time.time()
for i in range(total_iterations, total_iterations + num_iterations):
x_batch, y_true_batch = data.train.next_batch(train_batch_size)
feed_dict_train = {x: x_batch, y_true: y_true_batch}
session.run(optimizer, feed_dict=feed_dict_train)
if i % 100 == 0:
acc = session.run(accuracy, feed_dict=feed_dict_train)
msg = "Optimization Iterstion: {0:>6}, Training Accuracy: {1:>6.1%}"
print(msg.format(i + 1, acc))
total_iterations += num_iterations
end_time = time.time()
time_dif = end_time - start_time
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
def plot_example_errors(cls_pred, correct):
incorrect = (correct == False)
images = data.test.images[incorrect]
cls_pred = cls_pred[incorrect]
cls_true = data.test.cls[incorrect]
plot_images(images=images[0:9], cls_true=cls_true[0:9], cls_pred=cls_pred[0:9])
def plot_confusion_matrix(cls_pred):
cls_true = data.test.cls
cm = confusion_matrix(y_true=cls_true, y_pred=cls_pred)
print(cm)
plt.matshow(cm)
plt.colorbar()
tick_marks = np.arange(num_classes)
plt.xticks(tick_marks, range(num_classes))
plt.yticks(tick_marks, range(num_classes))
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()
test_batch_size = 256
def print_test_accuracy(show_example_errors = False, show_confusion_matrix = False):
num_test = len(data.test.images)
cls_pred = np.zeros(shape = num_test, dtype = np.int)
i = 0
while i < num_test:
j = min(i + test_batch_size, num_test)
images = data.test.images[i:j,:]
labels = data.test.labels[i:j,:]
feed_dict = {x: images, y_true: labels}
cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)
i = j
cls_true = data.test.cls
correct = (cls_true == cls_pred)
correct_num = correct.sum()
acc = float(correct_num) / num_test
msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
print(msg.format(acc, correct_num, num_test))
if show_example_errors:
print("Example errors:")
plot_example_errors(cls_pred=cls_pred, correct=correct)
if show_confusion_matrix:
print("Confusion Matrix: ")
plot_confusion_matrix(cls_pred=cls_pred)
print_test_accuracy()
optimize(num_iterations=10000)
print_test_accuracy()