Let us now discuss the overall CNN:
The overall structure of a convolutional neural network amounts to placing a number of convolution layers and MaxPooling layers in front of an MLP (multilayer perceptron) as a preprocessing stage that extracts edge-related information.
Some details of the complete code are discussed below:
The bounds used when initializing the weights are not discussed further here; treat them as given.
When MaxPooling is used, ignore_border=True is normally chosen: the incomplete border region (the pixels left over when the input size is not an exact multiple of the pool size) carries no useful information, so it is simply dropped.
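To make the effect of ignore_border concrete, here is a small, self-contained NumPy sketch (illustrative only; the helper max_pool_2x2 is made up for this example and is not the Theano implementation): with a 5x5 input and a 2x2 pool, ignore_border=True gives a 2x2 output because the leftover row and column are discarded, while ignore_border=False keeps a partial third row and column.

import numpy as np

def max_pool_2x2(a, ignore_border=True):
    # Toy 2x2 max pooling, only to show what ignore_border controls.
    h, w = a.shape
    if ignore_border:
        h, w = h - h % 2, w - w % 2   # drop the incomplete border region
        a = a[:h, :w]
    out = np.full(((h + 1) // 2, (w + 1) // 2), -np.inf)
    for i in range(0, h, 2):
        for j in range(0, w, 2):
            out[i // 2, j // 2] = a[i:i + 2, j:j + 2].max()
    return out

a = np.arange(25).reshape(5, 5)
print(max_pool_2x2(a, ignore_border=True).shape)   # (2, 2): border dropped
print(max_pool_2x2(a, ignore_border=False).shape)  # (3, 3): partial windows kept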
The numpy.ndarray.flatten method takes the flattening order as its argument. The default is C order, i.e. row-major flattening; with the "F" option it flattens in Fortran order, i.e. column-major. Passing an arbitrary non-zero number such as 1000 also flattens column-major, because older NumPy versions treat any truthy non-string value as Fortran order. Note, however, that layer1.output.flatten(2) in the code below calls Theano's flatten, whose argument is the number of dimensions to keep (here 2: batch x features), not an order.
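A quick check of the two flattening orders (plain NumPy, independent of the network code):

import numpy as np

a = np.array([[1, 2],
              [3, 4]])
print(a.flatten())     # default 'C' order, row-major: [1 2 3 4]
print(a.flatten('F'))  # Fortran order, column-major:  [1 3 2 4]

# By contrast, Theano's tensor flatten takes the number of output dimensions:
# layer1.output.flatten(2) below collapses a 4-D (batch, maps, h, w) tensor
# into a 2-D (batch, maps*h*w) matrix.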
First, the LogisticRegression and HiddenLayer classes from the earlier sections (together with load_data) have to be placed in the logistic_sgd module (logistic_sgd.py), since the complete code imports them from there.
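For reference, the interface that the code below actually relies on looks roughly like this (a skeleton only; the real bodies are the HiddenLayer, LogisticRegression and load_data code from the earlier sections):

# logistic_sgd.py -- skeleton of the interface used below (not a full implementation)

class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, activation=None):
        self.output = ...      # activation(input.dot(W) + b)
        self.params = [...]    # [W, b]

class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        self.y_pred = ...      # predicted class labels
        self.params = [...]    # [W, b]

    def negative_log_likelihood(self, y):
        ...

    def errors(self, y):
        ...

def load_data(dataset):
    # returns [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
    # as Theano shared variables
    ...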
The complete code:
#coding: utf-8
#from __future__ import division
import numpy
import theano
from theano.tensor.signal import conv
from theano.tensor.signal import downsample
from theano import tensor as T
from logistic_sgd import HiddenLayer, LogisticRegression, load_data
from theano.tensor.nnet import conv2d
import timeit
import os
class LeNetConvPoolLayer(object):
def __init__(self, rng, input, filter_shape, image_shape, poolsize = (2,2)):
        # The number of input feature maps (image_shape[1]) must match filter_shape[1]
assert image_shape[1] == filter_shape[1]
self.input = input
        # fan_in / fan_out determine the initialization bound W_bound (cf. w_bound in the earlier example)
fan_in = numpy.prod(filter_shape[1:])
fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:])) / numpy.prod(poolsize)
W_bound = numpy.sqrt(6. / (fan_in + fan_out))
self.W = theano.shared(
numpy.asarray(
rng.uniform(low = - W_bound, high = W_bound, size = filter_shape),
dtype = theano.config.floatX
),
borrow = True
)
b_values = numpy.zeros((filter_shape[0],), dtype = theano.config.floatX)
self.b = theano.shared(value = b_values, borrow = True)
conv_out = conv2d(
input = input,
filters = self.W,
filter_shape = filter_shape,
input_shape = image_shape
)
pooled_out = downsample.max_pool_2d(
input = conv_out,
ds = poolsize,
ignore_border = True
)
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
self.params = [self.W, self.b]
self.input = input
def test_mlp(learning_rate = 0.1, n_epochs = 10,
dataset = 'mnist.pkl.gz', batch_size = 500, nkerns=[20, 50]):
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
n_train_batches = train_set_x.get_value(borrow = True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow = True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow = True).shape[0] / batch_size
rng = numpy.random.RandomState(23455)
index = T.iscalar()
x = T.matrix('x')
y = T.ivector('y')
print "... building the model"
layer0_input = x.reshape((batch_size, 1, 28, 28))
layer0 = LeNetConvPoolLayer(
rng,
input = layer0_input,
image_shape = (batch_size, 1, 28, 28),
filter_shape = (nkerns[0], 1, 5, 5),
poolsize = (2, 2)
)
layer1 = LeNetConvPoolLayer(
rng,
input = layer0.output,
image_shape = (batch_size, nkerns[0], 12, 12),
filter_shape = (nkerns[1], nkerns[0], 5, 5),
poolsize = (2,2)
)
layer2_input = layer1.output.flatten(2)
layer2 = HiddenLayer(
rng,
input = layer2_input,
n_in = nkerns[1] * 4 * 4,
n_out = 500,
activation = T.tanh
)
layer3 = LogisticRegression(input = layer2.output, n_in = 500, n_out = 10)
cost = layer3.negative_log_likelihood(y)
test_model = theano.function(
[index],
layer3.errors(y),
givens = {
x: test_set_x[index * batch_size: (index + 1) * batch_size],
y: test_set_y[index * batch_size: (index + 1) * batch_size]
}
)
validate_model = theano.function(
[index],
layer3.errors(y),
givens = {
x: valid_set_x[index * batch_size: (index + 1) * batch_size],
y: valid_set_y[index * batch_size: (index + 1) * batch_size]
}
)
params = layer3.params + layer2.params + layer1.params + layer0.params
grads = T.grad(cost, params)
updates = [
(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)
]
train_model = theano.function(
[index],
cost,
updates = updates,
givens = {
x: train_set_x[index * batch_size: (1 + index) * batch_size],
y: train_set_y[index * batch_size: (1 + index) * batch_size]
}
)
print "... training"
patience = 10000
patience_increase = 2
improvement_threshold = 0.995
validation_frequency = min(n_train_batches, patience / 2)
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
this_validation_loss = numpy.mean(validation_losses)
print (
'epoch %i, minibatch %i/%i, validation error %f %%' %
(
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
)
if this_validation_loss < best_validation_loss:
if this_validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, iter * patience_increase)
best_validation_loss = this_validation_loss
best_iter = iter
test_losses = [test_model(i) for i in xrange(n_test_batches)]
test_score = numpy.mean(test_losses)
print ((
'epoch %i, minibatch %i/%i, test error of'
' best model %f %%'
) %
(epoch,
minibatch_index + 1,
n_train_batches,
test_score * 100.
)
)
if patience <= iter:
done_looping = True
break
end_time = timeit.default_timer()
print (
('Optimization complete. Best validation score of %f %% '
'obtained at iteration %i, with test performance %f %%')
%
(
best_validation_loss * 100., best_iter + 1, test_score * 100.,
)
)
print "begin predict :"
test_set_x = test_set_x.get_value()
num = 500
predict_model = theano.function(inputs = [layer0.input],
outputs = layer3.y_pred)
predicted_values = predict_model(test_set_x[:num].reshape((-1, 1, 28, 28)))
#print ("The Ori values :")
#print test_set_y.eval()[:num]
#print ("Predicted values for the first %i examples in test set:" % num)
#print predicted_values
print "equal :"
print float(numpy.sum(predicted_values == test_set_y.eval()[:num])) / predicted_values.shape[0]
if __name__ == "__main__":
test_mlp()
After training the convolutional network for 10 epochs, the accuracy on the first 500 test samples reaches 0.984.
Below is a TensorFlow version of a similar pipeline, reposted from
https://juejin.im/post/595f87f4f265da6c50300bcb
That article describes convolutional neural networks, and the detailed programming operations involved, quite carefully, and it comes with the corresponding illustrations; it is a classic write-up.
#coding: utf-8
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
# Convolutional Layer 1.
filter_size1 = 5
num_filters1 = 16
# Convolutional Layer 2.
filter_size2 = 5
num_filters2 = 36
# Fully-connected layer.
fc_size = 128
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST', one_hot=True)
data.test.cls = np.argmax(data.test.labels, axis = 1)
# MNIST images are 28 pixels in each dimension
img_size = 28
# Images are stored in one-dimensional arrays of this length
img_size_flat = img_size * img_size
# Tuple with height and width of images, used to reshape arrays
img_shape = (img_size, img_size)
# Number of color channels for the images: 1 channel for grey-scale
num_channels = 1
# Number of classes
num_classes = 10
def plot_images(images, cls_true, cls_pred=None):
assert len(images) == len(cls_true) == 9
fig, axes = plt.subplots(3, 3)
fig.subplots_adjust(hspace=0.3, wspace=0.3)
for i, ax in enumerate(axes.flat):
ax.imshow(images[i].reshape(img_shape), cmap = 'binary')
if cls_pred is None:
xlabel = "True: {0}".format(cls_true[i])
else:
xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])
ax.set_xlabel(xlabel)
ax.set_xticks([])
ax.set_yticks([])
plt.show()
images = data.test.images[0: 9]
cls_true = data.test.cls[0:9]
plot_images(images, cls_true)
def new_weights(shape):
return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def new_biases(length):
return tf.Variable(tf.constant(0.05, shape = [length]))
def new_conv_layer(input, num_input_channels, filter_size, num_filters, use_pooling = True):
shape = [filter_size, filter_size, num_input_channels, num_filters]
weights = new_weights(shape=shape)
biases = new_biases(length=num_filters)
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
layer += biases
if use_pooling:
layer = tf.nn.max_pool(value=layer, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding='SAME')
layer = tf.nn.relu(layer)
return layer, weights
def flatten_layer(layer):
layer_shape = layer.get_shape()
num_features = layer_shape[1:4].num_elements()
layer_flat = tf.reshape(layer, [-1, num_features])
return layer_flat, num_features
def new_fc_layer(input, num_inputs, num_outputs, use_relu = True):
weights = new_weights(shape=[num_inputs, num_outputs])
biases = new_biases(length=num_outputs)
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name = 'x')
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, 10], name = 'y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
layer_conv1, weights_conv1 = new_conv_layer(input = x_image, num_input_channels=num_channels, filter_size=filter_size1,\
num_filters=num_filters1, use_pooling=True)
layer_conv2, weights_conv2 = new_conv_layer(input = layer_conv1, num_input_channels=num_filters1, filter_size=filter_size2, \
num_filters=num_filters2, use_pooling=True)
layer_flat, num_features = flatten_layer(layer_conv2)
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=fc_size, use_relu=True)
layer_fc2 = new_fc_layer(layer_fc1, num_inputs=fc_size, num_outputs=num_classes, use_relu=False)
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2, labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session = tf.Session()
session.run(tf.global_variables_initializer())
train_batch_size = 64
total_iterations = 0
def optimize(num_iterations):
global total_iterations
start_time = time.time()
for i in range(total_iterations, total_iterations + num_iterations):
x_batch, y_true_batch = data.train.next_batch(train_batch_size)
feed_dict_train = {x: x_batch, y_true: y_true_batch}
session.run(optimizer, feed_dict=feed_dict_train)
if i % 100 == 0:
acc = session.run(accuracy, feed_dict=feed_dict_train)
msg = "Optimization Iterstion: {0:>6}, Training Accuracy: {1:>6.1%}"
print(msg.format(i + 1, acc))
total_iterations += num_iterations
end_time = time.time()
time_dif = end_time - start_time
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
def plot_example_errors(cls_pred, correct):
incorrect = (correct == False)
images = data.test.images[incorrect]
cls_pred = cls_pred[incorrect]
cls_true = data.test.cls[incorrect]
plot_images(images=images[0:9], cls_true=cls_true[0:9], cls_pred=cls_pred[0:9])
def plot_confusion_matrix(cls_pred):
cls_true = data.test.cls
cm = confusion_matrix(y_true=cls_true, y_pred=cls_pred)
print(cm)
plt.matshow(cm)
plt.colorbar()
tick_marks = np.arange(num_classes)
plt.xticks(tick_marks, range(num_classes))
plt.yticks(tick_marks, range(num_classes))
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()
test_batch_size = 256
def print_test_accuracy(show_example_errors = False, show_confusion_matrix = False):
num_test = len(data.test.images)
cls_pred = np.zeros(shape = num_test, dtype = np.int)
i = 0
while i < num_test:
j = min(i + test_batch_size, num_test)
images = data.test.images[i:j,:]
labels = data.test.labels[i:j,:]
feed_dict = {x: images, y_true: labels}
cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)
i = j
cls_true = data.test.cls
correct = (cls_true == cls_pred)
correct_num = correct.sum()
acc = float(correct_num) / num_test
msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
print(msg.format(acc, correct_num, num_test))
if show_example_errors:
print("Example errors:")
plot_example_errors(cls_pred=cls_pred, correct=correct)
if show_confusion_matrix:
print("Confusion Matrix: ")
plot_confusion_matrix(cls_pred=cls_pred)
print_test_accuracy()
optimize(num_iterations=10000)
print_test_accuracy()