Initializing the Network Structure
The previous article covered preparing the dataset. That data layer is in fact also the first layer of the network, the input layer: its job is to convert the data into a format the next layer can process. Let us first look at the overall network structure, defined as follows:
conv1 = Convolution(data, kernel_size = 5, num_output = 20)    # convolution layer
pool1 = Max_pooling(conv1, kernel_size = 2, stride = 2)        # pooling layer
conv2 = Convolution(pool1, kernel_size = 5, num_output = 50)
pool2 = Max_pooling(conv2, kernel_size = 2, stride = 2)
flat1 = Flatten(pool2)                                         # flatten the feature maps
fc1 = Full_connection(flat1, 500)                              # fully connected layer
relu1 = Relu(fc1)                                              # activation function
fc2 = Full_connection(relu1, 10)
softmax = Softmax(fc2)
Below we go through how each layer is defined and how its forward and backward propagation are implemented.
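Before going layer by layer, it helps to trace the feature-map sizes through this stack. The numbers below are only a sketch that assumes a 28x28 single-channel input (e.g. MNIST); the actual input shape comes from the data layer of the previous article.
# Feature-map side length after each layer (stride-1 convolutions, no padding,
# 2x2 max pooling with stride 2); assumption: 28x28 single-channel input.
size = 28
size = size - 5 + 1      # conv1 (5x5, 20 outputs) -> 24x24
size = size // 2         # pool1                   -> 12x12
size = size - 5 + 1      # conv2 (5x5, 50 outputs) -> 8x8
size = size // 2         # pool2                   -> 4x4
print(size, 50 * size * size)   # 4 800: the 800-dim vector fed to fc1 after Flatten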
The Convolution Layer
The Convolution class stores the number of output feature maps of the layer, a reference to the previous layer, the input size and the output size after convolution, the convolution kernels, the padding, and so on. The weights are initialized with Xavier initialization, and gradient descent uses momentum updates, which take the previous gradient step into account, combined with a multiplicative weight decay. The convolution layer is implemented as follows:
import math
import numpy as np

def conv(X, W, b, stride = 1, padding = 0):    # the actual convolution computation used by the forward pass
    n_filters, d_filter, kernel_size, _ = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = (h_x - kernel_size + 2 * padding) / stride + 1
    w_out = (w_x - kernel_size + 2 * padding) / stride + 1
    h_out, w_out = int(h_out), int(w_out)
    X_col = im2col(X, kernel_size, padding=padding, stride=stride)
    W_col = W.reshape(n_filters, -1)
    out = (np.dot(W_col, X_col).T + b).T
    out = out.reshape(n_filters, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)
    return out
def im2col(x, kernel_size, padding=0, stride=1):    # unroll image patches into columns (image to matrix)
    p = padding
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
    k, i, j = get_im2col_indices(x.shape, kernel_size, padding, stride)
    cols = x_padded[:, k, i, j]
    C = x.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(kernel_size ** 2 * C, -1)
    return cols
def get_im2col_indices(x_shape, kernel_size, padding=0, stride=1):
    N, C, H, W = x_shape
    assert (H + 2 * padding - kernel_size) % stride == 0
    assert (W + 2 * padding - kernel_size) % stride == 0
    out_height = int((H + 2 * padding - kernel_size) / stride + 1)
    out_width = int((W + 2 * padding - kernel_size) / stride + 1)
    i0 = np.repeat(np.arange(kernel_size), kernel_size)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(kernel_size), kernel_size * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    k = np.repeat(np.arange(C), kernel_size * kernel_size).reshape(-1, 1)
    return (k.astype(int), i.astype(int), j.astype(int))
def col2im(x, img_shape, kernel_size, padding = 0, stride = 1):    # fold columns back into an image, accumulating overlaps
    x_row_num, x_col_num = x.shape
    channels, img_height, img_width = img_shape
    x_width = img_width - kernel_size + 2 * padding + 1
    x_height = img_height - kernel_size + 2 * padding + 1
    assert channels * kernel_size ** 2 == x_row_num
    assert x_width * x_height == x_col_num
    x_reshape = x.T.reshape(x_height, x_width, channels, kernel_size, kernel_size)
    output_padded = np.zeros((channels, img_height + 2 * padding, img_width + 2 * padding))
    for i in range(x_height):
        for j in range(x_width):
            output_padded[:, i * stride : i * stride + kernel_size, j * stride : j * stride + kernel_size] = \
                output_padded[:, i * stride : i * stride + kernel_size, j * stride : j * stride + kernel_size] + \
                x_reshape[i, j, ...]
    return output_padded[:, padding : img_height + padding, padding : img_width + padding]
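# ---- illustrative usage, not part of the original implementation ----
# A quick check of the helpers above on toy inputs: a single 1-channel 3x3
# image with a 2x2 kernel gives an im2col matrix with one column per sliding
# window, and conv() with three 3x3 filters on a 1x8x8 input returns a
# (1, 3, 6, 6) tensor, matching (8 - 3 + 1) = 6.
x_demo = np.arange(9, dtype=float).reshape(1, 1, 3, 3)
print(im2col(x_demo, kernel_size=2).shape)            # (4, 4)
X_demo = np.random.rand(1, 1, 8, 8)
W_demo = np.random.rand(3, 1, 3, 3)
print(conv(X_demo, W_demo, np.zeros(3)).shape)        # (1, 3, 6, 6)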
class Convolution:
    def __init__(self, layer, kernel_size = 1, num_output = 1, padding = 0):
        self.upper_layer = layer                  # the previous layer
        self.num = layer.num
        self.num_input = layer.num_output         # this layer's input channels = previous layer's outputs
        self.input_h = layer.output_h
        self.input_w = layer.output_w
        self.output_h = self.input_h + 2 * padding - kernel_size + 1
        self.output_w = self.input_w + 2 * padding - kernel_size + 1    # output size of a stride-1 convolution
        self.num_output = num_output
        self.kernel_size = kernel_size
        self.padding = padding
        scale = self.get_XavierScale(kernel_size)
        self.weight = np.random.rand(num_output, self.num_input, kernel_size, kernel_size)
        self.weight = (self.weight - 0.5) * 2 * scale               # Xavier initialization: uniform in [-scale, scale]
        self.weight_diff_his = np.zeros(self.weight.shape)          # gradient history used by the momentum update
        self.bias = np.zeros((num_output))
        self.bias_diff_his = np.zeros(self.bias.shape)
    def forward(self):
        self.input_data = self.upper_layer.forward()
        self.num = self.upper_layer.num
        self.output_data = conv(self.input_data, self.weight, self.bias, padding = self.padding)
        return self.output_data
    def backward(self, diff):
        weight_diff, bias_diff = self.get_diff(diff)                # momentum-based gradient descent
        self.weight_diff_his = 0.9 * self.weight_diff_his + weight_diff.reshape(self.weight.shape)
        self.weight = self.weight * 0.9995 - self.weight_diff_his   # multiplicative weight decay, then subtract the momentum step
        self.bias_diff_his = 0.9 * self.bias_diff_his + 2 * bias_diff   # the bias gradient is scaled by 2
        self.bias = self.bias * 0.9995 - self.bias_diff_his
        self.upper_layer.backward(self.diff)
    def get_output(self):
        return self.output_data
    def get_XavierScale(self, kernel_size):
        return math.sqrt(3. / (self.num_input * kernel_size**2))
    def get_diff(self, diff):
        self.diff = np.zeros(self.input_data.shape)
        weight_diff = np.zeros(self.weight.shape)
        weight_diff = weight_diff.reshape(weight_diff.shape[0], -1)
        bias_diff = np.zeros((self.num_output))
        weight_reshape_T = self.weight.reshape(self.weight.shape[0], -1).T
        for i in range(self.num):
            input_data_col = im2col(self.input_data[[i]], self.kernel_size, self.padding)
            weight_diff = weight_diff + diff[i].reshape(diff[i].shape[0], -1).dot(input_data_col.T)
            bias_diff = bias_diff + np.sum(diff[i].reshape(diff[i].shape[0], -1), 1)
            tmp_diff = weight_reshape_T.dot(diff[i].reshape(diff[i].shape[0], -1))
            self.diff[i, ...] = col2im(tmp_diff, self.input_data.shape[1:], self.kernel_size, padding = self.padding)
        return weight_diff, bias_diff
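To see the update rule used in backward() in isolation, here is a minimal standalone sketch (the helper name momentum_update is mine, not part of the article's code). It assumes, as in this network, that the incoming gradient already carries the learning rate, since the Softmax layer multiplies by lr before propagating back.
import numpy as np

def momentum_update(param, history, grad, momentum=0.9, decay=0.9995):
    # history accumulates past gradients; the parameter is shrunk by a
    # multiplicative decay before the accumulated step is subtracted.
    history = momentum * history + grad
    param = decay * param - history
    return param, history

w = np.zeros((3, 3))
v = np.zeros_like(w)
w, v = momentum_update(w, v, grad=np.full((3, 3), 0.1))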
The Pooling Layer
We use max pooling. The code is fairly self-explanatory, so I will keep the commentary brief; the implementation is as follows:
def im2col(X, kernel_size = 1, stride = 1):    # unroll each pooling window into one row
    num, channels, height, width = X.shape
    surplus_height = (height - kernel_size) % stride
    surplus_width = (width - kernel_size) % stride
    pad_h = (kernel_size - surplus_height) % kernel_size
    pad_w = (kernel_size - surplus_width) % kernel_size
    X = np.pad(X, ((0,0),(0,0),(0,pad_h),(0,pad_w)), mode='constant')
    k, i, j = get_im2col_indices(X.shape, kernel_size, stride = stride)
    X_col = X[:, k, i, j].reshape(num * channels, kernel_size**2, -1)
    X_col = X_col.transpose(0, 2, 1)
    return X_col.reshape(-1, kernel_size**2)
def get_im2col_indices(x_shape, kernel_size, padding=0, stride=1):
    N, C, H, W = x_shape
    assert (H + 2 * padding - kernel_size) % stride == 0
    assert (W + 2 * padding - kernel_size) % stride == 0
    out_height = int((H + 2 * padding - kernel_size) / stride + 1)
    out_width = int((W + 2 * padding - kernel_size) / stride + 1)
    i0 = np.repeat(np.arange(kernel_size), kernel_size)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(kernel_size), kernel_size * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    k = np.repeat(np.arange(C), kernel_size * kernel_size).reshape(-1, 1)
    return (k.astype(int), i.astype(int), j.astype(int))
def col2ims(x, img_shape, kernel_size, stride):    # fold pooling-window rows back into image shape
    x_row_num, x_col_num = x.shape
    img_n, img_c, img_h, img_w = img_shape
    o_h = int(math.ceil((img_h - kernel_size + 0.) / stride)) + 1
    o_w = int(math.ceil((img_w - kernel_size + 0.) / stride)) + 1
    assert img_n * img_c * o_h * o_w == x_row_num
    assert kernel_size**2 == x_col_num
    surplus_h = (img_h - kernel_size) % stride
    surplus_w = (img_w - kernel_size) % stride
    pad_h = (kernel_size - surplus_h) % stride
    pad_w = (kernel_size - surplus_w) % stride
    output_padded = np.zeros((img_n, img_c, img_h + pad_h, img_w + pad_w))
    x_reshape = x.reshape(img_n, img_c, o_h, o_w, kernel_size, kernel_size)
    for n in range(img_n):
        for i in range(o_h):
            for j in range(o_w):
                output_padded[n, :, i * stride : i * stride + kernel_size, j * stride : j * stride + kernel_size] = \
                    output_padded[n, :, i * stride : i * stride + kernel_size, j * stride : j * stride + kernel_size] + \
                    x_reshape[n, :, i, j, ...]
    return output_padded[:, :, 0 : img_h, 0 : img_w]    # crop away the bottom/right padding added in im2col
class Max_pooling:
    def __init__(self, layer, kernel_size = 1, stride = 1):
        self.num = layer.num
        self.num_output = layer.num_output
        self.num_input = layer.num_output
        self.input_h = layer.output_h
        self.input_w = layer.output_w
        self.output_h = int(math.ceil((self.input_h - kernel_size + 0.) / stride)) + 1
        self.output_w = int(math.ceil((self.input_w - kernel_size + 0.) / stride)) + 1
        self.upper_layer = layer
        self.kernel_size = kernel_size
        self.stride = stride
    def forward(self):
        self.input_data = self.upper_layer.forward()
        self.num = self.upper_layer.num
        input_col = im2col(self.input_data, self.kernel_size, self.stride)
        tmp_index = np.tile(np.arange(input_col.shape[1]), input_col.shape[0]).reshape(input_col.shape)
        self.max_index = tmp_index == input_col.argmax(1).reshape(-1, 1)    # boolean mask marking the maximum of each pooling window
        self.output_data = input_col[self.max_index].reshape(self.num, self.num_input, self.output_h, self.output_w)
        return self.output_data
    def backward(self, diff):
        diff_col = np.zeros((self.num * self.num_input * self.output_h * self.output_w, self.kernel_size**2))
        diff_col[self.max_index] = diff.reshape(-1)    # route the gradient only to the max positions
        self.diff = col2ims(diff_col, self.input_data.shape, self.kernel_size, self.stride)
        self.upper_layer.backward(self.diff)
    def get_output(self):
        return self.output_data
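As a quick cross-check of the layer above (not part of the original code), max pooling with a kernel that exactly tiles the input, such as the 2x2/stride-2 pooling used in this network, can also be written as a single reshape followed by a max over the window axes:
import numpy as np

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
pooled = x.reshape(1, 1, 2, 2, 2, 2).max(axis=(3, 5))   # split each spatial axis into (output, window)
print(pooled)   # [[[[ 5.  7.] [13. 15.]]]]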
Flattening
Flattening reshapes the feature maps into vectors so that they can be processed by the fully connected layers. The implementation is as follows:
class Flatten:
    def __init__(self, layer):
        self.upper_layer = layer
        self.num_input = layer.num_output
        self.input_w = layer.output_w
        self.input_h = layer.output_h
        self.output_w = 1
        self.output_h = 1
        self.num_output = self.num_input * self.input_h * self.input_w
        self.num = layer.num
    def forward(self):
        self.input_data = self.upper_layer.forward()
        self.num = self.upper_layer.num
        self.output_data = self.input_data.reshape(self.input_data.shape[0], -1)
        return self.output_data
    def backward(self, diff):
        self.diff = diff.reshape(self.input_data.shape)
        self.upper_layer.backward(self.diff)
    def get_output(self):
        return self.output_data
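A minimal round-trip sketch (illustrative only): the forward pass is a reshape to (N, -1) and the backward pass is simply the inverse reshape of the incoming gradient.
import numpy as np

x = np.random.rand(2, 50, 4, 4)                    # e.g. pool2's output for 2 samples
flat = x.reshape(x.shape[0], -1)                   # forward: (2, 800)
grad_back = np.ones_like(flat).reshape(x.shape)    # backward: (2, 50, 4, 4)
print(flat.shape, grad_back.shape)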
Activation Functions
The activation functions used are ReLU and softmax; the Softmax layer also computes the cross-entropy loss and the initial gradient for backpropagation. They are implemented as follows:
class Relu:
    def __init__(self, layer):
        self.upper_layer = layer
        self.num_output = layer.num_output
        self.num = layer.num
        self.output_w = layer.output_w
        self.output_h = layer.output_h
    def forward(self):
        self.input_data = self.upper_layer.forward()
        self.num = self.upper_layer.num
        self.output_data = self.input_data.copy()
        self.output_data[self.output_data < 0] = 0
        return self.output_data
    def backward(self, diff):
        self.diff = diff.copy()
        self.diff[self.input_data < 0] = 0    # gradient is zero wherever the input was negative
        self.upper_layer.backward(self.diff)
    def get_output(self):
        return self.output_data
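A tiny illustrative check of the masking above (not part of the original code): negative inputs are zeroed in the forward pass, and the gradient is zeroed at exactly those positions in the backward pass.
import numpy as np

x = np.array([[-1.0, 2.0, -3.0, 4.0]])
out = x.copy()
out[out < 0] = 0                 # forward:  [[0. 2. 0. 4.]]
grad = np.ones_like(x)
grad[x < 0] = 0                  # backward: [[0. 1. 0. 1.]]
print(out, grad)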
class Softmax:
    def __init__(self, layer):
        self.upper_layer = layer
    def forward(self):
        self.input_data = self.upper_layer.forward()
        self.num = self.upper_layer.num
        _, self.dim = self.input_data.shape
        t = np.exp(self.input_data - self.input_data.max(1).reshape(-1, 1))    # subtract the row max for numerical stability
        self.softmax_data = t / t.sum(1).reshape(-1, 1)
        self.softmax_data[self.softmax_data < 1e-30] = 1e-30    # clip tiny probabilities so log() stays finite
        return self.softmax_data
    def calc_loss(self, label):
        s = np.tile(np.arange(self.dim), self.num).reshape(self.input_data.shape)
        gt_index = s == label.reshape(-1, 1)    # boolean mask of the ground-truth class for each sample
        loss = 0 - np.average(np.log(self.softmax_data[gt_index]))    # average cross-entropy loss
        self.diff = self.softmax_data.copy()
        self.diff[gt_index] = self.diff[gt_index] - 1.    # gradient w.r.t. the logits: softmax minus one-hot
        self.diff = self.diff / self.num
        return loss
    def backward(self, lr):
        self.upper_layer.backward(self.diff * lr)    # scale by the learning rate before propagating back
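Finally, a small numeric check (illustrative, not part of the original code) of the gradient used in calc_loss: for softmax cross-entropy, the gradient with respect to the logits is softmax(z) minus the one-hot label, which is exactly what calc_loss stores in self.diff before dividing by the batch size.
import numpy as np

z = np.array([[2.0, 1.0, 0.1]])            # logits for one sample
p = np.exp(z - z.max(1, keepdims=True))    # numerically stable softmax
p = p / p.sum(1, keepdims=True)
loss = -np.log(p[0, 1])                    # suppose the true class is 1
grad = p.copy()
grad[0, 1] -= 1.0                          # softmax minus one-hot
print(loss, grad)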
The network structure is now defined and implemented. In the final part we will cover how to train and test the model.