Experiment Objectives
Implement non-real-time image style transfer based on the VGG19 network model, using Python and its numpy module.
- Deepen the understanding of convolutional neural networks by using the VGG19 model to extract image features.
- Use numpy to compute the style and content loss functions involved in style transfer, and implement backpropagation for the layers in layer_2.py (a minimal sketch of the loss formulas follows this list).
- Replace the quadruple loops in the convolutional- and pooling-layer implementations with im2col + GEMM to speed up computation.
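For reference, the two losses follow the standard neural style transfer formulation of Gatys et al.; the numpy sketch below is illustrative only, and its function names are not part of the experiment code:

import numpy as np

def content_loss(F, P):
    # F, P: feature maps of the generated and content images, flattened to [C, H*W]
    return 0.5 * np.sum((F - P) ** 2)

def gram(F):
    # Gram matrix G = F F^T captures channel-wise feature correlations
    return np.dot(F, F.T)

def style_loss(F, A):
    # F, A: feature maps of the generated and style images, flattened to [C, H*W]
    C, M = F.shape
    return np.sum((gram(F) - gram(A)) ** 2) / (4.0 * C ** 2 * M ** 2)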
Experiment Code
- layer_2.py: baseline and accelerated implementations of the convolutional and pooling layers
# coding:utf-8
import numpy as np
import struct
import os
import time
def im2col(image, ksize, stride):
    # image is a 4d tensor ([batchsize, channel, height, width])
    image_col = []
    for b in range(image.shape[0]):
        for i in range(0, image.shape[2] - ksize + 1, stride):
            for j in range(0, image.shape[3] - ksize + 1, stride):
                col = image[b, :, i:i + ksize, j:j + ksize].reshape([-1])
                image_col.append(col)
    image_col = np.array(image_col)
    return image_col  # [N*H_out*W_out, C_in*k*k], with H_out = (H-k)/s + 1
def im2col_pool(image, ksize, stride):
    # image is a 4d tensor ([batchsize, channel, height, width]);
    # unlike im2col, the channel axis is kept separate for per-channel pooling
    image_col = []
    for b in range(image.shape[0]):
        for i in range(0, image.shape[2] - ksize + 1, stride):
            for j in range(0, image.shape[3] - ksize + 1, stride):
                col = image[b, :, i:i + ksize, j:j + ksize].reshape([image.shape[1], -1])
                image_col.append(col)
    image_col = np.array(image_col)
    return image_col  # [N*H_out*W_out, C_in, k*k]
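# Shape intuition (an added illustrative note): for a 1x3x5x5 input with ksize=3
# and stride=1 there are 3*3 = 9 patch positions, so im2col returns (9, 27) --
# one row per position, C_in*k*k = 27 columns -- while im2col_pool returns
# (9, 3, 9), keeping the 3 channels separate.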
class ConvolutionalLayer(object):
    def __init__(self, kernel_size, channel_in, channel_out, padding, stride, type=1):
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride
        self.forward = self.forward_raw
        self.backward = self.backward_raw
        if type == 1:  # when type is 1, use the optimized forward and backward functions
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        print('\tConvolutional layer with kernel size %d, input channel %d, output channel %d.' % (self.kernel_size, self.channel_in, self.channel_out))
    def init_param(self, std=0.01):
        # weight layout is [channel_in, kernel, kernel, channel_out]
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])
    def forward_raw(self, input):
        start_time = time.time()
        self.input = input  # [N, C, H, W]
        height = self.input.shape[2] + self.padding * 2
        width = self.input.shape[3] + self.padding * 2
        self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
        self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.channel_out):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        # forward pass of the convolution, Eq. (3.3): inner product
                        # of the input patch and the kernel, plus the bias
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        self.output[idxn, idxc, idxh, idxw] = np.sum(self.weight[:, :, :, idxc] * \
                            self.input_pad[idxn, :, hs:hs+self.kernel_size, ws:ws+self.kernel_size]) + \
                            self.bias[idxc]
        self.forward_time = time.time() - start_time
        return self.output
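    # Note (added remark): the quadruple loop above performs O(N * C_out * H_out
    # * W_out) Python-level iterations, each with a k*k*C_in multiply-add; the
    # im2col + GEMM version below folds all of them into a single np.dot call,
    # which is what makes it dramatically faster.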
    def forward_speedup(self, input):
        # speed up the forward pass with im2col + GEMM
        start_time = time.time()
        self.input = input
        N = self.input.shape[0]
        cin = self.weight.shape[0]
        cout = self.weight.shape[3]
        height = self.input.shape[2] + self.padding * 2
        width = self.input.shape[3] + self.padding * 2
        # 1. pad the input
        self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
        self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        # 2. reshape the weights: [cin, k, k, cout] -> [cin*k*k, cout]
        col_weight = np.reshape(self.weight, [-1, cout])
        # 3. rearrange input patches into rows (could itself be vectorized further)
        self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)  # -> [N*height_out*width_out, cin*k*k]
        # 4. a single matrix multiply replaces the four loops
        self.output = np.dot(self.col_image, col_weight) + self.bias
        # 5. reshape back to our layout [N, cout, height_out, width_out]
        self.output = np.reshape(self.output, [N, height_out, width_out, cout])
        self.output = np.transpose(self.output, [0, 3, 1, 2])
        self.forward_time = time.time() - start_time
        return self.output
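    # Sanity check (an illustrative sketch, not part of the original file): both
    # paths should agree on any input once they share parameters, e.g.
    #   conv = ConvolutionalLayer(3, 3, 4, padding=1, stride=1, type=0)
    #   conv.init_param()
    #   x = np.random.randn(2, 3, 8, 8)
    #   assert np.allclose(conv.forward_raw(x), conv.forward_speedup(x))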