Experiment Objectives
Implement non-real-time image style transfer based on the VGG19 network model, using Python and its numpy module.
- Deepen the understanding of convolutional neural networks by using the VGG19 model to extract image features.
- Use numpy to compute the style and content loss functions involved in style transfer, and implement backpropagation for the layers in layer_2.py (a minimal sketch of the loss formulas follows this list).
- Replace the quadruple loops in the convolutional- and pooling-layer implementations with im2col + GEMM to speed up computation.
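For reference, the two losses follow the standard neural style transfer formulation of Gatys et al.; the numpy sketch below is illustrative only, and its function names are not part of the experiment code:

import numpy as np

def content_loss(F, P):
    # F, P: feature maps of the generated and content images, flattened to [C, H*W]
    return 0.5 * np.sum((F - P) ** 2)

def gram(F):
    # Gram matrix G = F F^T captures channel-wise feature correlations
    return np.dot(F, F.T)

def style_loss(F, A):
    # F, A: feature maps of the generated and style images, flattened to [C, H*W]
    C, M = F.shape
    return np.sum((gram(F) - gram(A)) ** 2) / (4.0 * C ** 2 * M ** 2)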
Experiment Code
- layer_2.py: baseline and accelerated implementations of the convolutional and pooling layers
# coding:utf-8
import numpy as np
import struct
import os
import time
def im2col(image, ksize, stride):
    # image is a 4d tensor ([batchsize, channel, height, width])
    image_col = []
    for b in range(image.shape[0]):
        for i in range(0, image.shape[2] - ksize + 1, stride):
            for j in range(0, image.shape[3] - ksize + 1, stride):
                col = image[b, :, i:i + ksize, j:j + ksize].reshape([-1])
                image_col.append(col)
    image_col = np.array(image_col)
    return image_col  # [N*H_out*W_out, C_in*k*k], with H_out = (H-k)/s + 1
def im2col_pool(image, ksize, stride):
    # image is a 4d tensor ([batchsize, channel, height, width]);
    # unlike im2col, the channel axis is kept separate for per-channel pooling
    image_col = []
    for b in range(image.shape[0]):
        for i in range(0, image.shape[2] - ksize + 1, stride):
            for j in range(0, image.shape[3] - ksize + 1, stride):
                col = image[b, :, i:i + ksize, j:j + ksize].reshape([image.shape[1], -1])
                image_col.append(col)
    image_col = np.array(image_col)
    return image_col  # [N*H_out*W_out, C_in, k*k]
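# Shape intuition (an added illustrative note): for a 1x3x5x5 input with ksize=3
# and stride=1 there are 3*3 = 9 patch positions, so im2col returns (9, 27) --
# one row per position, C_in*k*k = 27 columns -- while im2col_pool returns
# (9, 3, 9), keeping the 3 channels separate.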
class ConvolutionalLayer(object):
    def __init__(self, kernel_size, channel_in, channel_out, padding, stride, type=1):
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride
        self.forward = self.forward_raw
        self.backward = self.backward_raw
        if type == 1:  # when type is 1, use the optimized forward and backward functions
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        print('\tConvolutional layer with kernel size %d, input channel %d, output channel %d.' % (self.kernel_size, self.channel_in, self.channel_out))
    def init_param(self, std=0.01):
        # weight layout is [channel_in, kernel, kernel, channel_out]
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])
    def forward_raw(self, input):
        start_time = time.time()
        self.input = input  # [N, C, H, W]
        height = self.input.shape[2] + self.padding * 2
        width = self.input.shape[3] + self.padding * 2
        self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
        self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.channel_out):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        # forward pass of the convolution, Eq. (3.3): inner product
                        # of the input patch and the kernel, plus the bias
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        self.output[idxn, idxc, idxh, idxw] = np.sum(self.weight[:, :, :, idxc] * \
                            self.input_pad[idxn, :, hs:hs+self.kernel_size, ws:ws+self.kernel_size]) + \
                            self.bias[idxc]
        self.forward_time = time.time() - start_time
        return self.output
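    # Note (added remark): the quadruple loop above performs O(N * C_out * H_out
    # * W_out) Python-level iterations, each with a k*k*C_in multiply-add; the
    # im2col + GEMM version below folds all of them into a single np.dot call,
    # which is what makes it dramatically faster.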
    def forward_speedup(self, input):
        # speed up the forward pass with im2col + GEMM
        start_time = time.time()
        self.input = input
        N = self.input.shape[0]
        cin = self.weight.shape[0]
        cout = self.weight.shape[3]
        height = self.input.shape[2] + self.padding * 2
        width = self.input.shape[3] + self.padding * 2
        # 1. pad the input
        self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
        self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        # 2. reshape the weights: [cin, k, k, cout] -> [cin*k*k, cout]
        col_weight = np.reshape(self.weight, [-1, cout])
        # 3. rearrange input patches into rows (could itself be vectorized further)
        self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)  # -> [N*height_out*width_out, cin*k*k]
        # 4. a single matrix multiply replaces the four loops
        self.output = np.dot(self.col_image, col_weight) + self.bias
        # 5. reshape back to our layout [N, cout, height_out, width_out]
        self.output = np.reshape(self.output, [N, height_out, width_out, cout])
        self.output = np.transpose(self.output, [0, 3, 1, 2])
        self.forward_time = time.time() - start_time
        return self.output
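    # Sanity check (an illustrative sketch, not part of the original file): both
    # paths should agree on any input once they share parameters, e.g.
    #   conv = ConvolutionalLayer(3, 3, 4, padding=1, stride=1, type=0)
    #   conv.init_param()
    #   x = np.random.randn(2, 3, 8, 8)
    #   assert np.allclose(conv.forward_raw(x), conv.forward_speedup(x))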