【深度学习】神经风格迁移算法的实现-CSDN博客

本文链接：https://blog.csdn.net/dannnnnnnnnnnn/article/details/122931812

问题描述：

神经风格迁移是指将参考图像的风格应用于目标图像，同时保留目标图像的内容。

实现风格迁移背后的关键概念与所有深度学习算法的核心思想是一样的：定义一个损失函数来指定想要实现的目标，然后将这个损失最小化。你知道想要实现的目标是什么，就是保存原始图像的内容，同时采用参考图像的风格。

代码展示：

from tensorflow import keras
import tensorflow as tf


# Fetch the content ("base") image and the style reference image with
# keras.utils.get_file and keep the local paths it returns.
base_image_path = keras.utils.get_file(
    fname="sf.jpg",
    origin="https://img-datasets.s3.amazonaws.com/sf.jpg",
)
style_reference_image_path = keras.utils.get_file(
    fname="starry_night.jpg",
    origin="https://img-datasets.s3.amazonaws.com/starry_night.jpg",
)

# Working resolution: the height is fixed at 400 and the width is scaled by
# the same factor so the base image keeps its aspect ratio.
img_height = 400
original_width, original_height = keras.utils.load_img(base_image_path).size
img_width = round(original_width * img_height / original_height)

#图片处理
import numpy as np
def preprocess_image(image_path):
    """Load an image file and turn it into a VGG19-ready batch of one.

    The image is loaded at (img_height, img_width), converted to an array,
    given a leading axis of length 1, and passed through
    keras.applications.vgg19.preprocess_input.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The array returned by the VGG19 preprocess_input call.
    """
    pil_image = keras.utils.load_img(
        image_path, target_size=(img_height, img_width)
    )
    pixels = keras.utils.img_to_array(pil_image)
    # The model consumes batches, so prepend a batch axis of size 1.
    batch = np.expand_dims(pixels, axis=0)
    return keras.applications.vgg19.preprocess_input(batch)

def deprocess_image(img):
    """Convert a preprocessed image array back into a uint8 image.

    Adds the per-channel offsets 103.939 / 116.779 / 123.68 back, reverses
    the order of the last (channel) axis, and clips the result to [0, 255].
    Presumably this undoes the BGR conversion and mean-centering applied by
    the VGG19 preprocess_input step; confirm against the Keras docs.

    The input is copied before any arithmetic, so the caller's array is
    never modified.

    Args:
        img: NumPy array holding exactly img_height * img_width * 3 values.

    Returns:
        A uint8 array of shape (img_height, img_width, 3).
    """
    # reshape() can return a view onto the caller's buffer, and the in-place
    # additions below would then leak back into the argument; copy first.
    img = np.array(img, copy=True).reshape((img_height, img_width, 3))
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68
    # Reverse the channel order.
    img = img[:, :, ::-1]
    img = np.clip(img, 0, 255).astype("uint8")
    return img

# Use the model to create a feature extractor.
# VGG19 is instantiated with ImageNet weights and include_top=False.
model = keras.applications.vgg19.VGG19(include_top=False, weights="imagenet")

# Map each layer name to that layer's output so a single call to the
# extractor returns a dict holding every layer's activations.
output_dict = {layer.name: layer.output for layer in model.layers}
feature_extractor = keras.Model(inputs=model.inputs, outputs=output_dict)

# Content loss
def content_loss(base_img, combination_img):
    """Return the sum of squared element-wise differences of the two inputs.

    Args:
        base_img: Features of the base (content) image.
        combination_img: Features of the generated image.

    Returns:
        A scalar tensor: reduce_sum((combination_img - base_img) ** 2).
    """
    difference = combination_img - base_img
    return tf.reduce_sum(tf.square(difference))
    
# Style loss
def gram_matrix(x):
    """Return the Gram matrix of a rank-3 feature tensor.

    Axis 2 of x is moved to the front, the two remaining axes are flattened
    into one, and the resulting 2-D matrix is multiplied by its own
    transpose.

    Args:
        x: Rank-3 tensor (presumably height x width x channels; verify
            against the caller).

    Returns:
        A square tensor whose side equals the size of axis 2 of x.
    """
    leading_last_axis = tf.transpose(x, (2, 0, 1))
    row_count = tf.shape(leading_last_axis)[0]
    flattened = tf.reshape(leading_last_axis, (row_count, -1))
    return tf.matmul(flattened, tf.transpose(flattened))

def style_loss(style_img, combination_img):
    """Return the normalized squared distance between two Gram matrices.

    Args:
        style_img: Features of the style reference image.
        combination_img: Features of the generated image.

    Returns:
        A scalar tensor: the sum of squared Gram-matrix differences divided
        by 4 * channels**2 * size**2, with channels fixed at 3 and
        size = img_height * img_width.
    """
    gram_difference = gram_matrix(style_img) - gram_matrix(combination_img)

    channels = 3
    size = img_height * img_width
    normalizer = 4.0 * (channels ** 2) * (size ** 2)
    return tf.reduce_sum(tf.square(gram_difference)) / normalizer


# Total variation loss
def total_variation_loss(x):
    """Return a penalty on differences between neighbouring entries of x.

    Each entry in the top-left (img_height - 1) x (img_width - 1) window is
    compared with its successor along axis 1 and along axis 2. The two
    squared differences are added, raised to the power 1.25, and summed.

    Args:
        x: Rank-4 tensor; axes 1 and 2 are sliced with img_height and
            img_width respectively.

    Returns:
        A scalar tensor holding the summed penalty.
    """
    anchor = x[:, : img_height - 1, : img_width - 1, :]
    next_along_axis1 = x[:, 1:, : img_width - 1, :]
    next_along_axis2 = x[:, : img_height - 1, 1:, :]

    axis1_sq = tf.square(anchor - next_along_axis1)
    axis2_sq = tf.square(anchor - next_along_axis2)
    return tf.reduce_sum(tf.pow(axis1_sq + axis2_sq, 1.25))

# Define the loss function to minimize.

# Layer whose activations feed the content loss.
content_layer_name = "block5_conv2"

# Layers whose activations feed the style loss (one per VGG19 block).
style_layer_names = [
    "block1_conv1",
    "block2_conv1",
    "block3_conv1",
    "block4_conv1",
    "block5_conv1",
]

# Relative weights of the three loss terms.
content_weight = 2.5e-8
style_weight = 1e-6
total_variation_weight = 1e-6


def compute_loss(combination_image, base_image, style_reference_image):
    """Return the total style-transfer loss for the current generated image.

    The three images are concatenated along axis 0 and run through
    feature_extractor in one call. The result is the sum of:
      * content_weight times the content loss at content_layer_name,
      * the style loss at every layer in style_layer_names, each weighted by
        style_weight / len(style_layer_names),
      * total_variation_weight times the total variation loss of the
        generated image.

    Args:
        combination_image: The generated image being optimized.
        base_image: The content image.
        style_reference_image: The style reference image.

    Returns:
        A scalar tensor holding the weighted sum described above.
    """
    # Batch positions after the concat: 0 = base, 1 = style reference,
    # 2 = combination (assumes each input holds exactly one image).
    stacked = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0
    )
    activations = feature_extractor(stacked)

    total = tf.zeros(shape=())

    # Content term: base image vs. generated image.
    content_activations = activations[content_layer_name]
    total = total + content_weight * content_loss(
        content_activations[0, :, :, :], content_activations[2, :, :, :]
    )

    # Style term: style reference vs. generated image, split evenly over
    # the style layers.
    per_layer_weight = style_weight / len(style_layer_names)
    for name in style_layer_names:
        layer_activations = activations[name]
        layer_style_loss = style_loss(
            layer_activations[1, :, :, :], layer_activations[2, :, :, :]
        )
        total += per_layer_weight * layer_style_loss

    # Total variation term on the generated image itself.
    total += total_variation_weight * total_variation_loss(combination_image)
    return total


# Set up the gradient-descent step.
@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_image):
    """Return the loss and its gradient with respect to the generated image.

    Args:
        combination_image: The generated image; gradients are taken with
            respect to this argument.
        base_image: The content image.
        style_reference_image: The style reference image.

    Returns:
        A (loss, grads) tuple, where grads is the gradient of loss with
        respect to combination_image.
    """
    with tf.GradientTape() as recorder:
        loss_value = compute_loss(
            combination_image, base_image, style_reference_image
        )
    gradients = recorder.gradient(loss_value, combination_image)
    return loss_value, gradients

# SGD whose learning rate starts at 100.0 and follows an exponential decay
# schedule (decay_rate 0.96, decay_steps 100).
_learning_rate_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=100.0,
    decay_steps=100,
    decay_rate=0.96,
)
optimizer = keras.optimizers.SGD(learning_rate=_learning_rate_schedule)

# Fixed inputs of the optimization.
train_base_image = preprocess_image(base_image_path)
train_style_reference_image = preprocess_image(style_reference_image_path)

# The generated image is initialized from the preprocessed base image and
# wrapped in a tf.Variable so the optimizer can update it.
train_combination_image = tf.Variable(preprocess_image(base_image_path))

# Run the optimization: each step computes the loss and its gradient with
# respect to the generated image, then lets the optimizer update that image.
iterations = 4000
for i in range(1, iterations + 1):
    loss, grads = compute_loss_and_grads(
        train_combination_image, train_base_image, train_style_reference_image
    )
    optimizer.apply_gradients([(grads, train_combination_image)])

    # Every 100 steps, report the loss and save a snapshot of the image.
    # Sample output recorded by the original author:
    #   Iteration 100: loss=8137.91
    #   Iteration 200: loss=6652.61
    #   Iteration 300: loss=6063.19
    #   Iteration 400: loss=5742.43
    #   Iteration 500: loss=5536.14
    if i % 100 == 0:
        print(f"Iteration {i}: loss={loss:.2f}")
        img = deprocess_image(train_combination_image.numpy())
        fname = f"combination_image_at_iteration_{i}.png"
        keras.utils.save_img(fname, img)