None_local_block in TensorFlow

This is not the official implementation of the non-local block described in the paper:

Non-local Neural Networks https://arxiv.org/pdf/1711.07971.pdf
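
For reference, the generic non-local operation defined in the paper computes, for every position i, a weighted sum over all positions j, followed by a residual connection:

```latex
y_i = \frac{1}{\mathcal{C}(x)} \sum_{\forall j} f(x_i, x_j)\, g(x_j), \qquad z_i = W_z\, y_i + x_i
```

The four `mode` values in the code below correspond to the paper's four instantiations of the pairwise function f: Gaussian, embedded Gaussian, dot product, and concatenation.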

This code only supports 2D image data, but it can easily be adapted to 3D or 4D data.

The full code is available at https://github.com/jack-Dong/None_local_block

Code


```python
import tensorflow as tf

# `conv2d(x, in_channels, out_channels, kernel_size)` is a plain convolution
# helper defined in the linked repository.


def non_local_block(input_tensor, computation_compression=2, mode='dot'):
    if mode not in ['gaussian', 'embedded', 'dot', 'concatenate']:
        raise ValueError('`mode` must be one of `gaussian`, `embedded`, `dot`, `concatenate`')

    input_shape = input_tensor.get_shape().as_list()
    print(input_shape)
    batchsize, dim1, dim2, channels = input_shape

    if mode == 'gaussian':  # Gaussian instantiation
        x1 = tf.reshape(input_tensor, shape=[-1, dim1 * dim2, channels])
        x2 = tf.reshape(input_tensor, shape=[-1, dim1 * dim2, channels])

        f = tf.matmul(x1, x2, transpose_b=True)

        # row-wise softmax: normalize the responses for each query position
        f = tf.nn.softmax(f, axis=-1)

        print("gaussian f=", f)

    elif mode == 'dot':  # dot-product instantiation
        # adding a BN + ReLU layer before the conv will speed up training
        theta = conv2d(input_tensor, channels, channels // 2, 1)
        theta = tf.reshape(theta, shape=[-1, dim1 * dim2, channels // 2])

        phi = conv2d(input_tensor, channels, channels // 2, 1)
        phi = tf.reshape(phi, shape=[-1, dim1 * dim2, channels // 2])

        f = tf.matmul(theta, phi, transpose_b=True)

        # scale the values to make the output size invariant
        f = f / (dim1 * dim2 * channels)

        print("dot f=", f)

    elif mode == 'concatenate':
        # this instantiation costs a lot of memory, so only use it on
        # low-resolution feature maps (e.g. 14x14 or 7x7)
        theta = conv2d(input_tensor, channels, channels // 2, 1)
        theta = tf.reshape(theta, shape=[-1, dim1 * dim2, channels // 2])

        phi = conv2d(input_tensor, channels, channels // 2, 1)
        phi = tf.reshape(phi, shape=[-1, dim1 * dim2, channels // 2])

        theta_splits = tf.split(theta, dim1 * dim2, 1)
        phi_splits = tf.split(phi, dim1 * dim2, 1)

        theta_split_shape = tf.shape(theta_splits[0])
        print("theta_split_shape", theta_split_shape)

        # learned projection that maps each concatenated pair to a scalar
        initial = tf.constant(1.0 / channels, shape=[channels, 1])
        W_concat = tf.Variable(initial)
        print("W_concat", W_concat)

        f_matrix = []
        for i in range(dim1 * dim2):
            for j in range(dim1 * dim2):
                tmp = tf.concat([theta_splits[i], phi_splits[j]], 2)
                tmp = tf.reshape(tmp, shape=[-1, channels])
                tmp = tf.matmul(tmp, W_concat)
                f_matrix.append(tmp)

        f_matrix_tensor = tf.stack(f_matrix, axis=2)
        print('f_matrix_tensor', f_matrix_tensor)

        f = tf.reshape(f_matrix_tensor, shape=[-1, dim1 * dim2, dim1 * dim2])

        f = f / (dim1 * dim2 * channels)

        print("concatenate f=", f)

    else:  # Embedded Gaussian instantiation
        theta = conv2d(input_tensor, channels, channels // 2, 1)
        theta = tf.reshape(theta, shape=[-1, dim1 * dim2, channels // 2])

        phi = conv2d(input_tensor, channels, channels // 2, 1)
        phi = tf.reshape(phi, shape=[-1, dim1 * dim2, channels // 2])

        if computation_compression > 1:
            # subsample phi to shrink the pairwise matrix
            phi = tf.layers.max_pooling1d(phi, pool_size=2, strides=computation_compression, padding='SAME')
            print('phi', phi)

        f = tf.matmul(theta, phi, transpose_b=True)

        # row-wise softmax: normalize the responses for each query position
        f = tf.nn.softmax(f, axis=-1)

        print("embedded f=", f)

    g = conv2d(input_tensor, channels, channels // 2, 1)
    g = tf.reshape(g, shape=[-1, dim1 * dim2, channels // 2])

    if computation_compression > 1 and mode == 'embedded':
        # subsample g to match the subsampled phi
        g = tf.layers.max_pooling1d(g, pool_size=2, strides=computation_compression, padding='SAME')
        print('g', g)

    y = tf.matmul(f, g)
    print('y=', y)

    y = tf.reshape(y, shape=[-1, dim1, dim2, channels // 2])

    # map back to the original number of channels
    y = conv2d(y, channels // 2, channels, kernel_size=3)
    print('y=', y)

    # residual connection
    residual = input_tensor + y

    return residual
```
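
The `conv2d` calls above refer to a small convolution helper defined in the linked repository. As a rough sketch only (not the repository's exact helper, and assuming the TF 1.x APIs already used above), it could look like the following, together with a minimal usage example of the block:

```python
import tensorflow as tf

# A minimal sketch, NOT the repository's exact helper: a plain convolution whose
# weights are created inline (TF 1.x style, matching the tf.layers calls above).
def conv2d(x, in_channels, out_channels, kernel_size):
    w = tf.Variable(tf.truncated_normal(
        [kernel_size, kernel_size, in_channels, out_channels], stddev=0.1))
    b = tf.Variable(tf.zeros([out_channels]))
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') + b


# Usage sketch: apply the block to a hypothetical 14x14x256 feature map.
features = tf.placeholder(tf.float32, shape=[None, 14, 14, 256])
out = non_local_block(features, computation_compression=2, mode='embedded')
# `out` has the same shape as `features`, so the block can be inserted between
# any two convolution stages without changing the surrounding architecture.
```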
Non-local block in Keras

The non-local block is a neural network module for image and video data that captures global context information and can therefore improve model performance. Below is a Keras implementation of the non-local block.

First, import TensorFlow and the Keras layers module:

```python
import tensorflow as tf
from tensorflow.keras import layers
```

Next, define a Keras layer named NonLocalBlock:

```python
class NonLocalBlock(layers.Layer):
    def __init__(self, inter_channels=None, sub_sample=True):
        super(NonLocalBlock, self).__init__()
        self.inter_channels = inter_channels
        self.sub_sample = sub_sample

    def build(self, input_shape):
        self.input_channels = input_shape[-1]
        if self.inter_channels is None:
            self.inter_channels = self.input_channels // 2

        # theta, phi and g are three separate 1x1 projections of the input;
        # out_conv maps the aggregated features back to the input channel count
        self.theta = layers.Conv2D(filters=self.inter_channels, kernel_size=(1, 1),
                                   strides=(1, 1), padding='same')
        self.phi = layers.Conv2D(filters=self.inter_channels, kernel_size=(1, 1),
                                 strides=(1, 1), padding='same')
        self.g = layers.Conv2D(filters=self.inter_channels, kernel_size=(1, 1),
                               strides=(1, 1), padding='same')
        self.out_conv = layers.Conv2D(filters=self.input_channels, kernel_size=(1, 1),
                                      strides=(1, 1), padding='same')

        if self.sub_sample:
            self.pool = layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='same')

    def call(self, inputs):
        if self.sub_sample:
            x = self.pool(inputs)
        else:
            x = inputs

        batch_size = tf.shape(x)[0]
        spatial_size = tf.shape(x)[1:3]

        theta = self.theta(x)
        phi = self.phi(x)
        g = self.g(x)

        # flatten the spatial dimensions so positions can be compared pairwise
        theta = tf.reshape(theta, [batch_size, -1, self.inter_channels])
        phi = tf.reshape(phi, [batch_size, -1, self.inter_channels])
        g = tf.reshape(g, [batch_size, -1, self.inter_channels])

        # dot-product similarity between all positions, normalized with a softmax
        theta_phi = tf.matmul(theta, phi, transpose_b=True)
        theta_phi = tf.nn.softmax(theta_phi, axis=-1)

        # aggregate g with the attention weights and restore the spatial layout
        g = tf.matmul(theta_phi, g)
        g = tf.reshape(g, [batch_size, spatial_size[0], spatial_size[1], self.inter_channels])

        out = self.out_conv(g)

        if self.sub_sample:
            # upsample back to the input resolution before the residual addition
            out = tf.image.resize(out, tf.shape(inputs)[1:3],
                                  method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        return out + inputs
```

The NonLocalBlock layer has two parameters: inter_channels, the number of filters used inside the block, and sub_sample, which controls whether the input is downsampled first. In __init__ these two parameters are stored as attributes. In build, four convolution layers are created: theta, phi and g project the input through three separate convolutions, and out_conv produces the final output. In call, the input is first downsampled (if requested), then theta, phi and g are reshaped so they can be multiplied as matrices. The dot product of theta and phi is turned into weights with a softmax, the weights are multiplied with g, and the result is reshaped back into a feature map (and upsampled to the input resolution when downsampling is used). Finally, the result is added to the input and returned.

The NonLocalBlock layer can be tested as follows:

```python
inputs = layers.Input(shape=(None, None, 64))
x = NonLocalBlock(inter_channels=32, sub_sample=True)(inputs)
model = tf.keras.Model(inputs, x)
```

The model takes a feature map of shape (None, None, 64) and produces an output with the same shape; model.summary() can be used to inspect its structure. That is how the non-local block can be implemented in Keras.
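
To sanity-check the layer outside of a Model (a minimal sketch, assuming TensorFlow 2.x with eager execution), a random feature map can be passed through it directly and the output shape compared with the input:

```python
import tensorflow as tf

x = tf.random.normal([2, 28, 28, 64])            # dummy batch of feature maps
block = NonLocalBlock(inter_channels=32, sub_sample=True)
y = block(x)
print(y.shape)  # expected: (2, 28, 28, 64), same shape as the input
```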