PyTorch notes
- The core of torch is the Module class. In PyTorch there is essentially no sharp distinction between a Layer and a Module: custom layers, custom blocks, and custom models are all written by subclassing Module, which is the key point. The Sequential class itself also inherits from Module.
- PyTorch generally has no separate notion of a "layer"; a layer is treated as just another model (Module).
The Sequential class
Reference: https://blog.youkuaiyun.com/qq_27825451/article/details/90551513
class Sequential(Module):  # inherits from Module
    def __init__(self, *args):  # overrides the constructor
    def _get_item_by_idx(self, iterator, idx):
    def __getitem__(self, idx):
    def __setitem__(self, idx, module):
    def __delitem__(self, idx):
    def __len__(self):
    def __dir__(self):
    def forward(self, input):  # overrides the key method forward
# Containers other than Sequential
class Container(Module):
class Sequential(Module):
class ModuleList(Module):
class ModuleDict(Module):
class ParameterList(Module):
class ParameterDict(Module):
Creating a network with Sequential
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)
print(model)
print(model[2])  # access a sublayer by its index
'''Output:
Sequential(
  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)
Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
'''
Using Sequential to wrap groups of layers
import torch.nn as nn
from collections import OrderedDict

class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.dense_block = nn.Sequential(
            nn.Linear(32 * 3 * 3, 128),  # 32*3*3 assumes the pooled feature map is 3x3
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    # The connections between the layers, i.e. the forward pass, are defined here
    def forward(self, x):
        conv_out = self.conv_block(x)
        res = conv_out.view(conv_out.size(0), -1)  # flatten before the fully connected block
        out = self.dense_block(res)
        return out
model = MyNet()
print(model)
'''Output:
MyNet(
  (conv_block): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_block): Sequential(
    (0): Linear(in_features=288, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)
'''
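Sequential can also be built from an OrderedDict (the import shown in the snippet above), which gives each submodule a name instead of a numeric index; a minimal sketch:
import torch.nn as nn
from collections import OrderedDict

model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
]))
print(model)        # submodules are listed under their names instead of 0, 1, 2, ...
print(model.conv2)  # named submodules are also reachable as attributes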
The nn.Module class
class Module(object):
    def __init__(self):
    def forward(self, *input):
    def add_module(self, name, module):
    def cuda(self, device=None):
    def cpu(self):
    def __call__(self, *input, **kwargs):
    def parameters(self, recurse=True):
    def named_parameters(self, prefix='', recurse=True):
    def children(self):
    def named_children(self):
    def modules(self):
    def named_modules(self, memo=None, prefix=''):
    def train(self, mode=True):
    def eval(self):
    def zero_grad(self):
    def __repr__(self):
    def __dir__(self):
'''
Not all methods are listed here.
'''
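A minimal sketch of a few of these methods in action, on a throwaway Sequential model (the model itself is only illustrative):
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))

# named_parameters() walks every learnable tensor with its qualified name
for name, p in net.named_parameters():
    print(name, tuple(p.shape))   # e.g. '0.weight' (8, 4), '0.bias' (8,), ...

# children() yields the immediate submodules; modules() also recurses into them
print(list(net.children()))

# train()/eval() flip the training flag on the module and all of its submodules
net.eval()
print(net.training)               # False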
When defining our own network we need to subclass nn.Module and implement both the constructor __init__ and the forward method. A few points to note:
(1) Layers with learnable parameters (fully connected layers, convolutional layers, etc.) are usually placed in the constructor __init__().
(2) Layers without learnable parameters (ReLU, dropout, etc.) may be placed in the constructor or not; if they are not defined in __init__, the corresponding nn.functional calls can be used inside forward instead (a functional-style sketch follows the example below). Note that BatchNorm layers normally do carry learnable parameters, so they are better kept in __init__.
(3) forward must be overridden; it is where the model's computation, i.e. the connections between the layers, is implemented.
import torch

class MyNet(torch.nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()  # first call the parent constructor
        self.conv1 = torch.nn.Conv2d(3, 32, 3, 1, 1)
        self.relu1 = torch.nn.ReLU()
        self.max_pooling1 = torch.nn.MaxPool2d(2, 1)
        self.conv2 = torch.nn.Conv2d(32, 32, 3, 1, 1)  # conv1 outputs 32 channels, so conv2 takes 32 in-channels
        self.relu2 = torch.nn.ReLU()
        self.max_pooling2 = torch.nn.MaxPool2d(2, 1)
        self.dense1 = torch.nn.Linear(32 * 3 * 3, 128)  # 32*3*3 assumes the feature map is 3x3 when flattened
        self.dense2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.max_pooling1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.max_pooling2(x)
        x = x.view(x.size(0), -1)  # flatten before the fully connected layers
        x = self.dense1(x)
        x = self.dense2(x)
        return x

model = MyNet()
print(model)
'''Output:
MyNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (max_pooling1): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (max_pooling2): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  (dense1): Linear(in_features=288, out_features=128, bias=True)
  (dense2): Linear(in_features=128, out_features=10, bias=True)
)
'''
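As point (2) above mentions, the parameter-free layers can instead be called through nn.functional inside forward; a minimal sketch of that style (the class name and layer sizes are only illustrative):
import torch
import torch.nn.functional as F

class MyNetFunctional(torch.nn.Module):
    def __init__(self):
        super(MyNetFunctional, self).__init__()
        # only the layers with learnable parameters live in __init__
        self.conv1 = torch.nn.Conv2d(3, 32, 3, 1, 1)
        self.conv2 = torch.nn.Conv2d(32, 32, 3, 1, 1)
        self.dense1 = torch.nn.Linear(32 * 3 * 3, 128)
        self.dense2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        # ReLU and max pooling carry no parameters, so functional calls are enough
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten before the fully connected layers
        x = F.relu(self.dense1(x))
        return self.dense2(x)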
Miscellaneous notes
- The number of output channels of a convolutional layer equals the number of convolution kernels (filters).
- view: reshapes a tensor to a new shape without copying its data; passing -1 for one dimension lets PyTorch infer it (see the sketch after this list).
- squeeze and unsqueeze
  squeeze: compresses the tensor's dimensions; squeeze(a) removes every dimension of a whose size is 1.
  unsqueeze: inserts a dimension of size 1 at the given position.
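A throwaway sketch of these shape operations (the shapes are chosen only for illustration):
import torch

x = torch.randn(1, 32, 3, 3)           # e.g. a conv output: (batch, channels, H, W)
print(x.view(x.size(0), -1).shape)      # torch.Size([1, 288]) -- flatten; -1 is inferred

y = torch.randn(1, 5, 1)
print(y.squeeze().shape)                # torch.Size([5])    -- all size-1 dims removed
print(y.squeeze(dim=2).shape)           # torch.Size([1, 5]) -- only the given dim removed

z = torch.randn(5)
print(z.unsqueeze(0).shape)             # torch.Size([1, 5]) -- insert a size-1 dim at position 0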
VEN
TCN
Causal convolution
That is, the value at time t in a given layer depends only on the values at time t and earlier in the layer below.
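One common way to make an nn.Conv1d causal is to pad by (kernel_size - 1) * dilation and then drop the extra trailing outputs; a minimal sketch of that trick (the Chomp1d module here follows the usual TCN implementation, which the TemporalBlock below keeps commented out):
import torch
import torch.nn as nn

class Chomp1d(nn.Module):
    # drops the last chomp_size time steps so the padded convolution becomes causal
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()

kernel_size, dilation = 3, 2
padding = (kernel_size - 1) * dilation
causal = nn.Sequential(
    nn.Conv1d(16, 16, kernel_size, padding=padding, dilation=dilation),
    Chomp1d(padding),   # the output at time t now only sees inputs at times <= t
)
print(causal(torch.randn(1, 16, 50)).shape)  # torch.Size([1, 16, 50]) -- length preserved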
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # weight_norm re-parameterizes the convolution weights (magnitude/direction split)
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        # self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.RReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        # self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.RReLU()
        self.dropout2 = nn.Dropout(dropout)
        # self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
        #                          self.conv2, self.chomp2, self.relu2, self.dropout2)
        self.net = nn.Sequential(self.conv1, self.relu1, self.dropout1,
                                 self.conv2, self.relu2, self.dropout2)
        # 1x1 convolution matches channel counts for the residual connection
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.RReLU()
        self.init_weights()

    def init_weights(self):
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        out = self.net(x)
        # residual connection: add the (possibly downsampled) input back in
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)
class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            # the dilation doubles at every level: 1, 2, 4, ...
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i - 1]
            out_channels = num_channels[i]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1,
                                     dilation=dilation_size,
                                     padding=int((kernel_size - 1) * dilation_size / 2),
                                     dropout=dropout)]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)
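A quick, hedged usage sketch of TemporalConvNet (channel counts and lengths are only illustrative; nn.Conv1d expects input shaped (batch, channels, length)):
tcn = TemporalConvNet(num_inputs=64, num_channels=[128, 128, 64], kernel_size=3, dropout=0.2)
x = torch.randn(8, 64, 100)   # (batch, channels, time steps)
print(tcn(x).shape)           # torch.Size([8, 64, 100]) -- the symmetric padding keeps the length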
# LEN_FEATURE_V, LEN_FEATURE_B, DROP_OUT, STRIDE and KERNEL_SIZE are project-level constants defined elsewhere.
class VideoModule(nn.Module):
    def __init__(self):
        super(VideoModule, self).__init__()
        self.tcn = TemporalConvNet(LEN_FEATURE_V, [2048, 1024, 512, 256], kernel_size=3, dropout=DROP_OUT)
        # one 1-D convolution (256 -> 256 channels) per (stride, kernel_size) pair
        self.conv1d = nn.ModuleList([nn.Conv1d(in_channels=256,
                                               out_channels=256,
                                               kernel_size=kernel_size,
                                               stride=stride,
                                               dilation=1)
                                     for stride, kernel_size in zip(STRIDE, KERNEL_SIZE)])
        self.net = nn.Sequential(
            nn.RReLU(),
            nn.Linear(256, LEN_FEATURE_B),
            nn.RReLU(),
            nn.Linear(LEN_FEATURE_B, LEN_FEATURE_B)
        )

    def forward(self, feed):
        feed = self.tcn(feed)
        out = []
        # apply each convolution only when the sequence is long enough for its kernel
        for kernel_size, conv1d in zip(KERNEL_SIZE, self.conv1d):
            if feed.shape[2] >= kernel_size:
                tmp = conv1d(feed)
                out.append(tmp)
        # concatenate along the time axis, then move channels last for the Linear layers
        out = torch.cat(out, dim=2).permute(0, 2, 1)
        out = torch.squeeze(out, dim=0)
        out = self.net(out)
        return out
QEN
A three-layer fully connected network
class SentenceModule(nn.Module):
    def __init__(self):
        super(SentenceModule, self).__init__()
        # three fully connected layers; LEN_FEATURE_S and LEN_FEATURE_B are project-level constants
        self.net = nn.Sequential(
            nn.Linear(LEN_FEATURE_S, 512),   # applies a linear transform to the input: y = A x + b
            nn.RReLU(),
            nn.Linear(512, 256),
            nn.RReLU(),
            nn.Linear(256, LEN_FEATURE_B)
        )

    def forward(self, feed):
        out = self.net(feed)
        return out