PSMNet Debug (Part 2)

This post walks through the implementation of the feature extraction module in PSMNet: the convbn layer, the feature extraction layers, the fusion of features at different scales, and the merging of the left and right feature maps, showing how the output size changes at each layer to build a deeper understanding of its role in stereo matching.


submodule.py

1.

Input image tensor (N, C, H, W):

1
3
384
1248

def convbn(in_planes, out_planes, kernel_size, stride, pad, dilation):
    # Conv2d + BatchNorm2d; when dilation > 1, padding is set to the dilation
    # so that a 3x3 kernel still preserves the spatial size
    return nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
                                   padding=dilation if dilation > 1 else pad, dilation=dilation, bias=False),
                         nn.BatchNorm2d(out_planes))
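
The padding logic is worth a second look: with dilation > 1 the effective 3×3 kernel grows, so padding = dilation keeps the output size unchanged. A quick standalone check (dummy shapes chosen to match this post, not part of the original code):

import torch
import torch.nn as nn

x = torch.randn(1, 3, 384, 1248)
y1 = convbn(3, 32, 3, 1, 1, 1)(x)   # dilation 1, padding 1: size preserved
y2 = convbn(3, 32, 3, 1, 1, 2)(x)   # dilation 2, padding 2 (effective kernel 5x5): size preserved
print(y1.shape, y2.shape)           # both torch.Size([1, 32, 384, 1248])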
 

class feature_extraction(nn.Module):

    def __init__(self):
        print('feature_extraction 123')
        super(feature_extraction, self).__init__()
        self.inplanes = 32
        self.firstconv = nn.Sequential(convbn(3, 32, 3, 2, 1, 1),  # 2x downsampling: H and W each halved; 32 filters
                                       nn.ReLU(inplace=True),
                                       convbn(32, 32, 3, 1, 1, 1),
                                       nn.ReLU(inplace=True),
                                       convbn(32, 32, 3, 1, 1, 1),
                                       nn.ReLU(inplace=True))

This corresponds to the forward pass below:

def forward(self, x):

        output      = self.firstconv(x)
        print('submodule firstconv:')
        print(output.size()[0])
        print(output.size()[1])
        print(output.size()[2])
        print(output.size()[3])

submodule firstconv:
1
32
192
624
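
The 2× reduction (384×1248 → 192×624) comes entirely from the stride-2 first conv; the two convs after it are stride 1. A minimal sketch reproducing the shape, assuming the convbn defined above:

import torch
import torch.nn as nn

firstconv = nn.Sequential(convbn(3, 32, 3, 2, 1, 1),    # stride 2: 384x1248 -> 192x624
                          nn.ReLU(inplace=True),
                          convbn(32, 32, 3, 1, 1, 1),   # stride 1: size unchanged
                          nn.ReLU(inplace=True),
                          convbn(32, 32, 3, 1, 1, 1),
                          nn.ReLU(inplace=True))
print(firstconv(torch.randn(1, 3, 384, 1248)).shape)    # torch.Size([1, 32, 192, 624])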

 

2.

        self.lastconv = nn.Sequential(convbn(320, 128, 3, 1, 1, 1),
                                      nn.ReLU(inplace=True),
                                      nn.Conv2d(128, 32, kernel_size=1, padding=0, stride = 1, bias=False))

        output_feature = torch.cat((output_raw, output_skip, output_branch4, output_branch3, output_branch2, output_branch1), 1)

# output_branch4/3/2/1 each have 32 channels; output_raw has 64 and output_skip has 128. The other dimensions are all the same: 1, 96, 312.

Concatenated, the channel dimension is 320: (32 × 4 = 128) + 128 + 64 = 320.

        print('submodule torch.cat:')
        print(output_feature.size()[0])
        print(output_feature.size()[1])
        print(output_feature.size()[2])
        print(output_feature.size()[3])

        output_feature = self.lastconv(output_feature)
        print('submodule lastconv:')
        print(output_feature.size()[0])
        print(output_feature.size()[1])
        print(output_feature.size()[2])
        print(output_feature.size()[3])

submodule torch.cat:
1
320
96
312
submodule lastconv:
1
32
96
312
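
The channel arithmetic can be checked with dummy tensors of the shapes printed above (a minimal sketch, not the actual module):

import torch

N, H, W = 1, 96, 312
output_raw  = torch.randn(N,  64, H, W)                       # from layer2
output_skip = torch.randn(N, 128, H, W)                       # from layer4
branches    = [torch.randn(N, 32, H, W) for _ in range(4)]    # branch1..branch4 after upsampling

fused = torch.cat([output_raw, output_skip] + branches, 1)
print(fused.shape)   # torch.Size([1, 320, 96, 312]): 64 + 128 + 4*32 = 320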

 

3.

        self.branch2 = nn.Sequential(nn.AvgPool2d((32, 32), stride=(32,32)),  # 32x pooling: H 96 -> 3, W 312 -> 9
                                     convbn(128, 32, 1, 1, 0, 1),  # channels: 128 -> 32
                                     nn.ReLU(inplace=True))

        output_skip = self.layer4(output)
        print('submodule layer4 output_skip:')
        print(output_skip.size()[0])
        print(output_skip.size()[1])
        print(output_skip.size()[2])
        print(output_skip.size()[3])

        output_branch1 = self.branch1(output_skip)
        output_branch1 = F.upsample(output_branch1, (output_skip.size()[2],output_skip.size()[3]),mode='bilinear')

        output_branch2 = self.branch2(output_skip)
        print('submodule branch2 output_branch2:')
        print(output_branch2.size()[0])
        print(output_branch2.size()[1])
        print(output_branch2.size()[2])
        print(output_branch2.size()[3])
        output_branch2 = F.upsample(output_branch2, (output_skip.size()[2],output_skip.size()[3]),mode='bilinear')
        print('submodule upsample output_branch2:')
        print(output_branch2.size()[0])
        print(output_branch2.size()[1])
        print(output_branch2.size()[2])
        print(output_branch2.size()[3])

Output:

submodule layer4 output_skip:
1
128
96
312
submodule branch2 output_branch2:
1
32
3
9
submodule upsample output_branch2:
1
32
96
312
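
Each SPP branch follows the same pool → 1×1 conv → bilinear-upsample pattern. A standalone sketch of branch2 (F.upsample in the original code is the deprecated alias of F.interpolate, which is used here instead):

import torch
import torch.nn as nn
import torch.nn.functional as F

branch2 = nn.Sequential(nn.AvgPool2d((32, 32), stride=(32, 32)),   # 96x312 -> 3x9
                        convbn(128, 32, 1, 1, 0, 1),               # channels: 128 -> 32
                        nn.ReLU(inplace=True))
pooled = branch2(torch.randn(1, 128, 96, 312))
print(pooled.shape)                                                # torch.Size([1, 32, 3, 9])
up = F.interpolate(pooled, (96, 312), mode='bilinear', align_corners=False)
print(up.shape)                                                    # torch.Size([1, 32, 96, 312])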

 

4.

        self.branch3 = nn.Sequential(nn.AvgPool2d((16, 16), stride=(16,16)),  # 16x pooling: 96 -> 6, 312 -> 19
                                     convbn(128, 32, 1, 1, 0, 1),  # channels: 128 -> 32
                                     nn.ReLU(inplace=True))

 

        output_branch3 = self.branch3(output_skip)
        print('submodule branch3 output_branch3:')
        print(output_branch3.size()[0])
        print(output_branch3.size()[1])
        print(output_branch3.size()[2])
        print(output_branch3.size()[3])
        output_branch3 = F.upsample(output_branch3, (output_skip.size()[2],output_skip.size()[3]),mode='bilinear')
        print('submodule upsample output_branch3:')
        print(output_branch3.size()[0])
        print(output_branch3.size()[1])
        print(output_branch3.size()[2])
        print(output_branch3.size()[3])

submodule branch3 output_branch3:
1
32
6
19
submodule upsample output_branch3:
1
32
96
312
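
All four pooled sizes follow from the pooling formula floor((L − k) / s) + 1. A quick check for the 96×312 input, assuming branch1 and branch4 use 64×64 and 8×8 pooling as in the original PSMNet:

def pooled(length, k, s):
    # output length of AvgPool2d along one dimension
    return (length - k) // s + 1

for k in (64, 32, 16, 8):
    print(k, pooled(96, k, k), pooled(312, k, k))
# 64: 1 x 4,  32: 3 x 9,  16: 6 x 19,  8: 12 x 39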

 

5.

 

self.layer2 = self._make_layer(BasicBlock, 64, 16, 2, 1, 1)  # 16 blocks, stride 2: the only further downsampling

 

        output_raw  = self.layer2(output)
        print('submodule layer2 output_raw:')
        print(output_raw.size()[0])
        print(output_raw.size()[1])
        print(output_raw.size()[2])
        print(output_raw.size()[3])

Output:

submodule layer2 output_raw:
1
64
96
312

 

6.

 

class feature_extraction(nn.Module):

    def _make_layer(self, block, planes, blocks, stride, pad, dilation):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion))

        layers = []
        # only the first block may change stride/channels, so only it gets the downsample projection
        layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, 1, None, pad, dilation))

        return nn.Sequential(*layers)

def __init__(self):

        self.layer1 = self._make_layer(BasicBlock, 32, 3, 1, 1, 1)
        self.layer2 = self._make_layer(BasicBlock, 64, 16, 2, 1, 1)   # stride 2: 192x624 -> 96x312, channels 32 -> 64
        self.layer3 = self._make_layer(BasicBlock, 128, 3, 1, 1, 1)
        self.layer4 = self._make_layer(BasicBlock, 128, 3, 1, 1, 2)   # dilation 2: spatial size unchanged, 128 channels
And in forward():

        output      = self.layer1(output)
        output_raw  = self.layer2(output)
        print('submodule layer2 output_raw:')
        print(output_raw.size()[0])
        print(output_raw.size()[1])
        print(output_raw.size()[2])
        print(output_raw.size()[3])

        output      = self.layer3(output_raw)
        output_skip = self.layer4(output)
        print('submodule layer4 output_skip:')
        print(output_skip.size()[0])
        print(output_skip.size()[1])
        print(output_skip.size()[2])
        print(output_skip.size()[3])
Output; note the channel dimension: 64 from layer2 and 128 from layer4.

submodule layer2 output_raw:
1
64
96
312
submodule layer4 output_skip:
1
128
96
312
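
_make_layer stacks `blocks` residual blocks; only the first can change stride or channel count, which is why the 1×1 downsample projection exists. The BasicBlock itself is not shown in this post; below is a minimal sketch of the interface _make_layer expects (an outline under assumptions, not the exact submodule.py source):

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Sequential(convbn(inplanes, planes, 3, stride, pad, dilation),
                                   nn.ReLU(inplace=True))
        self.conv2 = convbn(planes, planes, 3, 1, pad, dilation)
        self.downsample = downsample   # 1x1 projection when stride/channels change

    def forward(self, x):
        out = self.conv2(self.conv1(x))
        if self.downsample is not None:
            x = self.downsample(x)
        return out + x                 # residual connection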

7.

Input: a color image. Output: a feature map with H/4 = 96, W/4 = 312 and 32 channels.

Input (C, H, W):

3
384
1248

Output (N, C, H, W):

1
32
96
312
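
An end-to-end shape check (a sketch; it assumes feature_extraction imports cleanly from models/submodule.py as laid out in the PSMNet repo):

import torch
from models.submodule import feature_extraction

net = feature_extraction().eval()
with torch.no_grad():
    feat = net(torch.randn(1, 3, 384, 1248))
print(feat.shape)   # torch.Size([1, 32, 96, 312])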

 

8.

The left and right feature maps are merged into a cost volume. The disparity range is likewise the maximum disparity divided by 4.

cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4,  refimg_fea.size()[2],  refimg_fea.size()[3]).zero_())

self.maxdisp // 4 is D, the number of disparity levels (integer division; the original repo writes self.maxdisp/4, which only works under Python 2).

refimg_fea.size()[1]*2 is the number of feature channels after merging the left and right maps.

 

stackhourglass.py

        refimg_fea     = self.feature_extraction(left)
        print('PSMNet forward 0000')
        targetimg_fea  = self.feature_extraction(right)
        print('PSMNet forward 1111')


        print('stackhourglass refimg_fea disp')
        print(refimg_fea.size()[0])
        print(refimg_fea.size()[1])
        print(refimg_fea.size()[2])
        print(refimg_fea.size()[3])

        print('stackhourglass targetimg_fea disp')
        print(targetimg_fea.size()[0])
        print(targetimg_fea.size()[1])
        print(targetimg_fea.size()[2])
        print(targetimg_fea.size()[3])

 

stackhourglass refimg_fea disp
1
32
96
312
stackhourglass targetimg_fea disp
1
32
96
312

 

9.

        cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4,  refimg_fea.size()[2],  refimg_fea.size()[3]).zero_())
        print('stackhourglass cost disp')
        print(cost.size()[0])
        print(cost.size()[1])  # two feature maps merged: 32*2 = 64
        print(cost.size()[2])  # disparity levels D
        print(cost.size()[3])  # height
        print(cost.size()[4])  # width

Output:

stackhourglass cost disp
1
64  
48
96
312

 

10.

stackhourglass.py

        for i in range(self.maxdisp // 4):  # 48 levels (maxdisp = 192, 192 // 4 = 48)
            if i > 0:
                cost[:, :refimg_fea.size()[1], i, :, i:] = refimg_fea[:, :, :, i:]      # channels 0:32 store the left features
                cost[:, refimg_fea.size()[1]:, i, :, i:] = targetimg_fea[:, :, :, :-i]  # channels 32:64 store the right features, shifted by i
            else:
                cost[:, :refimg_fea.size()[1], i, :, :] = refimg_fea                    # channels 0:32 store the left features
                cost[:, refimg_fea.size()[1]:, i, :, :] = targetimg_fea                 # channels 32:64 store the right features
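
A self-contained sketch of the same fill with dummy tensors (integer division // is required under Python 3; note the full-size volume allocates 1 × 64 × 48 × 96 × 312 floats, roughly 368 MB):

import torch

maxdisp = 192
N, C, H, W = 1, 32, 96, 312
refimg_fea    = torch.randn(N, C, H, W)    # left features
targetimg_fea = torch.randn(N, C, H, W)    # right features

D = maxdisp // 4                           # 48 disparity levels at quarter resolution
cost = torch.zeros(N, 2 * C, D, H, W)
for i in range(D):
    if i > 0:
        # at disparity i, left column x pairs with right column x - i
        cost[:, :C, i, :, i:] = refimg_fea[:, :, :, i:]
        cost[:, C:, i, :, i:] = targetimg_fea[:, :, :, :-i]
    else:
        cost[:, :C, i, :, :] = refimg_fea
        cost[:, C:, i, :, :] = targetimg_fea
print(cost.shape)                          # torch.Size([1, 64, 48, 96, 312])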

