[BasicConv2d(
(conv): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True), BasicConv2d(
(conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), FocalConv2d(
(conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True), FocalConv2d(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), FocalConv2d(
(conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True), FocalConv2d(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
), LeakyReLU(negative_slope=0.01, inplace=True)]
GaitPart(
输入sils[128, 30, 64, 44] → [128, 1, 30, 64, 44]
(Backbone): SetBlockWrapper(
转置成[3840, 1, 64, 44]
(forward_block): Plain(
(feature): Sequential(
(0): BasicConv2d(
(conv): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
)
[3840, 32, 64, 44]
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv2d(
(conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(3): LeakyReLU(negative_slope=0.01, inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
[3840, 32, 32, 22]
(5): FocalConv2d(
(conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
[3840, 64, 32, 22]
(6): LeakyReLU(negative_slope=0.01, inplace=True)
(7): FocalConv2d(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(8): LeakyReLU(negative_slope=0.01, inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
[3840, 64, 16, 11]
(10): FocalConv2d(
(conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
[3840, 128, 16, 11]
(11): LeakyReLU(negative_slope=0.01, inplace=True)
(12): FocalConv2d(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(13): LeakyReLU(negative_slope=0.01, inplace=True)
)
[3840, 128, 16, 11]再转置得到out[128, 128, 30, 16, 11]
)
)
(HPP): SetBlockWrapper()
输入out转置成[3840, 128, 16, 11]
其实是HorizontalPoolingPyramid
不过这里bin只有16
所以只有一个[3840, 128, 16]
再转置成out[128, 128, 30, 16]
(TFA): PackSequenceWrapper(
(pooling_func): TemporalFeatureAggregator(
out转置为x[16, 128, 128, 30]
再split成16个[1, 128, 128, 30]组成的tuple feature
让每一个feature经过下面的conv1d3x1每一层
(conv1d3x1): ModuleList(
(0): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(1): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(2): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(3): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(4): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(5): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(6): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(7): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(8): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(9): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(10): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(11): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(12): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(13): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(14): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
(15): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(1,), stride=(1,), bias=False)
)
)
)
每层的结果是[1, 128, 128, 30]
拼接得到logits3x1[16, 128, 128, 30]
sigmoid后得到scores3x1[16, 128, 128, 30]
(avg_pool3x1): AvgPool1d(kernel_size=(3,), stride=(1,), padding=(1,))
(max_pool3x1): MaxPool1d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
x[2048, 128, 30]送入上面两种池化后求和得到feature3x1[2048, 128, 30]
view成[16, 128, 128, 30]
再与scores3x1相乘得到feature3x1
(conv1d3x3): ModuleList(
(0): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(1): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(2): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(3): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(4): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(5): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(6): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(7): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(8): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(9): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(10): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(11): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(12): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(13): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(14): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
(15): Sequential(
(0): BasicConv1d(
(conv): Conv1d(128, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): BasicConv1d(
(conv): Conv1d(32, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
)
)
)
每层同样得到[1, 128, 128, 30]
拼接得到logits3x3也是一样的[16, 128, 128, 30]
sigmoid后是scores3x3
(avg_pool3x3): AvgPool1d(kernel_size=(5,), stride=(1,), padding=(2,))
(max_pool3x3): MaxPool1d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)
)
两种池化后求和,再view后与scores3x3相乘得到feature3x3[16, 128, 128, 30]
和feature3x1求和后最大池化
得到ret[16, 128, 128]再转置出out[128, 128, 16]
(Head): SeparateFCs()
得到embs[128, 128, 16]
)
(loss_aggregator): LossAggregator(
(losses): ModuleDict(
(triplet): TripletLoss()
)
)
)
{'training_feat': {
'triplet': {
'embeddings': embs[128, 128, 16],
'labels': labs（128维的标签向量）}},
'visual_summary': {
'image/sils': sils经view成[3840, 1, 64, 44]},
'inference_feat': {
'embeddings': embs[128, 128, 16]}}