如何用Pytorch提取视频单帧网络特征+SVM分类 - 非End-to-End

本文介绍了一种从视频帧中提取深度特征的方法,并利用这些特征进行时域池化和SVM分类。首先使用预训练的ResNet152模型提取每帧图像的特征,然后通过平均、最大和最小池化技术聚合帧级特征到视频级表示,最后采用线性SVM进行分类。

1、提取网络特征

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
os.system('echo $CUDA_VISIBLE_DEVICES')

import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.autograd import Variable

import numpy as np
from PIL import Image

def pre_image(image_path):
    """Load one frame image and preprocess it into a (1, 3, 224, 224) CUDA tensor.

    Resizes the shorter side to 256, center-crops 224x224, converts to a float
    tensor, and applies the ImageNet normalization that pretrained torchvision
    models expect (without it the extracted features are off-distribution).

    Args:
        image_path: Path to the frame image file.

    Returns:
        A float CUDA tensor of shape (1, 3, 224, 224) with no gradient tracking.
    """
    trans = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Statistics used to train the torchvision ImageNet models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Force 3-channel RGB so grayscale / RGBA frames don't break the model.
    img = Image.open(image_path).convert('RGB')
    img = trans(img)
    # `Variable` is deprecated; plain tensors carry requires_grad=False by default.
    x = torch.unsqueeze(img, dim=0).float().cuda()
    return x

# Frozen frame-level feature extractor: pretrained ResNet-152 with its final
# fc layer removed, so the 2048-d global-average-pool output is the feature.
model = models.resnet152(pretrained=True).cuda()
extractor = nn.Sequential(*list(model.children())[:-1])
# eval() is essential here: in train mode BatchNorm would use per-batch
# statistics, and with batch size 1 that corrupts the extracted features.
extractor.eval()

feature_path = '/data/FrameFeature/Penn/'
video_path = '/home/UPenn_RGB/frames/'
with torch.no_grad():  # inference only — don't build the autograd graph
    for video in os.listdir(video_path):
        out_dir = os.path.join(feature_path, video)
        # Create the per-video output directory once, not per frame.
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        for frame in os.listdir(os.path.join(video_path, video)):
            image_path = os.path.join(video_path, video, frame)
            x = pre_image(image_path)
            y = extractor(x)
            # (1, 2048, 1, 1) -> (1, 2048) row vector for savetxt.
            y = y.data.cpu().numpy().reshape(1, 2048)
            np.savetxt(os.path.join(out_dir, frame.split('.')[0] + '.txt'),
                       y, delimiter=',')
        print(video)

2、时域pooling以及SVM分类器训练

import os
import numpy as np
import h5py
from sklearn import svm

def load_feature(video_name):
    """Load every per-frame feature file of one video into a 2-D array.

    Args:
        video_name: Directory holding one comma-separated .txt feature file
            per frame, as written by the extraction script.

    Returns:
        np.ndarray of shape (num_frames, feature_dim). Frames are ordered by
        sorted filename: os.listdir order is arbitrary, and a deterministic
        temporal order is required by order-sensitive poolings (e.g. the
        diff/dynamic poolings referenced elsewhere in this file).
    """
    frames = sorted(os.listdir(video_name))
    feature = [np.loadtxt(os.path.join(video_name, frame), delimiter=',')
               for frame in frames]
    return np.asarray(feature)

def mean_pool(feature):
    """Average the frame features over the temporal axis (axis 0)."""
    return feature.mean(axis=0)

def max_pool(feature):
    """Element-wise maximum of the frame features over the temporal axis."""
    return feature.max(axis=0)

def min_pool(feature):
    """Element-wise minimum of the frame features over the temporal axis."""
    return feature.min(axis=0)

if __name__ == '__main__':
    # ---- Data preparation (run once; results cached in HDF5) ------------
    # The pooled video-level features were precomputed from the split files
    # '/data/FrameFeature/Penn_train.txt' / 'Penn_test.txt' (one
    # "<video_name> <label>" pair per line): each video's frame features were
    # loaded with load_feature() and reduced with the pooling functions above
    # (plus sum_diff_pool / dynamic_pool, defined elsewhere), then stacked and
    # stored in Penn_train.h5 / Penn_test.h5 under the keys
    # {train,test}_{mean,max,min,diff,dyna,label}.

    ''' Read Data '''
    # Context managers guarantee the HDF5 handles close even on error.
    with h5py.File('/data/FrameFeature/Penn_train.h5', 'r') as h5file:
        train_mean = h5file['train_mean'][:]
        train_max = h5file['train_max'][:]
        train_min = h5file['train_min'][:]
        train_diff = h5file['train_diff'][:]  # loaded for completeness; unused below
        train_dyna = h5file['train_dyna'][:]  # loaded for completeness; unused below
        train_label = h5file['train_label'][:]

    with h5py.File('/data/FrameFeature/Penn_test.h5', 'r') as h5file:
        test_mean = h5file['test_mean'][:]
        test_max = h5file['test_max'][:]
        test_min = h5file['test_min'][:]
        test_diff = h5file['test_diff'][:]  # loaded for completeness; unused below
        test_dyna = h5file['test_dyna'][:]  # loaded for completeness; unused below
        test_label = h5file['test_label'][:]

    ''' Train SVM '''
    # One linear SVM per pooling strategy; each fit() retrains from scratch,
    # so the same estimator object can be reused.
    SVM = svm.SVC(kernel='linear')
    # Mean pooling
    SVM.fit(train_mean, train_label)
    print('Mean: ' + str(SVM.score(test_mean, test_label)))
    # Max pooling
    SVM.fit(train_max, train_label)
    print('Max: ' + str(SVM.score(test_max, test_label)))
    # Min pooling
    SVM.fit(train_min, train_label)
    print('Min: ' + str(SVM.score(test_min, test_label)))

### SVM结合Huber损失函数的分类器 #### 原理概述 支持向量机(Support Vector Machine, SVM)是一种经典的监督学习算法,主要用于解决分类和回归问题。传统的SVM通常基于Hinge损失函数来构建分类模型[^3]。然而,在某些场景下,数据可能含有较多噪声或异常值,这可能导致传统SVM模型的效果下降。 Huber损失作为一种鲁棒性强的损失函数,能够有效地处理异常值的影响。其核心思想在于:当预测值与真实值之间的偏差较小时,采用平方误差;而当偏差较大时,则切换到线性误差模式。因此,将Huber损失引入SVM框架中,可以在一定程度上增强模型对异常值的容忍能力[^4]。 #### 数学表达形式 假设给定一组训练样本 \((\mathbf{x}_i, y_i)\),其中 \(y_i\) 是标签 (\(y_i \in \{-1, 1\}\)),则标准SVM的目标是最小化以下目标函数: \[ \min_{\mathbf{w}, b} \frac{1}{2} ||\mathbf{w}||^2 + C \sum_{i=1}^{n} \max(0, 1 - y_i(\mathbf{w}^\top \mathbf{x}_i + b)) \] 如果我们将Hinge损失替换为Huber损失,则新的目标函数变为: \[ \min_{\mathbf{w}, b} \frac{1}{2} ||\mathbf{w}||^2 + C \sum_{i=1}^{n} L_\text{huber}(y_i(\mathbf{w}^\top \mathbf{x}_i + b), 1) \] 其中,\(L_\text{huber}\) 的定义如下: \[ L_\text{huber}(z, t) = \begin{cases} \frac{1}{2}(t-z)^2 & \text{if } |t-z| \leq \delta \\ \delta (|t-z|-\frac{\delta}{2}) & \text{otherwise}, \end{cases} \] 这里,参数 \(\delta\) 控制着从二次项过渡到一次项的位置[^4]。 #### 实现方法 为了实现带有Huber损失的支持向量机,可以通过梯度下降法或其他数值优化技术求解上述目标函数。具体来说,可以利用Python中的`scikit-learn`库作为基础框架,并自定义损失函数部分。下面是一个简单的伪代码示例: ```python import numpy as np from sklearn.svm import SVC class HuberSVC(SVC): def __init__(self, delta=1.0, *args, **kwargs): super().__init__(*args, **kwargs) self.delta = delta def huber_loss(self, z, t): error = t - z is_small_error = np.abs(error) <= self.delta squared_loss = 0.5 * (error ** 2) linear_loss = self.delta * (np.abs(error) - 0.5 * self.delta) return np.where(is_small_error, squared_loss, linear_loss) def fit(self, X, y): # 自定义优化逻辑以适应Huber Loss pass ``` 需要注意的是,由于`scikit-learn`并不直接提供修改损失函数的功能,实际开发过程中可能需要借助更灵活的工具如TensorFlow或PyTorch来自定义整个流程[^4]。 #### 调优技巧 在使用SVM-Huber分类器时,有几个重要的超参数需要调整: 1. **正则化系数C**:控制模型复杂度与泛化能力间的平衡。 2. **核函数类型及参数**:选择合适的核函数(如RBF、多项式等),并调节对应的参数。 3. **Huber损失阈值δ**:决定何时由平方误差转换至绝对误差,需依据具体应用场景设定合理范围。 --- ### 问题
评论 2
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值