enhanced HOG features

本文详细介绍了HOG特征的计算过程,包括像素级特征图的生成、空间聚合、标准化及截断等步骤,并通过C语言代码实现了这一过程。此外,还提供了辅助理解的图像示例。

理论

  • 像素级特征图
    使用[1,0,1],[1,0,1]T计算梯度。
  • 空间聚合
    C(i,j),for0i(w1)k,0j(h1)k

    其中w是图像的宽度,h是图像的高度,k就是下面的sbin,注意C(i,j)=C(x,y)而不是i为行,j为列。
  • 标准化和截断
    Nδ,γ(i,j)=(||C(i,j)||2+||C(i+δ,j)||2+||C(i,j+γ)||2+||C(i+δ,j+γ)||2)1/2

    其中:
    {δ,γ}  {1,1}

    截断,文中α取0.2:
    Tα(C(i,j)/Nδ,γ(i,j))

    理论和实现可能有细微的差别,但是实验效果好才是王道。

实现

#include <math.h>
#include "mex.h"

#define round(x)    ((x-floor(x))>0.5 ? ceil(x) : floor(x))

// small value, used to avoid division by zero
#define eps 0.0001
// 单位方向向量
// unit vectors used to compute gradient orientation
double uu[9] = {1.0000, //0
    0.9397, //20°
    0.7660, //40
    0.500, //60
    0.1736, //80
    -0.1736, //100
    -0.5000, //120
    -0.7660, //140
    -0.9397};//160
double vv[9] = {0.0000, 
    0.3420, 
    0.6428, 
    0.8660, 
    0.9848, 
    0.9848, 
    0.8660, 
    0.6428, 
    0.3420};

static inline double min(double x, double y) { return (x <= y ? x : y); }
static inline double max(double x, double y) { return (x <= y ? y : x); }

static inline int min(int x, int y) { return (x <= y ? x : y); }
static inline int max(int x, int y) { return (x <= y ? y : x); }

// main function:
// takes a double color image and a bin size 
// returns HOG features
// [input]:double color imgae and sbin
// [output]:out[0]*out[1]*32
mxArray *process(const mxArray *mximage, const mxArray *mxsbin) {
    double *im = (double *)mxGetPr(mximage);
    const int *dims = mxGetDimensions(mximage);//彩色图像,三通道维数
    if (mxGetNumberOfDimensions(mximage) != 3 ||
        dims[2] != 3 ||
        mxGetClassID(mximage) != mxDOUBLE_CLASS)
        mexErrMsgTxt("Invalid input");

    int sbin = (int)mxGetScalar(mxsbin);

    // memory for caching orientation histograms & their norms
    int blocks[2];
    blocks[0] = (int)round((double)dims[0]/(double)sbin);
    blocks[1] = (int)round((double)dims[1]/(double)sbin);
    //连续存储的三维数组
    double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));
    double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double));

    // memory for HOG features
    int out[3];
    out[0] = max(blocks[0]-2, 0);
    out[1] = max(blocks[1]-2, 0);
    out[2] = 27+4+1;//18个方向敏感的,9个方向不敏感的,4个纹理,和1最后没用。
    mxArray *mxfeat = mxCreateNumericArray(3, out, mxDOUBLE_CLASS, mxREAL);
    double *feat = (double *)mxGetPr(mxfeat);

    int visible[2];
    visible[0] = blocks[0]*sbin;
    visible[1] = blocks[1]*sbin;

    for (int x = 1; x < visible[1]-1; x++) {//行
        for (int y = 1; y < visible[0]-1; y++) {//列
            // first color channel
            // min(x,dims[1]-2),防止越出原图像的取值边界
            double *s = im + min(x, dims[1]-2)*dims[0] + min(y, dims[0]-2);
            //图像坐标轴为:横x,纵y
            //图像是按列顺序存储的。
            double dy = *(s+1) - *(s-1);
            double dx = *(s+dims[0]) - *(s-dims[0]);
            double v = dx*dx + dy*dy;

            // second color channel
            //第二个通道
            s += dims[0]*dims[1];
            double dy2 = *(s+1) - *(s-1);
            double dx2 = *(s+dims[0]) - *(s-dims[0]);
            double v2 = dx2*dx2 + dy2*dy2;

            // third color channel
            s += dims[0]*dims[1];
            double dy3 = *(s+1) - *(s-1);
            double dx3 = *(s+dims[0]) - *(s-dims[0]);
            double v3 = dx3*dx3 + dy3*dy3;

            // pick channel with strongest gradient
            if (v2 > v) {
                v = v2;
                dx = dx2;
                dy = dy2;
            } 
            if (v3 > v) {
                v = v3;
                dx = dx3;
                dy = dy3;
            }
            // v,dx,dy 三个通道中最强的梯度,即其方向。
            // snap to one of 18 orientations 投影到18个方向
            double best_dot = 0;
            int best_o = 0;
            for (int o = 0; o < 9; o++) {
                double dot = uu[o]*dx + vv[o]*dy;//计算与单位方向向量的内积
                if (dot > best_dot) {
                    best_dot = dot;
                    best_o = o;
                } else if (-dot > best_dot) {
                    best_dot = -dot;
                    best_o = o+9;
                }
            }

            // add to 4 histograms around pixel using linear interpolation
            double xp = ((double)x+0.5)/(double)sbin - 0.5;
            double yp = ((double)y+0.5)/(double)sbin - 0.5;
            int ixp = (int)floor(xp);
            int iyp = (int)floor(yp);
            double vx0 = xp-ixp;
            double vy0 = yp-iyp;
            double vx1 = 1.0-vx0;
            double vy1 = 1.0-vy0;
            v = sqrt(v);
            //点(ixp,iyp)为其所在cell投票
            //hist为首地址,ixp*blocks[0]+iyp表示按列存储,其中blocks[0]为图像的高度,或者说成矩阵的行数
            if (ixp >= 0 && iyp >= 0) {
                *(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += 
                    vx1*vy1*v;
            }
            //为其右边的cell进行投票
            if (ixp+1 < blocks[1] && iyp >= 0) {
                *(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += 
                    vx0*vy1*v;
            }
            //为其下面的cell进行投票
            if (ixp >= 0 && iyp+1 < blocks[0]) {
                *(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += 
                    vx1*vy0*v;
            }
            //对其右下角的cell进行投票
            if (ixp+1 < blocks[1] && iyp+1 < blocks[0]) {
                *(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += 
                    vx0*vy0*v;
            }
        }
    }

    // compute energy in each block by summing over orientations
    for (int o = 0; o < 9; o++) {
        double *src1 = hist + o*blocks[0]*blocks[1];
        double *src2 = hist + (o+9)*blocks[0]*blocks[1];
        double *dst = norm;
        double *end = norm + blocks[1]*blocks[0];
        while (dst < end) {
            *(dst++) += (*src1 + *src2) * (*src1 + *src2);
            src1++;
            src2++;
        }
    }

    // compute features
    for (int x = 0; x < out[1]; x++) {
        for (int y = 0; y < out[0]; y++) {
            double *dst = feat + x*out[0] + y;      
            double *src, *p, n1, n2, n3, n4;
            //首先计算blocks[1][1]处的标准化
            p = norm + (x+1)*blocks[0] + y+1;
            n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
            p = norm + (x+1)*blocks[0] + y;
            n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
            p = norm + x*blocks[0] + y+1;
            n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
            p = norm + x*blocks[0] + y;      
            n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);

            double t1 = 0;
            double t2 = 0;
            double t3 = 0;
            double t4 = 0;

            // contrast-sensitive features
            //从块的[1,1]处位置开始标准化
            src = hist + (x+1)*blocks[0] + (y+1);
            for (int o = 0; o < 18; o++) {
                double h1 = min(*src * n1, 0.2);
                double h2 = min(*src * n2, 0.2);
                double h3 = min(*src * n3, 0.2);
                double h4 = min(*src * n4, 0.2);
                *dst = 0.5 * (h1 + h2 + h3 + h4);//0.5
                t1 += h1;
                t2 += h2;
                t3 += h3;
                t4 += h4;
                dst += out[0]*out[1];
                src += blocks[0]*blocks[1];
            }

            // contrast-insensitive features
            src = hist + (x+1)*blocks[0] + (y+1);
            for (int o = 0; o < 9; o++) {
                double sum = *src + *(src + 9*blocks[0]*blocks[1]);
                double h1 = min(sum * n1, 0.2);
                double h2 = min(sum * n2, 0.2);
                double h3 = min(sum * n3, 0.2);
                double h4 = min(sum * n4, 0.2);
                *dst = 0.5 * (h1 + h2 + h3 + h4);//0.5
                dst += out[0]*out[1];
                src += blocks[0]*blocks[1];
            }

            // texture features
            *dst = 0.2357 * t1;//0.2357
            dst += out[0]*out[1];
            *dst = 0.2357 * t2;
            dst += out[0]*out[1];
            *dst = 0.2357 * t3;
            dst += out[0]*out[1];
            *dst = 0.2357 * t4;
            // 上面的0.5 0.5 0.2357 可能是为了使得各特征的响应均衡
            // truncation feature
            dst += out[0]*out[1];
            *dst = 0;
        }
    }

    mxFree(hist);
    mxFree(norm);
    return mxfeat;
}

// matlab entry point
// F = features(image, bin)
// image should be color with double values
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 
    if (nrhs != 2)
        mexErrMsgTxt("Wrong number of inputs"); 
    if (nlhs != 1)
        mexErrMsgTxt("Wrong number of outputs");
    plhs[0] = process(prhs[0], prhs[1]);
}

辅助理解图

这里写图片描述
这里写图片描述
这里写图片描述

图像示例

原图像

这里写图片描述

特征图像

这里写图片描述

参考文献:

1、From Rigid Templates to GrammarsObject Detection with Structured Models(23-25)
2、Face Detection, Pose Estimation, and Landmark Localization in the Wild。
3、http://blog.youkuaiyun.com/ubunfans/article/details/46830833
4、http://apprenticez.github.io/2013/10/08/hog-feature/

转载于:https://www.cnblogs.com/raby/p/5886708.html

import numpy as np import cv2 import os from skimage.feature import hog from sklearn.model_selection import train_test_split from sklearn.svm import SVC from sklearn.metrics import accuracy_score, classification_report from sklearn.preprocessing import StandardScaler import matplotlib.pyplot as plt import joblib # 1. 数据准备和特征提取 def load_dataset_and_extract_features(data_dir): """ 加载车型图像数据集并提取HOG特征 """ features = [] labels = [] categories = ['0', '1', '2', '3'] # 车型分类 for label, category in enumerate(categories): category_dir = os.path.join(data_dir, category) if not os.path.exists(category_dir): print(f"警告:类别目录 {category_dir} 不存在,已跳过") continue for img_name in os.listdir(category_dir): if img_name.endswith('.jpg'): # 读取图像并转为灰度图 img_path = os.path.join(category_dir, img_name) try: img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) if img is None: print(f"警告:无法读取图像 {img_path},已跳过") continue # 调整图像尺寸 img = cv2.resize(img, (128, 64)) # 标准尺寸用于HOG特征提取 # 提取HOG特征 hog_features = hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), transform_sqrt=True, block_norm='L2-Hys') features.append(hog_features) labels.append(label) except Exception as e: print(f"处理图像 {img_path} 时出错: {e}") continue if not features: raise ValueError("未找到有效图像数据,请检查数据集路径和文件格式") return np.array(features), np.array(labels) # 2. 数据预处理 def preprocess_data(features, labels): """ 数据标准化和训练/测试集划分 """ # 特征标准化 scaler = StandardScaler() features_scaled = scaler.fit_transform(features) # 划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split( features_scaled, labels, test_size=0.2, random_state=42 ) return X_train, X_test, y_train, y_test, scaler # 3. SVM模型训练 def train_svm_classifier(X_train, y_train): """ 训练SVM分类器 """ svm = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True) svm.fit(X_train, y_train) return svm # 4. 模型评估 def evaluate_model(model, X_test, y_test): """ 评估模型性能 """ y_pred = model.predict(X_test) accuracy = accuracy_score(y_test, y_pred) report = classification_report(y_test, y_pred, target_names=['0', '1', '2', '3']) print(f"模型准确率: {accuracy:.2f}") print("分类报告:\n", report) # 可视化混淆矩阵 from sklearn.metrics import ConfusionMatrixDisplay ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=['0', '1', '2', '3']) plt.title('车型分类混淆矩阵') plt.show() # 主函数 def main(): try: # 数据集路径 - 替换为实际路径 DATA_DIR = "C:/Users/Administrator/Desktop/jinwan" if not os.path.exists(DATA_DIR): raise FileNotFoundError(f"数据集目录 {DATA_DIR} 不存在") # 执行流程 print("正在加载数据集并提取特征...") features, labels = load_dataset_and_extract_features(DATA_DIR) print("正在进行数据预处理...") X_train, X_test, y_train, y_test, scaler = preprocess_data(features, labels) print("正在训练SVM模型...") model = train_svm_classifier(X_train, y_train) print("正在评估模型性能...") evaluate_model(model, X_test, y_test) # 保存模型和标准化器到指定路径 SAVE_DIR = "C:/Users/Administrator/Desktop/ultralytics-main" os.makedirs(SAVE_DIR, exist_ok=True) # 自动创建目录 model_path = os.path.join(SAVE_DIR, "vehicle_classifier_svm.pkl") scaler_path = os.path.join(SAVE_DIR, "standard_scaler.pkl") joblib.dump(model, model_path) joblib.dump(scaler, scaler_path) print(f"模型已保存到: {model_path}") print(f"标准化器已保存到: {scaler_path}") except Exception as e: print(f"程序运行出错: {e}") import traceback traceback.print_exc() if __name__ == "__main__": main()以上代码是什么原理
06-07
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值