Faster RCNN 复现---RPN模块

最新推荐文章于 2025-03-16 13:08:28 发布

李潭林

最新推荐文章于 2025-03-16 13:08:28 发布

阅读量372

点赞数

文章标签：计算机视觉 numpy 深度学习 目标检测

本文链接：https://blog.youkuaiyun.com/lalalafloat/article/details/110205057

版权

faster rcnn 持续填坑
因为本人 numpy、torch 零基础，代码注释会比较详细。

generate_anchors

参考博客：某个强者的复现
这部分主要是生成先验框（anchor）：以 base_size × base_size 的基准框为起点，先按长宽比 ratios 枚举，再按尺度 scales 缩放，默认参数下共得到 3 × 3 = 9 个先验框。

import numpy as np

def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    """Build the table of reference anchor boxes for the RPN.

    A square base box ``[0, 0, base_size - 1, base_size - 1]`` is first
    reshaped to every aspect ratio in ``ratios`` and each of those boxes is
    then enlarged by every factor in ``scales``.

    Args:
        base_size: Side length, in pixels, of the square base box.
        ratios: Height/width aspect ratios to enumerate.
        scales: Multipliers applied to the width and height of every
            ratio-adjusted box.  The default ``(8, 16, 32)`` equals
            ``2 ** np.arange(3, 6)``.

    Returns:
        A 2-D array with ``len(ratios) * len(scales)`` rows; each row is
        ``[x1, y1, x2, y2]`` (top-left and bottom-right corners).
    """
    # The defaults are immutable tuples so no mutable object is shared between
    # calls; normalise here so the helpers always receive NumPy arrays.
    ratios = np.asarray(ratios, dtype=float)
    scales = np.asarray(scales)

    # Corner form of the base box: [0, 0, base_size - 1, base_size - 1].
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)

    # Iterating a 2-D array yields its rows, one ratio-adjusted box at a time.
    return np.vstack([_scale_enum(row, scales) for row in ratio_anchors])


def _ratio_enum(anchor, ratios):
    """Enumerate boxes around ``anchor``'s centre, one per aspect ratio.

    For each ratio ``r`` (height / width) the width is chosen so that
    ``w * (r * w)`` matches the area of ``anchor``; width and height are then
    rounded to whole pixels, so the area is only approximately preserved.

    Args:
        anchor: Box as ``[x1, y1, x2, y2]``.
        ratios: Sequence of height/width ratios.

    Returns:
        A 2-D array with one ``[x1, y1, x2, y2]`` row per ratio.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    # A plain Python list cannot be divided by a scalar; convert explicitly so
    # the arithmetic below does not depend on ``w``/``h`` being NumPy scalars.
    ratios = np.asarray(ratios, dtype=float)
    size = w * h
    # h = r * w  ->  size = r * w * w  ->  w = sqrt(size / r)
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))  # round to the nearest whole pixel
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate boxes around ``anchor``'s centre, one per scale factor.

    Args:
        anchor: Box as ``[x1, y1, x2, y2]``.
        scales: Sequence of multipliers applied to both width and height.

    Returns:
        A 2-D array with one ``[x1, y1, x2, y2]`` row per scale.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    # Convert first: with a Python int width and a list of scales, ``w * scales``
    # would repeat the list instead of multiplying element-wise.
    scales = np.asarray(scales)
    ws = w * scales
    hs = h * scales
    return _mkanchors(ws, hs, x_ctr, y_ctr)

def _whctrs(anchor):
    """Convert a corner-form box to (width, height, centre x, centre y).

    ``anchor`` is indexed as ``[x1, y1, x2, y2]``.  Width and height count
    both edge pixels (hence the ``+ 1``).  The centre is expressed in pixel
    index coordinates, so an even-sized box gets a half-pixel centre
    (e.g. ``[0, 0, 15, 15]`` -> centre ``7.5``).
    """
    x_min, y_min = anchor[0], anchor[1]
    x_max, y_max = anchor[2], anchor[3]

    w = x_max - x_min + 1
    h = y_max - y_min + 1

    # Offset from the first pixel to the middle of a run of w (or h) pixels.
    x_ctr = x_min + 0.5 * (w - 1)
    y_ctr = y_min + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Convert centre-form boxes to corner form.

    Args:
        ws: 1-D NumPy array of box widths.
        hs: 1-D NumPy array of box heights, same length as ``ws``.
        x_ctr: Centre x shared by every box.
        y_ctr: Centre y shared by every box.

    Returns:
        A 2-D array with one ``[x1, y1, x2, y2]`` row per (width, height)
        pair.  Widths count both edge pixels, so ``x2 - x1 + 1 == w`` and
        likewise for the height.
    """
    # Turn the (n,) vectors into (n, 1) columns so they can be joined
    # side by side into an (n, 4) table.
    col_w = ws[:, np.newaxis]
    col_h = hs[:, np.newaxis]

    # Distance from the centre to the outermost pixel on either side.
    half_w = 0.5 * (col_w - 1)
    half_h = 0.5 * (col_h - 1)

    corners = [
        x_ctr - half_w,
        y_ctr - half_h,
        x_ctr + half_w,
        y_ctr + half_h,
    ]
    # For 2-D inputs, joining along axis 1 is what np.hstack does
    # (np.vstack would join along axis 0 instead).
    return np.concatenate(corners, axis=1)


if __name__ == '__main__':
    # Smoke test: print the anchor table produced by the default arguments.
    a = generate_anchors()
    print(a)