Taking a model from training to real-world use involves collecting a dataset, tuning parameters and training, and finally deployment. This article focuses on the steps for deploying a trained model into an application.
-
Dataset
-
Model training
The dataset is fed to lightweight_openpose for training, which yields a pb model or a tflite model.
-
Android deployment (Java version)
recognizeImage(): once an image is obtained, it is fed to the model; after inference completes, fetch retrieves the output arrays outputHeatMap and outputPafMat.
public List<Recognition> recognizeImage(Bitmap bitmap) { //final
//TODO: preprocessBitmap; what is -1.0f for? (it recenters each channel into [-1, 1]; see the note after this method)
Trace.beginSection("preprocessBitmap");
int w = bitmap.getWidth();
bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
for (int i = 0; i < intValues.length; ++i) {
final int val = intValues[i];
/*
def compute_resized_coords(coords, resizeFactor):
"""
Given the index/coordinates of a cell in some input array (e.g. image),
provides the new coordinates if that array was resized by making it
resizeFactor times bigger.
E.g.: image of size 3x3 is resized to 6x6 (resizeFactor=2), we'd like to
know the new coordinates of cell [1,2] -> Function would return [2.5,4.5]
:param coords: Coordinates (indices) of a cell in some input array
:param resizeFactor: Resize coefficient = shape_dest/shape_source. E.g.:
resizeFactor=2 means the destination array is twice as big as the
original one
:return: Coordinates in an array of size
shape_dest=resizeFactor*shape_source, expressing the array indices of the
closest point to 'coords' if an image of size shape_source was resized to
shape_dest
"""
# 1) Add 0.5 to coords to get coordinates of center of the pixel (e.g.
# index [0,0] represents the pixel at location [0.5,0.5])
# 2) Transform those coordinates to shape_dest, by multiplying by resizeFactor
# 3) That number represents the location of the pixel center in the new array,
# so subtract 0.5 to get coordinates of the array index/indices (revert
# step 1)
return (np.array(coords, dtype=float) + 0.5) * resizeFactor - 0.5
*/
// OpenCV uses : B G R
floatValues[i * 3 + 0] = ((float) (val & 0xFF) * 2.0f / 255.0f) - 1.0f; //B
floatValues[i * 3 + 1] = ((float) ((val >> 8) & 0xFF) * 2.0f / 255.0f) - 1.0f; //G
floatValues[i * 3 + 2] = ((float) ((val >> 16) & 0xFF) * 2.0f / 255.0f) - 1.0f; //R
// NOTE: Color.rgb expects (r, g, b) but is given the (B, G, R) float order from above,
// so the written-back bitmap has red and blue swapped
bitmap.setPixel(i % w, i / w,
Color.rgb(
(int) ((floatValues[i * 3 + 0] + 1.0f) * (255.0f / 2.0f)),
(int) ((floatValues[i * 3 + 1] + 1.0f) * (255.0f / 2.0f)),
(int) ((floatValues[i * 3 + 2] + 1.0f) * (255.0f / 2.0f))));
}
Trace.endSection();
// Copy the input data into TensorFlow.
Trace.beginSection("feed");
inferenceInterface.feed(inputName, floatValues, 1, inputSize, inputSize, 3);
Trace.endSection();
// Run the inference call.
Trace.beginSection("run");
inferenceInterface.run(outputNames, logStats);
Trace.endSection();
// Copy the output Tensor back into the output array.
Trace.beginSection("fetch");
inferenceInterface.fetch(outputNames[0], outputPafMat);
inferenceInterface.fetch(outputNames[1], outputHeatMap);
final ArrayList<Recognition> recognitions = new ArrayList<Recognition>();
Recognition r0 = new Recognition("a", "a", 1f, new RectF(0, 0, 10, 10));
List<Human> humans = estimatePose(outputHeatMap, outputPafMat);
// ... conversion of humans into Recognition entries is omitted in this excerpt
return recognitions;
}
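The -1.0f asked about in the TODO above is plain input normalization: each 8-bit channel in [0, 255] is scaled by 2/255 into [0, 2] and then shifted into [-1, 1], the input range many MobileNet-style backbones expect. A minimal numpy sketch of the mapping and its inverse (the inverse is what the setPixel call applies):
import numpy as np

vals = np.array([0, 128, 255], dtype=np.float32)  # sample 8-bit channel values
normalized = vals * 2.0 / 255.0 - 1.0             # [0,255] -> [-1,1], as in the loop above
restored = (normalized + 1.0) * (255.0 / 2.0)     # inverse mapping, as in setPixel
print(normalized)  # approx [-1., 0.0039, 1.]
print(restored)    # [0., 128., 255.]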
estimatePose(): estimatePose is then called on the model outputs to run the pose-estimation decoding.
private List<Human> estimatePose(float[] heatMat, float[] pafMat) {
//TODO:jiang roll the axes so the keypoint channel comes first, which makes looping easier: (46a,46b,19c) => (19c,46a,46b)
heatMat = rollAxis(heatMat, new int[]{NUM_HEATMAP, NUM_HEATMAP, NUM_KEYPOINTS}, 2);
pafMat = rollAxis(pafMat, new int[]{NUM_HEATMAP, NUM_HEATMAP, NUM_PAF}, 2);
for (int i = 0; i < NUM_KEYPOINTS; i++) {
float[] dd = new float[w * w]; // w: heatmap side length (NUM_HEATMAP)
System.arraycopy(heatMat, i * w * w, dd, 0, w * w);
dd = restByMin(dd); //TODO: along axis=1, find the minimum and subtract it
System.arraycopy(dd, 0, heatMat, i * w * w, w * w);
}
for (int i = 0; i < NUM_KEYPOINTS; i++) { // each image
for (int j = 0; j < w; j++) { // each row
float[] rd = new float[w];
System.arraycopy(heatMat, i * w * w + (j * w), rd, 0, w);
rd = restByMin(rd);//TODO: along axis=2, find the minimum and subtract it
System.arraycopy(rd, 0, heatMat, i * w * w + (j * w), w);
}
}
for (int i = 0; i < NUM_KEYPOINTS; i++) {
float[] img = new float[w * w];
System.arraycopy(heatMat, i * w * w, img, 0, w * w);
//TODO: non-maximum suppression to find the peaks of heatMat
float[] nms = non_max_suppression(img, w, w, 5, _NMS_Threshold);
coords[i] = new ArrayList<Coord>();
//TODO: findCoords finds the coordinates above the threshold after NMS
coords[i].addAll(findCoords(nms, w, _NMS_Threshold));
}
}
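The rollAxis call at the top of estimatePose mirrors numpy's np.rollaxis: the network emits H x W x C tensors (46 x 46 x 19 for the heatmaps), and rolling axis 2 to the front yields C x H x W, so each keypoint plane is one contiguous block, which is exactly what the i * w * w indexing above relies on. A small sketch of the equivalence, with shapes assumed from the constants above:
import numpy as np

heat = np.random.rand(46, 46, 19).astype(np.float32)  # (H, W, C), as output by the network
rolled = np.rollaxis(heat, 2)                         # move axis 2 to the front -> (19, 46, 46)
print(rolled.shape)                                   # (19, 46, 46)
# plane i is now contiguous, matching the Java indexing heatMat[i * w * w + j * w + k]
assert np.array_equal(rolled[3], heat[:, :, 3])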
estimatePose(): after the peak coordinates have been found via non-maximum suppression, get_score is called to compute the connection scores between those points from the PAFs.
List<Connection> connection_all = new ArrayList<>();
for (int i = 0; i < CommonQD.CocoPairs.length; i++) {
int idx1 = CommonQD.CocoPairs[i][0];
int idx2 = CommonQD.CocoPairs[i][1];
float[] paf_x = new float[w * w];
float[] paf_y = new float[w * w];
System.arraycopy(pafMat, CommonQD.CocoPairsNetwork[i][0] * w * w, paf_x, 0, w * w);
System.arraycopy(pafMat, CommonQD.CocoPairsNetwork[i][1] * w * w, paf_y, 0, w * w);
//TODO: estimate_pose_pair finds candidate connections
List<Connection> connections = estimate_pose_pair(coords, idx1, idx2, paf_x, paf_y, w);
//connection_all.extend(connection)
connection_all.addAll(connections);
}
private List<Connection> estimate_pose_pair(ArrayList<Coord>[] coords, int partIdx1, int partIdx2, float[] pafMatX, float[] pafMatY, int w) {
List<Connection> connection_temp = new ArrayList<>();
List<Coord> peak_coord1 = coords[partIdx1];
List<Coord> peak_coord2 = coords[partIdx2];
int cnt = 0;
for (int idx1 = 0; idx1 < peak_coord1.size(); idx1++) {
for (int idx2 = 0; idx2 < peak_coord2.size(); idx2++) {
float y1 = peak_coord1.get(idx1).x; // note: here Coord.x stores the row and Coord.y the column, hence the x/y swap
float x1 = peak_coord1.get(idx1).y;
float y2 = peak_coord2.get(idx2).x;
float x2 = peak_coord2.get(idx2).y;
//TODO: compute the connection score
ScoreOutput scoreCount = get_score(x1, y1, x2, y2, pafMatX, pafMatY, w);
cnt += 1;
//TODO: discard connections below InterMinAbove_Threshold=6; two cases: the (right upper arm, right forearm, left upper arm, left forearm) pairs vs. everything else
int[][] cc1 = {{0, 1}, {1, 2}, {3, 4}, {4, 5}};//TODO: of the 14 keypoints, these pairs are the right upper arm, right forearm, left upper arm and left forearm
if ((partIdx1 == cc1[0][0] && partIdx2 == cc1[0][1]) ||
(partIdx1 == cc1[1][0] && partIdx2 == cc1[1][1]) ||
(partIdx1 == cc1[2][0] && partIdx2 == cc1[2][1]) ||
(partIdx1 == cc1[3][0] && partIdx2 == cc1[3][1])) {
if (scoreCount.count < Math.floor(InterMinAbove_Threshold / 2)
|| scoreCount.score <= 0.0f) {
continue;
}
}
else if (scoreCount.count < InterMinAbove_Threshold
|| scoreCount.score < 0.0) {
continue;
}
……
//TODO: add to the temporary list of connections
connection_temp.add(cnn);
}
}
List<Connection> connection = new ArrayList<>();
for (Connection candidate : sortConnections(connection_temp)) {
//TODO, jiangjunhou: a used joint cannot be reused; one connection cannot belong to two people
if (used_idx1.contains(candidate.idx[0])
|| used_idx2.contains(candidate.idx[1])) {
continue;
}
connection.add(candidate);
used_idx1.add(candidate.idx[0]);
used_idx2.add(candidate.idx[1]);
}
return connection;
}
Push the current train of thought onto the stack for a moment and look at how get_score is implemented.
private ScoreOutput get_score(float x1, float y1, float x2, float y2, float[] pafMatX, float[] pafMatY, int w) {
//TODO: interpolate 10 points between (x1, y1) and (x2, y2) and sample the PAF map at those 10 points to compute the score
int __num_inter = 10;
float __num_inter_f = (float) __num_inter;
float dx = x2 - x1;
float dy = y2 - y1;
float normVec = (float) Math.sqrt(Math.pow(dx, 2) + Math.pow(dy, 2));//TODO: used to build the unit vector
……
float vx = dx / normVec;
float vy = dy / normVec;
//TODO: depending on whether x1 == x2, either interpolate the 10 points arithmetically or fill them all with x1; the +0.5 before the int cast rounds to the nearest integer
int[] xs = np_astype_int8(vector_add(
(x1 != x2) ? np_arange(x1, x2, dx / __num_inter_f) : np_full(__num_inter, x1)
, 0.5f)
);
int[] ys = np_astype_int8(vector_add(
(y1 != y2) ? np_arange(y1, y2, dy / __num_inter_f) : np_full(__num_inter, y1),
0.5f));
//TODO: initialize the __num_inter=10 PAF samples to 0
float[] pafXs = np_full(__num_inter, 0);
float[] pafYs = np_full(__num_inter, 0);
//TODO: sample the PAF map at these 10 points
for (int idx = 0; idx < xs.length; idx++) {
int mx = xs[idx];
int my = ys[idx];
pafXs[idx] = pafMatX[my * w + mx];
pafYs[idx] = pafMatY[my * w + mx];
}
//TODO: dot product of the PAF samples with the unit direction vector
float[] local_scores = vector_sum(vector_mul(pafXs, vx), vector_mul(pafYs, vy));
boolean[] thidxs = vector_grater(local_scores, Inter_Threashold);//TODO: find the points whose score exceeds the threshold
float score = sum(filter(local_scores, thidxs));//TODO: keep only the scores above the threshold and sum them
int count = sum(thidxs);
return new ScoreOutput(score, count);
}
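Taken together, get_score samples the PAF along the straight segment between two candidate joints and sums the dot products with the segment's unit direction: a discrete version of the line integral from the OpenPose paper. A hedged numpy sketch of the same computation (np.linspace is used for brevity where the Java code uses an arange-style step; the array and threshold names are assumptions):
import numpy as np

def get_score(x1, y1, x2, y2, paf_x, paf_y, num_inter=10, inter_threshold=0.1):
    # unit vector pointing from (x1, y1) to (x2, y2)
    dx, dy = x2 - x1, y2 - y1
    norm = np.sqrt(dx * dx + dy * dy)
    if norm < 1e-8:
        return 0.0, 0
    vx, vy = dx / norm, dy / norm
    # num_inter sample points along the segment; +0.5 and int-cast rounds to pixels
    xs = (np.linspace(x1, x2, num_inter) + 0.5).astype(int)
    ys = (np.linspace(y1, y2, num_inter) + 0.5).astype(int)
    # dot product of each sampled PAF vector with the unit direction
    local_scores = paf_x[ys, xs] * vx + paf_y[ys, xs] * vy
    keep = local_scores > inter_threshold
    return float(local_scores[keep].sum()), int(keep.sum())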
Pop the stack and return to estimatePose(): once the valid connections have been collected, they are merged.
//TODO: see group_limbs_of_same_person() in pose_decode.py
HashMap<String, List<Connection>> connection_by_human = new HashMap<>();
//TODO: build a map keyed by human_0, human_1, ...
for (int idx = 0; idx < connection_all.size(); idx++) {
String key = String.format("human_%d", idx);
if (!connection_by_human.containsKey(key)) {
connection_by_human.put(key, new ArrayList<Connection>());
}
connection_by_human.get(key).add(connection_all.get(idx));
}
HashMap<String, List<String>> no_merge_cache = new HashMap<>();
//TODO: see group_limbs_of_same_person() in pose_decode.py
while (true) {
boolean is_merged = false;
//TODO: like itertools.combinations, this returns all length-2 subsequences of the key list, ordered as the keys appear in the input
String[][] keyComb = itertools_combinations(
connection_by_human.keySet().toArray(new String[connection_by_human.size()]));
for (String[] comb : keyComb) {
String k1 = comb[0];
String k2 = comb[1];
if (no_merge_cache.containsKey(k1) && no_merge_cache.get(k1).contains(k2)) {
continue;
}
//TODO: like itertools.product, this returns the Cartesian product: each element of the first list paired with each element of the second
Connection[][] conProd = itertools_product(connection_by_human.get(k1), connection_by_human.get(k2));
for (Connection[] prod : conProd) {
Connection c1 = prod[0];
Connection c2 = prod[1];
if (inBothSets(c1.uPartIdx, c2.uPartIdx)) {//TODO: the two humans share a joint, so merge them
is_merged = true;
connection_by_human.get(k1).addAll(connection_by_human.get(k2));
connection_by_human.remove(k2);
break;
}
}
if (is_merged) {
no_merge_cache.remove(k1);
break;
} else {
if (!no_merge_cache.containsKey(k1)) {
no_merge_cache.put(k1, new ArrayList<String>());
}
no_merge_cache.get(k1).add(k2);
}
} //!for
if (!is_merged) {
break;
}
} //!while
//TODO: reject humans with fewer than Min_Subset_Cnt (4) connections
HashMap<String, List<Connection>> tmpCBH = new HashMap<>();
for (Map.Entry<String, List<Connection>> entry : connection_by_human.entrySet()) {
if (entry.getValue().size() >= Min_Subset_Cnt) {
tmpCBH.put(entry.getKey(), entry.getValue());
}
}
connection_by_human = tmpCBH;
//TODO: reject humans whose best connection score is below Min_Subset_Score (0.8f)
tmpCBH = new HashMap<>();
for (Map.Entry<String, List<Connection>> entry : connection_by_human.entrySet()) {
float maxScH = 0;
for (Connection chs : entry.getValue()) {
maxScH = Math.max(maxScH, chs.score);
}
if (maxScH >= Min_Subset_Score) {
tmpCBH.put(entry.getKey(), entry.getValue());
}
}
connection_by_human = tmpCBH;
List<Human> humans = new ArrayList<>();
for (List<Connection> conn : connection_by_human.values()) {
humans.add(connections_to_human(conn, heatMat, w));
}
return humans;
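The merge loop above is a Java transcription of itertools.combinations over the human keys plus itertools.product over their connections: whenever two tentative humans share a joint, they are fused into one. A minimal Python sketch of the same idea (the no_merge_cache optimization is omitted, and Connection.part_ids stands in for uPartIdx):
import itertools

def merge_humans(connection_by_human):
    # repeatedly fuse any two humans that share a joint id, until stable
    merged = True
    while merged:
        merged = False
        for k1, k2 in itertools.combinations(list(connection_by_human), 2):
            for c1, c2 in itertools.product(connection_by_human[k1],
                                            connection_by_human[k2]):
                if set(c1.part_ids) & set(c2.part_ids):  # shared joint
                    connection_by_human[k1] += connection_by_human.pop(k2)
                    merged = True
                    break
            if merged:
                break
    return connection_by_human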
Once merging finishes, we end up with humans: the skeleton keypoint connections grouped per person.
-
Python version
Initialize the relevant parameters, then run inference
#read the image with cv2.imread; OpenCV loads channels as BGR, so red and blue are swapped
img_ori = cv2.imread(os.path.join(params['img_path'], img_name))
img_data = cv2.cvtColor(img_ori, code=cv2.COLOR_BGR2RGB)
#resize and normalize
img_data = cv2.resize(img_data, (256, 256))
img = img_data / 255.
start_time = time.time()
#sess.run takes img as input and returns heatmap and _paf
heatmap, _paf = sess.run([cpm, paf], feed_dict={input_img: [img]})
end_time = time.time()
#call decode_pose
canvas, joint_list, person_to_joint_assoc, joints = decode_pose(img_data, params, heatmap[0], _paf[0])
decode_time = time.time()
print('inference + decode time == {}'.format(decode_time - start_time))
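This snippet assumes input_img, cpm and paf were already pulled out of a loaded frozen graph. For completeness, a typical TF1-style loading sketch; the file path and tensor names below are placeholders, not the actual names from lightweight_openpose:
import tensorflow as tf

with tf.compat.v1.gfile.GFile('model.pb', 'rb') as f:  # path is an assumption
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name='')
    # tensor names are hypothetical; inspect the graph for the real ones
    input_img = graph.get_tensor_by_name('image:0')
    cpm = graph.get_tensor_by_name('heatmap:0')
    paf = graph.get_tensor_by_name('paf:0')

sess = tf.compat.v1.Session(graph=graph)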
Inside decode_pose, non-maximum suppression (NMS) runs first
def decode_pose(img_orig, param, heatmaps, pafs):
scale = img_orig.shape[0] / heatmaps.shape[0]
# Bottom-up approach:
# Step 1: find all joints in the image (organized by joint type: [0]=nose,
# [1]=neck...)
# This step uses NMS to find the coordinates of every point on the heatmaps whose response exceeds the threshold. joint_list_per_joint_type
# is a list of 14 lists, one per joint type. Each of those is itself a list of [x, y, score, unique_id, used_flag] entries:
# x and y are the point's coordinates, score is its score, unique_id is its id among all points, and used_flag marks whether the point has been used (needed in the code below)
joint_list_per_joint_type = NMS(param, heatmaps, scale)
# for joint in joint_list_per_joint_type:
# print ('....... ', joint)
# joint_list is an unravel'd version of joint_list_per_joint, where we add
# a 5th column to indicate the joint_type (0=nose, 1=neck...)
# joint_list_per_joint_type is a list of lists; this line flattens it into an ndarray of shape (total_joints_num, 6). Each row holds
# 6 values: the first 5 match joint_list_per_joint_type, and the last one is the joint type, e.g. 0 for right shoulder, 1 for right elbow, ...
joint_list = np.array([tuple(peak) + (joint_type,) for joint_type, joint_peaks in enumerate(joint_list_per_joint_type)
for peak in joint_peaks])
# for joint in joint_list:
# print ('*....* ', joint)
# # Step 2: find which joints go together to form limbs (which wrists go
# # with which elbows)
paf_upsamp = cv2.resize(pafs, (img_orig.shape[1], img_orig.shape[0]), interpolation=cv2.INTER_CUBIC)
# This step finds all possible connections. connected_limbs is a list of lists with one row per limb type; each row holds a variable number of lists, one per candidate connection for that limb.
# Each candidate is a 5-value list [joint_src_id, joint_dst_id, limb_score, joint_src_index, joint_dst_index]: the first two store the unique_ids of the two joints found,
# limb_score is the computed score of the connection, and joint_src_index / joint_dst_index record each joint's position among all joints of its type. E.g. for the head-neck limb,
# we not only find the head and neck points and their unique_ids, but also record that this is the joint_src_index-th of all head points and the joint_dst_index-th of all neck points
connected_limbs = find_connected_joints(param, paf_upsamp, joint_list_per_joint_type)
# Step 3: associate limbs that belong to the same person
person_to_joint_assoc = group_limbs_of_same_person(connected_limbs, joint_list)
NMS finds the peak coordinates
def NMS(param, heatmaps, upsampFactor=1., bool_refine_center=True, bool_gaussian_filt=False):
"""
NonMaximaSuppression: find peaks (local maxima) in a set of grayscale images
:param heatmaps: set of grayscale images on which to find local maxima (3d np.array,
with dimensions image_height x image_width x num_heatmaps)
:param upsampFactor: Size ratio between CPM heatmap output and the input image size.
Eg: upsampFactor=16 if original image was 480x640 and heatmaps are 30x40xN
:param bool_refine_center: Flag indicating whether:
- False: Simply return the low-res peak found upscaled by upsampFactor (subject to grid-snap)
- True: (Recommended, very accurate) Upsample a small patch around each low-res peak and
fine-tune the location of the peak at the resolution of the original input image
:param bool_gaussian_filt: Flag indicating whether to apply a 1d-GaussianFilter (smoothing)
to each upsampled patch before fine-tuning the location of each peak.
:return: a NUM_JOINTS x 5 np.array where each row represents a joint type (0=nose, 1=neck...)
and the columns indicate the {x,y} position, the score (probability), a unique id (counter)
and a flag indicating whether this point has been used for assignment
"""
# MODIFIED BY CARLOS: Instead of upsampling the heatmaps to heatmap_avg and
# then performing NMS to find peaks, this step can be sped up by ~25-50x by:
# (9-10ms [with GaussFilt] or 5-6ms [without GaussFilt] vs 250-280ms on RoG
# 1. Perform NMS at (low-res) CPM's output resolution
# 1.1. Find peaks using scipy.ndimage.filters.maximum_filter
# 2. Once a peak is found, take a patch of 5x5 centered around the peak, upsample it, and
# fine-tune the position of the actual maximum.
# '-> That's equivalent to having found the peak on heatmap_avg, but much faster because we only
# upsample and scan the 5x5 patch instead of the full (e.g.) 480x640
joint_list_per_joint_type = []
cnt_total_joints = 0
# For every peak found, win_size specifies how many pixels in each
# direction from the peak we take to obtain the patch that will be
# upsampled. Eg: win_size=1 -> patch is 3x3; win_size=2 -> 5x5
# (for BICUBIC interpolation to be accurate, win_size needs to be >=2!)
win_size = 2
for joint in range(NUM_JOINTS):
map_orig = heatmaps[:, :, joint]#TODO: take the heatmap of one joint type
# peak_coords = find_peaks(param, map_orig)
peak_coords = find_peaks_v2(param, map_orig)#TODO: find the peaks
peaks = np.zeros((len(peak_coords), 5))
# TODO: enumerate returns tuples of a count (from start, default 0) and the values obtained from iterating over the iterable
for i, peak in enumerate(peak_coords):
if bool_refine_center:
x_min, y_min = np.maximum(0, peak - win_size)
x_max, y_max = np.minimum(
np.array(map_orig.T.shape) - 1, peak + win_size)
# Take a small patch around each peak and only upsample that
# tiny region
patch = map_orig[y_min:y_max + 1, x_min:x_max + 1]
map_upsamp = cv2.resize(
patch, None, fx=upsampFactor, fy=upsampFactor, interpolation=cv2.INTER_CUBIC)
# Gaussian filtering takes an average of 0.8ms/peak (and there might be
# more than one peak per joint!) -> For now, skip it (it's
# accurate enough)
map_upsamp = gaussian_filter(
map_upsamp, sigma=3) if bool_gaussian_filt else map_upsamp
# Obtain the coordinates of the maximum value in the patch
# TODO: unravel the argmax back to (row, col) coordinates within map_upsamp's shape
location_of_max = np.unravel_index(
map_upsamp.argmax(), map_upsamp.shape)
# Remember that peaks indicates [x,y] -> need to reverse it for
# [y,x]
location_of_patch_center = compute_resized_coords(
peak[::-1] - [y_min, x_min], upsampFactor)
# Calculate the offset wrt to the patch center where the actual
# maximum is
refined_center = (location_of_max - location_of_patch_center)
peak_score = map_upsamp[location_of_max]
else:
refined_center = [0, 0]
# Flip peak coordinates since they are [x,y] instead of [y,x]
peak_score = map_orig[tuple(peak[::-1])]
peaks[i, :] = tuple([int(math.floor(x)) for x in compute_resized_coords(
peak_coords[i], upsampFactor) + refined_center[::-1]]) + (peak_score, cnt_total_joints, 0)
cnt_total_joints += 1
joint_list_per_joint_type.append(peaks)
return joint_list_per_joint_type
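find_peaks_v2 itself is not shown here; per the comments above, peak finding at the CPM's low output resolution is typically done with scipy's maximum_filter. A minimal sketch of that approach (the threshold key 'thre1' is an assumption):
import numpy as np
from scipy.ndimage import maximum_filter

def find_peaks(param, heatmap):
    # a pixel is a peak if it equals the max of its 3x3 neighborhood
    # and its response exceeds the detection threshold
    peaks_binary = (maximum_filter(heatmap, size=3) == heatmap) & \
                   (heatmap > param['thre1'])
    # np.nonzero returns (rows, cols); reverse to get [x, y] as NMS() expects
    return np.array(np.nonzero(peaks_binary)[::-1]).T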
Using np.unravel_index from numpy
Full signature: unravel_index(indices, shape, order='C')
First question: what is indices? It is one or more flat index values, either a single number or a list,
e.g. indices = 1 or indices = [3,12].
shape is the shape of the array.
The function maps each entry of indices to its coordinates in an array of that shape. An example makes this clearer:
Suppose we have the list [2,3,4,5,6,7,8,1,12,23,34,55].
We want the index of its minimum value; at a glance, it is 7 (0-based).
With numpy, though, you will usually turn the list into an array (otherwise why use numpy?), so let's reshape it into a 3x4 matrix:
a = [2,3,4,5,6,7,8,1,12,23,34,55]
b = np.array(a).reshape((3,4))
print(b)
Result:
array([[ 2, 3, 4, 5],
[ 6, 7, 8, 1],
[12, 23, 34, 55]])
Now that the data is a matrix, how do we find the index of its minimum? The old flat index 7 no longer works directly: a matrix index is a coordinate pair. This is exactly what unravel_index computes:
indices is 7 (the original flat index), shape is (3,4) (the matrix shape), and order
takes one of two values: 'C' (row-major) or 'F' (column-major). For example, np.unravel_index(7,(3,4),order='C') returns (1,3),
while np.unravel_index(7,(3,4),order='F') returns (1,2).
- Why (1,3): a (3,4) matrix has 3 rows and 4 columns; writing the coordinates as x,y, row-major order gives 4*x+y=7, so x=1, y=3, hence (1,3)
- Why (1,2): with order='F' the traversal is column-major, so 3*y+x=7; both (x=4,y=1) and (x=1,y=2) satisfy the equation, but x=4 is out of range for 3 rows, so the answer is (1,2)
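A runnable check of both orders:
import numpy as np

b = np.array([2, 3, 4, 5, 6, 7, 8, 1, 12, 23, 34, 55]).reshape((3, 4))
flat_idx = b.argmin()                          # 7
print(np.unravel_index(flat_idx, b.shape))     # (1, 3) -> b[1, 3] == 1
print(np.unravel_index(7, (3, 4), order='F'))  # (1, 2), column-major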
find_connected_joints finds connected joint pairs
def find_connected_joints(param, paf_upsamp, joint_list_per_joint_type, num_intermed_pts=10, max_dist_thresh=1,
max_paf_score_thresh=0.7):
"""
For every type of limb (eg: forearm, shin, etc.), look for every potential
pair of joints (eg: every wrist-elbow combination) and evaluate the PAFs to
determine which pairs are indeed body limbs.
:param paf_upsamp: PAFs upsampled to the original input image resolution
:param joint_list_per_joint_type: See 'return' doc of NMS()
:param num_intermed_pts: Int indicating how many intermediate points to take
between joint_src and joint_dst, at which the PAFs will be evaluated
:param max_dist_thresh: do not mark a limb as connected if its length exceeds (this threshold * img_height / img_width)
:return: List of NUM_LIMBS rows. For every limb_type (a row) we store
a list of all limbs of that type found (eg: all the right forearms).
For each limb (each item in connected_limbs[limb_type]), we store 5 cells:
# {joint_src_id,joint_dst_id}: a unique number associated with each joint,
# limb_score_penalizing_long_dist: a score of how good a connection
of the joints is, penalized if the limb length is too long
# {joint_src_index,joint_dst_index}: the index of the joint within
all the joints of that type found (eg: the 3rd right elbow found)
"""
connected_limbs = []
long_dist_thresh = np.array([paf_upsamp.shape[1], paf_upsamp.shape[0]]) * max_dist_thresh
# Auxiliary array to access paf_upsamp quickly
# limb_intermed_coords is a (4, num_intermed_pts) ndarray. Once filled in:
# row 0 holds the y coordinates of the 10 sample points between the two joints
# row 1 holds the x coordinates of those 10 sample points
# rows 2 and 3 hold the PAF channel positions of this limb's two components, taken from paf_xy_coords_per_limb
limb_intermed_coords = np.empty((4, num_intermed_pts), dtype=np.intp)
for limb_type in range(NUM_LIMBS):
# joint_list_per_joint_type: a NUM_JOINTS x 5 np.array where each row represents a joint type (0=nose, 1=neck...)
# and the columns indicate the {x,y} position, the score (probability), a unique id (counter)
# and a flag indicating whether this point has been used for assignment
# E.g. if we want to connect neck points to head points, this step collects all the head and neck points found
# List of all joints of type A found, where A is specified by limb_type
# (eg: a right forearm starts in a right elbow)
joints_src = joint_list_per_joint_type[joint_to_limb_heatmap_relationship[limb_type][0]]
# List of all joints of type B found, where B is specified by limb_type
# (eg: a right forearm ends in a right wrist)
joints_dst = joint_list_per_joint_type[joint_to_limb_heatmap_relationship[limb_type][1]]
if len(joints_src) == 0 or len(joints_dst) == 0:
# No limbs of this type found (eg: no right forearms found because
# we didn't find any right wrists or right elbows)
print ('no limbs of this type found.', joints_src, joints_dst)
connected_limbs.append([])
else:
connections = np.empty((0, 5))
# Specify the paf index that contains the x-coord of the paf for
# this limb
# the ids of this limb's PAF channels in the network output
#paf_xy_coords_per_limb gives each limb's PAF channel ids, e.g. the first limb uses the first two PAF channels
limb_intermed_coords[2, :] = paf_xy_coords_per_limb[limb_type][0]
# print (paf_xy_coords_per_limb[limb_type][0])
# And the y-coord paf index
limb_intermed_coords[3, :] = paf_xy_coords_per_limb[limb_type][1]
for i, joint_src in enumerate(joints_src):
# print (i, joint_src)
# Try every possible joints_src[i]-joints_dst[j] pair and see
# if it's a feasible limb
best_score = 0.0
best_connection = []
for j, joint_dst in enumerate(joints_dst):
# print (j, joint_dst)
# Subtract the position of both joints to obtain the
# direction of the potential limb, i.e. the vector connecting the two keypoints
limb_dir = joint_dst[:2] - joint_src[:2]
if (np.abs(limb_dir) > long_dist_thresh).any():
# print ('000')
continue
# Compute the distance/length of the potential limb (norm
# of limb_dir)
limb_dist = np.sqrt(np.sum(limb_dir ** 2)) + 1e-8
limb_dir = limb_dir / limb_dist # Normalize limb_dir to be a unit vector
#linearly interpolate the x coordinate between joint_src and joint_dst
# Linearly distribute num_intermed_pts points from the x
# coordinate of joint_src to the x coordinate of joint_dst
limb_intermed_coords[1, :] = np.round(np.linspace(
joint_src[0], joint_dst[0], num=num_intermed_pts))
#linearly interpolate the y coordinate between joint_src and joint_dst
# Same for the y coordinate
limb_intermed_coords[0, :] = np.round(np.linspace(
joint_src[1], joint_dst[1], num=num_intermed_pts))
# gather the PAF values between the two joints, then transpose
intermed_paf = paf_upsamp[limb_intermed_coords[0, :],
limb_intermed_coords[1, :], limb_intermed_coords[2:4, :]].T
# dot product
score_intermed_pts = intermed_paf.dot(limb_dir)
score_penalizing_long_dist = score_intermed_pts.mean()
# score_penalizing_long_dist = score_intermed_pts.mean() + min(0.5 * paf_upsamp.shape[0] / limb_dist - 1, 0)
# Criterion 1: At least 80% of the intermediate points have
# a score higher than thre2
criterion1 = (np.count_nonzero(
score_intermed_pts > param['thre2']) > 0.8 * num_intermed_pts)
# Criterion 2: Mean score, penalized for large limb
# distances (larger than half the image height), is
# positive
# print ('score penalizing long dist: {}, mean: {}'.format(score_penalizing_long_dist, score_intermed_pts.mean()))
criterion2 = (score_penalizing_long_dist > param['thre2'])
# print()
if criterion1 and criterion2 and score_penalizing_long_dist > best_score:
best_score = score_penalizing_long_dist
best_connection = [joint_src[3], joint_dst[3], score_penalizing_long_dist, i, j]
print('best_connection')
# if best_score > max_paf_score_thresh:
# break
# Last value is the combined paf(+limb_dist) + heatmap
# scores of both joints
if best_connection:
# print(best_connection[2])
# print('111', connections)
#stack the arrays vertically (row-wise) into a new array; the arrays being stacked must have matching dimensions
connections = np.vstack([connections, np.array(best_connection)])
connected_limbs.append(connections)
return connected_limbs
group_limbs_of_same_person groups the connections into individual people
def group_limbs_of_same_person(connected_limbs, joint_list):
"""
Associate limbs belonging to the same person together.
:param connected_limbs: See 'return' doc of find_connected_joints()
# joint_list_per_joint_type is a list of lists; it is flattened into an ndarray of shape (total_joints_num, 6). Each row holds
# 6 values: the first 5 match joint_list_per_joint_type, and the last one is the joint type, e.g. 0 for right shoulder, 1 for right elbow, ...
joint_list_per_joint_type: a NUM_JOINTS x 5 np.array where each row represents a joint type (0=nose, 1=neck...)
and the columns indicate the {x,y} position, the score (probability), a unique id (counter)
and a flag indicating whether this point has been used for assignment
:param joint_list: unravel'd version of joint_list_per_joint [See 'return' doc of NMS()]
:return: 2d np.array of size num_people x (NUM_JOINTS+2). For each person found:
# First NUM_JOINTS columns contain the index (in joint_list) of the joints associated
with that person (or -1 if their i-th joint wasn't found)
# 2nd-to-last column: Overall score of the joints+limbs that belong to this person
# Last column: Total count of joints found for this person
"""
# person_to_joint_assoc is a list of lists; each entry is a 1x(num_joints+2) ndarray
person_to_joint_assoc = []
for limb_type in range(NUM_LIMBS):
# the two joint types this limb connects
joint_src_type, joint_dst_type = joint_to_limb_heatmap_relationship[limb_type]
# iterate over the limb_type row of connected_limbs, i.e. over all limbs of this type that were found
"""
TODO
connected_limbs: List of NUM_LIMBS rows. For every limb_type (a row) we store a list of all limbs of that type found (eg: all the right forearms).
For each limb (each item in connected_limbs[limb_type]), we store 5 cells:
# {joint_src_id,joint_dst_id}: a unique number associated with each joint,
# limb_score_penalizing_long_dist: a score of how good a connection of the joints is, penalized if the limb length is too long
# {joint_src_index,joint_dst_index}: the index of the joint within all the joints of that type found (eg: the 3rd right elbow found)
"""
for limb_info in connected_limbs[limb_type]:
person_assoc_idx = []
# If any person already in person_to_joint_assoc has a connection whose unique_id matches the src or dst joint of the current limb_info,
# remember that person's index
for person, person_limbs in enumerate(person_to_joint_assoc):
if person_limbs[joint_src_type] == limb_info[0] or person_limbs[joint_dst_type] == limb_info[1]:
person_assoc_idx.append(person)
# If one of the joints has been associated to a person, and either
# the other joint is also associated with the same person or not
# associated to anyone yet:
if len(person_assoc_idx) == 1:
person_limbs = person_to_joint_assoc[person_assoc_idx[0]]
# If the other joint is not associated to anyone yet,
if person_limbs[joint_dst_type] != limb_info[1]:
# Associate it with the current person
person_limbs[joint_dst_type] = limb_info[1]
# Increase the number of limbs associated to this person
person_limbs[-1] += 1
# And update the total score (+= heatmap score of joint_dst
# + score of connecting joint_src with joint_dst)
person_limbs[-2] += joint_list[limb_info[1] #TODO heatmap score of joint_dst
.astype(int), 2] + limb_info[2]#TODO limb_score_penalizing_long_dist
elif len(person_assoc_idx) == 2: # if found 2 and disjoint, merge them
person1_limbs = person_to_joint_assoc[person_assoc_idx[0]]
person2_limbs = person_to_joint_assoc[person_assoc_idx[1]]
membership = ((person1_limbs >= 0) & (person2_limbs >= 0))[:-2]
if not membership.any(): # If both people have no same joints connected, merge them into a single person
# Update which joints are connected
person1_limbs[:-2] += (person2_limbs[:-2] + 1)
# Update the overall score and total count of joints
# connected by summing their counters
person1_limbs[-2:] += person2_limbs[-2:]
# Add the score of the current joint connection to the
# overall score
person1_limbs[-2] += limb_info[2]
person_to_joint_assoc.pop(person_assoc_idx[1])
else: # Same case as len(person_assoc_idx)==1 above
person1_limbs[joint_dst_type] = limb_info[1]
person1_limbs[-1] += 1
person1_limbs[-2] += joint_list[limb_info[1]
.astype(int), 2] + limb_info[2]
else: # No person has claimed any of these joints, create a new person
# Initialize person info to all -1 (no joint associations)
row = -1 * np.ones(NUM_JOINTS+2)
# Store the joint info of the new connection
row[joint_src_type] = limb_info[0]
row[joint_dst_type] = limb_info[1]
# Total count of connected joints for this person: 2
row[-1] = 2
# Compute overall score: score joint_src + score joint_dst + score connection
# {joint_src,joint_dst}
row[-2] = sum(joint_list[limb_info[:2].astype(int), 2]#TODO heatmap scores of joint_src and joint_dst
) + limb_info[2]#TODO plus limb_score_penalizing_long_dist
person_to_joint_assoc.append(row)
# Delete people who have very few parts connected
people_to_delete = []
for person_id, person_info in enumerate(person_to_joint_assoc):
if person_info[-1] < 3 or (person_info[-2] / person_info[-1] < 0.2):
people_to_delete.append(person_id)
# Traverse the list in reverse order so we delete indices starting from the
# last one (otherwise, removing item for example 0 would modify the indices of
# the remaining people to be deleted!)
for index in people_to_delete[::-1]:
person_to_joint_assoc.pop(index)
# Appending items to a np.array can be very costly (allocating new memory, copying over the array, then adding new row)
# Instead, we treat the set of people as a list (fast to append items) and
# only convert to np.array at the end
return np.array(person_to_joint_assoc)
Finally, the human keypoints and their limb connections are drawn.
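The drawing code itself is not reproduced in this post; a minimal cv2 sketch, assuming joint_list rows of [x, y, score, unique_id, used_flag, joint_type] and person rows as returned by group_limbs_of_same_person (limb_pairs standing in for joint_to_limb_heatmap_relationship):
import cv2

def draw_pose(canvas, joint_list, person_to_joint_assoc, limb_pairs):
    for person in person_to_joint_assoc:
        for src_type, dst_type in limb_pairs:
            src_id, dst_id = int(person[src_type]), int(person[dst_type])
            if src_id < 0 or dst_id < 0:  # this person is missing one of the joints
                continue
            x1, y1 = map(int, joint_list[src_id, :2])
            x2, y2 = map(int, joint_list[dst_id, :2])
            cv2.circle(canvas, (x1, y1), 3, (0, 255, 0), -1)
            cv2.circle(canvas, (x2, y2), 3, (0, 255, 0), -1)
            cv2.line(canvas, (x1, y1), (x2, y2), (255, 0, 0), 2)
    return canvas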
References:
https://www.cnblogs.com/jerryspace/p/10023851.html
https://numpy.org/doc/stable/reference/generated/numpy.rollaxis.html