First, let's look at how the original paper describes the metrics:
At first I found n_ji and n_ij quite confusing. Once I sorted them out, it turned out to be very simple:
$\sum_{i} n_{ii}$: the pixels of class $i$ that are correctly predicted as class $i$, summed over all classes, i.e. the True Positives.
$\sum_{j} n_{ij} = t_i$: the pixels of class $i$ that are predicted as class $j$, summed over all classes $j$. This is exactly the number of pixels with class $i$ in the ground truth!
$\sum_{j} n_{ji}$: the pixels of class $j$ that are predicted as class $i$, summed over all classes $j$. This is exactly the number of pixels predicted as class $i$!
(Note, however, that everything in this post about the multi-class case is viewed from a one-vs.-all perspective, i.e. there is an implicit one-hot encoding. When I asked a senior classmate, he said that for multi-class problems in general, 1/1 and 0/0 both count as correct, while 0/1 and 1/0 both count as wrong.)
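Putting the three quantities together for a single class $i$ in this one-vs.-all vocabulary (this merely restates the definitions above; TP/FP/FN are spelled out again further below):

$n_{ii} = TP_i,\qquad t_i = \sum_{j} n_{ij} = TP_i + FN_i,\qquad \sum_{j} n_{ji} = TP_i + FP_i$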
Expressed in code (for the multi-class case):
# If gt and preds are not one-hot encoded:
t_i = np.sum(GT == i)        # = sum_j(n_ij): number of ground-truth pixels of class i
n_ji = np.sum(Preds == i)    # = sum_j(n_ji): number of pixels predicted as class i
# If gt and preds are one-hot encoded (one mask per class):
curr_gt_mask = gt_masks[i, :, :]
curr_pred_mask = pred_masks[i, :, :]
t_i = np.sum(curr_gt_mask)
n_ji = np.sum(curr_pred_mask)
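A tiny self-contained check of the two counting styles above (the arrays GT, Preds, gt_masks and pred_masks below are toy data made up purely for illustration):
import numpy as np

GT = np.array([0, 1, 1, 2, 2, 2])      # toy ground-truth labels
Preds = np.array([0, 1, 2, 2, 2, 1])   # toy predictions
i = 2
# without one-hot encoding
t_i = np.sum(GT == i)                  # 3 ground-truth pixels of class 2
n_ji = np.sum(Preds == i)              # 3 pixels predicted as class 2
# with one-hot encoding: stack one boolean mask per class
gt_masks = np.stack([GT == c for c in range(3)])
pred_masks = np.stack([Preds == c for c in range(3)])
assert t_i == np.sum(gt_masks[i]) and n_ji == np.sum(pred_masks[i])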
This naturally leads to the confusion_matrix. I remember thinking about the multi-class confusion matrix about half a month ago, but my understanding back then was still far from solid.
The basic confusion matrix is shown in the figure below:
$\sum_{j} n_{ji} - n_{ii} = \text{False Positive}$
$\sum_{j} n_{ij} - n_{ii} = \text{False Negative}$
So in IoU, the so-called union is $t_i + \sum_{j} n_{ji} - n_{ii} = (TP + FN) + (TP + FP) - TP = TP + FP + FN$.
import numpy as np
from sklearn.metrics import confusion_matrix
y_true = [1, -1, 0, 0, 1, -1, 1, 0, -1, 0, 1, -1, 1, 0, 0, -1, 0]
y_prediction = [-1, -1, 1, 0, 0, 0, 0, -1, 1, -1, 1, 1, 0, 0, 1, 1, -1]
cnf_matrix = confusion_matrix(y_true, y_prediction)
print(cnf_matrix)
#[[1 1 3]
# [3 2 2]
# [1 3 1]]
FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)  # column-wise view
FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)  # row-wise view
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum() - (FP + FN + TP)  # see the explanation below
For TN, the one-vs.-all perspective makes it easy to understand (take class 0 of the confusion matrix above as an example).
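Concretely, treat label 0 (the middle row and column of the matrix printed above) as the "positive" class: TP = 2, FP = 1 + 3 = 4 (the rest of its column), FN = 3 + 2 = 5 (the rest of its row), and TN = 17 - (2 + 4 + 5) = 6, which is exactly the sum of the four entries touching neither the middle row nor the middle column (1 + 3 + 1 + 1). Under the one-vs.-all view, every sample that is neither labelled nor predicted as the current class counts as a true negative.
With per-class TP, FP and FN in hand, the per-class IoU from the union formula above can be read straight off the confusion matrix. A minimal continuation of the snippet (reusing the TP, FP, FN arrays computed there):
# Per-class IoU = TP / (TP + FP + FN), i.e. intersection over union,
# one value per class in sklearn's sorted label order [-1, 0, 1].
IoU = TP / (TP + FP + FN)
print(IoU)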
Code: GitHub: Image Segmentation Evaluation
import numpy as np
def pixel_accuracy(eval_segm, gt_segm):
    '''
    sum_i(n_ii) / sum_i(t_i)
    '''
    check_size(eval_segm, gt_segm)
    cl, n_cl = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)
    sum_n_ii = 0
    sum_t_i = 0
    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
        sum_n_ii += np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        sum_t_i += np.sum(curr_gt_mask)
    if (sum_t_i == 0):
        pixel_accuracy_ = 0
    else:
        pixel_accuracy_ = sum_n_ii / sum_t_i
    return pixel_accuracy_
def mean_accuracy(eval_segm, gt_segm):
    '''
    (1/n_cl) sum_i(n_ii/t_i)
    '''
    check_size(eval_segm, gt_segm)
    cl, n_cl = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)
    accuracy = list([0]) * n_cl
    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i = np.sum(curr_gt_mask)
        if (t_i != 0):
            accuracy[i] = n_ii / t_i
    mean_accuracy_ = np.mean(accuracy)
    return mean_accuracy_
def mean_IU(eval_segm, gt_segm):
    '''
    (1/n_cl) * sum_i(n_ii / (t_i + sum_j(n_ji) - n_ii))
    '''
    check_size(eval_segm, gt_segm)
    cl, n_cl = union_classes(eval_segm, gt_segm)
    _, n_cl_gt = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)
    IU = list([0]) * n_cl
    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
        if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0):
            continue
        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i = np.sum(curr_gt_mask)
        n_ij = np.sum(curr_eval_mask)  # despite the name, this is sum_j(n_ji): pixels predicted as class c
        IU[i] = n_ii / (t_i + n_ij - n_ii)
    # the mean is taken over the classes present in the ground truth
    mean_IU_ = np.sum(IU) / n_cl_gt
    return mean_IU_
def frequency_weighted_IU(eval_segm, gt_segm):
    '''
    sum_k(t_k)^(-1) * sum_i((t_i*n_ii)/(t_i + sum_j(n_ji) - n_ii))
    '''
    check_size(eval_segm, gt_segm)
    cl, n_cl = union_classes(eval_segm, gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)
    frequency_weighted_IU_ = list([0]) * n_cl
    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
        if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0):
            continue
        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i = np.sum(curr_gt_mask)
        n_ij = np.sum(curr_eval_mask)
        frequency_weighted_IU_[i] = (t_i * n_ii) / (t_i + n_ij - n_ii)
    sum_k_t_k = get_pixel_area(eval_segm)
    frequency_weighted_IU_ = np.sum(frequency_weighted_IU_) / sum_k_t_k
    return frequency_weighted_IU_
'''
Auxiliary functions used during evaluation.
'''
def get_pixel_area(segm):
    return segm.shape[0] * segm.shape[1]

def extract_both_masks(eval_segm, gt_segm, cl, n_cl):
    eval_mask = extract_masks(eval_segm, cl, n_cl)
    gt_mask = extract_masks(gt_segm, cl, n_cl)
    return eval_mask, gt_mask

def extract_classes(segm):
    cl = np.unique(segm)
    n_cl = len(cl)
    return cl, n_cl

def union_classes(eval_segm, gt_segm):
    eval_cl, _ = extract_classes(eval_segm)
    gt_cl, _ = extract_classes(gt_segm)
    cl = np.union1d(eval_cl, gt_cl)
    n_cl = len(cl)
    return cl, n_cl

def extract_masks(segm, cl, n_cl):
    h, w = segm_size(segm)
    masks = np.zeros((n_cl, h, w))
    for i, c in enumerate(cl):
        masks[i, :, :] = segm == c
    return masks

def segm_size(segm):
    try:
        height = segm.shape[0]
        width = segm.shape[1]
    except IndexError:
        raise
    return height, width

def check_size(eval_segm, gt_segm):
    h_e, w_e = segm_size(eval_segm)
    h_g, w_g = segm_size(gt_segm)
    if (h_e != h_g) or (w_e != w_g):
        raise EvalSegErr("DiffDim: Different dimensions of matrices!")

'''
Exceptions
'''
class EvalSegErr(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)
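Finally, a minimal usage sketch of the quoted functions (the two 3x3 label maps are toy data made up here; the function names and argument order are exactly those defined above):
gt = np.array([[0, 0, 1],
               [0, 2, 2],
               [1, 1, 2]])
pred = np.array([[0, 0, 1],
                 [0, 2, 1],
                 [1, 1, 1]])
print(pixel_accuracy(pred, gt))         # sum_i(n_ii) / sum_i(t_i)
print(mean_accuracy(pred, gt))          # mean over classes of n_ii / t_i
print(mean_IU(pred, gt))                # mean over classes of per-class IoU
print(frequency_weighted_IU(pred, gt))  # per-class IoU weighted by t_i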