A note up front:
This code is extracted from the official MVTec-AD dataset evaluation code, which can be downloaded as follows:
wget https://www.mydrive.ch/shares/60736/698155e0e6d0467c4ff6203b16a31dc9/download/439517473-1665667812/mvtec_ad_evaluation.tar.xz
tar -xvf mvtec_ad_evaluation.tar.xz
rm mvtec_ad_evaluation.tar.xz
The function takes three inputs: a list of ground truth masks, a list of predicted masks, and an integration limit on the false positive rate used for the AU-PRO (default 0.3).
It requires three Python modules (files) from the evaluation package: generic_util, pro_curve_util, and roc_curve_util.
The code is as follows:
The calculate_au_pro_au_roc function
import numpy as np

import generic_util
import pro_curve_util
import roc_curve_util


def calculate_au_pro_au_roc(ground_truth, predictions, integration_limit=0.3):
    # The original script read the ground truth and anomaly images from disk;
    # that logic is kept below for reference. Here, ground_truth and
    # predictions are passed in directly as lists of 2D numpy arrays.
    # ground_truth = []
    # predictions = []
    # print("Read ground truth files and corresponding predictions...")
    # for (gt_name, pred_name) in tqdm(zip(gt_filenames, prediction_filenames),
    #                                  total=len(gt_filenames)):
    #     prediction = util.read_tiff(pred_name)
    #     predictions.append(prediction)
    #     if gt_name is not None:
    #         ground_truth.append(np.asarray(Image.open(gt_name)))
    #     else:
    #         ground_truth.append(np.zeros(prediction.shape))

    # Compute the PRO curve.
    pro_curve = pro_curve_util.compute_pro(
        anomaly_maps=predictions,
        ground_truth_maps=ground_truth)

    # Compute the area under the PRO curve up to the integration limit.
    au_pro = generic_util.trapezoid(
        pro_curve[0], pro_curve[1], x_max=integration_limit)
    au_pro /= integration_limit
    print(f"AU-PRO (FPR limit: {integration_limit}): {au_pro}")

    # Derive binary labels for each input image:
    # (0 = anomaly free, 1 = anomalous).
    binary_labels = [int(np.any(x > 0)) for x in ground_truth]
    del ground_truth

    # Compute the classification ROC curve.
    roc_curve = roc_curve_util.compute_classification_roc(
        anomaly_maps=predictions,
        scoring_function=np.max,
        ground_truth_labels=binary_labels)

    # Compute the area under the classification ROC curve.
    au_roc = generic_util.trapezoid(roc_curve[0], roc_curve[1])
    print(f"Image-level classification AU-ROC: {au_roc}")

    # Return the evaluation metrics.
    return au_pro, au_roc
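As a quick smoke test (my own snippet, not part of the original evaluation code), the function can be fed synthetic lists of 2D arrays; all shapes and values below are arbitrary. Half of the images are marked anomalous so that both OK and NOK samples exist for the image-level ROC:

import numpy as np

# Build synthetic inputs: lists of 2D arrays, as the function expects.
np.random.seed(0)
ground_truth, predictions = [], []
for i in range(20):
    gt = np.zeros((64, 64), dtype=np.uint8)
    if i % 2 == 0:
        # Mark every second image as anomalous with a small square region.
        gt[10:20, 10:20] = 1
    ground_truth.append(gt)
    predictions.append(np.random.rand(64, 64))

au_pro, au_roc = calculate_au_pro_au_roc(ground_truth, predictions,
                                         integration_limit=0.3)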
generic_util.py
"""
Various utility functions for:
- parsing user arguments.
- computing the area under a curve.
- generating a toy dataset to test the evaluation script.
"""
from bisect import bisect
import os
import numpy as np
import tifffile as tiff
OBJECT_NAMES = ['bottle', 'cable', 'capsule', 'carpet', 'grid',
'hazelnut', 'leather', 'metal_nut', 'pill', 'screw',
'tile', 'toothbrush', 'transistor', 'wood', 'zipper']
def trapezoid(x, y, x_max=None):
"""
This function calculates the definit integral of a curve given by
x- and corresponding y-values. In contrast to, e.g., 'numpy.trapz()',
this function allows to define an upper bound to the integration range by
setting a value x_max.
Points that do not have a finite x or y value will be ignored with a
warning.
Args:
x: Samples from the domain of the function to integrate
Need to be sorted in ascending order. May contain the same value
multiple times. In that case, the order of the corresponding
y values will affect the integration with the trapezoidal rule.
y: Values of the function corresponding to x values.
x_max: Upper limit of the integration. The y value at max_x will be
determined by interpolating between its neighbors. Must not lie
outside of the range of x.
Returns:
Area under the curve.
"""
x = np.asarray(x)
y = np.asarray(y)
finite_mask = np.logical_and(np.isfinite(x), np.isfinite(y))
if not finite_mask.all():
print("WARNING: Not all x and y values passed to trapezoid(...)"
" are finite. Will continue with only the finite values.")
x = x[finite_mask]
y = y[finite_mask]
# Introduce a correction term if max_x is not an element of x.
correction = 0.
if x_max is not None:
if x_max not in x:
# Get the insertion index that would keep x sorted after
# np.insert(x, ins, x_max).
ins = bisect(x, x_max)
# x_max must be between the minimum and the maximum, so the
# insertion_point cannot be zero or len(x).
assert 0 < ins < len(x)
# Calculate the correction term which is the integral between
# the last x[ins-1] and x_max. Since we do not know the exact value
# of y at x_max, we interpolate between y[ins] and y[ins-1].
y_interp = y[ins - 1] + ((y[ins] - y[ins - 1]) *
(x_max - x[ins - 1]) /
(x[ins] - x[ins - 1]))
correction = 0.5 * (y_interp + y[ins - 1]) * (x_max - x[ins - 1])
# Cut off at x_max.
mask = x <= x_max
x = x[mask]
y = y[mask]
# Return area under the curve using the trapezoidal rule.
return np.sum(0.5 * (y[1:] + y[:-1]) * (x[1:] - x[:-1])) + correction
def read_tiff(file_path_no_ext, exts=('.tif', '.tiff', '.TIF', '.TIFF')):
"""Read a TIFF file from a given path without the TIFF extension.
Args:
file_path_no_ext: Path to the TIFF file without a file extension.
exts: TIFF extensions to consider when searching for the file.
Raises:
FileNotFoundError: The given file path does not exist with any of the
given extensions.
IOError: The given file path exists with multiple of the given
extensions.
"""
# Get all file paths that exist
file_paths = []
for ext in exts:
# Make sure the file path does not already end with a tiff extension.
assert not file_path_no_ext.endswith(ext)
file_path = file_path_no_ext + ext
if os.path.exists(file_path):
file_paths.append(file_path)
if len(file_paths) == 0:
raise FileNotFoundError('Could not find a file with a TIFF extension'
f' at {file_path_no_ext}')
elif len(file_paths) > 1:
raise IOError('Found multiple files with a TIFF extension at'
f' {file_path_no_ext}'
'\nPlease specify which TIFF extension to use via the'
' `exts` parameter of this function.')
return tiff.imread(file_paths[0])
def generate_toy_dataset(num_images, image_width, image_height, gt_size):
"""Generate a toy dataset to test the evaluation script.
Args:
num_images: Number of images that the toy dataset contains.
image_width: Width of the dataset images in pixels.
image_height: Height of the dataset images in pixels.
gt_size: Size of rectangular ground truth regions that are
artificially generated on the dataset images.
Returns:
anomaly_maps: List of numpy arrays that contain random anomaly maps.
ground_truth_map: Corresponding list of numpy arrays that specify a
rectangular ground truth region.
"""
# Fix a random seed for reproducibility.
np.random.seed(1338)
# Create synthetic evaluation data with random anomaly scores and
# simple ground truth maps.
anomaly_maps = []
ground_truth_maps = []
for _ in range(num_images):
# Sample a random anomaly map.
anomaly_map = np.random.random((image_height, image_width))
# Construct a fixed ground truth map.
ground_truth_map = np.zeros((image_height, image_width))
ground_truth_map[0:gt_size, 0:gt_size] = 1
anomaly_maps.append(anomaly_map)
ground_truth_maps.append(ground_truth_map)
return anomaly_maps, ground_truth_maps
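As a small illustration of the x_max correction term in trapezoid (a sketch with made-up numbers, not from the original code): integrating y = x up to x_max = 0.5 should give 0.5 * 0.5^2 = 0.125, and the interpolated correction recovers this even though 0.5 is not a sample point:

import numpy as np
from generic_util import trapezoid

# y = x sampled at three points; 0.5 lies between 0.4 and 1.0.
x = np.array([0.0, 0.4, 1.0])
y = np.array([0.0, 0.4, 1.0])
area = trapezoid(x, y, x_max=0.5)
print(area)  # ~0.125: 0.08 from the kept samples + 0.045 from the correction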
pro_curve_util.py
"""Utility function that computes a PRO curve, given pairs of anomaly and ground
truth maps.
The PRO curve can also be integrated up to a constant integration limit.
"""
import numpy as np
from scipy.ndimage.measurements import label
def compute_pro(anomaly_maps, ground_truth_maps):
"""Compute the PRO curve for a set of anomaly maps with corresponding ground
truth maps.
Args:
anomaly_maps: List of anomaly maps (2D numpy arrays) that contain a
real-valued anomaly score at each pixel.
ground_truth_maps: List of ground truth maps (2D numpy arrays) that
contain binary-valued ground truth labels for each pixel.
0 indicates that a pixel is anomaly-free.
1 indicates that a pixel contains an anomaly.
Returns:
fprs: numpy array of false positive rates.
pros: numpy array of corresponding PRO values.
"""
print("Compute PRO curve...")
# Structuring element for computing connected components.
structure = np.ones((3, 3), dtype=int)
num_ok_pixels = 0
num_gt_regions = 0
shape = (len(anomaly_maps),
anomaly_maps[0].shape[0],
anomaly_maps[0].shape[1])
fp_changes = np.zeros(shape, dtype=np.uint32)
assert shape[0] * shape[1] * shape[2] < np.iinfo(fp_changes.dtype).max, \
'Potential overflow when using np.cumsum(), consider using np.uint64.'
pro_changes = np.zeros(shape, dtype=np.float64)
for gt_ind, gt_map in enumerate(ground_truth_maps):
# Compute the connected components in the ground truth map.
labeled, n_components = label(gt_map, structure)
num_gt_regions += n_components
# Compute the mask that gives us all ok pixels.
ok_mask = labeled == 0
num_ok_pixels_in_map = np.sum(ok_mask)
num_ok_pixels += num_ok_pixels_in_map
# Compute by how much the FPR changes when each anomaly score is
# added to the set of positives.
# fp_change needs to be normalized later when we know the final value
# of num_ok_pixels -> right now it is only the change in the number of
# false positives
fp_change = np.zeros_like(gt_map, dtype=fp_changes.dtype)
fp_change[ok_mask] = 1
# Compute by how much the PRO changes when each anomaly score is
# added to the set of positives.
# pro_change needs to be normalized later when we know the final value
# of num_gt_regions.
pro_change = np.zeros_like(gt_map, dtype=np.float64)
for k in range(n_components):
region_mask = labeled == (k + 1)
region_size = np.sum(region_mask)
pro_change[region_mask] = 1. / region_size
fp_changes[gt_ind, :, :] = fp_change
pro_changes[gt_ind, :, :] = pro_change
# Flatten the numpy arrays before sorting.
anomaly_scores_flat = np.array(anomaly_maps).ravel()
fp_changes_flat = fp_changes.ravel()
pro_changes_flat = pro_changes.ravel()
# Sort all anomaly scores.
print(f"Sort {len(anomaly_scores_flat)} anomaly scores...")
sort_idxs = np.argsort(anomaly_scores_flat).astype(np.uint32)[::-1]
# Info: np.take(a, ind, out=a) followed by b=a instead of
# b=a[ind] showed to be more memory efficient.
np.take(anomaly_scores_flat, sort_idxs, out=anomaly_scores_flat)
anomaly_scores_sorted = anomaly_scores_flat
np.take(fp_changes_flat, sort_idxs, out=fp_changes_flat)
fp_changes_sorted = fp_changes_flat
np.take(pro_changes_flat, sort_idxs, out=pro_changes_flat)
pro_changes_sorted = pro_changes_flat
del sort_idxs
# Get the (FPR, PRO) curve values.
np.cumsum(fp_changes_sorted, out=fp_changes_sorted)
fp_changes_sorted = fp_changes_sorted.astype(np.float32, copy=False)
np.divide(fp_changes_sorted, num_ok_pixels, out=fp_changes_sorted)
fprs = fp_changes_sorted
np.cumsum(pro_changes_sorted, out=pro_changes_sorted)
np.divide(pro_changes_sorted, num_gt_regions, out=pro_changes_sorted)
pros = pro_changes_sorted
# Merge (FPR, PRO) points that occur together at the same threshold.
# For those points, only the final (FPR, PRO) point should be kept.
# That is because that point is the one that takes all changes
# to the FPR and the PRO at the respective threshold into account.
# -> keep_mask is True if the subsequent score is different from the
# score at the respective position.
# anomaly_scores_sorted = [7, 4, 4, 4, 3, 1, 1]
# -> keep_mask = [T, F, F, T, T, F]
keep_mask = np.append(np.diff(anomaly_scores_sorted) != 0, np.True_)
del anomaly_scores_sorted
fprs = fprs[keep_mask]
pros = pros[keep_mask]
del keep_mask
# To mitigate the adding up of numerical errors during the np.cumsum calls,
# make sure that the curve ends at (1, 1) and does not contain values > 1.
np.clip(fprs, a_min=None, a_max=1., out=fprs)
np.clip(pros, a_min=None, a_max=1., out=pros)
# Make the fprs and pros start at 0 and end at 1.
zero = np.array([0.])
one = np.array([1.])
return np.concatenate((zero, fprs, one)), np.concatenate((zero, pros, one))
def main():
"""
Compute the area under the PRO curve for a toy dataset and an algorithm
that randomly assigns anomaly scores to each pixel. The integration
limit can be specified.
"""
from generic_util import trapezoid, generate_toy_dataset
integration_limit = 0.3
# Generate a toy dataset.
anomaly_maps, ground_truth_maps = generate_toy_dataset(
num_images=200, image_width=500, image_height=300, gt_size=10)
# Compute the PRO curve for this dataset.
all_fprs, all_pros = compute_pro(
anomaly_maps=anomaly_maps,
ground_truth_maps=ground_truth_maps)
au_pro = trapezoid(all_fprs, all_pros, x_max=integration_limit)
au_pro /= integration_limit
print(f"AU-PRO (FPR limit: {integration_limit}): {au_pro}")
if __name__ == "__main__":
main()
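To make the mechanics of compute_pro concrete, here is a tiny hand-checkable sketch (illustrative values of my own, not from the original code). The single anomalous pixel receives the highest score, so the PRO reaches 1.0 while the FPR is still 0:

import numpy as np
from pro_curve_util import compute_pro

gt = np.array([[0, 0],
               [0, 1]])          # one ground truth region of size 1
scores = np.array([[0.1, 0.2],
                   [0.3, 0.9]])  # the anomalous pixel scores highest
fprs, pros = compute_pro(anomaly_maps=[scores], ground_truth_maps=[gt])
print(fprs)  # approx. [0., 0., 0.333, 0.667, 1., 1.]
print(pros)  # [0., 1., 1., 1., 1., 1.]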
roc_curve_util.py
"""
Utility functions that compute a ROC curve and integrate its area from a set
of anomaly maps and corresponding ground truth classification labels.
"""
import numpy as np
def compute_classification_roc(
anomaly_maps,
scoring_function,
ground_truth_labels):
"""Compute the ROC curve for anomaly classification on the image level.
Args:
anomaly_maps: List of anomaly maps (2D numpy arrays) that contain
a real-valued anomaly score at each pixel.
scoring_function: Function that turns anomaly maps into a single
real valued anomaly score.
ground_truth_labels: List of integers that indicate the ground truth
class for each input image. 0 corresponds to an anomaly-free sample
while a value != 0 indicates an anomalous sample.
Returns:
fprs: List of false positive rates.
tprs: List of correspoding true positive rates.
"""
assert len(anomaly_maps) == len(ground_truth_labels)
# Compute the anomaly score for each anomaly map.
anomaly_scores = map(scoring_function, anomaly_maps)
num_scores = len(anomaly_maps)
# Sort samples by anomaly score. Keep track of ground truth label.
sorted_samples = \
sorted(zip(anomaly_scores, ground_truth_labels), key=lambda x: x[0])
# Compute the number of OK and NOK samples from the ground truth.
ground_truth_labels_np = np.array(ground_truth_labels)
num_nok = ground_truth_labels_np[ground_truth_labels_np != 0].size
num_ok = ground_truth_labels_np[ground_truth_labels_np == 0].size
# Initially, every NOK sample is correctly classified as anomalous
# (tpr = 1.0), and every OK sample is incorrectly classified as anomalous
# (fpr = 1.0).
fprs = [1.0]
tprs = [1.0]
# Keep track of the current number of false and true positive predictions.
num_fp = num_ok
num_tp = num_nok
# Compute new true and false positive rates when successively increasing
# the threshold.
next_score = None
for i, (current_score, label) in enumerate(sorted_samples):
if label == 0:
num_fp -= 1
else:
num_tp -= 1
if i < num_scores - 1:
next_score = sorted_samples[i + 1][0]
else:
next_score = None # end of list
if (next_score != current_score) or (next_score is None):
fprs.append(num_fp / num_ok)
tprs.append(num_tp / num_nok)
# Return (FPR, TPR) pairs in increasing order.
fprs = fprs[::-1]
tprs = tprs[::-1]
return fprs, tprs
def main():
"""
Compute the area under the ROC curve for a toy dataset and an algorithm
that randomly assigns anomaly scores to each image pixel.
"""
from generic_util import trapezoid, generate_toy_dataset
# Generate a toy dataset.
anomaly_maps, _ = generate_toy_dataset(
num_images=10000, image_width=30, image_height=30, gt_size=0)
# Assign a random classification label to each image.
np.random.seed(42)
labels = np.random.randint(2, size=len(anomaly_maps))
# Compute the ROC curve.
all_fprs, all_tprs = compute_classification_roc(anomaly_maps=anomaly_maps,
scoring_function=np.max,
ground_truth_labels=labels)
# Compute the area under the ROC curve.
au_roc = trapezoid(all_fprs, all_tprs)
print(f"AU-ROC: {au_roc}")
if __name__ == "__main__":
main()
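Analogously, a four-image sketch for compute_classification_roc (made-up scores, my own example): the anomaly-free image scored 0.8 outranks one anomalous image, which caps the AU-ROC at 0.75.

import numpy as np
from roc_curve_util import compute_classification_roc
from generic_util import trapezoid

# Two OK images (label 0) and two anomalous images (label 1); np.max
# reduces each map to a single image-level score.
maps = [np.array([[0.1]]), np.array([[0.8]]),
        np.array([[0.4]]), np.array([[0.9]])]
labels = [0, 0, 1, 1]
fprs, tprs = compute_classification_roc(maps, np.max, labels)
print(list(zip(fprs, tprs)))
# [(0.0, 0.0), (0.0, 0.5), (0.5, 0.5), (0.5, 1.0), (1.0, 1.0)]
print(trapezoid(fprs, tprs))  # 0.75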
Usage notes
Pay attention to the list format of the ground_truth and predictions arguments to calculate_au_pro_au_roc: each is a plain Python list [] to which a series of 2D arrays has been appended. If you run into an error, debug first and check whether the format of your inputs matches; if it does not, converting it is straightforward.
For example, in my case the input was a list holding a series of 3D arrays:
ground_truth = []
predictions = []
for gt, pred in zip(gt_mask_list, segment_scores):
    ground_truth.append(gt)
    predictions.append(pred[0])
    # Why pred[0] but not gt[0]? Adapt this to your own data: if gt is already
    # 2D after unpacking, there is no need to reduce its dimensionality again.
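If you want something more defensive, here is a sketch (my addition, not from the original post) that squeezes away singleton dimensions before calling the function; gt_mask_list and segment_scores are the same placeholder names as above:

import numpy as np

def to_2d_list(arrays):
    """Squeeze singleton dimensions so each element becomes a 2D array."""
    out = []
    for a in arrays:
        a = np.squeeze(np.asarray(a))
        assert a.ndim == 2, f"expected a 2D mask/map, got shape {a.shape}"
        out.append(a)
    return out

ground_truth = to_2d_list(gt_mask_list)
predictions = to_2d_list(segment_scores)
au_pro, au_roc = calculate_au_pro_au_roc(ground_truth, predictions)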