# Source code for playground_metrics.map_metric

"""Implement the public interface to compute mAP from a set of detections and ground truths.

For more information on how the metric is computed, see: :doc:`../content/map_metric`.

If one wants to integrate the module into a framework to use it as a validation metric, the
:class:`~MeanAveragePrecisionMetric` class described below should be wrapped accordingly to follow the framework
convention.
"""
import warnings
from collections import defaultdict

import numpy as np

from .utils import to_builtin, to_list
from .match_detections import MatchEngineIoU
from .utils.conversion import get_type_and_convert


class MeanAveragePrecisionMetric:
    r"""Implement an API to compute mAP.

    It gives three methods:

    * :meth:`update(detections, ground_truths) <update>` which accumulates TP, FP and FN over examples
    * :meth:`compute` which computes mAP and AP per label from accumulated values
    * :meth:`reset` which resets accumulated values to their initial values to start mAP computation from scratch

    See Also:
        Information on how **mAP**, **AP**, **precision** and **recall** are computed may be found in
        :doc:`../content/map_metric`.

    Args:
        threshold (float): Optional, default to 0.5. Similarity threshold for which we consider a valid match between
            detection and ground truth.
        match_algorithm (str): Optional, default to 'coco'. 'xview' or 'coco' to choose the matching algorithm
            (c.f. :ref:`match`) or 'non-unitary' to use non-unitary matching.
        label_mean_area (dict) : Optional, default to ``None``. A dictionary containing the mean area for each label
            in the dataset, if given, it is used to match with *iIoU* instead of *IoU* (c.f. :ref:`iiou`).
        trim_invalid_geometry (bool): Optional, default to ``False``. If set to ``True`` conversion will ignore
            invalid geometries and leave them out of mAP computations. This means that the function will work on
            arrays where ``work_array.shape[0] <= input_array.shape[0]``. If set to ``False``, an invalid geometry
            will raise an :exc:`~playground_metrics.utils.geometry_utils.InvalidGeometryError`.
        autocorrect_invalid_geometry (bool): Optional, default to ``False``. Whether to attempt correcting a faulty
            geometry to form a valid one. If set to ``True`` and the autocorrect attempt is unsuccessful, it falls
            back to the behaviour defined in ``trim_invalid_geometry``.
        match_engine (:class:`~map_metric_api.match_detections.MatchEngineBase`): Optional, default to
            :class:`~playground_metrics.match_detections.MatchEngineIoU`. If provided matching will be done using the
            provided ``match_engine`` instead of the default one. Note that the ``threshold`` and ``match_algorithm``
            provided parameters will be overridden by those provided in the ``match_engine``.

    Warning:
        When using non-unitary matching, the AP per class and the mAP are ill-defined and must be taken with a grain
        of salt.

    Warns:
        UserWarning: If the requested ``match_algorithm`` or the one exposed by ``match_engine`` is 'non-unitary',
            to warn that mAP and AP per class values are ill-defined.
        RuntimeWarning: If a ``match_engine`` is provided and its ``threshold`` or ``match_algorithm`` attribute
            differs from the one explicitly provided as argument to the constructor.
        FutureWarning: If a ``match_engine`` is provided together with ``threshold`` or ``match_algorithm``.

    Note:
        * Polygon auto-correction only corrects self-crossing exterior rings, in which case it creates one Polygon
          out of every simple ring which might be extracted from the original Polygon exterior.
        * Polygon auto-correction will systematically fail on Polygons with at least one inner ring.

    Attributes:
        mAP (float) : The mAP computed by :meth:`compute` from accumulated values
        average_precision_per_class (defaultdict) : The AP for each label as constructed by :meth:`compute` from
            accumulated values
        precision_per_class (defaultdict) : The precision for each label as constructed by :meth:`compute` from
            accumulated values
        recall_per_class (defaultdict) : The recall for each label as constructed by :meth:`compute` from
            accumulated values
        number_true_detection_per_class (defaultdict): The number of detection matched to a ground truth as
            constructed by :meth:`compute` from accumulated values
        number_false_detection_per_class (defaultdict): The number of detection not matched to a ground truth as
            constructed by :meth:`compute` from accumulated value
        number_found_ground_truth_per_class (defaultdict): The number of ground truth matched to a detection as
            constructed by :meth:`compute` from accumulated values
        number_missed_ground_truth_per_class (defaultdict): The number of ground truth not matched to a detection as
            constructed by :meth:`compute` from accumulated values
        match_engine (:class:`~map_metric_api.match_detections.MatchEngineBase`) : The match_engine object used to
            match detections and ground truths. If none was provided in the constructor call, it defaults to
            :class:`~playground_metrics.match_detections.MatchEngineIoU`.

    """

    def __init__(self, threshold=None, match_algorithm=None, label_mean_area=None,
                 trim_invalid_geometry=False, autocorrect_invalid_geometry=False, match_engine=None):
        # Remember what the caller explicitly asked for *before* defaults are applied, so that the
        # discrepancy warnings below only fire for values the user actually provided.
        requested_threshold = threshold
        requested_algorithm = match_algorithm

        if match_engine is not None and (threshold is not None or match_algorithm is not None):
            warnings.warn('In the future match_engine will be made incompatible with threshold and match_algorithm. '
                          'Providing both will raise a ValueError.', FutureWarning)

        # Set configurations values
        threshold = threshold if threshold is not None else 0.5
        match_algorithm = match_algorithm or 'coco'
        if match_engine is None:
            match_engine = MatchEngineIoU(threshold, match_algorithm)
        self.match_engine = match_engine

        # A custom engine is not required to expose ``threshold`` or ``match_algorithm``; both reads are
        # tolerant (``self.threshold`` already returns None when the attribute is missing).
        engine_threshold = self.threshold
        engine_algorithm = getattr(self.match_engine, 'match_algorithm', None)

        if requested_threshold is not None and requested_threshold != engine_threshold:
            warnings.warn('Discrepancy between user provided threshold and '
                          'match_engine threshold ({} != {})'.format(requested_threshold, engine_threshold),
                          RuntimeWarning)

        if requested_algorithm and requested_algorithm != engine_algorithm:
            warnings.warn('Discrepancy between user provided match_algorithm and '
                          'match_engine match_algorithm ({} != {})'.format(requested_algorithm, engine_algorithm),
                          RuntimeWarning)

        # Warn for the requested algorithm (historical behaviour) as well as for the one the engine
        # effectively uses, since the engine's configuration takes precedence.
        if 'non-unitary' in (match_algorithm, engine_algorithm):
            warnings.warn('When using non-unitary matching, the AP per class and the mAP are '
                          'ill-defined and must be taken with a grain of salt.', UserWarning)

        self.label_mean_area = label_mean_area
        self.trim_invalid_geometry = trim_invalid_geometry
        self.autocorrect_invalid_geometry = autocorrect_invalid_geometry

        # Set intermediate and return values
        self._init_values()

    @property
    def threshold(self):  # noqa: D205,D400
        """float: The IoU threshold used by :attr:`self.match_engine <match_engine>` or ``None`` if
        :attr:`self.match_engine <match_engine>` doesn't use any threshold.
        """
        try:
            return self.match_engine.threshold
        except AttributeError:
            return None

    @property
    def ground_truth_labels(self):
        """set: The set of unique label accumulated up to this point."""
        return self._ground_truth_labels

    def _init_values(self):
        """(Re)create every accumulator and every result attribute with its initial value."""
        # Set intermediate values (per-label accumulators filled by ``update``)
        self._detection_matched = defaultdict(self._empty_array)
        self._ground_truth_matched = defaultdict(self._empty_array)
        self._number_of_ground_truths = defaultdict(int)
        self._ground_truth_labels = set()
        self._confidence = defaultdict(self._empty_array)

        # Set return values (filled by ``compute``)
        self.mAP = 0.0  # pylint: disable=invalid-name
        self.average_precision_per_class = defaultdict(float)
        self.precision_per_class = defaultdict(float)
        self.recall_per_class = defaultdict(float)
        self.number_true_detection_per_class = defaultdict(int)
        self.number_false_detection_per_class = defaultdict(int)
        self.number_found_ground_truth_per_class = defaultdict(int)
        self.number_missed_ground_truth_per_class = defaultdict(int)

    def update(self, detections, ground_truths):
        r"""Accumulate values necessary to compute mAP with detections and ground truths of a single image.

        Args:
            detections (ndarray, list) : A ndarray of detections stored as:

                * Bounding boxes for a given class where each row is a detection stored as:
                  ``[x_min, y_min, x_max, y_max, confidence, label]``
                * Polygons for a given class where each row is a detection stored as:
                  ``[[[outer_ring], [inner_rings]], confidence, label]``
                * Points for a given class where each row is a detection stored as:
                  ``[x, y, confidence, label]``

            ground_truths (ndarray,list) : A ndarray of ground truth stored as:

                * Bounding boxes for a given class where each row is a ground truth stored as:
                  ``[x_min, y_min, x_max, y_max, label]``
                * Polygons for a given class where each row is a ground truth stored as:
                  ``[[[outer_ring], [inner_rings]], label]``
                * Points for a given class where each row is a ground truth stored as:
                  ``[x, y, label]``

        Raises:
            KeyError : If ``self.label_mean_area`` is not ``None`` but a label is missing

        The input detections and ground truths are allowed to be **points** in the documentation. This is to allow
        the use of a custom `MatchEngine` for points, however, the default
        :class:`~playground_metrics.match_detections.MatchEngineIoU` works on **intersection-over-union** which is
        incompatible with **points**. More information on input geometrical types can be found in
        :doc:`playground_metrics.match_detections`.

        Note:
            The labels provided in the input arrays can theoretically be any hashable type, however, only numeric
            types, strings and tuples are officially supported.

        """
        # The converted arrays are indexed below as (geometry, confidence, label) columns for detections
        # and (geometry, label) columns for ground truths. The detected input type is not needed here.
        _, detections = self._format_input(detections)
        _, ground_truths = self._format_input(ground_truths)

        if detections.size == ground_truths.size == 0:
            return

        if detections.size == 0:
            # Only ground truths: every one of them is a miss (unmatched)
            self._ground_truth_labels.update(to_list(ground_truths[:, 1]))
            ground_truth_label_column = to_builtin(ground_truths[:, 1])
            for ground_truth_label in self._ground_truth_labels:
                number_of_ground_truths = ground_truths[ground_truth_label_column == ground_truth_label, :].shape[0]
                self._number_of_ground_truths[ground_truth_label] += number_of_ground_truths
                self._ground_truth_matched[ground_truth_label] = np.concatenate(
                    (self._ground_truth_matched[ground_truth_label], np.zeros(number_of_ground_truths)))
            return

        if ground_truths.size == 0:
            # Only detections: every one of them is a false positive (unmatched)
            self._ground_truth_labels.update(to_list(detections[:, 2]))
            detection_label_column = to_builtin(detections[:, 2])
            for ground_truth_label in self._ground_truth_labels:
                label_detections = detections[detection_label_column == ground_truth_label, :]
                self._detection_matched[ground_truth_label] = np.concatenate(
                    (self._detection_matched[ground_truth_label], np.zeros(label_detections.shape[0])))
                self._confidence[ground_truth_label] = np.concatenate(
                    (self._confidence[ground_truth_label], np.sort(label_detections[:, 1])[::-1]))
            return

        self._ground_truth_labels.update(to_list(ground_truths[:, 1]), to_list(detections[:, 2]))
        # The label columns do not depend on the label being processed: convert them once
        detection_label_column = to_builtin(detections[:, 2])
        ground_truth_label_column = to_builtin(ground_truths[:, 1])

        for ground_truth_label in self._ground_truth_labels:
            try:
                mean_area = self.label_mean_area[ground_truth_label]
            except KeyError as error:
                raise KeyError('label_mean_area is missing the label {}'.format(ground_truth_label)) from error
            except TypeError:
                # label_mean_area is None (not subscriptable): fall back to plain matching
                mean_area = None

            label_detections = detections[detection_label_column == ground_truth_label, :2]
            label_ground_truths = ground_truths[ground_truth_label_column == ground_truth_label, :1]

            match_matrix = self.match_engine.match(label_detections, label_ground_truths,
                                                   label_mean_area=mean_area)

            # Having this before checking if there were detections for this particular class breaks the xview score
            # equality test, however this is the way to go to ensure that False-Negative are correctly accounted for
            # in all cases. So xView scoring code is wrong again here, sorry xView.
            self._number_of_ground_truths[ground_truth_label] += len(label_ground_truths)
            self._ground_truth_matched[ground_truth_label] = np.concatenate(
                (self._ground_truth_matched[ground_truth_label], np.clip(match_matrix.sum(0), 0, 1)))

            # If no detections for this label pass here
            if match_matrix.shape[0] == 0:
                continue

            self._detection_matched[ground_truth_label] = np.concatenate(
                (self._detection_matched[ground_truth_label], np.clip(match_matrix.sum(1), 0, 1)))
            # NOTE(review): confidences are stored sorted in decreasing order; this presumably mirrors the row
            # order of the match matrix returned by the match engine - confirm against the engine implementation.
            self._confidence[ground_truth_label] = np.concatenate(
                (self._confidence[ground_truth_label], np.sort(label_detections[:, 1])[::-1]))

    def compute(self):
        r"""Compute the **mAP** according to the accumulated values.

        Moreover it sets the value for the following attributes:

        * :attr:`self.precision_per_class <precision_per_class>`: A dict of precisions per label
        * :attr:`self.recall_per_class <recall_per_class>`: A dict of recall per label
        * :attr:`self.average_precision_per_class <average_precision_per_class>`: A dict of average precisions
          per label
        * :attr:`self.number_true_detection_per_class <number_true_detection_per_class>`: A dict of the number of
          detection matched to a ground truth
        * :attr:`self.number_false_detection_per_class <number_false_detection_per_class>`: A dict of the number of
          detection not matched to a ground truth
        * :attr:`self.number_found_ground_truth_per_class <number_found_ground_truth_per_class>`: A dict of the
          number of ground truth matched to a detection
        * :attr:`self.number_missed_ground_truth_per_class <number_missed_ground_truth_per_class>`: A dict of the
          number of ground truth not matched to a detection

        Labels without any accumulated ground truth get a precision of ``0.0``, a recall of ``nan`` and count as an
        AP of ``0`` in the mean.

        Returns:
            float : The Mean Average Precision metric

        """
        epsilon = np.spacing(1)  # Avoids divisions by zero without visibly changing the ratios

        for ground_truth_label in self._ground_truth_labels:
            detection_matched = self._detection_matched[ground_truth_label]
            ground_truth_matched = self._ground_truth_matched[ground_truth_label]
            number_of_ground_truths = self._number_of_ground_truths[ground_truth_label]

            # Compute the Det positive, Det negative, Gt positive and gt negative counters
            self.number_true_detection_per_class[ground_truth_label] = np.sum(detection_matched).item()
            self.number_false_detection_per_class[ground_truth_label] = \
                np.sum(np.logical_not(detection_matched)).item()
            self.number_found_ground_truth_per_class[ground_truth_label] = np.sum(ground_truth_matched).item()
            self.number_missed_ground_truth_per_class[ground_truth_label] = \
                np.sum(np.logical_not(ground_truth_matched)).item()

            if number_of_ground_truths == 0:
                self.precision_per_class[ground_truth_label] = 0.0
                self.recall_per_class[ground_truth_label] = np.nan
                continue

            # Prepare the cumulative sum along confidence-sorted detections to compute Precision(Recall)
            sorted_detection_indices = np.argsort(self._confidence[ground_truth_label])[::-1]
            ranked_matches = detection_matched[sorted_detection_indices]
            tp_sum = np.cumsum(ranked_matches)
            fp_sum = np.cumsum(np.logical_not(ranked_matches))

            # Compute the Precision(Recall) function
            precision = tp_sum / (tp_sum + fp_sum + epsilon)
            recall = tp_sum / (number_of_ground_truths + epsilon)

            # Compute the precision and recall
            # For precision tp is the number of detections matched to the ground truth (unique, non-unique matches)
            self.precision_per_class[ground_truth_label] = \
                (np.sum(detection_matched) / (len(detection_matched) + epsilon)).item()
            # For recall tp is the number of ground-truth targets matched to detections
            self.recall_per_class[ground_truth_label] = \
                (np.sum(ground_truth_matched) / (number_of_ground_truths + epsilon)).item()

            # Average precision and mAP computation
            precision, recall = self._remove_jaggedness(precision, recall)
            self.average_precision_per_class[ground_truth_label] = \
                self._integrate_precision_recall_curve(precision, recall).item()

        # self.average_precision_per_class is a defaultdict, when label is absent the value is 0 by default
        self.mAP = np.nanmean(np.array([self.average_precision_per_class[label]
                                        for label in self._ground_truth_labels]))

        return self.mAP

    def reset(self):
        r"""Reset all intermediate and return values to their initial value.

        If :meth:`reset` is not called in-between two :meth:`compute` call, the values returned by :meth:`compute`
        will take into account the entire prediction stack, not just the predictions in-between the two
        :meth:`compute` calls.

        """
        self._init_values()

    def _format_input(self, input_array):
        """Convert an input with the geometry handling options of this metric (two values are returned)."""
        return get_type_and_convert(input_array,
                                    trim_invalid_geometry=self.trim_invalid_geometry,
                                    autocorrect_invalid_geometry=self.autocorrect_invalid_geometry)

    @staticmethod
    def _remove_jaggedness(precision, recall):
        """Pad the precision-recall curve and make the precision non-increasing.

        Recall is padded with ``0`` and ``1``, precision with ``0`` on both ends. Every precision value but the
        trailing pad is then replaced by the maximum precision found at or after its position.

        Args:
            precision (ndarray): Precision after each confidence-ranked detection.
            recall (ndarray): Recall after each confidence-ranked detection.

        Returns:
            (ndarray, ndarray): The padded and smoothed precision, and the padded recall.

        """
        recall = np.concatenate([[0], recall, [1]])
        precision = np.concatenate([[0], precision, [0]])

        # Right-to-left running maximum over everything but the trailing pad (which stays 0)
        precision[:-1] = np.maximum.accumulate(precision[-2::-1])[::-1]

        return precision, recall

    @staticmethod
    def _integrate_precision_recall_curve(precision, recall):
        """Integrate precision over recall with a step interpolation.

        Args:
            precision (ndarray): Precision values as returned by :meth:`_remove_jaggedness`.
            recall (ndarray): Recall values as returned by :meth:`_remove_jaggedness`.

        Returns:
            The area under the curve (a numpy scalar).

        """
        # The indices where recall changes value
        i = np.where(recall[1:] != recall[:-1])[0] + 1

        # Integration with step interpolation
        average_precision = np.sum((recall[i] - recall[i - 1]) * precision[i])
        return average_precision

    # Default factories
    @staticmethod
    def _nan():
        return np.nan

    @staticmethod
    def _empty_array():
        return np.array([])