Source code for avalanche.evaluation.metrics.mean_scores

"""
This metric was described in the IL2M paper:

E. Belouadah and A. Popescu,
"IL2M: Class Incremental Learning With Dual Memory,"
2019 IEEE/CVF International Conference on Computer Vision (ICCV),
2019, pp. 583-592, doi: 10.1109/ICCV.2019.00067.

It selects the scores of the true class and then averages them for past and new
classes.
"""

from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Callable, Dict, Set, TYPE_CHECKING, List, Optional, TypeVar, Literal

import torch
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from matplotlib.pyplot import subplots
from torch import Tensor, arange

from avalanche.evaluation import Metric, PluginMetric
from avalanche.evaluation.metric_utils import get_metric_name

from avalanche.evaluation.metrics import Mean
from avalanche.evaluation.metric_results import MetricValue, AlternativeValues


if TYPE_CHECKING:
    from avalanche.training.templates import SupervisedTemplate
    from avalanche.evaluation.metric_results import MetricResult


TAggregation = TypeVar("TAggregation")
LabelCat = Literal["new", "old"]


class MeanScores(Metric[Dict[TAggregation, float]], ABC):
    """
    Average the scores of the true class by label
    """

    def __init__(self):
        self.label2mean: Dict[int, Mean] = defaultdict(Mean)
        self.reset()

    def reset(self) -> None:
        self.label2mean = defaultdict(Mean)

    @torch.no_grad()
    def update(self, predicted_y: Tensor, true_y: Tensor):
        assert (
            len(predicted_y.size()) == 2
        ), "Predictions need to be logits or scores, not labels"

        if len(true_y.size()) == 2:
            true_y = true_y.argmax(axis=1)

        scores = predicted_y[arange(len(true_y)), true_y]

        for score, label in zip(scores.tolist(), true_y.tolist()):
            self.label2mean[label].update(score)

    @abstractmethod
    def result(self) -> Dict[TAggregation, float]:
        pass
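# Illustrative sketch (not part of the original module): the indexing used by
# `MeanScores.update` above, `predicted_y[arange(len(true_y)), true_y]`, picks
# the score that each sample's row of logits assigns to that sample's true
# class. With made-up tensors:
#
#   predicted_y = torch.tensor([[0.1, 0.7, 0.2],
#                               [0.5, 0.2, 0.3]])
#   true_y = torch.tensor([1, 0])
#   predicted_y[arange(len(true_y)), true_y]  # -> tensor([0.7000, 0.5000])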
class PerClassMeanScores(MeanScores[int]):
    def result(self) -> Dict[int, float]:
        return {label: m.result() for label, m in self.label2mean.items()}
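# Usage sketch for PerClassMeanScores (the tensors are made up; results are
# shown approximately because scores pass through float32):
#
#   metric = PerClassMeanScores()
#   metric.update(
#       predicted_y=torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]]),
#       true_y=torch.tensor([0, 1, 0]),
#   )
#   metric.result()  # approximately {0: 0.75, 1: 0.8}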
class MeanNewOldScores(MeanScores[LabelCat]):
    """
    Average the scores of the true class by old and new classes
    """

    def __init__(self):
        super().__init__()
        self.new_classes: Set[int] = set()

    def reset(self) -> None:
        super().reset()
        self.new_classes = set()

    def update_new_classes(self, new_classes: Set[int]):
        self.new_classes.update(new_classes)

    @property
    def old_classes(self) -> Set[int]:
        return set(self.label2mean) - self.new_classes

    def result(self) -> Dict[LabelCat, float]:
        rv: Dict[LabelCat, float] = {
            "new": sum(
                (self.label2mean[label] for label in self.new_classes),
                start=Mean(),
            ).result()
        }

        if not self.old_classes:
            return rv

        rv["old"] = sum(
            (self.label2mean[label] for label in self.old_classes),
            start=Mean(),
        ).result()

        return rv
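# Usage sketch for MeanNewOldScores (the tensors are made up). Labels
# registered via `update_new_classes` are averaged under "new"; every other
# label seen in `update` ends up under "old":
#
#   metric = MeanNewOldScores()
#   metric.update_new_classes({2, 3})
#   metric.update(
#       predicted_y=torch.tensor([[0.6, 0.1, 0.2, 0.1],
#                                 [0.1, 0.1, 0.1, 0.7]]),
#       true_y=torch.tensor([0, 3]),
#   )
#   metric.result()  # approximately {"new": 0.7, "old": 0.6}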
def default_mean_scores_image_creator(
    label2step2mean_scores: Dict[LabelCat, Dict[int, float]]
) -> Figure:
    """
    Default function to create an image of the evolution of the scores of the
    true class, averaged by new and old classes.

    :param label2step2mean_scores: A dictionary that, for each label category
        ("old" and "new"), contains a dictionary of mean scores indexed by the
        step of the observation.
    :return: The figure containing the graphs.
    """
    ax: Axes
    fig, ax = subplots()

    markers = "*o"

    for marker, (label, step2mean_scores) in zip(
        markers, label2step2mean_scores.items()
    ):
        ax.plot(
            list(step2mean_scores.keys()),
            list(step2mean_scores.values()),
            marker,
            label=label,
        )

    ax.legend(loc="lower left")
    ax.set_xlabel("step")
    ax.set_ylabel("mean score")

    fig.tight_layout()

    return fig


MeanScoresImageCreator = Callable[[Dict[LabelCat, Dict[int, float]]], Figure]


class MeanScoresPluginMetricABC(PluginMetric, ABC):
    """
    Base class for the plugins that show the scores of the true class, averaged
    by new and old classes.

    :param image_creator: The function to use to create an image of the history
        of the mean scores grouped by old and new classes.
    """

    def __init__(
        self,
        image_creator: Optional[
            MeanScoresImageCreator
        ] = default_mean_scores_image_creator,
    ):
        super().__init__()
        self.mean_scores = MeanNewOldScores()
        self.image_creator = image_creator
        self.label_cat2step2mean: Dict[LabelCat, Dict[int, float]] = defaultdict(dict)

    def reset(self) -> None:
        self.mean_scores.reset()

    def update_new_classes(self, strategy: "SupervisedTemplate"):
        assert strategy.experience is not None
        self.mean_scores.update_new_classes(
            strategy.experience.classes_in_this_experience
        )

    def update(self, strategy: "SupervisedTemplate"):
        self.mean_scores.update(predicted_y=strategy.mb_output, true_y=strategy.mb_y)

    def result(self) -> Dict[LabelCat, float]:
        return self.mean_scores.result()

    def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult":
        label_cat2mean_score: Dict[LabelCat, float] = self.result()

        num_it = strategy.clock.train_iterations

        for label_cat, m in label_cat2mean_score.items():
            self.label_cat2step2mean[label_cat][num_it] = m

        base_metric_name = get_metric_name(
            self, strategy, add_experience=False, add_task=False
        )

        rv = [
            MetricValue(
                self,
                name=base_metric_name + f"/{label_cat}_classes",
                value=m,
                x_plot=num_it,
            )
            for label_cat, m in label_cat2mean_score.items()
        ]

        if "old" in label_cat2mean_score and "new" in label_cat2mean_score:
            rv.append(
                MetricValue(
                    self,
                    name=base_metric_name + "/new_old_diff",
                    value=label_cat2mean_score["new"] - label_cat2mean_score["old"],
                    x_plot=num_it,
                )
            )

        if self.image_creator is not None:
            rv.append(
                MetricValue(
                    self,
                    name=base_metric_name,
                    value=AlternativeValues(
                        self.image_creator(self.label_cat2step2mean),
                        self.label_cat2step2mean,
                    ),
                    x_plot=num_it,
                )
            )

        return rv

    def __str__(self):
        return "MeanScores"


class MeanScoresTrainPluginMetric(MeanScoresPluginMetricABC):
    """
    Plugin to show the scores of the true class during the last training epoch
    of each experience, averaged by new and old classes.
    """

    def before_training_epoch(self, strategy: "SupervisedTemplate") -> None:
        self.reset()
        self.update_new_classes(strategy)

    def after_training_iteration(self, strategy: "SupervisedTemplate") -> None:
        if strategy.clock.train_exp_epochs == strategy.train_epochs - 1:
            self.update(strategy)
        super().after_training_iteration(strategy)

    def after_training_epoch(self, strategy: "SupervisedTemplate") -> "MetricResult":
        if strategy.clock.train_exp_epochs == strategy.train_epochs - 1:
            return self._package_result(strategy)
        else:
            return None


class MeanScoresEvalPluginMetric(MeanScoresPluginMetricABC):
    """
    Plugin to show the scores of the true class during evaluation, averaged by
    new and old classes.
    """

    def before_training(self, strategy: "SupervisedTemplate") -> None:
        self.reset()

    def before_training_exp(self, strategy: "SupervisedTemplate") -> None:
        self.update_new_classes(strategy)

    def after_eval_iteration(self, strategy: "SupervisedTemplate") -> None:
        self.update(strategy)
        super().after_eval_iteration(strategy)

    def after_eval(self, strategy: "SupervisedTemplate") -> "MetricResult":
        return self._package_result(strategy)
def mean_scores_metrics(
    *,
    on_train: bool = True,
    on_eval: bool = True,
    image_creator: Optional[
        MeanScoresImageCreator
    ] = default_mean_scores_image_creator,
) -> List[PluginMetric]:
    """
    Helper to create plugins to show the scores of the true class, averaged by
    new and old classes. The plugins are available during training (for the
    last epoch of each experience) and during evaluation.

    :param on_train: If True, the train plugin is created.
    :param on_eval: If True, the eval plugin is created.
    :param image_creator: The function to use to create an image of the history
        of the mean scores grouped by old and new classes.
    :return: The list of plugins that were specified.
    """
    plugins: List[PluginMetric] = []
    if on_eval:
        plugins.append(MeanScoresEvalPluginMetric(image_creator=image_creator))
    if on_train:
        plugins.append(MeanScoresTrainPluginMetric(image_creator=image_creator))

    return plugins
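# Typical wiring sketch (assumes the standard Avalanche `EvaluationPlugin` and
# `InteractiveLogger`; the surrounding benchmark/strategy setup is omitted and
# this is shown only for illustration):
#
#   from avalanche.training.plugins import EvaluationPlugin
#   from avalanche.logging import InteractiveLogger
#
#   eval_plugin = EvaluationPlugin(
#       *mean_scores_metrics(on_train=True, on_eval=True),
#       loggers=[InteractiveLogger()],
#   )
#   # ...then pass `evaluator=eval_plugin` to a strategy, e.g. Naive(...).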
__all__ = [
    "mean_scores_metrics",
    "MeanScoresTrainPluginMetric",
    "MeanScoresEvalPluginMetric",
    "MeanScores",
    "MeanNewOldScores",
    "PerClassMeanScores",
]