diff --git a/CHANGELOG.md b/CHANGELOG.md index 34a0eb3038..1e2ac21663 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improved** - New model: `StatsForecastAutoTBATS`. This model offers the [AutoTBATS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotbats) model from Nixtla's `statsforecasts` library. [#2611](https://github.com/unit8co/darts/pull/2611) by [He Weilin](https://github.com/cnhwl). +- Added parameter `component_wise` to `show_anomalies()` to separately plot each component in multivariate series. [#2544](https://github.com/unit8co/darts/pull/2544) by [He Weilin](https://github.com/cnhwl). **Fixed** - Fixed a bug when performing optimized historical forecasts with `stride=1` using a `RegressionModel` with `output_chunk_shift>=1` and `output_chunk_length=1`, where the forecast time index was not properly shifted. [#2634](https://github.com/unit8co/darts/pull/2634) by [Mattias De Charleroy](https://github.com/MattiasDC). diff --git a/darts/ad/anomaly_model/anomaly_model.py b/darts/ad/anomaly_model/anomaly_model.py index 82322f4838..63655db40c 100644 --- a/darts/ad/anomaly_model/anomaly_model.py +++ b/darts/ad/anomaly_model/anomaly_model.py @@ -247,6 +247,7 @@ def show_anomalies( names_of_scorers: Union[str, Sequence[str]] = None, title: str = None, metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None, + component_wise: bool = False, **score_kwargs, ): """Plot the results of the anomaly model. @@ -283,6 +284,8 @@ def show_anomalies( Default: "AUC_ROC". score_kwargs parameters for the `score()` method. + component_wise + If True, will separately plot each component in case of multivariate anomaly detection. 
""" series = _check_input(series, name="series", num_series_expected=1)[0] predict_kwargs = predict_kwargs if predict_kwargs is not None else {} @@ -310,6 +313,7 @@ def show_anomalies( names_of_scorers=names_of_scorers, title=title, metric=metric, + component_wise=component_wise, ) @property diff --git a/darts/ad/anomaly_model/forecasting_am.py b/darts/ad/anomaly_model/forecasting_am.py index 88ce67b9ce..8b4339cd9c 100644 --- a/darts/ad/anomaly_model/forecasting_am.py +++ b/darts/ad/anomaly_model/forecasting_am.py @@ -440,6 +440,7 @@ def show_anomalies( names_of_scorers: Union[str, Sequence[str]] = None, title: str = None, metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None, + component_wise: bool = False, **score_kwargs, ): """Plot the results of the anomaly model. @@ -506,6 +507,8 @@ def show_anomalies( Optionally, the name of the metric function to use. Must be one of "AUC_ROC" (Area Under the Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores). Default: "AUC_ROC". + component_wise + If True, will separately plot each component in case of multivariate anomaly detection. score_kwargs parameters for the `score()` method. """ @@ -527,6 +530,7 @@ def show_anomalies( names_of_scorers=names_of_scorers, title=title, metric=metric, + component_wise=component_wise, **score_kwargs, ) diff --git a/darts/ad/scorers/scorers.py b/darts/ad/scorers/scorers.py index c30aefbc9e..3fadee463a 100644 --- a/darts/ad/scorers/scorers.py +++ b/darts/ad/scorers/scorers.py @@ -176,6 +176,7 @@ def show_anomalies_from_prediction( anomalies: TimeSeries = None, title: str = None, metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None, + component_wise: bool = False, ): """Plot the results of the scorer. @@ -208,6 +209,8 @@ def show_anomalies_from_prediction( Optionally, the name of the metric function to use. Must be one of "AUC_ROC" (Area Under the Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores). Default: "AUC_ROC". 
+ component_wise + If True, will separately plot each component in case of multivariate anomaly detection. """ series = _check_input(series, name="series", num_series_expected=1)[0] pred_series = _check_input( @@ -230,6 +233,7 @@ def show_anomalies_from_prediction( names_of_scorers=scorer_name, title=title, metric=metric, + component_wise=component_wise, ) @property @@ -580,6 +584,7 @@ def show_anomalies( scorer_name: str = None, title: str = None, metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None, + component_wise: bool = False, ): """Plot the results of the scorer. @@ -610,6 +615,8 @@ def show_anomalies( Optionally, the name of the metric function to use. Must be one of "AUC_ROC" (Area Under the Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores). Default: "AUC_ROC". + component_wise + If True, will separately plot each component in case of multivariate anomaly detection. """ series = _check_input(series, name="series", num_series_expected=1)[0] pred_scores = self.score(series) @@ -633,6 +640,7 @@ def show_anomalies( names_of_scorers=scorer_name, title=title, metric=metric, + component_wise=component_wise, ) @property diff --git a/darts/ad/utils.py b/darts/ad/utils.py index 943ad9498e..4395afdfeb 100644 --- a/darts/ad/utils.py +++ b/darts/ad/utils.py @@ -311,6 +311,7 @@ def show_anomalies_from_scores( names_of_scorers: Union[str, Sequence[str]] = None, title: str = None, metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None, + component_wise: bool = False, ): """Plot the results generated by an anomaly model. @@ -352,11 +353,14 @@ def show_anomalies_from_scores( Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores). Only effective when `pred_scores` is not `None`. Default: "AUC_ROC". + component_wise + If True, will separately plot each component in case of multivariate anomaly detection. 
""" series = _check_input( series, name="series", num_series_expected=1, + check_multivariate=component_wise, )[0] if title is None and pred_scores is not None: @@ -420,107 +424,87 @@ def show_anomalies_from_scores( logger=logger, ) - nbr_plots = nbr_plots + len(set(window)) + nbr_plots += len(set(window)) + series_width = series.n_components + if pred_series is not None: + pred_series = _check_input( + pred_series, + name="pred_series", + width_expected=series_width, + num_series_expected=1, + check_multivariate=component_wise, + )[0] + + if anomalies is not None and component_wise: + anomalies = _check_input( + anomalies, + name="anomalies", + width_expected=series_width, + num_series_expected=1, + check_binary=True, + check_multivariate=component_wise, + )[0] + + if pred_scores is not None and component_wise: + for pred_score in pred_scores: + _ = _check_input( + pred_score, + name="pred_score", + width_expected=series_width, + num_series_expected=1, + check_multivariate=component_wise, + )[0] + + plots_per_ts = nbr_plots * series_width if component_wise else nbr_plots + height_ratios = ([2] + [1] * (nbr_plots - 1)) * (plots_per_ts // nbr_plots) + height_total = 2 * sum(height_ratios) fig, axs = plt.subplots( - nbr_plots, - figsize=(8, 4 + 2 * (nbr_plots - 1)), + nrows=plots_per_ts, + figsize=(8, height_total), sharex=True, - gridspec_kw={"height_ratios": [2] + [1] * (nbr_plots - 1)}, - squeeze=False, + gridspec_kw={"height_ratios": height_ratios}, ) - index_ax = 0 - - _plot_series(series=series, ax_id=axs[index_ax][0], linewidth=0.5, label_name="") - - if pred_series is not None: - _plot_series( - series=pred_series, - ax_id=axs[index_ax][0], - linewidth=0.5, - label_name="model output", - ) - - axs[index_ax][0].set_title("") - - if anomalies is not None or pred_scores is not None: - axs[index_ax][0].set_xlabel("") - - axs[index_ax][0].legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=2) - - if pred_scores is not None: - dict_input = {} - - for idx, 
(score, w) in enumerate(zip(pred_scores, window)): - dict_input[idx] = {"series_score": score, "window": w, "name_id": idx} - - for index, elem in enumerate( - sorted(dict_input.items(), key=lambda x: x[1]["window"]) - ): - if index == 0: - current_window = elem[1]["window"] - index_ax = index_ax + 1 - - idx = elem[1]["name_id"] - w = elem[1]["window"] - - if w != current_window: - current_window = w - index_ax = index_ax + 1 - - if metric is not None: - value = round( - eval_metric_from_scores( - anomalies=anomalies, - pred_scores=pred_scores[idx], - window=w, - metric=metric, - ), - 3, - ) - else: - value = None - - if names_of_scorers is not None: - label = names_of_scorers[idx] + [f" ({value})", ""][value is None] - else: - label = f"score_{str(idx)}" + [f" ({value})", ""][value is None] - - _plot_series( - series=elem[1]["series_score"], - ax_id=axs[index_ax][0], - linewidth=0.5, - label_name=label, + for i in range(series_width if component_wise else 1): + if component_wise: + series_ = series[series.components[i]] + anomalies_ = ( + anomalies[anomalies.components[i]] if anomalies is not None else None ) - - axs[index_ax][0].legend( - loc="upper center", bbox_to_anchor=(0.5, 1.19), ncol=2 + pred_series_ = ( + pred_series[pred_series.components[i]] + if pred_series is not None + else None ) - axs[index_ax][0].set_title(f"Window: {str(w)}", loc="left") - axs[index_ax][0].set_title("") - axs[index_ax][0].set_xlabel("") - - if anomalies is not None: - _plot_series( - series=anomalies, - ax_id=axs[index_ax + 1][0], - linewidth=1, - label_name="anomalies", - color="red", - ) - - axs[index_ax + 1][0].set_title("") - axs[index_ax + 1][0].set_ylim([-0.1, 1.1]) - axs[index_ax + 1][0].set_yticks([0, 1]) - axs[index_ax + 1][0].set_yticklabels(["no", "yes"]) - axs[index_ax + 1][0].legend( - loc="upper center", bbox_to_anchor=(0.5, 1.2), ncol=2 + pred_scores_ = ( + [pc[pc.components[i]] for pc in pred_scores] + if pred_scores is not None + else None + ) + else: + series_ = 
def _plot_series_and_anomalies(
    series: TimeSeries,
    anomalies: Optional[TimeSeries],
    pred_series: Optional[TimeSeries],
    pred_scores: Optional[Sequence[TimeSeries]],
    window: Sequence[int],
    names_of_scorers: Optional[Sequence[str]],
    metric: Optional[str],
    axs: Union[plt.Axes, Sequence[plt.Axes]],
    index_ax: int,
):
    """Helper function to plot a series, its anomaly scores and its anomalies.

    The series (and optionally the model output) is drawn on `axs[index_ax]`. Each distinct
    window of `pred_scores` gets its own axis right after it, and the ground truth anomalies
    are drawn on the axis that follows the last score axis.

    Parameters
    ----------
    series
        The actual series to visualize anomalies from.
    anomalies
        Optionally, the ground truth of the anomalies (1 if it is an anomaly and 0 if not).
    pred_series
        Optionally, the output of the model given as input the `series` (can be stochastic).
    pred_scores
        Optionally, the output of the scorers given the output of the model and `series`.
    window
        Window parameter for each anomaly score. Only read when `pred_scores` is not `None`.
    names_of_scorers
        Optionally, the name of each score. If `None`, the labels default to `score_<index>`.
    metric
        Optionally, the name of the metric function to use. If given, the metric value
        (rounded to 3 decimals) is appended to the label of each score.
    axs
        The axes to plot on. Either an indexable collection of axes, or a single `Axes`.
    index_ax
        The index in `axs` of the axis on which `series` is plotted.
    """
    # `plt.subplots()` returns a bare `Axes` (not an array) when the figure has a single row
    # and `squeeze` is left at its default; wrap it so that the indexing below keeps working.
    if isinstance(axs, plt.Axes):
        axs = [axs]

    series_ax = axs[index_ax]
    _plot_series(series=series, ax_id=series_ax, linewidth=0.5, label_name="")

    if pred_series is not None:
        _plot_series(
            series=pred_series,
            ax_id=series_ax,
            linewidth=0.5,
            label_name="model output",
        )

    series_ax.set_title("")

    # the x-label is only kept on the last axis of the group
    if anomalies is not None or pred_scores is not None:
        series_ax.set_xlabel("")

    series_ax.legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=2)

    if pred_scores is not None:
        # order the scores by window; the sort is stable, so scores sharing a window keep
        # their input order, and `idx` still points at the matching scorer name
        scores_by_window = sorted(
            enumerate(zip(pred_scores, window)), key=lambda item: item[1][1]
        )

        current_window = None
        for position, (idx, (score, w)) in enumerate(scores_by_window):
            # move to the next axis for the first score and whenever the window changes
            if position == 0 or w != current_window:
                current_window = w
                index_ax += 1

            if metric is not None:
                value = round(
                    eval_metric_from_scores(
                        anomalies=anomalies,
                        pred_scores=score,
                        window=w,
                        metric=metric,
                    ),
                    3,
                )
                suffix = f" ({value})"
            else:
                suffix = ""

            if names_of_scorers is not None:
                label = names_of_scorers[idx] + suffix
            else:
                label = f"score_{str(idx)}" + suffix

            score_ax = axs[index_ax]
            _plot_series(
                series=score,
                ax_id=score_ax,
                linewidth=0.5,
                label_name=label,
            )

            score_ax.legend(loc="upper center", bbox_to_anchor=(0.5, 1.19), ncol=2)
            # left title shows the window; the (default) center title is cleared
            score_ax.set_title(f"Window: {str(w)}", loc="left")
            score_ax.set_title("")
            score_ax.set_xlabel("")

    if anomalies is not None:
        anomalies_ax = axs[index_ax + 1]
        _plot_series(
            series=anomalies,
            ax_id=anomalies_ax,
            linewidth=1,
            label_name="anomalies",
            color="red",
        )

        anomalies_ax.set_title("")
        # binary ground truth: show it as a no/yes axis with a small margin
        anomalies_ax.set_ylim([-0.1, 1.1])
        anomalies_ax.set_yticks([0, 1])
        anomalies_ax.set_yticklabels(["no", "yes"])
        anomalies_ax.legend(loc="upper center", bbox_to_anchor=(0.5, 1.2), ncol=2)
    else:
        axs[index_ax].set_xlabel("timestamp")