import pandas as pd
import numpy as np
import logging

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from dataiku.base.utils import safe_unicode_str
from dataiku.core import doctor_constants
from dataiku.doctor.diagnostics import diagnostics
from dataiku.doctor.prediction.custom_scoring import check_customscore
from dataiku.doctor.prediction.custom_scoring import get_custom_metric_scorefunc
from dataiku.doctor.timeseries.preparation.resampling.utils import get_frequency
from dataiku.doctor.timeseries.utils import JSONForecast
from dataiku.doctor.timeseries.utils import ModelForecast
from dataiku.doctor.timeseries.utils import SINGLE_TIMESERIES_IDENTIFIER
from dataiku.doctor.timeseries.utils import timeseries_iterator
from dataiku.doctor.timeseries.utils import prefix_custom_metric_name
from dataiku.doctor.timeseries.utils import get_dataframe_of_timeseries_identifier
from dataiku.doctor.timeseries.utils import pretty_timeseries_identifiers
from dataiku.doctor.utils import dku_nonaninf
from dataiku.doctor.utils import dku_average_nonan
from dataiku.doctor.utils import dku_std_nonan
from dataiku.doctor.utils.metrics import mean_absolute_scaled_error
from dataiku.doctor.utils.metrics import mean_absolute_percentage_error
from dataiku.doctor.utils.metrics import mean_scaled_interval_error
from dataiku.doctor.utils.metrics import symetric_mean_absolute_percentage_error
from dataiku.doctor.utils.metrics import quantile_loss
from dataiku.doctor.utils.metrics import abs_error
from dataiku.doctor.utils.metrics import abs_target_sum


logger = logging.getLogger(__name__)

# metrics computation is largely inspired by GluonTS:
# https://github.com/awslabs/gluon-ts/blob/v0.10.4/src/gluonts/evaluation/metrics.py

# Metrics that can be computed for each individual time series. The other metrics are only
# available as aggregated metrics over all time series.
SINGLE_TIMESERIES_METRICS = ["mse", "mape", "mase", "smape", "mae", "msis"]
# All aggregated metric names: the per time series metrics followed by the aggregate-only ones.
AGGREGATED_TIMESERIES_METRICS = SINGLE_TIMESERIES_METRICS + [
    "rmse",
    "nd",
    "meanAbsoluteQuantileLoss",
    "meanWeightedQuantileLoss",
]

# Keys of the score dictionaries appended to `TimeseriesModelScorer.scores` and read back by
# `TimeseriesModelScorer.remove_naninf`.
PER_TIMESERIES_METRICS = "perTimeseriesMetrics"
TIMESERIES_AGGREGATED_METRICS = "aggregatedMetrics"


class TimeseriesModelIntrinsicScorer:
    """Completes the intrinsic performance dictionary of a time series model.

    The dictionary received at construction time is updated in place by `score()` with values
    obtained from the algorithm and from the estimator (`clf`).
    """
    # TODO: inherit from PredictionModelIntrinsicScorer once refactored ?

    def __init__(self, modeling_params, clf, algorithm, prediction_length, frequency, initial_intrinsic_perf_data):
        self.clf = clf
        self.algorithm = algorithm
        self.prediction_length = prediction_length
        self.frequency = frequency
        self.initial_intrinsic_perf_data = initial_intrinsic_perf_data

        # Only the "resolved" part of the actual parameters is needed for scoring
        self.resolved_actual_params = algorithm.get_actual_params(modeling_params, clf, fit_params=None)["resolved"]

    def score(self):
        """Fill and return the intrinsic performance dictionary (the initial dictionary is mutated)."""
        perf = self.initial_intrinsic_perf_data
        algorithm = self.algorithm
        resolved_params = self.resolved_actual_params

        perf["minTimeseriesSizeForScoring"] = algorithm.get_min_size_for_scoring(resolved_params, self.prediction_length)
        perf["maxUsedTimestepsForScoring"] = algorithm.get_max_used_timesteps_for_scoring(
            resolved_params, self.prediction_length, self.frequency
        )

        # Coefficients and information criteria are only exported by algorithms that declare support for them
        if algorithm.SUPPORTS_MODEL_COEFFICIENTS:
            coefficients_map, fixed_names, variable_names, external_names = self.clf.get_coefficients_map_and_names()
            perf["modelCoefficients"] = self.build_model_coefficients_list(
                coefficients_map, fixed_names, variable_names, external_names
            )
        if algorithm.SUPPORTS_INFORMATION_CRITERIA:
            perf["informationCriteria"] = self.clf.get_information_criteria()

        return perf

    @staticmethod
    def _coefficient_entry(coefficients_map, coeff_name, is_external_feature=False):
        """Build the dictionary describing one coefficient from its entry in `coefficients_map`."""
        entry = {
            "displayName": coeff_name,
            "values": coefficients_map[coeff_name]["values"],
            "pvalues": coefficients_map[coeff_name]["pvalues"],
            "tvalues": coefficients_map[coeff_name]["tvalues"],
            "stderrs": coefficients_map[coeff_name]["stderrs"],
        }
        if is_external_feature:
            entry["isExternalFeature"] = True
        return entry

    def build_model_coefficients_list(self, coefficients_map, fixed_coefficients, variable_coefficients, external_features_coefficients):
        """Returns a list of dictionary of coefficients name and values per time series.

        The list contains the external features coefficients first, then the fixed coefficients and
        finally the variable coefficients. Empty or None lists of names are skipped.

        Args:
            coefficients_map (dict): Stores for each coefficient name a dictionary of coefficient value, p-value, t-value and stderr by time series identifier.
            fixed_coefficients (list): Coefficients present in all time series.
            variable_coefficients (list): Coefficients with suffix '_i' that can be different from one time series to another depending on the model.
            external_features_coefficients (list): Coefficients corresponding to each of the external features, they can also be different from one time series to another depending on the data.
        """
        model_coefficients = [
            self._coefficient_entry(coefficients_map, coeff_name, is_external_feature=True)
            for coeff_name in external_features_coefficients or ()
        ]
        model_coefficients.extend(
            self._coefficient_entry(coefficients_map, coeff_name)
            for coeff_name in fixed_coefficients or ()
        )

        for coeff_prefix in variable_coefficients or ():
            # Variable coefficients are named '<prefix>_1', '<prefix>_2', ...: stop at the first missing index
            index = 1
            while True:
                coeff_name = "{}_{}".format(coeff_prefix, index)
                if coeff_name not in coefficients_map:
                    break
                model_coefficients.append(self._coefficient_entry(coefficients_map, coeff_name))
                index += 1

        return model_coefficients


class TimeseriesModelScorer(object):
    PAST_TIME_STEPS_TO_INCLUDE = 100

    def __init__(
        self,
        target_variable,
        time_variable,
        timeseries_identifier_columns,
        prediction_length,
        gap_size,
        quantiles,
        use_external_features,
        frequency,
        custom_metrics,
        max_nb_forecast_timesteps=None
    ):
        """
        Store the scoring configuration and initialize the empty score and forecast containers.

        `quantiles` must contain 0.5 because the median forecast is looked up by that value when
        computing the metrics.
        """
        assert .5 in quantiles, "Quantile 0.5 must be one of the quantiles to forecast"

        # Columns of the dataframes
        self.target_variable = target_variable
        self.time_variable = time_variable
        self.timeseries_identifier_columns = timeseries_identifier_columns

        # Forecasting setup
        self.prediction_length = prediction_length
        # prediction offset is the number of time steps between 2 consecutive predicted horizons in the test set
        self.prediction_offset = prediction_length
        self.gap_size = gap_size
        self.quantiles = quantiles
        self.frequency = frequency
        self.use_external_features = use_external_features
        self.max_nb_forecast_timesteps = max_nb_forecast_timesteps

        # list of {"name": XXX, "metricCode": XXX, "greaterIsBetter", ...}
        self.custom_metrics = custom_metrics

        # Results: list of metrics per fold, and historical and forecasts values per timeseries
        self.scores = []
        self.forecasts = {}

    @classmethod
    def build(cls, core_params, metrics_params, use_external_features):
        """
        Alternate constructor reading the scorer configuration from the core and metrics params.

        The quantiles are sorted before being handed to the constructor, and the test size is
        asserted to be a multiple of the prediction length. `max_nb_forecast_timesteps` is left to
        its default value.
        """
        target = core_params[doctor_constants.TARGET_VARIABLE]
        time_column = core_params[doctor_constants.TIME_VARIABLE]
        identifier_columns = core_params[doctor_constants.TIMESERIES_IDENTIFIER_COLUMNS]
        horizon_length = core_params[doctor_constants.PREDICTION_LENGTH]
        sorted_quantiles = sorted(core_params[doctor_constants.QUANTILES])
        resolved_frequency = get_frequency(core_params)

        evaluation = core_params[doctor_constants.EVALUATION_PARAMS]
        gap = evaluation[doctor_constants.GAP_SIZE]
        # The test set must hold a whole number of forecast horizons
        assert evaluation[doctor_constants.TEST_SIZE] % horizon_length == 0

        return cls(
            target,
            time_column,
            identifier_columns,
            horizon_length,
            gap,
            sorted_quantiles,
            use_external_features,
            resolved_frequency,
            metrics_params["customMetrics"],
        )

    def predict_all_test_timesteps(self, estimator, historical_dataframe, test_df, fit_before_predict):
        """
        Forecast all time steps in the test dataframe, using the historical dataframe as input.
        If there are more than one forecast horizon in the test set, we forecast each one by iteratively shifting
        the past and future data. Time steps left over after the last full horizon are forecast in one final call.

        When `max_nb_forecast_timesteps` is set, only the last `max_nb_forecast_timesteps` forecast time steps of
        each time series are returned.

        Return:
            Dictionary where keys are time series identifiers and values are the forecast values for the time series.
            Each forecast contains the time stamps, the mean forecast values, and the quantile forecasts (2D-array)
        """
        all_forecasts = {}
        for identifier, history_df in timeseries_iterator(historical_dataframe, self.timeseries_identifier_columns):
            identifier_test_df = get_dataframe_of_timeseries_identifier(test_df, identifier)
            nb_test_steps = len(identifier_test_df.index)

            accumulated = {
                ModelForecast.TIMESTAMPS: [],
                ModelForecast.FORECAST_VALUES: [],
                ModelForecast.QUANTILES_FORECASTS: [[] for _ in self.quantiles],
            }
            # Number of forecast horizons (of length prediction_length) in the test set. Consecutive forecast horizons
            # are spaced by prediction_offset time steps. The prediction offset is always equal to the prediction length.
            # e.g. If the test_set contains 14 time steps, prediction length is 5, then the offset is 5 and:
            #         nb_horizons = (14-5)//5 + 1 = 2
            #
            #  Test set          :    *-*-*-*-*-*-*-*-*-*-*-*-*-*         => Contains 14 time steps
            #  Forecast horizon 1:    *-*-*-*-*          |                => Prediction length is 5 time steps
            #  Forecast horizon 2:              *-*-*-*-*|                => Prediction offset is 5 time steps
            #  Forecast horizon 3:                        *-*-*-*         => Does not fit in test set: not considered
            horizon_length = self.prediction_length
            offset = self.prediction_offset
            nb_horizons = (nb_test_steps - horizon_length) // offset + 1
            logger.info("Predicting {} time steps ({} horizon{}) of time series {}".format(
                horizon_length * nb_horizons,
                nb_horizons,
                "s" if nb_horizons > 1 else "",
                "" if identifier == SINGLE_TIMESERIES_IDENTIFIER else pretty_timeseries_identifiers(identifier),
            ))

            # Time steps remaining after the last full horizon
            nb_leftover_steps = nb_test_steps % horizon_length

            for horizon_index in range(nb_horizons):
                # At every iteration, we:
                # - add prediction_offset time steps to the past data
                # - shift the future data by prediction_offset time steps
                start = horizon_index * offset
                past_df = pd.concat([history_df, identifier_test_df.iloc[:start]], ignore_index=True)
                # The future rows are only handed to the estimator when external features are used
                future_df = identifier_test_df.iloc[start: start + horizon_length] if self.use_external_features else None

                self.populate_forecasts(
                    estimator.predict_single(
                        past_df, future_df, self.quantiles, identifier, fit_before_predict=fit_before_predict
                    ),
                    accumulated,
                )

            if nb_leftover_steps > 0:
                # this should only occur when using a custom train/test split

                # past_df now contains everything but the last nb_leftover_steps rows
                # future_df is just these rows
                past_df = pd.concat([history_df, identifier_test_df.iloc[:-nb_leftover_steps]], ignore_index=True)
                future_df = identifier_test_df.iloc[-nb_leftover_steps:] if self.use_external_features else None

                self.populate_forecasts(
                    estimator.predict_single(
                        past_df, future_df, self.quantiles, identifier,
                        fit_before_predict=fit_before_predict, prediction_length_override=nb_leftover_steps
                    ),
                    accumulated,
                )

            max_steps = self.max_nb_forecast_timesteps
            if max_steps and len(accumulated[ModelForecast.TIMESTAMPS]) > max_steps:
                # in this case, we only return the LAST X predicted values, where x is max_nb_forecast_timesteps
                accumulated[ModelForecast.TIMESTAMPS] = accumulated[ModelForecast.TIMESTAMPS][-max_steps:]
                accumulated[ModelForecast.FORECAST_VALUES] = accumulated[ModelForecast.FORECAST_VALUES][-max_steps:]
                # quantile forecasts are 2D (one row per quantile): truncate along the time axis
                accumulated[ModelForecast.QUANTILES_FORECASTS] = accumulated[ModelForecast.QUANTILES_FORECASTS][:, -max_steps:]

            all_forecasts[identifier] = accumulated

        return all_forecasts

    @staticmethod
    def populate_forecasts(forecasts, forecasts_of_timeseries_identifier):
        """
        Append `forecasts` to the accumulated `forecasts_of_timeseries_identifier` (updated in place).

        Time stamps and forecast values are concatenated along the first axis, quantile forecasts along
        the second axis (one row per quantile).
        """
        concatenation_axis_by_key = (
            (ModelForecast.TIMESTAMPS, 0),
            (ModelForecast.FORECAST_VALUES, 0),
            (ModelForecast.QUANTILES_FORECASTS, 1),
        )
        for key, axis in concatenation_axis_by_key:
            forecasts_of_timeseries_identifier[key] = np.concatenate(
                [forecasts_of_timeseries_identifier[key], forecasts[key]], axis=axis
            )

    def _score_single_timeseries(
        self, timeseries_identifier, historical_df, test_df, timeseries_forecasts, fold_id=0, append_forecasts=False
    ):
        """
        Compute the metrics of a single time series and, when `append_forecasts` is set, record its
        ground truth and forecasts in `self.forecasts`.

        Return:
            Dictionary of time series metrics, as built by `_compute_timeseries_metrics`
        """
        timestamps = timeseries_forecasts[ModelForecast.TIMESTAMPS]
        point_forecasts = timeseries_forecasts[ModelForecast.FORECAST_VALUES]
        quantile_forecasts = timeseries_forecasts[ModelForecast.QUANTILES_FORECASTS]

        # Keep only the last time steps of the test set so that y_true matches the (possibly shortened) forecasts
        nb_kept_steps = self.max_nb_forecast_timesteps or len(point_forecasts)
        y_true = test_df[self.target_variable][-nb_kept_steps:].to_numpy()

        naive_error = self._compute_single_timeseries_naive_error(historical_df, y_true)

        if append_forecasts:
            recorded = self.forecasts[timeseries_identifier]
            last_recorded_date = pd.Timestamp(recorded[JSONForecast.GROUND_TRUTH_TIME].max())
            if historical_df[self.time_variable].max() > last_recorded_date:
                # the current train set ends after the ground truth retrieved by the previous test set (because there
                # is a fold offset) so we need to add the missing ground truth values (values after
                # last_recorded_date) from the train set to self.forecasts
                missing_rows = historical_df[historical_df[self.time_variable] > last_recorded_date]
                self._append_per_timeseries_ground_truth(
                    timeseries_identifier,
                    missing_rows[self.time_variable].astype(str),
                    missing_rows[self.target_variable].values,
                )

            self._append_per_timeseries_forecasts(
                timeseries_identifier, timestamps, y_true, point_forecasts, quantile_forecasts, fold_id
            )

        return self._compute_timeseries_metrics(y_true, point_forecasts, quantile_forecasts, naive_error)

    def _compute_per_timeseries_metrics(self, historical_df, test_df, forecasts_by_timeseries, fold_id, append_forecasts):
        """
        Score every time series of the test dataframe.

        Return:
            Dictionary where keys are time series identifiers and values are the metrics of the time series
        """
        # retrieve historical values the first time forecasts are recorded
        if append_forecasts and not self.forecasts:
            self._set_forecasts_ground_truth(historical_df)

        return {
            identifier: self._score_single_timeseries(
                identifier,
                get_dataframe_of_timeseries_identifier(historical_df, identifier),
                identifier_test_df,
                forecasts_by_timeseries[identifier],
                fold_id=fold_id,
                append_forecasts=append_forecasts,
            )
            for identifier, identifier_test_df in timeseries_iterator(test_df, self.timeseries_identifier_columns)
        }

    def _parse_custom_metrics(self):
        """
        Parse the custom metric code. Must be called in the score() method to be inside a ProcessingStep because it can create a diagnostics.

        Parsing failures are not raised: they are logged, reported as a diagnostic and recorded on the metric itself.

        Return:
            List with one dictionary per custom metric, holding its "name", "description", "metricCode" and
            "greaterIsBetter" plus:
                - "didSucceed": whether the metric code could be parsed
                - "custom_metric_func": the score function (only when parsing succeeded)
                - "error": the error message (only when parsing failed)
            The list is empty when there are no custom metrics (`self.custom_metrics` is None or empty).
        """
        parsed_custom_metrics = []
        # `self.custom_metrics` may be None: the other methods only guard it with a truthiness check,
        # so treat None like an empty list instead of failing on iteration
        for custom_metric_params in self.custom_metrics or []:
            parsed_custom_metric = {
                "name": custom_metric_params["name"],
                "description": custom_metric_params["description"],
                "metricCode": custom_metric_params["metricCode"],
                "greaterIsBetter": custom_metric_params["greaterIsBetter"],
            }
            try:
                parsed_custom_metric["custom_metric_func"] = get_custom_metric_scorefunc(custom_metric_params["metricCode"], None, allow_naninf=True)
                parsed_custom_metric["didSucceed"] = True
            except Exception as e:
                # Deliberately broad: user-provided metric code can fail in any way and must not abort the scoring
                diagnostics.add_or_update(
                    diagnostics.DiagnosticType.ML_DIAGNOSTICS_MODELING_PARAMETERS,
                    "Calculation of '{}' failed: unable to parse metric code".format(custom_metric_params["name"])
                )
                parsed_custom_metric["didSucceed"] = False
                parsed_custom_metric["error"] = safe_unicode_str(e)
                logger.warning("Custom metric function '{}' failed to be parsed".format(custom_metric_params["name"]), exc_info=True)
            parsed_custom_metrics.append(parsed_custom_metric)
        return parsed_custom_metrics

    def score(
        self,
        historical_df,
        test_df,
        forecasts_by_timeseries,
        fold_id=None,
        append_forecasts=False,
        append_scores=False,
        compute_aggregated_metrics=True
    ):
        """
        Compute the metrics of each time series and, unless disabled, the metrics aggregated over all time series.

        The custom metrics are (re)parsed at every call. When `append_scores` is set, the result is also
        appended to `self.scores`.

        Return:
            Tuple (per time series metrics, aggregated metrics); the aggregated metrics are None when
            `compute_aggregated_metrics` is False.
        """
        self.custom_metrics = self._parse_custom_metrics()

        all_metrics = self._compute_per_timeseries_metrics(
            historical_df, test_df, forecasts_by_timeseries, fold_id, append_forecasts
        )
        # The dataframe used for the aggregation is built before the per time series metrics are filtered
        metrics_df = pd.DataFrame.from_dict(all_metrics, orient="index")
        per_timeseries_metrics = self.filter_per_timeseries_metrics(all_metrics)

        if compute_aggregated_metrics:
            timeseries_aggregated_metrics = self.aggregate_metrics_per_timeseries(metrics_df)
        else:
            timeseries_aggregated_metrics = None

        if append_scores:
            fold_scores = {
                PER_TIMESERIES_METRICS: per_timeseries_metrics,
                TIMESERIES_AGGREGATED_METRICS: timeseries_aggregated_metrics,
            }
            self.scores.append(fold_scores)

        return per_timeseries_metrics, timeseries_aggregated_metrics

    def remove_naninf(self, score):
        """
        Format properly the perf.json file:
            - Remove NaN or infinity values from the score. Used when serializing the score to json, because the backend and the
              frontend fail when handling NaN or infinity metrics values.
            - Create the customMetricsResults list if there are custom metrics.

        Return:
            New dictionary with the formatted per time series metrics and the formatted aggregated metrics
        """
        formatted_per_timeseries = {}
        for identifier, timeseries_score in score[PER_TIMESERIES_METRICS].items():
            formatted_per_timeseries[identifier] = self._remove_naninf(timeseries_score)

        formatted_aggregated = self._remove_naninf(score[TIMESERIES_AGGREGATED_METRICS], aggregated_metric=True)

        return {
            PER_TIMESERIES_METRICS: formatted_per_timeseries,
            TIMESERIES_AGGREGATED_METRICS: formatted_aggregated,
        }
    
    def _remove_naninf(self, score, aggregated_metric=False):
        """
        Format a score dict (of a single time series or aggregated).
        See 'remove_naninf' method docstring for more explanation.

        Args:
            score (dict): Metric values by metric name.
            aggregated_metric (bool): Whether `score` holds aggregated metrics; forwarded to
                `_add_custom_metrics_results`.

        Return:
            New dictionary where each value went through `dku_nonaninf`, with the custom metrics moved to
            the customMetricsResults list when there are custom metrics.
        """
        formatted_score = {metric: dku_nonaninf(value) for metric, value in score.items()}

        # Truthiness check (rather than len) so that a None `custom_metrics` is handled like an empty list,
        # consistently with the other methods of this class
        if self.custom_metrics:
            self._add_custom_metrics_results(formatted_score, aggregated_metric)

        return formatted_score

    def _add_custom_metrics_results(self, formatted_score, aggregated_metric):
        """
        Add the customMetricsResults list to formatted_score.

        The value of each custom metric (and its std / worst value when present) is moved out of
        `formatted_score` into the result of the metric. `formatted_score` is updated in place.
        """
        results = []
        for custom_metric in self.custom_metrics:
            result = {
                "metric": {
                    field: custom_metric[field]
                    for field in ("name", "metricCode", "description", "greaterIsBetter")
                },
                "didSucceed": custom_metric["didSucceed"],
            }
            if "error" in custom_metric:
                result["error"] = custom_metric["error"]

            prefixed_name = prefix_custom_metric_name(custom_metric["name"])
            value = formatted_score.pop(prefixed_name)
            result["value"] = value

            # An aggregated metric without a value, although the metric did not fail, is reported as a failure
            if aggregated_metric and value is None and result["didSucceed"]:
                result["didSucceed"] = False
                result["error"] = "Custom metric gave a invalid value for at least one time series"

            # Optional companion values, only moved when they exist in the score
            optional_values = (
                ("{}std".format(prefixed_name), "valuestd"),
                ("worst{}".format(prefixed_name.lower().capitalize()), "worstValue"),
            )
            for score_key, result_key in optional_values:
                if score_key in formatted_score:
                    result[result_key] = formatted_score.pop(score_key)

            results.append(result)

        formatted_score["customMetricsResults"] = results

    def append_future_forecasts(self, forecasts_by_timeseries, train_df):
        """
        Append future forecasts to model scorer if no external features were used (training only)

        For each time series, also stores as forecast context the rows among the last
        PAST_TIME_STEPS_TO_INCLUDE rows of `train_df` that are after the recorded ground truth.
        """
        for identifier, forecast in forecasts_by_timeseries.items():
            recorded = self.forecasts[identifier]
            recorded[JSONForecast.FUTURE_TIME] = forecast[ModelForecast.TIMESTAMPS]
            recorded[JSONForecast.FUTURE_FORECAST_VALUES] = forecast[ModelForecast.FORECAST_VALUES]

            recent_train_df = get_dataframe_of_timeseries_identifier(train_df, identifier)[-self.PAST_TIME_STEPS_TO_INCLUDE:]
            # Keep only the rows that are not already part of the recorded ground truth
            is_after_ground_truth = recent_train_df[self.time_variable] > recorded[JSONForecast.GROUND_TRUTH_TIME].max()
            context_df = recent_train_df.loc[is_after_ground_truth]
            recorded[JSONForecast.FUTURE_FORECAST_CONTEXT_GROUND_TRUTH_TIME] = context_df[self.time_variable].astype(str).values
            recorded[JSONForecast.FUTURE_FORECAST_CONTEXT_VALUES] = context_df[self.target_variable].values

            future_quantiles = forecast[ModelForecast.QUANTILES_FORECASTS]
            recorded_quantiles = recorded[JSONForecast.QUANTILES]
            for quantile_index, _ in enumerate(self.quantiles):
                recorded_quantiles[quantile_index][JSONForecast.FUTURE_FORECAST_VALUES] = future_quantiles[quantile_index]

    def remove_naninf_in_forecasts(self, forecasts):
        """
        Remove any invalid value in a list of forecasts before exporting to JSON, same as `remove_naninf`

        Each forecast of the `forecasts` dictionary is cleaned in place; the same dictionary is returned.
        """
        for forecast in forecasts.values():
            self.remove_nanif_in_one_forecast(forecast)
        return forecasts

    def remove_nanif_in_one_forecast(self, forecast):
        """
        Remove any invalid value before exporting to JSON, same as `remove_naninf`

        The "forecast" values, the optional "futureForecast" values and every entry of "quantiles" are
        cleaned. `forecast` is updated in place and returned.
        """
        def _cleaned(values):
            return [dku_nonaninf(value) for value in values]

        forecast["forecast"] = _cleaned(forecast["forecast"])
        # Future forecasts are optional
        if "futureForecast" in forecast:
            forecast["futureForecast"] = _cleaned(forecast["futureForecast"])
        forecast["quantiles"] = [
            self._remove_nanif_in_one_quantile(quantile) for quantile in forecast["quantiles"]
        ]
        return forecast

    def _remove_nanif_in_one_quantile(self, quantile):
        """
        Remove any invalid value from the "forecast" and optional "futureForecast" values of one quantile.
        `quantile` is updated in place and returned.
        """
        def _cleaned(values):
            return [dku_nonaninf(value) for value in values]

        quantile["forecast"] = _cleaned(quantile["forecast"])
        # Future forecasts are optional
        if "futureForecast" in quantile:
            quantile["futureForecast"] = _cleaned(quantile["futureForecast"])
        return quantile

    def _compute_timeseries_metrics(self, y_true, forecast_values, quantiles_forecasts, naive_error):
        """
        Compute the time series metrics, given the ground truth (y_true) and the forecast values (forecast_values)

        The first `gap_size` time steps of each forecast horizon are excluded from the evaluation.
        The first and last quantiles are used as bounds of the interval scored by "msis".

        Return:
            Dictionary of time series metrics
        """
        horizon_length = self.prediction_length
        gap = self.gap_size
        # remove 1 gap per forecast horizon for evaluation
        evaluated_positions = [
            position for position in range(len(y_true)) if position % horizon_length >= gap
        ]
        actuals = y_true[evaluated_positions]
        point_forecasts = forecast_values[evaluated_positions]
        quantile_rows = quantiles_forecasts[:, evaluated_positions]

        median_row = quantile_rows[self.quantiles.index(0.5)]  # 0.5 must always be a quantile
        lower_row = quantile_rows[0]
        upper_row = quantile_rows[-1]
        alpha = 1 - (self.quantiles[-1] - self.quantiles[0])

        # abs_error, abs_target_sum and quantile_loss_XX are only temporary metrics, they are never saved
        metrics = {
            "mse": mean_squared_error(actuals, point_forecasts),
            "abs_error": abs_error(actuals, median_row),
            "abs_target_sum": abs_target_sum(actuals),
            "mase": mean_absolute_scaled_error(actuals, median_row, naive_error),
            "mape": mean_absolute_percentage_error(actuals, median_row),
            "smape": symetric_mean_absolute_percentage_error(actuals, median_row),
            "mae": mean_absolute_error(actuals, point_forecasts),
            "msis": mean_scaled_interval_error(actuals, lower_row, upper_row, alpha, naive_error),
            "test_size": len(actuals),  # used to compute weighted average
        }

        if self.custom_metrics:
            self._compute_timeseries_custom_metrics(metrics, actuals, point_forecasts)

        # one temporary quantile loss per forecast quantile
        metrics.update(
            ("quantile_loss_{}".format(quantile), quantile_loss(actuals, quantile_rows[quantile_index], quantile))
            for quantile_index, quantile in enumerate(self.quantiles)
        )

        return metrics

    def _compute_timeseries_custom_metrics(self, metrics, kept_y_true, kept_forecast_values):
        """
        Add custom metrics to the metrics dict and add the errors if any to self.custom_metrics

        A custom metric that failed (now or previously) gets a NaN value and is not computed again.
        """
        for custom_metric in self.custom_metrics:
            if custom_metric["didSucceed"]:
                try:
                    score_value = custom_metric["custom_metric_func"](kept_y_true, kept_forecast_values)
                    check_customscore(score_value, allow_naninf=True)
                    metrics[prefix_custom_metric_name(custom_metric["name"])] = score_value
                except Exception as error:
                    # Deliberately broad: user-provided metric code can fail in any way
                    diagnostics.add_or_update(
                        diagnostics.DiagnosticType.ML_DIAGNOSTICS_MODELING_PARAMETERS,
                        "Calculation of '{}' failed".format(custom_metric["name"])
                    )
                    custom_metric["didSucceed"] = False
                    custom_metric["error"] = safe_unicode_str(error)

            if custom_metric["didSucceed"]:
                continue
            # Set failing values to NaN
            metrics[prefix_custom_metric_name(custom_metric["name"])] = np.nan

    def _append_per_timeseries_ground_truth(self, timeseries_identifier, dates, y_true):
        """Append ground truth dates and values to the recorded forecasts of a time series."""
        recorded = self.forecasts[timeseries_identifier]
        additions = (
            (JSONForecast.GROUND_TRUTH_TIME, dates),
            (JSONForecast.GROUND_TRUTH_VALUES, y_true),
        )
        for key, new_values in additions:
            recorded[key] = np.concatenate([recorded[key], new_values])

    def _append_per_timeseries_forecasts(self, timeseries_identifier, dates, y_true, forecast_values, quantiles_forecasts, fold_id):
        """
        Append the ground truth, the forecasts (tagged with `fold_id`) and the quantile forecasts of one
        fold to the recorded forecasts of a time series.
        """
        recorded = self.forecasts[timeseries_identifier]
        additions = (
            (JSONForecast.GROUND_TRUTH_TIME, dates),
            (JSONForecast.GROUND_TRUTH_VALUES, y_true),
            (JSONForecast.FORECAST_TIME, dates),
            (JSONForecast.FORECAST_VALUES, forecast_values),
            # one fold id per forecast time step
            (JSONForecast.FORECAST_FOLD_ID, [fold_id for _ in dates]),
        )
        for key, new_values in additions:
            recorded[key] = np.concatenate([recorded[key], new_values])

        recorded_quantiles = recorded[JSONForecast.QUANTILES]
        for quantile_index, _ in enumerate(self.quantiles):
            quantile_entry = recorded_quantiles[quantile_index]
            quantile_entry[JSONForecast.FORECAST_VALUES] = np.concatenate(
                [quantile_entry[JSONForecast.FORECAST_VALUES], quantiles_forecasts[quantile_index]]
            )

    def _set_forecasts_ground_truth(self, historical_df):
        """
        Initialize `self.forecasts` for every time series of `historical_df` with the last
        PAST_TIME_STEPS_TO_INCLUDE historical values as ground truth and empty forecasts.
        """
        for identifier, history_df in timeseries_iterator(historical_df, self.timeseries_identifier_columns):
            recent_history_df = history_df[-self.PAST_TIME_STEPS_TO_INCLUDE:]
            empty_quantile_forecasts = [
                {
                    JSONForecast.QUANTILE: quantile,
                    JSONForecast.FORECAST_VALUES: [],
                }
                for quantile in self.quantiles
            ]
            self.forecasts[identifier] = {
                JSONForecast.GROUND_TRUTH_TIME: recent_history_df[self.time_variable].astype(str),
                JSONForecast.GROUND_TRUTH_VALUES: recent_history_df[self.target_variable],
                JSONForecast.FORECAST_TIME: [],
                JSONForecast.FORECAST_VALUES: [],
                JSONForecast.FORECAST_FOLD_ID: [],
                JSONForecast.QUANTILES: empty_quantile_forecasts,
            }

    def aggregate_metrics_per_timeseries(self, metrics_df):
        """Compute aggregated metrics over all timeseries.
        'rmse', 'nd', 'meanAbsoluteQuantileLoss' and 'meanWeightedQuantileLoss' cannot be computed per timeseries.
        'abs_error' and 'abs_target_sum' are only used to compute aggregated metrics.

        Args:
            metrics_df (DataFrame): One row per timeseries. Must contain a 'test_size' column (used as
                weight), 'abs_error', 'abs_target_sum', one 'quantile_loss_<quantile>' column per value of
                `self.quantiles`, every metric of SINGLE_TIMESERIES_METRICS and every prefixed custom metric.

        Returns:
            dict: Aggregated value for each metric of AGGREGATED_TIMESERIES_METRICS, followed by each
                prefixed custom metric when `self.custom_metrics` is set.
        """
        # TODO @timeseries: harmonize param names (snake vs camel cases)
        custom_metric_names = []
        if self.custom_metrics:
            custom_metric_names = [
                prefix_custom_metric_name(custom_metric["name"]) for custom_metric in self.custom_metrics
            ]
        quantile_loss_names = ["quantile_loss_{}".format(quantile) for quantile in self.quantiles]

        sum_aggregation_metrics = ["abs_error", "abs_target_sum"] + quantile_loss_names
        # Build new lists: the module-level metric lists are shared and must never be extended in place
        weighted_average_aggregation_metrics = list(SINGLE_TIMESERIES_METRICS) + custom_metric_names

        aggregated_metrics = {}

        for metric_name in sum_aggregation_metrics:
            aggregated_metrics[metric_name] = np.sum(metrics_df[metric_name])

        for metric_name in weighted_average_aggregation_metrics:
            # Rows where the metric is NaN are excluded from both the numerator and the weights
            metric_without_nans = metrics_df[metric_name].dropna()
            weight_without_nans = metrics_df["test_size"][metric_without_nans.index]
            aggregated_metrics[metric_name] = (
                (metric_without_nans * weight_without_nans).sum() / weight_without_nans.sum()
            )

        abs_target_sum = aggregated_metrics["abs_target_sum"]
        aggregated_metrics["rmse"] = np.sqrt(aggregated_metrics["mse"])
        aggregated_metrics["nd"] = aggregated_metrics["abs_error"] / abs_target_sum if abs_target_sum != 0 else np.nan

        mean_quantile_loss = np.mean([aggregated_metrics[name] for name in quantile_loss_names])
        aggregated_metrics["meanAbsoluteQuantileLoss"] = mean_quantile_loss
        aggregated_metrics["meanWeightedQuantileLoss"] = (
            mean_quantile_loss / abs_target_sum if abs_target_sum != 0 else np.nan
        )

        # A fresh list here too: `+=` on AGGREGATED_TIMESERIES_METRICS itself would grow the global on each call
        metrics_to_return = list(AGGREGATED_TIMESERIES_METRICS) + custom_metric_names

        return {metric_name: aggregated_metrics[metric_name] for metric_name in metrics_to_return}

    def _compute_single_timeseries_naive_error(self, historical_df, y_true):
        """Mean absolute error of a naive forecast on one timeseries.

        The naive model forecasts each value as the value observed `self.prediction_length`
        steps earlier (exactly like Trivial Identity). The result is used in the MASE and MSIS
        metrics to compare errors to the naive error.

        Args:
            historical_df: DataFrame holding the historical `self.target_variable` column.
            y_true: Ground truth values appended after the historical target.

        Returns:
            Mean of the absolute differences between the series and itself shifted by
            `self.prediction_length` steps.
        """
        horizon = self.prediction_length
        full_series = np.append(historical_df[self.target_variable].to_numpy(), y_true)

        # Pair every value with the one observed `horizon` steps before it
        observed = full_series[horizon:]
        naive_forecast = full_series[:-horizon]

        return np.mean(abs(observed - naive_forecast))

    @staticmethod
    def aggregate_metrics_per_fold(folds_metrics_df, weights=None, compute_std=True):
        """Aggregate every metric column over the folds.

        Args:
            folds_metrics_df (DataFrame): DataFrame of metrics for each fold
            weights (list[float], optional): Weights to apply to each fold. Default to None.
            compute_std (bool, optional): Whether to also aggregate each column with `dku_std_nonan`.
                Default to True.

        Returns:
            dict: For each column, the value returned by `dku_average_nonan` under the column name and,
                when `compute_std` is True, the value returned by `dku_std_nonan` under '<column>std'.
        """
        def _average_over_folds(fold_values):
            return dku_average_nonan(fold_values, weights=weights)

        def _std_over_folds(fold_values):
            return dku_std_nonan(fold_values, weights=weights)

        aggregated = folds_metrics_df.agg(_average_over_folds).to_dict()
        if not compute_std:
            return aggregated

        std_by_metric = folds_metrics_df.agg(_std_over_folds).to_dict()
        for metric_name, std_value in std_by_metric.items():
            aggregated["{}std".format(metric_name)] = std_value
        return aggregated

    def filter_per_timeseries_metrics(self, per_timeseries_metrics):
        """Keep only the metrics that are reported per timeseries.

        Retained metrics are those of SINGLE_TIMESERIES_METRICS plus, when `self.custom_metrics`
        is set, the prefixed name of each custom metric.

        Args:
            per_timeseries_metrics (dict): Maps each timeseries identifier to a dict of
                metric name -> metric value.

        Returns:
            dict: Same structure as the input, restricted to the retained metric names.
        """
        # Copy into a local set: `+=` on SINGLE_TIMESERIES_METRICS itself would permanently
        # add the custom metric names to the module-level list on every call
        metrics_to_return = set(SINGLE_TIMESERIES_METRICS)
        if self.custom_metrics:
            metrics_to_return.update(
                prefix_custom_metric_name(custom_metric["name"]) for custom_metric in self.custom_metrics
            )
        return {
            timeseries_identifier: {
                metric_name: metric_value
                for metric_name, metric_value in metrics.items()
                if metric_name in metrics_to_return
            }
            for timeseries_identifier, metrics in per_timeseries_metrics.items()
        }
