import logging
import numpy as np

from dataiku.doctor.timeseries.models.base_estimator import BaseTimeseriesEstimator
from dataiku.doctor.timeseries.utils import timeseries_iterator, log_df
from dataiku.doctor.timeseries.utils import get_dataframe_of_timeseries_identifier
from dataiku.doctor.timeseries.utils import future_date_range
from dataiku.doctor.timeseries.utils import ModelForecast
from dataiku.doctor.timeseries.preparation.resampling.utils import get_frequency, get_monthly_day_alignment
from dataiku.doctor.utils import doctor_constants


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Maps the internal key of each information criterion to the label displayed for it.
INFORMATION_CRITERION_TO_DISPLAY_NAME = dict(
    aic="AIC",
    bic="BIC",
    hqic="HQIC",
    llf="Log-likelihood",
)

class DkuStatisticalEstimator(BaseTimeseriesEstimator):
    """
    Base estimator for statistical models that train one model per time series.

    Contains the trained models (indexed by time series identifier), the external features and several
    model & algo specific parameters. Subclasses must implement `_fit_single` and
    `_forecast_single_timeseries`.

    WARNING: from release 11.2 to 12.0 (included), only predictor and external_features were serialized
    and params needed for scoring/evaluation were set via the initialize method.
    We still need to use the initialize method for backward compatibility.
    """
    def __init__(
        self,
        frequency,
        time_variable,
        prediction_length,
        target_variable,
        timeseries_identifier_columns,
        monthly_day_alignment=None,
    ):
        """Store the parameters that are not passed directly to the external library model.

        Args:
            frequency: Frequency of the time series (forwarded to the base estimator).
            time_variable: Name of the column holding the timestamps.
            prediction_length: Default number of steps to forecast.
            target_variable: Name of the column holding the target values.
            timeseries_identifier_columns: Columns identifying each time series.
            monthly_day_alignment: Optional alignment used when building future dates.
        """
        # params not passed directly to the external library model
        # NOTE: the base class expects prediction_length *before* time_variable.
        super(DkuStatisticalEstimator, self).__init__(
            frequency,
            prediction_length,
            time_variable,
            target_variable,
            timeseries_identifier_columns,
            monthly_day_alignment
        )

        self.timeseries_identifier_columns = timeseries_identifier_columns

        # dict of trained models by timeseries identifier; stays None until fit() is called
        self.trained_models = None

    def initialize(self, core_params, modeling_params):
        """ Sets the params that are needed for scoring (required for models that were not serialized with all parameters from release 11.2 to 12.0).
        Params added after 12.1 don't need to be initialized here because they will always be serialized.

        Args:
            core_params (dict): Core params of the model
            modeling_params (dict): Resolved modeling params (not read by this implementation)
        """
        self.prediction_length = core_params[doctor_constants.PREDICTION_LENGTH]
        self.time_variable = core_params[doctor_constants.TIME_VARIABLE]
        self.target_variable = core_params[doctor_constants.TARGET_VARIABLE]
        self.timeseries_identifier_columns = core_params[doctor_constants.TIMESERIES_IDENTIFIER_COLUMNS]
        self.frequency = get_frequency(core_params)
        self.monthly_day_alignment = get_monthly_day_alignment(core_params)

    def _external_features_columns(self, timeseries_identifier):
        """Return the external features columns configured for a time series.

        Returns:
            The (non-empty) entry of self.external_features for this identifier, or None when
            no external features are configured for it.
        """
        if self.external_features and self.external_features.get(timeseries_identifier):
            return self.external_features[timeseries_identifier]
        return None

    def _fit_single(self, target_values, date_values=None, external_features_values=None):
        """Fit one time series. Must be implemented by subclasses; the returned object is
        stored as the trained model of the time series."""
        raise NotImplementedError()

    def fit(self, train_df, external_features=None):
        """Fit one model per timeseries

        Args:
            train_df: Training dataframe, split per time series with timeseries_identifier_columns.
            external_features: Optional mapping from time series identifier to its external
                features columns. When None, the current self.external_features is kept.
        """
        if external_features is not None:
            self.external_features = external_features

        self.trained_models = {}
        for timeseries_identifier, df_of_timeseries_identifier in timeseries_iterator(
            train_df, self.timeseries_identifier_columns
        ):
            # Lazy logger arguments: safe whatever the type of the identifier is
            logger.info(
                "Training model for time series %s of shape %s",
                timeseries_identifier,
                df_of_timeseries_identifier.shape
            )

            # Indexes are reset so that each series handed to the model is indexed from 0
            target_values = df_of_timeseries_identifier[self.target_variable].reset_index(drop=True)
            external_features_columns = self._external_features_columns(timeseries_identifier)
            external_features_values = (
                df_of_timeseries_identifier[external_features_columns].reset_index(drop=True)
                if external_features_columns else None
            )
            date_values = df_of_timeseries_identifier[self.time_variable].reset_index(drop=True)

            self.trained_models[timeseries_identifier] = self._fit_single(
                target_values, date_values, external_features_values
            )

    def _forecast_single_timeseries(
        self,
        trained_model,
        past_target_values,
        past_date_values,
        quantiles,
        past_external_features_values,
        future_external_features_values,
        fit_before_predict,
        prediction_length
    ):
        """Forecast one time series. Must be implemented by subclasses.

        The returned object must support item assignment: predict_single adds the forecast
        timestamps to it under ModelForecast.TIMESTAMPS.
        """
        raise NotImplementedError()

    def _prepare_predict(self):
        """
        Perform basic checks before running the predict method.

        Raises:
            ValueError: if the estimator has not been trained (trained_models is None).
        """
        if self.trained_models is None:
            raise ValueError("Trying to predict an estimator that has not been trained")

    def predict_single(
        self,
        past_df_of_timeseries_identifier,
        future_df_of_timeseries_identifier,
        quantiles,
        timeseries_identifier,
        fit_before_predict=False,
        prediction_length_override=None
    ):
        """Produce the forecast values for a single time series, with identifier timeseries_identifier

        Args:
            past_df_of_timeseries_identifier: Past data of the time series.
            future_df_of_timeseries_identifier: Future data of the time series; only read when
                external features are configured for this time series.
            quantiles: Quantiles to forecast.
            timeseries_identifier: Identifier of the time series to forecast.
            fit_before_predict: Forwarded to _forecast_single_timeseries.
            prediction_length_override: When not None, used instead of self.prediction_length.

        Returns:
            The forecasts built by _forecast_single_timeseries, completed with the future
            timestamps under ModelForecast.TIMESTAMPS.

        Raises:
            ValueError: if the estimator is not trained or the time series was unseen during training.
        """
        self._prepare_predict()

        if timeseries_identifier not in self.trained_models:
            raise ValueError("""Cannot forecast time series {}: unseen during training.
                Try to retrain the model including this time series in the train set, or use a deep learning model instead.
                """.format(timeseries_identifier)
            )

        past_target_values = past_df_of_timeseries_identifier[self.target_variable].reset_index(drop=True)
        past_date_values = past_df_of_timeseries_identifier[self.time_variable].reset_index(drop=True)

        # External features are only extracted when configured for this time series
        external_features_columns = self._external_features_columns(timeseries_identifier)
        past_external_features_values = None
        future_external_features_values = None
        if external_features_columns:
            past_external_features_values = (
                past_df_of_timeseries_identifier[external_features_columns].reset_index(drop=True)
            )
            future_external_features_values = (
                future_df_of_timeseries_identifier[external_features_columns].reset_index(drop=True)
            )

        trained_model = self.trained_models[timeseries_identifier]

        prediction_length = (
            prediction_length_override if prediction_length_override is not None else self.prediction_length
        )

        forecasts = self._forecast_single_timeseries(
            trained_model,
            past_target_values,
            past_date_values,
            quantiles,
            past_external_features_values,
            future_external_features_values,
            fit_before_predict,
            prediction_length
        )

        # Forecast timestamps start from the last known date of the past data
        last_past_date = past_date_values.iloc[-1]

        forecasts[ModelForecast.TIMESTAMPS] = future_date_range(
            last_past_date,
            prediction_length,
            self.frequency,
            self.monthly_day_alignment,
        )

        return forecasts

    def predict(self, past_df, future_df, quantiles, fit_before_predict=False, prediction_length_override=None):
        """
        Produce the forecast values for all time series

        Args:
            past_df: Past data of all the time series to forecast.
            future_df: Future data; only read for time series that have external features.
            quantiles: Quantiles to forecast.
            fit_before_predict: Forwarded to predict_single.
            prediction_length_override: Forwarded to predict_single.

        Return:
            Dictionary where keys are time series identifiers and values are the forecast values for the time series.
            Each forecast contains the time stamps, the mean forecast values, and the quantile forecasts (2D-array)
        """
        self._prepare_predict()

        forecasts_by_timeseries = {}
        for timeseries_identifier, past_df_of_timeseries_identifier in timeseries_iterator(
                past_df, self.timeseries_identifier_columns
        ):
            # Lazy logger arguments: "%s" % identifier would fail on a multi-element tuple
            logger.info("Predicting model for time series %s", timeseries_identifier)
            log_df(logger, past_df_of_timeseries_identifier, self.time_variable, None, "\t - Past")
            future_df_of_timeseries_identifier = None
            if self._external_features_columns(timeseries_identifier):
                future_df_of_timeseries_identifier = get_dataframe_of_timeseries_identifier(
                    future_df, timeseries_identifier
                )
                log_df(logger, future_df_of_timeseries_identifier, self.time_variable, None, "\t - External features future")

            forecasts_by_timeseries[timeseries_identifier] = self.predict_single(
                past_df_of_timeseries_identifier,
                future_df_of_timeseries_identifier,
                quantiles,
                timeseries_identifier,
                fit_before_predict=fit_before_predict,
                prediction_length_override=prediction_length_override
            )

        return forecasts_by_timeseries

    @staticmethod
    def _build_forecasts_dict(prediction_results, quantiles):
        """Build the forecasts dict (mean forecast and quantile forecasts) from prediction results.

        Each quantile is derived from a two-sided confidence interval: a quantile q < 0.5 is the
        first column of the interval computed with alpha = 2 * q, a quantile q >= 0.5 is the second
        column of the interval computed with alpha = 2 * (1 - q).

        Args:
            prediction_results: Object exposing conf_int(alpha=...) (result indexed with .iloc) and
                predicted_mean (with to_numpy()) - presumably statsmodels prediction results,
                to be confirmed against callers.
            quantiles: Sorted quantiles to forecast.

        Returns:
            dict with the mean forecast under ModelForecast.FORECAST_VALUES and the quantile forecasts
            under ModelForecast.QUANTILES_FORECASTS (one row per quantile, in the order of quantiles).
        """
        forecasts_by_quantile = {}
        for quantile in quantiles:  # quantiles are sorted
            if quantile in forecasts_by_quantile:
                # Already filled as the symmetric counterpart of a lower quantile
                continue

            if quantile < 0.5:
                alpha = quantile * 2
            elif quantile == 0.5:
                alpha = 1
            else:
                alpha = (1 - quantile) * 2

            confidence_interval = prediction_results.conf_int(alpha=alpha)

            if quantile < 0.5:
                forecasts_by_quantile[quantile] = confidence_interval.iloc[:, 0]
                # The same interval also provides the symmetric upper quantile, when requested
                if 1 - quantile in quantiles:
                    forecasts_by_quantile[1 - quantile] = confidence_interval.iloc[:, 1]
            else:
                forecasts_by_quantile[quantile] = confidence_interval.iloc[:, 1]

        quantiles_forecasts = np.array([forecasts_by_quantile[quantile] for quantile in quantiles])

        return {
            ModelForecast.FORECAST_VALUES: prediction_results.predicted_mean.to_numpy(),
            ModelForecast.QUANTILES_FORECASTS: quantiles_forecasts,
        }

    def get_coefficients_map_and_names(self):
        """This method retrieves the coefficients of each trained model and stores them in the coefficients_map dict.
        coefficients_map stores, for each coefficient name, 4 dictionaries indexed by time series identifier to store their value, p-value, t-value and stderr.

        There are 3 kinds of coefficients:
        - fixed_coefficients are the coefficients present in all time series.
        - variable_coefficients are coefficients with suffix '_i' that can be different from one time series to another depending on the model.
        - external_features_coefficients are coefficients corresponding to each of the external features, they can also be different from one time series to another depending on the data.

        Returns:
            (dict, list, list, list): coefficients_map, fixed_coefficients, variable_coefficients, external_features_coefficients

        Raises:
            NotImplementedError: always in this base class; to be overridden by models supporting it.
        """
        raise NotImplementedError("Statistical model {} doesn't support coefficient extraction".format(self.__class__.__name__))

    def get_information_criteria(self):
        """Return the information criteria of the trained models.

        Raises:
            NotImplementedError: always in this base class; to be overridden by models supporting it.
        """
        raise NotImplementedError("Model {} doesn't support information criteria".format(self.__class__.__name__))

    @staticmethod
    def prepare_information_criteria(information_criteria):
        """Wrap an information criterion value in a dict that flags infinite values.

        Args:
            information_criteria: Numeric value of the information criterion.

        Returns:
            dict: {"value": ..., "isInfinite": ...}; infinite values are replaced by their sign
            (-1 or 1) with isInfinite set to True, other values are returned unchanged.
        """
        if np.isneginf(information_criteria):
            return {"value": -1, "isInfinite": True}
        if np.isposinf(information_criteria):
            return {"value": 1, "isInfinite": True}
        return {"value": information_criteria, "isInfinite": False}