import logging
import os.path as osp
import numpy as np
import pandas as pd

from dataiku.core.dku_pandas_csv import dataframe_to_csv
from dataiku.doctor.utils.split import load_df_with_normalization

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Seed used when drawing background rows, so that the same dataset and scores always yield the same rows
RANDOM_SEED = 1337


class BackgroundRowsHandler:
    """ Draw, save and reload the background rows of a model.

    The background rows are a sample of a dataset, stored as BACKGROUND_FILENAME in the model folder. Their first
    MIN_BACKGROUND_SIZE rows are picked so that their average prediction is close to the one of the whole dataset.
    """
    # Name of the file, in the model folder, holding the background rows
    BACKGROUND_FILENAME = "background_rows.csv"
    # Number of leading rows that must be centered around the average prediction; also the minimal dataset size
    MIN_BACKGROUND_SIZE = 25
    # Maximal number of background rows
    MAX_BACKGROUND_SIZE = 1000
    # Maximal number of random samples tried when looking for well-centered first rows
    MAX_ITER_BACKGROUND = 200
    # Accepted gap between the sample and dataset average predictions, as a fraction of the predictions std
    ALLOWED_BACKGROUND_BIAS = 0.05

    def __init__(self, model_folder_context, split_desc, prediction_type, per_feature):
        """
        :param model_folder_context: folder context of the model, used to check, read and write the background file
        :param split_desc: split description; its "schema" entry is used when reloading the background rows
        :param prediction_type: prediction type, forwarded to the loader when reloading the background rows
        :param per_feature: per-feature settings, forwarded to the loader when reloading the background rows
        """
        self._model_folder_context = model_folder_context
        self.split_desc = split_desc
        self.prediction_type = prediction_type
        self.per_feature = per_feature

    def has_saved_background_rows(self):
        """ Does the model folder contain the background rows file?
        :rtype: bool
        """
        return self._model_folder_context.isfile(self.BACKGROUND_FILENAME)

    def retrieve_background_rows(self):
        """ Load the saved background rows file
        :return: All the background rows
        :rtype: pd.DataFrame
        """
        logger.info("Using saved background rows")
        background_rows_df = load_df_with_normalization(
            self.BACKGROUND_FILENAME, self._model_folder_context, self.split_desc["schema"],
            self.per_feature, prediction_type=self.prediction_type
        )
        logger.info("Loaded background rows with shape={}".format(background_rows_df.shape))
        return background_rows_df

    def draw_background_rows(self, dataset, score, save):
        """ Draw up to MAX_BACKGROUND_SIZE rows in the dataset and ensure that the MIN_BACKGROUND_SIZE first rows have
        an average prediction close to the overall dataset prediction average, and optionally save them in the model
        folder.

        Rows are always selected by position, so the dataset index does not need to be unique.

        :param dataset: dataset in which the rows should be drawn
        :type dataset: pd.DataFrame
        :param score: score of the dataset (dataset.shape[0] == score.shape[0])
        :type score: np.ndarray
        :param save: whether to save the background rows. Will save them non-normalized for consistency with the splits serialization
        :type save: bool
        :return: Drawn background rows. This method doesn't normalize the returned DataFrame.
        :rtype: pd.DataFrame
        :raises ValueError: if the dataset has less than MIN_BACKGROUND_SIZE rows
        """
        logger.info("Building background rows")
        nb_rows = dataset.shape[0]

        if nb_rows < BackgroundRowsHandler.MIN_BACKGROUND_SIZE:
            raise ValueError("Can not compute explanations: not enough rows to build background rows")

        # Building first background rows centered around prediction results
        best_background_index = None
        predictions_means = np.mean(score, axis=0)
        predictions_std = np.std(score, axis=0)
        # The tolerance does not depend on the sample: compute it once
        max_allowed_differences = BackgroundRowsHandler.ALLOWED_BACKGROUND_BIAS * predictions_std
        random_state = np.random.RandomState(RANDOM_SEED)
        for _ in range(BackgroundRowsHandler.MAX_ITER_BACKGROUND):
            sample_indices = random_state.choice(nb_rows, size=BackgroundRowsHandler.MIN_BACKGROUND_SIZE,
                                                 replace=False)
            sample_predictions_means = np.mean(score[sample_indices], axis=0)
            mean_differences = np.abs(predictions_means - sample_predictions_means)
            if np.all(mean_differences < max_allowed_differences):
                best_background_index = sample_indices
                break
        if best_background_index is None:
            # No sample was within the tolerance: fall back on the last one drawn
            logger.warning("Could not find a well-centered background, will take a random one")
            best_background_index = sample_indices
        logger.info("Background estimated bias <= {}".format(mean_differences.max()))
        first_background_rows_df = dataset.iloc[best_background_index]

        # Filling the remaining with sample
        remaining_rows_to_add = min(nb_rows,
                                    BackgroundRowsHandler.MAX_BACKGROUND_SIZE) - first_background_rows_df.shape[0]
        if remaining_rows_to_add > 0:
            # Exclude the first rows by position, not by label: with repeated index labels, a label-based drop
            # would also remove rows that were not picked and could leave too few rows to sample from.
            remaining_mask = np.ones(nb_rows, dtype=bool)
            remaining_mask[best_background_index] = False
            df_orig_without_first_rows = dataset.iloc[np.flatnonzero(remaining_mask)]
            remaining_background_df = df_orig_without_first_rows.sample(n=remaining_rows_to_add,
                                                                        random_state=RANDOM_SEED)

            background_rows_df = pd.concat([first_background_rows_df, remaining_background_df])
        else:
            background_rows_df = first_background_rows_df

        logger.info("Built background rows with shape={}".format(background_rows_df.shape))

        if save:
            logger.info("Saving background rows with shape={}".format(background_rows_df.shape))
            with self._model_folder_context.get_file_path_to_write(self.BACKGROUND_FILENAME) as background_path:
                dataframe_to_csv(background_rows_df, background_path, open)
        return background_rows_df
