import tempfile
from abc import ABCMeta, abstractmethod
from six import add_metaclass
import os.path as osp

from dataiku.base.folder_context import build_noop_folder_context
from dataiku.base.utils import TmpFolder
from dataiku.core import dkujson
from dataiku.core import doctor_constants
from dataiku.doctor.posttraining.model_information_handler import ModelInformationHandlerBase


@add_metaclass(ABCMeta)
class PredictionExternalModelInformationHandler(ModelInformationHandlerBase):

    """Used for subpopulation analysis, individual explanations and partial dependency for
    external models on which we have no training data but instead a dataset on which we can
    evaluate the model.

    Subclasses must implement `load_evaluation_dataset`, `get_predictor` and `run_scoring`.
    """

    def __init__(self, model_folder_context, postcompute_folder_context=None):
        """
        :param model_folder_context: folder context of the model
        :param postcompute_folder_context: folder context receiving the post-training outputs;
            defaults to the "posttrain" subfolder of `model_folder_context` when None
        """
        self._model_folder_context = model_folder_context

        if postcompute_folder_context is None:
            self._postcompute_folder_context = model_folder_context.get_subfolder_context("posttrain")
        else:
            self._postcompute_folder_context = postcompute_folder_context

        # NOTE: get_predictor() is called here, so subclasses must have everything it needs
        # available before delegating to this constructor.
        self._predictor = self.get_predictor()
        # Both are loaded lazily, on first access
        self._evaluated_dataset = None
        self._collector_data = None

    @abstractmethod
    def load_evaluation_dataset(self):
        """For external models there is no train set so the evaluation dataset is used instead"""
        pass

    @abstractmethod
    def get_predictor(self):
        """Should handle the logic to route to the correct External Model predictor"""
        pass

    @abstractmethod
    def run_scoring(self, df, out_folder_context):
        """Generate a perf.json in out_folder"""
        pass

    def use_full_df(self):
        """Always use the evaluation dataset for external models"""
        return True

    def get_full_df(self):
        """Return a copy of the evaluation dataset, which is loaded once and then cached.

        :return: tuple (copy of the evaluation dataset, True)
        """
        if self._evaluated_dataset is None:
            self._evaluated_dataset = self.load_evaluation_dataset()
        # Hand out a copy so that callers cannot alter the cached dataset
        return self._evaluated_dataset.copy(), True

    @property
    def _target_column(self):
        """Name of the target variable, as declared in the predictor core params"""
        return self._predictor.params.core_params["target_variable"]

    def get_target_variable(self):
        """Return the name of the target variable"""
        return self._target_column

    def get_output_folder_context(self):
        """Return the folder context in which post-training outputs are stored"""
        return self._postcompute_folder_context

    def get_prediction_type(self):
        """Return the prediction type declared in the predictor core params"""
        return self._predictor.params.core_params["prediction_type"]

    def get_explainer(self):
        """Return the individual explainer held by the predictor"""
        return self._predictor._individual_explainer

    def get_inv_map(self):
        """Return the "intToLabelMap" entry of the predictor model meta"""
        return self._predictor.params.model_meta["intToLabelMap"]

    def get_model_folder_context(self):
        """Return the folder context of the model"""
        return self._model_folder_context

    def get_sample_weight_variable(self):
        """This handler never uses a sample weight variable"""
        return None

    def get_per_feature(self):
        """Return the "per_feature" entry of the predictor preprocessing params"""
        return self._predictor.params.preprocessing_params["per_feature"]

    def get_type_of_column(self, col_name):
        """Return the "type" of `col_name`, as found in its per-feature params"""
        # get_per_feature_col is not defined in this class; presumably inherited from the base handler
        return self.get_per_feature_col(col_name)["type"]

    def get_role_of_column(self, col_name):
        """Return the "role" of `col_name`, as found in its per-feature params"""
        return self.get_per_feature_col(col_name)["role"]

    def predict(self, df, output_probas=True):
        """Run the predictor on `df`.

        :param df: data to predict on
        :param output_probas: forwarded to the predictor as `with_probas`
        """
        return self._predictor.predict(df, with_probas=output_probas)

    def get_collector_data(self):
        """Return the content of collector_data.json from the model folder (loaded once, then cached)"""
        if self._collector_data is None:
            # This handler only holds a folder context for the model (there is no `_model_folder`
            # path attribute), so the file is read through the context like perf.json is.
            self._collector_data = self._model_folder_context.read_json("collector_data.json")
        return self._collector_data

    def get_schema(self):
        """Return the "schema" entry of the predictor split description"""
        return self._predictor.params.split_desc["schema"]

    @staticmethod
    def _get_scoring(out_folder_context):
        """Read the perf.json produced by a scoring run, if any.

        :param out_folder_context: folder context in which the scoring was run
        :return: tuple (has_scored, reason, perf); `has_scored` is True and `perf` holds the
            parsed perf.json when the file exists, otherwise (False, None, None).
            `reason` is always None here.
        """
        perf_file_name = "perf.json"
        if not out_folder_context.isfile(perf_file_name):
            return False, None, None
        return True, None, out_folder_context.read_json(perf_file_name)

    def _run_scoring_in_tmp_folder(self, df):
        """Score `df` in a temporary folder and return the resulting (has_scored, reason, perf)"""
        with TmpFolder(tempfile.gettempdir()) as tmp_folder:
            # No need for real context here, as folder only temporary
            tmp_folder_context = build_noop_folder_context(tmp_folder)
            self.run_scoring(df, tmp_folder_context)
            # perf.json must be read before the temporary folder is left
            return self._get_scoring(tmp_folder_context)

    def run_binary_scoring(self, df):
        """Required for subpopulation analysis, computes a perf.json in a temporary folder.

        :param df: data to score; must contain the target column
        :return: tuple (has_scored, reason, perf). When the target column of `df` holds fewer
            than two distinct values, scoring is skipped and
            (False, doctor_constants.PREPROC_ONECLASS, None) is returned.
        """
        # Checked first so that no temporary folder is created when scoring is skipped
        if len(set(df[self._target_column])) < 2:
            return False, doctor_constants.PREPROC_ONECLASS, None

        return self._run_scoring_in_tmp_folder(df)

    def run_regression_scoring(self, df):
        """Required for subpopulation analysis, computes a perf.json in a temporary folder.

        :param df: data to score
        :return: tuple (has_scored, reason, perf)
        """
        return self._run_scoring_in_tmp_folder(df)
