import errno
import shutil
from abc import ABCMeta
from abc import abstractmethod
import os
import logging
from contextlib import contextmanager
import threading

from six import add_metaclass

from dataiku.base.remoterun import is_running_remotely
from dataiku.container.file_exchange import LabReadonlyFileExchanger, LocalHuggingfaceFileExchanger
from dataiku.container.file_exchange import FileExchanger
from dataiku.container.file_exchange import SavedModelFileExchanger
from dataiku.core import dkujson
from dataiku.core.intercom import jek_or_backend_json_call

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


def build_folder_context(datadir_folder_path):
    """
    Build a folder context on a folder of the Datadir, exchanging files of kind "FILTERED_PATHS" when the
    process runs remotely.

    :param str datadir_folder_path: absolute path of folder within the Datadir
    :rtype: FolderContext
    """
    file_kind = "FILTERED_PATHS"
    return _build_folder_context(datadir_folder_path, file_kind)


def build_model_cache_folder_context(datadir_folder_path):
    """
    Build a folder context on a folder of the Datadir, exchanging files of kind "MODEL_CACHE" when the
    process runs remotely.

    :param str datadir_folder_path: absolute path of folder within the Datadir
    :rtype: FolderContext
    """
    file_kind = "MODEL_CACHE"
    return _build_folder_context(datadir_folder_path, file_kind)


def build_saved_model_folder_context(datadir_folder_path, project_key, saved_model_id):
    """
    Build a folder context on a Saved Model folder.

    When the process does not run remotely, the folder is accessed directly on the local file system.
    Otherwise, files are exchanged through a `SavedModelFileExchanger` bound to the current execution.

    :param str datadir_folder_path: path of the folder within the Datadir
    :param project_key: forwarded to the `SavedModelFileExchanger`
    :param saved_model_id: forwarded to the `SavedModelFileExchanger`
    :rtype: FolderContext
    """
    if not is_running_remotely():
        return _NoopFolderContext(datadir_folder_path)

    # Imported lazily, only needed when running in a container
    from dataiku.container.runner import read_execution
    execution = read_execution()
    exchanger = SavedModelFileExchanger(execution["id"], project_key, saved_model_id)
    return _ContainerDatadirFolderContext(datadir_folder_path, execution, exchanger)

def build_local_hf_folder_context(datadir_folder_path, project_key, saved_model_id):
    """
    Same as `build_saved_model_folder_context`, except that files are exchanged through a
    `LocalHuggingfaceFileExchanger`, so that the "output" folder is filtered out.

    :param str datadir_folder_path: path of the folder within the Datadir
    :param project_key: forwarded to the `LocalHuggingfaceFileExchanger`
    :param saved_model_id: forwarded to the `LocalHuggingfaceFileExchanger`
    :rtype: FolderContext
    """
    if not is_running_remotely():
        return _NoopFolderContext(datadir_folder_path)

    # Imported lazily, only needed when running in a container
    from dataiku.container.runner import read_execution
    execution = read_execution()
    exchanger = LocalHuggingfaceFileExchanger(execution["id"], project_key, saved_model_id)
    return _ContainerDatadirFolderContext(datadir_folder_path, execution, exchanger)

def build_readonly_lab_ml_folder_context(datadir_folder_path, project_key, analysis_id, mltask_id):
    """
    Build a folder context on a folder of a Lab (analysis) ML task.

    When the process does not run remotely, the folder is accessed directly on the local file system.
    Otherwise, files are exchanged through a `LabReadonlyFileExchanger` bound to the current execution.

    :param str datadir_folder_path: path of the folder within the Datadir
    :param project_key: forwarded to the `LabReadonlyFileExchanger`
    :param analysis_id: forwarded to the `LabReadonlyFileExchanger`
    :param mltask_id: forwarded to the `LabReadonlyFileExchanger`
    :rtype: FolderContext
    """
    if not is_running_remotely():
        return _NoopFolderContext(datadir_folder_path)

    # Imported lazily, only needed when running in a container
    from dataiku.container.runner import read_execution
    execution = read_execution()
    exchanger = LabReadonlyFileExchanger(execution["id"], project_key, analysis_id, mltask_id)
    return _ContainerDatadirFolderContext(datadir_folder_path, execution, exchanger)


def get_partitions_fmi_folder_contexts(fmi):
    """
    Build the read-only folder contexts of every partition of a partitioned base model.

    :param fmi: full model id of the (supposedly partitioned base) model
    :return: Dictionary that maps partition names to FmiReadonlyFolderContexts objects
             If fmi does NOT correspond to a partitioned base model, returns None
    """
    type_info = jek_or_backend_json_call("ml/get-model-type-info", {"fmi": fmi})
    if type_info["isPartitionedBaseModel"]:
        fmi_by_partition = jek_or_backend_json_call("ml/get-model-partitions-fmis", {"fmi": fmi})

        contexts_by_partition = {}
        for partition_name, partition_fmi in fmi_by_partition.items():
            contexts_by_partition[partition_name] = FmiReadonlyFolderContexts.build(partition_fmi)
        return contexts_by_partition

    return None


class FmiReadonlyFolderContexts(object):
    """
    Bundle of the folder contexts attached to an FMI (analysis model or saved model):
    - Session folder context
    - Model folder context
    - Preprocessing folder context
    - Split folder context

    Together with:
    - The full model id itself
    - The name of the split desc file

    .. important::
       The folder contexts SHOULD be treated as READ-ONLY, even though this is technically not always
       enforced, e.g. when the corresponding user has write access on the Saved Model
    """

    # Keys of the "ml/get-model-folders-info" response, in the order expected by the constructor
    _FOLDER_KEYS = ("sessionFolder", "modelFolder", "preprocessingFolder", "splitFolder")

    def __init__(self, session_folder_context, model_folder_context, preprocessing_folder_context, split_folder_context, split_desc_filename, fmi):
        self.fmi = fmi
        self.split_desc_filename = split_desc_filename
        self.session_folder_context = session_folder_context
        self.model_folder_context = model_folder_context
        self.preprocessing_folder_context = preprocessing_folder_context
        self.split_folder_context = split_folder_context

    @classmethod
    def build(cls, fmi):
        """
        Query the folders and type information of the model, then build a folder context on each of its folders.

        :param fmi: full model id
        :rtype: FmiReadonlyFolderContexts
        """
        folders_info = jek_or_backend_json_call("ml/get-model-folders-info", {"fmi": fmi})
        type_info = jek_or_backend_json_call("ml/get-model-type-info", {"fmi": fmi})

        session_ctx, model_ctx, preprocessing_ctx, split_ctx = [
            cls._build_readonly_fmi_folder_context(folders_info[folder_key], type_info)
            for folder_key in cls._FOLDER_KEYS
        ]
        return cls(session_ctx, model_ctx, preprocessing_ctx, split_ctx, folders_info["splitDescFilename"], fmi)

    @staticmethod
    def _build_readonly_fmi_folder_context(data_dir_folder_path, model_type_info):
        """
        Build the folder context matching the type of the model: saved model folder context when the type is
        "SAVED", lab ML folder context otherwise.

        :param str data_dir_folder_path: path of the folder within the Datadir
        :param dict model_type_info: response of the "ml/get-model-type-info" call
        :rtype: FolderContext
        """
        project_key = model_type_info["projectKey"]

        if model_type_info["type"] != "SAVED":
            # Any other type is handled as ANALYSIS
            analysis_id = model_type_info["analysisId"]
            mltask_id = model_type_info["mlTaskId"]
            return build_readonly_lab_ml_folder_context(data_dir_folder_path, project_key, analysis_id, mltask_id)

        return build_saved_model_folder_context(data_dir_folder_path, project_key, model_type_info["savedModelId"])


def build_noop_folder_context(folder_path):
    """
    Wrap a local folder path into a simple folder context, for places where a folder context is expected.

    :type folder_path: str
    :rtype: FolderContext
    """
    noop_context = _NoopFolderContext(folder_path)
    return noop_context


def _build_folder_context(datadir_folder_path, file_kind):
    """
    Build a local folder context when the process does not run remotely, else a container folder context
    relying on a `FileExchanger` of the given kind, bound to the current execution.

    :param str datadir_folder_path: path of the folder within the Datadir
    :param str file_kind: kind of files forwarded to the `FileExchanger`
    :rtype: FolderContext
    """
    if not is_running_remotely():
        return _NoopFolderContext(datadir_folder_path)

    # Imported lazily, only needed when running in a container
    from dataiku.container.runner import read_execution
    execution = read_execution()
    exchanger = FileExchanger(execution["id"], file_kind)
    return _ContainerDatadirFolderContext(datadir_folder_path, execution, exchanger)


@add_metaclass(ABCMeta)
class FolderContext(object):
    """
    Abstraction over a folder, through which most filesystem related operations on that folder are performed:
        * reading a file/directory located in the folder
        * writing a file/directory in the folder
        * checking whether a file/directory exists
        * deleting a file/directory of the folder

    Its main purpose is to manipulate folders of the DSS datadir the same way, whether the python process:
        * runs on the same server as the datadir (the folder is then directly reachable on the filesystem)
        * runs on a remote container

    Most of the time, the folders at stake are ML folders, holding information about a model (params, performance,
    the model itself, etc...).

    **Warning** this API is not meant to be thread safe, hence care must be taken when writing concurrent code that
    involves a folder context:
        * See for instance `dataiku.doctor.crossval.result_store.OnDiskResultStore`, which relies on a lock to
          handle concurrent reads and writes on a folder context
        * At the moment, ML objects can be modified:
            * during training (single logic at a time for a single model)
            * during post-train computation, enforced to one logic on one model at a time
              (see `PredictionPostComputationHandler` in java)
            * for binary classification, the threshold can be modified from the UI or the public API
        * Should this logic evolve so that folder contexts become concurrently editable, it should probably be
          coordinated from the backend directly

    Examples:

         folder_context = build_folder_context("/path/to/datadir/subpath/to/my/folder")

         data = folder_context.read_json("data.json")
         data["field"] = "updated_value"
         folder_context.write_json("data.json", data)

         with folder_context.get_file_path_to_read("dataset.csv") as dataset_path:
            df = pd.read_csv(dataset_path)

         df["new_col"] = 2

         with folder_context.get_file_path_to_write("dataset.csv") as dataset_path:
            df.to_csv(dataset_path)

         if folder_context.isfile("perf.json"):
             perf = folder_context.read_json("perf.json")
         else:
            perf = None

         with folder_context.get_folder_path_to_read() as folder_path:
            files_in_folder = os.listdir(folder_path)
            model = mlflow.pyfunc.load_model(folder_path)

         with folder_context.get_folder_path_to_write() as folder_path:
            tensorboard_callback = Tensorboard(log_dir=folder_path)  # tensorboard can write any file to the folder path

         folder_context.rm_path("outdated_folder")
    """

    def __init__(self, _folder_path):
        # Path the context was built from, see `get_origin_folder_path`
        self._folder_path = _folder_path

    def __str__(self):
        class_name = self.__class__.__name__
        return u"{}(folderPath='{}')".format(class_name, self._folder_path)

    @abstractmethod
    def get_file_path_to_read(self, file_name):
        """
        Returns a context manager that yields the path from which the file can be read

        :param str file_name: file of the folder to read
        """

    @abstractmethod
    def get_file_path_to_write(self, file_name):
        """
        Returns a context manager that yields the path to which the file must be written

        :param str file_name: file of the folder to write
        """

    @abstractmethod
    def isfile(self, file_name, allow_cached=True):
        """
        Tells whether `file_name` exists within the folder

        :param str file_name: path of the file, relative to the folder
        :param bool allow_cached: whether an implementation that keeps a local copy of the file may rely on it
        :rtype: bool
        """

    @abstractmethod
    def isdir(self, dir_name):
        """
        Tells whether `dir_name` is a directory of the folder

        :param str dir_name: path of the directory, relative to the folder
        :rtype: bool
        """

    def read_json(self, file_name):
        """
        Loads the content of a JSON file of the folder, gzip decoding being enabled when the name ends with ".gz"

        :param str file_name: path of the JSON file, relative to the folder
        """
        with self.get_file_path_to_read(file_name) as readable_path:
            is_gzipped = file_name.endswith(".gz")
            return dkujson.load_from_filepath(readable_path, with_gzip=is_gzipped)

    def write_json(self, file_name, content):
        """
        Dumps `content` into a JSON file of the folder, gzip encoding being enabled when the name ends with ".gz"

        :param str file_name: path of the JSON file, relative to the folder
        :param content: object to dump
        """
        with self.get_file_path_to_write(file_name) as writable_path:
            is_gzipped = file_name.endswith(".gz")
            dkujson.dump_to_filepath(writable_path, content, with_gzip=is_gzipped)

    @abstractmethod
    def get_folder_path_to_read(self):
        """
        Returns a context manager that yields the path of the folder.

        Meant for "folder" operations, such as listing the files of the folder, or handing the path over to a
        3rd party library (e.g. MLFlow). When a specific file is needed, prefer `get_file_path_to_read`.
        """

    @abstractmethod
    def get_folder_path_to_write(self, regularly_synchronize=False):
        """
        Returns a context manager that yields the path of the folder, so that files can be written in it.

        Meant for cases where what is going to be written in the folder is not known beforehand, for instance when
        handing the path over to a 3rd party library. To write a specific file, prefer `get_file_path_to_write`

        :param bool regularly_synchronize: whether folder needs to be synchronized regularly with underlying folder
        """

    @abstractmethod
    def get_absolute_folder_path(self):
        """
        Returns the absolute path of the folder on the actual file system
        """

    def get_folder_name(self):
        """
        Returns the last component of the absolute version of the path used to build this context
        """
        absolute_origin_path = os.path.abspath(self._folder_path)
        return os.path.basename(absolute_origin_path)

    @abstractmethod
    def get_subfolder_context(self, subfolder_relative_path):
        """
        Returns a folder context on a subfolder of this folder

        :type subfolder_relative_path: str
        :rtype: FolderContext
        """

    @abstractmethod
    def create_if_not_exist(self):
        """
        Creates the folder if it does not exist yet
        """

    @abstractmethod
    def rm_path(self, path):
        """
        Removes a file or directory of the folder

        :param str path: path to remove, relative to the folder
        """

    def get_origin_folder_path(self):
        """
        :return: the path used to build this context, i.e. most probably the path of the corresponding folder in
        the data dir
        """
        return self._folder_path


class _NoopFolderContext(FolderContext):
    """
    Simple implementation of the folder context that relies on the local file system: every path handed out is
    located directly in the wrapped folder, and nothing is copied nor synchronized.
    """

    def __init__(self, folder_path):
        """
        :type folder_path: str
        """
        super(_NoopFolderContext, self).__init__(folder_path)

    def _get_file_path(self, file_name):
        # Location of `file_name` within the wrapped folder
        return os.path.join(self._folder_path, file_name)

    def get_absolute_folder_path(self):
        return self._folder_path

    def get_subfolder_context(self, subfolder_relative_path):
        subfolder_path = self._get_file_path(subfolder_relative_path)
        return _NoopFolderContext(subfolder_path)

    def create_if_not_exist(self):
        target_folder = self.get_absolute_folder_path()
        _py2compatible_makedirs(target_folder, exist_ok=True)

    @contextmanager
    def get_folder_path_to_read(self):
        yield self._folder_path

    @contextmanager
    def get_folder_path_to_write(self, regularly_synchronize=False):
        # `regularly_synchronize` is not used: files are written straight to the folder
        target_folder = self.get_absolute_folder_path()
        _py2compatible_makedirs(target_folder, exist_ok=True)
        yield target_folder

    @contextmanager
    def get_file_path_to_read(self, file_name):
        yield self._get_file_path(file_name)

    @contextmanager
    def get_file_path_to_write(self, file_name):
        yield self._get_file_path(file_name)

    def isfile(self, file_name, allow_cached=True):
        # NB: only checks that the path exists, so an existing directory is reported as well.
        # `allow_cached` is not used by this implementation
        candidate_path = self._get_file_path(file_name)
        return os.path.exists(candidate_path)

    def isdir(self, dir_name):
        # `os.path.isdir` is already False when the path does not exist
        return os.path.isdir(self._get_file_path(dir_name))

    def rm_path(self, path):
        path_to_remove = self._get_file_path(path)
        _rm_path(path_to_remove)


class _ContainerDatadirFolderContext(FolderContext):
    """
    Folder context used when the python process cannot reach the datadir directly: the folder is mirrored in a
    local cache folder (`HOME_DIR/cache/<path of the folder relative to the datadir>`), and files are fetched
    from / sent to the backend through a file exchanger.
    """
    CACHE_FOLDER_NAME = "cache"

    def __init__(self, datadir_folder_path, execution, file_exchanger):
        """
        :param str datadir_folder_path: absolute path of corresponding folder in the datadir
        :param dict execution: current execution, its "backendDipHomePath" entry is the path of the datadir
        :type file_exchanger: dataiku.container.file_exchange.AbstractFileExchanger
        :raises ValueError: if the folder is not located in the datadir
        """
        super(_ContainerDatadirFolderContext, self).__init__(datadir_folder_path)
        self._relative_path_in_datadir = self._get_relative_path_in_datadir(datadir_folder_path,
                                                                            execution["backendDipHomePath"])
        self._execution = execution
        self._file_exchanger = file_exchanger

    @staticmethod
    def _get_relative_path_in_datadir(folder_path, dip_home):
        """
        :param str folder_path: path of a folder supposedly located in the datadir
        :param str dip_home: path of the datadir
        :return: path of the folder, relative to the datadir
        :raises ValueError: if the folder is not the datadir itself nor located under it
        """
        abs_folder_path = os.path.abspath(folder_path)
        normalized_dip_home = os.path.normpath(dip_home)
        # Compare on path component boundaries: a raw `startswith(dip_home)` would also accept sibling folders that
        # merely share the prefix (e.g. "/data/dss_other" for a "/data/dss" datadir), which would then yield a
        # relative path starting with ".." and escaping the cache folder.
        dip_home_prefix = normalized_dip_home.rstrip(os.sep) + os.sep
        is_in_datadir = abs_folder_path == normalized_dip_home or abs_folder_path.startswith(dip_home_prefix)
        if not is_in_datadir:
            raise ValueError(u"Cannot create a Container folder context on a folder not in the "
                             "datadir, folderPath={}, datadirPath={}".format(abs_folder_path, dip_home))

        return os.path.relpath(abs_folder_path, start=dip_home)

    def get_subfolder_context(self, subfolder_relative_path):
        # The subfolder context shares the execution and the file exchanger of its parent
        return _ContainerDatadirFolderContext(self._get_datadir_file_path(subfolder_relative_path), self._execution,
                                              self._file_exchanger)

    def _get_datadir_file_path(self, file_name):
        # Path of the file on the backend side, i.e. within the datadir folder
        return os.path.join(self._folder_path, file_name)

    def _fetch_file_from_backend(self, source_path, target_path):
        logger.info(u"Fetching file from backend: source='{}' target='{}'".format(source_path, target_path))
        # Make sure the local parent folder exists before fetching the file into it
        dirname = os.path.dirname(target_path)
        _py2compatible_makedirs(dirname, exist_ok=True)
        self._file_exchanger.fetch_file(path=source_path, dest=target_path)

    def _fetch_dir_from_backend(self, source_path, target_path):
        logger.info(u"Fetching dir from backend: source='{}' target='{}'".format(source_path, target_path))
        self._file_exchanger.fetch_dir(source_path, target_path)

    def get_absolute_folder_path(self):
        """
        Returns the path of the local copy of the folder, located in the cache folder of the container
        """
        from dataiku.container.runner import HOME_DIR
        return os.path.join(HOME_DIR, self.CACHE_FOLDER_NAME, self._relative_path_in_datadir)

    @contextmanager
    def get_folder_path_to_read(self):
        """
        Fetches the whole folder from the backend (on every call), then yields the path of its local copy
        """
        # todo @foldercontext: cache information that folder has been downloaded already so as not to download it again
        #  (probably a file within the folder)
        self._fetch_dir_from_backend(self._folder_path, self.get_absolute_folder_path())
        yield self.get_absolute_folder_path()

    def _get_container_file_path(self, file_name):
        # Path of the file on the container side, i.e. within the local copy of the folder
        return os.path.join(self.get_absolute_folder_path(), file_name)

    @contextmanager
    def _get_file_path_to_read_context_manager(self, file_name, allow_cached=True):
        """
        Yields the local path of the file, after fetching it from the backend when it is not available locally
        yet, or when `allow_cached` is False
        """
        container_file_path = self._get_container_file_path(file_name)
        if not allow_cached or not os.path.exists(container_file_path):
            self._fetch_file_from_backend(self._get_datadir_file_path(file_name), container_file_path)
        yield container_file_path

    @contextmanager
    def _get_file_path_to_write_context_manager(self, file_name):
        """
        Yields the local path where to write the file, then sends the written file to the backend
        """
        container_file_path = self._get_container_file_path(file_name)
        dirname = os.path.dirname(container_file_path)
        _py2compatible_makedirs(dirname, exist_ok=True)
        yield container_file_path
        # Only reached when the body of the `with` block completed without raising
        self._file_exchanger.send_file(container_file_path, self._get_datadir_file_path(file_name))

    def get_file_path_to_read(self, file_name):
        return self._get_file_path_to_read_context_manager(file_name)

    def get_file_path_to_write(self, file_name):
        return self._get_file_path_to_write_context_manager(file_name)

    def _send_folder_content(self):
        """
        Sends the local copy of the folder to the backend, if it exists. The destination handed over to the file
        exchanger is a "_folder_data.tgz" path located next to the datadir folder, and the archive root is the
        parent of the local folder.
        """
        container_folder_path = self.get_absolute_folder_path()
        if os.path.isdir(container_folder_path):
            self._file_exchanger.send_files([container_folder_path],
                                            os.path.abspath(os.path.join(self._folder_path, os.pardir,
                                                                         "_folder_data.tgz")),
                                            archive_root=os.path.join(container_folder_path, os.pardir))

    @contextmanager
    def get_folder_path_to_write(self, regularly_synchronize=False):
        """
        Yields the path of the local copy of the folder (created if needed), and sends its content to the backend
        when the context exits, whether the body raised or not.

        :param bool regularly_synchronize: when True, the content of the folder is also sent in the background
                                           while the context is open, with a delay between two sends starting at
                                           2 seconds and growing up to 60 seconds
        """
        container_folder_path = self.get_absolute_folder_path()
        _py2compatible_makedirs(container_folder_path, exist_ok=True)
        folder_content_sender_thread = None
        if regularly_synchronize:
            folder_content_sender_thread = _ScheduledTaskThread(self._send_folder_content, 2, 60)
            folder_content_sender_thread.start()
        try:
            yield container_folder_path
        finally:
            if regularly_synchronize:
                # Wait for the background sender to be done before the final send
                folder_content_sender_thread.stop()
                folder_content_sender_thread.join()
            self._send_folder_content()  # Send the content of the folder one last time in all cases

    def isfile(self, file_name, allow_cached=True):
        # Tries and fetches the file first (unless a local copy may be used), then checks that it exists locally
        with self._get_file_path_to_read_context_manager(file_name, allow_cached=allow_cached) as file_path:
            return os.path.exists(file_path)

    def isdir(self, dir_name):
        # NB: fetches the content of the subfolder from the backend in order to check its existence
        with self.get_subfolder_context(dir_name).get_folder_path_to_read() as sub_folder_local_path:
            return os.path.exists(sub_folder_local_path) and os.path.isdir(sub_folder_local_path)

    def create_if_not_exist(self):
        container_folder_path = self.get_absolute_folder_path()

        if os.path.exists(container_folder_path):  # Consider that if local path exist, then folder already exists
            return

        # Else, check that folder does not exist on the backend datadir, not optimal because will download the full
        # folder if it exists. We might want to add an `exist` endpoint directly in the remote file exchange service.
        with self.get_folder_path_to_read():
            # Nothing to do, will try and fetch the folder
            pass

        exists = os.path.exists(container_folder_path)

        # If folder does not exist, then create it locally and also remotely
        if not exists:
            with self.get_folder_path_to_write():
                # Nothing to do, folder will be created locally, then be sent empty to the backend
                pass

    def rm_path(self, path):
        # First delete local path
        _rm_path(self._get_container_file_path(path))
        # Then remove path in the backend
        self._file_exchanger.delete_path(self._get_datadir_file_path(path))


def _rm_path(path):
    """
    Remove a file, a symbolic link or a whole directory tree. Does nothing if the path does not exist.

    Errors happening while removing a directory tree are ignored.

    :param str path: path to remove
    """
    if os.path.islink(path):
        # Remove the link itself, never its target. `shutil.rmtree` refuses symbolic links (the error being
        # silently ignored below), and `os.path.exists` is False for a dangling link, so links need their own branch.
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path, ignore_errors=True)
    elif os.path.exists(path):
        os.remove(path)


def _py2compatible_makedirs(path, exist_ok=False):
    """
    Equivalent of `os.makedirs(path, exist_ok=exist_ok)` that also works with python 2.

    :param str path: directory to create, along with its missing parents
    :param bool exist_ok: when True, do not fail if the directory already exists
    :raises OSError: if the directory cannot be created, if it already exists while `exist_ok` is False, or if
                     the path already exists but is not a directory
    """
    try:
        os.makedirs(path)  # exist_ok has only been added in python 3.2
    except OSError as e:
        # Only tolerate an already existing *directory*: EEXIST is also raised when `path` is an existing file
        if exist_ok and e.errno == errno.EEXIST and os.path.isdir(path):
            return
        raise  # bare raise keeps the original traceback, including on python 2


class _ScheduledTaskThread(threading.Thread):
    """
    Thread that repeatedly runs a task until `stop` is called, waiting between two runs for a delay that starts at
    `start_delay` seconds and is multiplied by 1.02 after each run, up to `max_delay` seconds.

    The task is not run anymore once `stop` has been called (a run already in progress is left to complete).
    An exception raised by the task is not caught, and therefore ends the thread.
    """

    def __init__(self, task, start_delay, max_delay):
        """
        :param task: callable without argument, run after each delay
        :param start_delay: delay (in seconds) before the first run of the task
        :param max_delay: upper bound (in seconds) of the delay between two runs, must be greater than `start_delay`
        """
        super(_ScheduledTaskThread, self).__init__()

        assert max_delay > start_delay
        self._start_delay = start_delay
        self._max_delay = max_delay

        self._task = task
        self._stopped = threading.Event()

    def run(self):
        delay = self._start_delay
        # `Event.wait` returns True as soon as the stop event is set (python >= 2.7), and False when the delay
        # elapsed: the task is only run in the latter case, so that it is never triggered by the call to `stop`.
        while not self._stopped.wait(delay):
            self._task()
            delay = min(delay * 1.02, self._max_delay)

    def stop(self):
        """
        Asks the thread to stop, interrupting the current wait. Use `join` to wait for the thread to be done.
        """
        self._stopped.set()
