import atexit
import json
import logging
import os
import random
import shutil
import string
import tarfile
import threading
from typing import Optional, TYPE_CHECKING

from dataiku.base.utils import tar_extractall
from dataiku.core import intercom
from dataiku.core.intercom import has_ticket_auth_mode
from dataiku.core.ticket_local_folder import get_ticket_local_folder_path

if TYPE_CHECKING:
    from dataiku.core.vector_stores.dku_vector_store import DkuVectorStore
    from langchain_core.vectorstores import VectorStore

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Held while `set_base_folder_path` updates `_target_folder_path` and while
# `get_folder_path_to_load_kb` takes its initial snapshot of it.
_lock = threading.Lock()
# Randomly-named target folder recorded by `set_base_folder_path`;
# stays None until that function has been called successfully.
_target_folder_path: Optional[str] = None
def set_base_folder_path(base_folder_path: str, delete_at_exit=True):
    """
    Sets the base folder to receive the knowledge bank files.

    A randomly-named ``knowledge-banks_<random>`` path located under the base
    folder is recorded as the target folder. This function only records the
    path, it does not create it on disk. Calling it again while the recorded
    target folder already lives under the same base folder path is a no-op.

    :param str base_folder_path: The folder path. It must be an existing directory.
    :param bool delete_at_exit: Whether to automatically delete the target folder
        (best effort) when the program terminates.
    :raises ValueError: if the base folder does not exist or is not a directory.
    """
    global _target_folder_path
    with _lock:
        prefix = os.path.join(base_folder_path, "knowledge-banks_")
        if _target_folder_path and _target_folder_path.startswith(prefix):
            return  # already set

        # a regular file can never host the knowledge bank sub-folders: reject it
        # here rather than failing later, when the sub-folders get created
        if not os.path.isdir(base_folder_path):
            raise ValueError(
                "the provided base folder does not exist or is not a directory: {}".format(
                    base_folder_path))

        # use a random name to avoid overwriting existing data
        _target_folder_path = prefix + generate_random_name()

        if delete_at_exit:
            # best effort to clean the target folder, for cases outside DSS
            atexit.register(_safe_delete_folder, _target_folder_path)


def _safe_delete_folder(folder_path: str):
    """
    Removes the given folder tree if it exists.

    Deletion errors are ignored by ``shutil.rmtree`` itself; any other exception
    raised along the way is logged as a warning rather than propagated.

    :param str folder_path: The folder to remove.
    """
    try:
        if not os.path.exists(folder_path):
            return  # nothing to clean up

        logger.info("remove folder {}".format(folder_path))
        shutil.rmtree(folder_path, ignore_errors=True)
    except Exception as err:
        message = "cannot remove folder {}: {}".format(folder_path, err)
        logger.warning(message)


def get_folder_path_to_load_kb(isolated=False) -> str:
    """
    Returns the absolute path of the folder in which knowledge banks are loaded,
    creating it when missing.

    The parent folder is, in order of preference: the target folder recorded by
    ``set_base_folder_path``, the ``knowledge-banks`` folder inside the ticket local
    folder when ticket auth mode is active, or a target folder set up under the
    current working directory.

    :param bool isolated: Selects the ``isolated`` sub-folder instead of the ``shared`` one.
    :return: The absolute path of the selected sub-folder.
    :rtype: str
    """
    with _lock:
        base_path = _target_folder_path

    if not base_path:
        if has_ticket_auth_mode():
            # use the ticket folder for local processes - with ticket auth
            base_path = os.path.join(get_ticket_local_folder_path(), "knowledge-banks")
        else:
            # execution outside DSS: use current working directory as default
            set_base_folder_path(os.getcwd())
            base_path = _target_folder_path

    # make separate directories for shared/isolated (easier debugging)
    sub_folder = "isolated" if isolated else "shared"
    resolved_path = os.path.abspath(os.path.join(base_path, sub_folder))
    os.makedirs(resolved_path, exist_ok=True)
    return resolved_path


# Value passed as `bufsize` to `tarfile.open` when unpacking the streamed
# knowledge bank archive (10 * 16 * 1024 = 163840).
TAR_BUFFER_SIZE = 10 * 16 * 1024


def download_to_folder(
        project_key: str,
        kb_id: str,
        version: str,
        folder_path: str,
        use_latest_settings: bool
):
    """
    Downloads the data of a knowledge bank into a new folder.

    When ``version`` is ``"not-built"``, only ``kb.json`` is written. Otherwise the
    vector store folder is fetched from the backend as a tar stream and extracted
    into ``folder_path``. If anything fails after the folder has been created, the
    folder is removed before the exception is re-raised.

    :param str project_key: The project key of the knowledge bank.
    :param str kb_id: The knowledge bank id.
    :param str version: The version to download, or ``"not-built"``.
    :param str folder_path: The destination folder. It must not exist yet.
    :param bool use_latest_settings: Whether to re-download ``kb.json`` after the
        extraction, overwriting the one that came with the archive.
    :raises ValueError: if ``folder_path`` already exists, or if the backend does not
        answer with HTTP status 200.
    """
    kb_full_id = "{}.{}".format(project_key, kb_id)

    # create the folder and detect a pre-existing one in a single step, so that two
    # concurrent callers cannot both pass an "exists" check before creating it
    try:
        os.makedirs(folder_path)
    except FileExistsError:
        raise ValueError("cannot download kb data into existing folder") from None

    try:
        if version == "not-built":
            logger.info("kb not built yet, only download kb.json to folder {}".format(
                folder_path))

            _download_kb_json(folder_path, kb_full_id)
            return

        logger.info("download kb data to folder {}".format(folder_path))
        api_resp = intercom.backend_api_post_call(
            "knowledge-bank/download-vectorstore-folder",
            data={"knowledgeBankFullId": kb_full_id, "version": version},
            stream=True
        )

        try:
            if api_resp.status_code != 200:
                raise ValueError("cannot download kb data, HTTP status {}: {}".format(
                    api_resp.status_code, api_resp.text))

            with tarfile.open(fileobj=api_resp.raw, mode='r|', bufsize=TAR_BUFFER_SIZE) as tar:
                tar_extractall(tar, folder_path)
        finally:
            # a streamed response holds on to its connection until it is closed
            # NOTE(review): relies on the response exposing close(), as requests.Response does
            api_resp.close()

        logger.info("finished downloading kb data to folder {}".format(folder_path))

        if use_latest_settings:  # [sc-255675]
            logger.info("overwrite kb.json to latest version")
            _download_kb_json(folder_path, kb_full_id)

    except Exception:
        # do not leave a partially downloaded folder on disk
        shutil.rmtree(folder_path, ignore_errors=True)
        raise


def _download_kb_json(folder_path: str, kb_full_id: str):
    """
    Writes the result of the ``knowledge-bank/get`` backend call to ``kb.json``
    inside the given folder, replacing any existing file.

    :param str folder_path: The folder receiving ``kb.json``.
    :param str kb_full_id: The full id of the knowledge bank, sent as ``knowledgeBankFullId``.
    """
    request_data = {"knowledgeBankFullId": kb_full_id}
    kb_payload = intercom.backend_json_call("knowledge-bank/get", data=request_data)

    target_file = os.path.join(folder_path, "kb.json")
    with open(target_file, "w") as out:
        json.dump(kb_payload, out)


def generate_random_name(length=16) -> str:
    """
    Builds a name made of random ASCII letters (lower and upper case).

    :param int length: The number of letters in the name.
    :return: The generated name.
    :rtype: str
    """
    letters = []
    for _ in range(length):
        letters.append(random.choice(string.ascii_letters))
    return ''.join(letters)


def build_dku_vector_store(folder_path: str) -> 'DkuVectorStore':
    """
    Builds the vector store described by the ``kb.json`` file of a folder.

    :param str folder_path: The folder containing ``kb.json``.
    :return: The object returned by ``VectorStoreFactory.get_vector_store`` for the
        parsed ``kb.json`` content and the folder.
    """
    with open(os.path.join(folder_path, "kb.json"), "r") as kb_file:
        kb_settings = json.load(kb_file)

    # imported at call time, once kb.json has been read
    from dataiku.core.vector_stores.dku_vector_store import VectorStoreFactory
    vector_store = VectorStoreFactory.get_vector_store(kb_settings, folder_path)
    return vector_store


def create_langchain_vector_store(folder_path: str, **vectorstore_kwargs) -> 'VectorStore':
    """
    Creates the langchain vector store for the knowledge bank stored in a folder.

    The embeddings are built from the ``embeddingLLMId`` entry of the knowledge bank.

    :param str folder_path: The folder containing the knowledge bank files.
    :param vectorstore_kwargs: Extra keyword arguments forwarded to ``get_db``.
    :return: The object returned by ``get_db`` on the Dataiku vector store.
    """
    store = build_dku_vector_store(folder_path)
    embedding_llm_id = store.kb["embeddingLLMId"]

    # imported at call time, after the Dataiku vector store has been built
    from dataiku.langchain.dku_embeddings import DKUEmbeddings
    return store.get_db(DKUEmbeddings(llm_id=embedding_llm_id), **vectorstore_kwargs)
