import glob
import logging
import os
import tarfile
import tempfile
from typing import TYPE_CHECKING
from dataiku.core import intercom
from dataiku.core.vector_stores.lifecycle.base import build_dku_vector_store
from dataiku.core.vector_stores.data.metadata import DocumentMetadataFormatter

if TYPE_CHECKING:
    from dataiku.core.vector_stores.dku_vector_store import DkuVectorStore
    from dataiku.core.vector_stores.lifecycle.isolated_folder import VectorStoreIsolatedFolder
    from langchain_core.vectorstores import VectorStore


logger = logging.getLogger(__name__)


class VectorStoreWriter:
    """
    A helper class to write vector store data to the underlying knowledge
    bank folder.

    .. important::
        Do not create this class directly, use
        :meth:`dataiku.KnowledgeBank.get_writer()`
    """

    def __init__(self, project_key: str, kb_full_id: str, isolated_folder: 'VectorStoreIsolatedFolder'):
        """
        :param str project_key: project key, handed to the metadata formatter
        :param str kb_full_id: full identifier of the knowledge bank, sent to
            the backend when a new version is created
        :param isolated_folder: folder object exposing the vector store files
        """
        self._project_key = project_key
        self._kb_full_id = kb_full_id
        self._folder = isolated_folder
        # Built lazily on first access, see ``_dku_vector_store``
        self._cached_dku_vs = None

    @property
    def _dku_vector_store(self) -> 'DkuVectorStore':
        """
        The vector store built from the underlying folder.

        It is built on first access and the same object is returned afterwards.
        """
        if self._cached_dku_vs is None:
            self._cached_dku_vs = build_dku_vector_store(self.folder_path)

        return self._cached_dku_vs

    @property
    def folder_path(self) -> str:
        """
        The path to the underlying folder on the filesystem.
        """
        return self._folder.folder_path

    def clear(self) -> None:
        """
        Clears the vector store data stored in the underlying folder.
        """
        self._dku_vector_store.clear()

    def save(self) -> str:
        """
        Saves the content of the underlying folder as a new knowledge bank
        version.

        The top-level entries of the folder are packed into a temporary
        gzipped tar archive, which is then uploaded to the backend.

        :return: the created version
        :rtype: str
        """
        # Only top-level entries are listed here; tar.add() recurses into
        # directories on its own.
        all_paths = glob.glob(os.path.join(self.folder_path, "*"))
        # Hand the paths over as a logging argument: str.format() on a "%s"
        # template leaves the placeholder untouched and drops the paths.
        logger.info("Archiving files before send: %s", all_paths)

        with tempfile.TemporaryDirectory() as temp_dir:
            archive_path = os.path.join(temp_dir, "dku_send.tar.gz")

            with tarfile.open(archive_path, "w:gz") as tar:
                for file_path in all_paths:
                    # Name each member relative to the folder root so that the
                    # archive does not leak the absolute location of the folder
                    path_in_archive = os.path.relpath(file_path, self.folder_path)
                    tar.add(file_path, arcname=path_in_archive)

            # The upload must happen while the temporary directory still exists
            with open(archive_path, "rb") as fd:
                api_resp = intercom.backend_json_call(
                    "knowledge-bank/create-version",
                    params={"knowledgeBankFullId": self._kb_full_id},
                    data=fd
                )

            return api_resp["version"]

    def as_langchain_vectorstore(self, **vectorstore_kwargs) -> 'VectorStore':
        """
        Gets this writer as a Langchain Vectorstore object

        :param vectorstore_kwargs: keyword arguments forwarded to the
            underlying folder when creating the vector store. ``allow_creation``
            defaults to ``True`` when not provided.
        :rtype: :class:`langchain_core.vectorstores.VectorStore`
        """
        # using a writable folder implies write access by default
        allow_creation = vectorstore_kwargs.pop("allow_creation", True)
        return self._folder.create_langchain_vectorstore(
            allow_creation=allow_creation, **vectorstore_kwargs)

    def get_metadata_formatter(self) -> 'DocumentMetadataFormatter':
        """
        Gets the metadata formatter to help writing documents to this vector store.

        :rtype: :class:`DocumentMetadataFormatter`
        """
        return DocumentMetadataFormatter(self._project_key, self._dku_vector_store)
