import time
from typing import Dict, List, Union

import dataiku
from common.backend.constants import DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS
from common.backend.models.base import (
    MediaSummary,
    UploadChainTypes,
    UploadFileError,
    UploadFileResponse,
)
from common.backend.utils.dataiku_api import dataiku_api
from common.backend.utils.file_extraction.image import extract_image_summary
from common.backend.utils.file_extraction.pdf import extract_pdf_summary
from common.backend.utils.file_extraction.pptx.pptx_summary_extraction import extract_pptx_summary
from common.backend.utils.file_extraction.text import extract_text_summary
from common.backend.utils.file_utils import allowed_file, get_file_data, is_file_coherent
from common.backend.utils.llm_utils import get_llm_capabilities
from common.backend.utils.upload_utils import get_checked_config
from common.llm_assist.logging import logger
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

# Settings of the running webapp, as exposed by the shared Dataiku API helper.
webapp_config: Dict[str, str] = dataiku_api.webapp_config
# Value of the "max_n_upload_files" setting, cast to int once at import time.
max_n_files: int = int(get_checked_config("max_n_upload_files"))
# Multi-modal flag reported by get_llm_capabilities(); an absent key counts as False.
_llm_capabilities = get_llm_capabilities()
multi_modal: bool = _llm_capabilities.get("multi_modal", False)


class UploadFileExtractor:
    """Summarise a batch of uploaded files and store them in the upload folder.

    For every file, ``extract`` validates it, builds a summary with the
    extractor matching its extension, uploads the file, and appends the
    summary to the response. ``clean_up`` removes what was written for the
    most recently processed file.
    """

    def __init__(self, files: List[FileStorage], auth_identifier: str, language: str) -> None:
        """Store the upload request and reject it when it holds too many files.

        Args:
            files: Uploaded files to process.
            auth_identifier: Used as the leading path segment of every stored file.
            language: Forwarded unchanged to the summary extractors.

        Raises:
            Exception: With ``UploadFileError.TOO_MANY_FILES`` when ``files``
                holds more than ``max_n_files`` entries.
        """
        if len(files) > max_n_files:
            raise Exception(UploadFileError.TOO_MANY_FILES.value)
        self.files = files
        self.auth_identifier = auth_identifier
        self.language = language
        self.multi_modal = multi_modal
        self.folder = dataiku_api.folder_handle

        self.extractions: UploadFileResponse = {"media_summaries": []}
        # State of the file currently (or most recently) processed; read by clean_up().
        self.current_file_path: Union[str, None] = None
        self.current_summary: MediaSummary = {}

    def extract(self) -> UploadFileResponse:
        """Extract a summary from every file and upload the file itself.

        Returns:
            The accumulated response, whose ``"media_summaries"`` list holds
            one summary per processed file, in input order.

        Raises:
            Exception: The message is an ``UploadFileError`` value:
                ``NO_SELECTED_FILE`` for a missing or empty filename,
                ``PARSING_ERROR`` when ``is_file_coherent`` rejects the file,
                ``INVALID_FILE_TYPE`` when no extractor handles the extension,
                ``CONTEXT_EXCURSION`` when the summary is flagged as a long
                document, and ``GENERIC_ERROR`` for any other failure (the
                underlying exception is attached as ``__cause__``).
        """
        # Millisecond timestamp taken once and handed to the extractors that accept it.
        begin_time = int(time.time() * 1000)
        for file in self.files:
            if not file.filename:
                raise Exception(UploadFileError.NO_SELECTED_FILE.value)
            if not is_file_coherent(file):
                # Use the enum's value, like every other error raised by this class.
                raise Exception(UploadFileError.PARSING_ERROR.value)
            extension = allowed_file(file, self.multi_modal)
            try:
                secure_name = secure_filename(file.filename)
                # Prefix with the current epoch second before storing the file.
                file_name = secure_filename(f"{int(time.time())}_{secure_name}")
                logger.debug(f"Uploading file name: {file_name}")
                self.current_file_path = f"{self.auth_identifier}/{file_name}"
                file_data: bytes = get_file_data(file)
                self.current_summary = self._extract_summary(extension, file_data, secure_name, begin_time)
                if self.current_summary.get("chain_type", "") == UploadChainTypes.LONG_DOCUMENT.value:
                    # TODO: This is just a temporary blocking of documents larger than context
                    # It should be removed once in memory RAG is enabled
                    raise Exception(UploadFileError.CONTEXT_EXCURSION.value)
                # NOTE(review): `file` was already handed to get_file_data() above; this assumes
                # the stream is still readable from the start at this point - confirm.
                dataiku.Folder(webapp_config.get("upload_folder")).upload_stream(self.current_file_path, file)
                self.current_summary["original_file_name"] = secure_name
                self.extractions["media_summaries"].append(self.current_summary)
            except Exception as err:
                # Errors raised on purpose above must reach the caller unchanged;
                # previously they were overwritten by GENERIC_ERROR.
                if self._is_explicit_upload_error(err):
                    raise
                raise Exception(UploadFileError.GENERIC_ERROR.value) from err
        return self.extractions

    def _extract_summary(self, extension: str, file_data: bytes, secure_name: str, begin_time: int) -> MediaSummary:
        """Run the summary extractor matching ``extension`` for the current file.

        Raises:
            Exception: With ``UploadFileError.INVALID_FILE_TYPE`` when no
                extractor handles ``extension``.
        """
        if extension in IMAGE_EXTENSIONS:
            return extract_image_summary(self.current_file_path, file_data, secure_name, self.language, begin_time)
        if extension == "pdf":
            return extract_pdf_summary(self.current_file_path, file_data, secure_name, self.language, begin_time)
        if extension == "pptx":
            # The pptx extractor is called without begin_time.
            return extract_pptx_summary(self.current_file_path, file_data, secure_name, self.language)
        if extension in DOCUMENT_EXTENSIONS:
            return extract_text_summary(
                self.current_file_path, file_data, secure_name, extension, self.language, begin_time
            )
        raise Exception(UploadFileError.INVALID_FILE_TYPE.value)

    @staticmethod
    def _is_explicit_upload_error(err: Exception) -> bool:
        """Tell whether ``err`` carries one of the messages raised on purpose during extraction."""
        explicit_messages = (
            str(UploadFileError.INVALID_FILE_TYPE.value),
            str(UploadFileError.CONTEXT_EXCURSION.value),
        )
        return str(err) in explicit_messages

    def clean_up(self) -> None:
        """Delete the current file and its summary metadata from ``self.folder``.

        Only ``current_file_path`` and the ``"metadata_path"`` of
        ``current_summary`` are removed, each only when set.
        """
        # NOTE(review): only the latest file is tracked; files handled earlier in the same
        # batch are not removed here - confirm this is intended.
        if file_path := self.current_file_path:
            self.folder.delete_path(file_path)
        if metadata_path := self.current_summary.get("metadata_path"):
            self.folder.delete_path(metadata_path)