from __future__ import annotations

import base64
import csv
import io
import mimetypes
import zipfile
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple

import dataiku
from flask import Blueprint, Response, abort, send_file

from backend.config import get_admin_uploads_managedfolder_id, get_uploads_managedfolder_id
from backend.schemas import schemas
from backend.utils.logger_utils import log_http_request
from backend.utils.logging_utils import get_logger
from backend.utils.utils import get_store

# Logger for this module, obtained from the project's get_logger helper.
logger = get_logger(__name__)

# Blueprint holding the download routes defined in this module; every route is
# registered under the "/downloads" URL prefix.
downloads_bp = Blueprint("downloads", __name__, url_prefix="/downloads")

# MIME type -> file extension map, used to name inline files that arrive
# without a filename (unknown MIME types fall back to "bin" at the call sites).
# A small hard-coded map is enough because the MIME types are hardcoded in DIP
# (generate_artifacts.py).
DIP_MIMETYPE_EXTENSIONS = {
    "application/pdf": "pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx"
}


@dataclass
class Table:
    """Tabular payload in ``{ columns, data }`` form, as written out by ``to_csv_string``."""

    # Header row: written as the first CSV line.
    columns: List[str]
    # Body rows: each inner list is written as one CSV line.
    data: List[List[Any]]


def to_csv_string(table: Table) -> str:
    """Serialize a ``{ columns, data }`` table into CSV text.

    ``table.columns`` is written as the header line, followed by one line per
    entry of ``table.data``. Fields are quoted only when necessary.

    Raises:
        ValueError: if ``table`` is falsy, or ``columns`` / ``data`` is not a list.
    """
    well_formed = bool(table) and isinstance(table.columns, list) and isinstance(table.data, list)
    if not well_formed:
        raise ValueError("Event data is not in { columns, data } format")

    buffer = io.StringIO()
    csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(table.columns)
    csv_writer.writerows(table.data)
    return buffer.getvalue()


def records_folder_from_artifacts(artifacts: List[schemas.Artifact]) -> Dict[str, List[Table]]:
    """
    Convert artifacts to { folderName: [Table, Table, ...] }.

    Only parts whose type is "RECORDS" are kept. The folder name is the
    artifact's name followed by its position in ``artifacts``; artifacts
    without any RECORDS part produce no entry.

    Args:
        artifacts: artifacts to scan. ``None`` or an empty list yields ``{}``.

    Returns:
        Folder name -> tables found in that artifact, in part order.
    """
    folders: Dict[str, List[Table]] = {}
    # The fallback must wrap the argument: an enumerate object is always
    # truthy, so `enumerate(artifacts) or []` never protected against None.
    for index, art in enumerate(artifacts or []):
        for item in art.get("parts") or []:
            if item.get("type") != "RECORDS":
                continue
            folder_name = art["name"] + str(index)
            records = item["records"]
            folders.setdefault(folder_name, []).append(
                Table(columns=records["columns"], data=records["data"])
            )
    return folders


def build_csv_files_from_records(records: List[Table], info: str) -> List[Tuple[str, str]]:
    """
    Render each table of ``records`` to CSV text.

    Files are named "<info>_<position>.csv", where ``info`` is stripped and has
    its spaces replaced by underscores ("artifact" is used when ``info`` is falsy).
    Returns list of (filename, csv_string), in the order of ``records``.
    """

    def _rendered_files():
        for position, current in enumerate(records):
            rendered = to_csv_string(current)
            stem = (info or "artifact").strip().replace(" ", "_")
            yield f"{stem}_{position}.csv", rendered

    return list(_rendered_files())


def _safe(s: str) -> str:
    """Return ``s`` stripped, with every space and "/" replaced by "_".

    A falsy value (``None`` or ``""``) yields the empty string.
    """
    trimmed = (s or "").strip()
    return trimmed.translate(str.maketrans({" ": "_", "/": "_"}))


def build_zip_bytes(aggregated_folders: Dict[str, Dict[str, List]], inline_data_files: List[dict]) -> bytes:
    """
    Build an in-memory ZIP archive from CSV folders and inline data files.

    aggregated_folders: { sourceName: { folderName: [(filename, csv_str), ...], ... }, ... }
        Each CSV is stored as "<source>/<folder>/<filename>", every segment
        being sanitized with ``_safe``; a falsy source name becomes "exports".
    inline_data_files: [{mimeType:'application/pdf', 'dataBase64': 'Qkx42', 'filename': 'a.pdf'}, ...]
        Stored at the archive root. An entry without a filename is named
        "document_<i>.<ext>" (``i`` = position in the list, ``ext`` looked up in
        DIP_MIMETYPE_EXTENSIONS, "bin" when unknown). Entries missing mimeType
        or dataBase64 are skipped; entries that cannot be decoded or written
        are logged and skipped.
    ``None`` is accepted for either argument and treated as empty.
    Returns zip bytes.
    """
    mem = io.BytesIO()
    with zipfile.ZipFile(mem, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for source, folders in (aggregated_folders or {}).items():
            source_prefix = _safe(source or "exports")
            for folder, files in folders.items():
                folder_prefix = f"{source_prefix}/{_safe(folder)}/"
                for filename, csv_str in files:
                    # Sanitize the entry name like the inline files below, so a
                    # "/" coming from an artifact name cannot add directory levels.
                    zf.writestr(folder_prefix + _safe(filename), csv_str)

        for i, item in enumerate(inline_data_files or []):
            mime_type = item.get("mimeType")
            data_base64 = item.get("dataBase64")
            if not (mime_type and data_base64):
                continue

            filename = item.get("filename")
            if not filename:
                extension = DIP_MIMETYPE_EXTENSIONS.get(mime_type, "bin")
                filename = f"document_{i}.{extension}"

            # Best effort: one broken inline file must not prevent the rest of
            # the archive from being produced.
            try:
                zf.writestr(_safe(filename), base64.b64decode(data_base64))
            except Exception as e:
                logger.error(
                    f"Could not decode or write file from INLINE_DATA, mime_type : {mime_type}, error : {e}"
                )

    return mem.getvalue()


def send_csv_attachment(filename: str, csv_str: str):
    """
    Return the ``send_file`` response serving ``csv_str`` as a CSV attachment.

    The payload is UTF-8 encoded with a leading byte-order mark (so Excel opens
    it correctly) and is sent with ``max_age=0``.
    """
    # "utf-8-sig" emits the BOM followed by the UTF-8 bytes of the text.
    payload = io.BytesIO(csv_str.encode("utf-8-sig"))
    return send_file(
        payload,
        download_name=filename,
        as_attachment=True,
        mimetype="text/csv; charset=utf-8",
        max_age=0,
    )


def send_zip_attachment(filename: str, zip_bytes: bytes):
    """Return the ``send_file`` response serving ``zip_bytes`` as the attachment ``filename``.

    The response uses the "application/zip" MIME type and ``max_age=0``.
    """
    response_options = {
        "mimetype": "application/zip",
        "as_attachment": True,
        "download_name": filename,
        "max_age": 0,
    }
    return send_file(io.BytesIO(zip_bytes), **response_options)


def _build_csv_from_artifacts(artifacts: List[dict], aggregated_folders: dict, source_name):
    """
    Render the RECORDS tables of ``artifacts`` to CSV and register them under ``source_name``.

    Args:
        artifacts: artifacts belonging to one source.
        aggregated_folders: accumulator shaped
            ``{ source: { folder: [(filename, csv_str), ...] } }``; it is
            updated in place and also returned.
        source_name: key used in ``aggregated_folders`` and prefix of the
            single-file name; may be falsy.

    Returns:
        ``(total_files, aggregated_folders, first_file_candidate)`` where
        ``total_files`` is the number of CSV files produced by this call and
        ``first_file_candidate`` is ``(filename, csv_str)`` for the first CSV
        found (named "<source>-<first_file_name>"), or ``None`` if there is none.
    """
    total_files = 0
    first_file_candidate = None
    folders = records_folder_from_artifacts(artifacts)  # { folder: [Table...] }
    for folder_name, tables in folders.items():
        files = build_csv_files_from_records(tables, folder_name)
        if not files:
            continue
        aggregated_folders.setdefault(source_name, {})[folder_name] = files

        # Log the count only: `files` holds the complete CSV contents, which
        # must not be dumped into the logs.
        logger.info(f"Found {len(files)} CSV files in artifact '{folder_name}'")
        total_files += len(files)

        # Track first CSV file as a candidate for the single-file case
        if first_file_candidate is None:
            first_filename, csv_str = files[0]
            prefix = (source_name or "").strip().replace(" ", "_")
            filename = f"{prefix}-{first_filename}" if prefix else first_filename
            first_file_candidate = (filename, csv_str)

    return total_files, aggregated_folders, first_file_candidate


def _build_inline_data_file(artifacts: List[schemas.Artifact]):
    """
    Extract the inline data files from ``artifacts``.

    Also handles SIMPLE_DOCUMENT parts because they come from the same place
    (structured agent: generate artifacts).

    - DATA_INLINE parts are kept when both mimeType and dataBase64 are set;
      their filename may be ``None``.
    - SIMPLE_DOCUMENT parts are converted from their markdown snippet
      (text/markdown, ".md") or, failing that, their text snippet (text/plain,
      ".txt"); the snippet is UTF-8 encoded then base64 encoded. The filename is
      the part's "filename" or "name", else "document_<n>.<ext>", where ``n``
      counts every SIMPLE_DOCUMENT with a snippet seen so far in this call.

    Args:
        artifacts: artifacts to scan. ``None`` or an empty list yields ``[]``.

    Returns:
        List of ``{"mimeType", "dataBase64", "filename"}`` dicts, in part order.
    """
    data_files = []
    simple_doc_counter = 0
    # Fallback applied to the argument itself: `enumerate(artifacts) or []`
    # never helped, because an enumerate object is always truthy.
    for art in artifacts or []:
        for item in art.get("parts") or []:
            part_type = item.get("type")
            if part_type == "DATA_INLINE":
                mime_type = item.get("mimeType")
                data_base64 = item.get("dataBase64")
                if mime_type and data_base64:
                    data_files.append(
                        {
                            "mimeType": mime_type,
                            "dataBase64": data_base64,
                            "filename": item.get("filename"),
                        }
                    )
                else:
                    logger.debug(f"artifact of type DATA_INLINE could not be extracted : {item}")
            elif part_type == "SIMPLE_DOCUMENT":
                markdown = item.get("markdownSnippet")
                snippet = markdown or item.get("textSnippet")
                if not snippet:
                    logger.debug(f"SIMPLE_DOCUMENT artifact missing snippet: {item}")
                    continue

                if markdown:
                    mime_type, extension = "text/markdown", "md"
                else:
                    mime_type, extension = "text/plain", "txt"

                filename = item.get("filename") or item.get("name")
                if not filename:
                    filename = f"document_{simple_doc_counter}.{extension}"
                # The counter advances even for named documents, which keeps
                # the generated default names unique within this call.
                simple_doc_counter += 1

                if isinstance(snippet, str):
                    data_files.append(
                        {
                            "mimeType": mime_type,
                            "dataBase64": base64.b64encode(snippet.encode("utf-8")).decode("ascii"),
                            "filename": filename,
                        }
                    )
                else:
                    logger.debug(f"SIMPLE_DOCUMENT artifact snippet is not text: {item}")

    return data_files


def download_artifacts(artifacts_meta: dict[str, schemas.ArtifactsMetadata], zip_name: str = "agent-hub-exports.zip") -> Response:
    """
    Build the download response for every exportable file found in ``artifacts_meta``.

    Returns either:
      - the single file itself (one CSV table, or one inline / simple document), OR
      - a ZIP holding the CSVs arranged in folders plus the inline files (if multiple).

    Filename pattern mirrors the frontend:
      - CSV:   "<sourceName><info>-<idx>.csv" (we'll use "<source>-<info>_<idx>.csv")
      - ZIP:   ``zip_name`` as given (default "agent-hub-exports.zip")

    Args:
        artifacts_meta: mapping whose values expose "artifacts" and "agentName";
            ``None`` is treated as empty.
        zip_name: download name used when a ZIP is returned.

    Returns:
        The Flask response produced by ``send_file``.

    Aborts with 404 when no file is found, and with 500 when the only file is
    an inline file whose base64 payload cannot be decoded.
    """
    total_files = 0
    aggregated_csv_folders = {}
    inline_data_files = []

    # ("csv", filename, csv_str) or ("inline", filename, mime_type, data_base64)
    single_file_candidate = None
    # Position of the current item in inline_data_files, used for the default
    # "document_<n>.<ext>" name of an inline file that has no filename.
    inline_index = 0
    for meta in (artifacts_meta or {}).values():
        artifacts: List[dict] = meta.get("artifacts") or []
        if not artifacts:
            continue
        source_name: str | None = meta.get("agentName")

        total_files_run, aggregated_csv_folders, first_csv_candidate = _build_csv_from_artifacts(
            artifacts, aggregated_csv_folders, source_name
        )
        total_files += total_files_run
        if single_file_candidate is None and first_csv_candidate:
            filename, csv_str = first_csv_candidate
            single_file_candidate = ("csv", filename, csv_str)

        inline_new_files = _build_inline_data_file(artifacts)
        inline_data_files.extend(inline_new_files)
        for item in inline_new_files:
            total_files += 1
            if single_file_candidate is None:
                mime_type = item.get("mimeType")
                data_base64 = item.get("dataBase64")
                filename = item.get("filename")
                if not filename:
                    extension = DIP_MIMETYPE_EXTENSIONS.get(mime_type, "bin")
                    filename = f"document_{inline_index}.{extension}"
                single_file_candidate = ("inline", filename, mime_type, data_base64)
            inline_index += 1

    if total_files == 0:
        abort(404, description="No artifacts found")

    # If exactly 1 file, return it directly for convenience
    if total_files == 1 and single_file_candidate:
        if single_file_candidate[0] == "csv":
            _, filename, csv_str = single_file_candidate
            return send_csv_attachment(filename, csv_str)
        if single_file_candidate[0] == "inline":
            _, filename, mime_type, data_base64 = single_file_candidate
            if mime_type and data_base64:
                try:
                    decoded_bytes = base64.b64decode(data_base64)
                except Exception as e:
                    logger.error(f"Could not decode inline file for download: {e}")
                    abort(500, description="Invalid inline data")
                return send_file(
                    io.BytesIO(decoded_bytes),
                    mimetype=mime_type,
                    as_attachment=True,
                    download_name=filename,
                    max_age=0,
                )

    # Otherwise, zip all CSVs grouped by folder, together with the inline files
    zip_bytes = build_zip_bytes(aggregated_csv_folders, inline_data_files)
    return send_zip_attachment(zip_name, zip_bytes)


# ------------------------------------------
# Route: download all artifacts for a message
# ------------------------------------------
@downloads_bp.route("/messages/<msg_id>", methods=["GET"])
@log_http_request
def download_msg_artifacts(msg_id: str):
    """Download all artifacts of message ``msg_id``, as built by ``download_artifacts``."""
    # Look up the artifacts metadata of the message in the store and hand it over.
    return download_artifacts(get_store().get_message_artifacts_meta(msg_id))


@downloads_bp.route("/messages/<msg_id>/artifacts/<art_id>/<art_index>", methods=["GET"])
@log_http_request
def download_artifact_by_id(msg_id: str, art_id: str, art_index: str):
    """
    Download one artifact of a message.

    Args:
        msg_id: message whose artifacts metadata is read from the store.
        art_id: key of the metadata entry inside that message.
        art_index: position of the artifact inside the entry's "artifacts" list.

    Aborts with 404 when ``art_id`` is unknown or ``art_index`` is out of
    range, and with 400 when ``art_index`` is not an integer. When a ZIP is
    produced, it is named "<agentName>.zip" ("agent-hub-exports.zip" when the
    entry has no agent name).
    """
    store = get_store()
    artifacts_meta = store.get_message_artifacts_meta(msg_id)
    if not artifacts_meta or art_id not in artifacts_meta:
        abort(404, description=f"Artifact id '{art_id}' not found for message '{msg_id}'")

    selected_meta = artifacts_meta[art_id]
    artifacts = selected_meta.get("artifacts") or []

    # art_index is a raw URL segment: reject non-numeric values explicitly
    # instead of letting int() raise and surface as a 500.
    try:
        index = int(art_index)
    except ValueError:
        abort(400, description=f"Artifact index '{art_index}' is not an integer")
    if index < 0 or index >= len(artifacts):
        abort(404, description=f"Artifact index '{art_index}' out of range for artifact id '{art_id}'")

    # Keep only the selected artifact
    single_meta = {art_id: {**selected_meta, "artifacts": [artifacts[index]]}}

    # The zip name is used verbatim as the download name, so it needs its
    # extension, and a missing agent name must not raise.
    agent_name = (selected_meta.get("agentName") or "").strip()
    if not agent_name:
        zip_name = "agent-hub-exports.zip"
    elif agent_name.lower().endswith(".zip"):
        zip_name = agent_name
    else:
        zip_name = f"{agent_name}.zip"
    return download_artifacts(single_meta, zip_name=zip_name)


@downloads_bp.route("/attachments", methods=["GET"])
@log_http_request
def download_attachment():
    """
    Serve a file of the uploads managed folder as an attachment.

    Query params:
      - path (required): document path inside the managed folder
      - filename (optional): download name; defaults to the part of ``path``
        after its last "/"
      - type (optional): MIME type; defaults to "application/octet-stream"

    The file is read entirely into memory before being sent. Aborts with 400
    when ``path`` is missing, 500 when the managed folder cannot be opened and
    404 when the file cannot be read.
    """
    from flask import request

    query = request.args
    file_path = query.get("path")
    if not file_path:
        abort(400, description="Missing 'path' parameter")

    download_name = query.get("filename") or file_path.rpartition("/")[2]
    content_type = query.get("type") or "application/octet-stream"

    folder_id = get_uploads_managedfolder_id()
    try:
        uploads_folder = dataiku.Folder(folder_id)
    except Exception as e:
        logger.exception("Unable to open managed folder %s: %s", folder_id, e)
        abort(500, description="Unable to access attachment storage")

    try:
        with uploads_folder.get_download_stream(file_path) as stream:
            content = stream.read()
    except Exception as e:
        logger.exception("Failed to read attachment from path %s: %s", file_path, e)
        abort(404, description="Attachment file not found")

    return send_file(
        io.BytesIO(content),
        mimetype=content_type,
        download_name=download_name,
        as_attachment=True,
        max_age=0,
    )


@downloads_bp.route("/admin/assets", methods=["GET"])
@log_http_request
def download_admin_asset():
    """
    Serve an admin asset (logos, images, etc.) inline from a managed folder.

    Query params:
      - path (required): document path inside the managed folder
      - managedFolderId (optional): folder to read from (draft/preview
        scenarios); otherwise the persisted admin uploads managed folder is used
      - filename (optional): defaults to the part of ``path`` after its last "/"
      - type (optional): MIME type; otherwise guessed from the filename, with
        "application/octet-stream" as the last resort

    The file is read entirely into memory before being sent. Aborts with 400
    when ``path`` is missing, 500 when no folder is configured or the folder
    cannot be opened, and 404 when the file cannot be read.
    """
    from flask import request

    query = request.args
    file_path = query.get("path")
    if not file_path:
        abort(400, description="Missing 'path' parameter")

    asset_name = query.get("filename") or file_path.rpartition("/")[2]

    # An explicit type wins; otherwise guess it from the file extension.
    content_type = query.get("type")
    if not content_type:
        content_type = mimetypes.guess_type(asset_name)[0] or "application/octet-stream"

    # NOTE(review): managedFolderId is taken as-is from the query string, so the
    # caller chooses which managed folder is read — confirm that access to this
    # route is restricted accordingly.
    folder_id = query.get("managedFolderId") or get_admin_uploads_managedfolder_id()
    if not folder_id:
        abort(500, description="Admin uploads managed folder not configured")

    try:
        # dataiku.Folder is used for reading (works with managed folder IDs)
        assets_folder = dataiku.Folder(folder_id)
    except Exception as e:
        logger.exception("Unable to open admin uploads managed folder %s: %s", folder_id, e)
        abort(500, description="Unable to access admin asset storage")

    try:
        with assets_folder.get_download_stream(file_path) as stream:
            content = stream.read()
    except Exception as e:
        logger.exception("Failed to read admin asset from path %s: %s", file_path, e)
        abort(404, description="Admin asset file not found")

    # as_attachment=False so that images are displayed inline
    return send_file(
        io.BytesIO(content),
        mimetype=content_type,
        download_name=asset_name,
        as_attachment=False,
    )
