import logging
from typing import Any, Dict, List, Tuple

import dataiku
from answers.backend.utils.knowledge_filters import KnowledgeBankFilterConfig, get_knowledge_bank_filtering_settings
from answers.solutions.knowledge_bank import EmbeddingRecipeType
from dataiku import Dataset
from dataiku.sql import Dialects

# SQL dialects for which connections and datasets are listed by
# list_connections_by_type() and list_datasets_by_connection_type().
# The trailing comment on each entry is the string the constant is expected
# to hold, as noted by the original author (not verified here).
connection_types = [
    Dialects.POSTGRES,    # 'PostgreSQL'
    Dialects.SNOWFLAKE,   # 'Snowflake'
    Dialects.REDSHIFT,    # 'Redshift'
    Dialects.SQLSERVER,   # 'SQLServer'
    Dialects.BIGQUERY,    # 'BigQuery'
    Dialects.DATABRICKS,  # 'Databricks'
    Dialects.ORACLE,      # 'Oracle'
]

# Values of a knowledge bank's "vector_db_type" setting for which metadata
# filter choices are offered when the filter-support check is requested.
supports_vector_db_filter = [
    "QDRANT_LOCAL",
    "CHROMA",
    "PINECONE",
]


def get_knowledge_bank_single_choice(config: Dict[str, Any]) -> Dict[str, List[Dict[str, str]]]:
    """Build the choices for a single-valued knowledge bank metadata parameter.

    Args:
        config: Current parameter values; only ``knowledge_bank_id`` is read.

    Returns:
        ``{"choices": [...]}``. The list is empty when no knowledge bank is
        selected (id missing, ``None``, ``""`` or ``"None"``). Otherwise it
        holds one entry per filter metadata when the embedding recipe type is
        ``NLP_LLM_RAG_EMBEDDING`` (no metadata entries for any other type),
        followed by a final ``"None"`` entry whose value is the empty string.
    """
    knowledge_bank_id: str | None = config.get("knowledge_bank_id")
    if knowledge_bank_id is None or knowledge_bank_id == "None" or knowledge_bank_id == "":
        return {"choices": []}

    filter_config: KnowledgeBankFilterConfig = get_knowledge_bank_filtering_settings(knowledge_bank_id, False)
    # Start from an empty list so an unrecognised recipe type yields only the
    # "None" entry instead of raising UnboundLocalError.
    choices: List[Dict[str, str]] = []
    if filter_config.get("embedding_recipe_type") == EmbeddingRecipeType.NLP_LLM_RAG_EMBEDDING:
        choices = [{"value": meta, "label": meta} for meta in filter_config["filter_metadata"]]
    choices.append({"value": "", "label": "None"})
    return {"choices": choices}


def get_knowledge_bank_multiple_choices(
        config: Dict[str, Any], filer_support_check: bool = False
) -> Dict[str, List[Dict[str, str]]]:
    """Build the choices for a multi-valued knowledge bank metadata parameter.

    Args:
        config: Current parameter values; only ``knowledge_bank_id`` is read.
        filer_support_check: When true, metadata is only offered if the
            knowledge bank's ``vector_db_type`` is listed in
            ``supports_vector_db_filter``, and never for ``EMBED_DOCUMENTS``
            recipes.

    Returns:
        ``{"choices": [...]}`` with one entry per filter metadata, or an empty
        list when no knowledge bank is selected, the support check fails, or
        the embedding recipe type is not one of the two handled types.
    """
    knowledge_bank_id: str | None = config.get("knowledge_bank_id")
    if knowledge_bank_id is None or knowledge_bank_id == "None" or knowledge_bank_id == "":
        return {"choices": []}

    filter_config: KnowledgeBankFilterConfig = get_knowledge_bank_filtering_settings(knowledge_bank_id, False)
    if filer_support_check and filter_config.get("vector_db_type", "") not in supports_vector_db_filter:
        return {"choices": []}

    embedding_recipe_type = filter_config.get("embedding_recipe_type")
    is_rag_embedding = embedding_recipe_type == EmbeddingRecipeType.NLP_LLM_RAG_EMBEDDING
    is_embed_documents = embedding_recipe_type == EmbeddingRecipeType.EMBED_DOCUMENTS

    # Metadata is offered for RAG embeddings always, and for embedded
    # documents only when the filter-support check is off. Any other recipe
    # type falls through to an empty list (previously an UnboundLocalError).
    if is_rag_embedding or (is_embed_documents and not filer_support_check):
        return {"choices": [{"value": meta, "label": meta} for meta in filter_config["filter_metadata"]]}
    return {"choices": []}



def get_dataset_list_and_proj_key() -> Tuple[List[Dict[str, Any]], str]:
    """Return the default project's dataset listing together with its key.

    Returns:
        A ``(datasets, project_key)`` tuple where ``datasets`` is whatever
        ``list_datasets()`` returns for the default project.
    """
    project_key = dataiku.default_project_key()
    datasets = dataiku.api_client().get_project(project_key).list_datasets()
    return datasets, project_key

def list_datasets_from_conn(config: Dict[str, Any]) -> Dict[str, Dict[str, List[Dict[str, str]]]]:
    """List the default project's datasets stored on the selected connection.

    Args:
        config: Current parameter values; only ``sql_retrieval_connection``
            is read.

    Returns:
        ``{"choices": {connection_type: [...]}}`` with one value/label entry
        per dataset whose ``params["connection"]`` equals the selected
        connection. ``connection_type`` is the ``type`` of the first matching
        dataset, or ``""`` when nothing matches.
    """
    selected_conn: str | None = config.get("sql_retrieval_connection")
    datasets, _ = get_dataset_list_and_proj_key()

    # Single pass over the listing; the matches are reused for both the
    # connection type and the dataset names.
    matching = [d for d in datasets if d["params"].get("connection", "") == selected_conn]

    # Take the first match's type so the result is deterministic (iterating a
    # set of types gave an arbitrary one when several were present).
    connection_type = matching[0]["type"] if matching else ""
    return {"choices": {connection_type: [{"value": d["name"], "label": d["name"]} for d in matching]}}

def list_datasets_by_connection_type(client):
    """Group dataset names by type, keeping only types in ``connection_types``.

    Args:
        client: Object exposing ``list_datasets(as_type="listitems")`` whose
            items have ``type`` and ``name`` attributes (``do`` passes the
            default project handle here).

    Returns:
        ``{"choices": {type: [dataset_name, ...]}}`` with empty groups removed.
        Any exception is logged and whatever was collected so far is returned.
    """
    grouped = {}
    try:
        items = client.list_datasets(as_type="listitems")

        # Seed one bucket per supported type so absent types get logged below.
        grouped.update({conn_type: [] for conn_type in connection_types})

        for item in items:
            item_type = item.type
            if item_type in connection_types:
                grouped[item_type].append(item.name)

        # Walk a snapshot because empty buckets are deleted along the way.
        for conn_type, names in list(grouped.items()):
            if names:
                logging.info(f"Found {len(names)} {conn_type} datasets: {names}")
            else:
                logging.info(f"No {conn_type} datasets found")
                del grouped[conn_type]
    except Exception as e:
        logging.exception(f"Error listing datasets: {e}")

    return {"choices": grouped}

def list_connections_by_type(client):
    """Collect connection names for every type in ``connection_types``.

    Args:
        client: Object exposing ``list_connections_names(conn_type)``.

    Returns:
        ``{"choices": {conn_type: [name, ...]}}`` containing only the types
        that have at least one connection. A failure for one type is logged
        and does not stop the remaining types from being listed.
    """
    by_type = {}
    for conn_type in connection_types:
        try:
            names = list(client.list_connections_names(conn_type))
            if not names:
                logging.info(f"No {conn_type} connections found")
                continue
            by_type[conn_type] = names
            logging.info(f"Found {len(names)} {conn_type} connections: {names}")
        except Exception as e:
            logging.exception(f"Error listing {conn_type} connections: {e}")

    return {"choices": by_type}

def create_dataset(payload, current_project):
    """Create a managed dataset described by ``payload["arguments"]``.

    Args:
        payload: Mapping with ``arguments.name`` (dataset name) and
            ``arguments.connection`` holding ``connectionName`` and
            ``connectionType``.
        current_project: Project handle exposing ``new_managed_dataset``.

    Returns:
        On success, a dict with ``success=True``, a ``dataset`` description
        and a ``message``. On any failure, a dict with ``success=False`` and
        the ``error``, ``traceback`` and ``message`` strings; the exception is
        logged and not re-raised.
    """
    try:
        # payload["arguments"] is a plain dict, not an object.
        arguments = payload["arguments"]
        dataset_name = arguments["name"]
        connection_info = arguments["connection"]
        connection_name = connection_info["connectionName"]
        dataset_type = connection_info["connectionType"]

        builder = current_project.new_managed_dataset(dataset_name)
        builder.with_store_into(connection_name)
        # NOTE(review): overwrite=True is passed as in the original code;
        # confirm that replacing a dataset with the same name is intended.
        builder.create(overwrite=True)
    except Exception as e:
        import traceback

        # Keep a server-side trace in addition to the one sent to the caller.
        logging.exception(f"Failed to create dataset: {e}")
        return {
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
            "message": f"Failed to create dataset: {str(e)}"
        }

    return {
        "success": True,
        "dataset": {
            "id": dataset_name,
            "datasetName": dataset_name,
            "datasetDisplayName": dataset_name,
            "datasetType": dataset_type
        },
        "message": f"Dataset '{dataset_name}' created successfully"
    }

def list_llms_by_connections_type(current_project, purpose="GENERIC_COMPLETION"):
    """Group the project's LLMs by their ``type`` field.

    Args:
        current_project: Object exposing ``list_llms(purpose=...)`` that
            returns mapping-like items.
        purpose: Value forwarded to ``list_llms``.

    Returns:
        ``{"choices": {llm_type: [{"value": id, "label": friendlyName,
        "type": llm_type}, ...]}}`` in the order the LLMs were listed.
    """
    grouped = {}
    for llm in current_project.list_llms(purpose=purpose):
        kind = llm.get("type")
        # setdefault creates the bucket the first time a type is seen.
        grouped.setdefault(kind, []).append(
            {
                "value": llm.get("id"),
                "label": llm.get("friendlyName"),
                "type": kind,
            }
        )
    return {"choices": grouped}



def list_user_languages(payload) -> Dict[str, List]:
    """Turn the configured user profile languages into dropdown choices.

    Args:
        payload: Mapping holding ``user_profile_languages`` either directly or
            under ``rootModel`` (``rootModel`` wins when the key is present).

    Returns:
        ``{"choices": [...]}`` with one value/label entry per language, using
        each entry's ``"to"`` field and ``"English"`` when it is absent.
    """
    holder = payload["rootModel"] if "rootModel" in payload else payload
    choices = []
    for lang in holder.get("user_profile_languages", []):
        target = lang.get("to", "English")
        choices.append({"value": target, "label": target})
    return {"choices": choices}

def list_users(client):
    """Fetch users and format them as choices for a dropdown.

    Args:
        client: Object exposing ``list_users()`` that returns mappings with a
            ``login`` key and, normally, a ``displayName`` key.

    Returns:
        dict: ``{"choices": [...]}`` with the user login as value and the
        display name as label (the login is used when the display name is
        missing or empty). The list is empty if fetching or formatting fails.
    """
    try:
        users = client.list_users()
        choices = [
            # Fall back to the login so one incomplete user record does not
            # empty the whole dropdown.
            {"value": user["login"], "label": user.get("displayName") or user["login"]}
            for user in users
        ]
        return {"choices": choices}
    except Exception as e:
        # Logged with traceback at error level, like the other handlers here.
        logging.exception(f"Error fetching user choices: {e}")
        return {"choices": []}

def get_possible_joins_columns(payload: Dict[str, Any]) -> Dict[str, List[Dict[str, str]]]:
    """List ``dataset.column`` choices for every selected SQL retrieval table.

    Args:
        payload: Mapping whose ``sql_retrieval_table_list`` entry holds the
            dataset names to inspect; a missing or ``None`` entry is treated
            as an empty list.

    Returns:
        ``{"choices": [...]}`` with one value/label entry per column of each
        dataset's schema, formatted as ``"<dataset_name>.<column_name>"``.
    """
    # Only the project key is needed, so read it directly rather than
    # fetching the full dataset listing just to discard it.
    project_key: str = dataiku.default_project_key()
    table_names: List[str] = payload.get("sql_retrieval_table_list") or []

    column_options: List[Dict[str, str]] = []
    for dataset_name in table_names:
        schema = Dataset(project_key=project_key, name=dataset_name).read_schema()
        for column in schema:
            qualified_name = f"{dataset_name}.{column['name']}"
            column_options.append({"value": qualified_name, "label": qualified_name})
    return {"choices": column_options}

def get_kb_search_type(config):
    """Return the search types selectable for the configured knowledge bank.

    Args:
        config: Current parameter values; only ``knowledge_bank_id`` is read.

    Returns:
        ``{"choices": [...]}``. The two similarity options are always present.
        When a knowledge bank is selected and its raw settings are non-empty,
        ``AZURE_AI_SEARCH`` stores add the two hybrid options and every other
        ``vectorStoreType`` adds the ``mmr`` option.
    """
    choices = [
        {"value": "similarity", "label": "Similarity Score Only"},
        {"value": "similarity_score_threshold", "label": "Similarity Score with Threshold"},
    ]
    kb_id = config.get("knowledge_bank_id")
    if not kb_id or kb_id == "None":
        # No knowledge bank selected: nothing to look up.
        return {"choices": choices}

    current_project = dataiku.api_client().get_default_project()
    kb = current_project.get_knowledge_bank(kb_id)
    # Fetch the settings once; they were previously requested twice.
    raw_settings = kb.get_settings().get_raw() if kb else None
    if raw_settings:
        vector_store_type = raw_settings.get("vectorStoreType")
        if vector_store_type == "AZURE_AI_SEARCH":
            choices.extend(
                [
                    {"value": "hybrid", "label": "Hybrid search"},
                    {"value": "semantic_hybrid", "label": "Semantic hybrid search"},
                ]
            )
        else:
            choices.append({"value": "mmr", "label": "Improve diversity of documents"})
    return {"choices": choices}


def do(payload, config, plugin_config, inputs):
    """Dispatch a choices request to the helper for ``payload["parameterName"]``.

    Args:
        payload: Request mapping; ``parameterName`` selects the helper, and
            some helpers read further keys from it.
        config: Current parameter values, forwarded to the knowledge bank and
            dataset helpers.
        plugin_config: Not used by this function.
        inputs: Not used by this function.

    Returns:
        The selected helper's result, or ``{"choices": []}`` when the
        parameter name is not handled.
    """
    parameter_name = payload.get("parameterName")
    client = dataiku.api_client()
    current_project = client.get_default_project()
    none_choice = [{"value": "", "label": "None"}]

    # LLM selectors
    if parameter_name in ("llm_id", "title_llm_id", "json_decision_llm_id"):
        return list_llms_by_connections_type(current_project)
    if parameter_name == "image_generation_llm_id":
        return list_llms_by_connections_type(current_project, purpose="IMAGE_GENERATION")

    # Connections and datasets
    if parameter_name in ("connection_selector", "sql_retrieval_connection"):
        return list_connections_by_type(client)
    if parameter_name == "datasets_names":
        return list_datasets_by_connection_type(current_project)
    if parameter_name == "sql_retrieval_table_list":
        return list_datasets_from_conn(config)
    if parameter_name == 'create_dataset':
        return create_dataset(payload, current_project)
    if parameter_name == "columns_mappings":
        return get_possible_joins_columns(payload)

    # Managed folders and knowledge banks, each preceded by a "None" entry
    if parameter_name == "upload_folder":
        folders = [
            {"value": folder.get("id"), "label": folder.get("name")}
            for folder in current_project.list_managed_folders()
        ]
        return {"choices": none_choice + folders}
    if parameter_name == "knowledge_bank_id":
        banks = [
            {"value": kb.get("id"), "label": kb.get("name")}
            for kb in current_project.list_knowledge_banks()
        ]
        return {"choices": none_choice + banks}

    # Knowledge bank metadata selectors
    if parameter_name in ("knowledge_sources_context_metadata", "knowledge_sources_displayed_metas"):
        return get_knowledge_bank_multiple_choices(config)
    if parameter_name == "knowledge_sources_filters":
        return get_knowledge_bank_multiple_choices(config, filer_support_check=True)
    if parameter_name in ("knowledge_source_url", "knowledge_source_title", "knowledge_source_thumbnail"):
        return get_knowledge_bank_single_choice(config)
    if parameter_name == "knowledge_retrieval_search_type":
        return get_kb_search_type(config)

    if parameter_name == "default_user_language":
        return list_user_languages(payload)

    # Unhandled parameter names get an empty choice list.
    return {"choices": []}