import hashlib
import time
from datetime import datetime
from functools import lru_cache
from typing import Any, Dict, List
import dataiku
import pandas as pd
from webaiku.apis.dataiku.api import dataiku_api

# --- Helper Functions for Caching and Reusability ---

@lru_cache(maxsize=None)
def _get_project_variables() -> Dict[str, Any]:
    """
    Return the 'standard' variables of the current Dataiku project.

    The project is looked up through the API client using
    ``dataiku_api.project_key``. Because the function takes no arguments and
    is wrapped in an unbounded ``lru_cache``, the API is only queried on the
    first call; every later call returns the same cached dictionary, so
    changes made to the project variables afterwards are not picked up
    unless the cache is cleared.

    Returns:
        The value stored under the 'standard' key of the project variables,
        or an empty dict when that key is missing.
    """
    project = dataiku.api_client().get_project(dataiku_api.project_key)
    all_variables = project.get_variables()
    return all_variables.get('standard', {})

@lru_cache(maxsize=4)
def _get_dataset_df(dataset_name: str) -> pd.DataFrame:
    """
    Read a Dataiku dataset into a pandas DataFrame, memoizing the result.

    Up to four distinct dataset names are kept in the LRU cache. A cache hit
    returns the very same DataFrame object that was loaded earlier, so
    callers should treat it as read-only, and data written to the dataset
    after the first load is not reflected until the entry is evicted.

    Args:
        dataset_name: Name of the Dataiku dataset to load.

    Returns:
        The dataset contents as a DataFrame.
    """
    # This line only runs on a cache miss, which makes cache hits visible in the logs.
    print(f"Loading dataset: {dataset_name}...")
    dataset = dataiku.Dataset(dataset_name)
    return dataset.get_dataframe()

# --- Data Retrieval Functions ---

def get_primary_id_name() -> Dict[str, str]:
    """
    Return the name of the primary ID column.

    The primary ID is taken to be the first column of 'webapp_dataset'.

    Returns:
        A dict with the single key "primary_id_name".
    """
    first_column = _get_dataset_df('webapp_dataset').columns[0]
    return {"primary_id_name": first_column}

def get_secondary_id_name() -> Dict[str, str]:
    """
    Return the name of the secondary ID column.

    The secondary ID is taken to be the second column of 'webapp_dataset'.

    Returns:
        A dict with the single key "secondary_id_name".
    """
    second_column = _get_dataset_df('webapp_dataset').columns[1]
    return {"secondary_id_name": second_column}

def get_id_list() -> Dict[str, List[str]]:
    """
    Return the sorted, de-duplicated IDs found in the primary column.

    The primary column is the first column of 'webapp_dataset'. Values are
    converted to strings before de-duplication, so the result is sorted
    lexicographically (e.g. "10" sorts before "9").

    Returns:
        A dict with the single key "id_list" holding the sorted list.
    """
    df = _get_dataset_df('webapp_dataset')
    primary_ids = df[df.columns[0]].astype(str)
    # sorted() already returns a new list from any iterable; no intermediate
    # list() copy is needed.
    return {"id_list": sorted(primary_ids.unique())}

def get_id_occurrences() -> Dict[str, int]:
    """
    Count how many rows carry each ID in the primary column.

    The primary column is the first column of 'webapp_dataset'; its values
    are converted to strings before counting.

    Returns:
        A dict mapping each ID (as a string) to its number of occurrences.
    """
    df = _get_dataset_df('webapp_dataset')
    primary_column = df.columns[0]
    counts = df[primary_column].astype(str).value_counts()
    return counts.to_dict()

def get_keys() -> Dict[str, List[Any]]:
    """
    Build the matching-key configuration from the project variables.

    Key slots are numbered 1 to 15. Slot 1 is always included; each following
    slot is included only while its ``add_key_<n>`` variable is truthy, and
    the scan stops at the first slot that is not enabled. A variable that is
    missing for an included slot is reported as ``None``.

    Returns:
        A dict with the parallel lists "primary_keys", "secondary_keys",
        "key_types", "thresholds" and "weights", one entry per included slot.
    """
    settings = _get_project_variables()

    # Output list name -> prefix of the numbered project variable that feeds it.
    variable_prefixes = {
        "primary_keys": "primary_column",
        "secondary_keys": "secondary_column",
        "key_types": "type",
        "thresholds": "matching_distance_threshold",
        "weights": "matching_distance_weight",
    }
    keys: Dict[str, List[Any]] = {name: [] for name in variable_prefixes}

    for slot in range(1, 16):
        # Slot 1 needs no flag; any later slot without its flag ends the scan.
        if slot != 1 and not settings.get(f'add_key_{slot}'):
            break
        for name, prefix in variable_prefixes.items():
            keys[name].append(settings.get(f'{prefix}_{slot}'))

    return keys

def get_extra_cols() -> Dict[str, List[str]]:
    """
    Return the extra display columns configured in the project variables.

    Returns:
        A dict with the keys "extra_cols_primary" and "extra_cols_secondary";
        each falls back to an empty list when the variable is not set.
    """
    settings = _get_project_variables()
    return {
        variable_name: settings.get(variable_name, [])
        for variable_name in ("extra_cols_primary", "extra_cols_secondary")
    }

def get_reconciliation_type() -> Dict[str, str]:
    """
    Return the reconciliation type configured in the project variables.

    Returns:
        A dict with the single key "reconciliation_type"; the value is
        ``None`` when the variable is not set.
    """
    reconciliation_type = _get_project_variables().get('reconciliation_type')
    return {"reconciliation_type": reconciliation_type}

# --- Data Modification and Action Functions ---

def generate_unique_id(input_string: str, length: int = 8) -> str:
    """
    Generate a short hexadecimal ID from a string and the current time.

    The current ``time.time()`` value is concatenated with ``input_string``,
    the result is hashed with SHA-256, and the first ``length`` characters of
    the hex digest are returned. Uniqueness is therefore likely but not
    guaranteed: identical inputs hashed at the same clock reading collide,
    and truncating the digest makes collisions more probable.

    Args:
        input_string: Text mixed into the hash. Non-string values (for
            example ``None`` or a number) are converted with ``str()``
            rather than raising ``TypeError``.
        length: Number of leading hex characters to keep. The SHA-256 hex
            digest has 64 characters, so larger values yield 64 characters.

    Returns:
        The truncated lowercase hexadecimal digest.
    """
    # The f-string stringifies both parts, so any input type is accepted.
    seed = f"{time.time()}{input_string}"
    return hashlib.sha256(seed.encode()).hexdigest()[:length]
    
def get_cell_value_change(input_payload: Dict[str, Any], headers: Dict[str, Any]) -> Dict[str, str]:
    """
    Log a user's cell edit to the 'editlog' dataset and trigger the update scenario.

    Args:
        input_payload: Description of the edit. The keys "celltype",
            "newvalue", "primary_id" and "secondary_id" are read; a missing
            key is treated as ``None``. When "celltype" is "comment" the new
            value is stored as the comment, otherwise as the match value.
        headers: Browser request headers, used to look up the acting user.

    Returns:
        ``{"status": "OK", "log_id": <generated id>}``.
    """
    # 1. Prepare Log Entry Data
    celltype = input_payload.get("celltype")
    new_value = input_payload.get("newvalue")

    # Exactly one of the two columns receives the new value; the other is blank.
    is_comment = celltype == "comment"
    match_value = "" if is_comment else new_value
    comment_value = new_value if is_comment else ""

    # Identifying the user is best-effort: any failure falls back to a
    # placeholder so the edit itself is still logged.
    try:
        auth_info = dataiku.api_client().get_auth_info_from_browser_headers(headers)
        user = auth_info.get("authIdentifier", "user_not_found")
    except Exception as e:
        print(f"Could not retrieve user from browser headers: {e}")
        user = "user_not_found"

    # The payload value may be a number, a boolean or absent; stringify it so
    # ID generation cannot fail with a TypeError before the edit is logged.
    log_id = generate_unique_id(str(new_value))

    # NOTE(review): log_id is returned to the caller but is not stored in the
    # log row itself -- confirm this is intended.
    new_log_entry = {
        "date": datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
        "user": user,
        "primary_id": input_payload.get("primary_id"),
        "secondary_id": input_payload.get("secondary_id"),
        "match_value": match_value,
        "comment_value": comment_value,
    }

    # 2. Update Edit Log Dataset
    # NOTE(review): the whole edit log is read, extended by one row and
    # written back; simultaneous edits could overwrite each other -- confirm
    # whether concurrent users are expected.
    editlog_dataset = dataiku.Dataset('editlog')
    editlog_df = editlog_dataset.get_dataframe()
    new_log_df = pd.DataFrame([new_log_entry])
    updated_df = pd.concat([editlog_df, new_log_df], ignore_index=True)
    editlog_dataset.write_from_dataframe(updated_df)

    # 3. Trigger Update Scenario
    client = dataiku.api_client()
    project = client.get_project(dataiku_api.project_key)
    scenario = project.get_scenario("10UPDATEMATCHING")
    scenario.run()

    return {"status": "OK", "log_id": log_id}