import os
import requests

# Lookup table: models.dev provider ID -> LLM Mesh identifier.
# The mapped value is what gets stored as the "type" of each token-limit
# entry built from models.dev data. Providers mapped to None have no LLM Mesh
# counterpart and are skipped when models.dev data is extracted.
# Provider list up to date as of Aug 2025.
MODELS_DEV_TO_LLM_MESH = {
    # --- Providers with an LLM Mesh counterpart ---
    "amazon-bedrock": "BEDROCK",
    "anthropic": "ANTHROPIC",
    "google-vertex": "VERTEX",
    "huggingface": "HUGGINGFACE_TRANSFORMER_LOCAL",
    "mistral": "MISTRALAI",
    "openai": "OPENAI",
    # --- Providers without an LLM Mesh counterpart ---
    "alibaba": None,
    "azure": None,
    "cerebras": None,
    "chutes": None,
    "deepinfra": None,
    "deepseek": None,
    "fireworks-ai": None,
    "github-copilot": None,
    "github-models": None,
    "google": None,
    "google-vertex-anthropic": None,
    "groq": None,
    "inception": None,
    "inference": None,
    "llama": None,
    "lmstudio": None,
    "modelscope": None,
    "moonshotai": None,
    "moonshotai-cn": None,
    "morph": None,
    "opencode": None,
    "openrouter": None,
    "requesty": None,
    "togetherai": None,
    "upstage": None,
    "v0": None,
    "venice": None,
    "vercel": None,
    "wandb": None,
    "xai": None,
    "zhipuai": None,
}


def _get_token_limits_from_models_dev(timeout=30):
    """
    Download model data from https://models.dev and extract the context
    window and max output tokens of each model.

    Only data for model providers that are available in the LLM Mesh
    (as mapped in MODELS_DEV_TO_LLM_MESH) are extracted from models.dev.
    Models whose entry does not carry both a context and an output limit
    are left out of the result.

    Args:
        timeout: number of seconds to wait for models.dev before giving up
                 (passed to `requests.get`).

    Returns:
        A list of `model_token_limits_models_dev` dicts (see plugin.json
        configuration file for expected format), each with the keys
        "type", "model", "context_token_limit" and "output_token_limit".

    Raises:
        requests.RequestException: if the download fails, times out, or
            models.dev answers with an HTTP error status.
    """
    # Download models.dev data. Without an explicit timeout, requests would
    # wait indefinitely on an unresponsive server.
    response = requests.get("https://models.dev/api.json", timeout=timeout)
    # Fail on HTTP errors here rather than while parsing an error payload.
    response.raise_for_status()
    models_dev_data = response.json()

    # Extract and format models.dev data
    model_token_limits = []

    for models_dev_provider, provider_data in models_dev_data.items():
        # Only keep providers that map to an LLM Mesh type (unknown providers
        # and providers mapped to None are both skipped).
        llm_mesh_type = MODELS_DEV_TO_LLM_MESH.get(models_dev_provider)
        if not llm_mesh_type:
            continue

        # Store each model of the provider as a plugin settings object
        for model_id, model_data in (provider_data.get("models") or {}).items():
            limits = model_data.get("limit") or {}
            context_limit = limits.get("context")
            output_limit = limits.get("output")
            if context_limit is None or output_limit is None:
                # Incomplete entry: skip it instead of raising KeyError, so
                # one odd model does not discard the whole download.
                continue
            model_token_limits.append({
                "type": llm_mesh_type,
                "model": model_id,
                "context_token_limit": context_limit,
                "output_token_limit": output_limit
            })

    return model_token_limits


def get_token_limits_for_llms(llms, config, plugin_config):
    """
    Adds context window and max output tokens to LLMs.
    Ignores RAG-augmented models and Agents (for now), as they'll
    add significant latency and add minimal (if not negative) value.

    Limits are resolved in this order: the first matching entry of the
    plugin's `model_token_limits_override`, then the first matching entry
    downloaded from models.dev, then the plugin's default limits. An entry
    matches when both its "type" and "model" equal those of the LLM.

    Args:
        llms: a list of llms, as returned by
              dataikuapi.dss.project.DSSProject.list_llms
        config: the configuration; when its "add_default_limits" entry is
              truthy (the default), limits are also looked up on models.dev.
        plugin_config: the plugin config. Must contain
              "default_output_token_limit" and
              "default_context_token_limit"; may contain
              "model_token_limits_override".

    Returns:
        The entries of `llms` that have a connection, each with two
        additional fields: "output_token_limit" and "context_token_limit".
        The entries are modified in place (Azure OpenAI and Sagemaker
        entries also get a "model" field).
    """
    # Default context and max output tokens
    default_output_token_limit = plugin_config["default_output_token_limit"]
    default_context_token_limit = plugin_config["default_context_token_limit"]

    # Retrieve context and max output tokens from plugin settings
    model_token_limits_models_dev = []
    if config.get("add_default_limits", True):
        model_token_limits_models_dev = _get_token_limits_from_models_dev()
    model_token_limits_override = plugin_config.get("model_token_limits_override") or []

    # Index the known limits by (type, model) once, instead of rescanning
    # both lists for every LLM. Overrides are indexed first and setdefault
    # keeps the first entry seen, so model overrides take precedence over
    # models.dev.
    limits_by_type_and_model = {}
    for limits_source in (model_token_limits_override, model_token_limits_models_dev):
        for model_info in limits_source:
            key = (model_info.get("type", ""), model_info.get("model", ""))
            limits_by_type_and_model.setdefault(key, model_info)

    llms_with_limits = []
    for model in llms:
        # Ignore RAG-augmented models and agents
        # i.e. they don't have a 'connection' parameter
        if not model.get("connection"):
            continue

        # Azure OpenAI and Sagemaker LLMs don't have a 'model' field, so we set it
        # to 'deployment' and 'connection' respectively, as Sagemaker LLM connections
        # only have a single endpoint per connection.
        if model["type"] == "AZURE_OPENAI_DEPLOYMENT":
            model["model"] = model["deployment"]
        elif model["type"] == "SAGEMAKER_GENERICLLM":
            model["model"] = model["connection"]

        # .get(): other LLM types may also lack a 'model' field; those simply
        # match nothing and receive the default limits.
        model_info = limits_by_type_and_model.get((model["type"], model.get("model")))
        if model_info is not None:
            model["output_token_limit"] = model_info["output_token_limit"]
            model["context_token_limit"] = model_info["context_token_limit"]
        else:
            # else apply default limits
            model["output_token_limit"] = default_output_token_limit
            model["context_token_limit"] = default_context_token_limit

        llms_with_limits.append(model)

    return llms_with_limits
    

# Base opencode.json used when LLM Mesh models are exposed to opencode.
# It declares a custom "dataiku" provider whose "models" map starts empty.
# Custom provider docs: https://opencode.ai/docs/providers/#custom-provider
# The "{env:NAME}" placeholders use opencode's env var substitution:
# https://opencode.ai/docs/config/#env-vars
OPENCODE_JSON_TEMPLATE = {
    "$schema": "https://opencode.ai/config.json",
    "share": "disabled",
    "instructions": [
        "/home/dataiku/workspace/code_studio-resources/AGENTS.md",
    ],
    "disabled_providers": [
        "opencode",
        "openai",
        "anthropic",
        "amazon-bedrock",
    ],
    "provider": {
        "dataiku": {
            "npm": "@ai-sdk/openai-compatible",
            "name": "Dataiku LLM Mesh",
            "options": {
                # Adjacent literals are concatenated into a single URL string.
                "baseURL": (
                    "{env:DKU_BASE_PROTOCOL}://{env:DKU_SERVER_HOST}:{env:DKU_BASE_PORT}"
                    "/public/api/projects/{env:DKU_CURRENT_PROJECT_KEY}/llms/openai/v1"
                ),
                "headers": {
                    "Content-Type": "application/json",
                    "X-DKU-APITicket": "{env:DKU_API_TICKET}",
                },
            },
            "models": {},
        },
    },
}

# Minimal opencode.json without any "provider" section; returned as-is by
# build_opencode_json when the model selection mode is "EXTERNAL_ONLY".
OPENCODE_JSON_TEMPLATE_NO_LLM_MESH = {
    "$schema": "https://opencode.ai/config.json",
    "share": "disabled",
    "instructions": [
        "/home/dataiku/workspace/code_studio-resources/AGENTS.md",
    ],
}

def build_opencode_json(client, config, plugin_config):
    """
    Build opencode.json with the 'dataiku' model provider, which:

      - Points to the LLM Mesh OpenAI-compatible API endpoint
      - Uses the DKU_API_TICKET env var to authenticate with Dataiku
        (set as the X-DKU-APITicket HTTP header)
      - Lists all LLM Mesh models available to the user running the Code
        Studio.

    opencode.json also includes (and, in fact, requires) the context window
    and the max output tokens per LLM Mesh model.

    Args:
        client: object whose `get_default_project()` returns a project
            exposing `list_llms(purpose=...)`.
        config: the configuration. "model_selection_mode" selects the LLMs:
            "EXTERNAL_ONLY" (no LLM Mesh provider at all), "SINGLE_MODEL"
            (uses "model"), "MODELS" (uses "models"), "SINGLE_CONNECTION"
            (uses "connection"), "CONNECTION_TYPES" (uses
            "restrict_to_providers"); any other value keeps every LLM of the
            project. Optional "default_model" and "default_small_model"
            set the "model" and "small_model" entries.
        plugin_config: the plugin config, forwarded to
            get_token_limits_for_llms.

    Returns:
        A new dict holding the opencode.json content. The module-level
        templates are never modified or returned directly.

    Raises:
        Exception: if the selection matches no LLM, or if none of the
            selected LLMs is usable.
    """
    # Local imports keep this function self-contained.
    import collections
    import copy

    mode = config["model_selection_mode"]

    if mode == "EXTERNAL_ONLY":
        # Hand out a copy so callers cannot alter the shared template.
        return copy.deepcopy(OPENCODE_JSON_TEMPLATE_NO_LLM_MESH)

    project = client.get_default_project()
    llms_in_project = project.list_llms(purpose='GENERIC_COMPLETION')

    if mode == "SINGLE_MODEL":
        llms = [llm for llm in llms_in_project if llm["id"] == config["model"]]
        if len(llms) == 0:
            # Both values go through the format string; the previous message
            # had a single %s for two arguments and raised TypeError.
            raise Exception(
                "LLM to use %s does not exist (available LLMs: %s)"
                % (config["model"], [llm["id"] for llm in llms_in_project])
            )

    elif mode == "MODELS":
        llms = [llm for llm in llms_in_project if llm["id"] in config["models"]]
        if len(llms) == 0:
            # One-element tuple so a list/tuple of ids is formatted as one value.
            raise Exception("LLMs to use %s do not exist" % (config["models"],))

    elif mode == "SINGLE_CONNECTION":
        llms = [llm for llm in llms_in_project if llm.get("connection") == config["connection"]]
        if len(llms) == 0:
            raise Exception("Connection to use %s does not exist or does not have models" % (config["connection"],))

    elif mode == "CONNECTION_TYPES":
        # A missing/None setting means "no type allowed" rather than a
        # TypeError on `in None`.
        allowed_types = config.get("restrict_to_providers") or []
        llms = [llm for llm in llms_in_project if llm["type"] in allowed_types]
        if len(llms) == 0:
            raise Exception("No valid model")

    else:
        llms = llms_in_project

    if len(llms) == 0:
        raise Exception("No LLM is usable")

    llms_with_limits = get_token_limits_for_llms(
        llms=llms,
        config=config,
        plugin_config=plugin_config
    )

    # get_token_limits_for_llms drops LLMs without a connection, so the
    # selection can end up empty even though `llms` was not.
    if len(llms_with_limits) == 0:
        raise Exception("No LLM is usable")

    # Work on a private copy: filling the shared template in place would leak
    # models and default-model settings from one call into the next.
    opencode_json = copy.deepcopy(OPENCODE_JSON_TEMPLATE)
    provider_models = opencode_json["provider"]["dataiku"]["models"]

    # Count friendly names once, among the models that are actually listed.
    friendly_name_counts = collections.Counter(
        model["friendlyName"] for model in llms_with_limits
    )

    for model in llms_with_limits:
        # If there are several models with the same name, dedup
        friendly_name = model["friendlyName"]
        if friendly_name_counts[friendly_name] > 1:
            friendly_name += " - " + model.get("connection")

        provider_models[model["id"]] = {
            "name": friendly_name,
            "limit": {
                "output": model["output_token_limit"],
                "context": model["context_token_limit"]
            }
        }

    # Explicit default model if configured, else the first listed model.
    default_model = config.get("default_model")
    if default_model is None:
        default_model = llms_with_limits[0]["id"]

    opencode_json["model"] = "dataiku/%s" % default_model

    if config.get("default_small_model") is not None:
        opencode_json["small_model"] = "dataiku/%s" % config["default_small_model"]

    return opencode_json

    