import logging

import dataiku
from dataiku.customrecipe import get_output_names_for_role, get_recipe_config

from universal_data_generator import UniversalDataGenerator


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _require(condition, message):
    if not condition:
        raise ValueError(message)


def _load_columns_spec(raw_spec):
    """Validate the recipe's column specification and return it unchanged.

    Raises:
        ValueError: (through ``_require``) if *raw_spec* is not a non-empty
            list.
    """
    is_non_empty_list = isinstance(raw_spec, list) and len(raw_spec) > 0
    _require(is_non_empty_list, "You must define at least one column")
    return raw_spec


def _build_generator(config):
    """Create the ``UniversalDataGenerator`` described by the recipe config.

    Reads three settings from *config*:

    * ``rows``   -- number of rows to generate (default ``1000``, must be > 0)
    * ``seed``   -- integer seed (default ``42``)
    * ``locale`` -- locale string (default ``"en_US"``)

    A setting that is absent, ``None`` or a blank string is treated as unset
    and replaced by its default, so a null value in the config no longer
    surfaces as an opaque ``TypeError`` from ``int(None)``.

    Args:
        config: Mapping of recipe settings.

    Returns:
        A ``UniversalDataGenerator`` built with ``n_rows``, ``seed`` and
        ``locale``.

    Raises:
        ValueError: If ``rows`` or ``seed`` cannot be converted to an integer,
            or if ``rows`` is not strictly positive.
    """

    def _int_setting(key, default):
        # A key that is present but null/blank must behave like a missing key.
        raw = config.get(key)
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            return default
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError(
                f"Recipe setting '{key}' must be an integer, got {raw!r}"
            ) from err

    rows = _int_setting("rows", 1000)
    seed = _int_setting("seed", 42)
    # `or` also covers an explicit null / empty locale, not only a missing key.
    locale = config.get("locale") or "en_US"
    _require(rows > 0, "'Rows to generate' must be a positive integer")
    return UniversalDataGenerator(n_rows=rows, seed=seed, locale=locale)


def _read_number(conf, key, default, cast, owner):
    """Read a numeric option from *conf*, falling back to *default* when unset.

    Args:
        conf: Mapping holding the option (a column spec or a dependency).
        key: Name of the option to read.
        default: Value returned when the option is absent, ``None`` or a
            blank string.
        cast: ``int`` or ``float``; applied to the raw value.
        owner: Column name, used only to build the error message.

    Raises:
        ValueError: If the raw value cannot be converted with *cast*.
    """
    raw = conf.get(key)
    # A key that is present but null/blank must behave like a missing key;
    # `conf.get(key, default)` alone would hand None to the cast.
    if raw is None or (isinstance(raw, str) and not raw.strip()):
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(
            f"Column '{owner}': option '{key}' must be a valid {cast.__name__}, got {raw!r}"
        ) from err


def _apply_column(generator, column_conf):
    """Add one column to *generator* according to *column_conf*.

    The column type is read from ``column_type`` (or ``type`` as a fallback)
    and compared case-insensitively. Supported types and the options they
    read:

    * ``normal``    -- ``mean`` (0.0), ``std`` (1.0)
    * ``uniform``   -- ``min`` (0.0), ``max`` (1.0)
    * ``integer``   -- ``min`` (0), ``max`` (100)
    * ``category``  -- ``choices`` (required), ``weights`` (optional)
    * ``date``      -- ``start`` ("2023-01-01"), ``end`` ("2024-01-01")
    * ``faker``     -- ``faker_provider`` (required), ``unique`` (False)
    * ``dependent`` -- ``dependencies`` (required list of objects with
      ``source_column``/``column`` and ``correlation``), ``noise_std``,
      ``output_type``, ``threshold``, ``rescale``, ``min_value``,
      ``max_value``

    Numeric options that are absent, ``None`` or blank fall back to the
    defaults shown above.

    Args:
        generator: Object exposing ``generate_independent``,
            ``generate_faker`` and ``generate_correlated``.
        column_conf: Dict describing a single column.

    Raises:
        ValueError: If the spec is malformed, a required option is missing,
            a numeric option cannot be converted, or the type is unsupported.
    """
    _require(isinstance(column_conf, dict), "Each column specification must be an object")
    name = column_conf.get("name")
    column_type = column_conf.get("column_type") or column_conf.get("type")
    _require(name, "Every column definition needs a 'name'")
    _require(column_type, f"Column '{name}' must provide a 'column_type'")
    # Guard before .lower(): a non-string type would otherwise raise AttributeError.
    _require(
        isinstance(column_type, str),
        f"Column '{name}' has a non-text 'column_type': {column_type!r}",
    )

    column_type = column_type.lower()

    if column_type == "normal":
        mean = _read_number(column_conf, "mean", 0.0, float, name)
        std = _read_number(column_conf, "std", 1.0, float, name)
        generator.generate_independent(name, "normal", mean=mean, std=std)
    elif column_type == "uniform":
        low = _read_number(column_conf, "min", 0.0, float, name)
        high = _read_number(column_conf, "max", 1.0, float, name)
        generator.generate_independent(name, "uniform", min=low, max=high)
    elif column_type == "integer":
        low = _read_number(column_conf, "min", 0, int, name)
        high = _read_number(column_conf, "max", 100, int, name)
        generator.generate_independent(name, "integer", min=low, max=high)
    elif column_type == "category":
        choices = column_conf.get("choices") or []
        _require(choices, f"Categorical column '{name}' needs at least one choice")
        # An empty weights list is treated the same as no weights at all.
        weights = column_conf.get("weights") or None
        if weights:
            weights = [float(w) for w in weights]
        generator.generate_independent(name, "category", choices=choices, weights=weights)
    elif column_type == "date":
        start = column_conf.get("start") or "2023-01-01"
        end = column_conf.get("end") or "2024-01-01"
        generator.generate_independent(
            name, "date", start=_normalize_date(start), end=_normalize_date(end)
        )
    elif column_type == "faker":
        provider = column_conf.get("faker_provider")
        _require(provider, f"Faker column '{name}' needs a provider selection")
        unique = bool(column_conf.get("unique", False))
        generator.generate_faker(name, provider=provider, unique=unique)
    elif column_type == "dependent":
        dependencies = column_conf.get("dependencies") or []
        _require(
            isinstance(dependencies, list) and dependencies,
            f"Dependent column '{name}' needs at least one dependency",
        )
        normalized_dependencies = []
        for dep in dependencies:
            _require(isinstance(dep, dict), f"Each dependency for '{name}' must be an object")
            source = dep.get("source_column") or dep.get("column")
            _require(source, f"Dependencies for '{name}' must specify a source column")
            correlation = _read_number(dep, "correlation", 1.0, float, name)
            normalized_dependencies.append({"column": source, "correlation": correlation})
        noise_std = _read_number(column_conf, "noise_std", 0.0, float, name)
        # `or` so that an explicit null falls back to the default as well.
        output_type = column_conf.get("output_type") or "numeric"
        threshold = _read_number(column_conf, "threshold", 0.5, float, name)
        rescale = bool(column_conf.get("rescale", False))
        min_value = _read_number(column_conf, "min_value", 0.0, float, name)
        max_value = _read_number(column_conf, "max_value", 100.0, float, name)
        generator.generate_correlated(
            name,
            dependencies=normalized_dependencies,
            noise_std=noise_std,
            output_type=output_type,
            threshold=threshold,
            rescale=rescale,
            min_value=min_value,
            max_value=max_value,
        )
    else:
        raise ValueError(f"Unsupported column type '{column_type}' for column '{name}'")

    logger.info("Generated column '%s' using type '%s'", name, column_type)


def _normalize_date(value):
    if not value:
        return None
    return value.split("T", 1)[0]


def main():
    """Run the recipe: build the generator, add each column, write the result.

    Steps, in order:

    1. Read the recipe configuration (an empty/missing config becomes ``{}``).
    2. Build the generator and validate ``columns_spec``.
    3. Apply every column specification to the generator.
    4. Fetch the dataframe and reject an empty one.
    5. Write the dataframe, with its schema, to the first dataset of the
       ``output_dataset`` role.

    Raises:
        ValueError: If the configuration is invalid, the dataframe is empty,
            or no output dataset is selected.
    """
    recipe_config = get_recipe_config()
    if not recipe_config:
        recipe_config = {}
    generator = _build_generator(recipe_config)

    for spec in _load_columns_spec(recipe_config.get("columns_spec")):
        _apply_column(generator, spec)

    frame = generator.get_dataframe()
    _require(not frame.empty, "Generator produced an empty dataframe; check your configuration")

    targets = get_output_names_for_role("output_dataset")
    _require(targets, "You must select an output dataset")
    dataiku.Dataset(targets[0]).write_with_schema(frame)


# Run the recipe only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
