import dataiku 
from dku_commons.dku_utils.projects.recipes.recipe_commons import get_recipe_settings_and_dictionary
from dku_commons.dku_utils.projects.datasets.dataset_commons import get_dataset_schema_information
from dku_commons.dku_utils.type_checking import DSSProject

def generate_python_script_computing_ratio(project, recipe_name, 
                                           input_dataset_name, output_dataset_name, 
                                           column_name_pattern, new_column_name_suffix="_ratio",
                                           total_column_name=None, columns_to_avoid=("store_id",), 
                                           remove_total_column=True, remove_initial_sum_count_columns=True):
    """
    Generate the code of a python recipe computing column ratios and save it in the recipe settings.

    In the product category performance branch, this generates the python script in the feature
    engineering flow zone in order to compute the percentage of sales of each product category within a store.
    The generated script reads the input dataset, calls `compute_dataframe_columns_ratio` on every
    schema column that is not listed in `columns_to_avoid`, and writes the result to the output dataset.

    :param project: dataikuapi.dss.project.DSSProject: A handle to interact with a project on the DSS instance.
    :param recipe_name: str: Name of the python recipe.
    :param input_dataset_name: str: Name of the dataset that must be the python recipe input.
    :param output_dataset_name: str: Name of the dataset that must be the python recipe output.
    :param column_name_pattern: str: Regex pattern capturing the base column name for the new ratio columns.
    :param new_column_name_suffix: str: Suffix given at the end of the new ratio columns.
    :param total_column_name: str: Optional name forwarded as `total_column_name` to
        `compute_dataframe_columns_ratio`. When empty or None, the argument is not passed at all.
    :param columns_to_avoid: list[str]: Columns on which the ratio should not be calculated.
    :param remove_total_column: bool: Forwarded as `remove_total_column` to `compute_dataframe_columns_ratio`.
    :param remove_initial_sum_count_columns: bool: True if you want to remove the columns on which it calculates the ratio.
    :returns: None. The recipe code is replaced and saved as a side effect.
    """

    # Get the python recipe settings
    recipe_settings, _ = get_recipe_settings_and_dictionary(project, recipe_name, False)

    # Every caller-supplied string is embedded in the generated script through repr(), so that
    # quotes and backslashes (frequent in regex patterns, e.g. "\b") reach the generated code as
    # valid, faithfully escaped Python literals instead of being re-interpreted or breaking the syntax.
    input_dataset_literal = repr(str(input_dataset_name))
    output_dataset_literal = repr(str(output_dataset_name))
    pattern_literal = repr(str(column_name_pattern))
    suffix_literal = repr(str(new_column_name_suffix))
    excluded_columns_literal = repr(list(columns_to_avoid))

    # The optional keyword argument is spliced in the middle of the call, hence the surrounding commas.
    if total_column_name:
        total_column_name_parameter = f", total_column_name={str(total_column_name)!r}, "
    else:
        total_column_name_parameter = ", "

    # Definition of the python script
    script = f"""# -*- coding: utf-8 -*-
import dataiku
from dku_commons.dku_utils.projects.datasets.dataset_commons import get_dataset_schema_information
from solution.utils.pandas_utils import compute_dataframe_columns_ratio
from dku_commons.dku_utils.projects.project_commons import get_current_project_and_variables

project, _ = get_current_project_and_variables()

# Get input dataset as a Dataframe
dataset_columns, dataset_column_datatypes = get_dataset_schema_information(project, {input_dataset_literal})
dataset_to_ratio = dataiku.Dataset({input_dataset_literal})
dataset_to_ratio_df = dataset_to_ratio.get_dataframe()

# Define the columns to compute ratio on
columns_to_compute_ratio_on = []
for column_name in dataset_columns:
    if column_name not in {excluded_columns_literal}:
        columns_to_compute_ratio_on.append(column_name)

output_dataset_df = compute_dataframe_columns_ratio(dataset_to_ratio_df, columns_to_compute_ratio_on,
                                                    {pattern_literal}, {suffix_literal}{total_column_name_parameter}
                                                    remove_initial_sum_count={remove_initial_sum_count_columns}, remove_total_column={remove_total_column})

# Write recipe outputs
output_dataset = dataiku.Dataset({output_dataset_literal})
output_dataset.write_with_schema(output_dataset_df)
"""

    # Setting the python script into the recipe, and save it.
    recipe_settings.set_code(script)
    recipe_settings.save()

    print(f"Recipe '{recipe_name}' successfully updated!")
    
def remove_null_columns(project, dataset_name):
    """
    After a pivot recipe has been done, remove the potential columns where null values were counted.

    Every column of the dataset schema whose name contains the substring "null" is dropped.
    The dataset is read and rewritten once, and only when at least one such column exists.

    :param project: dataikuapi.dss.project.DSSProject: A handle to interact with a project on the DSS instance.
        It is only used to look up the dataset schema.
    :param dataset_name: str: Name of the dataset to clean.
    :returns: None. The dataset is rewritten (data and schema) as a side effect.
    """
    dataset_columns, _ = get_dataset_schema_information(project, dataset_name)

    # Plain substring match: a column such as "nullable_flag" is dropped too.
    null_columns = [column_name for column_name in dataset_columns if "null" in column_name]
    if not null_columns:
        # Nothing to drop: avoid a useless full read/write of the dataset.
        return

    # NOTE(review): the dataset is opened by name only, not through `project` -
    # presumably resolved against the current project; confirm for cross-project use.
    dataset = dataiku.Dataset(dataset_name)
    df = dataset.get_dataframe()

    # Drop all matching columns in a single pass instead of one read/write cycle per column.
    dataset.write_with_schema(df.drop(columns=null_columns))
