import dataiku 
from dku_commons.dku_utils.projects.datasets.dataset_commons import get_dataset_schema_information
from dku_commons.dku_utils.projects.flow_graph.flow_zones import move_recipe_in_flow_zone
from dku_commons.dku_utils.projects.recipes.pivot_recipe import define_pivot_recipe_aggregations
from dku_commons.dku_utils.projects.recipes.prepare_recipe import instantiate_prepare_recipe, compute_prepare_rename_step, add_step_in_prepare_recipe
from dku_commons.dku_utils.projects.recipes.recipe_commons import update_recipe_ouput_schema
import re 

def configure_pivot_recipe(project, recipe_name, column_name, categorical_values_focus, n_most_frequent_values):
    """
    Set up the aggregations of an existing pivot recipe.

    Rows are identified by "store_id" and the pivot is made on a single column.
    No per-column aggregation is requested; the global count is enabled.

    :param project: dataikuapi.dss.project.DSSProject: A handle to interact with a project on the DSS instance.

    :param recipe_name: str: Name of the pivot recipe to configure.
    :param column_name: str: Name of the column whose values are pivoted.
    :param categorical_values_focus: str: Which categorical values to pivot.
        'most_frequent' selects the "TOP_N" strategy; any other value
        (e.g. 'all') selects the "NO_LIMIT" strategy.
    :param n_most_frequent_values: Maximum number of pivoted values, only used when
        'categorical_values_focus' == 'most_frequent'; otherwise 20 is passed on.
        NOTE(review): previously documented as str, presumably numeric -- confirm.
    """
    focus_on_most_frequent = categorical_values_focus == 'most_frequent'

    # Strategy and cap on the number of pivoted values go hand in hand:
    # the caller-provided cap is only used together with "TOP_N".
    selection_strategy = "TOP_N" if focus_on_most_frequent else "NO_LIMIT"
    pivoted_values_cap = n_most_frequent_values if focus_on_most_frequent else 20

    identifier_columns = ["store_id"]
    pivot_columns = [column_name]
    aggregations_by_column = {}
    min_occurrences = 2
    compute_global_count = True

    # Arguments are passed positionally, in the order the helper expects them.
    define_pivot_recipe_aggregations(
        project,
        recipe_name,
        identifier_columns,
        pivot_columns,
        aggregations_by_column,
        selection_strategy,
        pivoted_values_cap,
        min_occurrences,
        compute_global_count,
    )

def create_prepare_recipe_to_rename_pivoted_columns(project, recipe_name, recipe_input_dataset_name,
                                                    recipe_output_dataset_name, connection_name, flow_zone_name):
    """
    Create a prepare recipe that prefixes the pivoted count columns, then build its output.

    Every column of the input dataset whose name ends with "_count" gets a rename step
    "<prefix>_<column name>", where <prefix> is the text captured between "store_" and
    "_counts" in the input dataset name. The recipe output schema is then updated, the
    recipe is moved into the given flow zone and the output dataset is built.

    :param project: dataikuapi.dss.project.DSSProject: A handle to interact with a project on the DSS instance.

    :param recipe_name: str: Name of the prepare recipe to create.
    :param recipe_input_dataset_name: str: Name of the recipe input dataset. It is expected
        to match the pattern 'store_(.*?)_counts'.
    :param recipe_output_dataset_name: str: Name of the recipe output dataset.
    :param connection_name: str: Connection passed to the recipe instantiation helper
        (presumably the connection of the output dataset -- confirm against the helper).
    :param flow_zone_name: str: Name of the flow zone the recipe is moved into.

    :raises IndexError: If a column has to be renamed but the input dataset name does not
        match 'store_(.*?)_counts'. The exception type is kept for backward compatibility.
    """
    # Create a prepare recipe to rename the pivoted columns
    instantiate_prepare_recipe(project, recipe_name, recipe_input_dataset_name,
                               recipe_output_dataset_name, connection_name)

    dataset_columns, _ = get_dataset_schema_information(project, recipe_input_dataset_name)

    # Both patterns are loop-invariant: evaluate them once, before iterating on the columns.
    count_column_pattern = re.compile(r'\w+_count$')
    prefix_match = re.search(r'store_(.*?)_counts', recipe_input_dataset_name)

    # For all the pivoted columns
    for column_name in dataset_columns:
        if not count_column_pattern.search(column_name):
            continue

        # Fail only when a rename is actually needed, so that datasets without
        # any pivoted column keep going through, whatever their name is.
        if prefix_match is None:
            raise IndexError(
                f"Cannot rename column '{column_name}': dataset name "
                f"'{recipe_input_dataset_name}' does not match 'store_(.*?)_counts'."
            )

        new_column_name = f"{prefix_match.group(1)}_{column_name}"
        rename_step = compute_prepare_rename_step(column_name, new_column_name)
        add_step_in_prepare_recipe(project, recipe_name, step=rename_step, step_comment="",
                                   show_step_comment=True)

    update_recipe_ouput_schema(project, recipe_name)
    move_recipe_in_flow_zone(project, recipe_name, flow_zone_name)

    # Build the output dataset
    project_dataset = project.get_dataset(recipe_output_dataset_name)
    print("Building dataset '{}'...".format(recipe_output_dataset_name))
    project_dataset.build()
    print("Dataset '{}' successfully built!".format(recipe_output_dataset_name))

