import dataiku
import pandas
import re

def compute_dataframe_columns_ratio(dataframe, columns_to_compute_ratio_on,
                                    column_name_pattern, new_column_name_suffix, total_column_name=None,
                                    remove_total_column=True, remove_initial_sum_count=True):
    """
    Create one ratio column (column / total) for each column in "columns_to_compute_ratio_on".

    When no "total_column_name" is given, a "total" column is first built as the row-wise sum
    of the "columns_to_compute_ratio_on" columns. Each ratio column is named after the first
    capture group of "column_name_pattern" applied to the source column name, followed by
    "new_column_name_suffix". Source columns whose name does not match the pattern are skipped
    (no ratio is computed for them and they are not removed).

    Missing values are replaced by 0 in the whole dataframe before the computation, and ratios
    that come out as NaN (e.g. 0 / 0) are replaced by 0 in the result.

    The input dataframe is never modified: the computation is done on a copy.

    :param dataframe: pandas.DataFrame: Dataframe holding the columns to compute the ratios on.
    :param columns_to_compute_ratio_on: list[str]: list of the columns names to compute ratio on.
    :param column_name_pattern: string: Regex pattern, matched from the start of each column name,
        whose first capture group gives the base name of the new ratio column.
    :param new_column_name_suffix: string: Suffix given at the end of the new ratio columns
    :param total_column_name: string: Name of the total count column if it exists. If empty or None,
        a column named "total" is created from the sum of the "columns_to_compute_ratio_on" columns.
    :param remove_total_column: boolean: True if you want to remove the "total" column used to compute the ratio.
    :param remove_initial_sum_count: boolean: True if you want to remove the columns on which it calculates the ratio.

    :returns: dataframe_with_ratio: pandas.DataFrame: New dataframe with the ratio columns.
    """
    # Non-inplace fillna returns a new frame, so every later assignment lands on
    # our own copy and the caller's dataframe is left untouched.
    dataframe_with_ratio = dataframe.fillna(0)

    if not total_column_name:
        # There is no total column, one is then created
        total_column_name = "total"
        dataframe_with_ratio[total_column_name] = 0

        # for each row, accumulate in the "total" column the values of the
        # "columns_to_compute_ratio_on" columns
        for column_name in columns_to_compute_ratio_on:
            dataframe_with_ratio[total_column_name] += dataframe_with_ratio[column_name]

    # Compile once instead of re-resolving the pattern on every iteration
    name_regex = re.compile(column_name_pattern)

    for column_name in columns_to_compute_ratio_on:
        match = name_regex.match(column_name)
        if not match:
            # The column name does not follow the expected pattern: leave it as is
            continue

        # create the new column's name from the captured base name
        column_new_name = match.group(1) + new_column_name_suffix

        # calculate the ratio for this column
        dataframe_with_ratio[column_new_name] = (
            dataframe_with_ratio[column_name] / dataframe_with_ratio[total_column_name]
        )

        if remove_initial_sum_count:  # keep only the ratio columns
            dataframe_with_ratio = dataframe_with_ratio.drop(column_name, axis=1)

    if remove_total_column:
        dataframe_with_ratio = dataframe_with_ratio.drop(total_column_name, axis=1)

    # A 0 / 0 division yields NaN: report those ratios as 0
    return dataframe_with_ratio.fillna(0)
