# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#%config Completer.use_jedi = False

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from market_basket_analysis.association_rules import compute_association_rules
from market_basket_analysis.dku_utils import get_current_project_and_variables, clear_dataset
from market_basket_analysis.utils import melt_dataframe
from market_basket_analysis.config.flow.constants import DISTINCT_ITEMSETS_SCHEMA, ASSOCIATION_RULES_SCHEMA

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fetch the current project object and its variables through the project helper.
project, variables = get_current_project_and_variables()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# All recipe parameters are read from the "standard" entry of the variables.
standard_variables = variables["standard"]

max_itemsets_size = standard_variables["max_itemsets_size_app"]
itemsets_min_support = standard_variables["itemsets_min_frequency_app"]
confidence_threshold = standard_variables["rules_confidence_threshold_app"]

# Optional flag: a missing key yields None, which is treated as "disabled" below.
compute_refined_rules = standard_variables.get("compute_refined_rules_app")

# The scope columns are only looked up when refined rules are requested.
if compute_refined_rules:
    association_rules_scope = standard_variables.get("association_rules_scope_app")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe inputs :
# Load the "transactions_preprocessed" dataset as a pandas dataframe.
transactions_preprocessed = dataiku.Dataset("transactions_preprocessed")
transactions_preprocessed_df = transactions_preprocessed.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe outputs :

# Output dataset names, in the order their handles are created and cleared.
output_dataset_names = [
    "association_rules",
    "distinct_itemsets",
    "rules_denormalized_antecedents",
    "rules_denormalized_consequents",
    "association_rules_summary",
]
output_datasets = {name: dataiku.Dataset(name) for name in output_dataset_names}

# Keep one module-level handle per output; the rest of the recipe writes through them.
association_rules = output_datasets["association_rules"]
distinct_itemsets = output_datasets["distinct_itemsets"]
rules_denormalized_antecedents = output_datasets["rules_denormalized_antecedents"]
rules_denormalized_consequents = output_datasets["rules_denormalized_consequents"]
association_rules_summary = output_datasets["association_rules_summary"]

# Run the project's clear_dataset helper on every output before any write.
for output_name in output_dataset_names:
    clear_dataset(project, output_name)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def _decode_unique_items(raw_items):
    """Decode a JSON-encoded value and return its unique elements via np.unique."""
    return np.unique(json.loads(raw_items))


# Replace every JSON-encoded "transaction_items" value by the array of its unique items.
transactions_preprocessed_df["transaction_items"] = (
    transactions_preprocessed_df["transaction_items"].apply(_decode_unique_items)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Compute association rules and write every recipe output.
# In refined mode the computation runs once per distinct combination of the
# scope columns; otherwise it runs once over the whole transactions dataframe.
# Each run writes, in this order: the rules, a one-row summary, the itemsets,
# and the two denormalized (antecedent / consequent) tables.
# NOTE(review): refined mode calls write_with_schema on the same datasets at
# every iteration; this presumably relies on the outputs being configured to
# append rather than overwrite - confirm in the recipe settings.
if compute_refined_rules:

    if association_rules_scope is None:
        # Without this check the column selection below fails with an unhelpful KeyError.
        raise ValueError("'association_rules_scope_app' must be set when 'compute_refined_rules_app' is enabled")

    # Tag every transaction with the tuple of its values for the scope columns.
    transactions_preprocessed_df["rule_scope"] = (
        transactions_preprocessed_df[association_rules_scope].apply(tuple, axis=1)
    )
    scope_granular_combinations = np.unique(transactions_preprocessed_df["rule_scope"])
    n_scope_granular_combinations = len(scope_granular_combinations)

    print("{} scope granular combination found : '{}' ".format(n_scope_granular_combinations, scope_granular_combinations))

    for loop_index, scope_granular_combination in enumerate(scope_granular_combinations):
        # Scope values are not necessarily strings (e.g. numeric identifiers):
        # convert each one explicitly, since str.join raises TypeError on non-str items.
        scope_granular_combination_str = "_".join(str(elem) for elem in scope_granular_combination)
        granular_df = transactions_preprocessed_df[transactions_preprocessed_df["rule_scope"]==scope_granular_combination]

        print("Computing association rules on scope_granular_combination : {} (n°{}/{})".format(scope_granular_combination,
                                                                                                loop_index+1,
                                                                                                n_scope_granular_combinations))

        # The first returned value is not used by this recipe.
        association_rules_found, df_association_rules, df_itemsets_supports = compute_association_rules(
            granular_df,
            "transaction_items",
            itemsets_min_support,
            max_itemsets_size,
            confidence_threshold)
        n_association_rules = len(df_association_rules)
        # Rule ids embed the scope string so they differ from one scope to another.
        df_association_rules["rule_id"] = ["rule_{}_".format(scope_granular_combination_str)+str(id_) for id_ in range(n_association_rules)]

        # Carry the scope column values on both the rules and the itemsets.
        for column_index, column in enumerate(association_rules_scope):
            df_association_rules[column] = scope_granular_combination[column_index]
            df_itemsets_supports[column] = scope_granular_combination[column_index]

        df_association_rules["rule_scope"] = scope_granular_combination_str
        df_association_rules = df_association_rules[association_rules_scope + ["rule_id", "rule_scope"] + ASSOCIATION_RULES_SCHEMA]

        print("Writing association rules data ...")
        association_rules.write_with_schema(df_association_rules, dropAndCreate=False)

        print("Writing association rules summary ...")
        df_association_rules_summary = pd.DataFrame({"n_rules_found":n_association_rules,
                                                     "n_itemsets_found":len(df_itemsets_supports),
                                                     "rule_scope":scope_granular_combination_str}, index=[0])
        df_association_rules_summary = df_association_rules_summary[["rule_scope", "n_rules_found", "n_itemsets_found"]]
        association_rules_summary.write_with_schema(df_association_rules_summary, dropAndCreate=False)

        df_itemsets_supports["rule_scope"] = scope_granular_combination_str
        df_itemsets_supports = df_itemsets_supports[association_rules_scope + ["rule_scope"] + DISTINCT_ITEMSETS_SCHEMA]

        print("n association_rules : {}".format(n_association_rules))
        print("Writing frequent itemsets data ...")
        distinct_itemsets.write_with_schema(df_itemsets_supports, dropAndCreate=False)

        # The itemsets are no longer needed: drop the reference before building
        # the denormalized tables.
        del df_itemsets_supports

        print("Indexing antecedent items/rules ...")
        df_rules_denormalized_antecedents = melt_dataframe(df_association_rules, "rule_antecedent", {"rule_antecedent":"antecedent_item"})

        print("Writing antecedent items/rules ...")
        rules_denormalized_antecedents.write_with_schema(df_rules_denormalized_antecedents, dropAndCreate=False)

        print("Indexing consequent items/rules ...")
        df_rules_denormalized_consequents = melt_dataframe(df_association_rules, "rule_consequent", {"rule_consequent":"consequent_item"})

        print("Writing consequent items/rules ...")
        rules_denormalized_consequents.write_with_schema(df_rules_denormalized_consequents, dropAndCreate=False)

else:
    print("Looking for frequent itemsets ...")
    # The first returned value is not used by this recipe.
    association_rules_found, df_association_rules, df_itemsets_supports = compute_association_rules(
        transactions_preprocessed_df,
        "transaction_items",
        itemsets_min_support,
        max_itemsets_size,
        confidence_threshold)
    n_association_rules = len(df_association_rules)
    df_association_rules["rule_id"] = ["rule_"+str(id_) for id_ in range(n_association_rules)]
    df_association_rules = df_association_rules[["rule_id"] + ASSOCIATION_RULES_SCHEMA]

    print("Writing association rules data ...")
    association_rules.write_with_schema(df_association_rules, dropAndCreate=False)

    print("Writing association rules summary ...")
    df_association_rules_summary = pd.DataFrame({"n_rules_found":n_association_rules,
                                                 "n_itemsets_found":len(df_itemsets_supports)},
                                                index=[0])
    df_association_rules_summary = df_association_rules_summary[["n_rules_found", "n_itemsets_found"]]
    association_rules_summary.write_with_schema(df_association_rules_summary, dropAndCreate=False)

    print("Writing frequent itemsets data ...")
    distinct_itemsets.write_with_schema(df_itemsets_supports, dropAndCreate=False)

    # The itemsets are no longer needed: drop the reference before building
    # the denormalized tables.
    del df_itemsets_supports

    print("Indexing antecedent items/rules ...")
    df_rules_denormalized_antecedents = melt_dataframe(df_association_rules, "rule_antecedent", {"rule_antecedent":"antecedent_item"})

    print("Writing antecedent items/rules ...")
    rules_denormalized_antecedents.write_with_schema(df_rules_denormalized_antecedents, dropAndCreate=False)

    print("Indexing consequent items/rules ...")
    df_rules_denormalized_consequents = melt_dataframe(df_association_rules, "rule_consequent", {"rule_consequent":"consequent_item"})

    print("Writing consequent items/rules ...")
    rules_denormalized_consequents.write_with_schema(df_rules_denormalized_consequents, dropAndCreate=False)