# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.dku_utils.core import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Resolve the current project and its variables, then keep only the
# "standard" variables section used by this recipe.
project, variables = get_current_project_and_variables()
app_variables = variables["standard"]

# Mandatory settings: a missing key raises KeyError.
items_revenue_available = app_variables["items_revenue_available_app"]
# Optional setting: evaluates to None when the key is absent.
items_selection_strategy = app_variables.get(
    "items_selection_strategy_app", None
)
items_distribution_contribution_threshold = app_variables[
    "items_distribution_contribution_threshold_app"
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs: the per-item interaction ranking and the dataset
# holding the overall interaction total.
item_interactions_count_rank_df = dataiku.Dataset(
    "item_interactions_count_rank"
).get_dataframe(infer_with_pandas=False)
total_interactions_df = dataiku.Dataset(
    "total_interactions"
).get_dataframe(infer_with_pandas=False)

# The total is taken from the entry labelled 0 of the "total_interactions"
# column (presumably a single-row dataset -- verify against the upstream recipe).
total_user_interactions = total_interactions_df["total_interactions"][0]

# Store the total on every item row, then divide each item's cumulated
# interaction sum by it to obtain the cumulated fraction of interactions.
item_interactions_count_rank_df["total_user_interactions"] = total_user_interactions
item_interactions_count_rank_df["item_total_interactions_fraction"] = (
    item_interactions_count_rank_df["item_interactions_rank_cumulated_sum"].div(
        item_interactions_count_rank_df["total_user_interactions"]
    )
)

# Step 1 -- assemble the item-level frame. When revenue data is available,
# the revenue ranking is enriched with its cumulated fraction and left-joined
# onto the interactions ranking; otherwise the interactions ranking is used
# as-is (same object, not a copy).
if items_revenue_available:
    item_revenue_rank_df = dataiku.Dataset(
        "item_revenue_rank"
    ).get_dataframe(infer_with_pandas=False)
    total_revenue_df = dataiku.Dataset(
        "total_revenue"
    ).get_dataframe(infer_with_pandas=False)
    total_revenue = total_revenue_df["total_revenue"][0]
    item_revenue_rank_df["total_revenue"] = total_revenue
    item_revenue_rank_df["item_total_revenue_fraction"] = (
        item_revenue_rank_df["item_revenue_rank_cumulated_sum"].div(
            item_revenue_rank_df["total_revenue"]
        )
    )
    # Left join: every item of the interactions ranking is preserved; items
    # without a match in the revenue ranking get missing revenue values.
    all_item_distributions_information_df = item_interactions_count_rank_df.merge(
        item_revenue_rank_df, how="left", on="item_id"
    )
else:
    all_item_distributions_information_df = item_interactions_count_rank_df

# Step 2 -- choose which distribution drives the item selection. Revenue is
# used only when revenue data is available AND the revenue strategy is the
# configured one; every other combination falls back to interactions.
(
    items_filtering_column,
    focus_rank_column,
    interactions_or_revenue_column,
    item_rank_origin,
) = (
    (
        "item_total_revenue_fraction",
        "item_revenue_rank",
        "item_revenue",
        "Item revenue rank",
    )
    if items_revenue_available
    and items_selection_strategy == "filter_items_based_on_revenue_distribution"
    else (
        "item_total_interactions_fraction",
        "item_interactions_rank",
        "n_interactions",
        "Item interactions rank",
    )
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Expose the selected measure and rank under strategy-independent column
# names, and record which ranking they were taken from.
for generic_column, source_column in (
    ("item_interactions_or_revenue", interactions_or_revenue_column),
    ("item_rank", focus_rank_column),
):
    all_item_distributions_information_df[generic_column] = (
        all_item_distributions_information_df[source_column]
    )
all_item_distributions_information_df["item_rank_origin"] = item_rank_origin

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Flag every item with the string "False", then switch to "True" the items
# whose cumulated fraction is at or below the configured threshold. Rows with
# a missing fraction fail the comparison and therefore stay "False".
#
# The update goes through a single .loc[row_mask, column] call rather than
# chained indexing (df[column][row_mask] = ...): chained assignment may write
# to a temporary object, emits SettingWithCopyWarning, and has no effect at
# all when pandas Copy-on-Write is enabled.
all_item_distributions_information_df["is_item_to_keep"] = "False"
all_item_distributions_information_df.loc[
    all_item_distributions_information_df[items_filtering_column]
    <= items_distribution_contribution_threshold,
    "is_item_to_keep",
] = "True"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Split the flagged frame into two independent copies, one per value of the
# string flag "is_item_to_keep".
items_kept_df = all_item_distributions_information_df.loc[
    all_item_distributions_information_df["is_item_to_keep"].eq("True")
].copy()
items_rejected_df = all_item_distributions_information_df.loc[
    all_item_distributions_information_df["is_item_to_keep"].eq("False")
].copy()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: one handle per output dataset, then each dataframe is
# written together with its schema, in the order kept / rejected / all.
items_kept = dataiku.Dataset("items_kept")
items_rejected = dataiku.Dataset("items_rejected")
all_item_distributions_information = dataiku.Dataset("all_item_distributions_information")

for output_dataset, output_df in (
    (items_kept, items_kept_df),
    (items_rejected, items_rejected_df),
    (all_item_distributions_information, all_item_distributions_information_df),
):
    output_dataset.write_with_schema(output_df)