# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs

# Dataset product_revenue_with_cluster_and_categ renamed to product_revenue_with_cluster_and_categ_grouped_by_product_id by anne-soline.guilbert-ly@dataiku.com on 2024-04-24 16:23:50
## Open the input dataset by its current name and load it in one step
input_df = dataiku.Dataset(
    "product_revenue_with_cluster_and_categ_grouped_by_product_id"
).get_dataframe()

# Compute the total revenue per category, per cluster
## For every (cluster_labels, target_category) pair, sum product_revenue_sum
## and broadcast that total back onto each row of the pair. The grouped
## transform keeps the original row order and index, so no row-by-row loop,
## no intermediate dictionary and no chained-indexing write
## (df[col][i] = ..., which pandas may apply to a temporary copy) is needed.
## Missing revenues are skipped when summing a group.
## The cast keeps the column as float even when the revenues are integers.
input_df["category_revenue"] = (
    input_df.groupby(["cluster_labels", "target_category"])["product_revenue_sum"]
    .transform("sum")
    .astype(float)
)

# Compute the product revenue share in a category
## Share (in percent, 3 decimals) of each product's revenue within its
## category_revenue total. It is computed from the unrounded columns, in a
## single vectorised assignment, instead of writing through chained indexing
## (df[col][i] = ...), which pandas may apply to a temporary copy.
input_df["product_revenue_share"] = (
    input_df["product_revenue_sum"] / input_df["category_revenue"] * 100
).round(3)

# Round the revenue columns to 2 decimals (after the share has been computed)
input_df["category_revenue"] = input_df["category_revenue"].round(2)
input_df["product_revenue_sum"] = input_df["product_revenue_sum"].round(2)
    
# Write recipe outputs
## Push the enriched dataframe (data and schema) to the output dataset
dataiku.Dataset("product_revenue_and_share").write_with_schema(input_df)
