# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs
# Open a handle on the "Sales_final_dataset_joined" dataset and load it into a pandas DataFrame.
Sales_final_dataset_joined = dataiku.Dataset("Sales_final_dataset_joined")
Sales_final_df = Sales_final_dataset_joined.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Global parameters selecting the channel column and the target group to analyse.
# The custom-variable dictionary is fetched once and reused for all three lookups
# instead of being requested again for every key.
custom_variables = dataiku.get_custom_variables()
channel_analysis_variable = custom_variables['channel_analysis_variable']
target_group_category = custom_variables["target_group_category"]
# NOTE: the key really is spelled "taget_group_value" (sic); it must keep matching the
# variable name defined on the Dataiku side, so do not "fix" the spelling here alone.
taget_group_value = custom_variables["taget_group_value"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Restrict the data to the user selection: rows where the analysed channel column
# is at least 1 AND the target-group column equals the requested value.
channel_is_active = Sales_final_df[channel_analysis_variable] >= 1
in_target_group = Sales_final_df[target_group_category] == taget_group_value
Sales_final_df = Sales_final_df.loc[channel_is_active & in_target_group]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Replace the row labels left over from the filter above with a fresh 0..n-1 index
# (drop=True discards the old labels instead of keeping them as a column).
Sales_final_df = Sales_final_df.reset_index(drop=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Retrieve only the columns whose name contains '_success'
# (presumably the channels that record a user action/response).
is_success_column = ['_success' in str(column) for column in Sales_final_df.columns]
df_map = Sales_final_df.loc[:, is_success_column]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Row-wise total over all numeric '_success' columns, kept as a plain Python list
# (each row is presumably one HCP account in one week - confirm against the input dataset).
interactions_per_row = df_map.sum(axis=1, numeric_only=True)
interactions = interactions_per_row.tolist()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# For every row, build a comma-separated string naming the '_success' columns whose
# value is greater than zero (missing values compare as not greater than zero).
# The comparison is done once for the whole frame rather than through a row-wise
# DataFrame.apply, which would construct a pandas Series for each row.
has_interaction = (df_map > 0).values
channel_names = df_map.columns.values
df_chain = pd.DataFrame(
    {
        'communication_touchpoints': [
            ', '.join(channel_names[row_mask]) for row_mask in has_interaction
        ]
    },
    index=df_map.index,  # keep the same row labels as the source frame
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach the per-row interaction totals (positional: one list entry per row).
df_chain = df_chain.assign(user_interactions=interactions)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach the weekly product revenue of each row (aligned on the row index).
df_chain = df_chain.assign(total_revenue=Sales_final_df['product_revenue_weekly'])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Group the rows by their touchpoint combination and compute, per combination:
#   - user_interactions: number of rows (count) and total interactions (sum)
#   - total_revenue:     average (mean) and total (sum)
agg_funcs = {
    'user_interactions': ['count', 'sum'],
    'total_revenue': ['mean', 'sum'],
}
chains_grouped = df_chain.groupby('communication_touchpoints')
df_communication_disc = chains_grouped.agg(agg_funcs).reset_index()

# The aggregation yields two-level column labels; replace them with flat names,
# listed in the same order as the aggregated columns above.
flat_column_names = [
    'communication_touchpoints',
    'chain_occurance',
    'user_interactions',
    'revenue_avg',
    'revenue',
]
df_communication_disc.columns = flat_column_names

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Compute the number of channels in each chain: touchpoints are joined with commas,
# so a non-empty chain holds (number of commas + 1) channels. An empty chain (no
# channel with a positive value) counts as 0 channels rather than 1.
# Iterating over the values directly avoids depending on the frame's index labels.
df_communication_disc['number_channels'] = [
    chain.count(",") + 1 if chain else 0
    for chain in df_communication_disc['communication_touchpoints']
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Revenue per interaction = total revenue of a chain divided by the total number of
# interactions recorded for that chain, rounded to 2 decimals.
# NOTE(review): a chain whose interaction total is 0 would divide by zero here
# (pandas yields inf/NaN rather than raising) - confirm this cannot occur upstream.
chain_revenue = df_communication_disc['revenue']
chain_interactions = df_communication_disc['user_interactions']
revenue_ratio = chain_revenue / chain_interactions
df_communication_disc['revenue_per_interaction'] = revenue_ratio.round(2)

# Order the chains by ascending revenue per interaction.
df_communication_disc = df_communication_disc.sort_values(by='revenue_per_interaction')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Tag every row with the analysed target group and channel. Assigning a scalar lets
# pandas broadcast it to all rows, so no throwaway list of repeated values is built.
df_communication_disc['target_group'] = taget_group_value
# The channel label is the analysed column name with its "_success" part removed.
df_communication_disc['channel'] = channel_analysis_variable.replace("_success", "")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Renumber the rows 0..n-1 in their current (sorted) order. drop=True discards the old
# labels directly instead of first materialising them as an 'index' column and then
# dropping that column again.
df_communication_disc = df_communication_disc.reset_index(drop=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Round the two revenue columns to 2 decimals (other columns are left untouched),
# then store the interaction totals as integers.
df_communication_disc = df_communication_disc.round({'revenue_avg': 2, 'revenue': 2})
df_communication_disc = df_communication_disc.astype({'user_interactions': int})

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Persist the aggregated per-chain frame to the "Sales_channel_chain_interactions" dataset.
# As the method name suggests, the dataset schema is presumably taken from the dataframe -
# confirm against the Dataiku API documentation.
Sales_channel_chain_interactions = dataiku.Dataset("Sales_channel_chain_interactions")
Sales_channel_chain_interactions.write_with_schema(df_communication_disc)