# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#%config Completer.use_jedi = False

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import numpy as np
from statsmodels.genmod.families.family import Tweedie

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def score(y_valid, y_pred, var_power, weights):
    """Return the Tweedie deviance of the predictions per observation.

    The deviance is computed by statsmodels' ``Tweedie`` family and then
    divided by ``y_valid.count()``; note that the divisor is the observation
    count, not the sum of ``weights``.

    Args:
        y_valid: Observed values; must expose ``.count()`` (callers in this
            file pass a DataFrame column).
        y_pred: Predicted means, passed to the family as ``mu``.
        var_power: Variance power of the Tweedie family.
        weights: Per-observation weights, passed as ``var_weights``.

    Returns:
        The deviance divided by the count reported by ``y_valid``.
    """
    observation_count = y_valid.count()
    family = Tweedie(var_power=var_power)
    total_deviance = family.deviance(endog=y_valid,
                                     mu=y_pred,
                                     var_weights=weights)
    return total_deviance / observation_count

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs
# Open the Dataiku dataset "model_comparison_prepared" and load it as a
# DataFrame. The rest of the script reads its ClaimAmount, Exposure,
# PremiumPrediction and PremiumPredictionCompound columns.
model_comparison_prepared = dataiku.Dataset("model_comparison_prepared")
model_comparison_prepared_df = model_comparison_prepared.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Tweedie variance powers at which the deviance of both models is evaluated.
var_powers = [1.25, 1.5, 1.6, 1.7, 1.8, 1.9, 1.99]

# Column aliases: observed claims, exposure weights and the two competing
# premium predictions.
claim_amount = model_comparison_prepared_df['ClaimAmount']
exposure = model_comparison_prepared_df['Exposure']
compound_prediction = model_comparison_prepared_df['PremiumPredictionCompound']
tweedie_prediction = model_comparison_prepared_df['PremiumPrediction']


def _exposure_weighted_mean(values, weights):
    """Return ``sum(weights * values) / sum(weights)`` using vectorized sums.

    ``skipna=False`` makes a missing value in either input propagate to the
    result as NaN instead of being silently dropped from the sums.
    """
    return (weights * values).sum(skipna=False) / weights.sum(skipna=False)


# Per-observation Tweedie deviance of each model, one entry per variance power.
product_model_scores = [
    score(claim_amount, compound_prediction, var_power, exposure)
    for var_power in var_powers
]
tweedie_model_scores = [
    score(claim_amount, tweedie_prediction, var_power, exposure)
    for var_power in var_powers
]

# Exposure-weighted mean absolute error of each model.
product_model_mean_abs_error = _exposure_weighted_mean(
    (claim_amount - compound_prediction).abs(), exposure)
tweedie_model_mean_abs_error = _exposure_weighted_mean(
    (claim_amount - tweedie_prediction).abs(), exposure)

# Despite the "mean_square_error" names, the square root is applied, so these
# hold the exposure-weighted ROOT mean square error of each model.
product_model_mean_square_error = np.sqrt(_exposure_weighted_mean(
    (claim_amount - compound_prediction) ** 2, exposure))
tweedie_model_mean_square_error = np.sqrt(_exposure_weighted_mean(
    (claim_amount - tweedie_prediction) ** 2, exposure))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# One row per variance power, holding the deviance score of each model.
score_columns = {
    'var_power': var_powers,
    'product_model': product_model_scores,
    'tweedie_model': tweedie_model_scores,
}
results = pd.DataFrame(data=score_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add the two error metrics as extra rows. The 'var_power' column doubles as
# a row label here: 1 tags the mean-absolute-error row and 0.5 tags the row
# holding the *_mean_square_error values.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; row order and index labels match what the chained
# append calls produced.
results = pd.concat([
    results,
    pd.DataFrame({'var_power': [1],
                  'product_model': [product_model_mean_abs_error],
                  'tweedie_model': [tweedie_model_mean_abs_error]}),
    pd.DataFrame({'var_power': [0.5],
                  'product_model': [product_model_mean_square_error],
                  'tweedie_model': [tweedie_model_mean_square_error]}),
])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Bare expression: shows the assembled table when this cell is run in a
# notebook; it assigns nothing and changes no state.
results

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Hand the `results` table to the "regression_fit" Dataiku dataset through
# write_with_schema.
regression_fit = dataiku.Dataset("regression_fit")
regression_fit.write_with_schema(results)