# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from scipy.stats import norm
import math
import statistics
from scipy.optimize import minimize_scalar
from commons.dku_utils.core import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs
# Load the "transition_matrices_x_bounds_joined" dataset as a pandas DataFrame.
# The rest of this recipe reads its 'quarter', 'x_bin_lag', 'x_bin', 'UPB_sum',
# 'probability' and 'date' columns.
transition_matrices_x_bounds = dataiku.Dataset("transition_matrices_x_bounds_joined")
transition_matrices_x_bounds_df = transition_matrices_x_bounds.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def fitted_transition_rate(lower_bound, upper_bound, rho, z):
    """Return the fitted probability mass between two bounds.

    Both bounds are shifted by ``sqrt(rho) * z`` and divided by
    ``sqrt(1 - rho)``; the result is the standard normal CDF at the
    transformed upper bound minus the CDF at the transformed lower bound.

    Args:
        lower_bound: Lower edge of the bin.
        upper_bound: Upper edge of the bin.
        rho: Weight whose square root scales ``z``. Must satisfy
            0 <= rho < 1 so that both square roots and the division are
            defined.
        z: Value that is scaled by ``sqrt(rho)`` and subtracted from both
            bounds.

    Returns:
        The difference of the two CDF values (NaN if a bound is NaN).
    """
    # NOTE(review): this has the shape of a one-factor conditional
    # probability (z as the common factor, rho as its weight) - confirm.
    shift = math.sqrt(rho) * z
    scale = math.sqrt(1 - rho)
    cdf_at_upper = norm.cdf((upper_bound - shift) / scale)
    cdf_at_lower = norm.cdf((lower_bound - shift) / scale)
    return cdf_at_upper - cdf_at_lower

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def objective_function(count, proba, lower_bound, upper_bound, rho, z):
    """Return one bin's weighted squared error between observed and fitted rate.

    The value is ``count * (proba - t)**2 / (t * (1 - t))`` where ``t`` is
    ``fitted_transition_rate(lower_bound, upper_bound, rho, z)``.

    Args:
        count: Weight applied to the bin.
        proba: Observed rate for the bin.
        lower_bound: Lower edge of the bin, forwarded to
            ``fitted_transition_rate``.
        upper_bound: Upper edge of the bin, forwarded to
            ``fitted_transition_rate``.
        rho: Forwarded to ``fitted_transition_rate``.
        z: Forwarded to ``fitted_transition_rate``.

    Returns:
        0 when the fitted rate is NaN (the bin is ignored); 0.0 when the
        fitted rate is exactly 0 or 1 and the weighted squared error is also
        0; ``math.inf`` when the fitted rate is exactly 0 or 1 and the
        weighted squared error is non-zero; otherwise the ratio above.
    """
    transition = fitted_transition_rate(lower_bound, upper_bound, rho, z)
    if math.isnan(transition):
        # A NaN fitted rate (e.g. a NaN bound) contributes nothing.
        return 0

    weighted_sq_error = count * pow(proba - transition, 2)
    denominator = transition * (1 - transition)
    if denominator == 0:
        # The fitted rate has saturated at exactly 0 or 1. With the NumPy
        # scalar produced by norm.cdf, dividing here would emit a
        # RuntimeWarning and give nan for 0/0, which would then turn the
        # whole per-quarter sum into nan and mislead the minimizer.
        # (proba - t)**2 / (t * (1 - t)) tends to 0 as t approaches a proba
        # of 0 or 1, so the matching case contributes 0; any mismatch is an
        # unbounded penalty.
        if weighted_sq_error == 0:
            return 0.0
        return math.inf
    return weighted_sq_error / denominator

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def quarter_objective_function(quarter, rho, z):
    """Return the total of ``objective_function`` over one quarter's rows.

    Rows of ``transition_matrices_x_bounds_df`` whose 'quarter' equals
    ``quarter`` are selected; for each row, 'x_bin_lag' is used as the lower
    bound, 'x_bin' as the upper bound, 'UPB_sum' as the weight and
    'probability' as the observed rate.

    Args:
        quarter: Value to match against the 'quarter' column.
        rho: Forwarded to ``objective_function``.
        z: Forwarded to ``objective_function``.

    Returns:
        The sum of the per-row objective values (0 when no row matches).
    """
    is_in_quarter = transition_matrices_x_bounds_df['quarter'] == quarter
    rows = transition_matrices_x_bounds_df[is_in_quarter]

    per_row_inputs = zip(
        rows['x_bin_lag'].tolist(),
        rows['x_bin'].tolist(),
        rows['UPB_sum'].tolist(),
        rows['probability'].tolist(),
    )

    total = 0
    for lower, upper, weight, observed in per_row_inputs:
        total += objective_function(weight, observed, lower, upper, rho, z)
    return total

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Distinct values of the 'quarter' column; one z is fitted per entry below.
quarters = transition_matrices_x_bounds_df['quarter'].unique()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def obj_variance_z(rho):
    """Return the squared gap between the variance of the fitted z's and 1.

    For the given ``rho``, a z is fitted for every entry of ``quarters`` by
    minimizing ``quarter_objective_function`` over z. The sample variance of
    those z values is then compared with 1.

    Args:
        rho: Candidate value forwarded to ``quarter_objective_function``.

    Returns:
        ``(variance(zs) - 1) ** 2`` as a float.

    Raises:
        statistics.StatisticsError: If fewer than two quarters are available,
            since a sample variance cannot be computed.
    """
    fitted_zs = []
    for quarter in quarters:
        # The lambda is consumed immediately, so it sees this iteration's quarter.
        fit = minimize_scalar(lambda z: quarter_objective_function(quarter, rho, z))
        fitted_zs.append(fit.x)

    variance_gap = statistics.variance(fitted_zs) - 1
    return math.pow(variance_gap, 2)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Bounded scalar search for the rho in [0.0001, 0.9999] that minimizes
# obj_variance_z, i.e. that brings the variance of the per-quarter z's closest to 1.
rho = minimize_scalar(obj_variance_z, method='bounded', bounds=[0.0001, 0.9999]).x

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Record the fitted rho under variables['standard']['rho'] and hand the updated
# variables back to the project.
project, variables = get_current_project_and_variables()
variables['standard']['rho'] = rho
project.set_variables(variables)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fit one z per quarter using the final rho (same per-quarter minimization as
# inside obj_variance_z).
zs = [minimize_scalar(lambda z: quarter_objective_function(quarter, rho, z)).x for quarter in quarters]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# One row per quarter; zs was built by iterating quarters, so the two align.
z_score_df = pd.DataFrame({'quarter': quarters, 'z_score': zs})

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Largest 'date' value found among each quarter's rows.
quarter_date = transition_matrices_x_bounds_df.groupby('quarter')['date'].max().reset_index()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach that date to each quarter's z-score; the left join keeps every z-score row.
z_score_df = pd.merge(z_score_df, quarter_date, how='left', on='quarter')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Send z_score_df (columns 'quarter', 'z_score', 'date') to the "z_score" dataset.
z_score = dataiku.Dataset("z_score")
z_score.write_with_schema(z_score_df)