# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs
# Each input is opened as a Dataiku dataset handle and then loaded in full into a
# pandas DataFrame via get_dataframe().
# applications_correlation_filtered_df: its columns are looked up by score-card
# variable name further down in this recipe.
applications_correlation_filtered = dataiku.Dataset("applications_correlation_filtered")
applications_correlation_filtered_df = applications_correlation_filtered.get_dataframe()
# score_card_df: the 'variable', 'category' and 'score' columns are read from it
# further down in this recipe.
score_card = dataiku.Dataset("score_card")
score_card_df = score_card.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Distinct feature names listed in the score card; one output row is built per name.
variables = score_card_df['variable'].unique()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build one summary row per score-card variable:
#   - 'type'        : 'numeric' when every score-card row of the variable has a null
#                     'category', otherwise 'categorical'
#   - 'min' / 'max' : observed range in the applications data (numeric variables only)
#   - 'values'      : list of distinct observed values (categorical variables only)
#   - 'score_width' : max score minus min score over the variable's score-card rows
#
# Rows are collected as plain dicts and turned into a DataFrame once at the end.
# DataFrame.append (used previously) was removed in pandas 2.0, re-copied the whole
# frame on every iteration, and left every row with index label 0.
feature_range_columns = ['variable', 'type', 'min', 'max', 'values', 'score_width']
feature_range_rows = []

for variable in variables:
    # Raises KeyError if the score card names a column missing from the applications data.
    applications_variable = applications_correlation_filtered_df[variable]
    score_card_variable = score_card_df[score_card_df['variable'] == variable]

    # Same computation for both variable types, so do it once up front.
    variable_scores = score_card_variable['score']
    score_width = variable_scores.max() - variable_scores.min()

    if score_card_variable['category'].isnull().all():
        # No category labels at all -> treated as a numeric variable.
        feature_range_rows.append({
            'variable': variable,
            'type': 'numeric',
            'min': applications_variable.min(),
            'max': applications_variable.max(),
            'values': np.nan,
            'score_width': score_width,
        })
    else:
        # At least one category label -> treated as a categorical variable.
        feature_range_rows.append({
            'variable': variable,
            'type': 'categorical',
            'min': np.nan,
            'max': np.nan,
            'values': list(applications_variable.unique()),
            'score_width': score_width,
        })

# Passing the column list keeps the output schema stable (same names, same order)
# even when the score card yields no variables at all.
feature_ranges_df = pd.DataFrame(feature_range_rows, columns=feature_range_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# The assembled feature_ranges_df is written to the "feature_ranges" dataset.
# write_with_schema also replaces the output dataset's schema with the one derived
# from the DataFrame's columns (per the Dataiku API docs -- confirm for the
# installed version).
feature_ranges = dataiku.Dataset("feature_ranges")
feature_ranges.write_with_schema(feature_ranges_df)