# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Recipe input: load the county-level measure dataset into a pandas DataFrame.
# Dataset new_condition_dataset_county renamed to new_measure_dataset_county by georgia.kouyialis@dataiku.com on 2023-04-19 18:48:59
INPUT_DATASET_NAME = "new_measure_dataset_county"
new_condition_dataset_county = dataiku.Dataset(INPUT_DATASET_NAME)
df = new_condition_dataset_county.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Collect the distinct Health_Measure labels, leaving out any entry whose
# string form is 'nan' (i.e. missing values).
name = df['Health_Measure'].unique()
cleanedList = list(filter(lambda label: str(label) != 'nan', name))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pivot the long table to one row per FIPS with one column per Health_Measure,
# once for the percent values and once for the percentile values.
# Column labels are decorated so they follow the naming used by the other
# health-condition columns:
#   percent    -> 'Percent <measure> Disease_county'
#   percentile -> '<measure> Disease_county Percentile'
# reset_index() runs before the prefix/suffix, so the FIPS key column is
# renamed too ('Percent FIPS Disease_county' / 'FIPS Disease_county Percentile').
percent = (
    df.pivot(index='FIPS', columns='Health_Measure', values='Percent_Measure_Value')
    .reset_index()
    .add_prefix('Percent ')
    .add_suffix(' Disease_county')
    # Rows with a missing Health_Measure pivot into a column labelled 'nan'.
    # errors='ignore' keeps the recipe from raising KeyError when the input
    # has no such rows and therefore no such column.
    .drop(columns=['Percent nan Disease_county'], errors='ignore')
)

percentile = (
    df.pivot(index='FIPS', columns='Health_Measure', values='Measure_Value_Percentile')
    .reset_index()
    .add_suffix(' Disease_county Percentile')
    # Same optional 'nan' column as above.
    .drop(columns=['nan Disease_county Percentile'], errors='ignore')
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Core table: every column except the three per-measure ones, de-duplicated
# so the rows repeated once per health measure collapse together.
_measure_columns = ['Health_Measure', 'Percent_Measure_Value', 'Measure_Value_Percentile']
CleanTable = df.drop(columns=_measure_columns)
CleanTable = CleanTable.drop_duplicates()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Join the percent and percentile pivots on their (renamed) FIPS columns and
# discard the duplicate key coming from the percentile side.
MetricsTable = percent.merge(
    percentile,
    how="inner",
    left_on=["Percent FIPS Disease_county"],
    right_on=["FIPS Disease_county Percentile"],
)
MetricsTable = MetricsTable.drop(columns=['FIPS Disease_county Percentile'])

# Attach the pivoted metrics to the core table by FIPS, then drop the
# now-redundant renamed key column.
FinalTable = CleanTable.merge(
    MetricsTable,
    how="inner",
    left_on=["FIPS"],
    right_on=["Percent FIPS Disease_county"],
)
FinalTable = FinalTable.drop(columns=['Percent FIPS Disease_county'])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Substitute 0 for every NaN in the merged table (applies to all columns).
FinalTable = FinalTable.replace(to_replace=np.nan, value=0)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS, State and County codes to fixed-width, zero-padded strings
# (5, 2 and 3 characters respectively).
# str.zfill pads up to the target width and leaves longer values untouched.
# The earlier "prepend a single '0' when the length differs" logic left values
# that were two or more characters short still too short (e.g. '1' -> '01'
# for a 3-character code) and added a spurious zero to over-long values.
FinalTable['FIPS'] = FinalTable['FIPS'].astype(str).str.zfill(5)
FinalTable['State_code'] = FinalTable['State_code'].astype(str).str.zfill(2)
FinalTable['County_code'] = FinalTable['County_code'].astype(str).str.zfill(3)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe output: write the final table, letting the DataFrame define the schema.
# Dataset new_condition_final_dataset_county renamed to new_measure_final_dataset_county by georgia.kouyialis@dataiku.com on 2023-04-19 18:49:59
OUTPUT_DATASET_NAME = "new_measure_final_dataset_county"
new_condition_final_dataset_county = dataiku.Dataset(OUTPUT_DATASET_NAME)
new_condition_final_dataset_county.write_with_schema(FinalTable)