# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs
# Load the "final_dataset_tract_joined" Dataiku dataset into a pandas DataFrame.
# The rest of the recipe reads the FIPS, Health_Measure, Percent_Measure_Value
# and Measure_Value_Percentile columns from it.
# NOTE(review): the handle is named "...county" although the dataset name says
# "tract" - looks like a leftover name; confirm before renaming.
new_condition_dataset_county = dataiku.Dataset("final_dataset_tract_joined")
df = new_condition_dataset_county.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Collect the distinct Health_Measure labels found in the input, then keep only
# the labels whose string form is not 'nan' (i.e. skip missing labels).
# NOTE(review): cleanedList is not referenced by any later cell of this recipe.
measure_labels = df['Health_Measure']
name = measure_labels.unique()
cleanedList = [label for label in name if str(label) != 'nan']

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pivot the percent and percentile values so that every FIPS becomes one row and
# every Health_Measure value becomes one column.
# The column names are then wrapped with a prefix/suffix to follow the format of
# the other health-condition columns. reset_index() runs before the renaming, so
# the FIPS key column is renamed too ('Percent FIPS Disease_tract' and
# 'FIPS Disease_tract Percentile'); the merge cell relies on those exact names.
def _pivot_measure(frame, value_column):
    """Return one row per FIPS and one column per Health_Measure for value_column.

    Exact duplicate (FIPS, Health_Measure, value) rows are removed first, because
    DataFrame.pivot raises ValueError when an index/column pair occurs more than
    once. Pairs that carry conflicting values still raise, so inconsistent input
    is reported rather than silently resolved.
    """
    deduplicated = frame[['FIPS', 'Health_Measure', value_column]].drop_duplicates()
    wide = deduplicated.pivot(index='FIPS', columns='Health_Measure', values=value_column)
    return wide.reset_index()


percent = _pivot_measure(df, 'Percent_Measure_Value').add_prefix('Percent ').add_suffix(' Disease_tract')

percentile = _pivot_measure(df, 'Measure_Value_Percentile').add_suffix(' Disease_tract Percentile')


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build the core table: every input column except the three per-measure ones
# (per the original note: location, SV factors and the other health columns).
# With the per-measure columns removed, rows that are now identical are
# collapsed into a single row by drop_duplicates().
measure_columns = ['Health_Measure', 'Percent_Measure_Value', 'Measure_Value_Percentile']
CleanTable = df.drop(columns=measure_columns)
CleanTable = CleanTable.drop_duplicates()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Combine the pivoted percent and percentile tables on their renamed FIPS keys,
# then attach the result to the core table on FIPS.
percent_key = "Percent FIPS Disease_tract"
percentile_key = "FIPS Disease_tract Percentile"

# A left_on/right_on merge keeps both key columns, so the redundant one is
# dropped after each join.
MetricsTable = pd.merge(percent, percentile, how="inner", left_on=[percent_key], right_on=[percentile_key])
MetricsTable = MetricsTable.drop(columns=[percentile_key])

FinalTable = pd.merge(CleanTable, MetricsTable, how="inner", left_on=["FIPS"], right_on=[percent_key])
FinalTable = FinalTable.drop(columns=[percent_key])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Substitute 0 for every missing (NaN) entry in the merged table
FinalTable = FinalTable.replace(to_replace=np.nan, value=0)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS to an 11-character string, left-padded with zeros.
# Leading zeros are lost when the code is stored as a number; zfill(11) restores
# however many are missing and leaves values that are already 11 characters (or
# longer) untouched, instead of prepending a single '0' to any value whose
# length is not 11.
FinalTable['FIPS'] = FinalTable['FIPS'].astype(str).str.zfill(11)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Dataset new_condition_final_dataset_tract renamed to new_measure_final_dataset_tract by georgia.kouyialis@dataiku.com on 2023-04-19 18:25:13
# Write FinalTable to the "new_measure_final_dataset_tract" Dataiku dataset;
# write_with_schema presumably also sets the output schema from the DataFrame's
# columns - confirm against the Dataiku API reference.
# NOTE(review): the handle is named "...county" although the dataset is
# tract-level - looks like a leftover name; confirm before renaming.
new_condition_final_dataset_county = dataiku.Dataset("new_measure_final_dataset_tract")
new_condition_final_dataset_county.write_with_schema(FinalTable)