# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import itertools
# Read recipe inputs
# Load the "health_measure_county_sync" Dataiku dataset into a DataFrame.
# The cells below read its Health_Measure, Percent_Measure_Value and FIPS
# columns.
New_disease = dataiku.Dataset("health_measure_county_sync")
df = New_disease.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# A list of the unique names of health conditions
# (not needed by the percentile computation below; kept as a module-level
# name in case other cells or downstream code refer to it)
conditions_names = df['Health_Measure'].unique().tolist()

# Compute the percentile values of each health condition
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank every Percent_Measure_Value within its own Health_Measure group.
# groupby(...).rank() returns a Series aligned on df's index, so each row
# receives the percentile computed for that very row, whatever the row order
# of the input is. Flattening per-condition ranks into a plain list and
# assigning it positionally would only line up if the rows were already stored
# contiguously by condition, in first-appearance order, and would fail with a
# length mismatch as soon as Health_Measure contained a missing value.
# Rows with a missing Health_Measure or Percent_Measure_Value get NaN.
df['Measure_Value_Percentile'] = (
    df.groupby('Health_Measure')['Percent_Measure_Value']
    .rank(pct=True)
    .round(2)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS codes to a fixed-width, 5-character, zero-padded string.
# zfill adds as many leading zeros as are missing (not just one) and leaves
# values that are already 5 or more characters long untouched.
# NOTE(review): assumes FIPS is read as an integer or string column; if it is
# read as float (e.g. because of missing values) the string form will carry a
# ".0" suffix and should be cleaned before padding - confirm against the input.
df['FIPS'] = df['FIPS'].astype(str).str.zfill(5)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Write df (now carrying the Measure_Value_Percentile column and the
# reformatted FIPS column) to the "new_measure_percentage_percentile_county"
# dataset. NOTE(review): write_with_schema presumably also sets the output
# dataset's schema from the DataFrame - confirm against the Dataiku API docs.
new_disease_percentage_percentile_county = dataiku.Dataset("new_measure_percentage_percentile_county")
new_disease_percentage_percentile_county.write_with_schema(df)