# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Recipe input: load the "svi_county_joined_prepared" dataset as a DataFrame
input_dataset = dataiku.Dataset("svi_county_joined_prepared")
df = input_dataset.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Select the percentage columns (those whose name starts with 'EP_')
ep_columns = [col for col in df if col.startswith('EP_')]
ep_values = df[ep_columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank every percentage column independently. With pct=True pandas returns the
# ranks in percentile (fractional) form; no rounding is applied here.
ep_percentiles = ep_values.rank(pct=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Name each percentile-rank column 'EPL_<suffix>'. Only the leading 'EP_' prefix
# is swapped, so a later 'EP_' inside a column name is left untouched and the
# result does not depend on the regex default of Series.str.replace.
ep_percentiles.columns = ['EPL_' + col[len('EP_'):] for col in ep_columns]
# Append the percentile-rank columns to the right of the original columns
final = pd.concat([df, ep_percentiles], axis=1)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS, State and County codes to fixed-width, zero-padded strings:
# 2 characters for the state, 3 for the county, 5 for the combined code.
# Each column is cast to int before being rendered as text (this drops, for
# instance, the '.0' of a float value). str.zfill then left-pads with zeros up
# to the target width whatever the current length is, and leaves values that
# are already at least that wide unchanged.
for code_column, width in (('State_code', 2),
                           ('County_code', 3),
                           ('State_County_code', 5)):
    final[code_column] = (
        final[code_column].astype(int).astype(str).str.zfill(width)
    )

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe output: write the enriched frame (and its schema) to the
# "svi_county_percentage_percentile" dataset
output_dataset = dataiku.Dataset("svi_county_percentage_percentile")
output_dataset.write_with_schema(final)
