# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs
# Dataset new_condition_scored_county renamed to new_measure_scored_county by georgia.kouyialis@dataiku.com on 2023-04-19 18:36:04
# NOTE: the Python variable below keeps the pre-rename name; the dataset it
# actually opens is "new_measure_scored_county".
new_condition_scored_county = dataiku.Dataset("new_measure_scored_county")
# Load the input dataset into `df`, which the following cells transform.
df = new_condition_scored_county.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS codes to 5-character, zero-padded strings.
# `str.zfill(5)` left-pads with as many zeros as needed and leaves values that
# are already 5 or more characters long untouched (the previous lambda added a
# single '0' to *any* value whose length was not exactly 5, including longer ones).
# NOTE(review): `astype(str)` also stringifies missing values; confirm the
# input never contains null FIPS codes.
df['FIPS'] = df['FIPS'].astype(str).str.zfill(5)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Split the scored data into the two frames this recipe outputs.

# 1. Model lookup: the distinct (Health_Measure, model version) pairs, with
#    the version column exposed as 'Model_version'.
df_model = (
    df[['Health_Measure', 'smmd_modelVersion']]
    .drop_duplicates()
    .rename(columns={'smmd_modelVersion': 'Model_version'})
)

# 2. Scored rows without the model-metadata columns, de-duplicated.
df_final = df.drop(
    columns=[
        'smmd_savedModelId',
        'smmd_modelVersion',
        'smmd_fullModelId',
        'smmd_predictionTime',
    ],
).drop_duplicates()
# NOTE: FIPS codes are standardized in an earlier cell (right after the inputs are read), not here.

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Dataset new_condition_format renamed to new_measure_format by georgia.kouyialis@dataiku.com on 2023-04-19 18:40:45
# NOTE: the Python variable below keeps the pre-rename name; the dataset it
# actually writes to is "new_measure_format".
new_condition_format = dataiku.Dataset("new_measure_format")
# Write the de-duplicated frame without model-metadata columns (df_final).
new_condition_format.write_with_schema(df_final)

# Write the Health_Measure / Model_version lookup frame (df_model).
model_version_county = dataiku.Dataset("model_version_county")
model_version_county.write_with_schema(df_model)