# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# ACS 5-year vintage interpolated into every Census API URL in this script.
year = 2022

# Look up the current project through the Dataiku API client and read its variables.
project_key = dataiku.get_custom_variables()["projectKey"]
client = dataiku.api_client()
project = client.get_project(project_key)
variables = project.get_variables()

# The Census API key is read from the project's "standard" variables.
standard_variables = variables["standard"]
census_api_key = standard_variables["api_key"]

# County-level query requesting only NAME for all counties.
url_path = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=NAME&for=county:*&key={census_api_key}"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_query_text(query_url, timeout=60):
    """Fetch ``query_url`` with an HTTP GET and return the response body as text.

    Args:
        query_url: Fully built URL to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).

    Note:
        The HTTP status code is not checked here; an error response is
        returned as-is and callers find out when they try to parse the body.
    """
    response = requests.get(query_url, timeout=timeout)
    return response.text

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Run the NAME-only county query once and parse the JSON payload.
all_state_names_query_url = url_path
all_state_names_query_text = get_query_text(all_state_names_query_url)
all_state_names_query_result_list = json.loads(all_state_names_query_text)

# The first row of the payload is used as the header; the remaining rows are data.
state_query_rows = all_state_names_query_result_list[1:]
state_query_header = all_state_names_query_result_list[0]
all_state_names_df = pd.DataFrame(state_query_rows, columns=state_query_header)

# Distinct values of the "state" column drive the per-state downloads below.
state_nums_list = all_state_names_df["state"].unique().tolist()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_code_dataset(census_code):
    """Download one ACS 5-year profile group for every state and stack the rows.

    One request is issued per entry of the module-level ``state_nums_list``,
    asking for ``NAME`` plus ``group(census_code)`` with ``for=county:*``
    inside that state. The module-level ``year`` and ``census_api_key`` are
    interpolated into the URL.

    Args:
        census_code: Group identifier placed inside ``group(...)``,
            e.g. ``'DP02'``.

    Returns:
        A DataFrame holding the rows of every state that was processed
        successfully, re-indexed 0..n-1, with the first response column
        removed. An empty DataFrame if no state could be processed.

    Note:
        This is best-effort: a state whose download or parsing fails is
        reported with ``print`` and skipped, so the result may be incomplete.
    """
    state_frames = []
    for state in state_nums_list:
        try:
            state_query_url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=NAME,group({census_code})&for=county:*&in=state:{state}&key={census_api_key}"
            state_rows = json.loads(get_query_text(state_query_url))
            # First payload row is the header, the rest are data rows.
            state_df = pd.DataFrame(state_rows[1:], columns=state_rows[0])
            # Drop the first response column (presumably the explicitly
            # requested NAME -- confirm against an actual response).
            state_frames.append(state_df.iloc[:, 1:])
        except Exception as e:
            # Deliberately broad: one failing state must not abort the others.
            print(f"Error processing state {state}: {e}")

    # pd.concat rejects an empty list, so keep the "nothing fetched" result explicit.
    if not state_frames:
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(state_frames, ignore_index=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the full DP02 group for every state.
census_code = "DP02"
all_tracts_df_DP02 = get_code_dataset(census_code=census_code)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the identifier columns and the three DP02 variables used downstream,
# then give those variables their short output names.
# The rename is reassigned rather than done in place: the frame comes straight
# from a column selection, where in-place mutation can raise
# SettingWithCopyWarning, and reassignment matches the other group cells.
all_tracts_df_DP02 = all_tracts_df_DP02[['GEO_ID', 'state', 'county', 'DP02_0001E', 'DP02_0072E', 'DP02_0072PE']]
all_tracts_df_DP02 = all_tracts_df_DP02.rename(columns={'DP02_0001E':'E_HH', 'DP02_0072E':'E_DISABL', 'DP02_0072PE':'EP_DISABL'})

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the full DP03 group for every state.
census_code = "DP03"
all_tracts_df_DP03 = get_code_dataset(census_code=census_code)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns retained from DP03 and the short names two of them are given.
dp03_columns = ['GEO_ID', 'DP03_0005E', 'DP03_0009PE']
dp03_renames = {
    'DP03_0005E': 'E_UNEMP',
    'DP03_0009PE': 'EP_UNEMP',
}
all_tracts_df_DP03 = all_tracts_df_DP03[dp03_columns].rename(columns=dp03_renames)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the full DP04 group for every state.
census_code = "DP04"
all_tracts_df_DP04 = get_code_dataset(census_code=census_code)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns retained from DP04, in the order they should appear.
dp04_columns = [
    'GEO_ID',
    'DP04_0001E',
    'DP04_0012E',
    'DP04_0013E',
    'DP04_0014E',
    'DP04_0078E',
    'DP04_0079E',
    'DP04_0058E',
    'DP04_0014PE',
    'DP04_0002E',
    'DP04_0058PE',
]

# Variables that are published under a short output name; the remaining
# DP04_* columns keep their codes because they only feed later calculations.
dp04_renames = {
    'DP04_0001E': 'E_HU',
    'DP04_0014E': 'E_MOBILE',
    'DP04_0058E': 'E_NOVEH',
    'DP04_0014PE': 'EP_MOBILE',
    'DP04_0058PE': 'EP_NOVEH',
}

all_tracts_df = all_tracts_df_DP04[dp04_columns].rename(columns=dp04_renames)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Cast the columns used in the arithmetic below to float.
dp04_float_columns = ['E_HU','DP04_0012E', 'DP04_0013E','DP04_0078E', 'DP04_0079E','DP04_0002E']
all_tracts_df[dp04_float_columns] = all_tracts_df[dp04_float_columns].astype(float)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Derived DP04 columns: two sums of raw variables and their percentages.
all_tracts_df['E_MUNIT'] = all_tracts_df['DP04_0012E'] + all_tracts_df['DP04_0013E']
all_tracts_df['E_CROWD'] = all_tracts_df['DP04_0078E'] + all_tracts_df['DP04_0079E']
# NOTE(review): a zero DP04_0002E yields inf/NaN here; confirm whether EP_CROWD
# should get the same zero-denominator treatment as EP_MUNIT below.
all_tracts_df['EP_CROWD'] = (all_tracts_df['E_CROWD'] / all_tracts_df["DP04_0002E"]) * 100

# The raw inputs are no longer needed once the derived columns exist.
all_tracts_df = all_tracts_df.drop(['DP04_0012E', 'DP04_0013E','DP04_0078E', 'DP04_0079E', "DP04_0002E"], axis = 1)

# Dividing float columns never raises ZeroDivisionError (a zero denominator
# produces inf/NaN instead), so the zero case is handled with an explicit
# per-row mask: rows whose E_HU is 0 get 0, every other row keeps its ratio.
ep_munit = (all_tracts_df['E_MUNIT'] / all_tracts_df["E_HU"]) * 100
all_tracts_df['EP_MUNIT'] = ep_munit.where(all_tracts_df["E_HU"] != 0, 0.0)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the full DP05 group for every state.
census_code = "DP05"
all_tracts_df_DP05 = get_code_dataset(census_code=census_code)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the identifier plus the seven DP05 variables that are summed later.
dp05_columns = [
    'GEO_ID',
    'DP05_0071E',
    'DP05_0078E',
    'DP05_0079E',
    'DP05_0080E',
    'DP05_0081E',
    'DP05_0082E',
    'DP05_0083E',
]
all_tracts_df_DP05 = all_tracts_df_DP05[dp05_columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Cast every column after the first (GEO_ID) to float.
# The frame is reassigned with a per-column dtype mapping instead of writing
# through ``.iloc[:, 1:] = ...``: on recent pandas that form writes the values
# into the existing columns in place, so their dtype would stay ``object``,
# and it also assigns into the result of a column selection.
dp05_float_columns = {column: float for column in all_tracts_df_DP05.columns[1:]}
all_tracts_df_DP05 = all_tracts_df_DP05.astype(dp05_float_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# E_MINRTY is the row-wise total of every column after the first one.
dp05_summed_inputs = all_tracts_df_DP05.iloc[:, 1:]
all_tracts_df_DP05['E_MINRTY'] = dp05_summed_inputs.sum(axis=1)

# Only the total is kept; the individual DP05 variables are removed.
dp05_consumed_columns = [
    'DP05_0071E',
    'DP05_0078E',
    'DP05_0079E',
    'DP05_0080E',
    'DP05_0081E',
    'DP05_0082E',
    'DP05_0083E',
]
all_tracts_df_DP05 = all_tracts_df_DP05.drop(columns=dp05_consumed_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Index each per-group table by GEO_ID so they can be joined on it.
# Order matters: the DP02 table comes first and becomes the left side of the join.
dfs = [
    group_df.set_index('GEO_ID')
    for group_df in (all_tracts_df_DP02, all_tracts_df_DP03, all_tracts_df, all_tracts_df_DP05)
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Join the remaining tables onto the first one by index.
left_df = dfs[0]
right_dfs = dfs[1:]
final_data = left_df.join(right_dfs)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write the joined table to the recipe's output dataset.
# NOTE(review): final_data carries GEO_ID as its index at this point; confirm
# whether write_with_schema persists the index, otherwise GEO_ID will be
# missing from the output.
output_dataset_name = "D_SVI_county"
D_SVI_county = dataiku.Dataset(output_dataset_name)
D_SVI_county.write_with_schema(final_data)