# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

# Resolve the current Dataiku project and load its project variables.
project_key = dataiku.get_custom_variables()["projectKey"]
client = dataiku.api_client()
project = client.get_project(project_key)
variables = project.get_variables()

# ACS 5-year data vintage used in every Census API URL built by this recipe.
year = 2022

# The Census API key is read from the project's "standard" variables.
census_api_key = variables["standard"]["api_key"]

# Query for the NAME of every state (for=state:*) from the ACS 5-year
# profile endpoint; the response is used to enumerate state codes.
url_path = (
    f"https://api.census.gov/data/{year}/acs/acs5/profile"
    f"?get=NAME&for=state:*&key={census_api_key}"
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_query_text(query_url, timeout=60):
    """Fetch ``query_url`` with an HTTP GET and return the response body as text.

    Args:
        query_url: Fully formed URL to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole recipe.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.

    Note:
        ``requests`` exception messages include the request URL, query string
        and all, so avoid logging them verbatim where the URL carries secrets.
    """
    response = requests.get(query_url, timeout=timeout)
    # Surface HTTP errors here instead of handing an error page to the caller,
    # which would otherwise fail later with a less helpful parse error.
    response.raise_for_status()
    return response.text

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the list of states and parse the JSON payload.
all_state_names_query_url = url_path
all_state_names_query_text = get_query_text(all_state_names_query_url)
all_state_names_query_result_list = json.loads(all_state_names_query_text)

# The first row of the payload holds the column names; the rest are records.
all_state_names_df = pd.DataFrame(
    data=all_state_names_query_result_list[1:],
    columns=all_state_names_query_result_list[0],
)

# Distinct state codes, in order of first appearance, to iterate over later.
state_nums_list = list(pd.unique(all_state_names_df["state"]))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download table group B16005 for every census tract, one request per state,
# and stack the per-state results into a single DataFrame.
#
# Frames are collected in a list and concatenated once after the loop:
# calling pd.concat inside the loop re-copies the accumulated data on every
# iteration, which is quadratic in the number of rows.
state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5?get=NAME,group(B16005)&for=tract:*&in=state:{state}&key={census_api_key}"
        state_rows = json.loads(get_query_text(state_all_tracts_query_url))
        # First row of the payload is the header; the rest are tract records.
        state_df = pd.DataFrame(state_rows[1:], columns=state_rows[0])
    except Exception as e:
        # Best effort: a state that fails to download or parse is reported and
        # skipped, so the final dataset may be missing that state's tracts.
        # NOTE(review): the exception text can contain the request URL,
        # including the API key - confirm job logs are not widely shared.
        print(f"Error processing state {state}: {e}")
    else:
        # Drop the first column (presumably the leading NAME requested in the
        # URL - confirm against an actual response).
        state_frames.append(state_df.iloc[:, 1:])

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be processed.
all_tracts_df = (
    pd.concat(state_frames, ignore_index=True) if state_frames else pd.DataFrame()
)


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns kept from the raw download: the tract identifier, the B16005 table
# total (B16005_001E) and the B16005 estimate columns used further down.
b16005_columns = [
    'GEO_ID', 'B16005_001E', 'B16005_007E', 'B16005_008E', 'B16005_012E',
    'B16005_013E', 'B16005_017E', 'B16005_018E', 'B16005_022E',
    'B16005_023E', 'B16005_029E', 'B16005_030E', 'B16005_034E', 'B16005_035E',
    'B16005_039E', 'B16005_040E', 'B16005_044E', 'B16005_045E',
]
estimate_columns = b16005_columns[1:]

# The values come straight from the parsed JSON response, so cast every
# estimate column to float. astype() with a column->dtype mapping returns a
# new, properly typed frame. The previous `df.iloc[:, 1:] = ...astype(float)`
# assignment writes the values in place on pandas >= 2.0, which leaves the
# columns with object dtype, and it operated on a slice of the original frame
# (SettingWithCopyWarning on later column assignments).
all_tracts_df = all_tracts_df[b16005_columns].astype(
    dict.fromkeys(estimate_columns, float)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# E_LIMENG: per-tract sum of every retained B16005 estimate column except the
# table total (B16005_001E).
# The columns are selected by name rather than by position (`iloc[:, 2:]`) so
# that re-running this cell does not add the existing E_LIMENG column into
# its own sum. The explicit float cast keeps the sum numeric even if the
# columns still carry object dtype.
limeng_component_columns = [
    column for column in all_tracts_df.columns
    if column.startswith('B16005_') and column != 'B16005_001E'
]
all_tracts_df['E_LIMENG'] = (
    all_tracts_df[limeng_component_columns].astype(float).sum(axis=1)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

# EP_LIMENG: E_LIMENG expressed as a percentage of the B16005_001E table total.
limeng_total = all_tracts_df['B16005_001E'].astype(float)
limeng_count = all_tracts_df['E_LIMENG'].astype(float)
limeng_pct = limeng_count.div(limeng_total) * 100

# Float Series division never raises ZeroDivisionError (it yields NaN/inf), so
# the zero-denominator case has to be handled per row. The former try/except
# either never fired, or - with object-dtype operands - fired once and
# overwrote the percentage of *every* tract with 0. Tracts whose total is 0
# get 0; tracts whose total is missing (NaN) stay NaN.
all_tracts_df['EP_LIMENG'] = limeng_pct.where(limeng_total != 0, 0.0)

# Only GEO_ID and the derived E_LIMENG / EP_LIMENG columns are written out, so
# drop the raw B16005 estimate columns.
all_tracts_df = all_tracts_df.drop(
    columns=[
        column for column in all_tracts_df.columns
        if column.startswith('B16005_')
    ]
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

# Write recipe outputs
# Writes all_tracts_df to the Dataiku dataset "B16005_svi_tracts";
# write_with_schema also sets the dataset schema from the DataFrame, so the
# column dtypes at this point determine the stored column types.
B16005_svi_tracts = dataiku.Dataset("B16005_svi_tracts")
B16005_svi_tracts.write_with_schema(all_tracts_df)