# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import json

# custom functions defined in the project library (Libraries tab > G+L)
from census_api_functions import get_project_variables, get_query_text, state_name_list, get_tracts_code_table

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Census query parameters: the table code to request and the data year.
census_code = 'B26001'
year = 2022
# Census API key, looked up through the project helper
# get_project_variables('standard', 'api_key').
census_api_key = get_project_variables('standard','api_key')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Census API request that asks for NAME for every state (for=state:*) on the
# acs/acs5/profile endpoint of the chosen year, authenticated with the API key.
url_path = (
    f"https://api.census.gov/data/{year}/acs/acs5/profile"
    f"?get=NAME&for=state:*&key={census_api_key}"
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# State identifiers for the tract query, obtained by passing the state-listing
# URL to the project helper state_name_list.
# NOTE(review): described as a numerical list of US states, presumably state
# FIPS codes -- confirm against census_api_functions.
state_nums_list = state_name_list(url_path)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Request the table identified by census_code from the U.S. Census Bureau API
# for the listed states and year; the result is used as a DataFrame below.
# NOTE(review): presumably one row per census tract -- confirm in
# get_tracts_code_table.
all_tracts_df = get_tracts_code_table(
    state_nums_list,
    year,
    census_code,
    census_api_key,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Data preprocessing: keep only the geography identifier and the B26001_001E
# column, and expose the latter under the name E_GROUPQ.
all_tracts_df = (
    all_tracts_df[['GEO_ID', 'B26001_001E']]
    .rename(columns={'B26001_001E': 'E_GROUPQ'})
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: open a handle on the Dataiku dataset
# "B26001_svi_tracts" and write the DataFrame to it. write_with_schema also
# sets the dataset's schema from the DataFrame's columns.
B26001_svi_tracts = dataiku.Dataset("B26001_svi_tracts")
B26001_svi_tracts.write_with_schema(all_tracts_df)