# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd, numpy as np
from pandas.io.json import json_normalize
import requests
import json
from collections import OrderedDict
from multiprocessing import pool, Lock
from urllib.parse import quote
from dku_utils.projects.project_commons import get_current_project_and_variables
from clinical_sites_intelligence.query_ctgov import create_normalized_columns_name, get_all_studies


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Get schema
# Download the study metadata from the ClinicalTrials.gov v2 API and derive the
# normalized column names from it.
# NOTE(review): this targets the "beta" host — confirm it is still the intended
# endpoint for the v2 API.
url = "https://beta.clinicaltrials.gov/api/v2/studies/metadata"
# A timeout keeps the recipe from hanging forever on an unresponsive server.
response = requests.get(url, timeout=60)
# Fail fast on an HTTP error instead of feeding an error payload to the
# schema normalization below.
response.raise_for_status()
data = response.json()
normalized_col_name = create_normalized_columns_name(data)

# Read the query parameters from the project's "standard" variables
project, variables = get_current_project_and_variables()
standard_variables = variables["standard"]
study_status = standard_variables['study_status']
conditions = standard_variables['conditions']
interventions = standard_variables['interventions']
locations = standard_variables['locations']
lead_sponsors = standard_variables['lead_sponsors']
fields = standard_variables['fields']

# Always restrict the search to the configured start-date range.
start_dates = f"AREA[StartDate]RANGE[{standard_variables['start_date']},{standard_variables['end_date']}]"
essie_expression_syntax = start_dates

# Optionally combine the date filter with a user-supplied Essie expression.
user_expression = standard_variables['essie_expression_syntax']
if user_expression:
    # Ignore surrounding whitespace so a blank value does not leave a dangling AND.
    user_expression = user_expression.strip()
if user_expression:
    # The operator needs surrounding spaces to be a separate token, and the
    # parentheses keep the date filter applied to the whole user expression
    # even when it contains a lower-precedence operator such as OR.
    essie_expression_syntax = f"{start_dates} AND ({user_expression})"

# Call ct.gov API and write dataframe
# NOTE(review): get_all_studies is defined in clinical_sites_intelligence.query_ctgov;
# it is presumably responsible for writing the output dataset — confirm there.
get_all_studies(
    normalized_col_name,
    study_status=study_status,
    conditions=conditions,
    interventions=interventions,
    locations=locations,
    lead_sponsors=lead_sponsors,
    fields=fields,
    essie_expression_syntax=essie_expression_syntax,
)