# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dku_utils import (get_managed_folder_info,
                       get_managed_folder_id_with_folder_name,
                       write_pickle_in_dss_folder)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from pandas_utils import nest_dataframe_data_to_key_column

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe inputs: create a handle on each of the three input datasets first,
# then load every one of them into a pandas DataFrame.
census_district_last_features = dataiku.Dataset("census_district_last_features")
census_polygon_last_features = dataiku.Dataset("census_polygon_last_features")
closest_station_last_features = dataiku.Dataset("closest_station_last_features")

census_district_last_features_df = census_district_last_features.get_dataframe()
census_polygon_last_features_df = census_polygon_last_features.get_dataframe()
closest_station_last_features_df = closest_station_last_features.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Look up the current project key from the custom variables, then resolve the
# id of the managed folder named "time_features_indexing" in that project.
custom_variables = dataiku.get_custom_variables()
project_key = custom_variables["projectKey"]
time_features_indexing_folder_id = get_managed_folder_id_with_folder_name(
    project_key,
    "time_features_indexing",
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Notebook preview of the first two rows of the closest-station dataframe.
# The result is not assigned, so nothing later in the script depends on it.
closest_station_last_features_df.head(2)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Three parallel lists, aligned by position: the i-th dataframe goes with the
# i-th label and the i-th column name. They are zipped together further down.
dataframes = [
    census_district_last_features_df,
    census_polygon_last_features_df,
    closest_station_last_features_df,
]
# Label used to build the name of the pickle written for each dataframe.
dataframe_labels = [
    "census_district_last_features",
    "census_polygon_last_features",
    "closest_station_last_features",
]
# Column passed to the nesting helper as the key column for each dataframe.
dataframe_columns_to_index = [
    "census_district",
    "census_polygon_name",
    "closest_station_name",
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Notebook preview of the first two rows of the census-district dataframe.
# The result is not assigned, so nothing later in the script depends on it.
census_district_last_features_df.head(2)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# For each (dataframe, label, key column) triple: nest the dataframe's data
# under its key column, then pickle the result into the managed folder as
# "dict_<label>".
# NOTE(review): the helper presumably returns a dict keyed by the column's
# values (the pickle name suggests so) — confirm in pandas_utils.
indexing_jobs = zip(dataframes, dataframe_labels, dataframe_columns_to_index)
for dataframe, dataframe_label, column_to_index in indexing_jobs:
    pickle_name = "dict_{}".format(dataframe_label)
    dataframe_features = nest_dataframe_data_to_key_column(dataframe, column_to_index)
    write_pickle_in_dss_folder(
        dataframe_features,
        pickle_name,
        time_features_indexing_folder_id,
    )