# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dku_utils import (get_managed_folder_info,
                       get_managed_folder_id_with_folder_name,
                       write_pickle_in_dss_folder)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from pandas_utils import nest_dataframe_data_to_key_column

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from real_estate_pricing.geographic_handling.relationship.inclusion import GeoPointsIndexer
from real_estate_pricing.geographic_handling.formating.points import read_geo_point

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe input: load the "stations_metadata" dataset as a pandas DataFrame.
stations_metadata = dataiku.Dataset("stations_metadata")
stations_metadata_df = stations_metadata.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Parse each "station_geo_point" value, in row order, with read_geo_point.
# The flag is forwarded as the second positional argument
# (presumably it swaps the coordinate order -- confirm in read_geo_point).
reverse_coordinates = True
geo_point_column = stations_metadata_df["station_geo_point"]
stations_geo_points = [
    read_geo_point(raw_geo_point, reverse_coordinates)
    for raw_geo_point in geo_point_column
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build the indexer from the parsed points; the list order follows the
# DataFrame row order.
subway_stations_indexer = GeoPointsIndexer(stations_geo_points)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Look up the id of the "subway_stations_indexing" managed folder in the
# project whose key is exposed through the "projectKey" variable.
project_variables = dataiku.get_custom_variables()
project_key = project_variables["projectKey"]
subway_stations_indexed_folder_id = get_managed_folder_id_with_folder_name(
    project_key,
    "subway_stations_indexing",
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pickle the indexer into that folder under the name "subway_stations_indexer".
write_pickle_in_dss_folder(
    subway_stations_indexer,
    "subway_stations_indexer",
    subway_stations_indexed_folder_id,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add a positional "station_index" (0 .. n-1, following the current row order)
# and move it to the first column. stations_geo_points was built by iterating
# the same DataFrame in the same row order, so station_index i refers to the
# i-th point handed to GeoPointsIndexer.
#
# The key column is excluded from the "other columns" list so that an input
# which already carries a "station_index" column does not end up with that
# label selected twice (which would yield duplicate column labels). When the
# input has no such column the result is identical to a plain
# ["station_index"] + original columns selection.
stations_metadata_columns = [
    column for column in stations_metadata_df.columns if column != "station_index"
]
stations_metadata_df["station_index"] = range(len(stations_metadata_df))
stations_metadata_df = stations_metadata_df[["station_index"] + stations_metadata_columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Nest the stations metadata under the "station_index" key column
# (presumably a mapping keyed by station_index -- confirm in pandas_utils)
# and pickle the result next to the indexer, in the same managed folder.
dict_subway_stations_metadata = nest_dataframe_data_to_key_column(
    stations_metadata_df,
    "station_index",
)
write_pickle_in_dss_folder(
    dict_subway_stations_metadata,
    "dict_subway_stations_metadata",
    subway_stations_indexed_folder_id,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe output: write the indexed metadata (data and schema) to the
# "stations_metadata_indexed" dataset.
stations_metadata_indexed = dataiku.Dataset("stations_metadata_indexed")
stations_metadata_indexed.write_with_schema(stations_metadata_df)