# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_transitions_between_list_values(list_):
    """Return the transitions between each pair of consecutive values of `list_`.

    Args:
        list_: Ordered sequence of values (it must support slicing). Values
            may repeat and do not need to be hashable.

    Returns:
        A list with one dict per consecutive pair, in order, with the keys:
          - "origin": the first value of the pair,
          - "destination": the value that follows it,
          - "transition_id": the 0-based position of "origin" in `list_`.
        The list is empty when `list_` has fewer than two values.
    """
    # Pairing the sequence with itself shifted by one yields every
    # (value, next value) couple and naturally stops before the last value.
    return [
        {"origin": origin, "destination": destination, "transition_id": transition_id}
        for transition_id, (origin, destination) in enumerate(zip(list_, list_[1:]))
    ]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs: load the "subway_stations_prepared" dataset into a
# DataFrame (the following cells use it through the pandas API).
subway_stations_prepared = dataiku.Dataset("subway_stations_prepared")
subway_stations_prepared_df = subway_stations_prepared.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Bare expression: only useful to display the DataFrame in a notebook;
# it has no effect when the file runs as a plain script.
subway_stations_prepared_df

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Decode the JSON text held in 'station_line_portion_index' into Python objects,
# replacing the column in place:
subway_stations_prepared_df["station_line_portion_index"] = (
    subway_stations_prepared_df["station_line_portion_index"].apply(json.loads)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep one row per distinct (line, line_station_sub_portions) pair.
lines_metadata_df = subway_stations_prepared_df[["line", "line_station_sub_portions"]].drop_duplicates()
# Line identifiers, one entry per row of the metadata frame.
lines = list(lines_metadata_df["line"])
# Map each line to its 'line_station_sub_portions' value.
lines_sub_portions = dict(zip(lines, lines_metadata_df["line_station_sub_portions"]))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns (and column order) of the transitions table built below.
stations_dynamic_schema = ["line", "origin", "destination", "portion_id", "transition_id"]


def _get_portion_transitions(line_df, line, portion_id):
    """Return the transition records of one portion of a line.

    Args:
        line_df: Rows of the stations DataFrame for a single line. Each
            'station_line_portion_index' value must support `in` and
            `[portion_id]` lookups (it is the parsed JSON of that column).
        line: Identifier of the line, copied into every record.
        portion_id: Portion key, e.g. "core_portion" or "sub_portion_0".

    Returns:
        A list of dicts with the keys "origin", "destination",
        "transition_id", "line" and "portion_id", one per pair of consecutive
        stations of the portion.
    """
    # A station belongs to the portion when the portion id is one of the keys
    # of its portion index.
    in_portion = line_df["station_line_portion_index"].apply(
        lambda portion_index: portion_id in portion_index)
    portion_df = line_df[in_portion].copy()

    # The value stored under the portion id gives the station's rank in it.
    portion_df["portion_index"] = portion_df["station_line_portion_index"].apply(
        lambda portion_index: portion_index[portion_id])
    portion_df.sort_values(by="portion_index", ascending=True, inplace=True)

    portion_stations = list(portion_df["station_name"])
    return [
        dict(transition, line=line, portion_id=portion_id)
        for transition in get_transitions_between_list_values(portion_stations)
    ]


# Records are accumulated in a plain list and turned into a DataFrame once at
# the end: concatenating DataFrames inside the loop recopies the accumulated
# rows at every iteration and starts from an empty seed frame.
stations_dynamic_records = []

for line in lines:
    line_df = subway_stations_prepared_df[subway_stations_prepared_df["line"] == line]
    line_sub_portions = lines_sub_portions[line]

    # Every line has a core portion, optionally followed by numbered sub-portions.
    portion_ids = ["core_portion"]
    if line_sub_portions > 0:
        portion_ids.extend(
            "sub_portion_{}".format(sub_portion_id)
            for sub_portion_id in range(line_sub_portions))

    for portion_id in portion_ids:
        stations_dynamic_records.extend(
            _get_portion_transitions(line_df, line, portion_id))

# `columns=` fixes the column order and keeps the schema when there is no record.
# NOTE(review): column dtypes are now inferred from the records themselves;
# confirm the output dataset schema is unchanged on the pandas version in use.
df_stations_dynamic = pd.DataFrame(stations_dynamic_records, columns=stations_dynamic_schema)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: store the transitions table in the
# "subway_stations_dynamic" dataset.
# NOTE(review): going by its name, write_with_schema is expected to also set
# the dataset schema from the DataFrame columns — confirm in the Dataiku API docs.
subway_stations_dynamic = dataiku.Dataset("subway_stations_dynamic")
subway_stations_dynamic.write_with_schema(df_stations_dynamic)