# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd
import numpy as np
from flask import request
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dss_project_utils import enrich_with_dictionary
from dku_utils import read_pickle_from_dss_folder, get_managed_folder_id_with_folder_name
from real_estate_pricing.geographic_handling.formating.points import convert_from_list_geo_point_to_geojson
from real_estate_pricing.geographic_handling.feature_engineering.information_extraction import get_geodesic_distance
from real_estate_pricing.services.utils import load_features_from_parameters
from real_estate_pricing.services.census_lookup import lookup_census_features
from real_estate_pricing.services.stations_lookup import lookup_stations_features
from real_estate_pricing.services.time_features_lookup import (lookup_district_time_features,
                                                               lookup_polygon_time_features,
                                                               lookup_subway_station_time_features)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from real_estate_pricing.flow.constants import CITY_CENTER, WINDOW_SIZES

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Resolve the project key from the custom variables exposed by dataiku.
custom_variables = dataiku.get_custom_variables()
project_key = custom_variables["projectKey"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Model used to price each property, and the predictor obtained from it.
PROPERTY_PRICING_MODEL_ID = "GxEFXycR"
property_pricing_model = dataiku.Model(PROPERTY_PRICING_MODEL_ID)
property_pricing_predictor = property_pricing_model.get_predictor()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
## Census polygons :
# All census resources are read from the "census_data_indexing" managed folder.
census_folder_id = get_managed_folder_id_with_folder_name(project_key, "census_data_indexing")
census_years = read_pickle_from_dss_folder("census_years.p", census_folder_id)
# Only the indexer matching the greatest value of census_years is loaded.
last_census_year = np.max(census_years)
census_indexer_file_name = "census_data_indexer_year_{}.p".format(last_census_year)
census_polygons_indexer = read_pickle_from_dss_folder(census_indexer_file_name, census_folder_id)
census_metadata = read_pickle_from_dss_folder("dict_census_metadata.p", census_folder_id)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
## Subway stations :
# Indexer first, then metadata, both read from the "subway_stations_indexing" managed folder.
subway_stations_folder_id = get_managed_folder_id_with_folder_name(project_key, "subway_stations_indexing")
subway_stations_indexer, stations_metadata = [
    read_pickle_from_dss_folder(pickle_file_name, subway_stations_folder_id)
    for pickle_file_name in ("subway_stations_indexer.p", "dict_subway_stations_metadata.p")
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
## Time features :
# One pickle per area kind (district, polygon, closest station), all read
# from the "time_features_indexing" managed folder, in that order.
time_features_folder_id = get_managed_folder_id_with_folder_name(project_key, "time_features_indexing")
census_district_features, census_polygon_features, subway_stations_features = [
    read_pickle_from_dss_folder(pickle_file_name, time_features_folder_id)
    for pickle_file_name in (
        "dict_census_district_last_features.p",
        "dict_census_polygon_last_features.p",
        "dict_closest_station_last_features.p",
    )
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs
# Load the "portfolio_geocoded" dataset as a dataframe; its rows are priced
# one by one further down in this script.
portfolio_geocoded = dataiku.Dataset("portfolio_geocoded")
portfolio_geocoded_df = portfolio_geocoded.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Notebook inspection cell: shows the column names when run interactively;
# as a bare expression it has no effect when the file is executed as a script.
portfolio_geocoded_df.columns

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def prepare_property_features_for_model(property_features, property_geo_point):
    """Complete a property's raw features with every derived feature computed here.

    Steps, in order: average surface per room, geodesic distance to
    ``CITY_CENTER``, census lookup, closest subway station lookup, time
    features of the matched district / polygon / station, and finally one
    price estimate per area kind and per entry of ``WINDOW_SIZES``.

    Args:
        property_features: Mapping holding at least "living_surface" and
            "number_of_rooms"; it is written to before being handed to
            ``enrich_with_dictionary``.
        property_geo_point: Location forwarded to the distance and lookup
            helpers (the caller in this file passes [latitude, longitude]).

    Returns:
        The feature mapping returned by the last ``enrich_with_dictionary``
        call, with the price estimates added.
    """
    # (square meter price key, price estimate key) templates, listed in the
    # order in which the estimates are written for each window size.
    price_key_templates = (
        ("census_polygon_name_{}_days_living_surface_square_meter_price_avg",
         "polygon_{}_days_price_estimate"),
        ("census_district_{}_days_living_surface_square_meter_price_avg",
         "district_{}_days_price_estimate"),
        ("closest_station_{}_days_living_surface_square_meter_price_avg",
         "closest_station_{}_days_price_estimate"),
    )

    # Intrinsic and purely geometric features :
    surface = property_features["living_surface"]
    rooms_count = property_features["number_of_rooms"]
    property_features["average_rooms_surface"] = surface / rooms_count
    property_features["distance_from_city_center"] = get_geodesic_distance(
        property_geo_point, CITY_CENTER, False)

    # Census features (the names needed later are read before the merge) :
    print("Retrieving census features ...")
    census_features = lookup_census_features(
        property_geo_point, census_metadata, census_polygons_indexer)
    district = census_features["census_district"]
    polygon_name = census_features["census_polygon_name"]
    property_features = enrich_with_dictionary(property_features, census_features, True)

    # Station features :
    print("Retrieving station features ...")
    station_features = lookup_stations_features(
        property_geo_point, stations_metadata, subway_stations_indexer)
    station_name = station_features["closest_station_name"]
    property_features = enrich_with_dictionary(property_features, station_features, True)

    # Time features : the three lookups run first, then they are merged in
    # district, polygon, station order.
    print("Retrieving time features ...")
    all_time_features = (
        lookup_district_time_features(district, census_district_features),
        lookup_polygon_time_features(polygon_name, census_polygon_features),
        lookup_subway_station_time_features(station_name, subway_stations_features),
    )
    for time_features in all_time_features:
        property_features = enrich_with_dictionary(property_features, time_features, True)

    # Price estimates : living surface times the average square meter price of
    # each area kind, for every window size.
    living_surface = property_features["living_surface"]
    for window_size in WINDOW_SIZES:
        for price_key_template, estimate_key_template in price_key_templates:
            square_meter_price = property_features[price_key_template.format(window_size)]
            property_features[estimate_key_template.format(window_size)] = living_surface * square_meter_price

    return property_features

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import logging

# Root logger: getLogger() is called without a name.
logger = logging.getLogger()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns copied as-is from each portfolio row into the model input.
PORTFOLIO_FEATURES = ["property_type", "number_of_rooms", "living_surface", "number_of_lots"]

# One entry per portfolio row, in row order: the predicted price (None on
# failure) and a status string describing how the prediction went.
portfolio_price_predictions = []
portfolio_price_predictions_statuses = []

for row_index, property_data in portfolio_geocoded_df.iterrows():
    try:
        property_features = {feature: property_data[feature] for feature in PORTFOLIO_FEATURES}

        # The flag is derived from the number of lots. It used to be read from
        # "number_of_rooms" and the computed value was then discarded in favour
        # of a hard-coded True.
        number_of_lots = property_features["number_of_lots"]
        has_lots = bool(number_of_lots > 0)
        property_features["has_lots"] = has_lots

        property_features = load_features_from_parameters(property_features)
        property_geo_point = [property_data["latitude"], property_data["longitude"]]

        property_features = prepare_property_features_for_model(property_features, property_geo_point)
        # Single-row dataframe: the predictor is called once per property.
        property_model_input = pd.DataFrame(property_features, index=[0])
        property_price_prediction = property_pricing_predictor.predict(property_model_input)["prediction"][0]

    except Exception as e:
        # Best effort: one failing property must not abort the whole portfolio.
        # The failure is logged and reported in the row's status column.
        logger.warning("Price prediction failed for portfolio row %s: %r", row_index, e)
        property_price_prediction = None
        prediction_status = "Exception met: '{}'".format(repr(e))

    else:
        prediction_status = "Success"

    portfolio_price_predictions.append(property_price_prediction)
    portfolio_price_predictions_statuses.append(prediction_status)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach the per-row results to the portfolio dataframe; both lists were
# filled in row order by the prediction loop.
for new_column_name, new_column_values in (
    ("price_prediction", portfolio_price_predictions),
    ("prediction_status", portfolio_price_predictions_statuses),
):
    portfolio_geocoded_df[new_column_name] = new_column_values

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# The portfolio dataframe, including its "price_prediction" and
# "prediction_status" columns, goes to the "portfolio_pricing" dataset.
portfolio_pricing = dataiku.Dataset("portfolio_pricing")
portfolio_pricing.write_with_schema(portfolio_geocoded_df)