# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dku_utils import read_pickle_from_dss_folder, get_managed_folder_id_with_folder_name

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from real_estate_pricing.geographic_handling.formating.points import read_geo_point
from real_estate_pricing.flow.constants import (N_NEIGHBORS_POLYGONS_TO_SEARCH,
                                                N_SUCCESSIVE_EXCLUSIONS_STOPPING,
                                                SEARCH_INCLUSION_IN_MULTIPLE_POLYGONS)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dku_utils import update_one_schema_column
from dates_handling import from_datetime_to_dss_string_date

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Input: the prepared real-estate sales dataset, loaded fully into memory.
real_estate_sales = dataiku.Dataset("real_estate_sales_prepared")
real_estate_sales_df = real_estate_sales.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Forwarded as the second argument of read_geo_point when the property
# geo points are parsed (presumably swaps the coordinate order - confirm
# against read_geo_point).
reverse_coordinates = True
# Transaction year of every sale, in row order.
# NOTE(review): not referenced again in the visible part of this script.
properties_sale_years = list(real_estate_sales_df["transaction_year"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Resolve the managed folder that holds the pickled census indexers.
custom_variables = dataiku.get_custom_variables()
project_key = custom_variables["projectKey"]
census_data_indexed_folder_id = get_managed_folder_id_with_folder_name(project_key, "census_data_indexing")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Census years are read from 'census_years.p'; each of them has its own
# pickled polygons indexer stored next to it in the same managed folder.
census_years = read_pickle_from_dss_folder("census_years.p", census_data_indexed_folder_id)
# Mapping: census year -> polygons indexer unpickled for that year.
years_census_polygons_indexer = {
    year: read_pickle_from_dss_folder("census_data_indexer_year_{}.p".format(year),
                                      census_data_indexed_folder_id)
    for year in census_years
}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns used to join each year's sales rows back to the polygon search output.
PROPERTIES_CENSUS_JOIN_KEY = ["transaction_year", "geo_point_index"]
# Final column layout: every input sales column, then the matched polygon index.
properties_with_census_information_schema = list(real_estate_sales_df.columns) + ["included_in_polygon_index"]
yearly_properties_dataframes = []
# The key columns are cast to int on both sides so the merge keys share a dtype.
join_key_dtypes = {column: int for column in PROPERTIES_CENSUS_JOIN_KEY}

for year in census_years:
    print("Handling population data from year '{}'.".format(year))

    # Sales of the current census year, numbered 0..n-1 in row order; that
    # number is the 'geo_point_index' part of the join key.
    is_sold_in_year = real_estate_sales_df["transaction_year"] == year
    filtered_real_estate_sales_df = real_estate_sales_df[is_sold_in_year].copy()
    filtered_real_estate_sales_df["geo_point_index"] = range(len(filtered_real_estate_sales_df))

    # Parse the geo points in the same row order as the numbering above.
    properties_geo_points = [
        read_geo_point(raw_geo_point, reverse_coordinates)
        for raw_geo_point in filtered_real_estate_sales_df["property_geo_point"]
    ]

    # Look the points up in this year's census polygons.
    # NOTE(review): the result is expected to expose a 'geo_point_index'
    # column (it is cast and joined on below), presumably the position of
    # each point in the list passed in - confirm against the indexer.
    year_census_polygons_indexer = years_census_polygons_indexer[year]
    properties_belonging_polygons_df = year_census_polygons_indexer.search_geo_points_belonging_polygons(
        properties_geo_points,
        N_NEIGHBORS_POLYGONS_TO_SEARCH,
        N_SUCCESSIVE_EXCLUSIONS_STOPPING,
        SEARCH_INCLUSION_IN_MULTIPLE_POLYGONS,
    )
    properties_belonging_polygons_df["transaction_year"] = year

    properties_belonging_polygons_df = properties_belonging_polygons_df.astype(join_key_dtypes)
    filtered_real_estate_sales_df = filtered_real_estate_sales_df.astype(join_key_dtypes)

    # Left join: every sale of the year is kept, with or without a polygon match.
    filtered_real_estate_sales_df = pd.merge(filtered_real_estate_sales_df,
                                             properties_belonging_polygons_df,
                                             how="left",
                                             on=PROPERTIES_CENSUS_JOIN_KEY)
    yearly_properties_dataframes.append(filtered_real_estate_sales_df)

# Stack all the yearly results into one frame with a fresh 0..n-1 index.
properties_with_census_information_df = pd.concat(yearly_properties_dataframes, ignore_index=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the input sales columns plus the polygon index, in the schema's order
# (this drops the helper 'geo_point_index' column among others).
properties_with_census_information_df = (
    properties_with_census_information_df[properties_with_census_information_schema]
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Store the polygon index as text.
# NOTE(review): any row left unmatched by the left merge holds NaN here and
# becomes the string "nan" - confirm downstream consumers expect that.
polygon_index_as_text = properties_with_census_information_df["included_in_polygon_index"].astype(str)
properties_with_census_information_df["included_in_polygon_index"] = polygon_index_as_text

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Convert each transaction date with from_datetime_to_dss_string_date
# (presumably datetime -> DSS-formatted date string - confirm in dates_handling).
properties_with_census_information_df["transaction_date"] = (
    properties_with_census_information_df["transaction_date"].apply(from_datetime_to_dss_string_date)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Output: write the enriched sales rows, letting the dataset schema follow
# the dataframe's columns.
properties_belonging_census_polygons = dataiku.Dataset("properties_belonging_census_polygons")
properties_belonging_census_polygons.write_with_schema(properties_with_census_information_df)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# 'transaction_date' was turned into strings before the write; adjust that one
# column of the output schema afterwards (presumably setting its type to
# "date" - confirm against update_one_schema_column).
project_key = dataiku.get_custom_variables()["projectKey"]
update_one_schema_column(project_key, "properties_belonging_census_polygons", "transaction_date", "date")