# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.dku_utils.core import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.product_recommendations.config.flow.constants import (WEEK_COMPONENTS_SCHEMA,
                                                                        APP_DATE_COMPONENTS_TO_REMOVE)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.product_recommendations.dates_handling import (extract_datetime_components,
                                                                 from_datetime_to_dss_string_date,
                                                                 simplify_datetime_date)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handle on the current project plus its variables, as returned by the helper.
project, variables = get_current_project_and_variables()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# All recipe parameters are read from the "standard" entry of the variables.
global_variables = variables["standard"]
# Bounds of the batch window.
batch_start_date, batch_end_date = (global_variables["batch_start_date_app"],
                                    global_variables["batch_end_date_app"])
# Number of most recent weeks that are flagged for the machine learning set.
n_weeks_in_machine_learning_set = global_variables["n_weeks_in_machine_learning_set_app"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# One Python datetime per entry of the pandas date range spanning the batch window.
batch_dates = [timestamp.to_pydatetime()
               for timestamp in pd.date_range(start=batch_start_date, end=batch_end_date)]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# For each batch date, extract its "year" and "week_of_year" components.
# A fresh component-name list is passed on every call, as in the original code.
batch_dates_components = [
    extract_datetime_components(batch_date, ["year", "week_of_year"])
    for batch_date in batch_dates
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Split the per-date components into two parallel lists (one entry per batch date),
# in the same order as `batch_dates_components`.
years = [components["year"] for components in batch_dates_components]
weeks_of_year = [components["week_of_year"] for components in batch_dates_components]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Simplify each date (dropping APP_DATE_COMPONENTS_TO_REMOVE) and convert it with
# from_datetime_to_dss_string_date. The generator is lazy, so each date is still
# simplified and converted one after the other.
simplified_dates = (simplify_datetime_date(batch_date, APP_DATE_COMPONENTS_TO_REMOVE)
                    for batch_date in batch_dates)
batch_dates = [from_datetime_to_dss_string_date(simplified_date)
               for simplified_date in simplified_dates]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# One row per batch date.
batch_dates_df = pd.DataFrame(batch_dates, columns=["date"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach the calendar components computed earlier; the lists are aligned with the rows.
batch_dates_df["year"], batch_dates_df["week_of_year"] = years, weeks_of_year

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Computing weeks set:
# One row per distinct (year, week_of_year) pair, most recent week first.
# NOTE(review): ordering by (year, week_of_year) assumes both components follow the
# same calendar convention (e.g. ISO year with ISO week) — confirm against
# extract_datetime_components.
year_week_df = batch_dates_df[["year", "week_of_year"]].drop_duplicates()
year_week_df = year_week_df.sort_values(by=["year", "week_of_year"], ascending=False)

# Rank 1 is the latest week of the batch window, rank 2 the week before, and so on.
year_week_df["past_week_rank"] = list(range(1, len(year_week_df) + 1))

# Every week defaults to the "collaborative_filtering" scope; the
# `n_weeks_in_machine_learning_set` most recent weeks are switched to "machine_learning".
year_week_df["week_scope"] = "collaborative_filtering"
is_machine_learning_week = year_week_df["past_week_rank"] <= n_weeks_in_machine_learning_set
# A single .loc assignment is used instead of chained indexing
# (`df["week_scope"][mask] = ...`): the chained form raises SettingWithCopyWarning and
# does not modify the frame when pandas Copy-on-Write is active.
year_week_df.loc[is_machine_learning_week, "week_scope"] = "machine_learning"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Left join: every date row is kept and receives the rank and scope of its week.
batch_dates_df = batch_dates_df.merge(year_week_df,
                                      how="left",
                                      on=["year", "week_of_year"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
OUTPUT_DATASET_NAME = "date_weeks"

# Overwrite the output dataset's column schema with WEEK_COMPONENTS_SCHEMA and
# persist the settings before any data is written.
dataset = project.get_dataset(OUTPUT_DATASET_NAME)
dataset_settings = dataset.get_settings()
raw_schema = dataset_settings.get_raw()["schema"]
raw_schema["columns"] = WEEK_COMPONENTS_SCHEMA
dataset_settings.save()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# infer_schema=False: presumably keeps the schema set above instead of deriving one
# from the dataframe — confirm against the Dataiku API documentation.
date_weeks = dataiku.Dataset(OUTPUT_DATASET_NAME)
date_weeks.write_dataframe(
    batch_dates_df,
    infer_schema=False,
    dropAndCreate=True,
)