# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from commons.dku_utils.core import get_current_project_and_variables
from commons.dku_utils.datasets.dataset_commons import get_dataset_schema

# Resolve the handle of the current project together with its variables.
project, variables = get_current_project_and_variables()

# Recipe inputs: open both dataset handles, then load each one into pandas.
applications_filtered = dataiku.Dataset("applications_filtered")
selected_features = dataiku.Dataset("selected_features")

applications_filtered_df = applications_filtered.get_dataframe()
selected_features_df = selected_features.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns that are always carried over to the output, ahead of the features.
key_columns = ['id', 'credit_event']

# Build the feature list in its original order, skipping repeated names and
# the key columns themselves: selecting the same label twice would produce
# duplicated columns in the output dataframe.
seen_columns = set(key_columns)
kept_columns = []
for feature in selected_features_df['feature']:
    if feature not in seen_columns:
        seen_columns.add(feature)
        kept_columns.append(feature)

applications_final_df = applications_filtered_df[key_columns + kept_columns]

# Schema of the input dataset, used downstream to derive the output schema.
input_schema = get_dataset_schema(project, 'applications_filtered')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build the output schema in the SAME order as the dataframe columns. The
# dataframe is later written with infer_schema=False, so the declared schema
# must line up with the dataframe's columns; filtering the input schema in its
# own order would not guarantee that.
schema_by_name = {column['name']: column for column in input_schema}

# Fail early with an explicit message rather than silently writing a schema
# that lacks some of the dataframe's columns.
missing_columns = [name for name in applications_final_df.columns
                   if name not in schema_by_name]
if missing_columns:
    raise ValueError(
        "Columns missing from the 'applications_filtered' schema: %s"
        % ", ".join(str(name) for name in missing_columns)
    )

output_schema = [schema_by_name[name] for name in applications_final_df.columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe output: declare the schema explicitly, then write the rows with
# schema inference turned off so the declared schema is the one that applies.
applications_final = dataiku.Dataset("applications_final")
applications_final.write_schema(output_schema)
applications_final.write_dataframe(
    applications_final_df,
    infer_schema=False,
    dropAndCreate=True,
)