# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from commons.dku_utils.datasets.dataset_commons import get_dataset_schema
from commons.dku_utils.core import get_current_project_and_variables

# Recipe: keep only the identifier, the target and the variables listed in
# `top_ivs` from `applications_binned`, and write them to
# `applications_filtered` with the matching subset of the input schema.
project, variables = get_current_project_and_variables()

# Read recipe inputs
applications_binned = dataiku.Dataset("applications_binned")
applications_binned_df = applications_binned.get_dataframe()
top_ivs = dataiku.Dataset("top_ivs")
top_ivs_df = top_ivs.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns that are always carried to the output, whatever `top_ivs` selects.
mandatory_columns = ['id', 'credit_event']

# Variables selected upstream, de-duplicated with their order preserved.
# A name repeated in `top_ivs`, or one that is already a mandatory column,
# would otherwise yield duplicated dataframe columns that the output schema
# (which lists each input column once) cannot describe.
kept_columns = []
for name in top_ivs_df['variable']:
    if name not in mandatory_columns and name not in kept_columns:
        kept_columns.append(name)

selected_columns = mandatory_columns + kept_columns

# Selecting by label raises KeyError if a listed variable is absent from the
# input, so a stale `top_ivs` fails loudly instead of being silently ignored.
applications_filtered_df = applications_binned_df[selected_columns]

input_schema = get_dataset_schema(project, 'applications_binned')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Subset of the input schema, kept in the input schema's own column order.
output_schema = [column for column in input_schema if column['name'] in selected_columns]
schema_column_names = [column['name'] for column in output_schema]

# Guard against the schema and the dataframe drifting apart: every selected
# column must have a schema entry, otherwise the write below would push data
# that the schema does not describe.
columns_without_schema = [name for name in selected_columns if name not in schema_column_names]
if columns_without_schema:
    raise ValueError(
        "Columns missing from the 'applications_binned' schema: {}".format(columns_without_schema)
    )

# The schema follows the input column order while the selection above follows
# the `top_ivs` order. Re-order the dataframe so both agree column by column.
# NOTE(review): whether Dataiku matches columns by position or by name when
# infer_schema=False depends on the DSS version - aligning is safe either way.
applications_filtered_df = applications_filtered_df[schema_column_names]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
applications_filtered = dataiku.Dataset("applications_filtered")
# The schema is written explicitly (copied from the input) and
# infer_schema=False asks Dataiku not to replace it with one derived from
# the dataframe.
applications_filtered.write_schema(output_schema)
applications_filtered.write_dataframe(applications_filtered_df,
                                     infer_schema=False,
                                     dropAndCreate=True)