# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs
input1 = dataiku.Dataset("process-data-joined-new_prepared_24hfiltered")
df = input1.get_dataframe()

# Get factor variables.
# The custom variables are resolved once and then indexed, instead of calling
# dataiku.get_custom_variables() again for every key. Each value is used below
# as the name of a column of the input dataframe.
custom_variables = dataiku.get_custom_variables()
factor_1 = custom_variables["factor_1"]
factor_2 = custom_variables["factor_2"]
factor_3 = custom_variables["factor_3"]
defect_col = custom_variables["defect_col"]

# Filter dataframe: keep only the two timestamp columns, the defect column and
# the three factor columns, then drop every row that has a missing value in
# any of the kept columns.
column_names_to_keep = [
    'timestamp',
    'timestamp_hours',
    defect_col,
    factor_1,
    factor_2,
    factor_3,
]
df = df[column_names_to_keep].dropna()

# Compute average per hour: one output row per distinct "timestamp_hours"
# value, holding the mean of the remaining columns; reset_index() turns the
# group key back into a regular column.
# NOTE(review): 'timestamp' is part of the averaged columns. With
# pandas >= 2.0, groupby().mean() raises a TypeError on non-numeric (object)
# columns instead of silently dropping them - confirm the dtype of
# 'timestamp' in the input dataset.
df = df.groupby(by="timestamp_hours").mean().reset_index()

# Write recipe outputs
new_prepared_24h_filtered_factors = dataiku.Dataset("new_prepared_24h_filtered_factors")
new_prepared_24h_filtered_factors.write_with_schema(df)