library(h2o)
library(dataiku)

# Recipe inputs
# Load the Dataiku dataset named "train". The rest of the script treats the
# result as a data frame: columns are selected and replaced by name.
df <- dkuReadDataset("train")

#-----------------------------------------------------------------
# Settings: response column and predictor columns, grouped by type
#-----------------------------------------------------------------
# Column the model is trained to predict.
target.variable <- "Churn"

# Predictors that are coerced to double before training.
features.num <- c(
    "Account_Length",
    "VMail_Message",
    "Day_Mins",
    "Day_Calls",
    "Day_Charge",
    "Eve_Mins",
    "Eve_Calls",
    "Eve_Charge",
    "Night_Mins",
    "Night_Calls",
    "Night_Charge",
    "Intl_Mins",
    "Intl_Calls",
    "Intl_Charge",
    "CustServ_Calls"
)

# Predictors that are coerced to factors before training.
features.cat <- c(
    "State",
    "Area_Code",
    "Intl_Plan",
    "VMail_Plan"
)

#-----------------------------------------------------------------
# Preprocessing
#-----------------------------------------------------------------
# Apply `converter` to each of the named `columns` of `data` and return the
# updated data frame. Columns not listed are left untouched.
coerce.columns <- function(data, columns, converter) {
    data[columns] <- lapply(data[columns], converter)
    data
}

# Categorical predictors become factors, numeric predictors become doubles,
# and the target becomes a factor.
df <- coerce.columns(df, features.cat, as.factor)
df <- coerce.columns(df, features.num, as.double)
df <- coerce.columns(df, target.variable, as.factor)

# Training table: categorical predictors, numeric predictors, then the target.
model.columns <- c(features.cat, features.num, target.variable)
train.ml <- df[model.columns]

#-------------------------------------------------------------------------
# TRAINING
#-------------------------------------------------------------------------
# Start (or attach to) an H2O instance; nthreads = -1 asks H2O to use all
# available CPUs.
localH2O <- h2o.init(nthreads = -1)

# Upload the training table under the key "h2otrain" and keep the returned
# H2OFrame handle. Assigning the result also stops R from auto-printing the
# frame at top level, and lets the model reference the frame object directly
# instead of relying on a string-key lookup.
train.hex <- as.h2o(train.ml, destination_frame = "h2otrain")

# Gradient boosting machine with a bernoulli distribution for the factor
# target. `tweedie_power` is intentionally not passed: it only applies to
# distribution = "tweedie", and the value previously given (1.5) is the
# default anyway.
h2o.model <- h2o.gbm(
    x = c(features.cat, features.num),
    y = target.variable,
    training_frame = train.hex,
    distribution = "bernoulli",
    ntrees = 50,
    max_depth = 5,
    min_rows = 10,
    learn_rate = 0.1,
    sample_rate = 1,
    col_sample_rate = 1,
    nbins = 20,
    nbins_cats = 1024
)

# Print the fitted model summary.
print(h2o.model)


# Recipe outputs
# Clear the managed folder, save the trained model into it, and always shut
# the H2O instance down afterwards, even when clearing or saving fails.
tryCatch(
    {
        # Filesystem path of the managed folder that receives the model.
        model_h2o <- dkuManagedFolderPath("c1YRtnGl")
        if (!is.character(model_h2o) || length(model_h2o) != 1L ||
            !dir.exists(model_h2o)) {
            stop("Managed folder path is not an existing directory",
                 call. = FALSE)
        }

        # Remove previous contents using absolute paths, so the working
        # directory is never changed and no shell wildcard is involved.
        # Hidden entries are left alone, as a shell `*` glob would do.
        stale.entries <- list.files(model_h2o, full.names = TRUE)
        unlink(stale.entries, recursive = TRUE, force = TRUE)

        h2o.saveModel(h2o.model, path = model_h2o, force = TRUE)
    },
    finally = h2o.shutdown(prompt = FALSE)
)

