/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.analysis.ml.clustering.flow;

import com.dataiku.dip.analysis.ml.AbstractTrainingRecipeRunner;
import com.dataiku.dip.analysis.ml.FullModelId;
import com.dataiku.dip.analysis.ml.MLDiagnostics;
import com.dataiku.dip.analysis.ml.MLPaths;
import com.dataiku.dip.analysis.ml.clustering.ClusteringNamer;
import com.dataiku.dip.analysis.ml.clustering.extract.ForcedSampleExtractor;
import com.dataiku.dip.analysis.ml.clustering.flow.ClusteringTrainingRecipePayloadParams;
import com.dataiku.dip.analysis.ml.prediction.split.SplitDesc;
import com.dataiku.dip.analysis.model.MLTask;
import com.dataiku.dip.analysis.model.ModelTrainInfo;
import com.dataiku.dip.analysis.model.clustering.ResolvedClusteringCoreParams;
import com.dataiku.dip.analysis.model.core.ModelUserMeta;
import com.dataiku.dip.analysis.model.core.PreTrainModelingParams;
import com.dataiku.dip.analysis.model.core.ResolvedCoreParams;
import com.dataiku.dip.analysis.model.core.ResolvedPreprocessingParams;
import com.dataiku.dip.analysis.model.core.TrainExecutionParams;
import com.dataiku.dip.cluster.SparkSettings;
import com.dataiku.dip.code.CodeEnvModel;
import com.dataiku.dip.containers.exec.ContainerExecConfigSelector;
import com.dataiku.dip.containers.exec.ContainerExecRuntimeConfig;
import com.dataiku.dip.containers.exec.ContainerExecSelection;
import com.dataiku.dip.containers.exec.KubernetesExecUtils;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.coremodel.SimpleKeyValue;
import com.dataiku.dip.dataflow.JobActivity;
import com.dataiku.dip.dataflow.exec.AbstractPythonRecipeRunner;
import com.dataiku.dip.dataflow.exec.AbstractSparkBasedRecipeRunner;
import com.dataiku.dip.dataflow.exec.ContainerRecipeParams;
import com.dataiku.dip.dataflow.exec.SparkExecutionEnginesHelper;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.dataflow.graph.FlowSavedModel;
import com.dataiku.dip.dataflow.jobrunner.JobContext;
import com.dataiku.dip.dataflow.utils.FlowJobUtils;
import com.dataiku.dip.export.ZipUnzipDir;
import com.dataiku.dip.recipes.InitializableAbortableRecipeRunner;
import com.dataiku.dip.recipes.code.spark.SparkRecipeUtils;
import com.dataiku.dip.recipes.consistency.RecipeCodes;
import com.dataiku.dip.remoterun.RemoteRunsRegistry;
import com.dataiku.dip.security.impersonation.FilesystemACLUtils;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.shaker.model.SerializedShakerScript;
import com.dataiku.dip.shaker.resources.ResourcesGatherer;
import com.dataiku.dip.spark.SparkJob;
import com.dataiku.dip.spark.SparkJobHelper;
import com.dataiku.dip.spark.SparkOverrideConfig;
import com.dataiku.dip.util.AutoDelete;
import com.dataiku.dip.utils.CollectionUtils;
import com.dataiku.dip.utils.DKUFileUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.NotImplementedException;
import com.google.common.collect.Lists;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;

public class ClusteringTrainingRecipeRunner
extends AbstractTrainingRecipeRunner {
    @Autowired
    private DatasetAccessService datasetAccessService;
    private final ResourcesGatherer gatherer = new ResourcesGatherer();
    private ClusteringTrainingRecipePayloadParams desc;
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.recipes.clustering");

    public ClusteringTrainingRecipeRunner(JobActivity activity) {
        super(activity);
    }

    private Dataset getInputDataset(String role) throws IOException {
        return this.datasetAccessService.getMandatory(this.recipe.getModel().getSingleInput(role).getLoc(this.recipe.getProjectKey()));
    }

    @Override
    public void setPayload(String payload) {
        this.desc = (ClusteringTrainingRecipePayloadParams)JSON.parse((String)payload, ClusteringTrainingRecipePayloadParams.class);
    }

    @Override
    public void init() {
        SpringUtils.getInstance().autowire((Object)this.gatherer);
    }

    @Override
    public void run() throws Exception {
        File previousVersionFolder;
        Dataset inputDataset;
        Object inputFDS;
        FlowSavedModel fsm = (FlowSavedModel)this.activity.getSubgraph().getTargets().get(0);
        this.sm = fsm.getSavedModel();
        this.newVersionId = "" + System.currentTimeMillis();
        FullModelId newFMI = new FullModelId(this.sm.projectKey, this.sm.id, this.newVersionId);
        File outModelFolder = MLPaths.savedModelVersionFolder(this.sm, this.newVersionId);
        MLPaths.createIfNeededSavedModelFolderAndRestrictPermissions(this.sm);
        DKUFileUtils.mkdirs((File)outModelFolder);
        ContainerExecSelection containerSelection = this.recipe.getModel().getParamsAs(ContainerRecipeParams.class).getContainerSelection();
        ContainerExecRuntimeConfig clusterContainerConfig = new ContainerExecConfigSelector().selectForML_autoTXN(this.authCtxService.getAuthCtx(), this.recipe.getProjectKey(), containerSelection, this.desc.backendType);
        File splitFolder = new File(outModelFolder, "split");
        DKUFileUtils.mkdirs((File)splitFolder);
        if (this.desc.script == null) {
            this.desc.script = new SerializedShakerScript();
        }
        if (this.desc.expectedPreparationOutputSchema == null) {
            logger.info((Object)"1.4-migrated recipe, inferring preparation output schema");
            assert (this.desc.script.steps.size() == 0);
            inputFDS = this.activity.getSubgraph().getSourceDatasets().get(0);
            inputDataset = ((FlowDataset)inputFDS).getMandatory(this.datasetsDAO);
            this.desc.expectedPreparationOutputSchema = inputDataset.getSchema();
        }
        if ((inputFDS = this.activity.getSubgraph().getSourceDatasets()).size() == 0) {
            throw ErrorContext.iae((String)"Missing input dataset in training recipe");
        }
        inputDataset = this.getInputDataset("main");
        ForcedSampleExtractor fsg = new ForcedSampleExtractor(this.authCtxService.getAuthCtx(), inputDataset, this.desc.sampling, this.desc.script, this.desc.expectedPreparationOutputSchema, splitFolder);
        SplitDesc splitDesc = fsg.compute();
        this.prepareModelFolder(outModelFolder, containerSelection, clusterContainerConfig, splitFolder, splitDesc);
        InitializableAbortableRecipeRunner runner = this.createRunner(containerSelection, clusterContainerConfig, newFMI, outModelFolder);
        this.startRunner(runner);
        String defaultInputDataset = ((FlowDataset)this.recipe.getPredecessors().get(0)).getFullName();
        ModelTrainInfo mti = (ModelTrainInfo)JSON.parseFile((File)new File(outModelFolder, "train_info.json"), ModelTrainInfo.class);
        ModelUserMeta mum = this.createUserMeta(splitDesc.params, mti, this.desc.modelVersionNamePrefix, this.desc.modeling.generateName(), this.desc.modeling.algorithm.name(), defaultInputDataset);
        ClusteringNamer.fill(mum, this.desc.preprocessing, this.desc.modeling);
        if (this.sm.activeVersion != null && (previousVersionFolder = MLPaths.savedModelVersionFolder(this.sm, this.sm.activeVersion)) != null && previousVersionFolder.exists()) {
            try {
                ModelUserMeta oldMUM = (ModelUserMeta)JSON.parseFile((File)new File(previousVersionFolder, "user_meta.json"), ModelUserMeta.class);
                mum.clusterMetas = oldMUM.clusterMetas;
            }
            catch (Exception e) {
                logger.warn((Object)"Failed to copy user meta", (Throwable)e);
            }
        }
        MLDiagnostics.mergeIntoWarnings(new FullModelId(this.recipe.getProjectKey(), this.sm.id, this.newVersionId), this.activity.warnContext);
        JSON.prettyToFile((Object)mum, (File)new File(outModelFolder, "user_meta.json"));
        this.saveSavedModelOriginInfo(outModelFolder, this.desc.generatingModelId);
    }

    private InitializableAbortableRecipeRunner createRunner(ContainerExecSelection containerSelection, final ContainerExecRuntimeConfig clusterContainerConfig, final FullModelId fmi, final File outModelFolder) throws Exception {
        switch (this.desc.backendType) {
            case PY_MEMORY: {
                final File additionalLogsDir = FlowJobUtils.getJobMadeDir("clustering-recipe", "additional-logs");
                final File mainLogFile = FlowJobUtils.getJobTouchedFile("clustering-recipe", "python.log");
                JobContext.getCurrentActivitySummary().engineType = "DSS";
                return new AbstractPythonRecipeRunner(this.activity){

                    /*
                     * Enabled aggressive block sorting
                     * Enabled unnecessary exception pruning
                     * Enabled aggressive exception aggregation
                     */
                    @Override
                    public void run() throws Exception {
                        FilesystemACLUtils.grantFSReadACLs(this.authCtxService.getAuthCtx(), this.projectKey, fmi.getFolderEnsuringSecurity());
                        FilesystemACLUtils.grantFSFullACLs(this.authCtxService.getAuthCtx(), this.projectKey, outModelFolder);
                        JSON.prettyToFile((Object)ClusteringTrainingRecipeRunner.this.desc.script, (File)new File(outModelFolder, "script.json"));
                        String envName = ClusteringTrainingRecipeRunner.this.desc.envName;
                        CodeEnvModel.UsedCodeEnvRef codeEnvRef = new CodeEnvModel.UsedCodeEnvRef(CodeEnvModel.EnvLang.PYTHON, envName);
                        logger.info((Object)("Run training in code env " + StringUtils.defaultIfBlank((String)envName, (String)"built-in") + " (set at deploy-time)"));
                        try (AutoDelete outputTmpDir = FlowJobUtils.getTmpFolder("clustering-train-recipe", "pyrun");){
                            if (clusterContainerConfig == null) {
                                this.executeModule(envName, (File)outputTmpDir, "dataiku.doctor.clustering.reg_train_recipe", outModelFolder.getAbsolutePath());
                                return;
                            }
                            List<String> readableAndWritablePaths = Arrays.asList(outModelFolder.getAbsolutePath(), outputTmpDir.getAbsolutePath());
                            switch (clusterContainerConfig.type) {
                                case DOCKER: {
                                    this.executeDockerCodeRecipe(codeEnvRef, clusterContainerConfig, outModelFolder, mainLogFile, outputTmpDir, RemoteRunsRegistry.ExecutionType.RECIPE_CLUSTERING_TRAIN_PYTHON, JSON.json((Object)ClusteringTrainingRecipeRunner.this.desc), Collections.emptyMap(), readableAndWritablePaths, readableAndWritablePaths);
                                    return;
                                }
                                case KUBERNETES: {
                                    this.executeKubernetesCodeRecipe(codeEnvRef, clusterContainerConfig, outModelFolder, mainLogFile, additionalLogsDir, outputTmpDir, RemoteRunsRegistry.ExecutionType.RECIPE_CLUSTERING_TRAIN_PYTHON, JSON.json((Object)ClusteringTrainingRecipeRunner.this.desc), Collections.emptyMap(), readableAndWritablePaths, readableAndWritablePaths, new KubernetesExecUtils.KubernetesFailureCodeProvider(){

                                        @Override
                                        public InfoMessage.MessageCode codeForOOMKilled() {
                                            return RecipeCodes.ERR_RECIPE_ML_TRAINING_K8S_OOM;
                                        }
                                    });
                                    return;
                                }
                            }
                            return;
                        }
                    }

                    @Override
                    public void init() throws Exception {
                    }
                };
            }
            case H2O: 
            case MLLIB: {
                JobContext.getCurrentActivitySummary().engineType = "SPARK";
                if (containerSelection.containerMode == ContainerExecSelection.ContainerExecMode.EXPLICIT_CONTAINER) {
                    logger.warn((Object)("Ignoring container configuration " + containerSelection.containerConf + ", not compatible with Spark ML engine"));
                }
                final String hiveDb = SparkRecipeUtils.getHiveMetastoreDatabase(this.activity, this.datasetsDAO);
                return new AbstractSparkBasedRecipeRunner(this.activity){

                    @Override
                    public void run() throws Exception {
                        SerializedShakerScript expandedScript = ClusteringTrainingRecipeRunner.this.desc.script.expandedDeepCopy(this.variablesService.getForProject(this.projectKey));
                        ClusteringTrainingRecipeRunner.this.gatherer.gatherAndCompute(this.authCtxService.getAuthCtx(), this.projectKey, expandedScript.steps);
                        JSON.prettyToFile((Object)expandedScript, (File)new File(outModelFolder, "script.json"));
                        JSON.prettyToFile(ClusteringTrainingRecipeRunner.this.gatherer.getResourceMapping(), (File)new File(outModelFolder, "resource_mapping.json"));
                        this.runSpark("clustering", ClusteringTrainingRecipeRunner.this.desc.sparkParams.sparkExecutionEngine, new SparkExecutionEnginesHelper.SparkRecipeJobBuilder(){

                            @Override
                            public <T extends SparkJob> T buildSparkJob(SparkJobHelper<T> helper, File runDir, SparkSettings sparkSettings, List<SimpleKeyValue> effectiveConf) throws Exception {
                                return helper.makeClassJobWithNonSecretGlobalFiles("DSS (train): " + activity.id(), effectiveConf, ClusteringTrainingRecipeRunner.this.gatherer.getResourceFiles(), ClusteringTrainingRecipeRunner.this.desc.backendType == MLTask.BackendType.H2O, "com.dataiku.dip.spark.MLLibClusteringTrainingJob", recipe.getProjectKey(), outModelFolder.getAbsolutePath());
                            }

                            @Override
                            public SparkOverrideConfig getRecipeOverrideConf() {
                                return ClusteringTrainingRecipeRunner.this.desc.sparkParams.sparkConf;
                            }

                            @Override
                            public Map<String, String> getContextOverrideConf() {
                                return CollectionUtils.appendableSSMap().put("spark.dku.ml.preparedDF.storageLevel", ClusteringTrainingRecipeRunner.this.desc.sparkParams.sparkPreparedDFStorageLevel).put("spark.dku.ml.repartitionNonHDFS", String.valueOf(ClusteringTrainingRecipeRunner.this.desc.sparkParams.sparkRepartitionNonHDFS)).put("spark.dku.ml.useGlobalMetastore", Boolean.toString(ClusteringTrainingRecipeRunner.this.desc.sparkParams.sparkUseGlobalMetastore)).put("spark.dku.ml.hiveDb", StringUtils.defaultIfBlank((String)hiveDb, (String)"")).get();
                            }

                            @Override
                            public List<File> getExtraRecursiveFolders() {
                                return Lists.newArrayList((Object[])new File[]{outModelFolder});
                            }

                            @Override
                            public List<String> getWritablePaths() {
                                return Lists.newArrayList((Object[])new String[]{outModelFolder.getAbsolutePath()});
                            }
                        }, new SparkJobHelper.SparkJobPostProcessor(){

                            @Override
                            public void postProcess(SparkJobHelper.SparkJobContext context) throws Exception {
                                if (context.driverRunsRemotely()) {
                                    ZipUnzipDir.extractFolder(new File(outModelFolder, "trainedModel"), outModelFolder);
                                }
                            }
                        }, null);
                    }

                    @Override
                    public void init() throws Exception {
                    }
                };
            }
        }
        throw new NotImplementedException("Unsupported backend type: " + String.valueOf((Object)this.desc.backendType));
    }

    @Override
    protected ResolvedPreprocessingParams getPreprocessing() {
        return this.desc.preprocessing;
    }

    @Override
    protected PreTrainModelingParams getModeling() {
        return this.desc.modeling;
    }

    @Override
    protected ResolvedCoreParams resolveCoreParams(ContainerExecSelection containerSelection) {
        ResolvedClusteringCoreParams coreParams = new ResolvedClusteringCoreParams();
        TrainExecutionParams executionParams = new TrainExecutionParams();
        executionParams.envSelection = this.desc.envSelection;
        executionParams.envName = this.desc.envName;
        executionParams.containerSelection = containerSelection;
        coreParams.executionParams = executionParams;
        coreParams.backendType = this.desc.backendType;
        coreParams.diagnosticsSettings = this.desc.diagnosticsSettings;
        return coreParams;
    }
}

