/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.DKUApp;
import com.dataiku.dip.analysis.ml.FullModelId;
import com.dataiku.dip.analysis.ml.prediction.split.SplitDesc;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.dataflow.exec.filter.FilterDescUtils;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.PipelineInterruptedException;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.memimpl.MemColumn;
import com.dataiku.dip.datalayer.memimpl.MemRow;
import com.dataiku.dip.datalayer.memimpl.MemTable;
import com.dataiku.dip.datalayer.memimpl.MemTableAppendingOutput;
import com.dataiku.dip.datalayer.memimpl.MemTableHStack;
import com.dataiku.dip.datalayer.utils.FilterProcessorOutput;
import com.dataiku.dip.datalayer.utils.LimitProcessorOutput;
import com.dataiku.dip.datasets.DatasetHandler;
import com.dataiku.dip.datasets.DatasetRecordCount;
import com.dataiku.dip.datasets.DatasetSelectionToMemTable;
import com.dataiku.dip.datasets.SamplingParam;
import com.dataiku.dip.datasets.SingleThreadPusherToMemTable;
import com.dataiku.dip.datasets.sample.SampleDatasetMeta;
import com.dataiku.dip.datasets.sample.SampleDatasetParams;
import com.dataiku.dip.futures.DSSFuturePayloadUtils;
import com.dataiku.dip.futures.FuturePayload;
import com.dataiku.dip.futures.FutureResponse;
import com.dataiku.dip.futures.FutureService;
import com.dataiku.dip.futures.FutureThread;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.formats.csv.CSVFormatConfig;
import com.dataiku.dip.input.formats.csv.CSVFormatExtractor;
import com.dataiku.dip.input.utils.ExternalInputStreamInput;
import com.dataiku.dip.logging.MainLoggingConfigurator;
import com.dataiku.dip.mec.AbstractGenAIModelEvaluation;
import com.dataiku.dip.mec.AgentModelEvaluation;
import com.dataiku.dip.mec.FullModelEvaluationId;
import com.dataiku.dip.mec.LLMModelEvaluation;
import com.dataiku.dip.recipes.nlp.evaluation.AbstractGenAIEvaluationRecipePayloadParams;
import com.dataiku.dip.reports.IReflectedEventsService;
import com.dataiku.dip.resourceusage.ComputeResourceUsageContext;
import com.dataiku.dip.resourceusage.CurrentComputeResourceUsageContext;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.DSSAuthCtx;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.shaker.PreviewSampleHelper;
import com.dataiku.dip.shaker.sampleio.SampleReader;
import com.dataiku.dip.shaker.sampleio.SampleWriter;
import com.dataiku.dip.shaker.server.SampleMetadata;
import com.dataiku.dip.streaming.endpoints.StreamingEndpointSimpleConsumer;
import com.dataiku.dip.streaming.endpoints.StreamingEndpointSimplePusher;
import com.dataiku.dip.streaming.endpoints.StreamingEndpointSimplePusherFactory;
import com.dataiku.dip.streaming.endpoints.model.StreamingEndpoint;
import com.dataiku.dip.utils.AutoCloseableLock;
import com.dataiku.dip.utils.DKUFileUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ExceptionUtils;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.NamedLock;
import com.dataiku.dip.variables.VariablesContext;
import com.dataiku.dip.variables.VariablesService;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.org.apache.commons.codec.digest.DigestUtils;
import com.dataiku.dss.shadelib.org.apache.commons.io.FileUtils;
import com.dataiku.dss.shadelib.org.apache.commons.lang3.math.NumberUtils;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonSerializationContext;
import com.google.gson.reflect.TypeToken;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.kafka.common.errors.InterruptException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class SampleBuilder {
    // Future service used by the instance-level buildSample(...) methods to run a sample build and wait for its final response.
    @Autowired
    private FutureService futureService;
    // Read once at class initialization from "dku.security.perUserCaches.tableSamples.enabled" (default false).
    // When true, dataset and streaming-endpoint sample directories get one sub-directory per user.
    private static final boolean perUserSamples = DKUApp.getParams().getBoolParam("dku.security.perUserCaches.tableSamples.enabled", false);
    // NOTE(review): presumably the row cap applied when building predicted-data samples (the visible code uses the literal 50000L) - confirm.
    public static final int PREDICTED_SAMPLE_MAX_LINES = 50000;
    // NOTE(review): presumably the row cap for model-evaluation samples; its use is outside the visible part of the file - confirm.
    public static final int MODEL_EVALUATION_SAMPLE_MAX_LINES = 50000;
    // File-name prefix used to look up the newest sample file of a dataset (see sampleId).
    private static final String SAMPLE_FILE_PREFIX = "sample-";
    // Extracts the sample id (group 1: letters, digits and dashes) from a sample file name.
    private static final Pattern SAMPLE_ID_PATTERN = Pattern.compile("sample-([A-Za-z0-9-]+)\\..+");
    // Suffix passed to sampleFilename(...) for the sample data files.
    public static final String DSS1_SUFFIX = ".dss1";
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.shaker.sample");

    /**
     * Starts a background thread that counts the records of a dataset.
     *
     * @param owner         auth context handed to the counting thread
     * @param inputDataset  dataset whose records are counted
     * @param isForPreview  when {@code true} no thread is started
     * @return the started thread, named after the current thread with a {@code -ctr} suffix,
     *         or {@code null} when {@code isForPreview} is set
     */
    public static CountDatasetRecordsThread startCountDatasetRecords(AuthCtx owner, Dataset inputDataset, boolean isForPreview) {
        if (!isForPreview) {
            String counterThreadName = Thread.currentThread().getName() + "-ctr";
            CountDatasetRecordsThread counter = new CountDatasetRecordsThread(owner, inputDataset, counterThreadName);
            counter.start();
            return counter;
        }
        return null;
    }

    /**
     * Builds the name of the named lock guarding sample computation for a dataset.
     *
     * @param name dataset identifier appended to the fixed lock prefix
     * @return {@code "shaker.mem.computesample.dataset." + name}
     */
    public static String getComputeSampleLockName(String name) {
        StringBuilder lockName = new StringBuilder("shaker.mem.computesample.dataset.");
        lockName.append(name);
        return lockName.toString();
    }

    /**
     * Deletes all samples of a dataset (preview and non-preview, for all users) while holding
     * the dataset's compute-sample named lock.
     *
     * @param dataset dataset whose samples are removed
     * @throws IOException          if a sample directory cannot be deleted
     * @throws InterruptedException if interrupted while acquiring the lock
     */
    public static void clearSamplesWithLock(Dataset dataset) throws IOException, InterruptedException {
        String computeLockName = SampleBuilder.getComputeSampleLockName(dataset.getFullName());
        logger.info((Object)"Waiting for sample computing");
        try (AutoCloseableLock heldLock = NamedLock.acquireInterruptibly(computeLockName)) {
            logger.info((Object)("Removing samples for " + dataset.getFullName()));
            // Preview samples first, then regular ones.
            for (boolean preview : new boolean[]{true, false}) {
                SampleBuilder.clearSamplesForAllUsers(dataset, preview);
            }
        }
    }

    /**
     * Loads sample metadata from a JSON file.
     *
     * @param sampleMetadataFile metadata file to read
     * @return the parsed metadata, or {@code null} when the path is not a regular file or
     *         parsing fails (the failure is logged at info level)
     */
    public static SampleMetadata getSampleMetadata(File sampleMetadataFile) {
        if (!sampleMetadataFile.isFile()) {
            return null;
        }
        try {
            return (SampleMetadata)JSON.parseFile(sampleMetadataFile, SampleMetadata.class);
        }
        catch (Exception parseFailure) {
            logger.info((Object)("Could not parse sample metadata file:" + ExceptionUtils.getMessageWithCauses((Throwable)parseFailure)));
            return null;
        }
    }

    /**
     * Loads serialized warnings from a JSON file.
     *
     * @param sampleWarningFile warnings file to read
     * @return the parsed warnings, or {@code null} when the path is not a regular file or
     *         parsing fails (the failure is logged at info level)
     */
    public static WarningsContext.SerializedWarnings getSampleWarning(File sampleWarningFile) {
        if (!sampleWarningFile.isFile()) {
            return null;
        }
        try {
            return (WarningsContext.SerializedWarnings)JSON.parseFile(sampleWarningFile, WarningsContext.SerializedWarnings.class);
        }
        catch (Exception parseFailure) {
            logger.info((Object)("Could not parse sample warnings file:" + ExceptionUtils.getMessageWithCauses((Throwable)parseFailure)));
            return null;
        }
    }

    /**
     * Creates the metadata describing a freshly extracted sample.
     *
     * <p>The sampling settings are copied from {@code selection}; then
     * {@code sampleIsWholeDataset} and {@code datasetRecordCount} are derived, in priority order:
     * <ol>
     *   <li>partitions, filter or memory limit reached: not whole, count unknown ({@code -1});</li>
     *   <li>no record limit or FULL sampling: whole, count is the sample size;</li>
     *   <li>HEAD_SEQUENTIAL: whole only if the record limit was not exceeded;</li>
     *   <li>sampling pushed down to the DB: not whole, count unknown;</li>
     *   <li>otherwise: count is {@code processedRowCount}, whole if it equals the sample size.</li>
     * </ol>
     *
     * @return the populated metadata object
     */
    public static SampleMetadata buildSampleMetadata(boolean hasPartitions, int partitionCount, boolean hasFilter, long processedRowCount, boolean memoryLimitReached, long memoryLimitInMB, int tableRowCount, boolean maxRecordLimitOverReached, SamplingParam selection, boolean wasSamplingPushedDownToDB) {
        SampleMetadata meta = new SampleMetadata();
        meta.samplingMethod = selection.samplingMethod;
        meta.maxRecords = selection.maxRecords;
        meta.targetRatio = selection.targetRatio;
        meta.hasFilter = hasFilter;
        meta.partitionCount = partitionCount;
        meta.memoryLimitInMB = memoryLimitInMB;
        meta.sampleRecordCount = tableRowCount;

        if (hasPartitions || hasFilter || memoryLimitReached) {
            meta.sampleIsWholeDataset = false;
            meta.datasetRecordCount = -1L;
            meta.memoryLimitReached = memoryLimitReached;
            return meta;
        }
        if (selection.maxRecords <= 0L || selection.samplingMethod == SamplingParam.SamplingMethod.FULL) {
            meta.sampleIsWholeDataset = true;
            meta.datasetRecordCount = tableRowCount;
            return meta;
        }
        if (selection.samplingMethod == SamplingParam.SamplingMethod.HEAD_SEQUENTIAL) {
            if (maxRecordLimitOverReached) {
                meta.sampleIsWholeDataset = false;
                meta.datasetRecordCount = -1L;
            } else {
                meta.sampleIsWholeDataset = true;
                meta.datasetRecordCount = (long)tableRowCount;
            }
            return meta;
        }
        if (wasSamplingPushedDownToDB) {
            meta.sampleIsWholeDataset = false;
            meta.datasetRecordCount = -1L;
            return meta;
        }
        meta.sampleIsWholeDataset = processedRowCount == (long)tableRowCount;
        meta.datasetRecordCount = processedRowCount;
        return meta;
    }

    /**
     * Folds the result of the background record-counting thread into the sample metadata and
     * stops the thread if it is still running.
     *
     * <p>The count is only consulted when {@code sampleMetadata.datasetRecordCount} is
     * {@code -1}. A still-running thread is given up to one more second to finish. When an exact
     * count equals the sample record count, the sample is flagged as being the whole dataset.
     *
     * <p>The counting thread is interrupted in a {@code finally} block, so it is also stopped when
     * the wait itself is interrupted instead of being left running in the background.
     *
     * @param countTotalRecordsThread counting thread, may be {@code null} (then nothing is done)
     * @param sampleMetadata          metadata to complete
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public static void completeCountDatasetRecords(CountDatasetRecordsThread countTotalRecordsThread, SampleMetadata sampleMetadata) throws InterruptedException {
        if (countTotalRecordsThread == null) {
            return;
        }
        try {
            if (sampleMetadata.datasetRecordCount == -1L) {
                if (countTotalRecordsThread.isAlive()) {
                    logger.info((Object)"countTotalRecordsThread not finished yet, give it one more second to complete.");
                    countTotalRecordsThread.join(1000L);
                }
                DatasetRecordCount recordCount = countTotalRecordsThread.recordCount;
                if (recordCount != null) {
                    sampleMetadata.datasetRecordCount = recordCount.count;
                    sampleMetadata.recordCountIsApproximate = recordCount.isApproximate;
                    if (!sampleMetadata.recordCountIsApproximate && sampleMetadata.datasetRecordCount == sampleMetadata.sampleRecordCount) {
                        sampleMetadata.sampleIsWholeDataset = true;
                    }
                }
            }
        }
        finally {
            // Always stop a still-running counter, including when join() was interrupted.
            if (countTotalRecordsThread.isAlive()) {
                countTotalRecordsThread.interrupt();
            }
        }
    }

    /** Returns the root directory of dataset samples ({@code caches/shaker-samples}). */
    private static File datasetsSamplesDir() {
        String[] pathSegments = {"caches", "shaker-samples"};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /**
     * Returns the samples directory shared by all users for one dataset. Preview samples live in
     * a sibling directory whose name carries a {@code .preview} suffix.
     */
    private static File datasetSamplesDirForAllUsers(String datasetName, boolean isForPreview) {
        String suffix;
        if (isForPreview) {
            suffix = ".preview";
        } else {
            suffix = "";
        }
        String[] pathSegments = {"caches", "shaker-samples", datasetName + suffix};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /**
     * Returns the dataset samples directory to use for a given user: the shared directory, or a
     * per-user sub-directory of it when per-user samples are enabled.
     */
    private static File datasetSamplesDirForUser(AuthCtx user, String datasetName, boolean isForPreview) {
        File sharedDir = SampleBuilder.datasetSamplesDirForAllUsers(datasetName, isForPreview);
        if (!perUserSamples) {
            return sharedDir;
        }
        String[] userSegment = {SampleBuilder.cleanIdentifier(user)};
        return ApplicationConfigurator.getFile(sharedDir, userSegment);
    }

    /** Returns the shared samples directory of a streaming endpoint ({@code caches/streaming-endpoint-samples/<name>}). */
    private static File streamingEndpointSamplesDirForAllUsers(String name) {
        String[] pathSegments = {"caches", "streaming-endpoint-samples", name};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /**
     * Returns the streaming-endpoint samples directory to use for a given user: the shared
     * directory, or a per-user sub-directory of it when per-user samples are enabled.
     */
    private static File streamingEndpointSamplesDirForUser(AuthCtx authCtx, String name) {
        File sharedDir = SampleBuilder.streamingEndpointSamplesDirForAllUsers(name);
        if (!perUserSamples) {
            return sharedDir;
        }
        String[] userSegment = {SampleBuilder.cleanIdentifier(authCtx)};
        return ApplicationConfigurator.getFile(sharedDir, userSegment);
    }

    /** Returns the predicted-data samples directory ({@code caches/analysis-predicted-data-samples/<name>}). */
    private static File predictedDataSamplesDir(String name) {
        String[] pathSegments = {"caches", "analysis-predicted-data-samples", name};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /** Returns the model-evaluation samples directory ({@code caches/model-evaluation-samples/<name>}). */
    private static File modelEvaluationSamplesDir(String name) {
        String[] pathSegments = {"caches", "model-evaluation-samples", name};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /** Returns the samples directory of one model comparison ({@code caches/model-comparison-samples/<projectKey>/<modelComparisonId>}). */
    private static File modelComparisonSamplesDir(String projectKey, String modelComparisonId) {
        String[] pathSegments = {"caches", "model-comparison-samples", projectKey, modelComparisonId};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /** Returns the root directory of all model-comparison samples ({@code caches/model-comparison-samples}). */
    private static File rootModelComparisonSamplesDir() {
        String[] pathSegments = {"caches", "model-comparison-samples"};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /** Returns the model-comparison samples directory of one project ({@code caches/model-comparison-samples/<projectKey>}). */
    private static File modelComparisonSamplesDir(String projectKey) {
        String[] pathSegments = {"caches", "model-comparison-samples", projectKey};
        return ApplicationConfigurator.getFile(pathSegments);
    }

    /**
     * Describes an existing dataset sample.
     *
     * @return a {@code SampleMeta} carrying the id, the sample file's last-modified time, and the
     *         metadata and warnings read from their side files (each {@code null} when missing
     *         or unparsable); {@code null} when the sample file does not exist
     */
    public static SampleMeta getSampleMeta(AuthCtx authCtx, Dataset dataset, String id, boolean isForPreview) {
        File sampleData = SampleBuilder.sampleFile(authCtx, dataset, id, isForPreview);
        if (!sampleData.exists()) {
            return null;
        }
        SampleMeta meta = new SampleMeta();
        meta.id = id;
        meta.computeTime = sampleData.lastModified();
        File metadataJson = SampleBuilder.metadataFile(authCtx, dataset, id, isForPreview);
        meta.metadata = SampleBuilder.getSampleMetadata(metadataJson);
        File warningsJson = SampleBuilder.warningFile(authCtx, dataset, id, isForPreview);
        meta.warnings = SampleBuilder.getSampleWarning(warningsJson);
        return meta;
    }

    /**
     * Describes an existing streaming-endpoint sample.
     *
     * @return a {@code SampleMeta} carrying the id, the sample file's last-modified time and,
     *         when the warnings file exists and parses, the warnings; {@code null} when the
     *         sample file does not exist
     */
    public static SampleMeta getSampleMeta(AuthCtx authCtx, StreamingEndpoint streamingEndpoint, String id) {
        File sampleData = SampleBuilder.sampleFile(authCtx, streamingEndpoint, id);
        if (!sampleData.exists()) {
            return null;
        }
        SampleMeta meta = new SampleMeta();
        meta.id = id;
        meta.computeTime = sampleData.lastModified();
        File warningsJson = SampleBuilder.warningFile(authCtx, streamingEndpoint, id);
        if (!warningsJson.isFile()) {
            return meta;
        }
        try {
            meta.warnings = (WarningsContext.SerializedWarnings)JSON.parseFile(warningsJson, WarningsContext.SerializedWarnings.class);
        }
        catch (Exception parseFailure) {
            // Best effort: an unreadable warnings file only costs the warnings, not the sample.
            logger.info((Object)("Could not parse sample warnings file:" + ExceptionUtils.getMessageWithCauses((Throwable)parseFailure)));
        }
        return meta;
    }

    /**
     * Writes the warnings of a dataset sample to its warnings file. Nothing is written when
     * there are no warnings ({@code totalCount <= 0}).
     *
     * @throws IOException if the file cannot be written
     */
    public static void dumpWarnings(AuthCtx authCtx, Dataset dataset, String id, WarningsContext.SerializedWarnings sw, boolean isForPreview) throws IOException {
        if (sw.totalCount <= 0) {
            return;
        }
        File target = SampleBuilder.warningFile(authCtx, dataset, id, isForPreview);
        JSON.prettyToFile((Object)sw, target);
    }

    /**
     * Writes the warnings of a streaming-endpoint sample to its warnings file. Nothing is
     * written when there are no warnings ({@code totalCount <= 0}).
     *
     * @throws IOException if the file cannot be written
     */
    public static void dumpWarnings(AuthCtx authCtx, StreamingEndpoint streamingEndpoint, String id, WarningsContext.SerializedWarnings sw) throws IOException {
        if (sw.totalCount <= 0) {
            return;
        }
        File target = SampleBuilder.warningFile(authCtx, streamingEndpoint, id);
        JSON.prettyToFile((Object)sw, target);
    }

    /**
     * Builds a minimal {@code SampleMeta} (id and compute time only) from a sample file.
     *
     * @return the meta, with the file's last-modified time as compute time, or {@code null}
     *         when the file does not exist
     */
    public static SampleMeta getSampleMetaFromSampleFile(File sampleFile, String sampleId) {
        if (sampleFile.exists()) {
            SampleMeta meta = new SampleMeta();
            meta.id = sampleId;
            meta.computeTime = sampleFile.lastModified();
            return meta;
        }
        return null;
    }

    /**
     * Describes the predicted-data sample of a model.
     *
     * @return the sample meta, or {@code null} when the sample file does not exist
     */
    public static SampleMeta getPredictedDataSampleMeta(FullModelId fmi, String sampleId) {
        String[] sampleName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        File samplesDir = SampleBuilder.predictedDataSamplesDir(fmi.toString());
        File sampleData = DKUFileUtils.getWithin(samplesDir, sampleName);
        return SampleBuilder.getSampleMetaFromSampleFile(sampleData, sampleId);
    }

    /**
     * Describes the sample of a model evaluation.
     *
     * @return the sample meta, or {@code null} when the sample file does not exist
     */
    public static SampleMeta getModelEvaluationSampleMeta(FullModelEvaluationId fme, String sampleId) {
        String[] sampleName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        File samplesDir = SampleBuilder.modelEvaluationSamplesDir(fme.toString());
        File sampleData = DKUFileUtils.getWithin(samplesDir, sampleName);
        return SampleBuilder.getSampleMetaFromSampleFile(sampleData, sampleId);
    }

    /**
     * Describes the sample of a model comparison.
     *
     * @return the sample meta, or {@code null} when the sample file does not exist
     */
    public static SampleMeta getModelComparisonsSampleMeta(String projectKey, String modelComparisonId, String sampleId) {
        String[] sampleName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        File samplesDir = SampleBuilder.modelComparisonSamplesDir(projectKey, modelComparisonId);
        File sampleData = DKUFileUtils.getWithin(samplesDir, sampleName);
        return SampleBuilder.getSampleMetaFromSampleFile(sampleData, sampleId);
    }

    /**
     * Lists, recursively, every file stored under the model-comparison samples directory.
     *
     * @param projectKey  project whose samples are listed; ignored when {@code allProjects} is set
     * @param allProjects when {@code true}, list from the root directory shared by all projects
     * @return the files found, or an empty list when the directory does not exist or is not a
     *         directory
     */
    public static Collection<File> listModelComparisonSamples(String projectKey, boolean allProjects) {
        File dir = allProjects ? SampleBuilder.rootModelComparisonSamplesDir() : SampleBuilder.modelComparisonSamplesDir(projectKey);
        // isDirectory() is already false for a missing path, so no separate exists() check is needed.
        if (!dir.isDirectory()) {
            return Collections.emptyList();
        }
        return FileUtils.listFiles(dir, null, true);
    }

    /**
     * Deletes the dataset samples directory used for the given user.
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearSamplesForUser(AuthCtx user, Dataset dataset, boolean isForPreview) throws IOException {
        File userSamplesDir = SampleBuilder.datasetSamplesDirForUser(user, dataset.getFullName(), isForPreview);
        DKUFileUtils.deleteDirectory(userSamplesDir);
    }

    /**
     * Deletes the streaming-endpoint samples directory used for the given user.
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearSamplesForUser(AuthCtx user, StreamingEndpoint streamingEndpoint) throws IOException {
        File userSamplesDir = SampleBuilder.streamingEndpointSamplesDirForUser(user, streamingEndpoint.getFullId());
        DKUFileUtils.deleteDirectory(userSamplesDir);
    }

    /**
     * Deletes the samples directory shared by all users for a dataset (preview or regular).
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearSamplesForAllUsers(Dataset dataset, boolean isForPreview) throws IOException {
        File sharedSamplesDir = SampleBuilder.datasetSamplesDirForAllUsers(dataset.getFullName(), isForPreview);
        DKUFileUtils.deleteDirectory(sharedSamplesDir);
    }

    /**
     * Deletes every dataset samples directory whose name starts with {@code projectKey + "."}.
     * Does nothing when the samples root cannot be listed.
     *
     * @throws IOException if a directory cannot be deleted
     */
    public static void clearSamplesForProject(String projectKey) throws IOException {
        final String projectPrefix = projectKey + ".";
        File[] projectSampleDirs = SampleBuilder.datasetsSamplesDir().listFiles((parent, childName) -> childName.startsWith(projectPrefix));
        if (projectSampleDirs == null) {
            return;
        }
        for (int i = 0; i < projectSampleDirs.length; i++) {
            DKUFileUtils.deleteDirectory(projectSampleDirs[i]);
        }
    }

    /**
     * Deletes the predicted-data samples directory of a model.
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearPredictedDataSamples(FullModelId fmi) throws IOException {
        File samplesDir = SampleBuilder.predictedDataSamplesDir(fmi.toString());
        DKUFileUtils.deleteDirectory(samplesDir);
    }

    /**
     * Deletes the samples directory of a model evaluation.
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearModelEvaluationSamples(FullModelEvaluationId fme) throws IOException {
        File samplesDir = SampleBuilder.modelEvaluationSamplesDir(fme.toString());
        DKUFileUtils.deleteDirectory(samplesDir);
    }

    /**
     * Deletes the samples directory of a model comparison.
     *
     * @throws IOException if the directory cannot be deleted
     */
    public static void clearModelComparisonSamples(String projectKey, String modelComparison) throws IOException {
        File samplesDir = SampleBuilder.modelComparisonSamplesDir(projectKey, modelComparison);
        DKUFileUtils.deleteDirectory(samplesDir);
    }

    /**
     * Finds the id of the newest sample stored for a dataset and user.
     *
     * @return the id extracted from the newest {@code sample-*} file name, or {@code null} when
     *         the samples directory is missing, holds no such file, or the file name does not
     *         match the sample-id pattern
     */
    public static String sampleId(AuthCtx authCtx, Dataset dataset, boolean isForPreview) {
        File userSamplesDir = SampleBuilder.datasetSamplesDirForUser(authCtx, dataset.getFullName(), isForPreview);
        if (!userSamplesDir.isDirectory()) {
            return null;
        }
        File newestSample = DKUFileUtils.newestFile(userSamplesDir, SAMPLE_FILE_PREFIX);
        if (newestSample == null) {
            return null;
        }
        Matcher idMatcher = SAMPLE_ID_PATTERN.matcher(newestSample.getName());
        return idMatcher.find() ? idMatcher.group(1) : null;
    }

    /** Returns the location of the data file ({@code .dss1}) of a dataset sample for the given user. */
    public static File sampleFile(AuthCtx authCtx, Dataset dataset, String sampleId, boolean isForPreview) {
        File userSamplesDir = SampleBuilder.datasetSamplesDirForUser(authCtx, dataset.getFullName(), isForPreview);
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        return DKUFileUtils.getWithin(userSamplesDir, fileName);
    }

    /** Returns the location of the data file ({@code .dss1}) of a streaming-endpoint sample for the given user. */
    public static File sampleFile(AuthCtx authCtx, StreamingEndpoint streamingEndpoint, String sampleId) {
        File userSamplesDir = SampleBuilder.streamingEndpointSamplesDirForUser(authCtx, streamingEndpoint.getFullId());
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        return DKUFileUtils.getWithin(userSamplesDir, fileName);
    }

    /** Returns the location of the metadata file ({@code .metadata.json}) of a dataset sample for the given user. */
    public static File metadataFile(AuthCtx authCtx, Dataset dataset, String sampleId, boolean isForPreview) {
        File userSamplesDir = SampleBuilder.datasetSamplesDirForUser(authCtx, dataset.getFullName(), isForPreview);
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, ".metadata.json")};
        return DKUFileUtils.getWithin(userSamplesDir, fileName);
    }

    /** Returns the location of the warnings file ({@code .warn}) of a dataset sample for the given user. */
    public static File warningFile(AuthCtx authCtx, Dataset dataset, String sampleId, boolean isForPreview) {
        File userSamplesDir = SampleBuilder.datasetSamplesDirForUser(authCtx, dataset.getFullName(), isForPreview);
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, ".warn")};
        return DKUFileUtils.getWithin(userSamplesDir, fileName);
    }

    /** Returns the location of the warnings file ({@code .warn}) of a streaming-endpoint sample for the given user. */
    public static File warningFile(AuthCtx authCtx, StreamingEndpoint streamingEndpoint, String sampleId) {
        File userSamplesDir = SampleBuilder.streamingEndpointSamplesDirForUser(authCtx, streamingEndpoint.getFullId());
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, ".warn")};
        return DKUFileUtils.getWithin(userSamplesDir, fileName);
    }

    /**
     * Reads a stored dataset sample into memory.
     *
     * @return the sample content as a {@code MemTable}
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readSample(AuthCtx authCtx, Dataset dataset, String sampleId, boolean isForPreview) throws IOException {
        File sampleData = SampleBuilder.sampleFile(authCtx, dataset, sampleId, isForPreview);
        try (SampleReader reader = new SampleReader(sampleData, dataset)) {
            return reader.readAsMemtable();
        }
    }

    /**
     * Reads a stored dataset sample into memory, passing {@code maxRows} to the reader.
     *
     * @return the sample content as a {@code MemTable}
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readSample(AuthCtx authCtx, Dataset dataset, String sampleId, boolean isForPreview, long maxRows) throws IOException {
        File sampleData = SampleBuilder.sampleFile(authCtx, dataset, sampleId, isForPreview);
        try (SampleReader reader = new SampleReader(sampleData, dataset, maxRows)) {
            return reader.readAsMemtable();
        }
    }

    /**
     * Reads a stored streaming-endpoint sample into memory.
     *
     * @return the sample content as a {@code MemTable}
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readSample(AuthCtx authCtx, StreamingEndpoint streamingEndpoint, String sampleId) throws IOException {
        File sampleData = SampleBuilder.sampleFile(authCtx, streamingEndpoint, sampleId);
        try (SampleReader reader = new SampleReader(sampleData, streamingEndpoint)) {
            return reader.readAsMemtable();
        }
    }

    /**
     * Reads a sample file into memory.
     *
     * @return the sample content as a {@code MemTable}
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readSample(File sampleFile) throws IOException {
        try (SampleReader reader = new SampleReader(sampleFile)) {
            return reader.readAsMemtable();
        }
    }

    /**
     * Reads the predicted-data sample of a model into memory.
     *
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readPredictedSample(FullModelId fmi, String sampleId) throws IOException {
        File samplesDir = SampleBuilder.predictedDataSamplesDir(fmi.toString());
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        return SampleBuilder.readSample(DKUFileUtils.getWithin(samplesDir, fileName));
    }

    /**
     * Reads the sample of a model evaluation into memory.
     *
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readModelEvaluationSample(FullModelEvaluationId fme, String sampleId) throws IOException {
        File samplesDir = SampleBuilder.modelEvaluationSamplesDir(fme.toString());
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        return SampleBuilder.readSample(DKUFileUtils.getWithin(samplesDir, fileName));
    }

    /**
     * Reads the sample of a model comparison into memory.
     *
     * @throws IOException if the sample file cannot be read
     */
    public static MemTable readModelComparisonSample(String projectKey, String modelComparisonId, String sampleId) throws IOException {
        File samplesDir = SampleBuilder.modelComparisonSamplesDir(projectKey, modelComparisonId);
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        return SampleBuilder.readSample(DKUFileUtils.getWithin(samplesDir, fileName));
    }

    /**
     * Builds a dataset sample by running a {@code BuildSampleFutureThread} through the future
     * service, waits for its final response and merges the returned warnings into
     * {@code warningsContext}.
     *
     * @param user auth context; a "none" context is substituted when {@code null}
     * @throws Exception if the future yields no result, or on any failure while running it
     *                   (the failure is logged at warn level before being rethrown)
     */
    public void buildSample(Dataset inputDataset, String id, DatasetSelectionToMemTable selection, WarningsContext warningsContext, AuthCtx user, boolean isForPreview, boolean isForceBuild) throws Exception {
        AuthCtx effectiveUser = user;
        if (effectiveUser == null) {
            effectiveUser = DSSAuthCtx.newNone();
        }
        BuildSampleFutureThread buildThread = new BuildSampleFutureThread((DSSAuthCtx)effectiveUser, id, inputDataset, selection, isForPreview, isForceBuild);
        try {
            FutureResponse response = this.futureService.runFuture(buildThread, 0L, new TypeToken<FutureResponse<WarningsContext.SerializedWarnings>>(){});
            response = this.futureService.waitForFinalResponse(response);
            if (response.result == null) {
                throw new Exception("Failed to build the sample, remote future failed or was aborted.");
            }
            WarningsContext.SerializedWarnings buildWarnings = (WarningsContext.SerializedWarnings)response.result;
            logger.info((Object)("Received sample future result, warningsCount=" + buildWarnings.totalCount));
            warningsContext.merge(buildWarnings);
        }
        catch (Exception failure) {
            logger.warn((Object)"Failure while building sample", (Throwable)failure);
            throw failure;
        }
    }

    /**
     * Extracts a sample of a dataset into an in-memory table and builds its metadata.
     *
     * <p>Decompiler note (CFR): "Removed try catching itself - possible behaviour change", so the
     * try/finally structure below may not exactly mirror the original source.
     *
     * <p>Steps visible here:
     * <ol>
     *   <li>When running in the BACKEND process, a reflected event is published saying the
     *       backend should never compute samples in memory; the computation still proceeds.</li>
     *   <li>If {@code getStaticRecordCount} returns a negative value, a background
     *       record-counting thread is started (none is started for previews).</li>
     *   <li>The selection filter, if any, is expanded with the project's variables.</li>
     *   <li>Rows are pushed into the table. For an unpartitioned, unfiltered HEAD_SEQUENTIAL
     *       selection with a positive limit, one extra row is requested so that reaching the
     *       limit can be told apart from reading the whole dataset.</li>
     *   <li>Reaching the in-memory size limit is not an error: it is logged and recorded in the
     *       metadata.</li>
     * </ol>
     *
     * @param inputDataset    dataset to sample
     * @param id              id stored in the returned sample meta
     * @param selection       sampling settings; {@code maxRecords} is temporarily incremented and
     *                        {@code maxRecordsForDisplay} is set on the HEAD_SEQUENTIAL path
     * @param warningsContext collects warnings raised while pushing rows
     * @param user            auth context used for reading and counting
     * @param isForPreview    forwarded to the record-count thread start (no thread for previews)
     * @return the sample: its meta (id, current time, metadata, warnings) and the table
     * @throws Exception on any failure other than the in-memory size limit being reached
     */
    public static Sample computeSample(Dataset inputDataset, String id, DatasetSelectionToMemTable selection, WarningsContext warningsContext, AuthCtx user, boolean isForPreview) throws Exception {
        int staticDatasetRecordCount;
        if (ApplicationConfigurator.getProcessType() == MainLoggingConfigurator.ProcessType.BACKEND) {
            // Only reported, not enforced: the sample is still computed afterwards.
            String message = "Backend should never compute sample in memory";
            ((IReflectedEventsService)SpringUtils.getBean(IReflectedEventsService.class)).publish(new IReflectedEventsService.ReflectedEvent(message, (Throwable)new RuntimeException(message)));
        }
        // A negative static count means "unknown": count in the background (thread is null for previews).
        CountDatasetRecordsThread countDatasetRecordsThread = (staticDatasetRecordCount = SampleBuilder.getStaticRecordCount(inputDataset)) < 0 ? SampleBuilder.startCountDatasetRecords(user, inputDataset, isForPreview) : null;
        MemTable table = new MemTable();
        // Stays at -1 / false when the push is cut short by the memory limit.
        long processedRowCount = -1L;
        boolean memoryLimitReached = false;
        boolean maxRecordLimitOverReached = false;
        boolean selectionHasPartition = selection.hasPartition(inputDataset);
        boolean selectionHasFilter = selection.hasFilter();
        boolean wasSamplingPushedDownToDB = false;
        if (selection.filter != null) {
            VariablesContext variablesContext = ((VariablesService)SpringUtils.getBean(VariablesService.class)).getForProject(inputDataset.getProjectKey());
            FilterDescUtils.expand(selection.filter, variablesContext);
        }
        // NOTE(review): if push() fails with anything other than the size-limit exception, the
        // counting thread started above does not appear to be stopped on this path - confirm.
        try {
            SingleThreadPusherToMemTable pusher = new SingleThreadPusherToMemTable(user, inputDataset, table, true);
            pusher.setDatasetSelection(selection);
            pusher.setWarningsContext(warningsContext);
            if (!selectionHasPartition && !selectionHasFilter && selection.samplingMethod == SamplingParam.SamplingMethod.HEAD_SEQUENTIAL && selection.maxRecords > 0L) {
                try {
                    // maxRecordsForDisplay keeps the requested limit; maxRecords is bumped by one for the push.
                    selection.maxRecordsForDisplay = selection.maxRecords++;
                    pusher.push();
                }
                finally {
                    // Getting the extra row means the limit was hit: drop it and remember that.
                    if ((long)table.nrows() >= selection.maxRecords) {
                        table.rows.remove(table.nrows() - 1);
                        maxRecordLimitOverReached = true;
                    }
                    // Restore the caller's limit whether or not the push succeeded.
                    --selection.maxRecords;
                }
            } else {
                pusher.push();
            }
            processedRowCount = pusher.getProcessedRowCount();
            wasSamplingPushedDownToDB = pusher.wasSamplingPushedDownToDB();
        }
        catch (MemTableAppendingOutput.MemTableSizeLimitReachedException e) {
            // Truncated sample: keep what was read so far.
            logger.warn((Object)(String.valueOf(WarningsContext.WarningType.MEMSIZE_TRUNCATED_SAMPLE) + ": The sample size limit in MB was reached"));
            memoryLimitReached = true;
        }
        logger.info((Object)("Done extracting sample, nrows=" + table.nrows()));
        // maxStoredBytes is divided by 0x100000 (1 MiB) to get the limit in MB; 0 when no limit is set.
        SampleMetadata sampleMetadata = SampleBuilder.buildSampleMetadata(selectionHasPartition, selection.getRequestedPartitionCount(), selectionHasFilter, processedRowCount, memoryLimitReached, selection.maxStoredBytes > 0L ? selection.maxStoredBytes / 0x100000L : 0L, table.nrows(), maxRecordLimitOverReached, selection, wasSamplingPushedDownToDB);
        if (staticDatasetRecordCount >= 0) {
            // A known static count overrides whatever buildSampleMetadata derived.
            sampleMetadata.datasetRecordCount = staticDatasetRecordCount;
        } else {
            SampleBuilder.completeCountDatasetRecords(countDatasetRecordsThread, sampleMetadata);
        }
        SampleMeta s = new SampleMeta(id, new Date().getTime(), sampleMetadata, warningsContext.getOutput());
        return new Sample(s, table);
    }

    /**
     * Builds a streaming-endpoint sample by running a {@code BuildStreamingSampleFutureThread}
     * through the future service, waits for its final response and merges the returned warnings
     * into {@code warningsContext}.
     *
     * @param user auth context; a "none" context is substituted when {@code null}
     * @throws Exception if the future yields no result, or on any failure while running it
     *                   (the failure is logged at warn level before being rethrown)
     */
    public void buildSample(StreamingEndpoint inputStreamingEndpoint, String id, DatasetSelectionToMemTable selection, WarningsContext warningsContext, AuthCtx user) throws Exception {
        AuthCtx effectiveUser = user;
        if (effectiveUser == null) {
            effectiveUser = DSSAuthCtx.newNone();
        }
        BuildStreamingSampleFutureThread buildThread = new BuildStreamingSampleFutureThread((DSSAuthCtx)effectiveUser, id, inputStreamingEndpoint, selection);
        try {
            FutureResponse response = this.futureService.runFuture(buildThread, 0L, new TypeToken<FutureResponse<WarningsContext.SerializedWarnings>>(){});
            response = this.futureService.waitForFinalResponse(response);
            if (response.result == null) {
                throw new Exception("Failed to build the sample, remote future failed or was aborted.");
            }
            WarningsContext.SerializedWarnings buildWarnings = (WarningsContext.SerializedWarnings)response.result;
            logger.info((Object)("Received sample future result, warningsCount=" + buildWarnings.totalCount));
            warningsContext.merge(buildWarnings);
        }
        catch (Exception failure) {
            logger.warn((Object)"Failure while building sample", (Throwable)failure);
            throw failure;
        }
    }

    /**
     * Creates the {@code build_sample} future payload targeting the "sample" part of a dataset.
     *
     * @param parentFuture optional parent future; when present its payload's display name (or
     *                     {@code "unnamed"} when it has no payload) is appended to the display name
     * @return the new payload
     */
    public static FuturePayload buildFuturePayload(FutureThread<?> parentFuture, Dataset dataset) {
        FuturePayload fp = new FuturePayload();
        fp.action = "build_sample";
        fp.targets.add(DSSFuturePayloadUtils.forDataset(dataset).withPart("sample"));
        if (parentFuture == null) {
            fp.displayName = "Building sample";
        } else {
            FuturePayload parentPayload = parentFuture.getPayload();
            fp.displayName = "Building sample for " + (parentPayload != null ? parentPayload.displayName : "unnamed");
        }
        return fp;
    }

    /**
     * Creates the {@code build_sample} future payload targeting the "sample" part of a streaming
     * endpoint.
     *
     * @param parentFuture optional parent future; when present its payload's display name (or
     *                     {@code "unnamed"} when it has no payload) is appended to the display name
     * @return the new payload
     */
    public static FuturePayload buildFuturePayload(FutureThread<?> parentFuture, StreamingEndpoint streamingEndpoint) {
        FuturePayload fp = new FuturePayload();
        fp.action = "build_sample";
        fp.targets.add(DSSFuturePayloadUtils.forStreamingEndpoint(streamingEndpoint).withPart("sample"));
        if (parentFuture == null) {
            fp.displayName = "Building sample";
        } else {
            FuturePayload parentPayload = parentFuture.getPayload();
            fp.displayName = "Building sample for " + (parentPayload != null ? parentPayload.displayName : "unnamed");
        }
        return fp;
    }

    /**
     * Writes an in-memory table to a sample file, creating the parent directories first.
     *
     * @param sampleFile destination file
     * @param table      table to serialize
     * @throws IOException if the directories or the file cannot be written
     */
    public static void writeResultSample(File sampleFile, MemTable table) throws IOException {
        DKUFileUtils.mkdirsParent(sampleFile);
        try (SampleWriter writer = new SampleWriter(sampleFile)) {
            writer.writeMemTable(table);
        }
        logger.info((Object)"Dumped sample to disk");
    }

    /**
     * Builds and stores the predicted-data sample of a model.
     *
     * <p>The {@code right} CSV (with header row) is read into a table, limited to 50000 rows.
     * When {@code left} is given, it is read with {@code leftSchema} under the same limit and the
     * two tables are combined with {@code MemTableHStack.hStackInto(left, right)}; otherwise the
     * right table alone is the sample. The result is written to the model's predicted-data
     * samples directory.
     *
     * @param left       optional input-data CSV, may be {@code null}
     * @param right      CSV holding the model output
     * @param leftSchema schema applied when reading {@code left}
     * @param fmi        model the sample belongs to
     * @param sampleId   id used to name the sample file
     * @throws Exception on any read, extraction or write failure
     */
    private static void buildPredictedDataSample_(File left, File right, Schema leftSchema, FullModelId fmi, String sampleId) throws Exception {
        MemTable predictedTable = new MemTable();
        CSVFormatConfig predictedFormat = CSVFormatConfig.getStandardTabExcelFormat();
        predictedFormat.parseHeaderRow = true;
        CSVFormatExtractor predictedExtractor = new CSVFormatExtractor(predictedFormat);
        predictedExtractor.setLimit(new ExtractionLimit(50000L));
        try (FileInputStream rawIn = new FileInputStream(right);
             ExternalInputStreamInput input = new ExternalInputStreamInput(rawIn, right.getName(), right::lastModified)) {
            predictedExtractor.run(input.getSingleSplit(), (ProcessorOutput)new MemTableAppendingOutput(predictedTable), predictedTable, predictedTable);
        }

        MemTable result;
        if (left == null) {
            // No input data to stack with: the predicted data is the whole sample.
            result = predictedTable;
        } else {
            result = new MemTable();
            CSVFormatExtractor inputExtractor = new CSVFormatExtractor(CSVFormatConfig.getStandardTabExcelFormat());
            inputExtractor.setSchema(leftSchema, false);
            inputExtractor.setLimit(new ExtractionLimit(50000L));
            try (FileInputStream rawIn = new FileInputStream(left);
                 ExternalInputStreamInput input = new ExternalInputStreamInput(rawIn, left.getName(), left::lastModified)) {
                inputExtractor.run(input.getSingleSplit(), (ProcessorOutput)new MemTableAppendingOutput(result), result, result);
            }
            MemTableHStack.hStackInto(result, predictedTable);
        }

        logger.info((Object)("Done extracting sample, got " + result.nrows() + " rows"));
        File samplesDir = SampleBuilder.predictedDataSamplesDir(fmi.toString());
        String[] fileName = {SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)};
        SampleBuilder.writeResultSample(DKUFileUtils.getWithin(samplesDir, fileName), result);
    }

    /**
     * Builds the predicted-data sample of a prediction model from its {@code predicted.csv}
     * file and the matching split file.
     *
     * <p>The split file is {@code splitDesc.fullPath} when the split uses k-fold, and
     * {@code splitDesc.testPath} otherwise; it is resolved within the model's split folder.
     *
     * @param fmi model whose sample is built
     * @param sampleId identifier of the sample to write
     * @param splitDesc split description providing the file names and the schema
     * @throws Exception if the sample cannot be built
     */
    public static void buildPredictedSampleForPrediction(FullModelId fmi, String sampleId, SplitDesc splitDesc) throws Exception {
        File predictions = fmi.getModelFile("predicted.csv");
        String splitFileName;
        if (splitDesc.params.kfold) {
            splitFileName = splitDesc.fullPath;
        } else {
            splitFileName = splitDesc.testPath;
        }
        File splitFile = DKUFileUtils.getWithin((File)fmi.getSplitFolder(), new String[]{splitFileName});
        SampleBuilder.buildPredictedDataSample_(splitFile, predictions, splitDesc.schema, fmi, sampleId);
    }

    /**
     * Builds the predicted-data sample of a clustering model from the full split file
     * ({@code splitDesc.fullPath} within the model's split folder) and the model's
     * {@code clustered.csv} file.
     *
     * @param fmi model whose sample is built
     * @param sampleId identifier of the sample to write
     * @param splitDesc split description providing the file name and the schema
     * @throws Exception if the sample cannot be built
     */
    public static void buildPredictedSampleForClustering(FullModelId fmi, String sampleId, SplitDesc splitDesc) throws Exception {
        String[] splitFileName = new String[]{splitDesc.fullPath};
        File sourceRows = DKUFileUtils.getWithin((File)fmi.getSplitFolder(), splitFileName);
        File clusterAssignments = fmi.getModelFile("clustered.csv");
        SampleBuilder.buildPredictedDataSample_(sourceRows, clusterAssignments, splitDesc.schema, fmi, sampleId);
    }

    /**
     * Builds the predicted-data sample of a time series forecasting model.
     *
     * <p>Only the model's {@code predicted.csv} file is used: no left-hand file and no schema
     * are passed to the shared builder.
     *
     * @param fmi model whose sample is built
     * @param sampleId identifier of the sample to write
     * @throws Exception if the sample cannot be built
     */
    public static void buildPredictedSampleForTimeseriesForecast(FullModelId fmi, String sampleId) throws Exception {
        File forecast = fmi.getModelFile("predicted.csv");
        File noSourceFile = null;
        Schema noSourceSchema = null;
        SampleBuilder.buildPredictedDataSample_(noSourceFile, forecast, noSourceSchema, fmi, sampleId);
    }

    /**
     * Builds the sample of a model evaluation and writes it under the evaluation's samples
     * directory.
     *
     * <p>Every cell that {@code NumberUtils.isParsable} accepts is rewritten in place as a
     * decimal rounded to 3 fraction digits ({@code RoundingMode.HALF_UP}); other cells are left
     * untouched. The rounded table is then persisted through {@link #writeResultSample}, so the
     * output file is only created once the in-memory table is final.
     *
     * @param fme evaluation whose stored sample is read
     * @param sampleId identifier used to derive the sample file name
     * @throws Exception if the evaluation sample is missing or cannot be read, or if writing fails
     */
    public static void buildModelEvaluationSample(FullModelEvaluationId fme, String sampleId) throws Exception {
        MemTable sampleTable = SampleBuilder.extractModelEvaluationSample(fme);
        logger.info((Object)("Done extracting sample, got " + sampleTable.nrows() + " rows"));
        File f = DKUFileUtils.getWithin((File)SampleBuilder.modelEvaluationSamplesDir(fme.toString()), (String[])new String[]{SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)});
        // Normalize numeric cells before touching the disk, so a failure here cannot leave
        // an empty or partially written sample file behind.
        for (int i = 0; i < sampleTable.rows.size(); ++i) {
            Row row = sampleTable.rows.get(i);
            for (MemColumn column : sampleTable.columnsList) {
                String value = row.get((Column)column);
                if (!NumberUtils.isParsable(value)) {
                    continue;
                }
                BigDecimal rounded = new BigDecimal(value).setScale(3, RoundingMode.HALF_UP);
                row.put((Column)column, rounded.toString());
            }
        }
        // Shared helper: creates the parent directory, writes the table and logs completion.
        SampleBuilder.writeResultSample(f, sampleTable);
    }

    /**
     * Builds the sample of a GenAI model comparison by merging the stored samples of several
     * model evaluations, and writes it under the comparison's samples directory.
     *
     * <p>Rows are grouped by the raw value of each evaluation's input column: rows from
     * different evaluations sharing the same input value are merged into one JSON object.
     * Recognized columns (input, output, ground truth, context, actual/reference tool calls) are
     * stored under dedicated labels; every other column is stored as
     * {@code "<column> for <evaluation name>"}. Values accepted by
     * {@code NumberUtils.isParsable} are rounded to 3 fraction digits (HALF_UP).
     *
     * <p>Only merged rows whose number of keys equals the number of columns of
     * {@code sampleSchema} are kept in the written sample.
     *
     * @param genAIModelEvaluations evaluations to compare
     * @param sampleId identifier used to derive the sample file name
     * @param sampleSchema schema whose column count a merged row must match to be kept
     * @param modelComparisonId identifier of the model comparison
     * @param projectKey key of the project holding the comparison
     * @return for each evaluation, the number of merged rows that were not kept although they
     *         contain an output for that evaluation; evaluations without such rows are absent
     * @throws Exception if an evaluation sample is missing or unreadable, or if writing fails
     */
    public static Map<AbstractGenAIModelEvaluation, Integer> buildModelComparisonSample(List<AbstractGenAIModelEvaluation> genAIModelEvaluations, String sampleId, Schema sampleSchema, String modelComparisonId, String projectKey) throws Exception {
        // Load every evaluation sample before merging anything.
        LinkedHashMap<AbstractGenAIModelEvaluation, MemTable> tableByEvaluation = new LinkedHashMap<AbstractGenAIModelEvaluation, MemTable>();
        for (AbstractGenAIModelEvaluation evaluation : genAIModelEvaluations) {
            tableByEvaluation.put(evaluation, SampleBuilder.extractModelEvaluationSample(evaluation.ref));
        }

        // Merged rows, keyed by the raw (unrounded) input value, in first-seen order.
        LinkedHashMap<String, JsonObject> mergedRows = new LinkedHashMap<String, JsonObject>();
        for (AbstractGenAIModelEvaluation evaluation : genAIModelEvaluations) {
            MemTable evaluationTable = tableByEvaluation.get(evaluation);
            boolean fromPromptRecipe = evaluation.inputFormat == AbstractGenAIEvaluationRecipePayloadParams.GenAiEvalInputFormat.PROMPT_RECIPE;
            String inputColumnName = fromPromptRecipe ? "dkuReconstructedInput" : evaluation.inputColumnName;
            String outputColumnName = fromPromptRecipe ? "dkuParsedOutput" : evaluation.outputColumnName;

            String contextColumnName = null;
            if (evaluation instanceof LLMModelEvaluation) {
                LLMModelEvaluation llmEvaluation = (LLMModelEvaluation)evaluation;
                boolean usesParsedContexts = llmEvaluation.inputFormat == AbstractGenAIEvaluationRecipePayloadParams.GenAiEvalInputFormat.PROMPT_RECIPE
                        || llmEvaluation.inputFormat == AbstractGenAIEvaluationRecipePayloadParams.GenAiEvalInputFormat.DATAIKU_ANSWERS;
                contextColumnName = usesParsedContexts ? "dkuParsedContexts" : llmEvaluation.contextColumnName;
            }

            String actualToolCallsColumnName = null;
            String referenceToolCallsColumnName = null;
            if (evaluation instanceof AgentModelEvaluation) {
                AgentModelEvaluation agentEvaluation = (AgentModelEvaluation)evaluation;
                if (agentEvaluation.inputFormat == AbstractGenAIEvaluationRecipePayloadParams.GenAiEvalInputFormat.PROMPT_RECIPE) {
                    actualToolCallsColumnName = "dkuParsedTrajectory";
                } else {
                    actualToolCallsColumnName = agentEvaluation.actualToolCallsColumnName;
                }
                referenceToolCallsColumnName = agentEvaluation.referenceToolCallsColumnName;
            }

            String meName = evaluation.userMeta.name;
            MemTable.RowsIterator rowIterator = evaluationTable.getInterruptibleRows();
            while (rowIterator.hasNext()) {
                MemRow sourceRow = rowIterator.next();
                String inputValue = sourceRow.get(inputColumnName);
                JsonObject merged = mergedRows.computeIfAbsent(inputValue, key -> new JsonObject());
                for (MemColumn column : evaluationTable.columnsList) {
                    String columnName = column.getName();
                    String value = sourceRow.get(columnName);
                    if (NumberUtils.isParsable(value)) {
                        value = new BigDecimal(value).setScale(3, RoundingMode.HALF_UP).toString();
                    }
                    // The checks are deliberately not exclusive: one column may play several roles.
                    boolean hasDedicatedLabel = false;
                    if (columnName.equals(inputColumnName)) {
                        merged.addProperty("Input", value);
                        hasDedicatedLabel = true;
                    }
                    if (columnName.equals(outputColumnName)) {
                        merged.addProperty("Output for " + meName, value);
                        hasDedicatedLabel = true;
                    }
                    if (columnName.equals(evaluation.groundTruthColumnName)) {
                        merged.addProperty("Ground truth for " + meName, value);
                        hasDedicatedLabel = true;
                    }
                    if (columnName.equals(contextColumnName)) {
                        merged.addProperty("Context for " + meName, value);
                        hasDedicatedLabel = true;
                    }
                    if (columnName.equals(actualToolCallsColumnName)) {
                        merged.addProperty("Actual tool calls for " + meName, value);
                        hasDedicatedLabel = true;
                    }
                    if (columnName.equals(referenceToolCallsColumnName)) {
                        merged.addProperty("Reference tool calls for " + meName, value);
                        hasDedicatedLabel = true;
                    }
                    if (!hasDedicatedLabel) {
                        merged.addProperty(columnName + " for " + meName, value);
                    }
                }
            }
        }

        MemTable sampleTable = new MemTable();
        HashMap<AbstractGenAIModelEvaluation, Integer> skippedRowsByEvaluation = new HashMap<AbstractGenAIModelEvaluation, Integer>();
        for (JsonObject merged : mergedRows.values()) {
            if (merged.keySet().size() != sampleSchema.columns.size()) {
                // Incomplete row: count it against each evaluation that contributed an output.
                for (AbstractGenAIModelEvaluation me : genAIModelEvaluations) {
                    if (merged.keySet().contains("Output for " + me.userMeta.name)) {
                        skippedRowsByEvaluation.merge(me, 1, Integer::sum);
                    }
                }
            } else {
                sampleTable.addRowFromJsonObject(merged);
            }
        }

        logger.info((Object)("Done extracting sample, got " + sampleTable.nrows() + " rows"));
        File target = DKUFileUtils.getWithin((File)SampleBuilder.modelComparisonSamplesDir(projectKey, modelComparisonId), new String[]{SampleBuilder.sampleFilename(sampleId, DSS1_SUFFIX)});
        SampleBuilder.writeResultSample(target, sampleTable);
        return skippedRowsByEvaluation;
    }

    /**
     * Loads the stored "sample with metrics" of a model evaluation into memory.
     *
     * <p>The sample file is read in tab/Excel CSV format without a header row, using the schema
     * parsed from the evaluation's schema file, with an {@code ExtractionLimit} of 50000.
     *
     * @param fme evaluation whose sample is loaded
     * @return the rows of the sample
     * @throws IllegalArgumentException if the sample file or its schema file does not exist
     * @throws Exception if the schema or the sample cannot be read
     */
    private static MemTable extractModelEvaluationSample(FullModelEvaluationId fme) throws Exception {
        File dataFile = fme.getSampleWithMetricsFile();
        File schemaFile = fme.getSampleWithMetricsSchemaFile();
        if (!(dataFile.exists() && schemaFile.exists())) {
            throw new IllegalArgumentException("Missing sample in evaluationId " + fme.evaluationId + ". Try to run a new evaluation.");
        }
        Schema schema = (Schema)JSON.parseFile((File)schemaFile, Schema.class);
        MemTable loaded = new MemTable();
        CSVFormatConfig format = CSVFormatConfig.getStandardTabExcelFormat();
        format.parseHeaderRow = false;
        CSVFormatExtractor extractor = new CSVFormatExtractor(format);
        extractor.setLimit(new ExtractionLimit(50000L));
        extractor.setSchema(schema, false);
        try (FileInputStream rawStream = new FileInputStream(dataFile);
             ExternalInputStreamInput input = new ExternalInputStreamInput(rawStream, dataFile.getName(), dataFile::lastModified)) {
            extractor.run(input.getSingleSplit(), (ProcessorOutput)new MemTableAppendingOutput(loaded), loaded, loaded);
        }
        return loaded;
    }

    /**
     * Returns the file name of a sample: {@code SAMPLE_FILE_PREFIX}, then the sample id, then
     * the suffix.
     *
     * @param sampleId identifier of the sample
     * @param suffix suffix appended after the identifier
     * @return the concatenated file name
     */
    public static String sampleFilename(String sampleId, String suffix) {
        StringBuilder name = new StringBuilder();
        name.append(SAMPLE_FILE_PREFIX);
        name.append(sampleId);
        name.append(suffix);
        return name.toString();
    }

    /**
     * Returns a sanitized form of the identifier of an auth context.
     *
     * <p>Every character other than ASCII letters, digits, {@code @} and {@code .} is replaced
     * by {@code _}. When that changes the identifier, the SHA-1 hex digest of the original
     * identifier is appended after a {@code -}; an identifier that needs no replacement is
     * returned unchanged.
     *
     * @param authCtx auth context providing the identifier
     * @return the identifier itself, or its sanitized form suffixed with the digest
     */
    private static String cleanIdentifier(AuthCtx authCtx) {
        String rawIdentifier = authCtx.getIdentifier();
        String sanitized = rawIdentifier.replaceAll("[^A-Za-z0-9@.]", "_");
        if (rawIdentifier.equals(sanitized)) {
            return rawIdentifier;
        }
        return sanitized + "-" + DigestUtils.sha1Hex(rawIdentifier);
    }

    /**
     * Returns the row count recorded in the dataset's metadata, when available.
     *
     * <p>A count is only looked up when the metadata returned by
     * {@code DatasetHandlerFactory.getMeta} is a {@code SampleDatasetMeta}; it is queried for
     * the version taken from the dataset's {@code SampleDatasetParams}.
     *
     * @param inputDataset dataset to inspect
     * @return the non-negative recorded row count, or {@code -1} when the metadata is of another
     *         kind or holds no non-negative count
     */
    private static int getStaticRecordCount(Dataset inputDataset) {
        DatasetHandler.DatasetMeta<?, ?> meta = DatasetHandlerFactory.getMeta(inputDataset);
        if (!(meta instanceof SampleDatasetMeta)) {
            return -1;
        }
        SampleDatasetMeta sampleMeta = (SampleDatasetMeta)meta;
        Integer recordedCount = sampleMeta.getRowCount(inputDataset.getParamsAs(SampleDatasetParams.class).getVersionAsString());
        if (recordedCount == null || recordedCount < 0) {
            return -1;
        }
        return recordedCount;
    }

    /**
     * Thread that retrieves the record count of a dataset and exposes it through
     * {@link #recordCount}.
     *
     * <p>Failures are logged and never propagated: {@link #recordCount} is simply left
     * unassigned when the count could not be retrieved.
     */
    public static class CountDatasetRecordsThread
    extends Thread {
        private final AuthCtx owner;
        private final Dataset inputDataset;
        /** Result of the count; assigned by {@link #run()} only on success. */
        public volatile DatasetRecordCount recordCount;

        /**
         * @param owner auth context used to build the dataset handler
         * @param inputDataset dataset whose records are counted
         * @param threadName name given to this thread
         */
        public CountDatasetRecordsThread(AuthCtx owner, Dataset inputDataset, String threadName) {
            super(threadName);
            this.inputDataset = inputDataset;
            this.owner = owner;
        }

        /**
         * Sets the compute resource usage context for this thread if none is set, then asks a
         * dataset handler for the record count via {@code getRecordsFast()}.
         */
        @Override
        public void run() {
            String projectKey = this.inputDataset.getProjectKey();
            String datasetName = this.inputDataset.getName();
            CurrentComputeResourceUsageContext.setInCurrentThreadIfNull((ComputeResourceUsageContext)ComputeResourceUsageContext.forDatasetSampleBuild((AuthCtx)this.owner, (String)projectKey, (String)datasetName));
            logger.debug((Object)"Start retrieving dataset total row count");
            try (DatasetHandler datasetHandler = DatasetHandlerFactory.build(this.owner, this.inputDataset)) {
                this.recordCount = datasetHandler.getRecordsFast();
                logger.info((Object)("Dataset total row count retrieved: " + String.valueOf(this.recordCount)));
            } catch (InterruptedException interrupted) {
                logger.info((Object)"Unable to retrieve dataset total row count (interrupted before completion)");
            } catch (Exception failure) {
                logger.info((Object)"Unable to retrieve dataset total row count", (Throwable)failure);
            }
        }
    }

    /**
     * Plain holder for the descriptive information attached to a sample: its identifier, its
     * compute time, its metadata and the warnings recorded for it.
     */
    public static class SampleMeta {
        /** Identifier of the sample. */
        public String id;
        /** Compute time of the sample (unit not visible here — TODO confirm against producers). */
        public long computeTime;
        /** Metadata describing the sample. */
        public SampleMetadata metadata;
        /** Serialized warnings of the sample; checked for {@code null} by readers in this file. */
        public WarningsContext.SerializedWarnings warnings;

        /** Creates an instance with all fields left at their default values. */
        public SampleMeta() {
        }

        /**
         * Creates a fully populated instance.
         *
         * @param id identifier of the sample
         * @param computeTime compute time of the sample
         * @param metadata metadata describing the sample
         * @param warnings serialized warnings of the sample
         */
        public SampleMeta(String id, long computeTime, SampleMetadata metadata, WarningsContext.SerializedWarnings warnings) {
            this.warnings = warnings;
            this.metadata = metadata;
            this.computeTime = computeTime;
            this.id = id;
        }
    }

    /**
     * Future that produces the sample of a dataset and persists it, together with its metadata
     * and warnings, to the sample files of the dataset.
     *
     * <p>For a preview that is not a forced build, an existing non-preview sample is reused when
     * {@code PreviewSampleHelper} finds one; otherwise the sample is computed.
     *
     * <p>A JSON adapter for this class is registered in the static initializer.
     */
    private static class BuildSampleFutureThread
    extends FutureThread<WarningsContext.SerializedWarnings> {
        private final WarningsContext warningsContext;
        private final DatasetSelectionToMemTable selection;
        private final Dataset inputDataset;
        private final boolean isForPreview;
        private final boolean isForceBuild;
        private final String id;
        private final FuturePayload futurePayload;

        /**
         * @param user auth context passed to the future
         * @param id identifier of the sample to build
         * @param inputDataset dataset to sample
         * @param selection selection settings of the sample
         * @param isForPreview whether the sample is built for a preview
         * @param isForceBuild whether reusing an existing non-preview sample is disallowed
         */
        public BuildSampleFutureThread(DSSAuthCtx user, String id, Dataset inputDataset, DatasetSelectionToMemTable selection, boolean isForPreview, boolean isForceBuild) {
            super(user);
            this.warningsContext = new WarningsContext();
            this.selection = selection;
            this.inputDataset = inputDataset;
            this.isForPreview = isForPreview;
            this.isForceBuild = isForceBuild;
            this.id = id;
            // The thread constructing this future is treated as its parent when it is a future itself.
            Thread creator = BuildSampleFutureThread.currentThread();
            FutureThread parentFuture = creator instanceof FutureThread ? (FutureThread)creator : null;
            this.futurePayload = SampleBuilder.buildFuturePayload(parentFuture, inputDataset);
        }

        /** Returns the payload built at construction time. */
        public FuturePayload getPayload() {
            return this.futurePayload;
        }

        /** Always returns {@code 1.0}. */
        public double getDangerosity() {
            return 1.0;
        }

        /** Returns the output of the warnings collected by this future. */
        public WarningsContext.SerializedWarnings getResult() {
            return this.warningsContext.getOutput();
        }

        /**
         * Obtains the sample (reused or computed), then writes the sample file, its metadata
         * file and its warnings.
         *
         * @throws Exception if the sample cannot be obtained or written
         */
        public void execute() throws Exception {
            CurrentComputeResourceUsageContext.setInCurrentThreadIfNull((ComputeResourceUsageContext)ComputeResourceUsageContext.forDatasetSampleBuild((AuthCtx)this.owner, (String)this.inputDataset.getProjectKey(), (String)this.inputDataset.getName()));
            MemTable table = null;
            SampleMetadata sampleMetadata = null;

            if (this.isForPreview && !this.isForceBuild) {
                SampleMeta reusable = PreviewSampleHelper.findNonPreviewSampleForPreview(this.getOwner(), this.inputDataset, this.id, this.selection);
                if (reusable != null) {
                    if (reusable.warnings != null) {
                        this.warningsContext.merge(reusable.warnings);
                    }
                    logger.info((Object)("Using non-preview sample for preview, wc=" + String.valueOf(this.warningsContext)));
                    table = SampleBuilder.readSample(this.getOwner(), this.inputDataset, reusable.id, false, this.selection.maxRecords);
                    logger.info((Object)("Done using non-preview sample for preview, nrows=" + table.nrows()));
                    sampleMetadata = PreviewSampleHelper.buildPreviewSampleMetadataFromNonPreview(reusable.metadata, this.selection, table.nrows());
                }
            }

            if (table == null) {
                // Nothing reusable: compute the sample from the dataset.
                Sample computed = SampleBuilder.computeSample(this.inputDataset, this.id, this.selection, this.warningsContext, this.owner, this.isForPreview);
                table = computed.data;
                sampleMetadata = computed.meta.metadata;
            }

            File target = SampleBuilder.sampleFile(this.getOwner(), this.inputDataset, this.id, this.isForPreview);
            DKUFileUtils.mkdirsParent(target);
            try (SampleWriter writer = new SampleWriter(target)) {
                writer.writeMemTable(table);
            }
            JSON.prettyToFile((Object)sampleMetadata, (File)SampleBuilder.metadataFile(this.getOwner(), this.inputDataset, this.id, this.isForPreview));
            SampleBuilder.dumpWarnings(this.getOwner(), this.inputDataset, this.id, this.warningsContext.getOutput(), this.isForPreview);
            logger.info((Object)("Dumped sample to disk warnings=" + this.warningsContext.getTotalCount() + " fileSize=" + target.length()));
        }

        static {
            JSON.registerAdapter(BuildSampleFutureThread.class, (Object)new JSON.Adapter<BuildSampleFutureThread>(){

                /** Rebuilds a future from the properties written by {@code serialize}. */
                public BuildSampleFutureThread deserialize(JsonElement jsonElement, Type scriptType, JsonDeserializationContext ctx) throws JsonParseException {
                    JsonObject source = jsonElement.getAsJsonObject();
                    DSSAuthCtx owner = (DSSAuthCtx)((Object)ctx.deserialize(source.get("owner"), DSSAuthCtx.class));
                    String sampleId = source.get("id").getAsString();
                    String datasetFullName = source.get("datasetFullName").getAsString();
                    SerializedDataset serializedDataset = (SerializedDataset)ctx.deserialize(source.get("inputDataset"), SerializedDataset.class);
                    Dataset dataset = Dataset.fromSerialized(datasetFullName, serializedDataset);
                    DatasetSelectionToMemTable selection = (DatasetSelectionToMemTable)((Object)ctx.deserialize(source.get("selection"), DatasetSelectionToMemTable.class));
                    boolean forPreview = source.get("isForPreview").getAsBoolean();
                    boolean forceBuild = source.get("isForceBuild").getAsBoolean();
                    return new BuildSampleFutureThread(owner, sampleId, dataset, selection, forPreview, forceBuild);
                }

                /** Writes the constructor arguments of the future as JSON properties. */
                public JsonElement serialize(BuildSampleFutureThread ft, Type type, JsonSerializationContext ctx) {
                    JsonObject target = new JsonObject();
                    target.addProperty("id", ft.id);
                    target.addProperty("datasetFullName", ft.inputDataset.getFullName());
                    target.add("inputDataset", ctx.serialize((Object)ft.inputDataset.serialize()));
                    target.add("selection", ctx.serialize((Object)ft.selection));
                    target.add("owner", ctx.serialize((Object)ft.owner));
                    target.add("isForPreview", ctx.serialize((Object)ft.isForPreview));
                    target.add("isForceBuild", ctx.serialize((Object)ft.isForceBuild));
                    return target;
                }
            });
        }
    }

    /** Pairs the rows of a sample with the descriptive information attached to it. */
    public static class Sample {
        /** Descriptive information of the sample. */
        public SampleMeta meta;
        /** Rows of the sample. */
        public MemTable data;

        /**
         * @param metadata descriptive information, stored in {@link #meta}
         * @param data rows of the sample, stored in {@link #data}
         */
        public Sample(SampleMeta metadata, MemTable data) {
            this.data = data;
            this.meta = metadata;
        }
    }

    /**
     * Future that captures a sample from a streaming endpoint and persists it, together with
     * its warnings, to the sample files of the endpoint.
     *
     * <p>Rows pushed by the endpoint driver arrive through {@link #onRow(Row)} and are forwarded
     * to an output chain limited to {@code selection.maxRecords} rows, optionally filtered and
     * optionally capped in memory. When {@code selection.timeout} is positive, a helper thread
     * interrupts the driver and this future once that many seconds have elapsed.
     *
     * <p>A JSON adapter for this class is registered in the static initializer.
     */
    private static class BuildStreamingSampleFutureThread
    extends FutureThread<WarningsContext.SerializedWarnings>
    implements StreamingEndpointSimpleConsumer {
        private final WarningsContext warningsContext;
        private final DatasetSelectionToMemTable selection;
        private final StreamingEndpoint inputStreamingEndpoint;
        private final String id;
        private final FuturePayload futurePayload;
        /** True while rows are being collected; read by the timeout thread. */
        private volatile boolean collecting;
        /** Head of the output chain that receives the rows; assigned in {@link #execute()}. */
        private ProcessorOutput output;

        /**
         * @param user auth context passed to the future
         * @param id identifier of the sample to build
         * @param inputStreamingEndpoint endpoint to sample
         * @param selection selection settings (row limit, memory limit, filter, timeout)
         */
        public BuildStreamingSampleFutureThread(DSSAuthCtx user, String id, StreamingEndpoint inputStreamingEndpoint, DatasetSelectionToMemTable selection) {
            super(user);
            this.id = id;
            this.inputStreamingEndpoint = inputStreamingEndpoint;
            this.selection = selection;
            this.warningsContext = new WarningsContext();
            // The thread constructing this future is treated as its parent when it is a future itself.
            FutureThread<?> parentFuture = null;
            Thread creator = BuildStreamingSampleFutureThread.currentThread();
            if (creator instanceof FutureThread) {
                parentFuture = (FutureThread<?>)creator;
            }
            this.futurePayload = SampleBuilder.buildFuturePayload(parentFuture, inputStreamingEndpoint);
        }

        /** Returns the payload built at construction time. */
        public FuturePayload getPayload() {
            return this.futurePayload;
        }

        /** Always returns {@code 1.0}. */
        public double getDangerosity() {
            return 1.0;
        }

        /** Returns the output of the warnings collected by this future. */
        public WarningsContext.SerializedWarnings getResult() {
            return this.warningsContext.getOutput();
        }

        /**
         * Collects rows from the streaming endpoint until the driver returns, a limit is
         * reached or the timeout fires, then writes the collected rows and the warnings.
         *
         * <p>Interruption, pipeline interruption and reaching the memory limit all end the
         * collection without failing the future: whatever was collected so far is written.
         *
         * @throws Exception if the driver fails for another reason or if writing fails
         */
        public void execute() throws Exception {
            MemTable table = new MemTable();
            MemTableAppendingOutput tableOutput = new MemTableAppendingOutput(table);
            if (this.selection.maxStoredBytes > -1L) {
                tableOutput.setMaxMemoryUsed(this.selection.maxStoredBytes);
            }
            this.output = new LimitProcessorOutput((ProcessorOutput)tableOutput, this.selection.maxRecords);
            if (FilterDescUtils.willFilter(this.selection.filter)) {
                logger.info((Object)("Filter expression: " + FilterDescUtils.getFilterRepr(this.selection.filter)));
                this.output = new FilterProcessorOutput(this.output, table, this.selection.filter, this.inputStreamingEndpoint.schema);
            }
            this.collecting = true;
            final StreamingEndpointSimplePusher driver = new StreamingEndpointSimplePusherFactory().build(this.owner, this.inputStreamingEndpoint, true);
            Thread interruptor = new Thread(new Runnable(){

                @Override
                public void run() {
                    try {
                        if (BuildStreamingSampleFutureThread.this.selection.timeout > 0L) {
                            // selection.timeout is multiplied by 1000 to get the sleep in milliseconds.
                            Thread.sleep(BuildStreamingSampleFutureThread.this.selection.timeout * 1000L);
                            if (BuildStreamingSampleFutureThread.this.collecting) {
                                driver.interrupt();
                                // Interrupt the enclosing future thread: a bare "this" would denote
                                // this Runnable, which has no interrupt() method.
                                BuildStreamingSampleFutureThread.this.interrupt();
                            }
                        }
                    }
                    catch (InterruptedException e) {
                        logger.warn((Object)"Timeout-producing thread interrupted");
                    }
                }
            });
            interruptor.start();
            try {
                driver.run(this, null, table, table, this.inputStreamingEndpoint.schema);
                this.output.lastRowEmitted();
            }
            catch (InterruptedException | InterruptException e) {
                logger.info((Object)"Stream capture timeouted");
            }
            catch (PipelineInterruptedException e) {
                // NOTE(review): presumably raised by the output chain once the row limit is hit — confirm.
                logger.info((Object)"Succeeded in fetching the sample fully");
            }
            catch (MemTableAppendingOutput.MemTableSizeLimitReachedException e) {
                this.warningsContext.addWarning(WarningsContext.WarningType.MEMSIZE_TRUNCATED_SAMPLE, "The sample size limit in MB was reached", logger);
            }
            finally {
                this.collecting = false;
                // Clear a possibly pending interrupt flag before the writes below.
                Thread.interrupted();
            }
            logger.info((Object)("Done extracting sample, nrows=" + table.nrows()));
            File f = SampleBuilder.sampleFile(this.getOwner(), this.inputStreamingEndpoint, this.id);
            DKUFileUtils.mkdirsParent((File)f);
            try (SampleWriter sw = new SampleWriter(f);){
                sw.writeMemTable(table);
            }
            SampleBuilder.dumpWarnings(this.getOwner(), this.inputStreamingEndpoint, this.id, this.warningsContext.getOutput());
            logger.info((Object)("Dumped sample to disk warnings=" + this.warningsContext.getTotalCount() + " fileSize=" + f.length()));
        }

        /** Forwards a row received from the endpoint driver to the output chain. */
        @Override
        public void onRow(Row row) throws Exception {
            this.output.emitRow(row);
        }

        /** Ignores state notifications. */
        @Override
        public void onNewState(String state) throws Exception {
        }

        static {
            JSON.registerAdapter(BuildStreamingSampleFutureThread.class, (Object)new JSON.Adapter<BuildStreamingSampleFutureThread>(){

                /** Rebuilds a future from the properties written by {@code serialize}. */
                public BuildStreamingSampleFutureThread deserialize(JsonElement jsonElement, Type scriptType, JsonDeserializationContext ctx) throws JsonParseException {
                    JsonObject jsonObj = jsonElement.getAsJsonObject();
                    DSSAuthCtx owner = (DSSAuthCtx)((Object)ctx.deserialize(jsonObj.get("owner"), DSSAuthCtx.class));
                    String id = jsonObj.get("id").getAsString();
                    StreamingEndpoint streamingEndpoint = (StreamingEndpoint)ctx.deserialize(jsonObj.get("inputStreamingEndpoint"), StreamingEndpoint.class);
                    DatasetSelectionToMemTable selection = (DatasetSelectionToMemTable)((Object)ctx.deserialize(jsonObj.get("selection"), DatasetSelectionToMemTable.class));
                    return new BuildStreamingSampleFutureThread(owner, id, streamingEndpoint, selection);
                }

                /** Writes the constructor arguments of the future as JSON properties. */
                public JsonElement serialize(BuildStreamingSampleFutureThread ft, Type type, JsonSerializationContext ctx) {
                    JsonObject ret = new JsonObject();
                    ret.addProperty("id", ft.id);
                    ret.add("inputStreamingEndpoint", ctx.serialize((Object)ft.inputStreamingEndpoint));
                    ret.add("selection", ctx.serialize((Object)ft.selection));
                    ret.add("owner", ctx.serialize((Object)ft.owner));
                    return ret;
                }
            });
        }
    }

    /** I/O exception type dedicated to sample build failures. */
    public static class SampleBuildException
    extends IOException {
        private static final long serialVersionUID = 1L;

        /**
         * @param message description of the failure
         * @param cause underlying failure
         */
        public SampleBuildException(String message, Throwable cause) {
            super(message, cause);
        }

        /**
         * @param message description of the failure
         */
        public SampleBuildException(String message) {
            super(message);
        }
    }
}

