/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.datasets.fs;

import com.dataiku.common.server.SerializedError;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.FormatParams;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.memimpl.MemTable;
import com.dataiku.dip.datalayer.memimpl.MemTableAppendingOutput;
import com.dataiku.dip.datasets.DatasetCodes;
import com.dataiku.dip.datasets.DatasetHandler;
import com.dataiku.dip.datasets.DatasetInspector;
import com.dataiku.dip.datasets.SchemaDetection;
import com.dataiku.dip.datasets.StreamDatasetUtils;
import com.dataiku.dip.datasets.fs.AbstractFSDatasetHandler;
import com.dataiku.dip.datasets.fs.FSDatasetUtils;
import com.dataiku.dip.datasets.fs.FSLikeDatasetTestHandler;
import com.dataiku.dip.datasets.fs.FilesystemDatasetTestHandler;
import com.dataiku.dip.formats.FormatFactory;
import com.dataiku.dip.formats.custom.CustomFormatSchemaProvider;
import com.dataiku.dip.formats.delta.DeltaFormat;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.DatasetTestHandler;
import com.dataiku.dip.input.InputSplit;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.formats.FixedSchemaExtractor;
import com.dataiku.dip.input.formats.FormatExtractor;
import com.dataiku.dip.input.formats.InputFormatsDetector;
import com.dataiku.dip.input.formats.SchemaTweakingExtractor;
import com.dataiku.dip.input.formats.SplitBasedFixedSchemaDetector;
import com.dataiku.dip.input.row.RowsInputSplitWithSchema;
import com.dataiku.dip.input.stream.StreamsInputSplit;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.shaker.server.SerializedMemTableSimple;
import com.dataiku.dip.shaker.services.TypeInferrer2;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ExceptionUtils;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.warnings.WarningsContext;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

public abstract class FileFormatDatasetTestHandler
implements DatasetTestHandler {
    /** Dataset whose connection and format settings are exercised by this handler. */
    protected final Dataset dataset;
    /** Inferrer run (via {@code processFullAuto}) on sampled tables before they are serialized. */
    protected TypeInferrer2 inferer = new TypeInferrer2();
    /** Shared logger for dataset tests; it is never reassigned, hence {@code final}. */
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.datasets");

    /**
     * Creates a test handler bound to the given dataset.
     *
     * @param dataset the dataset to test; the reference is stored as-is (no copy, no null check)
     */
    public FileFormatDatasetTestHandler(Dataset dataset) {
        this.dataset = dataset;
    }

    /**
     * Supplies the sample split to read when the configured format type is {@code "shapefile"}.
     * {@code testFormat} swaps its sample split for this one before gathering sample records.
     *
     * @return the split to sample for a shapefile dataset
     * @throws Exception if the split cannot be built
     */
    protected abstract StreamsInputSplit getSampleSplitForShapeFile() throws Exception;

    /**
     * Probes the underlying storage and returns a split to sample from.
     *
     * <p>Callers in this class treat a {@code null} return as "dataset is empty" and read
     * {@code connectionOK} on the passed result afterwards, so implementations presumably record
     * the connection status on {@code var1} (NOTE(review): confirm against implementations).
     *
     * @param var1 the result object being filled in by the calling test
     * @return a sample split, or {@code null} when there is nothing to sample
     * @throws Exception if the probe fails in a way the implementation does not report on {@code var1}
     */
    public abstract InputSplit testConnection(FSLikeDatasetTestHandler.FSLikeDatasetTestResult var1) throws Exception;

    /**
     * Proposes a name for the dataset; the value is copied into {@code suggestedName} of every
     * test result produced by this class.
     *
     * @return the suggested name
     * @throws Exception if the name cannot be computed
     */
    protected abstract String suggestName() throws Exception;

    /**
     * Tests the dataset's currently configured format against a sample of its data.
     *
     * <p>The storage is probed through {@link #testConnection}. A {@code null} split, an empty
     * stream or a failed connection ends the test early. Otherwise the configured format
     * parameters are validated, sample records are gathered and a schema detection result is
     * attached to the returned format descriptor. Failures while sampling are not thrown: they
     * are reported through {@code format.ok} and {@code format.errorMessage}.
     *
     * @param authCtx authorization context forwarded to the format and schema helpers
     * @param forConsistencyCheck forwarded to {@link #getSchemaResult} as its consistency-check flag
     * @param inferStorageTypesFromMeanings forwarded to {@link #getSchemaResult} (and, in stream
     *        mode, to {@link #gatherSampleRecords})
     * @return the test result; never {@code null}
     * @throws Exception if name suggestion, the connection probe, the emptiness check or the
     *         shapefile split lookup fails
     * @throws Error if the probe returns a split that is neither stream-based nor row-based
     */
    public FSLikeDatasetTestHandler.FSLikeDatasetTestResult testFormat(AuthCtx authCtx, boolean forConsistencyCheck, boolean inferStorageTypesFromMeanings) throws Exception {
        FSLikeDatasetTestHandler.FSLikeDatasetTestResult result = new FSLikeDatasetTestHandler.FSLikeDatasetTestResult();
        result.suggestedName = this.suggestName();
        InputSplit probedSplit = this.testConnection(result);
        if (probedSplit == null) {
            result.empty = true;
            return result;
        }

        if (!(probedSplit instanceof StreamsInputSplit)) {
            if (!(probedSplit instanceof RowsInputSplitWithSchema)) {
                throw new Error("Unknown sample split type " + String.valueOf(probedSplit));
            }
            // Row mode: the split pushes rows itself and carries its own schema.
            RowsInputSplitWithSchema rowsSplit = (RowsInputSplitWithSchema)probedSplit;
            logger.info((Object)"Testing configured format (row-mode)");
            FSLikeDatasetTestHandler.DetectedFormat rowFormat = new FSLikeDatasetTestHandler.DetectedFormat();
            result.format = rowFormat;
            rowFormat.params = this.dataset.getFormatParams();
            rowFormat.type = this.dataset.getFormatType();
            this.checkFormatParams(authCtx, this.dataset, rowFormat);
            if (rowFormat.ok) {
                try {
                    MemTable rows = new MemTable();
                    rowsSplit.push((ProcessorOutput)new MemTableAppendingOutput(rows), rows, rows, new ExtractionLimit(100L), null, null);
                    this.inferer.processFullAuto(null, rows);
                    rowFormat.table = new SerializedMemTableSimple();
                    rowFormat.table.fromMemTable(rows, 0, 20);
                    rowFormat.schemaDetection = this.getSchemaResult(authCtx, this.dataset.getFormatType(), this.dataset.getFormatParams(), rows, rowsSplit, forConsistencyCheck, inferStorageTypesFromMeanings);
                    this.setInferableStatus(this.dataset.getFormatType(), rowFormat.schemaDetection);
                }
                catch (Exception e) {
                    logger.warn((Object)"Failed to gather records with configured format", (Throwable)e);
                    rowFormat.ok = false;
                    rowFormat.errorMessage = ExceptionUtils.getMessageWithCauses((Throwable)e);
                }
            }
            return result;
        }

        // Stream mode: sample the stream(s) with the configured extractor.
        StreamsInputSplit streamsSplit = (StreamsInputSplit)probedSplit;
        if (StreamDatasetUtils.isStreamEmpty(streamsSplit)) {
            result.empty = true;
        }
        if (!result.connectionOK || result.empty) {
            return result;
        }
        assert (streamsSplit != null);
        if (streamsSplit instanceof AbstractFSDatasetHandler.PathSplit) {
            AbstractFSDatasetHandler.PathSplit singleFile = (AbstractFSDatasetHandler.PathSplit)streamsSplit;
            result.fileForDetectionPath = singleFile.p.path();
            result.fileForDetectionSize = singleFile.p.getSize();
        }
        assert (streamsSplit instanceof FSDatasetUtils.PathsBasedSplit);
        // Metadata is recomputed on the originally probed split, even if a shapefile split replaces it below.
        FSDatasetUtils.PathsBasedSplit pathsSplit = (FSDatasetUtils.PathsBasedSplit)streamsSplit;
        logger.info((Object)"Testing configured format");
        FSLikeDatasetTestHandler.DetectedFormat configured = new FSLikeDatasetTestHandler.DetectedFormat();
        result.format = configured;
        configured.params = this.dataset.getFormatParams();
        configured.type = this.dataset.getFormatType();
        this.checkFormatParams(authCtx, this.dataset, configured);
        if (!configured.ok) {
            return result;
        }
        if ("shapefile".equals(configured.type)) {
            streamsSplit = this.getSampleSplitForShapeFile();
        }
        configured.ok = true;
        try {
            MemTable sample = this.gatherSampleRecords(authCtx, streamsSplit, this.dataset.getFormatType(), this.dataset.getFormatParams(), configured, 20, inferStorageTypesFromMeanings);
            streamsSplit.reset();
            Map<String, String> freshMetadata = InputFormatsDetector.recomputeMetadata(this.dataset.getFormatType(), this.dataset, pathsSplit);
            if (freshMetadata != null) {
                configured.metadata = freshMetadata;
            }
            configured.schemaDetection = this.getSchemaResult(authCtx, this.dataset.getFormatType(), this.dataset.getFormatParams(), sample, (InputSplit)streamsSplit, forConsistencyCheck, inferStorageTypesFromMeanings);
            this.setInferableStatus(this.dataset.getFormatType(), configured.schemaDetection);
        }
        catch (Exception e) {
            logger.warn((Object)"Failed to gather records with configured format", (Throwable)e);
            configured.ok = false;
            configured.errorMessage = ExceptionUtils.getMessageWithCauses((Throwable)e);
        }
        return result;
    }

    /**
     * Detects candidate formats for the dataset's sample data and reports the first-ranked one.
     *
     * <p>Steps, in order: probe the connection, short-circuit on empty/failed connections,
     * special-case a sample file that looks like a Delta log, then ask
     * {@code InputFormatsDetector.detectFormat} for candidates and verify each one by building an
     * extractor and gathering sample records. Candidates are sorted and the first one is stored
     * in {@code ret.format}.
     *
     * <p>Side effect: if the dataset's format type equals the Delta format type it is set to
     * {@code null} before probing and is not restored by this method. While each candidate is
     * verified, the dataset's format type is temporarily switched to the candidate type and put
     * back in a {@code finally}.
     *
     * <p>Decompiler note (CFR): "Removed try catching itself - possible behaviour change."
     *
     * @param authCtx authorization context forwarded to extractor construction and sampling
     * @return the detection result; never {@code null}
     * @throws Exception if name suggestion, the connection probe or the emptiness check fails
     * @throws IllegalArgumentException if the probed split is not a {@code StreamsInputSplit}
     */
    public FSLikeDatasetTestHandler.FSLikeDatasetTestResult detectFormats(AuthCtx authCtx) throws Exception {
        InputSplit rawSampleSplit;
        FSLikeDatasetTestHandler.FSLikeDatasetTestResult ret = new FSLikeDatasetTestHandler.FSLikeDatasetTestResult();
        ret.suggestedName = this.suggestName();
        // A previously configured Delta format is cleared before probing (and not restored afterwards).
        if (DeltaFormat.META.getType().equals(this.dataset.getFormatType())) {
            this.dataset.setFormatType(null);
        }
        if ((rawSampleSplit = this.testConnection(ret)) == null) {
            ret.empty = true;
            return ret;
        }
        if (!(rawSampleSplit instanceof StreamsInputSplit)) {
            throw new IllegalArgumentException("Can't detect formats, not a streams input split");
        }
        StreamsInputSplit sampleSplit = (StreamsInputSplit)rawSampleSplit;
        if (StreamDatasetUtils.isStreamEmpty(sampleSplit)) {
            ret.empty = true;
        }
        if (!ret.connectionOK || ret.empty) {
            return ret;
        }
        if (sampleSplit instanceof AbstractFSDatasetHandler.PathSplit) {
            ret.fileForDetectionPath = ((AbstractFSDatasetHandler.PathSplit)sampleSplit).p.path();
            ret.fileForDetectionSize = ((AbstractFSDatasetHandler.PathSplit)sampleSplit).p.getSize();
        }
        // Delta log heuristic: sample file under /_delta_log/0... that is a .json or a checkpoint parquet file.
        if (ret.fileForDetectionPath != null && ret.fileForDetectionPath.startsWith("/_delta_log/0") && (ret.fileForDetectionPath.endsWith(".json") || ret.fileForDetectionPath.contains("checkpoint.parquet"))) {
            logger.info((Object)"Sample split looks like a delta log, using Delta format");
            ret.fileForDetectionPath = null;
            // Returned as not-ok on purpose: the message asks the user to finish the Delta configuration.
            FSLikeDatasetTestHandler.DetectedFormat df = new FSLikeDatasetTestHandler.DetectedFormat();
            df.type = "delta";
            df.params = new DeltaFormat.Config();
            df.ok = false;
            df.errorMessage = "This dataset seems to be a Delta Lake dataset. Please click on \"Format Settings\" then on \"Update Preview\" to finish configuring the Delta Lake format";
            ret.format = df;
            return ret;
        }
        assert (sampleSplit != null);
        logger.debug((Object)sampleSplit);
        assert (sampleSplit instanceof FSDatasetUtils.PathsBasedSplit);
        FSDatasetUtils.PathsBasedSplit pSampleSplit = (FSDatasetUtils.PathsBasedSplit)sampleSplit;
        logger.info((Object)"Detecting candidate formats");
        try {
            ArrayList<FSLikeDatasetTestHandler.DetectedFormat> candidates = new ArrayList<FSLikeDatasetTestHandler.DetectedFormat>();
            for (InputFormatsDetector.FormatWithMetadata format : InputFormatsDetector.detectFormat(this.dataset, pSampleSplit)) {
                logger.info((Object)("Verifying candidate " + format.type));
                FSLikeDatasetTestHandler.DetectedFormat df = new FSLikeDatasetTestHandler.DetectedFormat();
                df.type = format.type;
                df.params = format.params;
                df.metadata = format.metadata;
                // Step 1: can an extractor be built at all? If not, keep the candidate but flag it as not-ok.
                try {
                    FormatFactory.buildExtractor(df.type, df.params, authCtx, this.dataset.getProjectKey());
                    df.ok = true;
                }
                catch (Exception e) {
                    logger.info((Object)("It's not ready yet (" + String.valueOf(e.getClass()) + ": " + e.getMessage() + ")"), (Throwable)e);
                    df.ok = false;
                    df.errorMessage = "You need to finish configuring this format (" + ExceptionUtils.getMessageWithCauses((Throwable)e) + ")";
                    candidates.add(df);
                    continue;
                }
                // Step 2: sample with the candidate; the dataset's format type is switched for the duration.
                String originalFormatType = this.dataset.getFormatType();
                try {
                    sampleSplit.reset();
                    this.dataset.setFormatType(format.type);
                    MemTable mt = this.gatherSampleRecords(authCtx, sampleSplit, format.type, format.params, df, 20, false);
                    // Detector score scaled by nbOK / (nbOK + nbNOK + 1); the +1 also avoids a zero denominator.
                    df.detectionScore = format.detectionScore * (double)df.nbOK / (double)(df.nbOK + df.nbNOK + 1);
                    df.schemaDetection = this.getSchemaResult(authCtx, format.type, format.params, mt, (InputSplit)sampleSplit, false, false);
                    this.setInferableStatus(format.type, df.schemaDetection);
                    candidates.add(df);
                }
                catch (Exception e) {
                    // A candidate that fails while sampling is logged and dropped (not added to the list).
                    logger.info((Object)("Could not test format " + JSON.prettyLog((Object)format.params)), (Throwable)e);
                }
                finally {
                    this.dataset.setFormatType(originalFormatType);
                }
            }
            // NOTE(review): this comparator returns 1 when only o1 is ok, so with the ascending
            // Collections.sort non-ok candidates are placed BEFORE ok ones, and candidates.get(0)
            // is a non-ok candidate whenever one exists. Within the same ok-status, higher
            // detectionScore sorts first. Confirm that "non-ok first" is intended.
            Collections.sort(candidates, new Comparator<FSLikeDatasetTestHandler.DetectedFormat>(){

                @Override
                public int compare(FSLikeDatasetTestHandler.DetectedFormat o1, FSLikeDatasetTestHandler.DetectedFormat o2) {
                    if (o1.ok && !o2.ok) {
                        return 1;
                    }
                    if (!o1.ok && o2.ok) {
                        return -1;
                    }
                    double diff = o2.detectionScore - o1.detectionScore;
                    if (diff > 0.0) {
                        return 1;
                    }
                    if (diff < 0.0) {
                        return -1;
                    }
                    return 0;
                }
            });
            if (!candidates.isEmpty()) {
                ret.format = (FSLikeDatasetTestHandler.DetectedFormat)candidates.get(0);
            }
        }
        catch (Exception e) {
            // Any failure of the detection phase as a whole is reported as a connection error on the result.
            logger.info((Object)"Connection test failed", (Throwable)e);
            ret.connectionOK = false;
            ret.connectionError = new SerializedError((Throwable)e, false);
        }
        return ret;
    }

    /**
     * Runs format detection restricted to a single format and verifies the resulting candidate.
     *
     * <p>For stream-based splits, {@code InputFormatsDetector.detectOneFormat} provides the
     * candidate; it is then checked by building an extractor and gathering sample records.
     * An extractor that cannot be built, or a sampling failure, is reported on the returned
     * format descriptor ({@code ok = false} plus an error message). Any other exception raised
     * during verification is reported as a connection error on the result. For row-based splits
     * the dataset's configured format is tested instead.
     *
     * @param authCtx authorization context forwarded to extractor construction and sampling
     * @param limitToFormat forwarded to {@code InputFormatsDetector.detectOneFormat}
     * @return the detection result; never {@code null}
     * @throws Exception if name suggestion, the connection probe, the emptiness check or the
     *         detector call itself fails
     * @throws Error if the probe returns a split that is neither stream-based nor row-based
     */
    public FSLikeDatasetTestHandler.FSLikeDatasetTestResult detectOneFormat(AuthCtx authCtx, String limitToFormat) throws Exception {
        FSLikeDatasetTestHandler.FSLikeDatasetTestResult outcome = new FSLikeDatasetTestHandler.FSLikeDatasetTestResult();
        outcome.suggestedName = this.suggestName();
        InputSplit probedSplit = this.testConnection(outcome);
        if (probedSplit == null) {
            outcome.empty = true;
            return outcome;
        }

        if (!(probedSplit instanceof StreamsInputSplit)) {
            if (!(probedSplit instanceof RowsInputSplitWithSchema)) {
                throw new Error("Unknown sample split type " + String.valueOf(probedSplit));
            }
            // Row mode: no detection, the configured format is tested as-is.
            RowsInputSplitWithSchema rowsSplit = (RowsInputSplitWithSchema)probedSplit;
            logger.info((Object)"Testing configured format (row-mode)");
            FSLikeDatasetTestHandler.DetectedFormat rowFormat = new FSLikeDatasetTestHandler.DetectedFormat();
            outcome.format = rowFormat;
            rowFormat.params = this.dataset.getFormatParams();
            rowFormat.type = this.dataset.getFormatType();
            this.checkFormatParams(authCtx, this.dataset, rowFormat);
            if (rowFormat.ok) {
                try {
                    MemTable rows = new MemTable();
                    rowsSplit.push((ProcessorOutput)new MemTableAppendingOutput(rows), rows, rows, new ExtractionLimit(100L), null, null);
                    this.inferer.processFullAuto(null, rows);
                    rowFormat.table = new SerializedMemTableSimple();
                    rowFormat.table.fromMemTable(rows, 0, 20);
                    rowFormat.schemaDetection = this.getSchemaResult(authCtx, this.dataset.getFormatType(), this.dataset.getFormatParams(), rows, rowsSplit, false, false);
                    this.setInferableStatus(this.dataset.getFormatType(), rowFormat.schemaDetection);
                }
                catch (Exception e) {
                    logger.warn((Object)"Failed to gather records with configured format", (Throwable)e);
                    rowFormat.ok = false;
                    rowFormat.errorMessage = ExceptionUtils.getMessageWithCauses((Throwable)e);
                }
            }
            return outcome;
        }

        StreamsInputSplit streamsSplit = (StreamsInputSplit)probedSplit;
        if (StreamDatasetUtils.isStreamEmpty(streamsSplit)) {
            outcome.empty = true;
        }
        if (!outcome.connectionOK || outcome.empty) {
            return outcome;
        }
        if (streamsSplit instanceof AbstractFSDatasetHandler.PathSplit) {
            AbstractFSDatasetHandler.PathSplit singleFile = (AbstractFSDatasetHandler.PathSplit)streamsSplit;
            outcome.fileForDetectionPath = singleFile.p.path();
            outcome.fileForDetectionSize = singleFile.p.getSize();
        }
        assert (streamsSplit != null);
        assert (streamsSplit instanceof FSDatasetUtils.PathsBasedSplit);
        FSDatasetUtils.PathsBasedSplit pathsSplit = (FSDatasetUtils.PathsBasedSplit)streamsSplit;
        InputFormatsDetector.FormatWithMetadata candidate = InputFormatsDetector.detectOneFormat(this.dataset, pathsSplit, limitToFormat);
        logger.info((Object)("GOT CANDIDATE " + JSON.log((Object)candidate)));
        try {
            logger.info((Object)("Verifying candidate " + candidate.type));
            FSLikeDatasetTestHandler.DetectedFormat detected = new FSLikeDatasetTestHandler.DetectedFormat();
            outcome.format = detected;
            detected.type = candidate.type;
            detected.params = candidate.params;
            detected.metadata = candidate.metadata;

            // First gate: an extractor must be constructible from the candidate's parameters.
            try {
                FormatFactory.buildExtractor(detected.type, detected.params, authCtx, this.dataset.getProjectKey());
                detected.ok = true;
            }
            catch (Exception e) {
                logger.info((Object)("It's not ready yet (" + String.valueOf(e.getClass()) + ": " + e.getMessage() + ")"), (Throwable)e);
                detected.ok = false;
                detected.errorMessage = "Invalid format: " + ExceptionUtils.getMessageWithCauses((Throwable)e);
                return outcome;
            }

            // Shapefiles are sampled from the multi-file split of the filesystem handler.
            if ("shapefile".equals(candidate.type)) {
                streamsSplit = ((FilesystemDatasetTestHandler)this).getMultiFileSampleSplit();
            }

            // Second gate: the sample must actually be readable with the candidate.
            try {
                logger.info((Object)("Sample split is " + String.valueOf(streamsSplit)));
                streamsSplit.reset();
                MemTable sample = this.gatherSampleRecords(authCtx, streamsSplit, candidate.type, candidate.params, detected, 20, false);
                detected.detectionScore = candidate.detectionScore * (double)detected.nbOK / (double)(detected.nbOK + detected.nbNOK + 1);
                detected.schemaDetection = this.getSchemaResult(authCtx, candidate.type, candidate.params, sample, (InputSplit)streamsSplit, false, false);
                this.setInferableStatus(candidate.type, detected.schemaDetection);
            }
            catch (Exception e) {
                detected.ok = false;
                detected.errorMessage = "Could not read file using format: " + ExceptionUtils.getMessageWithCauses((Throwable)e);
                logger.info((Object)("Could not test format " + JSON.prettyLog((Object)candidate.params)), (Throwable)e);
            }
        }
        catch (Exception e) {
            logger.info((Object)"Connection test failed", (Throwable)e);
            outcome.connectionOK = false;
            outcome.connectionError = new SerializedError((Throwable)e, false);
        }
        return outcome;
    }

    /**
     * Checks, file by file, that the data read with the configured format matches the dataset schema.
     *
     * <p>Every file enumerated by the dataset's filesystem handler gets one entry in the result.
     * When the per-file schema detection reports a {@code WARN} or {@code FATAL} level, each of
     * its text reasons is recorded as a warning on that entry. If checking a file throws, a fatal
     * message is recorded on that entry and the remaining files are still checked.
     *
     * @param authCtx authorization context used to build the dataset handler and to sample files
     * @return one consistency entry per enumerated file
     * @throws IllegalArgumentException if the configured format type is {@code "shapefile"}
     * @throws Exception if the dataset handler cannot be built, enumerated or closed
     */
    public AllFilesSchemaConsistencyResult checkConsistencyOnAllFiles(AuthCtx authCtx) throws Exception {
        if ("shapefile".equals(this.dataset.getFormatType())) {
            throw new IllegalArgumentException("Cannot check sconsistency on all files for shapefiles");
        }
        AllFilesSchemaConsistencyResult report = new AllFilesSchemaConsistencyResult();
        try (DatasetHandler handler = DatasetHandlerFactory.build(authCtx, this.dataset);){
            AbstractFSDatasetHandler fsHandler = (AbstractFSDatasetHandler)handler;
            for (FSPath file : fsHandler.enumerateFilesystem()) {
                logger.info((Object)("Checking schema consistency on file " + file.path()));
                StreamsInputSplit fileSplit = fsHandler.getSplitForFile(null, file);
                logger.info((Object)"Testing configured format");

                // Scratch descriptor: gatherSampleRecords fills it, but only the schema result is used here.
                FSLikeDatasetTestHandler.DetectedFormat scratchFormat = new FSLikeDatasetTestHandler.DetectedFormat();
                scratchFormat.params = this.dataset.getFormatParams();
                scratchFormat.type = this.dataset.getFormatType();

                // The entry is registered before the check so that failures are reported against it.
                SingleFileSchemaConsistencyResult entry = new SingleFileSchemaConsistencyResult();
                entry.filePath = file.path();
                entry.lastModified = file.getLastModified();
                entry.size = file.getSize();
                report.files.add(entry);

                try {
                    MemTable sample = this.gatherSampleRecords(authCtx, fileSplit, this.dataset.getFormatType(), this.dataset.getFormatParams(), scratchFormat, 20, false);
                    SchemaDetection.SchemaDetectionResult detection = this.getSchemaResult(authCtx, this.dataset.getFormatType(), this.dataset.getFormatParams(), sample, (InputSplit)fileSplit, true, false);
                    if (detection.warningLevel == SchemaDetection.WarningLevel.WARN || detection.warningLevel == SchemaDetection.WarningLevel.FATAL) {
                        for (String reason : detection.textReasons) {
                            entry.messages.withWarning((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_SCHEMA_TO_DATA_MISMATCH, reason);
                        }
                    }
                }
                catch (Exception e) {
                    logger.warn((Object)("Failed to scheck schema consistency on " + file.path()), (Throwable)e);
                    entry.messages.withFatal((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_SCHEMA_CONSISTENCY_CHECK_FAILED, "Verification failed: " + ExceptionUtils.getMessageWithCauses((Throwable)e));
                }
            }
        }
        return report;
    }

    /**
     * Validates the dataset parameters through {@code DatasetInspector.checkDatasetParams}.
     *
     * @param authCtx authorization context passed to the inspector
     * @param si the dataset whose parameters are checked
     * @param ret result object; on failure its {@code connectionError} receives the serialized exception
     * @return {@code true} if the inspector accepted the parameters, {@code false} if it threw
     */
    protected boolean checkDatasetParams(AuthCtx authCtx, Dataset si, FSLikeDatasetTestHandler.FSLikeDatasetTestResult ret) {
        boolean accepted;
        try {
            DatasetInspector.checkDatasetParams(authCtx, si);
            accepted = true;
        }
        catch (Exception failure) {
            logger.info((Object)"Config NOK", (Throwable)failure);
            ret.connectionError = new SerializedError((Throwable)failure, false);
            accepted = false;
        }
        return accepted;
    }

    /**
     * Validates the dataset's format settings through {@code DatasetInspector.checkDatasetFormat}.
     * A dataset without a format type is accepted without any check.
     *
     * @param authCtx authorization context passed to the inspector
     * @param si the dataset whose format is checked
     * @param df format descriptor; on failure it is flagged not-ok and receives the error message
     *        (it is left untouched on success)
     * @return {@code true} if there is no format type or the inspector accepted it, {@code false} if it threw
     */
    protected boolean checkFormatParams(AuthCtx authCtx, Dataset si, FSLikeDatasetTestHandler.DetectedFormat df) {
        if (si.getFormatType() == null) {
            return true;
        }
        try {
            DatasetInspector.checkDatasetFormat(authCtx, si);
            return true;
        }
        catch (Exception failure) {
            logger.info((Object)"Config test failed", (Throwable)failure);
            df.ok = false;
            df.errorMessage = ExceptionUtils.getMessageWithCauses((Throwable)failure);
            return false;
        }
    }

    /**
     * Computes the schema detection result for a sample, comparing it with the dataset's current schema.
     *
     * <p>The detected schema comes from the first applicable source, in this order: the schema
     * carried by a {@code RowsInputSplitWithSchema}; a {@code FixedSchemaExtractor} reading the
     * head of the first stream; a {@code SplitBasedFixedSchemaDetector}; a
     * {@code CustomFormatSchemaProvider}; otherwise the sampled table itself (optionally with
     * storage-type inference). Stream-based branches reset the split before and after detection.
     *
     * <p>Fix over the previous version: the detection mode chosen when the current schema is
     * "important" (user-modified or managed dataset) was stored in an unused local, so
     * {@code null} was passed as mode in that case. The mode is now always assigned.
     *
     * @param authCtx authorization context used to build the extractor
     * @param formatType format type used to build the extractor and to look up schema handling
     * @param formatParams format parameters used to build the extractor
     * @param sampleMT sampled table; only used when no extractor-specific detection applies
     * @param sampleSplit split the sample was taken from
     * @param consistencyCheck when {@code true}, the mode is {@code CONSISTENCY_CHECK}
     * @param inferStorageTypesFromData selects {@code DETECTION_ONLY_TYPES} for important schemas
     *        and enables storage-type inference on the sampled table in the fallback branch
     * @return the schema detection result
     * @throws Exception if the extractor cannot be built or detection fails
     */
    protected SchemaDetection.SchemaDetectionResult getSchemaResult(AuthCtx authCtx, String formatType, FormatParams formatParams, MemTable sampleMT, InputSplit sampleSplit, boolean consistencyCheck, boolean inferStorageTypesFromData) throws Exception {
        FormatExtractor extractor = FormatFactory.buildExtractor(formatType, formatParams, authCtx, this.dataset.getProjectKey());
        if (this.dataset.getSchema() != null && extractor.canSetSchemaForExtractor()) {
            extractor.setSchema(this.dataset.getSchema(), true);
        }
        SchemaDetection.Mode detectionMode = this.resolveDetectionMode(consistencyCheck, inferStorageTypesFromData);

        if (sampleSplit instanceof RowsInputSplitWithSchema) {
            return SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), this.dataset.getSchema(), detectionMode, ((RowsInputSplitWithSchema)sampleSplit).getSchema());
        }
        if (extractor instanceof FixedSchemaExtractor) {
            StreamsInputSplit streamsSplit = (StreamsInputSplit)sampleSplit;
            streamsSplit.reset();
            // The head stream is closed before the finally block runs, and the split is reset
            // even if closing (or opening) the stream throws.
            try (InputStream head = streamsSplit.nextStream().decompressedHeadStream(100000L)) {
                return SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), this.dataset.getSchema(), detectionMode, ((FixedSchemaExtractor)((Object)extractor)).detectSchema(head));
            }
            finally {
                streamsSplit.reset();
            }
        }
        if (extractor instanceof SplitBasedFixedSchemaDetector) {
            StreamsInputSplit streamsSplit = (StreamsInputSplit)sampleSplit;
            streamsSplit.reset();
            try {
                return SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), this.dataset.getSchema(), detectionMode, ((SplitBasedFixedSchemaDetector)((Object)extractor)).detectSchema(streamsSplit));
            }
            finally {
                streamsSplit.reset();
            }
        }
        if (extractor instanceof CustomFormatSchemaProvider) {
            StreamsInputSplit streamsSplit = (StreamsInputSplit)sampleSplit;
            streamsSplit.reset();
            try {
                return SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), this.dataset.getSchema(), detectionMode, ((CustomFormatSchemaProvider)((Object)extractor)).detectSchema(streamsSplit));
            }
            finally {
                streamsSplit.reset();
            }
        }

        // Fallback: derive the schema from the sampled table.
        Schema existingSchemaForComparison = this.dataset.getSchema();
        if (extractor instanceof SchemaTweakingExtractor && this.dataset.getSchema() != null && this.dataset.getSchema().userModified) {
            // Let the extractor pre-treat a user-modified schema before it is compared.
            existingSchemaForComparison = ((SchemaTweakingExtractor)((Object)extractor)).preTreatBeforeSchemaComparison(this.dataset.getSchema());
        }
        if (inferStorageTypesFromData) {
            Schema detectedSchema = SchemaDetection.buildDetectedSchemaWithStorageTypesInference(sampleMT);
            SchemaDetection.SchemaDetectionResult result = SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), existingSchemaForComparison, detectionMode, detectedSchema);
            result.detectedSchema.userModified = true;
            result.newSchema.userModified = true;
            return result;
        }
        return SchemaDetection.handleDetectionResult(FormatFactory.getMeta(formatType).getSchemaHandlingType(), existingSchemaForComparison, detectionMode, sampleMT);
    }

    /**
     * Picks the schema detection mode: consistency check if requested; otherwise, when the
     * current schema is user-modified or the dataset is managed, types-only or prefer-user
     * (depending on {@code inferStorageTypesFromData}); otherwise prefer-data.
     */
    private SchemaDetection.Mode resolveDetectionMode(boolean consistencyCheck, boolean inferStorageTypesFromData) {
        if (consistencyCheck) {
            return SchemaDetection.Mode.CONSISTENCY_CHECK;
        }
        boolean currentSchemaIsImportant = (this.dataset.getSchema() != null && this.dataset.getSchema().userModified) || this.dataset.isManaged();
        if (!currentSchemaIsImportant) {
            return SchemaDetection.Mode.DETECTION_PREFER_DATA;
        }
        return inferStorageTypesFromData ? SchemaDetection.Mode.DETECTION_ONLY_TYPES : SchemaDetection.Mode.DETECTION_PREFER_USER_AND_WARN;
    }

    /**
     * Reads a sample of the split with the given format and records read statistics on {@code df}.
     *
     * <p>An extractor is built for the format, limited by {@code new ExtractionLimit(1000L, 10000000L)},
     * and run into a fresh in-memory table. The table is compacted and passed through the type
     * inferrer, and its first {@code nbOKRecords} rows are serialized into {@code df.table}.
     * {@code df.nbOK} is the table's row count, {@code df.nbNOK} the warnings context's total
     * count, and {@code df.percentage} the floored share of OK rows (0 when both are zero).
     *
     * @param authCtx authorization context used to build the extractor
     * @param split the split to read from
     * @param formatType format type used to build the extractor
     * @param formatParams format parameters used to build the extractor
     * @param df descriptor that receives the counters and the serialized preview table
     * @param nbOKRecords upper bound passed to {@code fromMemTable} for the preview table
     * @param performMeaningInference not used by this implementation
     * @return the full sampled table
     * @throws Exception if the extractor cannot be built or the extraction fails
     */
    protected MemTable gatherSampleRecords(AuthCtx authCtx, StreamsInputSplit split, String formatType, FormatParams formatParams, FSLikeDatasetTestHandler.DetectedFormat df, int nbOKRecords, boolean performMeaningInference) throws Exception {
        FormatExtractor extractor = FormatFactory.buildExtractor(formatType, formatParams, authCtx, this.dataset.getProjectKey());
        if (this.dataset.getSchema() != null) {
            // Push the current schema when the extractor accepts one, or when the handler says
            // the schema lives in the real dataset.
            if (extractor.canSetSchemaForExtractor() || this.isSchemaInRealDataset()) {
                extractor.setSchema(this.dataset.getSchema(), false);
            }
        }

        MemTable sampled = new MemTable();
        extractor.setLimit(new ExtractionLimit(1000L, 10000000L));
        WarningsContext warnings = new WarningsContext();
        extractor.setWarningsContext(warnings);

        logger.info((Object)("Start gathering samples for streams split and format " + formatType));
        extractor.run(split, (ProcessorOutput)new MemTableAppendingOutput(sampled), sampled, sampled);
        logger.info((Object)("Done gathering samples for streams split and format " + formatType + " nbCols=" + sampled.columns.size() + " nbOK=" + sampled.nrows() + " nbNOK= " + warnings.getTotalCount()));

        sampled.compact();
        df.nbOK = sampled.nrows();
        df.nbNOK = warnings.getTotalCount();
        if (df.nbOK + df.nbNOK == 0) {
            df.percentage = 0;
        } else {
            df.percentage = (int)Math.floor(100.0 * (double)df.nbOK / (double)(df.nbOK + df.nbNOK));
        }

        this.inferer.processFullAuto(null, sampled);
        df.table = new SerializedMemTableSimple();
        df.table.fromMemTable(sampled, 0, nbOKRecords);
        return sampled;
    }

    /**
     * Flags on the detection result whether inferring storage types makes sense for the format.
     * The flag is {@code false} for {@code avro}, {@code parquet}, {@code sequencefile},
     * {@code rcfile} and {@code orcfile}, and {@code true} for every other format type.
     *
     * @param formatType the format type; when {@code null} the detection result is left untouched
     * @param detection the detection result to flag
     */
    protected void setInferableStatus(String formatType, SchemaDetection.SchemaDetectionResult detection) {
        if (formatType == null) {
            return;
        }
        boolean excludedFromInference = "avro".equals(formatType)
                || "parquet".equals(formatType)
                || "sequencefile".equals(formatType)
                || "rcfile".equals(formatType)
                || "orcfile".equals(formatType);
        if (excludedFromInference) {
            detection.makesSenseToInferStorageTypes = false;
        } else {
            detection.makesSenseToInferStorageTypes = true;
        }
    }

    /**
     * Tells whether the schema is held by the real dataset. {@code gatherSampleRecords} consults
     * this to push the dataset schema to the extractor even when the extractor itself reports
     * that it cannot take one.
     *
     * @return {@code false} in this base implementation
     * @throws IOException never thrown here; declared so that overriding implementations may throw it
     */
    public boolean isSchemaInRealDataset() throws IOException {
        return false;
    }

    /** Result of {@code checkConsistencyOnAllFiles}: one entry per file that was enumerated. */
    public static class AllFilesSchemaConsistencyResult {
        /** Per-file results, in enumeration order; starts empty. */
        public List<SingleFileSchemaConsistencyResult> files = new ArrayList<SingleFileSchemaConsistencyResult>();
    }

    /** Schema consistency outcome for a single file of the dataset. */
    public static class SingleFileSchemaConsistencyResult {
        /** Path of the file, as returned by {@code FSPath.path()}. */
        public String filePath;
        /** Size of the file, as returned by {@code FSPath.getSize()}. */
        public long size;
        /** Last-modified value of the file, as returned by {@code FSPath.getLastModified()}. */
        public long lastModified;
        /** Warnings (schema/data mismatches) and fatal messages (check failures) for this file. */
        public InfoMessage.InfoMessages messages = new InfoMessage.InfoMessages();
    }
}

