/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.datasets;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.dataflow.exec.filter.FilterDesc;
import com.dataiku.dip.dataflow.exec.filter.FilterDescUtils;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.PipelineInterruptedException;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.SinkProcessorOutput;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datalayer.utils.ColumnSamplingProcessorOutput;
import com.dataiku.dip.datalayer.utils.FilterProcessorOutput;
import com.dataiku.dip.datalayer.utils.RandomRatioSamplingProcessorOutput;
import com.dataiku.dip.datasets.DatasetHandler;
import com.dataiku.dip.datasets.DatasetSelection;
import com.dataiku.dip.datasets.LatestPartitionsSelector;
import com.dataiku.dip.datasets.SamplingParam;
import com.dataiku.dip.datasets.StreamableDatasetSelection;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.datasets.fs.AbstractFSDatasetHandler;
import com.dataiku.dip.datasets.fs.DelegateFSDatasetHandler;
import com.dataiku.dip.formats.FormatFactory;
import com.dataiku.dip.futures.FixedRecordsStreamFutureProgressListener;
import com.dataiku.dip.futures.FutureProgress;
import com.dataiku.dip.futures.FutureProgressState;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.InputSplit;
import com.dataiku.dip.input.InputSplitProgressListener;
import com.dataiku.dip.input.filter.FilterResultWithSplits;
import com.dataiku.dip.input.filter.InputFilter;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.formats.FormatExtractor;
import com.dataiku.dip.input.row.RowSequenceInputSplit;
import com.dataiku.dip.input.row.RowsInputSplit;
import com.dataiku.dip.input.stream.StreamsInputSplit;
import com.dataiku.dip.input.utils.CountingProcessorOutput;
import com.dataiku.dip.partitioning.FilePartition;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.partitioning.PartitionFactory;
import com.dataiku.dip.partitioning.PartitioningScheme;
import com.dataiku.dip.partitioning.PartitioningUtils;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.shaker.facet.CountMap;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.DKUMathsUtils;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.warnings.WarningsContext;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;

public class AbstractSingleThreadPusher {
    /** Authentication context forwarded to the handler, format-extractor and partition-selection calls made by this class. */
    protected final AuthCtx authCtx;
    /** Dataset whose schema, format and partitioning settings drive the push. */
    protected final Dataset dataset;
    /** Column factory handed to splits, extractors and sampling/filtering outputs. */
    protected final ColumnFactory cf;
    /** Row factory handed to splits and extractors. */
    protected final RowFactory rf;
    /** Not read or written in this class; presumably maintained by subclasses (to be confirmed). */
    protected ProcessorOutput finalOutput;
    /** Progress listener; created on demand by {@link #getListener(FutureProgressState)} when none was set. */
    protected InputSplitProgressListener _listener;
    /** Optional warnings context passed to splits and extractors; {@code null} unless set through the setter. */
    protected WarningsContext warningsContext = null;
    /** Flag supplied at construction time; not read in this class. */
    protected final boolean countProcessedRows;
    /** Starts at -1 and is never updated in this class; exposed through {@link #getProcessedRowCount()}. */
    protected long processedRowCount = -1L;
    /** Starts at {@code false} and is never updated in this class; exposed through {@link #wasSamplingPushedDownToDB()}. */
    protected boolean wasSamplingPushedDownToDB = false;
    /** Shared logger for all push-related messages of this class. */
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.datasets.push");

    /**
     * Creates a pusher with {@code countProcessedRows} set to {@code false}.
     *
     * @param authCtx authentication context stored for later calls
     * @param dataset dataset this pusher works on
     * @param cf column factory stored for later calls
     * @param rf row factory stored for later calls
     */
    protected AbstractSingleThreadPusher(AuthCtx authCtx, Dataset dataset, ColumnFactory cf, RowFactory rf) {
        this(authCtx, dataset, cf, rf, false);
    }

    /**
     * Creates a pusher, storing every argument in the matching field.
     *
     * @param authCtx authentication context stored for later calls
     * @param dataset dataset this pusher works on
     * @param cf column factory stored for later calls
     * @param rf row factory stored for later calls
     * @param countProcessedRows value of the {@code countProcessedRows} flag
     */
    protected AbstractSingleThreadPusher(AuthCtx authCtx, Dataset dataset, ColumnFactory cf, RowFactory rf, boolean countProcessedRows) {
        this.countProcessedRows = countProcessedRows;
        this.rf = rf;
        this.cf = cf;
        this.dataset = dataset;
        this.authCtx = authCtx;
    }

    /**
     * Installs the progress listener returned by subsequent calls to
     * {@link #getListener(FutureProgressState)}.
     *
     * @param listener listener to use; passing {@code null} re-enables lazy creation
     */
    public void setListener(InputSplitProgressListener listener) {
        _listener = listener;
    }

    /**
     * Sets the warnings context that is handed to row splits and format
     * extractors when splits are pushed.
     *
     * @param warningsContext context to use; may be {@code null}
     */
    public void setWarningsContext(WarningsContext warningsContext) {
        this.warningsContext = warningsContext;
    }

    /**
     * Returns the progress listener, creating one on first use.
     *
     * @param state progress state wrapped by the listener when one has to be
     *              created; ignored when a listener already exists
     * @return the previously set or created listener, otherwise a new
     *         {@link FixedRecordsStreamFutureProgressListener} that is also
     *         remembered for later calls
     */
    public InputSplitProgressListener getListener(FutureProgressState state) {
        if (_listener != null) {
            return _listener;
        }
        _listener = new FixedRecordsStreamFutureProgressListener(state);
        return _listener;
    }

    /**
     * Sets the progress target to the summed size of the matching file
     * partitions, when that makes sense.
     *
     * <p>Nothing happens unless the handler is an {@link AbstractFSDatasetHandler},
     * the state unit is not {@code RECORDS} and the current target is negative.
     * Partitions that are not {@link FilePartition}s contribute nothing.
     *
     * @param state progress state to update
     * @param handler handler of the dataset being read
     * @param result filter result providing the matching partitions
     */
    protected static void setSizeTargetIfPossible(FutureProgressState state, DatasetHandler handler, FilterResultWithSplits result) {
        if (!(handler instanceof AbstractFSDatasetHandler)) {
            return;
        }
        // Written as a negated "<" so that a non-comparable target is treated exactly as before.
        if (state.unit == FutureProgressState.StateUnit.RECORDS || !(state.target < 0.0)) {
            return;
        }
        long sizeSum = 0L;
        for (Partition partition : result.getMatchingPartitions()) {
            if (partition instanceof FilePartition) {
                sizeSum += ((FilePartition)partition).getTotalSize();
            }
        }
        state.setTarget(sizeSum);
    }

    /**
     * Sets the progress target to the summed size of all file partitions
     * listed by the handler, when that makes sense.
     *
     * <p>Nothing happens unless the handler is an {@link AbstractFSDatasetHandler},
     * the state unit is not {@code RECORDS} and the current target is negative.
     * Partitions that are not {@link FilePartition}s contribute nothing.
     *
     * @param state progress state to update
     * @param handler handler of the dataset being read; also used to list partitions
     * @throws Exception if listing the partitions fails
     */
    protected static void setSizeTargetIfPossible(FutureProgressState state, DatasetHandler handler) throws Exception {
        if (!(handler instanceof AbstractFSDatasetHandler)) {
            return;
        }
        // Written as a negated "<" so that a non-comparable target is treated exactly as before.
        if (state.unit == FutureProgressState.StateUnit.RECORDS || !(state.target < 0.0)) {
            return;
        }
        long sizeSum = 0L;
        for (Partition partition : handler.listPartitions()) {
            if (partition instanceof FilePartition) {
                sizeSum += ((FilePartition)partition).getTotalSize();
            }
        }
        state.setTarget(sizeSum);
    }

    /**
     * Translates a dataset selection into an {@link InputFilter}.
     *
     * <p>A partition clause is only built when the dataset is partitioned and
     * the selection method is not {@code ALL}. The row filter of the selection
     * is attached whenever it is non-null.
     *
     * @param datasetSelection selection describing partitions and row filter
     * @return the input filter, or {@code null} when neither a row filter nor a
     *         partition restriction applies
     * @throws Exception if resolving the partitions fails
     */
    protected InputFilter buildInputFilter(DatasetSelection datasetSelection) throws Exception {
        PartitioningScheme scheme = this.dataset.getPartitioningSchema();
        boolean partitioned = scheme != null && scheme.isPartitioned();
        boolean needsPartitionFilter = partitioned
                && datasetSelection.partitionSelectionMethod != DatasetSelection.PartitionSelectionMethod.ALL;

        if (!FilterDescUtils.willFilter(datasetSelection.filter) && !needsPartitionFilter) {
            return null;
        }

        InputFilter inputFilter = new InputFilter();
        if (needsPartitionFilter) {
            // ALL cannot occur here: it is excluded by needsPartitionFilter.
            switch (datasetSelection.partitionSelectionMethod) {
                case SELECTED: {
                    // Start from an explicit empty clause so that "no selected partition" selects nothing.
                    inputFilter.setPartitionsClause(new ArrayList());
                    if (datasetSelection.selectedPartitions != null) {
                        for (String partitionId : datasetSelection.selectedPartitions) {
                            inputFilter.withSelectedPartition(PartitionFactory.fromIdentifier(scheme, partitionId));
                        }
                    }
                    break;
                }
                case LATEST_N: {
                    inputFilter.withSelectedPartitions(LatestPartitionsSelector.select(this.authCtx, this.dataset, datasetSelection.latestPartitionsN));
                    logger.info((Object)("Input filter selected latest partitions: " + StringUtils.join(PartitioningUtils.toIds(inputFilter.getPartitionsClause()), (String)",")));
                    break;
                }
                default: {
                    break;
                }
            }
        }

        if (datasetSelection.filter != null) {
            inputFilter.withFilter(datasetSelection.filter);
        }
        return inputFilter;
    }

    /**
     * Pushes all given splits into {@code output}.
     *
     * <p>The kind of the first split decides how the whole list is handled:
     * row splits are pushed as one sequence, stream splits are run one by one
     * through a format extractor. {@code splits} must therefore be non-empty
     * (the first element is read unconditionally) and homogeneous (every
     * element is cast to the type of the first one).
     *
     * @param handler handler of the dataset; only consulted for delegate format lookup
     * @param output destination of the rows
     * @param state progress state used to obtain the progress listener
     * @param splits splits to push
     * @param limit extraction limit applied to the push
     * @return {@code true} when every split was pushed; {@code false} when the
     *         extractor reported that it should stop or the pipeline was interrupted
     * @throws Exception if the first split is of an unsupported class, or if pushing fails
     */
    protected boolean pushSplits(DatasetHandler handler, ProcessorOutput output, FutureProgressState state, List<? extends InputSplit> splits, ExtractionLimit limit) throws Exception {
        InputSplit head = splits.get(0);
        try {
            if (head instanceof RowsInputSplit) {
                this.pushRowSplits(output, state, splits, limit);
                return true;
            }
            if (head instanceof StreamsInputSplit) {
                return this.pushStreamSplits(handler, output, state, splits, limit);
            }
            throw ErrorContext.iaef((String)"Unable to manage split of class %s", (Object)head.getClass().getName(), (Object[])new Object[0]);
        }
        catch (PipelineInterruptedException e) {
            logger.info((Object)"Pipeline was interrupted before the end");
            return false;
        }
    }

    /** Pushes row-based splits as a single sequence, switching the progress unit to records. */
    private void pushRowSplits(ProcessorOutput output, FutureProgressState state, List<? extends InputSplit> splits, ExtractionLimit limit) throws Exception {
        logger.info((Object)("Push row splits limit= " + JSON.json((Object)limit)));
        state.unit = FutureProgressState.StateUnit.RECORDS;
        ArrayList<RowsInputSplit> rowSplits = new ArrayList<RowsInputSplit>();
        for (InputSplit split : splits) {
            rowSplits.add((RowsInputSplit)split);
        }
        RowSequenceInputSplit sequence = new RowSequenceInputSplit(rowSplits);
        sequence.push(output, this.cf, this.rf, limit, this.getListener(state), this.warningsContext);
    }

    /** Runs stream-based splits through a format extractor; returns false as soon as one run returns false. */
    private boolean pushStreamSplits(DatasetHandler handler, ProcessorOutput output, FutureProgressState state, List<? extends InputSplit> splits, ExtractionLimit limit) throws Exception {
        FormatExtractor extractor;
        boolean useDelegateFormat = handler instanceof DelegateFSDatasetHandler && this.dataset.getFormatType() == null;
        if (useDelegateFormat) {
            // The dataset itself declares no format: take it from the dataset behind the delegate handler.
            Dataset realDataset = ((DelegateFSDatasetHandler)handler).getRealDataset();
            logger.info((Object)("Using delegate format, type: " + realDataset.getFormatType() + " params:" + JSON.json((Object)realDataset.getFormatParams())));
            extractor = FormatFactory.buildExtractor(realDataset.getFormatType(), realDataset.getFormatParams(), this.authCtx, this.dataset.getProjectKey());
        } else {
            extractor = FormatFactory.buildExtractor(this.dataset.getFormatType(), this.dataset.getFormatParams(), this.authCtx, this.dataset.getProjectKey());
        }
        if (this.dataset.getSchema() != null) {
            extractor.setSchema(this.dataset.getSchema(), false);
        }
        extractor.setLimit(limit);
        extractor.setProgressListener(this.getListener(state));
        extractor.setWarningsContext(this.warningsContext);
        for (InputSplit split : splits) {
            boolean keepGoing = extractor.run((StreamsInputSplit)split, output, this.cf, this.rf);
            if (!keepGoing) {
                return false;
            }
        }
        return true;
    }

    /**
     * Wraps {@code output} in a column-based sampling output.
     *
     * <p>The records matching {@code ifilter} are counted first. When there are
     * more than {@code datasetSelection.maxRecords}, the sampling probability
     * is {@code maxRecords / count}; otherwise it is 1. The sampler is seeded
     * with the current time truncated to an {@code int}.
     *
     * @param datasetSelection selection providing {@code maxRecords} and the sampling column
     * @param ifilter filter used for counting; may be {@code null}
     * @param output downstream output; asserted to be non-null
     * @return the sampling output feeding {@code output}
     * @throws Exception if counting the records fails
     */
    protected ColumnSamplingProcessorOutput buildOutputForColumnBased(DatasetSelection datasetSelection, InputFilter ifilter, ProcessorOutput output) throws Exception {
        assert (output != null);
        long totalRecords = AbstractSingleThreadPusher.nbRecordsWithFilter(this.authCtx, this.dataset, ifilter);
        double keepProbability = totalRecords > datasetSelection.maxRecords
                ? DKUMathsUtils.safeDivide((long)datasetSelection.maxRecords, (long)totalRecords)
                : 1.0;
        logger.info((Object)("Have " + totalRecords + " records, sampling with proba=" + keepProbability));
        int timeSeed = (int)System.currentTimeMillis();
        return new ColumnSamplingProcessorOutput(output, this.cf, datasetSelection.column, keepProbability, timeSeed);
    }

    /**
     * Wraps {@code output} in a random-ratio sampling output.
     *
     * <p>The records matching {@code ifilter} are counted first. When there are
     * more than {@code datasetSelection.maxRecords}, the sampling probability
     * is {@code maxRecords / count}; otherwise it is 1. The sampler uses the
     * seed carried by the selection.
     *
     * @param datasetSelection selection providing {@code maxRecords} and {@code seed}
     * @param ifilter filter used for counting; may be {@code null}
     * @param output downstream output
     * @return the sampling output feeding {@code output}
     * @throws Exception if counting the records fails
     */
    protected RandomRatioSamplingProcessorOutput buildOutputForRandomNb(DatasetSelection datasetSelection, InputFilter ifilter, ProcessorOutput output) throws Exception {
        long totalRecords = AbstractSingleThreadPusher.nbRecordsWithFilter(this.authCtx, this.dataset, ifilter);
        double keepProbability = totalRecords > datasetSelection.maxRecords
                ? DKUMathsUtils.safeDivide((long)datasetSelection.maxRecords, (long)totalRecords)
                : 1.0;
        logger.info((Object)("Have " + totalRecords + " records, sampling with proba=" + keepProbability));
        return new RandomRatioSamplingProcessorOutput(output, keepProbability, datasetSelection.seed);
    }

    /**
     * Builds a stream-side filter output when the filter must be re-applied.
     *
     * <p>The returned output has no downstream yet; it is meant to be connected
     * later (see {@link #filterIfNeeded(FilterProcessorOutput, ProcessorOutput)}).
     *
     * @param ifilter input filter; must not be {@code null}
     * @param result filter result telling whether rows need to be re-filtered
     * @return a filter output without downstream, or {@code null} when the
     *         filter does nothing or {@code result} reports that no re-filtering
     *         is needed
     * @throws Exception if building the filter output fails
     */
    protected FilterProcessorOutput buildUnpluggedFilterOutput(InputFilter ifilter, FilterResultWithSplits result) throws Exception {
        if (!FilterDescUtils.willFilter(ifilter.getFilter())) {
            return null;
        }
        logger.info((Object)("Filter expression: " + FilterDescUtils.getFilterRepr(ifilter.getFilter())));
        if (!result.isNeedsRefilter()) {
            logger.info((Object)"Filtering fully handled in database request");
            return null;
        }
        logger.info((Object)"Create stream filter to apply filtering rules");
        return new FilterProcessorOutput(null, this.cf, ifilter.getFilter(), this.dataset.getSchema());
    }

    /**
     * Counts the records of {@code dataset} that match {@code filter}.
     *
     * <ul>
     *   <li>With an effective row filter, the dataset is streamed in full
     *       through a counting output.</li>
     *   <li>With only a partition clause, the per-partition record counts
     *       reported by the handler are summed.</li>
     *   <li>Otherwise the handler's overall record count is returned.</li>
     * </ul>
     *
     * @param authCtx authentication context
     * @param dataset dataset to count
     * @param filter filter to honour; may be {@code null}
     * @return number of matching records
     * @throws Exception if the handler or the streaming push fails
     */
    protected static long nbRecordsWithFilter(AuthCtx authCtx, Dataset dataset, InputFilter filter) throws Exception {
        // NOTE(review): the handler is built and closed even in the streaming branch, which does not use it.
        try (DatasetHandler dh = DatasetHandlerFactory.build(authCtx, dataset)) {
            boolean rowFilterActive = filter != null && FilterDescUtils.willFilter(filter.getFilter());
            if (rowFilterActive) {
                StreamableDatasetSelection selection = StreamableDatasetSelection.full();
                selection.filter = filter.getFilter();
                if (filter.hasPartitionsFiltering()) {
                    selection.withSelectedPartitions(filter.getPartitionsClause());
                }
                logger.info((Object)"Counting records using full-push-with-filter");
                StreamColumnFactory columns = new StreamColumnFactory();
                StreamRowFactory rows = new StreamRowFactory();
                CountingProcessorOutput counter = new CountingProcessorOutput();
                UniversalSingleThreadPusher.push(authCtx, dataset, selection, (ProcessorOutput)counter, (ColumnFactory)columns, (RowFactory)rows);
                return counter.getCount();
            }

            logger.info((Object)"First pass : counting records");
            if (filter == null || filter.getPartitionsClause() == null) {
                return dh.getRecords();
            }

            long total = 0L;
            try (FutureProgress.AutocloseableFutureProgressState computing = FutureProgress.pushAutoCloseableState((String)"Counting records")) {
                for (Partition partition : filter.getPartitionsClause()) {
                    total += dh.getPartitionRecords(partition);
                }
            }
            return total;
        }
    }

    /**
     * Counts the occurrences of each value of {@code column} by streaming the
     * dataset through a {@link CountMapProcessorOutput}.
     *
     * @param authCtx authentication context
     * @param dataset dataset to read
     * @param filter optional filter; its row filter and partition clause are
     *               copied into the streaming selection. May be {@code null}
     * @param withinNRecords when non-negative, only that many records are read,
     *                       using head-sequential sampling; when negative, no
     *                       limit is set here
     * @param column name of the column whose values are counted
     * @return value counts; blank values are counted under {@code "__dku_no_value__"}
     * @throws Exception if the handler or the streaming push fails
     */
    protected static CountMap<String> buildCountMap(AuthCtx authCtx, Dataset dataset, InputFilter filter, long withinNRecords, String column) throws Exception {
        // NOTE(review): the handler is only built and closed here; the push below does not use it.
        try (DatasetHandler dh = DatasetHandlerFactory.build(authCtx, dataset)) {
            StreamableDatasetSelection selection = StreamableDatasetSelection.full();
            if (filter == null) {
                selection.filter = null;
            } else {
                selection.filter = filter.getFilter();
                if (filter.hasPartitionsFiltering()) {
                    selection.withSelectedPartitions(filter.getPartitionsClause());
                }
            }
            if (withinNRecords >= 0L) {
                selection.samplingMethod = SamplingParam.SamplingMethod.HEAD_SEQUENTIAL;
                selection.maxRecords = withinNRecords;
            }
            logger.info((Object)"Counting values using full-push-with-filter");
            StreamColumnFactory columns = new StreamColumnFactory();
            StreamRowFactory rows = new StreamRowFactory();
            CountMapProcessorOutput counter = new CountMapProcessorOutput(column, (ColumnFactory)columns);
            UniversalSingleThreadPusher.push(authCtx, dataset, selection, (ProcessorOutput)counter, (ColumnFactory)columns, (RowFactory)rows);
            return counter.map;
        }
    }

    /**
     * Puts an optional filter in front of an output.
     *
     * @param filterOutput filter to insert, or {@code null} for none
     * @param processorOutput output that should receive the rows
     * @return {@code processorOutput} itself when there is no filter; otherwise
     *         {@code filterOutput}, after {@code processorOutput} was set as
     *         its downstream
     */
    protected static ProcessorOutput filterIfNeeded(FilterProcessorOutput filterOutput, ProcessorOutput processorOutput) {
        if (filterOutput == null) {
            return processorOutput;
        }
        filterOutput.setDownstream(processorOutput);
        return filterOutput;
    }

    /**
     * Returns the current value of the {@code processedRowCount} field.
     *
     * @return the stored row count; -1 unless something outside this class changed it
     */
    public long getProcessedRowCount() {
        return processedRowCount;
    }

    /**
     * Returns the current value of the {@code wasSamplingPushedDownToDB} field.
     *
     * @return the stored flag; {@code false} unless something outside this class changed it
     */
    public boolean wasSamplingPushedDownToDB() {
        return wasSamplingPushedDownToDB;
    }

    /**
     * Sink output that counts how often each value of one column occurs in the
     * rows it receives.
     */
    protected static class CountMapProcessorOutput
    extends SinkProcessorOutput {
        /** Key under which rows with a blank value are counted. */
        private static final String NO_VALUE_KEY = "__dku_no_value__";
        /** A progress line is logged each time this many rows have been read. */
        private static final long LOG_EVERY_N_ROWS = 100000L;

        /** Column whose values are counted. */
        private final Column countedColumn;
        /** Number of rows counted so far. */
        private long readRows = 0L;
        /** Occurrences per value; read directly by the enclosing class. */
        CountMap<String> map = new CountMap<String>();

        /**
         * @param column name of the column to count
         * @param cf factory used to resolve the column
         */
        CountMapProcessorOutput(String column, ColumnFactory cf) {
            this.countedColumn = cf.column(column);
        }

        /**
         * Counts the value of the configured column for one row, using a
         * placeholder key when the value is blank.
         *
         * @param row row to account for
         */
        public void emitRow(Row row) throws Exception {
            String value = row.get(this.countedColumn);
            String key = StringUtils.isBlank((String)value) ? NO_VALUE_KEY : value;
            this.map.inc(key);
            this.readRows++;
            if (this.readRows % LOG_EVERY_N_ROWS == 0L) {
                logger.info((Object)("Counted " + this.readRows + " records"));
            }
        }
    }
}

