/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats;

import com.dataiku.dip.coremodel.FormatParams;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.SchemaDetection;
import com.dataiku.dip.formats.FormatFactory;
import com.dataiku.dip.formats.FormatMeta;
import com.dataiku.dip.input.formats.ArchiveCapableFormatExtractor;
import com.dataiku.dip.input.formats.RegexpFieldsBuilderFactory;
import com.dataiku.dip.input.formats.RowFactoryWithContextInfo;
import com.dataiku.dip.output.OutputFormatter;
import com.dataiku.dip.plugin.InputStreamWithContextInfo;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.NotImplementedException;
import com.dataiku.dip.utils.RegexpFieldsMatcher;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.org.apache.commons.io.input.BOMInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Line-oriented format extractor: every line read from the input stream is
 * trimmed and handed to a {@link RegexpFieldsMatcher}; the values it returns
 * are written, in order, into the columns named by the matcher.
 *
 * <p>Lines for which the matcher returns {@code null} are not emitted; a
 * {@code INPUT_DATA_BAD_DATA} warning is recorded for them instead.
 */
public class SmartRegexpFormatExtractor extends ArchiveCapableFormatExtractor {
    private static final DKULogger logger = DKULogger.getLogger("dku");

    /** Number of processed lines between two {@code observer.onInterval} notifications. */
    private static final long PROGRESS_INTERVAL = 500L;

    /** Descriptor for the {@code "regexp_fields"} format; builds extractors from a {@link Config}. */
    public static final FormatMeta<SmartRegexpFormatExtractor, Config> META = new FormatMeta<SmartRegexpFormatExtractor, Config>() {

        @Override
        public String getType() {
            return "regexp_fields";
        }

        @Override
        public String getLabel() {
            return "Regular expressions with fields";
        }

        @Override
        public Class<? extends FormatParams> paramsClass() {
            return Config.class;
        }

        @Override
        public SchemaDetection.SchemaHandlingType getSchemaHandlingType() {
            return SchemaDetection.SchemaHandlingType.TEXT_POSITION_BASED_FIXED_COLUMNS;
        }

        @Override
        public ParamDesc[] getParams() {
            ParamDesc charsetParam = new ParamDesc("charset", "charset").withMandatory(false).withLabel("Charset");
            ParamDesc compressionParam = FormatFactory.getStandardCompressionMethods();
            return new ParamDesc[]{charsetParam, compressionParam};
        }

        /**
         * Creates an extractor whose matcher is built from the {@link Config}
         * steps and auto-add-space flag.
         *
         * @param params must be a {@link Config}; any other type fails the cast
         */
        @Override
        public SmartRegexpFormatExtractor build(AuthCtx authCtx, String projectKey, FormatParams params) {
            Config regexpConfig = (Config) params;
            RegexpFieldsMatcher matcher = RegexpFieldsBuilderFactory.build(regexpConfig.autoAddSpace, regexpConfig.steps);
            return new SmartRegexpFormatExtractor(matcher);
        }

        /** This format is read-only: no output formatter is available. */
        @Override
        public OutputFormatter buildFormatter(AuthCtx authCtx, String projectKey, FormatParams params) {
            throw new NotImplementedException();
        }
    };

    /** Matcher that supplies both the column names and the per-line field values. */
    private final RegexpFieldsMatcher regexpFieldsBuilder;

    /**
     * @param regexpFieldsBuilder matcher used to name the columns and to split each line
     */
    public SmartRegexpFormatExtractor(RegexpFieldsMatcher regexpFieldsBuilder) {
        this.regexpFieldsBuilder = regexpFieldsBuilder;
    }

    /**
     * Reads the stream line by line and emits one row per line the matcher accepts.
     *
     * <p>NOTE(review): the reader below always decodes as UTF-8 (after BOM
     * handling); the {@code "charset"} parameter declared by {@link #META} is
     * not consulted in this method - confirm whether that is intended.
     *
     * @param isn        source stream plus the context information attached to produced rows
     * @param out        receiver of the emitted rows
     * @param cf         factory used to resolve the matcher's column names into columns
     * @param rowFactory underlying row factory, wrapped so rows carry the stream context
     * @param observer   consulted before each read ({@code checkLimit}), notified every
     *                   {@value #PROGRESS_INTERVAL} lines and once at end of stream
     * @return {@code true} when the end of the stream was reached, {@code false}
     *         when {@code observer.checkLimit} stopped the extraction early
     * @throws Exception any failure raised while reading, matching or emitting
     */
    @Override
    protected boolean doExtractStream(InputStreamWithContextInfo isn, ProcessorOutput out, ColumnFactory cf, RowFactory rowFactory, ArchiveCapableFormatExtractor.ArchiveCapableObserver observer) throws Exception {
        InputStream rawStream = isn.getInputStream();
        RowFactoryWithContextInfo contextualRows = new RowFactoryWithContextInfo(rowFactory, isn);

        // Resolve every column once, up front, in the order given by the matcher.
        List<Column> targetColumns = new ArrayList<>();
        for (String name : this.regexpFieldsBuilder.getColumnNames()) {
            targetColumns.add(cf.column(name));
        }

        InputStream bomAwareStream = new BOMInputStream(rawStream);
        long lineCount = 0L;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(bomAwareStream, StandardCharsets.UTF_8))) {
            // The limit is re-checked before every read, including the very first one.
            while (observer.checkLimit(lineCount)) {
                String rawLine = reader.readLine();
                if (rawLine == null) {
                    // End of stream: report the final count and signal completion.
                    observer.onEnd(lineCount);
                    return true;
                }

                String trimmedLine = rawLine.trim();
                List<?> fieldValues = this.regexpFieldsBuilder.exec(trimmedLine);
                if (fieldValues != null) {
                    Row row = contextualRows.row();
                    int columnIndex = 0;
                    for (Object fieldValue : fieldValues) {
                        row.put(targetColumns.get(columnIndex++), (String) fieldValue);
                    }
                    out.emitRow(row);
                } else {
                    this.warnContext.addWarning(WarningsContext.WarningType.INPUT_DATA_BAD_DATA, "Row does not parse " + trimmedLine, logger);
                }

                // Unparsable lines count too: the counter tracks lines read, not rows emitted.
                lineCount++;
                if (lineCount % PROGRESS_INTERVAL == 0L) {
                    observer.onInterval(lineCount);
                }
            }
            // Limit reached before the end of the stream.
            return false;
        }
    }

    /** Parameters of the {@code "regexp_fields"} format. */
    public static class Config implements FormatParams {
        /** Steps passed to {@link RegexpFieldsBuilderFactory#build} to create the matcher. */
        public List<RegexpFieldsBuilderFactory.Step> steps = new ArrayList<>();
        /** Flag passed as the first argument of {@link RegexpFieldsBuilderFactory#build}. */
        public boolean autoAddSpace;
    }
}

