/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.expr;

import com.dataiku.dip.connections.AbstractSQLConnection;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.SingleRowProcessor;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.i18n.TranslationService;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.PrepareSnowflakeUDFUtils;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.processors.expr.ExtractNumbersAlgorithm;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.sql.SnowflakeUDFProcessorTranslator;
import com.dataiku.dip.shaker.text.Labelled;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.util.SecretKeyGenerator;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;

public class ExtractNumbers {
    // Key of the recorded-report entry holding the JSON array of column names that the
    // streaming processor created when expanding numbers into separate columns.
    private static final String PRODUCED_COLUMN = "producedColumns";
    public static final ProcessorMeta<StreamImpl, Parameter> META = new ProcessorMeta<StreamImpl, Parameter>(){

        @Override
        public String getName() {
            return "ExtractNumbers";
        }

        @Override
        public String getDocPage() {
            return "extract-numbers";
        }

        @Override
        public Category getCategory() {
            return Category.TRANSFORMATION;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.STRING, ProcessorTag.MATH, ProcessorTag.NLP});
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.HELP", "Extract numerical values from a text column.\n\n# Options\n**Extract several values**\nBy default, the processor extracts several values and outputs each detected number into a separate column, suffixed with the index of the number. Unselect this option to extract only the first found number.\n**Extract values into a JSON array**\nOutput the found number(s) in a single column as a JSON-array. \nNote: In SQL mode, the number of output columns must be fixed beforehand. It is therefore extrapolated from the sample.\n**Expand \u2018k\u2019 to \u20181000\u2019 and \u2018m\u2019 to \u20181000000\u2019**\nAutomatically expand notations like \u201810K\u2019 and \u20185M\u2019\n**Decimal separator**\nUse the program's best guess or choose from between comma and dot separators.");
        }

        @Override
        public ProcessorDesc describe(String language) {
            return new ProcessorDesc(this.getName(), this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION", 1.actionVerb("Extract") + " numbers"), "", true).withMNEColParam("input", this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.INPUT", "Input column")).withParam("output", "string", false, true, this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.OUTPUT", "Output column or column prefix")).withBoolDefaultTrue("multipleValues", this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.MULTIPLE_VALUES", "Extract several values"), "").withBool("extractToJson", this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.EXTRACT_TO_JSON", "Extract values into a JSON array")).withBool("replaceMultipliers", this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.REPLACE_MULTIPLIERS", "Expand 'k' to '1000' and 'm' to '1000000'")).withParam(ParamDesc.advancedSelect("delimiter", this.translate(language, "SHAKER.PROCESSOR.ExtractNumbers.DESCRIPTION.DELIMITER", "Decimal separator"), "", LabelledDecimalDelimiter.class, language).withDefaultValue(LabelledDecimalDelimiter.BEST_GUESS));
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
            ProcessorMeta.ProcessorCapabilitiesSummary ret = new ProcessorMeta.ProcessorCapabilitiesSummary();
            if (PrepareSnowflakeUDFUtils.canUseSnowflakeUDF(conn)) {
                if (report != null && report.report.has(ExtractNumbers.PRODUCED_COLUMN)) {
                    ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
                } else {
                    ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: producedColumns was not recorded");
                }
            }
            return ret;
        }

        @Override
        public Object selfReport(Parameter parameter) {
            return JSON.deepCopyExcept((Object)((Object)parameter), (String[])new String[]{"input", "output"});
        }

        @Override
        public StreamImpl build(Parameter parameter) {
            return new StreamImpl(parameter);
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) {
            return new SnowflakeUDFSQLTranslator((Parameter)parameter, (JsonArray)report.report.get(ExtractNumbers.PRODUCED_COLUMN));
        }

        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof Parameter)) {
                throw new IllegalArgumentException("Unsupported param type: " + pss.params.getClass().getSimpleName());
            }
            Parameter numbersExtractParam = (Parameter)pss.params;
            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = new DatasetPairLineage((DatasetPairLineage)previousDatasetPairLineage);
                if (numbersExtractParam.multipleValues && !numbersExtractParam.extractToJson) {
                    if (pss.designTimeReport != null && pss.designTimeReport.report != null && pss.designTimeReport.report.has(ExtractNumbers.PRODUCED_COLUMN)) {
                        JsonArray producedColumns = (JsonArray)pss.designTimeReport.report.get(ExtractNumbers.PRODUCED_COLUMN);
                        for (JsonElement outputColumn : producedColumns) {
                            updatedDatasetPairLineage.addFactorizedColumnRelations(numbersExtractParam.input, outputColumn.getAsString());
                        }
                    } else {
                        updatedRecipeLineage.setUncertain(true);
                    }
                }
                if ((!numbersExtractParam.multipleValues || numbersExtractParam.extractToJson) && StringUtils.isNotBlank((String)numbersExtractParam.output) && !numbersExtractParam.output.equals(numbersExtractParam.input)) {
                    updatedDatasetPairLineage.removeRelationsOnColumn(numbersExtractParam.output);
                    updatedDatasetPairLineage.addFactorizedColumnRelations(numbersExtractParam.input, numbersExtractParam.output);
                }
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };

    /**
     * Looks up a translation through the {@link TranslationService} bean, without context
     * and without format arguments.
     *
     * @param language      language code of the requested translation
     * @param translationID key of the message to translate
     * @param defaultValue  text handed to the service as the default value
     * @return the string returned by the translation service
     */
    private static String translate(String language, String translationID, String defaultValue) {
        return ((TranslationService)SpringUtils.getBean(TranslationService.class))
                .translateNoContext(language, translationID, defaultValue, new Object[0]);
    }

    /**
     * Row-by-row implementation of the processor.
     * <p>
     * Depending on the parameters it writes either the first extracted number, a JSON
     * array of the extracted numbers, or one column per extracted number. In the last
     * mode the names of the created columns are remembered and exposed through
     * {@link #getRecordedReport()}.
     */
    private static class StreamImpl
    extends SingleRowProcessor
    implements Processor,
    ProcessorWithRecordedReport {
        private Column inCD;
        // Only resolved in single-value and JSON modes; stays null in multi-column mode.
        private Column outCD;
        private final Parameter param;
        private final ExtractNumbersAlgorithm ena;
        // Insertion-ordered so that the report lists columns in the order they were first created.
        private final Set<String> producedColumns = new LinkedHashSet<String>();

        public StreamImpl(Parameter param) {
            this.param = param;
            this.ena = new ExtractNumbersAlgorithm(param);
        }

        /**
         * Resolves the input column and, for single-value and JSON modes, the output
         * column (a new column after the input, or the input column itself when no
         * output name is set). In multi-column mode an empty output prefix defaults to
         * the input name followed by an underscore.
         */
        public void init() throws Exception {
            this.inCD = this.getColumnFactory().column(this.param.input, Processor.ProcessorRole.INPUT_COLUMN);
            boolean singleOutputColumn = !this.param.multipleValues || this.param.extractToJson;
            if (singleOutputColumn) {
                if (StringUtils.isEmpty(this.param.output)) {
                    this.outCD = this.getColumnFactory().column(this.param.input, Processor.ProcessorRole.OUTPUT_COLUMN);
                } else {
                    this.outCD = this.getColumnFactory().columnAfter(this.param.input, this.param.output, Processor.ProcessorRole.OUTPUT_COLUMN);
                }
            } else if (StringUtils.isEmpty(this.param.output)) {
                // NOTE: the default prefix is written back into the parameter object, as before.
                this.param.output = this.param.input + "_";
            }
        }

        /**
         * Extracts numbers from the input cell of {@code row} and writes them according
         * to the configured mode. In single-value and JSON modes a null result deletes
         * the output cell.
         */
        public void processRow(Row row) throws Exception {
            String inV = row.get(this.inCD);
            if (!this.param.multipleValues) {
                Double res = this.ena.processForSingleValue(inV);
                if (res == null) {
                    row.delete(this.outCD);
                } else {
                    row.put(this.outCD, res.doubleValue());
                }
                return;
            }
            if (this.param.extractToJson) {
                String res = this.ena.processForJSONArray(inV);
                if (res == null) {
                    row.delete(this.outCD);
                } else {
                    row.put(this.outCD, res);
                }
                return;
            }
            // One column per extracted number, named <prefix><index>, each placed after the previous one.
            List<Double> values = this.ena.processForMultipleValues(inV);
            String beforeCol = this.param.input;
            int colCount = 0;
            for (Double d : values) {
                String newCol = this.param.output + colCount;
                Column out = this.getColumnFactory().columnAfter(beforeCol, newCol, Processor.ProcessorRole.OUTPUT_COLUMN);
                row.put(out, d.toString());
                this.producedColumns.add(newCol);
                ++colCount;
                beforeCol = newCol;
            }
        }

        public void postProcess() throws Exception {
        }

        /**
         * Returns a report whose produced-columns entry is the JSON array of every column
         * name created so far in multi-column mode (empty in the other modes).
         */
        @Override
        public ProcessorWithRecordedReport.ProcessorRecordedReport getRecordedReport() {
            ProcessorWithRecordedReport.ProcessorRecordedReport ret = new ProcessorWithRecordedReport.ProcessorRecordedReport();
            JsonArray arr = new JsonArray();
            for (String columnName : this.producedColumns) {
                arr.add(columnName);
            }
            ret.report.add(ExtractNumbers.PRODUCED_COLUMN, arr);
            return ret;
        }
    }

    /**
     * Translates the step to SQL by calling a Snowflake Java UDF registered under a
     * generated name. The list of output columns for multi-column mode is taken from the
     * produced-columns array recorded by the streaming run.
     */
    private static class SnowflakeUDFSQLTranslator
    implements SnowflakeUDFProcessorTranslator {
        // All three UDF variants share the same argument list.
        private static final String UDF_ARGUMENTS = "inV STRING, delimiter STRING, replaceMultipliers BOOLEAN";
        private static final String UDF_ARGUMENT_TYPES = "STRING, STRING, BOOLEAN";

        private final String functionName;
        private final Parameter params;
        private final List<String> producedColumns = new ArrayList<String>();

        /**
         * @param params               step parameters
         * @param producedColumnsArray recorded names of the output columns; a null array
         *                             is treated as empty
         */
        private SnowflakeUDFSQLTranslator(Parameter params, JsonArray producedColumnsArray) {
            this.functionName = "extractNumbers_" + SecretKeyGenerator.generate();
            this.params = params;
            if (producedColumnsArray != null) {
                for (JsonElement elt : producedColumnsArray) {
                    this.producedColumns.add(elt.getAsString());
                }
            }
        }

        /** Returns the standard resources, plus the org.json jar when either multi-value option is set. */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> getUDFResources() throws IOException {
            List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> resources = SnowflakeUDFProcessorTranslator.createStandardResourceList();
            if (this.params.multipleValues || this.params.extractToJson) {
                SnowflakeUDFProcessorTranslator.addStandardResources(resources, SnowflakeUDFProcessorTranslator.StandardResource.ORGJSON_JAR);
            }
            return resources;
        }

        /**
         * Declares the single UDF used by the step: the single-value handler returning
         * DOUBLE, or the multiple-values handler returning STRING (JSON mode) or ARRAY
         * (multi-column mode).
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef> getUDFs() {
            SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef def;
            if (!this.params.multipleValues) {
                def = new SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef(this.functionName, "com.dataiku.dip.shaker.processors.expr.ExtractNumbersUDF.processForSingleValue", UDF_ARGUMENTS, UDF_ARGUMENT_TYPES, "DOUBLE");
            } else {
                String returnType = this.params.extractToJson ? "STRING" : "ARRAY";
                def = new SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef(this.functionName, "com.dataiku.dip.shaker.processors.expr.ExtractNumbersUDF.processForMultipleValues", UDF_ARGUMENTS, UDF_ARGUMENT_TYPES, returnType);
                def.importStandardResources(SnowflakeUDFProcessorTranslator.StandardResource.ORGJSON_JAR);
            }
            // Pass the element directly so the list is inferred with the declared element type.
            return Lists.newArrayList(def);
        }

        /**
         * Adds the UDF call to the query. Single-value and JSON modes add or replace one
         * column next to the input. Multi-column mode selects the UDF result into a
         * temporary column, wraps the query in a subquery and reads each recorded output
         * column from the temporary array by index before dropping the temporary column.
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            SQLDialect d = chain.getDialect();
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            String outputColumnName = StringUtils.isEmpty(this.params.output) ? this.params.input : this.params.output;
            String funcCall = String.format("%s(%s, %s, %s)", this.functionName, d.quoteIdentifier(this.params.input), d.quoteString(this.params.delimiter.toString()), this.params.replaceMultipliers);
            // The input must be referenced from an enclosing query if this query already produces it.
            if (chain.isCreatedOrModifiedByCurrentQuery(this.params.input)) {
                chain = chain.makeSubquery();
            }
            if (!this.params.multipleValues) {
                chain.addAfterOrReplaceColumn(chain.getCurrentColumn(this.params.input), ebf.expr(funcCall), Type.DOUBLE, outputColumnName, false);
                return chain;
            }
            if (this.params.extractToJson) {
                chain.addAfterOrReplaceColumn(chain.getCurrentColumn(this.params.input), ebf.expr(funcCall), Type.STRING, outputColumnName, false);
                return chain;
            }
            String tmpColumnName = "EXTRACT_NUMBERS_" + SecretKeyGenerator.generate(16);
            chain.select(ebf.expr(funcCall), tmpColumnName);
            SQLQueryWithSchema outer = chain.makeSubquery();
            SchemaColumn inputSchemaColumn = outer.getCurrentColumn(this.params.input);
            // NOTE(review): every column is anchored on the input column, whereas the streaming
            // path anchors each column on the previous one - confirm the resulting column order.
            for (int i = 0; i < this.producedColumns.size(); ++i) {
                outer.addAfterOrReplaceColumn(inputSchemaColumn, ebf.expr(d.quoteIdentifier(tmpColumnName) + "[" + i + "]"), Type.DOUBLE, this.producedColumns.get(i), false);
            }
            outer.deleteColumn(tmpColumnName);
            outer.deleteSelect(tmpColumnName);
            return outer;
        }
    }

    /**
     * Step parameters of the processor: the algorithm options inherited from
     * {@link ExtractNumbersAlgorithm.Parameter} plus the column and output-mode settings.
     */
    public static class Parameter
    extends ExtractNumbersAlgorithm.Parameter
    implements StepParams {
        private static final long serialVersionUID = -1L;

        /** Name of the column the numbers are extracted from. */
        public String input;

        /** Name of the output column, or prefix of the output columns in multi-column mode. */
        public String output = "";

        /**
         * When false only a single value is extracted. Defaults to false here, while the
         * UI descriptor declares this option with a default of true.
         */
        public boolean multipleValues;

        /** When set together with {@link #multipleValues}, the values are written as one JSON array. */
        public boolean extractToJson;

        /** Performs no checks. */
        public void validate() throws IllegalArgumentException {
        }
    }

    /**
     * Choices of the "Decimal separator" selector, each with a translatable label.
     */
    public static enum LabelledDecimalDelimiter implements Labelled
    {
        BEST_GUESS("SHAKER.PROCESSOR.ExtractNumbers.LabelledDecimalDelimiter.BEST_GUESS", "Best guess"),
        COMMA("SHAKER.PROCESSOR.ExtractNumbers.LabelledDecimalDelimiter.COMMA", "Comma"),
        DOT("SHAKER.PROCESSOR.ExtractNumbers.LabelledDecimalDelimiter.DOT", "Dot");

        /** Translation key of the label. */
        private final String translationId;
        /** Label passed to the translation lookup as the default value. */
        private final String defaultLabel;

        LabelledDecimalDelimiter(String translationId, String defaultLabel) {
            this.translationId = translationId;
            this.defaultLabel = defaultLabel;
        }

        /** Returns the label for the "en" language. */
        @Override
        public String getLabel() {
            return this.getLabel("en");
        }

        /** Returns the label translated for {@code language}. */
        @Override
        public String getLabel(String language) {
            return ExtractNumbers.translate(language, this.translationId, this.defaultLabel);
        }

    }
}

