/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.cleansing;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.ProcessorWithResourceFiles;
import com.dataiku.dip.connections.AbstractSQLConnection;
import com.dataiku.dip.data.geo.CountriesGeoDataLoader;
import com.dataiku.dip.data.geo.CountriesGeoDataUtils;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.meanings.AbstractBasicMeaningsService;
import com.dataiku.dip.meanings.NoopBasicMeaningsService;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.BaseProcessorsFactory;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.FilterAndFlagProcessor;
import com.dataiku.dip.shaker.processors.PrepareSnowflakeUDFUtils;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.sql.SnowflakeUDFFilterAndFlagOnBadType;
import com.dataiku.dip.shaker.sql.SnowflakeUDFProcessorTranslator;
import com.dataiku.dip.shaker.types.MeaningDetector;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.util.SecretKeyGenerator;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gson.JsonObject;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FilterAndFlagOnBadType {
    public static final ProcessorMeta<StreamImpl, Parameter> META_FILTER = new FilterAndFlagProcessor.FilterProcessorMeta<StreamImpl, Parameter>(){

        @Override
        public String getName() {
            return "FilterOnBadType";
        }

        @Override
        public String getDocPage() {
            return "filter-on-meaning";
        }

        @Override
        public Category getCategory() {
            return Category.CLEANSING;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.CLEANSING, ProcessorTag.FILTER});
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.FILTER.HELP", "Filter rows from the dataset with invalid values, i.e. those that are invalid for the selected meaning. Alternatively, this processor can clear content from invalid cells instead of filtering entire rows. \n\nMeaning is semantic information about the data and is usually automatically detected from the content of the column: URL, IP Address, Country. As such, each cell can be valid or invalid for a given meaning.\n\n# Options\n\n**Action**\n\nSelect the action to perform on matching (in range) rows or cells: \n\n* Keep matching rows only\n\n* Remove matching rows\n\n* Clear content of matching cells\n\n* Clear content of non-matching cells\n\n**Column**\n\nApply the matching condition to the following: \n\n* A single column\n\n* An explicit list of columns\n\n* All columns matching a regex pattern\n\n* All columns\n\n<u>*Note*</u>\nWhen applying the match condition to several columns (multiple, pattern, all), select whether the row will be considered as matching if all columns match (ALL) or at least one column matches (OR).\n\n**Meaning to check**\n\nSelect which meaning to check cells in the column for: text, decimal, integer, boolean, date, object, array, natural lang., geo...\n\n# Related resources\n\nFor more information on data types (storage vs. meaning) in DSS, please see the <a target=\"_blank\" href=\"https://doc.dataiku.com/dss/latest/schemas/definitions.html\">reference documentation</a>. If you prefer a hands-on approach, check out the article on meanings in the <a target='_blank' href=\"https://knowledge.dataiku.com/latest/courses/basics/explore-data/concept-meaning.html?highlight=meaning\">Dataiku Knowledge Base</a> or explore <a target=\"_blank\" href=\"https://doc.dataiku.com/dss/latest/schemas/user-defined-meanings.html\">user-defined meanings.\n");
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams sp, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            return this.getCapabilities(sp, report, dialect, null);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
            return FilterAndFlagOnBadType.commonGetCapabilities(params, report, dialect, conn);
        }

        @Override
        public ProcessorDesc describe(String language) {
            return ProcessorDesc.withGenericForm(this.getName(), this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.FILTER.DESCRIPTION", 1.actionVerb("Filter") + " invalid rows/cells")).withParam("type", "type", true, false, this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.DESCRIPTION.TYPE", "Meaning to check")).withFilterAndFlagMode("FILTER");
        }

        @Override
        public Object selfReport(Parameter parameter) {
            JsonObject o = FilterAndFlagProcessor.selfReport(parameter);
            try {
                Class.forName("com.dataiku.dip.shaker.types." + parameter.type);
            }
            catch (Exception e) {
                o.remove("type");
                o.addProperty("udm", Boolean.valueOf(true));
            }
            return o;
        }

        @Override
        public StreamImpl build(Parameter parameter) throws Exception {
            throw new UnsupportedOperationException("This processor requires a PipelineContext");
        }

        @Override
        public StreamImpl build(Parameter parameter, BaseProcessorsFactory.PipelineContext context) throws Exception {
            if (context == null) {
                return this.build(parameter);
            }
            return new StreamImpl(parameter, context.basicMeaningsService);
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) throws IOException {
            return new SnowflakeUDFSQLTranslator((Parameter)parameter);
        }

        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.cleansing.FilterAndFlagOnBadTypeNS";
        }
    };
    public static final ProcessorMeta<StreamImpl, Parameter> META_FLAG = new FilterAndFlagProcessor.FlagProcessorMeta<StreamImpl, Parameter>(){

        @Override
        public String getName() {
            return "FlagOnBadType";
        }

        @Override
        public String getDocPage() {
            return "flag-on-meaning";
        }

        @Override
        public Category getCategory() {
            return Category.CLEANSING;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.CLEANSING, ProcessorTag.FILTER});
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.FLAG.HELP", "This processor flags rows with invalid values, ie values not matching a selected meaning.\n\nIt creates a column which will contain '1' if the row matches (invalid), nothing else\n\n# Columns selection\n\nThis processor can check its matching condition on multiple columns:\n\n* A single colum\n* An explicit list of columns\n* All columns matching a given pattern\n* All columns\n\nYou can select whether the row will be considered as matching if:\n\n* All columns are matching\n* Or, at least one column is matching\n\n");
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams sp, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            return this.getCapabilities(sp, report, dialect, null);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
            return FilterAndFlagOnBadType.commonGetCapabilities(params, report, dialect, conn);
        }

        @Override
        public ProcessorDesc describe(String language) {
            return ProcessorDesc.withGenericForm(this.getName(), this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.FLAG.DESCRIPTION", 2.actionVerb("Flag") + " invalid rows")).withParam("type", "type", true, false, this.translate(language, "SHAKER.PROCESSOR.FilterAndFlagOnBadType.DESCRIPTION.TYPE", "Meaning to check")).withFilterAndFlagMode("FLAG");
        }

        @Override
        public Object selfReport(Parameter parameter) {
            JsonObject o = FilterAndFlagProcessor.selfReport(parameter);
            try {
                Class.forName("com.dataiku.dip.shaker.types." + parameter.type);
            }
            catch (Exception e) {
                o.remove("type");
                o.addProperty("udm", Boolean.valueOf(true));
            }
            return o;
        }

        @Override
        public StreamImpl build(Parameter parameter) throws Exception {
            throw new UnsupportedOperationException("This processor requires a PipelineContext");
        }

        @Override
        public StreamImpl build(Parameter parameter, BaseProcessorsFactory.PipelineContext context) throws Exception {
            if (context == null) {
                return this.build(parameter);
            }
            return new StreamImpl(parameter, context.basicMeaningsService);
        }

        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.cleansing.FilterAndFlagOnBadTypeNS";
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) throws IOException {
            return new SnowflakeUDFSQLTranslator((Parameter)parameter);
        }
    };

    /**
     * Computes the capabilities shared by the filter and flag variants.
     *
     * <p>The summary always declares the native Spark implementation and known inputs/outputs.
     * SQL translatability is only considered when a Snowflake UDF can be used on {@code conn}:
     * it is granted for meanings that {@link NoopBasicMeaningsService} has a detector for, except
     * "USStateMeaning"; otherwise it is recorded as a "could" together with the reason.
     *
     * @param params  step parameters; must be a {@link Parameter}
     * @param report  recorded report (not used here)
     * @param dialect SQL dialect (not used here)
     * @param conn    SQL connection handed to the Snowflake UDF check
     * @return the capabilities summary
     */
    private static ProcessorMeta.ProcessorCapabilitiesSummary commonGetCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
        ProcessorMeta.ProcessorCapabilitiesSummary summary = new ProcessorMeta.ProcessorCapabilitiesSummary().withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL, ProcessorCapabilities.KNOWN_INPUTS, ProcessorCapabilities.KNOWN_OUTPUTS);
        Parameter step = (Parameter)params;
        if (!PrepareSnowflakeUDFUtils.canUseSnowflakeUDF(conn)) {
            // No Snowflake UDF available: nothing to say about SQL translation.
            return summary;
        }

        NoopBasicMeaningsService meanings = new NoopBasicMeaningsService();
        String blocker = null;
        if (!meanings.hasDetector(step.type)) {
            blocker = "Custom meanings are not supported in Snowflake";
        } else if ("USStateMeaning".equals(step.type)) {
            blocker = "'US State' meaning is not supported in Snowflake";
        }

        if (blocker == null) {
            summary.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
        } else {
            summary.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, blocker);
        }
        return summary;
    }

    /**
     * Step parameters of the filter/flag-on-bad-type processors: the common filter-and-flag
     * settings plus the meaning to validate cells against.
     */
    public static class Parameter extends FilterAndFlagProcessor.FilterAndFlagParams implements StepParams {
        private static final long serialVersionUID = -1L;

        /** Name of the meaning to check, e.g. "CountryMeaning" or "USStateMeaning". */
        public String type;
    }

    /**
     * Streaming implementation: a cell matches when it is non-null and the detector built for the
     * configured meaning does not validate it.
     */
    static class StreamImpl extends FilterAndFlagProcessor implements Processor, ProcessorWithResourceFiles {
        /** Key under which the countries database file is requested and handed back. */
        private static final String COUNTRIES_DB_KEY = "dku.countries.db";

        private final Parameter parameter;
        private final AbstractBasicMeaningsService basicMeaningsService;
        /** Detector for the configured meaning; assigned in {@link #init()}. */
        private MeaningDetector type;

        public StreamImpl(Parameter parameter, AbstractBasicMeaningsService basicMeaningsService) throws Exception {
            this.basicMeaningsService = basicMeaningsService;
            this.parameter = parameter;
        }

        /** Nothing to do once processing is over. */
        public void postProcess() {
        }

        /** Runs the parent initialization, then builds the detector for the configured meaning. */
        @Override
        public void init() throws Exception {
            super.init();
            String meaningName = this.parameter.type;
            this.type = this.basicMeaningsService.buildSingleDetector(meaningName);
        }

        /**
         * @return {@code true} when the cell holds a value that the detector rejects;
         *         {@code false} for a null cell or a valid value
         */
        @Override
        public boolean matchCell(Row row, Column column) throws Exception {
            String cell = row.get(column);
            if (cell == null) {
                return false;
            }
            return !this.type.validates(cell);
        }

        @Override
        public FilterAndFlagProcessor.FilterAndFlagParams getParams() {
            return this.parameter;
        }

        /** Requests the countries CSV located under the install folder. */
        @Override
        public Map<String, File> gatherRequirements() {
            File countriesDb = new File(ApplicationConfigurator.getInstallFolder(), "resources/publicdata/countries/countries.csv.gz");
            Map<String, File> needed = new HashMap<>();
            needed.put(COUNTRIES_DB_KEY, countriesDb);
            return needed;
        }

        /** Loads the countries data from the file provided for the requested key. */
        @Override
        public void setRequiredFiles(Map<String, File> requiredFiles) {
            File countriesDb = requiredFiles.get(COUNTRIES_DB_KEY);
            CountriesGeoDataLoader.load(countriesDb);
        }
    }

    /**
     * Translates the step to SQL through a Snowflake UDF whose handler is
     * {@code IsInvalidMeaningUDF.check}. Each translator instance uses its own generated
     * function name.
     */
    private static class SnowflakeUDFSQLTranslator
    implements SnowflakeUDFProcessorTranslator {
        /** Meaning that additionally needs the countries data shipped with the UDF. */
        private static final String COUNTRY_MEANING = "CountryMeaning";

        // NOTE(review): the prefix says "isValid" while the handler class is named "IsInvalid";
        // the name is only a generated identifier, so it is kept as-is - confirm intent.
        private final String functionName;
        private final Parameter parameter;

        private SnowflakeUDFSQLTranslator(Parameter parameter) {
            this.parameter = parameter;
            this.functionName = "isValidMeaning_" + SecretKeyGenerator.generate();
        }

        /** Tells whether the configured meaning is the one that requires the countries data. */
        private boolean needsCountriesData() {
            return COUNTRY_MEANING.equals(this.parameter.type);
        }

        /**
         * Lists the resources to provide for the UDF: the standard jars, plus the countries
         * resource folder when the country meaning is checked.
         *
         * @throws IOException declared by the interface method
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> getUDFResources() throws IOException {
            List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> resources = SnowflakeUDFProcessorTranslator.createStandardResourceList();
            SnowflakeUDFProcessorTranslator.addStandardResources(resources, SnowflakeUDFProcessorTranslator.StandardResource.SHADELIB, SnowflakeUDFProcessorTranslator.StandardResource.COMMONS_LANG_JAR, SnowflakeUDFProcessorTranslator.StandardResource.GUAVA_JAR, SnowflakeUDFProcessorTranslator.StandardResource.DKU_CORE_JAR, SnowflakeUDFProcessorTranslator.StandardResource.LOG4J_JAR);
            if (this.needsCountriesData()) {
                resources.add(new SnowflakeUDFProcessorTranslator.SnowflakeUDFResource(CountriesGeoDataUtils.getStandardResourceFolder(), ""));
            }
            return resources;
        }

        /**
         * Declares the single UDF used by the translation: it takes (data STRING, meaning STRING)
         * and returns BOOLEAN. The same standard resources as {@link #getUDFResources()} are
         * imported, plus "countries.csv.gz" for the country meaning.
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef> getUDFs() {
            SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef def = new SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef(this.functionName, "com.dataiku.dip.shaker.processors.cleansing.IsInvalidMeaningUDF.check", "data STRING, meaning STRING", "STRING, STRING", "BOOLEAN");
            def.importStandardResources(SnowflakeUDFProcessorTranslator.StandardResource.SHADELIB, SnowflakeUDFProcessorTranslator.StandardResource.COMMONS_LANG_JAR, SnowflakeUDFProcessorTranslator.StandardResource.GUAVA_JAR, SnowflakeUDFProcessorTranslator.StandardResource.DKU_CORE_JAR, SnowflakeUDFProcessorTranslator.StandardResource.LOG4J_JAR);
            if (this.needsCountriesData()) {
                def.imports.add("countries.csv.gz");
            }
            // Plain varargs call: the decompiler's (Object[]) cast produced an ArrayList<Object>,
            // which does not match the declared return type.
            return Lists.newArrayList(def);
        }

        /** Applies the Snowflake translation of this step on top of the given query chain. */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            return new SnowflakeUDFFilterAndFlagOnBadType(chain, this.parameter, this.functionName).apply();
        }
    }
}

