/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.typespecific;

import com.dataiku.dip.connections.AbstractSQLConnection;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.SingleRowProcessor;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.exceptions.IllegalConfigurationException;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.PrepareSnowflakeUDFUtils;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.processors.typespecific.URLSplitterAlgorithm;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.sql.SnowflakeUDFProcessorTranslator;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.util.SecretKeyGenerator;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.BooleanUtils;

public class URLSplitter {
    public static final ProcessorMeta<StreamImpl, URLSplitterParams> META = new ProcessorMeta<StreamImpl, URLSplitterParams>(){

        @Override
        public String getName() {
            return "URLSplitter";
        }

        @Override
        public String getDocPage() {
            return "url-split";
        }

        @Override
        public Category getCategory() {
            return Category.WEB;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.WEB});
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.URLSplitter.HELP", "This processor splits the elements of an URL into multiple columns\n\nA valid URL is in the form `scheme://hostname[:port][/path][?querystring][#anchor]`\n\nThe output values are produced in columns prefixed by the input column name.\n\nIf the input does not contain a valid URL, no output value is produced.\n# Examples\n* `http://www.google.com/search?q=query#results`\n* `ftp://ftp.server.com/pub/downloads/myfile.tar.gz`");
        }

        @Override
        public Class<URLSplitterParams> stepParamClass() {
            return URLSplitterParams.class;
        }

        @Override
        public ProcessorDesc describe(String language) {
            return ProcessorDesc.withGenericForm(this.getName(), this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION", 1.actionVerb("Split") + " URL (into protocol, host, port, ...)")).withMNEColParam("column", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.INPUT_URL_COLUMN", "Input URL column")).withBoolDefaultTrue("extractScheme", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_SCHEME", "Extract scheme"), "").withBoolDefaultTrue("extractHost", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_HOSTNAME", "Extract hostname"), "").withBoolDefaultTrue("extractPort", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_PORT", "Extract port"), "").withBoolDefaultTrue("extractPath", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_PATH", "Extract path"), "").withBoolDefaultTrue("extractQueryString", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_QUERY_STRING", "Extract query string"), "").withBoolDefaultTrue("extractAnchor", this.translate(language, "SHAKER.PROCESSOR.URLSplitter.DESCRIPTION.EXTRACT_ANCHOR", "Extract anchor"), "");
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams sp, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            return this.getCapabilities(sp, report, dialect, null);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
            ProcessorMeta.ProcessorCapabilitiesSummary ret = new ProcessorMeta.ProcessorCapabilitiesSummary();
            if (PrepareSnowflakeUDFUtils.canUseSnowflakeUDF(conn)) {
                ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            }
            return ret;
        }

        @Override
        public Object selfReport(URLSplitterParams p) {
            return JSON.deepCopyExcept((Object)p, (String[])new String[]{"column"});
        }

        @Override
        public StreamImpl build(URLSplitterParams parameter) throws Exception {
            return new StreamImpl(parameter);
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) {
            return new SnowflakeUDFSQLTranslator((URLSplitterParams)parameter);
        }

        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof URLSplitterParams)) {
                throw new IllegalArgumentException("Unsupported param type: " + pss.params.getClass().getSimpleName());
            }
            URLSplitterParams invalidCellsSplitterParams = (URLSplitterParams)pss.params;
            if (StringUtils.isBlank((String)invalidCellsSplitterParams.column)) {
                throw new IllegalConfigurationException("Missing columns information for lineage on the invalid cells splitter processor.");
            }
            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = new DatasetPairLineage((DatasetPairLineage)previousDatasetPairLineage);
                if (invalidCellsSplitterParams.extractAnchor) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_anchor");
                }
                if (invalidCellsSplitterParams.extractHost) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_host");
                }
                if (invalidCellsSplitterParams.extractPath) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_path");
                }
                if (invalidCellsSplitterParams.extractPort) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_port");
                }
                if (invalidCellsSplitterParams.extractScheme) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_scheme");
                }
                if (invalidCellsSplitterParams.extractQueryString) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(invalidCellsSplitterParams.column, invalidCellsSplitterParams.column + "_querystring");
                }
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };

    /**
     * Row-by-row implementation of the URL splitter. {@link #init()} resolves the
     * input column and declares one output column per enabled URL component;
     * {@link #processRow(Row)} parses the input value and fills those columns.
     */
    static class StreamImpl
    extends SingleRowProcessor
    implements Processor {
        private Column columnDesc;
        private Column anchorColDesc;
        private Column queryStringColDesc;
        private Column pathColDesc;
        private Column portColDesc;
        private Column hostColDesc;
        private Column schemeColDesc;
        private final URLSplitterParams parameter;

        /**
         * @param parameter step parameters naming the input column and the
         *                  components to extract
         */
        public StreamImpl(URLSplitterParams parameter) {
            this.parameter = parameter;
        }

        /**
         * Resolves the input column, then declares the enabled output columns.
         * Every output column is requested right after the input column, in the
         * order anchor, query string, path, port, host, scheme.
         */
        public void init() {
            this.columnDesc = this.getColumnFactory().column(this.parameter.column, Processor.ProcessorRole.INPUT_COLUMN);
            if (this.parameter.extractAnchor) {
                this.anchorColDesc = this.urlPartColumn("_anchor");
            }
            if (this.parameter.extractQueryString) {
                this.queryStringColDesc = this.urlPartColumn("_querystring");
            }
            if (this.parameter.extractPath) {
                this.pathColDesc = this.urlPartColumn("_path");
            }
            if (this.parameter.extractPort) {
                this.portColDesc = this.urlPartColumn("_port");
            }
            if (this.parameter.extractHost) {
                this.hostColDesc = this.urlPartColumn("_host");
            }
            if (this.parameter.extractScheme) {
                this.schemeColDesc = this.urlPartColumn("_scheme");
            }
        }

        /**
         * Declares the output column named {@code <input column><suffix>},
         * positioned after the input column.
         */
        private Column urlPartColumn(String suffix) {
            return this.getColumnFactory().columnAfter(this.parameter.column, this.parameter.column + suffix, Processor.ProcessorRole.OUTPUT_COLUMN);
        }

        /**
         * Splits the URL found in the input column of {@code row} and writes each
         * enabled component to its output column. Nothing is written when
         * {@code URLSplitterAlgorithm.process} returns {@code null}.
         */
        public void processRow(Row row) throws Exception {
            String url = row.get(this.columnDesc);
            URLSplitterAlgorithm.URLSplitterResult parts = URLSplitterAlgorithm.process(url);
            if (parts == null) {
                return;
            }
            // An output column descriptor is null when its component is disabled.
            if (this.anchorColDesc != null) {
                row.put(this.anchorColDesc, parts.anchor);
            }
            if (this.queryStringColDesc != null) {
                row.put(this.queryStringColDesc, parts.queryString);
            }
            if (this.pathColDesc != null) {
                row.put(this.pathColDesc, parts.path);
            }
            if (this.portColDesc != null) {
                // The port is written in its string form.
                row.put(this.portColDesc, "" + parts.port);
            }
            if (this.hostColDesc != null) {
                row.put(this.hostColDesc, parts.host);
            }
            // The scheme is the only component skipped when the parsed value is null.
            if (this.schemeColDesc != null && parts.protocol != null) {
                row.put(this.schemeColDesc, parts.protocol);
            }
        }

        /** Nothing to do once all rows have been processed. */
        public void postProcess() {
        }
    }

    /**
     * SQL translation of the URL splitter based on a Snowflake UDF: the UDF is
     * called once on the input column, its result is stored in a temporary
     * column, and each enabled output column reads one element of that result.
     */
    private static final class SnowflakeUDFSQLTranslator
    implements SnowflakeUDFProcessorTranslator {
        /** UDF name, made unique per translator instance by a generated suffix. */
        private final String functionName = "urlSplitter_" + SecretKeyGenerator.generate();
        private final String inputColumn;
        /** Enabled flag of each component, index-aligned with {@link #outputColumns}. */
        private final boolean[] selectedColumns;
        /**
         * Output column suffixes; the index of a suffix is also the index read
         * from the UDF result (assumes the UDF returns the components in this
         * order -- TODO confirm against URLSplitterUDF).
         */
        private final String[] outputColumns;

        private SnowflakeUDFSQLTranslator(URLSplitterParams params) {
            this.inputColumn = params.column;
            this.selectedColumns = new boolean[]{params.extractAnchor, params.extractQueryString, params.extractPath, params.extractPort, params.extractHost, params.extractScheme};
            this.outputColumns = new String[]{"anchor", "querystring", "path", "port", "host", "scheme"};
        }

        /**
         * Returns the standard resource list extended with
         * {@code StandardResource.DKU_CORE_JAR}.
         *
         * @throws IOException declared by the interface; propagated from the
         *                     resource helpers if they raise it
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> getUDFResources() throws IOException {
            List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> resources = SnowflakeUDFProcessorTranslator.createStandardResourceList();
            SnowflakeUDFProcessorTranslator.addStandardResources(resources, SnowflakeUDFProcessorTranslator.StandardResource.DKU_CORE_JAR);
            return resources;
        }

        /**
         * Returns the single UDF definition used by {@link #translate}: it is
         * named {@link #functionName}, targets
         * {@code URLSplitterUDF.process} and imports the DKU core jar.
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef> getUDFs() {
            SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef def = new SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef(this.functionName, "com.dataiku.dip.shaker.processors.typespecific.URLSplitterUDF.process", "data STRING", "STRING", "ARRAY");
            def.importStandardResources(SnowflakeUDFProcessorTranslator.StandardResource.DKU_CORE_JAR);
            // The decompiled form cast the array to Object[], which yields a
            // List<Object> and does not type-check against the return type.
            return Lists.newArrayList(def);
        }

        /**
         * Adds the enabled output columns to the query.
         *
         * @param chain query built so far
         * @return {@code chain} itself when no component is enabled; otherwise a
         *         subquery wrapping it that exposes the new columns and no longer
         *         exposes the temporary UDF result column
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            if (!BooleanUtils.or(this.selectedColumns)) {
                return chain;
            }
            SQLDialect d = chain.getDialect();
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            String tmpColumnName = "URL_SPLITTER_" + SecretKeyGenerator.generate(16);
            // Wrap first when the input column is produced by the current query
            // level, before referencing it in the UDF call below.
            if (chain.isCreatedOrModifiedByCurrentQuery(this.inputColumn)) {
                chain = chain.makeSubquery();
            }
            // Call the UDF once and keep its result under a temporary column name.
            chain.select(ebf.expr(String.format("%s(%s)", this.functionName, d.quoteIdentifier(this.inputColumn))), tmpColumnName);
            SQLQueryWithSchema outer = chain.makeSubquery();
            SchemaColumn inputSchemaColumn = outer.getCurrentColumn(this.inputColumn);
            for (int i = 0; i < this.outputColumns.length; ++i) {
                if (!this.selectedColumns[i]) {
                    continue;
                }
                // Element i of the UDF result becomes column <input>_<suffix>.
                outer.addAfterOrReplaceColumn(inputSchemaColumn, ebf.expr(d.quoteIdentifier(tmpColumnName) + "[" + i + "]"), Type.STRING, this.inputColumn + "_" + this.outputColumns[i], false);
            }
            // The temporary column is only an intermediate; drop it from the outer query.
            outer.deleteColumn(tmpColumnName);
            outer.deleteSelect(tmpColumnName);
            return outer;
        }
    }

    /**
     * Parameters of the URL splitter step: the input column and one flag per
     * URL component to extract. Every flag defaults to {@code true}. Each
     * enabled component is written to a column named after the input column
     * plus a suffix ({@code _anchor}, {@code _querystring}, {@code _path},
     * {@code _port}, {@code _host}, {@code _scheme}).
     */
    public static class URLSplitterParams
    implements StepParams {
        private static final long serialVersionUID = -1L;
        /** Name of the column holding the URL to split. */
        public String column;
        /** Whether to produce the {@code <column>_anchor} output column. */
        public boolean extractAnchor = true;
        /** Whether to produce the {@code <column>_querystring} output column. */
        public boolean extractQueryString = true;
        /** Whether to produce the {@code <column>_path} output column. */
        public boolean extractPath = true;
        /** Whether to produce the {@code <column>_port} output column. */
        public boolean extractPort = true;
        /** Whether to produce the {@code <column>_host} output column. */
        public boolean extractHost = true;
        /** Whether to produce the {@code <column>_scheme} output column. */
        public boolean extractScheme = true;

        /**
         * Performs no check: the body is empty, so no exception is ever thrown
         * from here despite the declared {@link IllegalArgumentException}.
         */
        public void validate() throws IllegalArgumentException {
        }
    }
}

