/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.transform;

import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.AppliesToProcessor;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.text.Labelled;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.sql.queries.QueryUtils;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.collect.Sets;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;

public class ColumnPseudonymization
extends AppliesToProcessor
implements Processor {
    /** Step parameters this processor instance was built with (hashing algorithm, salt column, pepper). */
    private final Parameter parameter;
    public static final ProcessorMeta<ColumnPseudonymization, Parameter> META = new AppliesToProcessor.AppliesToProcessorMeta<ColumnPseudonymization, Parameter>(){

        @Override
        public String getName() {
            return "ColumnPseudonymization";
        }

        @Override
        public String getDocPage() {
            return "column-pseudonymization";
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public Category getCategory() {
            return Category.TRANSFORMATION;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.MISC});
        }

        @Override
        public ProcessorDesc describe(String language) {
            return ProcessorDesc.withGenericForm(this.getName(), this.translate(language, "SHAKER.PROCESSOR.ColumnPseudonymization.DESCRIPTION", 1.actionVerb("Pseudonymize") + " text")).withParam(ParamDesc.advancedSelect("algorithm", this.translate(language, "SHAKER.PROCESSOR.ColumnPseudonymization.DESCRIPTION.ALGORITHM", "Hashing algorithm"), "", Parameter.HashingAlgo.class).withDefaultValue(Parameter.DEFAULT_HASHING_ALGORITHM)).withColParam("saltColumn", this.translate(language, "SHAKER.PROCESSOR.ColumnPseudonymization.DESCRIPTION.SALT_COLUMN", "Salt")).withParam("pepper", "string", false, true, this.translate(language, "SHAKER.PROCESSOR.ColumnPseudonymization.DESCRIPTION.PEPPER", "Pepper"));
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.ColumnPseudonymization.HELP", "This processor replaces values of columns (containing sensitive data) by hashes, in order to provide pseudonymization.\n\nIn order to protect against dictionary attacks, you can specify:\n\n* An optional static \"pepper\" value that will be added to all input values before hashing. If you intend to use the hash as join or lookup keys, the pepper should be the same for all pseudonymized datasets\n\n* An optional \"salt\" column. For each row, the value of this column will be added to the input values before hashing. If you intend to use the hash as join or lookup keys, the salt column should be present and identical for all pseudonymized datasets\n\nNote that under the GDPR, pseudonymization is not the same as anonymization since the latter irreversibly destroys any way of identifying the data subject.");
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            ProcessorMeta.ProcessorCapabilitiesSummary capa = new ProcessorMeta.ProcessorCapabilitiesSummary().withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
            Parameter param = (Parameter)params;
            if (dialect.getOperator(ColumnPseudonymization.getOperatorType(param.algorithm)) != null) {
                capa.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            }
            return capa;
        }

        @Override
        public ColumnPseudonymization build(Parameter parameter) {
            return new ColumnPseudonymization(parameter);
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) {
            return new SQLTranslator((Parameter)parameter);
        }

        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.transform.ColumnPseudonymizationNS";
        }

        @Override
        public Object selfReport(Parameter parameter) {
            return AppliesToProcessor.selfReport(parameter);
        }

        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof Parameter)) {
                throw new IllegalArgumentException("Unsupported param type: " + pss.params.getClass().getSimpleName());
            }
            Parameter columnPseudonymizationParams = (Parameter)pss.params;
            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = this.getUpdatedDatasetPairLineage(columnPseudonymizationParams, (DatasetPairLineage)previousDatasetPairLineage, columnPseudonymizationParams.saltColumn, AppliesToProcessor.AppliesToProcessorMeta.RelationDirection.FROM, false);
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };
    /** Lazily resolved handle on the salt column; stays null until first needed. */
    private Column saltColumn;
    /** Hash function matching the configured algorithm; set once in the constructor. */
    private final HashFunction hashingFunction;
    /** Static pepper appended to every value before hashing; never null (empty when unset). */
    private final String pepper;
    /** Ensures the "salt column ignored" warning is emitted at most once per processor instance. */
    private boolean warnedSaltColumnSkipped;
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.shaker.processors.transform");

    private ColumnPseudonymization(Parameter parameter) {
        this.parameter = parameter;
        this.hashingFunction = ColumnPseudonymization.getHashingFunction(parameter.algorithm);
        this.pepper = parameter.pepper == null ? "" : parameter.pepper;
    }

    /**
     * Replaces the value of each target column in the row by the hash of
     * {@code value + pepper + salt}. Empty cells are hashed as the empty string. The salt column
     * itself is never hashed; a warning is recorded the first time it is skipped.
     *
     * @param row     row to modify in place
     * @param columns columns the step applies to
     */
    @Override
    public void processRowForColumns(Row row, Iterable<Column> columns) {
        // Pepper and salt are identical for every cell of this row: build the suffix once.
        final String suffix = this.pepper + this.getSaltValueForRow(row);
        for (Column target : columns) {
            boolean isSaltColumn = target.getName().equals(this.parameter.saltColumn);
            if (!isSaltColumn) {
                String raw = row.empty(target) ? "" : row.get(target);
                HashCode digest = this.hashingFunction.hashString(raw + suffix, StandardCharsets.UTF_8);
                row.put(target, digest.toString());
            } else if (!this.warnedSaltColumnSkipped) {
                this.warningsContext.addWarning(WarningsContext.WarningType.SHAKER_COLUMN_IGNORED, "The column " + this.parameter.saltColumn + " will be ignored because it's used as salt", logger);
                this.warnedSaltColumnSkipped = true;
            }
        }
    }

    /** Returns the step parameters this processor was built with. */
    @Override
    public AppliesToProcessor.AppliesToParams getParams() {
        return parameter;
    }

    /** No-op: this processor has nothing to do after the rows have been processed. */
    public void postProcess() throws Exception {
        // intentionally empty
    }

    /**
     * Returns the salt column handle, resolving it through the column factory on first use and
     * caching it afterwards.
     */
    private Column getSaltColumn() {
        Column resolved = this.saltColumn;
        if (resolved != null) {
            return resolved;
        }
        resolved = this.getColumnFactory().column(this.parameter.saltColumn, Processor.ProcessorRole.INPUT_COLUMN);
        this.saltColumn = resolved;
        return resolved;
    }

    /**
     * Returns the salt to use for the given row: the salt column's value, or the empty string
     * when no salt column is configured or the cell is empty.
     */
    private String getSaltValueForRow(Row row) {
        if (StringUtils.isBlank(this.parameter.saltColumn)) {
            return "";
        }
        Column salt = this.getSaltColumn();
        if (row.empty(salt)) {
            return "";
        }
        return row.get(salt);
    }

    /**
     * Maps a configured hashing algorithm to the corresponding Guava hash function.
     *
     * @param algo algorithm selected in the step parameters
     * @return the matching hash function
     * @throws IllegalArgumentException if {@code algo} is null or not a supported algorithm
     */
    private static HashFunction getHashingFunction(Parameter.HashingAlgo algo) {
        if (algo == null) {
            // Switching on a null enum would raise a bare NullPointerException; report it as invalid input instead.
            throw new IllegalArgumentException("Invalid hashing algorithm: null");
        }
        switch (algo) {
            case SHA256: {
                return Hashing.sha256();
            }
            case SHA512: {
                return Hashing.sha512();
            }
            case MD5: {
                return Hashing.md5();
            }
        }
        throw new IllegalArgumentException("Invalid hashing algorithm: " + String.valueOf(algo));
    }

    /**
     * Maps a configured hashing algorithm to the SQL operator type used to express it in a query.
     *
     * @param algo algorithm selected in the step parameters
     * @return the matching operator type
     * @throws IllegalArgumentException if {@code algo} is null or not a supported algorithm
     */
    private static QueryUtils.OperatorType getOperatorType(Parameter.HashingAlgo algo) {
        if (algo == null) {
            // Switching on a null enum would raise a bare NullPointerException; report it as invalid input instead.
            throw new IllegalArgumentException("Invalid hashing algorithm: null");
        }
        switch (algo) {
            case SHA256: {
                return QueryUtils.OperatorType.SHA256;
            }
            case SHA512: {
                return QueryUtils.OperatorType.SHA512;
            }
            case MD5: {
                return QueryUtils.OperatorType.MD5;
            }
        }
        throw new IllegalArgumentException("Invalid hashing algorithm: " + String.valueOf(algo));
    }

    /**
     * Step parameters of the pseudonymization processor, on top of the inherited "applies to"
     * column selection.
     */
    public static class Parameter
    extends AppliesToProcessor.AppliesToParams {
        private static final long serialVersionUID = 1L;
        /** Name of the column whose per-row value is appended before hashing; may be unset. */
        public String saltColumn;
        /** Static string appended to every value before hashing; may be unset. */
        public String pepper;
        private static final HashingAlgo DEFAULT_HASHING_ALGORITHM = HashingAlgo.SHA256;
        /** Hashing algorithm to apply; defaults to SHA-256. */
        public HashingAlgo algorithm = DEFAULT_HASHING_ALGORITHM;

        /** Supported hashing algorithms, each with the label returned by {@link #getLabel()}. */
        public enum HashingAlgo implements Labelled {
            SHA256("SHA-256"),
            SHA512("SHA-512"),
            MD5("MD5");

            private final String label;

            HashingAlgo(String displayLabel) {
                this.label = displayLabel;
            }

            @Override
            public String getLabel() {
                return label;
            }
        }
    }

    /**
     * Translates the pseudonymization step to SQL: each target column is replaced by the dialect's
     * hashing operator applied to {@code column || pepper || salt}, with NULLs coalesced to the
     * empty string.
     */
    private static class SQLTranslator
    implements ProcessorSQLTranslator {
        private final Parameter params;

        private SQLTranslator(Parameter params) {
            this.params = params;
        }

        /**
         * Rewrites the select expression and schema type (STRING) of every target column.
         *
         * @param chain query being built; wrapped in a subquery first when a target column was
         *              created or modified by the current query
         * @return the query with the hashed columns
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            List<String> appliesToColumns = chain.getAppliesToColumns(this.params);
            if (chain.isAnyCreatedOrModifiedByCurrentQuery(appliesToColumns)) {
                chain = chain.makeSubquery();
            }
            // The salt column itself is never hashed.
            // NOTE(review): this mutates the list returned by getAppliesToColumns — confirm it is a private, modifiable copy.
            if (appliesToColumns.contains(this.params.saltColumn)) {
                logger.warn((Object)("Column " + this.params.saltColumn + " ignored because used as salt"));
                appliesToColumns.remove(this.params.saltColumn);
            }
            // A blank (null, empty or whitespace-only) salt column means "no salt", exactly as in the
            // row-by-row implementation, so both engines produce the same hashes.
            boolean hasSalt = StringUtils.isNotBlank(this.params.saltColumn);
            boolean hasPepper = StringUtils.isNotEmpty(this.params.pepper);
            for (String column : appliesToColumns) {
                SchemaColumn inputSchemaColumn = (SchemaColumn)JSON.deepCopy((Object)chain.getMandatoryCurrentColumn(column));
                ExpressionBuilder emptyString = ebf.cst("").castToString(1);
                // Fall back to the dialect's default varchar length when the column declares none.
                int maxLength = inputSchemaColumn.getMaxLength() > 0 ? inputSchemaColumn.getMaxLength() : chain.getDialect().getDefaultVarcharLen();
                ExpressionBuilder toBeHashed = ebf.col(column).castToString(maxLength).coalesce(emptyString);
                if (hasPepper) {
                    ExpressionBuilder pepper = ebf.cst(this.params.pepper);
                    toBeHashed = toBeHashed.concat(pepper);
                }
                if (hasSalt) {
                    ExpressionBuilder salt = ebf.col(this.params.saltColumn).castToString(chain.getDialect().getDefaultVarcharLen()).coalesce(emptyString);
                    toBeHashed = toBeHashed.concat(salt);
                }
                ExpressionBuilder expr = ebf.op(ColumnPseudonymization.getOperatorType(this.params.algorithm), toBeHashed);
                chain.replaceColumn(inputSchemaColumn.withType(Type.STRING));
                chain.replaceSelect(column, expr, column);
            }
            return chain;
        }
    }
}

