/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.pig;

import com.dataiku.dip.cluster.ClusterProperty;
import com.dataiku.dip.cluster.ClusterSelector;
import com.dataiku.dip.cluster.HadoopSettings;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.FormatParams;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.coremodel.SimpleKeyValue;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.dataflow.RunnableSubgraph;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.dataflow.utils.FlowVariables;
import com.dataiku.dip.datasets.DatasetInspector;
import com.dataiku.dip.datasets.FSProviderCodes;
import com.dataiku.dip.datasets.fs.HDFSDatasetHandler;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.exceptions.DataStoreIOException;
import com.dataiku.dip.formats.avro.AvroFormatMeta;
import com.dataiku.dip.formats.avro.SchemaConverter;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.hadoop.HadoopLoader;
import com.dataiku.dip.hadoop.MapredCompressionSetter;
import com.dataiku.dip.hive.HdfsPathSubstitution;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.formats.csv.CSVFormatConfig;
import com.dataiku.dip.input.formats.parquet.ParquetFormatMeta;
import com.dataiku.dip.partitioning.FilePartitioner;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.pig.BadReplacementException;
import com.dataiku.dip.pig.DatasetRelationMapping;
import com.dataiku.dip.pig.PigSchemaTools;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.util.DatasetLocUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.DKUtils;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.PathUtils;
import com.dataiku.dip.variables.VariablesContext;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.json.JSONObject;

public class PigScriptPreprocessor {
    // Path patterns of the jars for which getRegisterJars() emits a REGISTER statement.
    // NOTE(review): the '.' before "jar" in the piggybank and parquet patterns is not escaped,
    // so it matches any character - confirm whether that is intended.
    static Pattern[] interestingJarPatterns = new Pattern[]{Pattern.compile("\\/avro-[0-9.]+\\.jar$"), Pattern.compile("\\/avro-mapred-[0-9.]+\\.jar$"), Pattern.compile("\\/jackson-core-asl-[0-9.]+\\.jar$"), Pattern.compile("\\/jackson-mapper-asl-[0-9.]+\\.jar$"), Pattern.compile("\\/json-simple-[0-9.]+\\.jar$"), Pattern.compile("\\/piggybank.jar$"), Pattern.compile("\\/parquet.*.jar$")};
    // Matches "<alias> = DKULOAD '<dataset>';" (group 1 = alias, group 2 = dataset name).
    // The flag value 2 is Pattern.CASE_INSENSITIVE.
    static Pattern load = Pattern.compile("([^\\s]*)\\s*=\\s*DKULOAD\\s+'([^']*)'\\s*;", 2);
    // Matches "DKUSTORE <alias> INTO '<dataset>';" (group 1 = alias, group 2 = dataset name),
    // also case-insensitive.
    static Pattern save = Pattern.compile("DKUSTORE\\s+([^\\s]*)\\s+INTO\\s+'([^']*)'\\s*;", 2);
    // Column names accepted by getLoadFor(): a letter followed by letters, digits or underscores.
    private static Pattern pigLatinNamePattern = Pattern.compile("^[a-zA-Z][a-zA-Z0-9_]*$");
    private static DKULogger logger = DKULogger.getLogger((String)"dku.flow.pig");
    // When true, getUsingLoad()/getUsingStore() always return PigStorage(',') regardless of format.
    private final boolean validationMode;
    private DatasetsDAO dao;
    private RunnableSubgraph subgraph;
    private String projectKey;
    // Enabled through setHandleOptionalDeps(); read by the makeUrlFor* helpers.
    private boolean handleOptionalDeps;
    // The input script; preprocess() overwrites it with intermediate rewritten versions.
    private String pigScript;
    // Final script produced by preprocess(); null until preprocess() has completed.
    private String preprocessed = null;
    // Temporary-to-final output path pairs recorded by getStoreFor().
    private List<HdfsPathSubstitution> hdfsPaths = Lists.newArrayList();
    // One (dataset, relation alias) entry per DKUSTORE statement rewritten by preprocess().
    private List<DatasetRelationMapping> dkuStoreMapping = new ArrayList<DatasetRelationMapping>();
    // Filesystem configuration collected from dataset handlers; emitted as SET statements by preprocess().
    private List<SimpleKeyValue> extraConf = Lists.newArrayList();
    // Used by substVariables() to expand variables in the rewritten script.
    private VariablesContext context;
    private final AuthCtx authCtx;

    /**
     * Creates a preprocessor for one Pig script.
     *
     * @param authCtx        authentication context passed to dataset handlers and checks
     * @param dao            DAO used to load the serialized datasets referenced by the script
     * @param context        variables context used to expand variables in the final script
     * @param projectKey     project against which dataset names are resolved
     * @param subgraph       flow subgraph providing the declared source and target datasets
     * @param pigScript      the raw Pig script to preprocess
     * @param validationMode when true, load/store statements always use {@code PigStorage(',')}
     */
    public PigScriptPreprocessor(AuthCtx authCtx, DatasetsDAO dao, VariablesContext context, String projectKey, RunnableSubgraph subgraph, String pigScript, boolean validationMode) {
        // What to process and how.
        this.pigScript = pigScript;
        this.validationMode = validationMode;
        // Where the script lives in the flow.
        this.projectKey = projectKey;
        this.subgraph = subgraph;
        // Collaborators.
        this.authCtx = authCtx;
        this.dao = dao;
        this.context = context;
    }

    /**
     * Removes Pig comments from a script while leaving single-quoted string literals untouched.
     *
     * <p>Two comment forms are handled: block comments (slash-star up to star-slash) and
     * line comments (double dash up to the end of the line). Newlines found inside removed
     * comments are kept, so line numbers of the result match those of the input.
     *
     * <p>A quote inside a string only closes it when it is preceded by an even number of
     * consecutive backslashes (an odd number means the quote itself is escaped).
     *
     * @param script the script text; must not be null
     * @return the script with comments removed
     */
    public static String stripComments(String script) {
        final int length = script.length();
        StringBuilder out = new StringBuilder(length);
        boolean inString = false;
        boolean inMLComment = false;
        boolean inSLComment = false;
        for (int i = 0; i < length; ++i) {
            char c = script.charAt(i);
            char next = i + 1 < length ? script.charAt(i + 1) : '\0';

            if (inMLComment) {
                if (c == '\n') {
                    // Keep line breaks so that line numbers are preserved.
                    out.append(c);
                } else if (c == '*' && next == '/') {
                    inMLComment = false;
                    ++i; // consume the closing '/'
                }
                continue;
            }

            if (inSLComment) {
                if (c == '\n') {
                    inSLComment = false;
                    out.append(c);
                }
                continue;
            }

            if (inString) {
                if (c == '\'') {
                    // Count only the backslashes directly preceding this quote: scanning
                    // further back would let unrelated, earlier escapes flip the result.
                    int backslashes = 0;
                    for (int j = i - 1; j >= 0 && script.charAt(j) == '\\'; --j) {
                        ++backslashes;
                    }
                    if (backslashes % 2 == 0) {
                        inString = false;
                    }
                }
                // Everything inside a string literal is copied verbatim.
                out.append(c);
                continue;
            }

            if (c == '\'') {
                inString = true;
            } else if (c == '/' && next == '*') {
                inMLComment = true;
                // Consume the '*' too, otherwise "/*/" would be read as a complete comment.
                ++i;
                continue;
            } else if (c == '-' && next == '-') {
                inSLComment = true;
                ++i;
                continue;
            }
            out.append(c);
        }
        return out.toString();
    }

    /**
     * Converts a character offset within {@code s} into a 1-based line number.
     *
     * <p>The previous implementation never left its scanning loop early and therefore
     * always returned the total number of lines; it also ignored line terminators when
     * accumulating lengths. This version counts the newline characters located before
     * the offset.
     *
     * @param s      the text the offset refers to
     * @param offset character offset into {@code s}; values below zero are treated as 0 and
     *               values past the end are clamped to the length of {@code s}
     * @return the 1-based line containing the offset, or 0 when {@code s} is empty
     */
    private static int convertOffsetToLine(String s, int offset) {
        if (s.isEmpty()) {
            // No line at all: same value the scanner-based version produced for empty input.
            return 0;
        }
        int limit = Math.min(Math.max(offset, 0), s.length());
        int line = 1;
        for (int i = 0; i < limit; ++i) {
            if (s.charAt(i) == '\n') {
                ++line;
            }
        }
        return line;
    }

    /**
     * Turns on optional-dependency handling. Once set, the URL builders used by
     * {@link #getLoadFor(String)} enumerate the dataset (or partition) before building
     * its load path. The flag cannot be turned off again through this class.
     */
    public void setHandleOptionalDeps() {
        this.handleOptionalDeps = true;
    }

    /**
     * Appends the Pig type declaration of one column to a comma-separated AS clause.
     *
     * @param as the clause built so far (empty for the first column)
     * @param sc the column to append
     * @return the clause with the column's Pig type appended
     */
    private String updateAS(String as, SchemaColumn sc) {
        // A separator is only needed once at least one column is already present.
        String separator = as.length() > 0 ? "," : "";
        return as + separator + PigSchemaTools.toPigType(sc, true);
    }

    /**
     * Builds the {@code REGISTER} statements for the jars listed in the
     * {@code DKU_PIG_CP} environment variable whose path matches one of
     * {@code interestingJarPatterns}.
     *
     * <p>Each jar is registered at most once, even if its path matches several patterns.
     * Semicolons in a jar path are backslash-escaped.
     *
     * @return the concatenated statements, or an empty string when {@code DKU_PIG_CP}
     *         is not set or no jar matches
     */
    public String getRegisterJars() {
        String pigCP = System.getenv("DKU_PIG_CP");
        if (pigCP == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (String jarPath : DKUtils.parseClassPath(pigCP)) {
            for (Pattern pattern : interestingJarPatterns) {
                if (pattern.matcher(jarPath).find()) {
                    sb.append("REGISTER ").append(jarPath.replace(";", "\\;")).append("; ");
                    // One REGISTER per jar is enough: stop at the first matching pattern.
                    break;
                }
            }
        }
        return sb.toString();
    }

    /**
     * Returns the Pig loader expression (the part following {@code USING}) for reading a dataset.
     *
     * @param dataset      dataset being read; its schema is used for the Avro loader
     * @param formatType   format type of the dataset
     * @param formatParams format parameters; cast to {@link CSVFormatConfig} for the csv format
     * @return the loader expression; always {@code PigStorage(',')} in validation mode
     * @throws Exception if the format is not supported as a Pig input, or if a csv dataset
     *                   uses a quoting style other than NO_ESCAPE_NO_QUOTE
     */
    private String getUsingLoad(Dataset dataset, String formatType, FormatParams formatParams) throws Exception {
        final String loader;
        if (this.validationMode) {
            loader = "PigStorage(',')";
        } else if (formatType.equals("csv")) {
            CSVFormatConfig csv = (CSVFormatConfig)formatParams;
            if (csv.style != CSVFormatConfig.CSVStyle.NO_ESCAPE_NO_QUOTE) {
                throw ErrorContext.iaef("CSV quoting style %s is not supported for Pig. Only NO_ESCAPE_NO_QUOTE is supported", (Object)csv.style, new Object[0]);
            }
            loader = "PigStorage('" + csv.getSeparatorStr() + "')";
        } else if (formatType.equals("parquet")) {
            loader = "parquet.pig.ParquetLoader";
        } else if (formatType.equals(AvroFormatMeta.META.getType())) {
            // AvroStorage takes a JSON configuration embedded in a single-quoted Pig string.
            JSONObject avroOptions = new JSONObject();
            String avroSchemaJson = SchemaConverter.convertSchema(dataset.getSchema()).toString();
            avroOptions.put("schema", (Object)new JSONObject(avroSchemaJson));
            avroOptions.put("debug", 5);
            String quotedOptions = avroOptions.toString().replace("'", "\\'");
            loader = "org.apache.pig.piggybank.storage.avro.AvroStorage('" + quotedOptions + "')";
        } else {
            throw new Exception("Unsupported format " + formatType + " for Pig input");
        }
        return loader;
    }

    /**
     * Returns the Pig storer expression (the part following {@code USING}) for writing a dataset.
     *
     * @param dataset      dataset being written; its schema is used for the Avro storer
     * @param formatType   format type of the dataset
     * @param formatParams format parameters; cast to {@link CSVFormatConfig} for the csv format
     * @return the storer expression; always {@code PigStorage(',')} in validation mode
     * @throws Exception if the format is not supported as a Pig output, or if a csv dataset
     *                   uses a quoting style other than NO_ESCAPE_NO_QUOTE
     */
    private String getUsingStore(Dataset dataset, String formatType, FormatParams formatParams) throws Exception {
        final String storer;
        if (this.validationMode) {
            storer = "PigStorage(',')";
        } else if (formatType.equals("csv")) {
            CSVFormatConfig csv = (CSVFormatConfig)formatParams;
            if (csv.style != CSVFormatConfig.CSVStyle.NO_ESCAPE_NO_QUOTE) {
                throw ErrorContext.iaef("CSV quoting style %s is not supported for Pig. Only NO_ESCAPE_NO_QUOTE is supported", (Object)csv.style, new Object[0]);
            }
            storer = "PigStorage('" + csv.getSeparatorStr() + "')";
        } else if (formatType.equals(ParquetFormatMeta.META.getType())) {
            storer = "parquet.pig.ParquetStorer";
        } else if (formatType.equals(AvroFormatMeta.META.getType())) {
            // AvroStorage takes a JSON configuration embedded in a single-quoted Pig string.
            JSONObject avroOptions = new JSONObject();
            String avroSchemaJson = SchemaConverter.convertSchema(dataset.getSchema()).toString();
            avroOptions.put("schema", (Object)new JSONObject(avroSchemaJson));
            avroOptions.put("debug", 5);
            String quotedOptions = avroOptions.toString().replace("'", "\\'");
            storer = "org.apache.pig.piggybank.storage.avro.AvroStorage('" + quotedOptions + "')";
        } else {
            throw new Exception("Unsupported format " + formatType + " for Pig output");
        }
        return storer;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Builds the Pig {@code LOAD ... USING ... [AS (...)];} statement for an input dataset.
     *
     * <p>The dataset must be a declared source of the subgraph and reachable through HDFS.
     * When the dataset has a schema, every column name must be a valid Pig field name and
     * the columns are listed in the {@code AS} clause. Filesystem configuration reported by
     * the dataset handler is added to {@code extraConf}.
     *
     * <p>For a partitioned dataset the URL covers only the source partitions selected in the
     * subgraph; otherwise it covers the dataset root. (Previously the partitioned URL was
     * computed and then unconditionally overwritten by the non-partitioned one.)
     *
     * @param datasetName name of the dataset as written in the script
     * @return the complete LOAD statement, terminated by a semicolon
     * @throws Exception if the dataset is not a declared input, has an invalid column name,
     *                   uses an unsupported format, or cannot be accessed through HDFS
     */
    public String getLoadFor(String datasetName) throws Exception {
        DatasetLocUtils.DatasetLoc loc = DatasetLocUtils.resolveSmart(this.projectKey, datasetName);
        FlowDataset dataset = this.subgraph.getSourceDataset(loc.getFullName());
        if (dataset == null) {
            throw ErrorContext.iaef("Unknown input in Pig recipe : %s - Did you declare it as a Flow input ?", (Object)datasetName, new Object[0]);
        }
        SerializedDataset sd = (SerializedDataset)this.dao.getMandatory(loc);
        Dataset spec = Dataset.fromSerialized(sd);

        // Build the AS clause from the schema, one Pig type declaration per column.
        String as = "";
        Schema schema = spec.getSchema();
        if (schema != null) {
            for (SchemaColumn col : schema.getColumns()) {
                if (!pigLatinNamePattern.matcher(col.getName()).matches()) {
                    throw ErrorContext.iaef("The dataset \"%s\" cannot be used as input of a Pig recipe: column \"%s\" is not a valid Pig field name. Pig field names must start with an alphabetic character, and then they can have zero or more alphabetic, numeric, or _ (underscore) characters.", (Object)datasetName, new Object[]{col.getName()});
                }
                ErrorContext.push("column " + col.getName());
                try {
                    as = this.updateAS(as, col);
                }
                finally {
                    ErrorContext.pop();
                }
            }
        }

        String using = this.getUsingLoad(spec, spec.getFormatType(), spec.getFormatParams());
        if (!DatasetInspector.canHDFS(spec)) {
            throw new Exception("Unsupported input " + spec.getType());
        }

        String url;
        try (HDFSDatasetHandler handler = (HDFSDatasetHandler)DatasetHandlerFactory.build(this.authCtx, spec)) {
            this.extraConf.addAll(handler.getFSExtraConf());
            String rootPathWithinAuthority = handler.getEffectiveDatasetRootWithinAuthority();
            logger.info((Object)("HDFS root path " + rootPathWithinAuthority));
            if (!rootPathWithinAuthority.endsWith("/")) {
                rootPathWithinAuthority = rootPathWithinAuthority + "/";
            }
            if (spec.getPartitioningSchema().isPartitioned()) {
                Preconditions.checkArgument(this.subgraph.getSourcePartitions(dataset) != null, (Object)"Source partition is null");
                url = this.makeUrlForPartitioned(dataset, spec, handler, rootPathWithinAuthority);
            } else {
                // Only fall back to the dataset root when the dataset is not partitioned.
                url = this.makeUrlForNotPartitioned(dataset, handler, rootPathWithinAuthority);
            }
        }
        return "LOAD '" + url + "' USING " + using + (as.length() > 0 ? " AS (" + as + ")" : "") + ";";
    }

    /**
     * Computes the load URL of a non-partitioned dataset.
     *
     * <p>Without optional-dependency handling this is simply the fully qualified dataset
     * root. With it, the dataset is enumerated first: if its root path does not exist (or
     * the enumeration yields null), an empty placeholder folder is created under the
     * connection root and a glob on that folder is returned instead, so that the load
     * reads nothing rather than failing.
     *
     * <p>Previously the "root path does not exist" error was logged and then rethrown
     * anyway, which made the placeholder branch unreachable; only other errors are
     * propagated now.
     *
     * @param dataset                 the flow dataset (used for logging)
     * @param handler                 HDFS handler of the dataset
     * @param rootPathWithinAuthority dataset root path within the filesystem authority
     * @return the fully qualified HDFS path or glob to load
     */
    private String makeUrlForNotPartitioned(FlowDataset dataset, HDFSDatasetHandler handler, String rootPathWithinAuthority) throws IOException, InterruptedException, DKUSecurityException, CodedException {
        if (this.handleOptionalDeps) {
            List<FSPath> datasetPaths;
            try {
                datasetPaths = handler.enumerateFilesystem();
            }
            catch (DataStoreIOException e) {
                if (e.getCode() != FSProviderCodes.ERR_FSPROVIDER_ROOT_PATH_DOES_NOT_EXIST) {
                    throw e;
                }
                // A missing root is expected for an optional dependency: treat it as "no data".
                logger.info((Object)"Dataset totally empty");
                datasetPaths = null;
            }
            if (datasetPaths == null) {
                logger.infoV("Dataset %s does not exist and optional deps --> replacing it by a fake load", new Object[]{dataset.getFullName()});
                String emptyPath = PathUtils.concatLNT(new String[]{handler.getConnectionRootPathWithinAuthority(), "__dku_empty_folder"});
                handler.getImpersonatedFS().mkdirs(new Path(emptyPath));
                handler.getImpersonatedFS().createNewFile(new Path(emptyPath + "/dku_empty_file"));
                return handler.getFullyQualifiedHDFSPath(emptyPath + "/*");
            }
        }
        return handler.getFullyQualifiedHDFSPath(rootPathWithinAuthority);
    }

    /**
     * Computes the load URL of a partitioned dataset: a comma-separated list of fully
     * qualified globs, one per source partition selected in the subgraph.
     *
     * <p>With optional-dependency handling enabled, each partition is enumerated first.
     * If its path does not exist (or the enumeration yields null), the partition is
     * replaced by a glob on an empty placeholder folder created under the connection root.
     *
     * <p>Previously the "path does not exist" error was logged and then rethrown anyway,
     * which made the placeholder branch unreachable; only other errors are propagated now.
     *
     * @param dataset                 the flow dataset whose source partitions are read
     * @param spec                    dataset definition, providing the partitioning schema
     * @param handler                 HDFS handler of the dataset
     * @param rootPathWithinAuthority dataset root path within the filesystem authority
     * @return the comma-separated fully qualified globs
     */
    private String makeUrlForPartitioned(FlowDataset dataset, Dataset spec, HDFSDatasetHandler handler, String rootPathWithinAuthority) throws IOException, InterruptedException, DKUSecurityException, CodedException {
        List<String> globs = new ArrayList<String>();
        for (Partition p : this.subgraph.getSourcePartitions(dataset)) {
            String thisPartitionGlob = null;
            if (this.handleOptionalDeps) {
                List<FSPath> partitionPaths;
                try {
                    partitionPaths = handler.enumeratePartition(p, handler.getEnumerationSettings());
                }
                catch (DataStoreIOException e) {
                    if (e.getCode() != FSProviderCodes.ERR_FSPROVIDER_PATH_DOES_NOT_EXIST) {
                        throw e;
                    }
                    // A missing partition is expected for an optional dependency: treat it as "no data".
                    logger.info((Object)"Dataset totally empty");
                    partitionPaths = null;
                }
                if (partitionPaths == null) {
                    logger.infoV("Partition %s does not exist in dataset %s and optional deps --> replacing it by a fake load", new Object[]{p.id(), dataset.getFullName()});
                    String emptyPath = PathUtils.concatLNT(new String[]{handler.getConnectionRootPathWithinAuthority(), "__dku_empty_folder"});
                    handler.getImpersonatedFS().mkdirs(new Path(emptyPath));
                    handler.getImpersonatedFS().createNewFile(new Path(emptyPath + "/dku_empty_file"));
                    thisPartitionGlob = emptyPath + "/*";
                }
            }
            if (thisPartitionGlob == null) {
                thisPartitionGlob = PathUtils.concatLNT(new String[]{rootPathWithinAuthority, FilePartitioner.computePartitionRelPathAsShellGlob(p, spec.getPartitioningSchema())});
            }
            logger.info((Object)("Add " + String.valueOf(p) + " -> " + thisPartitionGlob));
            globs.add(thisPartitionGlob);
        }
        List<String> fullyQualifiedGlobs = new ArrayList<String>(globs.size());
        for (String glob : globs) {
            fullyQualifiedGlobs.add(handler.getFullyQualifiedHDFSPath(glob));
        }
        return StringUtils.join((Collection)fullyQualifiedGlobs, ",");
    }

    /**
     * Builds the Pig {@code STORE ... INTO ... USING ...;} statement for an output dataset.
     *
     * <p>The statement writes to a temporary location derived from the dataset root (the
     * root with a {@code __dkutmp__} suffix, followed by the target partition folder). The
     * temporary/final path pair is recorded in the list returned by
     * {@link #getHDFSPathSubstitutions()}, and the handler's filesystem configuration is
     * added to {@code extraConf}.
     *
     * @param alias       Pig relation to store
     * @param datasetName name of the output dataset as written in the script
     * @return the complete STORE statement, terminated by a semicolon
     * @throws Exception if the dataset is not a declared output, cannot be accessed through
     *                   HDFS, or uses a format not supported as a Pig output
     */
    public String getStoreFor(String alias, String datasetName) throws Exception {
        DatasetLocUtils.DatasetLoc targetLoc = DatasetLocUtils.resolveSmart(this.projectKey, datasetName);
        FlowDataset target = this.subgraph.getTargetDataset(targetLoc.getFullName());
        if (target == null) {
            throw ErrorContext.iaef("Unknown output in Pig recipe : %s - Did you declare it as a Flow output ?", (Object)datasetName, new Object[0]);
        }
        Dataset spec = Dataset.fromSerialized((SerializedDataset)this.dao.getMandatory(targetLoc));
        if (!DatasetInspector.canHDFS(spec)) {
            throw new Exception("Unsupported output " + spec.getType());
        }

        String tmpUrl;
        try (HDFSDatasetHandler handler = (HDFSDatasetHandler)DatasetHandlerFactory.build(this.authCtx, spec)) {
            this.extraConf.addAll(handler.getFSExtraConf());

            // Normalise the root so that it always ends with exactly the slash we rely on below.
            String root = handler.getFullyQualifiedRootPath();
            String rootWithSlash = root.endsWith("/") ? root : root + "/";
            String rootWithoutSlash = rootWithSlash.substring(0, rootWithSlash.length() - 1);

            String partitionDir = FilePartitioner.computePartitionRelPathAsFolder(this.subgraph.getTargetPartition(target), spec.getPartitioningSchema());
            tmpUrl = rootWithoutSlash + "__dkutmp__/" + partitionDir;
            String finalUrl = rootWithSlash + partitionDir;

            this.hdfsPaths.add(new HdfsPathSubstitution(HadoopLoader.getFS(handler.getConnectionRootSchemeAndAuthority(), handler.getFSExtraConf()), tmpUrl, finalUrl));
        }

        String using = this.getUsingStore(spec, spec.getFormatType(), spec.getFormatParams());
        return "STORE " + alias + " INTO '" + tmpUrl + "' USING " + using + ";";
    }

    /**
     * Substitutes flow partitioning variables, then context variables, in a script.
     *
     * <p>Before substitution, one trailing line comment per partitioning variable
     * ({@code ;-- FLOW VARIABLE key =  value}) is appended to the script.
     *
     * @param script the script to process
     * @return the script with variables substituted and expanded
     */
    private String substVariables(String script) throws IOException, CodedException, DKUSecurityException {
        HashMap<String, String> partitioningVars = new HashMap<String, String>();
        FlowVariables.addPartitioningVariables(this.authCtx, partitioningVars, this.subgraph, this.dao);

        String annotated = script;
        for (Map.Entry<String, String> variable : partitioningVars.entrySet()) {
            String trace = "\n;-- FLOW VARIABLE " + variable.getKey() + " =  " + variable.getValue() + "\n";
            annotated = annotated + trace;
        }

        String substituted = FlowVariables.substitute(partitioningVars, annotated);
        return this.context.expand(substituted);
    }

    /**
     * Turns the raw script into an executable Pig script, available afterwards through
     * {@link #getPreprocessed()}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>strip comments from the script (a blank script is treated as empty);</li>
     *   <li>check every source and target dataset of the subgraph (dataset checks, HDFS
     *       access, Pig schema compatibility, and format compatibility for sources);</li>
     *   <li>replace every {@code DKULOAD} statement by a {@code LOAD} statement;</li>
     *   <li>replace every {@code DKUSTORE} statement by a {@code STORE} statement, recording
     *       the dataset/relation mapping; a dataset may only be written once;</li>
     *   <li>prepend the {@code REGISTER} statements and the {@code SET} statements for the
     *       collected filesystem and cluster configuration, then substitute variables.</li>
     * </ol>
     *
     * @throws BadReplacementException if anything fails while rewriting the statements; it
     *         carries the line of the statement being rewritten
     * @throws Exception if a dataset check fails before the rewriting starts
     */
    public void preprocess() throws Exception {
        this.pigScript = PigScriptPreprocessor.stripComments(StringUtils.defaultIfBlank((String)this.pigScript, (String)""));

        // Validate the inputs.
        for (FlowDataset flowDataset : this.subgraph.getSourceDatasets()) {
            DatasetLocUtils.DatasetLoc loc = DatasetLocUtils.resolveSmart(this.projectKey, flowDataset.getFullName());
            SerializedDataset sd = (SerializedDataset)this.dao.getMandatory(loc);
            Dataset dataset = Dataset.fromSerialized(sd);
            DatasetInspector.checkDataset(this.authCtx, dataset);
            PigSchemaTools.PigCompatibilityStatus status = PigSchemaTools.isFormatCompatible(dataset);
            if (!status.compatible) {
                // This is the Pig preprocessor: the message used to say "Hive" by mistake.
                throw ErrorContext.iaef("Dataset %s cannot be used for Pig: %s", (Object)flowDataset.getFullName(), new Object[]{status.reason});
            }
            if (!DatasetInspector.canHDFS(dataset)) {
                throw ErrorContext.iaef("dataset %s cannot be accessed through HDFS", (Object)flowDataset.getFullName(), new Object[0]);
            }
            PigSchemaTools.checkPigSchemaCompatibility(dataset);
        }

        // Validate the outputs and let the compression setter inspect them.
        MapredCompressionSetter compressionSetter = new MapredCompressionSetter();
        for (FlowDataset fds : this.subgraph.getTargetsDatasets()) {
            DatasetLocUtils.DatasetLoc loc = DatasetLocUtils.resolveSmart(this.projectKey, fds.getFullName());
            SerializedDataset sd = (SerializedDataset)this.dao.getMandatory(loc);
            Dataset dataset = Dataset.fromSerialized(sd);
            DatasetInspector.checkDataset(this.authCtx, dataset);
            if (!DatasetInspector.canHDFS(dataset)) {
                throw ErrorContext.iaef("dataset %s cannot be accessed through HDFS", (Object)fds.getFullName(), new Object[0]);
            }
            PigSchemaTools.checkPigSchemaCompatibility(dataset);
            compressionSetter.inspectOutputDataset(dataset);
        }

        StringBuilder rewritten = new StringBuilder();
        rewritten.append(compressionSetter.getCompressionCommands(true));
        // Offsets of the statement currently being rewritten, used for error reporting.
        int start = -1;
        int end = -1;
        try {
            // Pass 1: DKULOAD -> LOAD.
            Matcher loadMatcher = load.matcher(this.pigScript);
            int curOffset = 0;
            while (loadMatcher.find(curOffset)) {
                start = loadMatcher.start();
                end = loadMatcher.end();
                rewritten.append(this.pigScript.substring(curOffset, start));
                String alias = loadMatcher.group(1);
                String ipinName = loadMatcher.group(2);
                rewritten.append(alias + " = " + this.getLoadFor(ipinName));
                curOffset = end;
            }
            rewritten.append(this.pigScript.substring(curOffset));
            this.pigScript = rewritten.toString();
            rewritten.setLength(0);

            // Pass 2: DKUSTORE -> STORE, on the output of pass 1.
            HashSet<String> dkuStores = new HashSet<String>();
            Matcher saveMatcher = save.matcher(this.pigScript);
            curOffset = 0;
            while (saveMatcher.find(curOffset)) {
                start = saveMatcher.start();
                end = saveMatcher.end();
                rewritten.append(this.pigScript.substring(curOffset, start));
                String alias = saveMatcher.group(1);
                String opinName = saveMatcher.group(2);
                if (!dkuStores.add(opinName)) {
                    throw ErrorContext.iaef("'%s' can only be written to once", (Object)opinName, new Object[0]);
                }
                rewritten.append(this.getStoreFor(alias, opinName));
                DatasetRelationMapping mapping = new DatasetRelationMapping();
                mapping.dataset = DatasetLocUtils.resolveSmart(this.projectKey, opinName).getFullName();
                mapping.relation = alias;
                this.dkuStoreMapping.add(mapping);
                curOffset = end;
            }
            rewritten.append(this.pigScript.substring(curOffset));

            // Configuration gathered from the dataset handlers, then from the cluster settings.
            StringBuilder extraConfSb = new StringBuilder();
            for (SimpleKeyValue kv : this.extraConf) {
                extraConfSb.append("SET " + kv.key + " '" + kv.value + "'; ");
            }
            HadoopSettings hadoopSettings = new ClusterSelector().selectForProject(this.authCtx, this.projectKey).getHadoopSettings();
            for (ClusterProperty kv : hadoopSettings.extraConf) {
                extraConfSb.append("SET " + kv.key + " '" + kv.value + "'; ");
            }
            this.preprocessed = this.getRegisterJars() + extraConfSb.toString() + this.substVariables(rewritten.toString());
        }
        catch (Exception e) {
            throw new BadReplacementException(e.getMessage(), e, PigScriptPreprocessor.convertOffsetToLine(this.pigScript, start));
        }
    }

    /**
     * Returns the temporary-to-final output path substitutions recorded by
     * {@link #getStoreFor(String, String)}. The internal list itself is returned, not a copy.
     */
    public List<HdfsPathSubstitution> getHDFSPathSubstitutions() {
        return this.hdfsPaths;
    }

    /**
     * Returns the script produced by {@link #preprocess()}, and logs it.
     *
     * <p>The script is written to the class logger rather than to standard output, so it
     * ends up alongside the other messages of this class.
     *
     * @return the preprocessed script, or null if {@link #preprocess()} has not completed
     */
    public String getPreprocessed() {
        logger.info((Object)("PREPROCESSED SCRIPT\n" + this.preprocessed));
        return this.preprocessed;
    }

    /**
     * Returns the dataset/relation pairs recorded by {@link #preprocess()}, one per rewritten
     * DKUSTORE statement. The internal list itself is returned, not a copy.
     */
    public List<DatasetRelationMapping> getDkuStoreMapping() {
        return this.dkuStoreMapping;
    }
}

