/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.transform.interactivedatapreparation;

import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.ExtractingPattern;
import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.TokenizedSelection;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ImmutableValueObject;
import com.google.common.base.Stopwatch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * Proposes extraction patterns (regular expressions) from text selections.
 *
 * <p>An instance is built from a list of {@link Selection}s (a selected span together with
 * the text before and after it), a list of excluded sentences (sentences on which a good
 * pattern should extract nothing) and a list of example sentences against which every
 * proposed pattern is evaluated. {@link #guess()} returns the proposals grouped by category.
 */
public class InteractiveExtractor {
    /** Cap on library patterns kept, and stop threshold of the loose/precise generation loops. */
    static final int MAX_PROPOSITION_PER_CATEGORY = 3;
    /** Constant term added to the per-selection complexity upper bound computed in {@link #guess()}. */
    static final int MAX_COMPLEXITY_BEFORE_AFTER = 30;
    /** Loose generation is aborted once a single selection yields at least this many patterns. */
    static final int PATTERN_GENERATION_LIMIT = 500;
    /** Precise generation is aborted once a single selection yields at least this many patterns. */
    static final int PRECISE_PATTERN_GENERATION_LIMIT = 50;
    /** Fraction of the maximum achievable score a pattern must reach to survive filtering. */
    static final float MATCHING_RATIO_FOR_CLEANING = 0.7f;
    /** Maximum number of sentences (and of per-sentence extraction lists) kept in a response. */
    static final int NB_MAX_SENTENCE_IN_RESPONSE = 300;
    private static final DKULogger logger = DKULogger.getLogger("dku.shaker.idp");
    final List<String> exampleSentences;
    final List<Selection> selections;
    final List<String> excludedSentences;

    /**
     * Stores the given lists as-is; no defensive copy is made.
     *
     * @param selections        selections the proposed patterns should reproduce
     * @param excludedSentences sentences on which a pattern should extract nothing
     * @param sentences         example sentences used to evaluate each proposition
     */
    public InteractiveExtractor(List<Selection> selections, List<String> excludedSentences, List<String> sentences) {
        this.selections = selections;
        this.excludedSentences = excludedSentences;
        this.exampleSentences = sentences;
    }

    /**
     * Generates pattern propositions for the current selections.
     *
     * <p>Four categories are added to the response, in this order: {@code "library patterns"},
     * {@code "precise around selection"}, {@code "precise on selection"} and
     * {@code "union in selection"}. The time spent in each phase is logged at debug level.
     *
     * @return the response; it has no category at all when there is no selection
     */
    public ExtractorResponse guess() {
        ExtractorResponse resp = new ExtractorResponse(this.exampleSentences);
        if (this.selections.isEmpty()) {
            return resp;
        }

        // Tokenize every selection and derive the complexity bound shared by the
        // loose and precise generation loops.
        int maxiComplexity = 0;
        Stopwatch guessStart = Stopwatch.createStarted();
        // Declared as ArrayList (not List) because it is handed to
        // TokenizedSelection.generateUnionSelectionExtractingPattern, whose parameter
        // type is not visible from this file.
        ArrayList<TokenizedSelection> tokenizedSelections = new ArrayList<>();
        for (Selection rawSelection : this.selections) {
            TokenizedSelection selection = new TokenizedSelection(rawSelection);
            tokenizedSelections.add(selection);
            // NOTE(review): the decompiled code had a bare 30 here; it equals
            // MAX_COMPLEXITY_BEFORE_AFTER, which is presumably what the original source
            // used. The factor 4 has no matching constant in this class.
            int upperSelectionComplexity = selection.selection.size() * 4 + MAX_COMPLEXITY_BEFORE_AFTER;
            maxiComplexity = Math.max(maxiComplexity, upperSelectionComplexity);
        }
        long tokenizationDuration = guessStart.elapsed(TimeUnit.MILLISECONDS);

        Stopwatch customTimer = Stopwatch.createStarted();
        resp.addCategory("library patterns", this.generateCustomPatterns());
        long customDuration = customTimer.elapsed(TimeUnit.MILLISECONDS);

        Stopwatch looseTimer = Stopwatch.createStarted();
        resp.addCategory("precise around selection", this.generateLoosePatterns(maxiComplexity, tokenizedSelections));
        long looseDuration = looseTimer.elapsed(TimeUnit.MILLISECONDS);

        Stopwatch preciseTimer = Stopwatch.createStarted();
        resp.addCategory("precise on selection", this.generatePrecisePatterns(maxiComplexity, tokenizedSelections));
        long preciseDuration = preciseTimer.elapsed(TimeUnit.MILLISECONDS);

        Stopwatch unionTimer = Stopwatch.createStarted();
        List<ExtractingPattern> unionPatterns =
                this.filterPatterns(TokenizedSelection.generateUnionSelectionExtractingPattern(tokenizedSelections));
        resp.addCategory("union in selection", unionPatterns);
        long unionDuration = unionTimer.elapsed(TimeUnit.MILLISECONDS);

        long guessDuration = guessStart.elapsed(TimeUnit.MILLISECONDS);
        StringBuilder durationLog = new StringBuilder();
        durationLog.append("Pattern generation took ").append(guessDuration).append("ms ")
                .append("(tokenization ").append(tokenizationDuration).append("ms, ")
                .append("custom ").append(customDuration).append("ms, ")
                .append("loose ").append(looseDuration).append("ms, ")
                .append("precise ").append(preciseDuration).append("ms, ")
                .append("union ").append(unionDuration).append("ms)");
        // The (Object) cast is kept so the same DKULogger overload is selected as before.
        logger.debug((Object)durationLog.toString());
        return resp;
    }

    /**
     * Filters the patterns returned by {@code ExtractingPattern.getCustomPatterns()} and keeps
     * at most {@link #MAX_PROPOSITION_PER_CATEGORY} of them, in their filtered order.
     */
    private List<ExtractingPattern> generateCustomPatterns() {
        List<ExtractingPattern> customPatterns = this.filterPatterns(ExtractingPattern.getCustomPatterns());
        if (customPatterns.size() > MAX_PROPOSITION_PER_CATEGORY) {
            return customPatterns.subList(0, MAX_PROPOSITION_PER_CATEGORY);
        }
        return customPatterns;
    }

    /**
     * Collects "loose" patterns by increasing complexity, starting at 0.
     *
     * <p>At each complexity level the patterns of all selections are gathered, filtered with
     * {@link #filterPatterns(List)} and appended to the result. The loop stops once at least
     * {@link #MAX_PROPOSITION_PER_CATEGORY} patterns were collected or {@code maxiComplexity}
     * is exceeded. If a single selection yields {@link #PATTERN_GENERATION_LIMIT} patterns or
     * more, generation is aborted and what was collected at previous levels is returned.
     *
     * @param maxiComplexity     highest complexity level to try (inclusive)
     * @param tokenizedSelection tokenized selections to generate patterns from
     * @return the patterns collected so far; possibly more than the per-category cap
     */
    private List<ExtractingPattern> generateLoosePatterns(int maxiComplexity, List<TokenizedSelection> tokenizedSelection) {
        List<ExtractingPattern> loosePatterns = new ArrayList<>();
        for (int complexity = 0;
                loosePatterns.size() < MAX_PROPOSITION_PER_CATEGORY && complexity <= maxiComplexity;
                ++complexity) {
            List<ExtractingPattern> candidates = new ArrayList<>();
            for (TokenizedSelection selection : tokenizedSelection) {
                List<ExtractingPattern> selectionPatterns = selection.generateLooseSelectionExtractingPatterns(complexity);
                if (selectionPatterns.size() >= PATTERN_GENERATION_LIMIT) {
                    // Too many candidates: give up and discard this level's candidates.
                    logger.debug((Object)"loose pattern aborted");
                    return loosePatterns;
                }
                candidates.addAll(selectionPatterns);
            }
            loosePatterns.addAll(this.filterPatterns(candidates));
        }
        return loosePatterns;
    }

    /**
     * Collects "precise" patterns by increasing complexity, starting at 0.
     *
     * <p>Same procedure as {@link #generateLoosePatterns(int, List)}, but candidates come from
     * {@code generatePreciseSelectionExtractingPatterns} and the abort threshold is
     * {@link #PRECISE_PATTERN_GENERATION_LIMIT}.
     *
     * @param maxiComplexity     highest complexity level to try (inclusive)
     * @param tokenizedSelection tokenized selections to generate patterns from
     * @return the patterns collected so far; possibly more than the per-category cap
     */
    private List<ExtractingPattern> generatePrecisePatterns(int maxiComplexity, List<TokenizedSelection> tokenizedSelection) {
        List<ExtractingPattern> precisePatterns = new ArrayList<>();
        for (int complexity = 0;
                precisePatterns.size() < MAX_PROPOSITION_PER_CATEGORY && complexity <= maxiComplexity;
                ++complexity) {
            List<ExtractingPattern> candidates = new ArrayList<>();
            for (TokenizedSelection selection : tokenizedSelection) {
                List<ExtractingPattern> selectionPatterns = selection.generatePreciseSelectionExtractingPatterns(complexity);
                if (selectionPatterns.size() >= PRECISE_PATTERN_GENERATION_LIMIT) {
                    // Too many candidates: give up and discard this level's candidates.
                    logger.debug((Object)"precise pattern aborted");
                    return precisePatterns;
                }
                candidates.addAll(selectionPatterns);
            }
            precisePatterns.addAll(this.filterPatterns(candidates));
        }
        return precisePatterns;
    }

    /**
     * Returns the given patterns without those whose {@code toRegex()} value was already
     * produced by an earlier pattern of the list. Input order is preserved.
     */
    private List<ExtractingPattern> deduplicateRegexPatterns(List<ExtractingPattern> patterns) {
        HashSet<String> seenRegexes = new HashSet<>();
        List<ExtractingPattern> distinctPatterns = new ArrayList<>();
        for (ExtractingPattern pattern : patterns) {
            // HashSet.add returns false when the regex was already present.
            if (seenRegexes.add(pattern.toRegex())) {
                distinctPatterns.add(pattern);
            }
        }
        return distinctPatterns;
    }

    /** Counts the distinct full sentences (before + selection + after) among the selections. */
    private int numberOfDistinctSelections() {
        HashSet<String> fullSentences = new HashSet<>();
        for (Selection selection : this.selections) {
            fullSentences.add(selection.getFullSentence());
        }
        return fullSentences.size();
    }

    /**
     * Deduplicates the patterns by regex, then keeps those compatible with the selections
     * and the excluded sentences.
     *
     * <p>Each pattern is scored as follows: for every selection whose exact position is among
     * the pattern's extractions on the selection's full sentence, {@code 1 / numberOfExtractions}
     * is added; for every excluded sentence on which the pattern extracts nothing, 1 is added.
     * A pattern is kept when its score reaches {@link #MATCHING_RATIO_FOR_CLEANING} times
     * (number of distinct selection sentences + number of excluded sentences).
     *
     * @param patterns candidate patterns
     * @return a new list with the surviving patterns, in input order
     */
    private List<ExtractingPattern> filterPatterns(List<ExtractingPattern> patterns) {
        List<ExtractingPattern> compatiblePatterns = new ArrayList<>();
        List<ExtractingPattern> distinctPatterns = this.deduplicateRegexPatterns(patterns);
        if (distinctPatterns.isEmpty()) {
            // Nothing to score; also avoids touching the selections at all in that case.
            return compatiblePatterns;
        }

        // The threshold does not depend on the pattern, so compute it once.
        float requiredScore = MATCHING_RATIO_FOR_CLEANING
                * (float)(this.numberOfDistinctSelections() + this.excludedSentences.size());

        for (ExtractingPattern pattern : distinctPatterns) {
            float score = 0.0f;
            for (Selection selection : this.selections) {
                int selectionStart = selection.before.length();
                int selectionEnd = selectionStart + selection.selection.length();
                List<ExtractionPosition> extracts = pattern.extract(selection.getFullSentence());
                // contains() succeeding implies the list is non-empty, so the division is safe.
                if (extracts.contains(new ExtractionPosition(selectionStart, selectionEnd))) {
                    score += 1.0 / extracts.size();
                }
            }
            for (String excludedSentence : this.excludedSentences) {
                if (pattern.extract(excludedSentence).isEmpty()) {
                    score += 1.0;
                }
            }
            if (score >= requiredScore) {
                compatiblePatterns.add(pattern);
            }
        }
        return compatiblePatterns;
    }

    /** Result of {@link InteractiveExtractor#guess()}: example sentences plus proposition categories. */
    public static class ExtractorResponse {
        /** Example sentences, limited to the first {@link #NB_MAX_SENTENCE_IN_RESPONSE}. */
        public final List<String> sentences;
        public List<PropositionCategory> categories = new ArrayList<PropositionCategory>();
        public String customRegexError = "";

        /**
         * @param sentences example sentences; when there are
         *                  {@link #NB_MAX_SENTENCE_IN_RESPONSE} or more, only a
         *                  {@code subList} view of the first ones is kept
         */
        public ExtractorResponse(List<String> sentences) {
            this.sentences = sentences.size() >= NB_MAX_SENTENCE_IN_RESPONSE
                    ? sentences.subList(0, NB_MAX_SENTENCE_IN_RESPONSE)
                    : sentences;
        }

        /**
         * Returns the patterns whose regex is neither already present in a previously added
         * category nor produced by an earlier pattern of the same list.
         */
        private List<ExtractingPattern> getNoDuplicate(List<ExtractingPattern> patterns) {
            HashSet<String> seenRegexes = new HashSet<>();
            for (PropositionCategory category : this.categories) {
                for (Proposition proposition : category.propositions) {
                    seenRegexes.add(proposition.regex);
                }
            }
            List<ExtractingPattern> noDuplicatePatterns = new ArrayList<>();
            for (ExtractingPattern pattern : patterns) {
                if (seenRegexes.add(pattern.toRegex())) {
                    noDuplicatePatterns.add(pattern);
                }
            }
            return noDuplicatePatterns;
        }

        /**
         * Appends a category made of the given patterns, minus those duplicating an already
         * proposed regex, sorted by ascending {@code score}.
         *
         * @param name     category name
         * @param patterns candidate patterns; the list itself is not modified
         */
        void addCategory(String name, List<ExtractingPattern> patterns) {
            ArrayList<ExtractingPattern> sortedPatterns = new ArrayList<ExtractingPattern>(this.getNoDuplicate(patterns));
            Collections.sort(sortedPatterns, new Comparator<ExtractingPattern>(){

                @Override
                public int compare(ExtractingPattern o1, ExtractingPattern o2) {
                    // Compare the operands directly: subtracting first can overflow
                    // and yield the wrong sign.
                    return Integer.compare(o1.score, o2.score);
                }
            });
            this.categories.add(new PropositionCategory(name, sortedPatterns, this.sentences));
        }

        /**
         * Adds a {@code "userCustomPattern"} category holding the single given regex
         * (the category is empty if that regex was already proposed).
         *
         * @param regex regular expression to wrap in an {@code ExtractingPattern}
         */
        public void addCustomProposition(String regex) {
            List<ExtractingPattern> patterns = new ArrayList<>();
            patterns.add(new ExtractingPattern(regex));
            this.addCategory("userCustomPattern", patterns);
        }

        /** A named group of propositions. */
        public static class PropositionCategory {
            /** Maximum number of patterns turned into propositions per category. */
            static final int NB_MAX_PATTERNS_RESPONSE = 5;
            public String name;
            public List<Proposition> propositions = new ArrayList<Proposition>();

            /**
             * Builds one {@link Proposition} per pattern, for at most the first
             * {@link #NB_MAX_PATTERNS_RESPONSE} patterns.
             *
             * @param name      category name, also stored on each proposition
             * @param patterns  patterns in the order they should be proposed
             * @param sentences sentences each pattern is evaluated on
             */
            public PropositionCategory(String name, List<ExtractingPattern> patterns, List<String> sentences) {
                this.name = name;
                List<ExtractingPattern> kept = patterns.size() > NB_MAX_PATTERNS_RESPONSE
                        ? patterns.subList(0, NB_MAX_PATTERNS_RESPONSE)
                        : patterns;
                for (ExtractingPattern pattern : kept) {
                    this.propositions.add(new Proposition(pattern, sentences, name));
                }
            }
        }
    }

    /** A selected span of text together with the text preceding and following it. */
    public static class Selection {
        public String before;
        public String selection;
        public String after;

        /**
         * @param before    text preceding the selected span
         * @param selection the selected span
         * @param after     text following the selected span
         */
        public Selection(String before, String selection, String after) {
            this.before = before;
            this.selection = selection;
            this.after = after;
        }

        /** Returns {@code before + selection + after}. */
        public String getFullSentence() {
            return this.before + this.selection + this.after;
        }
    }

    /**
     * Start and end offsets of an extraction inside a sentence.
     *
     * <p>{@code filterPatterns} looks instances up with {@code List.contains}, so equality
     * must be value-based. NOTE(review): presumably provided by {@code ImmutableValueObject} —
     * confirm.
     */
    public static class ExtractionPosition
    extends ImmutableValueObject {
        public final int start;
        public final int end;

        public ExtractionPosition(int start, int end) {
            this.start = start;
            this.end = end;
        }
    }

    /** A proposed regex together with its evaluation on a list of sentences. */
    public static class Proposition {
        public final String regex;
        public final String category;
        /** Number of sentences on which the pattern extracted at least one position. */
        public int nbOK = 0;
        /** Number of sentences on which the pattern extracted nothing. */
        public int nbNOK = 0;
        /** Per-sentence extractions, for at most the first {@link #NB_MAX_SENTENCE_IN_RESPONSE} sentences. */
        public List<List<ExtractionPosition>> extractions = new ArrayList<List<ExtractionPosition>>();

        /**
         * Runs the pattern on every sentence and records the outcome.
         *
         * <p>All sentences count towards {@link #nbOK} / {@link #nbNOK}; only the extractions
         * of the first {@link #NB_MAX_SENTENCE_IN_RESPONSE} sentences are stored.
         *
         * @param pattern   pattern to evaluate; its {@code toRegex()} value is stored
         * @param sentences sentences to run the pattern on
         * @param category  name of the category this proposition belongs to
         */
        public Proposition(ExtractingPattern pattern, List<String> sentences, String category) {
            this.regex = pattern.toRegex();
            this.category = category;
            for (String sentence : sentences) {
                List<ExtractionPosition> extracts = pattern.extract(sentence);
                if (this.extractions.size() < NB_MAX_SENTENCE_IN_RESPONSE) {
                    this.extractions.add(extracts);
                }
                if (extracts.isEmpty()) {
                    ++this.nbNOK;
                } else {
                    ++this.nbOK;
                }
            }
        }
    }
}

