/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.eda.worksheets.suggester;

import com.dataiku.dip.eda.compute.distributions.Distribution;
import com.dataiku.dip.eda.worksheets.cards.UnivariateCard;
import com.dataiku.dip.eda.worksheets.cards.univariate_header.CategoricalHistogramCard;
import com.dataiku.dip.eda.worksheets.cards.univariate_header.NumericalHistogramCard;
import com.dataiku.dip.eda.worksheets.models.Variable;
import com.dataiku.dip.eda.worksheets.queries.SuggestCards;
import com.dataiku.dip.eda.worksheets.suggester.SniffedVariable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Computes interest scores for a list of {@link SniffedVariable}s and turns them into
 * {@link SuggestCards.SuggestedVariable}s.
 *
 * <p>Scoring mutates the supplied {@link SniffedVariable} instances: their
 * {@code individualScore}, {@code timeSeriesScore} and {@code correlationScore} fields are
 * overwritten and human-readable entries are appended to their {@code scoreExplanations}.
 */
public class VariableInterestScorer {
    /** Below this p-value a test contributes the full weight. */
    private static final double STRONG_P_VALUE = 0.05;
    /** Below this p-value a test contributes half weight and produces an explanation. */
    private static final double MODERATE_P_VALUE = 0.2;
    /** Below this p-value a test still contributes a small weight. */
    private static final double WEAK_P_VALUE = 0.6;

    private static final double STRONG_WEIGHT = 1.0;
    private static final double MODERATE_WEIGHT = 0.5;
    private static final double WEAK_WEIGHT = 0.1;

    private final List<SniffedVariable> sniffedVariables;
    private final List<SniffedVariable> selectedSniffedVariables;
    private final List<String> selectedVariableNames;

    /**
     * @param sniffedVariables all variables to score; the subset for which
     *                         {@link SniffedVariable#isSelected()} is true is used as the
     *                         reference set for time-series and correlation scoring
     */
    public VariableInterestScorer(List<SniffedVariable> sniffedVariables) {
        this.sniffedVariables = sniffedVariables;
        this.selectedSniffedVariables = sniffedVariables.stream()
                .filter(SniffedVariable::isSelected)
                .collect(Collectors.toList());
        this.selectedVariableNames = this.selectedSniffedVariables.stream()
                .map(it -> it.name)
                .collect(Collectors.toList());
    }

    /**
     * Scores every variable and returns one suggestion per variable, in input order.
     *
     * <p>Each of the three sub-scores is divided by its maximum over all variables (only when
     * that maximum is strictly positive). The resulting total score of every suggestion is then
     * divided by the maximum total score, again only when that maximum is strictly positive.
     *
     * @return the suggested variables, one per sniffed variable
     */
    public List<SuggestCards.SuggestedVariable> score() {
        double maxIndividualScore = -1.0;
        double maxCorrelationScore = -1.0;
        double maxTimeSeriesScore = -1.0;
        for (SniffedVariable variable : this.sniffedVariables) {
            // Order matters: computeTimeSeriesScore adds on top of the individual score.
            this.computeIndividualScore(variable);
            this.computeTimeSeriesScore(variable);
            this.computeCorrelationScore(variable);
            if (variable.individualScore > maxIndividualScore) {
                maxIndividualScore = variable.individualScore;
            }
            if (variable.correlationScore > maxCorrelationScore) {
                maxCorrelationScore = variable.correlationScore;
            }
            if (variable.timeSeriesScore > maxTimeSeriesScore) {
                maxTimeSeriesScore = variable.timeSeriesScore;
            }
        }

        for (SniffedVariable variable : this.sniffedVariables) {
            if (maxIndividualScore > 0.0) {
                variable.individualScore /= maxIndividualScore;
            }
            if (maxCorrelationScore > 0.0) {
                variable.correlationScore /= maxCorrelationScore;
            }
            if (maxTimeSeriesScore > 0.0) {
                variable.timeSeriesScore /= maxTimeSeriesScore;
            }
        }

        double maxTotalScore = -1.0;
        List<SuggestCards.SuggestedVariable> suggestedVariables = new ArrayList<>(this.sniffedVariables.size());
        for (SniffedVariable sniffedVariable : this.sniffedVariables) {
            SuggestCards.SuggestedVariable suggestedVariable = this.buildSuggestedVariable(sniffedVariable);
            suggestedVariables.add(suggestedVariable);
            if (suggestedVariable.totalScore > maxTotalScore) {
                maxTotalScore = suggestedVariable.totalScore;
            }
        }

        if (maxTotalScore > 0.0) {
            for (SuggestCards.SuggestedVariable suggestedVariable : suggestedVariables) {
                // Previously the quotient was computed and thrown away, leaving totalScore
                // un-normalized; store it back so the best suggestion ends up at 1.0.
                suggestedVariable.totalScore = suggestedVariable.totalScore / maxTotalScore;
            }
        }
        return suggestedVariables;
    }

    /**
     * Builds the suggestion for one variable: a numerical histogram card (histogram shown, box
     * plot hidden) when the variable is likely numerical, a categorical histogram card otherwise.
     * The total score is read from {@link SniffedVariable#getInterestScore()}.
     */
    private SuggestCards.SuggestedVariable buildSuggestedVariable(SniffedVariable variable) {
        UnivariateCard card;
        if (variable.isLikelyNumerical()) {
            card = new NumericalHistogramCard().withShowHistogram(true).withShowBoxPlot(false);
            card.column = new Variable(variable.name, Variable.Type.CONTINUOUS);
        } else {
            card = new CategoricalHistogramCard();
            card.column = new Variable(variable.name, Variable.Type.CATEGORICAL);
        }
        SuggestCards.SuggestedVariable suggestedVariable = new SuggestCards.SuggestedVariable();
        suggestedVariable.name = variable.name;
        suggestedVariable.type = variable.isLikelyNumerical() ? Variable.Type.CONTINUOUS : Variable.Type.CATEGORICAL;
        suggestedVariable.card = card;
        suggestedVariable.individualScore = variable.individualScore;
        suggestedVariable.totalScore = variable.getInterestScore();
        suggestedVariable.explanations = variable.scoreExplanations;
        return suggestedVariable;
    }

    /**
     * Resets and computes {@code variable.individualScore} from the variable's own properties.
     *
     * <p>Constants, likely identifiers and high-cardinality categoricals keep a score of 0.
     * Temporal variables get a flat 1.0. Everything else accumulates bonuses for data
     * availability, low entropy, low cardinality, numerical traits and distribution fits.
     */
    private void computeIndividualScore(SniffedVariable variable) {
        variable.individualScore = 0.0;
        if (variable.isConstant()) {
            return;
        }
        if (variable.isLikelyCategorical()) {
            if (variable.isLikelyAnIdentifier()) {
                variable.scoreExplanations.add("Likely an identifier");
                return;
            }
            if (variable.hasHighCardinality()) {
                return;
            }
        }
        if (variable.holdsTemporalValues()) {
            variable.individualScore += 1.0;
            variable.scoreExplanations.add("Holds temporal values");
            return;
        }

        // The fewer unavailable values, the larger the bonus (up to 2.0).
        variable.individualScore += 2.0 * (1.0 - variable.getUnavailableDataRatio());

        if (variable.entropy != null) {
            if (variable.entropy <= 4.0) {
                variable.individualScore += 0.5;
            } else if (variable.entropy <= 5.0) {
                variable.individualScore += 0.1;
            }
        }

        if (variable.isLikelyCategorical() && !variable.isLikelyNumerical()) {
            if (variable.hasVeryLowCardinality()) {
                variable.individualScore += 2.0;
            } else if (variable.hasLowCardinality()) {
                variable.individualScore += 1.0;
            }
        }

        if (variable.isLikelyNumerical()) {
            variable.individualScore += 1.0;
            if (variable.hasSignificantStandardDeviation()) {
                variable.individualScore += 0.5;
            }
            if (variable.hasSignificantShapiroStatistic()) {
                variable.individualScore += 2.0;
                variable.scoreExplanations.add("May be normally distributed");
            }
        }

        List<Distribution> goodDistributionFits = variable.getGoodDistributionFits();
        if (!goodDistributionFits.isEmpty()) {
            variable.individualScore += 2.0;
            variable.scoreExplanations.add("May follow a known distribution");
        }
    }

    /**
     * Resets {@code variable.timeSeriesScore} to 0 and adds 1.0 to {@code individualScore} for
     * each time-series role the variable can play against the selected variables: numerical
     * values alongside a selected temporal variable, and/or temporal values alongside a selected
     * numerical variable.
     *
     * <p>NOTE(review): the bonuses land on {@code individualScore}, so {@code timeSeriesScore}
     * always stays at 0 and its normalization in {@link #score()} never triggers. This may be
     * unintended, but is kept as-is because changing it would alter the resulting ranking —
     * confirm against the scoring specification.
     */
    private void computeTimeSeriesScore(SniffedVariable variable) {
        variable.timeSeriesScore = 0.0;

        boolean qualifiesAsSeriesValues = variable.holdsNumericalValues()
                && this.selectedSniffedVariables.stream().anyMatch(SniffedVariable::holdsTemporalValues);
        if (qualifiesAsSeriesValues) {
            variable.individualScore += 1.0;
        }

        boolean qualifiesAsSeriesTimestamps = variable.holdsTemporalValues()
                && this.selectedSniffedVariables.stream().anyMatch(SniffedVariable::holdsNumericalValues);
        if (qualifiesAsSeriesTimestamps) {
            variable.individualScore += 1.0;
        }
    }

    /**
     * Resets and computes {@code variable.correlationScore} from the Spearman, Chi2 and one-way
     * ANOVA tests recorded between this variable and each selected variable.
     *
     * <p>For each test family the p-value weights are averaged over the number of selected
     * variables that have a test of that family; the three averages are then summed. Families
     * with no test contribute nothing.
     */
    private void computeCorrelationScore(SniffedVariable variable) {
        variable.correlationScore = 0.0;
        double significantSpearmanScore = 0.0;
        int spearmanCount = 0;
        double significantChi2Score = 0.0;
        int chi2Count = 0;
        double significantAnovaScore = 0.0;
        int anovaCount = 0;

        for (String selectedName : this.selectedVariableNames) {
            if (variable.spearmanTests.containsKey(selectedName)) {
                double pValue = variable.spearmanTests.get(selectedName).pvalue;
                significantSpearmanScore += weighTest(variable, selectedName, "Spearman", pValue);
                ++spearmanCount;
            }
            if (variable.chi2tests.containsKey(selectedName)) {
                double pValue = variable.chi2tests.get(selectedName).pvalue;
                significantChi2Score += weighTest(variable, selectedName, "Chi2", pValue);
                ++chi2Count;
            }
            if (variable.oneWayAnovaTests.containsKey(selectedName)) {
                double pValue = variable.oneWayAnovaTests.get(selectedName).pvalue;
                significantAnovaScore += weighTest(variable, selectedName, "One-way ANOVA", pValue);
                ++anovaCount;
            }
        }

        if (spearmanCount > 0) {
            variable.correlationScore += significantSpearmanScore / (double) spearmanCount;
        }
        if (chi2Count > 0) {
            variable.correlationScore += significantChi2Score / (double) chi2Count;
        }
        if (anovaCount > 0) {
            variable.correlationScore += significantAnovaScore / (double) anovaCount;
        }
    }

    /**
     * Records an explanation on {@code variable} when the p-value is below the moderate
     * threshold, and returns the weight this p-value contributes to the correlation score.
     */
    private static double weighTest(SniffedVariable variable, String otherName, String testName, double pValue) {
        if (pValue < MODERATE_P_VALUE) {
            variable.scoreExplanations.add(String.format("Seems correlated to %s (%s test)", otherName, testName));
        }
        return weightForPValue(pValue);
    }

    /** Maps a p-value to its tiered weight; p-values at or above the weak threshold (or NaN) weigh 0. */
    private static double weightForPValue(double pValue) {
        if (pValue < STRONG_P_VALUE) {
            return STRONG_WEIGHT;
        }
        if (pValue < MODERATE_P_VALUE) {
            return MODERATE_WEIGHT;
        }
        if (pValue < WEAK_P_VALUE) {
            return WEAK_WEIGHT;
        }
        return 0.0;
    }
}

