/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.populardatasets;

import com.dataiku.dip.coremodel.ExposedObject;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.coremodel.SerializedProject;
import com.dataiku.dip.coremodel.SerializedRecipe;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.datacollections.AbstractDataCollectionItem;
import com.dataiku.dip.datacollections.DataCollection;
import com.dataiku.dip.datacollections.DataCollectionDataset;
import com.dataiku.dip.datacollections.DataCollectionsService;
import com.dataiku.dip.dataflow.FlowGraphService;
import com.dataiku.dip.dataflow.ProjectFlowGraph;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.dataflow.graph.FlowRecipe;
import com.dataiku.dip.dataflow.graph.GraphNode;
import com.dataiku.dip.futures.FutureProgress;
import com.dataiku.dip.populardatasets.MostSharedDatasetInfos;
import com.dataiku.dip.populardatasets.MostUsedDatasetInfos;
import com.dataiku.dip.populardatasets.PopularDatasetInfos;
import com.dataiku.dip.populardatasets.PopularDatasetsConfig;
import com.dataiku.dip.populardatasets.PopularDatasetsProcessing;
import com.dataiku.dip.populardatasets.ScoreComputer;
import com.dataiku.dip.populardatasets.TrendingDetector;
import com.dataiku.dip.server.services.ExposedObjectsService;
import com.dataiku.dip.server.services.ITaggingService;
import com.dataiku.dip.server.services.NavigatorService;
import com.dataiku.dip.server.services.ProjectsService;
import com.dataiku.dip.server.services.ReadOnlyJobsInternalDB;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.transactions.TransactionContext;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.util.AnyLoc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dss.shadelib.org.joda.time.DateTime;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.LongPredicate;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Spring service that computes the "most used" datasets of the instance.
 *
 * <p>The single public entry point is {@code compute_NT}; the private methods implement its
 * successive steps (listing candidates, filtering, fetching last build times, collecting and
 * describing downstream recipes). Methods suffixed {@code _NT} assert on entry that no
 * transaction is attached, or (for {@code compute_NT}) open their own read transaction.
 */
@Service
public class PopularDatasetsProcessingService {
    // Enumerates the data collections, to count how many of them reference each dataset.
    @Autowired
    private DataCollectionsService dataCollectionsService;
    // Lists all projects and resolves the target projects of sharing rules.
    @Autowired
    private ProjectsService projectsService;
    // Resolves an exposed object's local name to its dataset definition.
    @Autowired
    private DatasetsDAO datasetsDAO;
    // NOTE(review): injected but not referenced by any method of this class.
    @Autowired
    private ExposedObjectsService exposedObjectsService;
    // Provides the flow graph of each target project, used to find downstream recipes.
    @Autowired
    private FlowGraphService flowGraphService;
    // Source of the latest successful build of a dataset.
    @Autowired
    private ReadOnlyJobsInternalDB jobsDBService;
    // Source of the latest successful build of a recipe.
    @Autowired
    private NavigatorService navigatorService;
    // Opens the read transactions needed around project/flow-graph accesses.
    @Autowired
    private TransactionService transactionService;
    // Number of milliseconds in one day.
    private static final long ONE_DAY_IN_MS = 86400000L;
    // All messages of this service are logged under the "dku.popularDatasets" logger.
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.popularDatasets");

    /**
     * Computes the "most used" datasets: the most-shared datasets, merged with the "popular"
     * datasets when that feature is enabled.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>list every exposed dataset with its share targets, inside a read transaction;</li>
     *   <li>score as "most shared" the candidates whose project count is at least 2;</li>
     *   <li>if {@code config.computePopularDatasets} is false, return those only;</li>
     *   <li>otherwise narrow the popular candidates by last successful build time, collect and
     *       describe their downstream recipes, and score the survivors;</li>
     *   <li>merge both lists: a dataset present in both keeps only its higher-scored entry
     *       (the popular entry wins ties).</li>
     * </ol>
     *
     * @param config processing options (thresholds, trending parameters, feature switch)
     * @return the selected datasets sorted by decreasing score, together with run metadata
     * @throws IOException if listing the candidates or reading a flow graph fails
     * @throws InterruptedException declared for callers; not thrown directly by this method
     */
    public PopularDatasetsProcessing.Output compute_NT(PopularDatasetsConfig.ProcessingOptions config) throws IOException, InterruptedException {
        logger.info((Object)"Starting to compute most used datasets");
        PopularDatasetsProcessing.PerfMetrics perfMetrics = new PopularDatasetsProcessing.PerfMetrics();
        DateTime now = DateTime.now();
        // NOTE(review): the target is 5 but only four increments follow, and the pushed state
        // is not popped in this method - confirm this is handled by the caller.
        FutureProgress.pushState("Computing most used dataset candidates");
        FutureProgress.getState().setTarget(5L);

        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> rawCandidateList;
        try (Transaction t = this.transactionService.beginRead()) {
            rawCandidateList = this.listCandidates();
        }
        // Copied into a HashMap because the key set is pruned in place below (retainAll).
        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> popularCandidates =
                new HashMap<>(this.filterCandidates(rawCandidateList, config.minimumShares, config.onlyFromDataCollections));
        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> mostSharedCandidates = this.filterCandidates(rawCandidateList, 2, false);
        perfMetrics.doneListing();
        logger.info((Object)String.format("Most used datasets computation: done listing (%s candidates found)", rawCandidateList.size()));

        List<MostSharedDatasetInfos> mostSharedDatasetInfos = ScoreComputer.computeMostSharedDatasetScore_NT(new ArrayList<PopularDatasetsProcessing.DatasetInfo>(mostSharedCandidates.keySet()));
        if (!config.computePopularDatasets) {
            logger.info((Object)"Skipping computation of popular datasets as feature is disabled");
            return buildOutput(new ArrayList<MostUsedDatasetInfos>(mostSharedDatasetInfos), perfMetrics);
        }
        FutureProgress.incrementState(1.0);

        // A negative maxDaysSinceLastRebuild disables the "built recently enough" filter.
        long lastRebuildCutoff = now.minusDays(config.maxDaysSinceLastRebuild).getMillis();
        LongPredicate builtRecentlyEnough = config.maxDaysSinceLastRebuild >= 0
                ? lastBuildTime -> lastBuildTime > 0L && lastBuildTime >= lastRebuildCutoff
                : lastBuildTime -> true;
        Map<PopularDatasetsProcessing.DatasetInfo, Long> candidatesLastBuildTime = this.getLastBuildTime_NT(popularCandidates.keySet(), builtRecentlyEnough);
        FutureProgress.incrementState(1.0);
        popularCandidates.keySet().retainAll(candidatesLastBuildTime.keySet());
        perfMetrics.doneGetLastRebuild();
        logger.info((Object)String.format("Popular datasets computation: done get last rebuild timestamp (%s candidates remaining)", popularCandidates.size()));
        FutureProgress.incrementState(1.0);

        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedRecipe>> downstreamRecipes = this.collectDownstreamRecipes_NT(popularCandidates);
        perfMetrics.doneCollectRecipe();
        logger.info((Object)String.format("Popular datasets computation: done collecting downstream recipes (%s candidates remaining)", downstreamRecipes.size()));
        FutureProgress.incrementState(1.0);

        TrendingDetector trendingDetector = new TrendingDetector(now.getMillis(), (long)config.windowSize * 86400000L, config.recentTrendTolerance, config.trendPseudoLinearQuadraticThreshold, config.trendPseudoLinearSlopeThreshold, config.trendMaxWindowCount);
        Map<PopularDatasetsProcessing.DatasetInfo, PopularDatasetsProcessing.DownstreamRecipesDesc> downstreamRecipesDesc = this.computeDownstreamRecipeInfo_NT(downstreamRecipes, trendingDetector);
        perfMetrics.doneProcessRecipe();
        logger.info((Object)String.format("Popular datasets computation: done processing downstream recipes (%s candidates remaining)", downstreamRecipesDesc.size()));

        long nowMillis = now.getMillis();
        List<PopularDatasetInfos> popularDatasets = new ArrayList<>();
        for (Map.Entry<PopularDatasetsProcessing.DatasetInfo, PopularDatasetsProcessing.DownstreamRecipesDesc> described : downstreamRecipesDesc.entrySet()) {
            PopularDatasetsProcessing.DownstreamRecipesDesc desc = described.getValue();
            if (config.onlyTrendingDatasets && !desc.isTrending) {
                continue;
            }
            PopularDatasetsProcessing.DatasetInfo dataset = described.getKey();
            int numShares = popularCandidates.get(dataset).size();
            popularDatasets.add(new PopularDatasetInfos(dataset, numShares, candidatesLastBuildTime.get(dataset), desc, sharesPerMonth(numShares, dataset.creationTag, nowMillis)));
        }
        ScoreComputer.enrichWithScore_NT(popularDatasets);
        perfMetrics.doneCompileData();
        logger.info((Object)String.format("Popular datasets computation: all done (%s results)", popularDatasets.size()));

        // Explicit HashMap: entries are removed from it while merging.
        Map<PopularDatasetsProcessing.DatasetInfo, MostSharedDatasetInfos> mostSharedByDataset = new HashMap<>();
        for (MostSharedDatasetInfos mostShared : mostSharedDatasetInfos) {
            mostSharedByDataset.put(mostShared.dataset, mostShared);
        }
        List<PopularDatasetInfos> keptPopularDatasets = new ArrayList<>();
        for (PopularDatasetInfos popularDataset : popularDatasets) {
            MostSharedDatasetInfos mostShared = mostSharedByDataset.get(popularDataset.dataset);
            if (mostShared != null && mostShared.score > popularDataset.score) {
                // The most-shared entry scores strictly higher: drop the popular one.
                continue;
            }
            // Popular entry wins (or has no counterpart): drop the most-shared one, if any.
            mostSharedByDataset.remove(popularDataset.dataset);
            keptPopularDatasets.add(popularDataset);
        }

        // The final ordering is applied once, on the merged list.
        List<MostUsedDatasetInfos> mostUsedDatasets = new ArrayList<>(mostSharedByDataset.values());
        mostUsedDatasets.addAll(keptPopularDatasets);
        return buildOutput(mostUsedDatasets, perfMetrics);
    }

    /** Sorts the datasets by decreasing score and wraps them with the run metadata. */
    private static PopularDatasetsProcessing.Output buildOutput(List<MostUsedDatasetInfos> mostUsedDatasets, PopularDatasetsProcessing.PerfMetrics perfMetrics) {
        List<MostUsedDatasetInfos> byDecreasingScore = mostUsedDatasets.stream().sorted(Comparator.comparingDouble(v -> -v.score)).toList();
        return new PopularDatasetsProcessing.Output(byDecreasingScore, new PopularDatasetsProcessing.RunMetadata(perfMetrics.getResult(), System.currentTimeMillis(), mostUsedDatasets.size()));
    }

    /**
     * Number of shares divided by the dataset age in 30-day months (rounded up, at least 1).
     * A creation time of 0 is treated as "unknown" and counts as a single month.
     */
    private static float sharesPerMonth(int numShares, long creationTimeMs, long nowMs) {
        double ageInMonths = 0.0;
        if (creationTimeMs != 0L) {
            int ageInDays = Math.toIntExact((nowMs - creationTimeMs) / 86400000L);
            ageInMonths = (double)ageInDays / 30.0;
        }
        return (float)((double)numShares / Math.max(1.0, Math.ceil(ageInMonths)));
    }

    /**
     * Lists every exposed dataset that has at least one sharing rule, with the projects it is
     * shared to.
     *
     * <p>Must be called with an attached transaction (asserted on entry). Share targets that
     * cannot be resolved, or that are application instances, are ignored. The project count
     * stored in each {@code DatasetInfo} is the number of retained targets plus one.
     *
     * @return for each candidate dataset, the list of retained target projects (possibly empty)
     * @throws IOException declared for the underlying service/DAO accesses
     */
    private Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> listCandidates() throws IOException {
        TransactionContext.assertAttachedTransaction();

        // How many data collection items reference each dataset.
        Map<AnyLoc, Integer> dataCollectionCounts = new HashMap<>();
        for (DataCollection collection : this.dataCollectionsService.listUnsafe()) {
            for (AbstractDataCollectionItem item : collection.items) {
                if (item instanceof DataCollectionDataset datasetItem) {
                    dataCollectionCounts.merge(datasetItem.reference, 1, Integer::sum);
                }
            }
        }

        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> candidates = new HashMap<>();
        for (SerializedProject project : this.projectsService.listAllUnsafe()) {
            for (ExposedObject exposedObject : project.exposedObjects.objects) {
                // Cheap in-memory checks first, so the DAO is only queried for shared datasets.
                if (exposedObject.type != ITaggingService.TaggableType.DATASET || exposedObject.rules.isEmpty()) {
                    continue;
                }
                SerializedDataset sd = (SerializedDataset)this.datasetsDAO.getOrNull(project.projectKey, exposedObject.localName);
                if (sd == null) {
                    continue;
                }
                List<SerializedProject> shareTargets = new ArrayList<>();
                for (ExposedObject.Rule rule : exposedObject.rules) {
                    SerializedProject target = this.projectsService.getOrNullUnsafe(rule.targetProject);
                    if (target != null && target.projectAppType != SerializedProject.ProjectAppType.APP_INSTANCE) {
                        shareTargets.add(target);
                    }
                }
                int dataCollectionCount = dataCollectionCounts.getOrDefault(new AnyLoc(sd.projectKey, sd.name), 0);
                candidates.put(new PopularDatasetsProcessing.DatasetInfo(sd, shareTargets.size() + 1, dataCollectionCount), shareTargets);
            }
        }
        return candidates;
    }

    /**
     * Keeps the candidates whose project count is at least {@code minimumShares} and, when
     * {@code onlyFromDataCollections} is set, that belong to at least one data collection.
     *
     * @param candidates candidates with their share targets; not modified
     * @param minimumShares minimum value of {@code DatasetInfo.projects} to be kept
     * @param onlyFromDataCollections whether to drop datasets absent from every data collection
     * @return a new, mutable {@link HashMap} holding the retained entries (callers prune it in place)
     */
    private Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> filterCandidates(Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> candidates, int minimumShares, boolean onlyFromDataCollections) {
        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> retained = new HashMap<>();
        for (Map.Entry<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> entry : candidates.entrySet()) {
            PopularDatasetsProcessing.DatasetInfo datasetInfo = entry.getKey();
            boolean collectionOk = !onlyFromDataCollections || datasetInfo.dataCollections > 0;
            if (collectionOk && datasetInfo.projects >= minimumShares) {
                retained.put(datasetInfo, entry.getValue());
            }
        }
        return retained;
    }

    /**
     * Looks up the end time of the latest successful build of each candidate and keeps the
     * candidates whose time is accepted by {@code filter}.
     *
     * <p>Must be called without an attached transaction (asserted on entry). A candidate with
     * no successful build, or whose lookup fails, is tested with a time of 0.
     *
     * @param candidates datasets to look up
     * @param filter predicate applied to the build end time
     * @return the accepted candidates mapped to their build end time
     */
    private Map<PopularDatasetsProcessing.DatasetInfo, Long> getLastBuildTime_NT(Collection<PopularDatasetsProcessing.DatasetInfo> candidates, LongPredicate filter) {
        TransactionContext.assertNoAttachedTransaction();
        Map<PopularDatasetsProcessing.DatasetInfo, Long> acceptedBuildTimes = new HashMap<>();
        for (PopularDatasetsProcessing.DatasetInfo dataset : candidates) {
            long endTime = this.latestSuccessfulBuildEndTime(dataset);
            if (filter.test(endTime)) {
                acceptedBuildTimes.put(dataset, endTime);
            }
        }
        return acceptedBuildTimes;
    }

    /** Returns the end time of the dataset's latest successful build, or 0 if none or on SQL error. */
    private long latestSuccessfulBuildEndTime(PopularDatasetsProcessing.DatasetInfo dataset) {
        try {
            ReadOnlyJobsInternalDB.ObjectBuild build = this.jobsDBService.getLatestSuccessfulBuildForObject(dataset.projectKey, dataset.name);
            return build == null ? 0L : build.buildEndTime;
        }
        catch (SQLException e) {
            // Best effort: a failed lookup is logged and handled like "never built".
            logger.warn((Object)("Unable to fetch last build time for dataset " + dataset.projectKey + "." + dataset.name), (Throwable)e);
            return 0L;
        }
    }

    /**
     * Collects, for each candidate dataset, the recipes that directly consume it in the
     * projects it is shared to.
     *
     * <p>Must be called without an attached transaction (asserted on entry). The candidates
     * are first regrouped by target project so that each project's flow graph is fetched once,
     * inside its own read transaction. Datasets with no successor recipe in any target project
     * are absent from the result.
     *
     * @param candidates candidate datasets with the projects they are shared to
     * @return the downstream recipes found for each dataset
     * @throws IOException if a flow graph cannot be read
     */
    private Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedRecipe>> collectDownstreamRecipes_NT(Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> candidates) throws IOException {
        TransactionContext.assertNoAttachedTransaction();

        // Invert the mapping: target project -> datasets shared to it.
        Map<SerializedProject, List<PopularDatasetsProcessing.DatasetInfo>> datasetsByTargetProject = new HashMap<>();
        for (Map.Entry<PopularDatasetsProcessing.DatasetInfo, List<SerializedProject>> candidate : candidates.entrySet()) {
            for (SerializedProject targetProject : candidate.getValue()) {
                datasetsByTargetProject.computeIfAbsent(targetProject, k -> new ArrayList<>()).add(candidate.getKey());
            }
        }

        Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedRecipe>> downstreamRecipes = new HashMap<>();
        for (Map.Entry<SerializedProject, List<PopularDatasetsProcessing.DatasetInfo>> target : datasetsByTargetProject.entrySet()) {
            // try-with-resources closes the transaction and never masks an exception from the body.
            try (Transaction t = this.transactionService.beginRead()) {
                ProjectFlowGraph graph = this.flowGraphService.getProjectGraphUnsafe(target.getKey().projectKey);
                for (PopularDatasetsProcessing.DatasetInfo datasetInfo : target.getValue()) {
                    FlowDataset fd = graph.getDataset(datasetInfo.projectKey, datasetInfo.name);
                    if (fd == null) {
                        // The dataset does not appear in this project's flow graph.
                        continue;
                    }
                    for (GraphNode successor : fd.getSuccessors()) {
                        if (successor instanceof FlowRecipe flowRecipe) {
                            downstreamRecipes.computeIfAbsent(datasetInfo, k -> new ArrayList<>()).add(flowRecipe.getModel());
                        }
                    }
                }
            }
        }
        return downstreamRecipes;
    }

    /**
     * Summarizes the downstream recipes of each dataset.
     *
     * <p>Must be called without an attached transaction (asserted on entry). A dataset is kept
     * only if at least one of its downstream recipes has a successful build and at least one
     * was reported by the trending detector as falling in window 0. Recipe build times are
     * cached so that a recipe reached from several datasets is looked up once.
     *
     * @param downstreamRecipes downstream recipes per dataset
     * @param trendingDetector detector fed with recipe creation dates; reset before each dataset
     * @return for each retained dataset: recipe count, trending flag and window-0 recipe count
     */
    private Map<PopularDatasetsProcessing.DatasetInfo, PopularDatasetsProcessing.DownstreamRecipesDesc> computeDownstreamRecipeInfo_NT(Map<PopularDatasetsProcessing.DatasetInfo, List<SerializedRecipe>> downstreamRecipes, TrendingDetector trendingDetector) {
        TransactionContext.assertNoAttachedTransaction();
        Map<SerializedRecipe, Long> buildEndTimeByRecipe = new HashMap<>();
        Map<PopularDatasetsProcessing.DatasetInfo, PopularDatasetsProcessing.DownstreamRecipesDesc> descriptions = new HashMap<>();
        for (Map.Entry<PopularDatasetsProcessing.DatasetInfo, List<SerializedRecipe>> perDataset : downstreamRecipes.entrySet()) {
            List<SerializedRecipe> recipes = perDataset.getValue();
            boolean anyRecipeBuilt = false;
            int recipesInLatestWindow = 0;
            trendingDetector.reset();
            for (SerializedRecipe recipe : recipes) {
                long buildEndTime = buildEndTimeByRecipe.computeIfAbsent(recipe, this::recipeLastBuildEndTime);
                if (buildEndTime != -1L) {
                    anyRecipeBuilt = true;
                }
                // Recipes without a creation tag cannot be placed in a window.
                if (recipe.creationTag != null && trendingDetector.addCreationDate(recipe.creationTag.getLastModifiedOn()) == 0) {
                    recipesInLatestWindow++;
                }
            }
            if (anyRecipeBuilt && recipesInLatestWindow > 0) {
                descriptions.put(perDataset.getKey(), new PopularDatasetsProcessing.DownstreamRecipesDesc(recipes.size(), trendingDetector.isTrending(), recipesInLatestWindow));
            }
        }
        return descriptions;
    }

    /** Returns the end time of the recipe's latest successful build, or -1 if none or on SQL error. */
    private long recipeLastBuildEndTime(SerializedRecipe recipe) {
        try {
            ReadOnlyJobsInternalDB.ObjectBuild build = this.navigatorService.getRecipeLastSuccessfulBuild(recipe);
            return build == null ? -1L : build.buildEndTime;
        }
        catch (SQLException e) {
            // Best effort: a failed lookup is logged and handled like "never built".
            logger.warn((Object)("Unable to fetch last build time for recipe " + recipe.getFullId()), (Throwable)e);
            return -1L;
        }
    }
}

