/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.common.doc_extraction;

import com.dataiku.dip.dataflow.JobActivity;
import com.dataiku.dip.dataflow.RunnableSubgraph;
import com.dataiku.dip.dataflow.graph.FlowComputable;
import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.managedfolder.ManagedFolder;
import com.dataiku.dip.managedfolder.ManagedFolderHandler;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.recipes.nlp.common.doc_extraction.DocExtractionRuleApplier;
import com.dataiku.dip.recipes.nlp.common.doc_extraction.ExtractedDataWriter;
import com.dataiku.dip.recipes.nlp.embed_documents.EmbedDocumentsIndexer;
import com.dataiku.dip.recipes.nlp.embed_documents.IndexerComputationResult;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.utils.DKULogger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public interface DocExtractionRecipeRunner {
    public static final String ROLE_IMAGES = "images";
    public static final DKULogger logger = DKULogger.getLogger((String)"dku.recipes.nlp.doc_extraction_common");

    private static List<InputRefs.ManagedFolderDocumentRefWithMetadata> listFolderContent(ManagedFolder folder, FlowComputable input, AuthCtx authCtx, JobActivity activity) throws IOException, CodedException, DKUSecurityException {
        RunnableSubgraph subgraph = activity.getSubgraph();
        ArrayList<InputRefs.ManagedFolderDocumentRefWithMetadata> filesInFolder = new ArrayList<InputRefs.ManagedFolderDocumentRefWithMetadata>();
        try (ManagedFolderHandler inputHandler = (ManagedFolderHandler)folder.buildHandler(authCtx);){
            List<Partition> sources = subgraph.getSourcePartitions(input);
            for (Partition source : sources) {
                ManagedFolderHandler.ManagedFolderListing sourceFiles = inputHandler.listFS(source, true, false);
                logger.info((Object)("listing partition " + source.toString()));
                for (ManagedFolderHandler.ManagedFolderListingItem item : sourceFiles.items) {
                    filesInFolder.add(new InputRefs.ManagedFolderDocumentRefWithMetadata(new InputRefs.ManagedFolderDocumentRef(folder.getFullId(), item.toPath()), null, new HashMap<String, String>()));
                }
            }
        }
        logger.info((Object)("Done listing folder: \n" + String.valueOf(filesInFolder.stream().map(doc -> doc.documentRef.filePath).toList())));
        return filesInFolder;
    }

    public static void indexAndProcessDocumentsInFolder(ManagedFolder inputFolder, FlowComputable folderAsFlowComputable, DocExtractionRuleApplier ruleApplier, IndexerComputationResult indexerResult, EmbedDocumentsIndexer indexer, ExtractedDataWriter extractedDataWriter, AuthCtx authCtx, JobActivity activity) throws Exception {
        List<InputRefs.ManagedFolderDocumentRefWithMetadata> inputDocuments = DocExtractionRecipeRunner.listFolderContent(inputFolder, folderAsFlowComputable, authCtx, activity);
        for (InputRefs.ManagedFolderDocumentRefWithMetadata document : inputDocuments) {
            ruleApplier.handleLoadedDocument(document, null, indexer, indexerResult, extractedDataWriter);
        }
        ruleApplier.onExtractionEnd(indexer, indexerResult);
        logger.info((Object)("Done loading input files: " + String.valueOf(inputDocuments.stream().map(doc -> doc.documentRef.filePath).toList())));
    }
}

