/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.docextraction;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.code.DSSInternalCodeEnvsService;
import com.dataiku.dip.docextraction.DocExtractionKernelPool;
import com.dataiku.dip.docextraction.DocExtractionUtils;
import com.dataiku.dip.docextraction.Screenshotter;
import com.dataiku.dip.docextraction.ScreenshotterService;
import com.dataiku.dip.docextraction.StructuredContent;
import com.dataiku.dip.docextraction.StructuredExtractor;
import com.dataiku.dip.docextraction.VLMExtractor;
import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.docextraction.common.TextChunk;
import com.dataiku.dip.docextraction.common.TextExtractionResponseOrError;
import com.dataiku.dip.files.MimeTypeUtils;
import com.dataiku.dip.input.stream.EnrichedInputStream;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.llm.LLMAuditHelper;
import com.dataiku.dip.llm.LLMStructuredRef;
import com.dataiku.dip.llm.governance.GuardrailsPipelineSettings;
import com.dataiku.dip.llm.governance.GuardrailsPipelineUtils;
import com.dataiku.dip.llm.online.LLMClient;
import com.dataiku.dip.llm.online.LLMMeshClient;
import com.dataiku.dip.llm.online.LLMMeshClientFactory;
import com.dataiku.dip.managedfolder.ManagedFolder;
import com.dataiku.dip.managedfolder.ManagedFolderHandler;
import com.dataiku.dip.managedfolder.ManagedFoldersService;
import com.dataiku.dip.resourceusage.ComputeResourceUsage;
import com.dataiku.dip.resourceusage.ComputeResourceUsageReportingService;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.audit.AuditTrailService;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.util.AnyLoc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.ExceptionUtils;
import com.dataiku.dss.shadelib.com.google.common.annotations.VisibleForTesting;
import com.dataiku.dss.shadelib.org.apache.commons.io.FilenameUtils;
import com.dataiku.dss.shadelib.org.apache.commons.io.IOUtils;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.concurrent.Callable;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.web.server.UnsupportedMediaTypeStatusException;

@Service
public class DocExtractionService {
    /** Used to look up a managed folder from its resolved location before building a handler on it. */
    @Autowired
    private ManagedFoldersService managedFoldersService;
    /** Opens the read transactions that wrap every managed-folder lookup in this class. */
    @Autowired
    private TransactionService transactionService;
    /** Passed to the LLM audit helper when emitting completion audit events for VLM extraction. */
    @Autowired
    private AuditTrailService auditTrailService;
    /** Receives the total completion compute resource usage (CRU) after a VLM extraction run. */
    @Autowired
    private ComputeResourceUsageReportingService cruReportingService;
    /** Delegate for the three screenshot extraction entry points. */
    @Autowired
    private ScreenshotterService screenshotterService;
    /** Runs the Docling-based structured extraction used for pdf, pptx, docx and html documents. */
    @Autowired
    private DocExtractionKernelPool docExtractionKernelPool;
    // Logger for the "dku.docextraction" category.
    // NOTE(review): public and non-final, so any class can reassign it — consider making it final.
    public static DKULogger logger = DKULogger.getLogger((String)"dku.docextraction");

    /**
     * Extracts screenshots for a document identified by a managed-folder document reference.
     *
     * <p>This is a pure pass-through to {@link ScreenshotterService}; no validation or
     * transformation happens here.
     *
     * @param authCtx authentication context, forwarded unchanged
     * @param projectKey project key, forwarded unchanged
     * @param documentRef managed-folder document reference, forwarded unchanged
     * @param settings screenshotter settings, forwarded unchanged
     * @return the response-or-error object produced by the screenshotter service
     * @throws Exception whatever the screenshotter service throws
     */
    public Screenshotter.ScreenshotterResponseOrError extractScreenshotsFromManagedFolderDocumentRef(AuthCtx authCtx, String projectKey, InputRefs.ManagedFolderDocumentRef documentRef, Screenshotter.ScreenshotterSettings settings) throws Exception {
        Screenshotter.ScreenshotterResponseOrError outcome =
                this.screenshotterService.extractScreenshotsFromManagedFolderDocumentRef(authCtx, projectKey, documentRef, settings);
        return outcome;
    }

    /**
     * Extracts screenshots for a document identified by a local-file document reference.
     *
     * <p>This is a pure pass-through to {@link ScreenshotterService}; no validation or
     * transformation happens here.
     *
     * @param authCtx authentication context, forwarded unchanged
     * @param projectKey project key, forwarded unchanged
     * @param document local-file document reference, forwarded unchanged
     * @param settings screenshotter settings, forwarded unchanged
     * @return the response-or-error object produced by the screenshotter service
     * @throws Exception whatever the screenshotter service throws
     */
    public Screenshotter.ScreenshotterResponseOrError extractScreenshotsFromLocalFileDocumentRef(AuthCtx authCtx, String projectKey, InputRefs.LocalFileDocumentRef document, Screenshotter.ScreenshotterSettings settings) throws Exception {
        Screenshotter.ScreenshotterResponseOrError outcome =
                this.screenshotterService.extractScreenshotsFromLocalFileDocumentRef(authCtx, projectKey, document, settings);
        return outcome;
    }

    /**
     * Extracts screenshots for a document identified by a temporary document reference.
     *
     * <p>This is a pure pass-through to {@link ScreenshotterService}; no validation or
     * transformation happens here.
     *
     * @param authCtx authentication context, forwarded unchanged
     * @param projectKey project key, forwarded unchanged
     * @param document temporary document reference, forwarded unchanged
     * @param settings screenshotter settings, forwarded unchanged
     * @return the response-or-error object produced by the screenshotter service
     * @throws Exception whatever the screenshotter service throws
     */
    public Screenshotter.ScreenshotterResponseOrError extractScreenshotsFromTmpDocumentRef(AuthCtx authCtx, String projectKey, InputRefs.TmpDocumentRef document, Screenshotter.ScreenshotterSettings settings) throws Exception {
        Screenshotter.ScreenshotterResponseOrError outcome =
                this.screenshotterService.extractScreenshotsFromTmpDocumentRef(authCtx, projectKey, document, settings);
        return outcome;
    }

    /**
     * Runs structured extraction on a file stored in a managed folder.
     *
     * <p>The managed folder is looked up inside a read transaction, then a handler is built on it
     * with the caller's authentication context. The document's raw stream is handed to
     * {@link #runStructuredExtractorFromInputStream} together with the file name reported by the
     * enriched input stream.
     *
     * @param authCtx authentication context used to build the folder handler and run the extraction
     * @param projectKey project key used to resolve {@code document.managedFolderId}
     * @param document reference carrying the managed folder id and the file path inside it
     * @param settings structured extraction settings, forwarded unchanged
     * @return the extraction result, or an error wrapper when the extractor reports a failure
     * @throws Exception if the folder lookup, the stream opening or the extraction throws
     */
    public StructuredExtractor.StructuredExtractionResponseOrError runStructuredExtractorFromManagedFolderDocumentRef(AuthCtx authCtx, String projectKey, InputRefs.ManagedFolderDocumentRef document, StructuredExtractor.StructuredExtractorSettings settings) throws Exception {
        ManagedFolder mf;
        // The transaction only needs to cover the lookup; it is closed before any file I/O starts.
        try (Transaction ignored = this.transactionService.beginRead()) {
            AnyLoc managedFolderLoc = AnyLoc.resolveSmart(projectKey, document.managedFolderId);
            mf = this.managedFoldersService.getMandatoryUnsafe(managedFolderLoc);
        }
        try (ManagedFolderHandler handler = (ManagedFolderHandler) mf.buildHandler(authCtx)) {
            // NOTE(review): the result is discarded; the call is kept because the original made it —
            // presumably it forces the provider to be initialised. TODO confirm.
            handler.getProvider();
            EnrichedInputStream inputStream = handler.getInputStream(document.filePath);
            // try-with-resources closes the raw stream on every path and records a close failure
            // as a suppressed exception, which is what the hand-expanded original did.
            try (InputStream docStream = inputStream.rawStream()) {
                return this.runStructuredExtractorFromInputStream(authCtx, projectKey, docStream, inputStream.getFilename(), settings);
            }
        }
    }

    /**
     * Runs structured extraction on a managed-folder document and flattens the result to text chunks.
     *
     * @param authCtx authentication context, forwarded to the structured extraction
     * @param projectKey project key, forwarded to the structured extraction
     * @param document managed-folder document reference, forwarded to the structured extraction
     * @param settings structured extraction settings, forwarded unchanged
     * @return a success wrapper around the flat chunk list, or an error wrapper built from the
     *         structured extraction's error message
     * @throws Exception if the underlying structured extraction throws
     */
    public TextExtractionResponseOrError runStructuredExtractionFromManagedFolderAndFlattenToTextChunks(AuthCtx authCtx, String projectKey, InputRefs.ManagedFolderDocumentRef document, StructuredExtractor.StructuredExtractorSettings settings) throws Exception {
        StructuredExtractor.StructuredExtractionResponseOrError structured = this.runStructuredExtractorFromManagedFolderDocumentRef(authCtx, projectKey, document, settings);
        // Failure first: only the message is carried over into the text-level error.
        if (!structured.ok) {
            Exception failure = new Exception(structured.errorMessage);
            return TextExtractionResponseOrError.fromError(failure);
        }
        return TextExtractionResponseOrError.fromSuccess(StructuredExtractor.getFlatTextChunkListFromStructured(structured.content));
    }

    /**
     * Runs structured extraction on a document stream, choosing the extractor from the file extension.
     *
     * <ul>
     *   <li>{@code pdf}, {@code pptx}, {@code docx}, {@code html}: Docling extraction through the
     *       kernel pool, with a timeout read from the
     *       {@code dku.docextraction.structured.timeoutInMinutes} parameter (default 240).</li>
     *   <li>{@code md}: the stream is decoded to text and parsed as Markdown, honouring
     *       {@code settings.maxSectionDepth}.</li>
     *   <li>{@code txt}: the stream is decoded to text and passed to the plain-text extractor.</li>
     * </ul>
     *
     * <p>The extension is matched case-insensitively. This method does not close {@code fileStream}.
     *
     * @param authCtx authentication context, used by the Docling path
     * @param projectKey project key, used by the Docling path
     * @param fileStream content of the document
     * @param fileName file name; only its extension drives the dispatch, the full name is logged
     * @param settings structured extraction settings; {@code maxSectionDepth} must be &gt;= 0
     * @return a success wrapper around the structured content, or an error wrapper when Docling
     *         reports a failure
     * @throws UnsupportedMediaTypeStatusException if the extension is not one of the supported ones
     * @throws Exception if a validation check fails (blank extension, negative depth) or the
     *         extraction itself throws
     */
    public StructuredExtractor.StructuredExtractionResponseOrError runStructuredExtractorFromInputStream(AuthCtx authCtx, String projectKey, InputStream fileStream, String fileName, StructuredExtractor.StructuredExtractorSettings settings) throws Exception {
        // getExtension returns null for a null file name; normalise it to "" so that the blank
        // check below reports the problem instead of a NullPointerException on toLowerCase.
        String rawExt = FilenameUtils.getExtension(fileName);
        // Locale.ROOT keeps the lowercasing independent of the JVM default locale.
        String ext = rawExt == null ? "" : rawExt.toLowerCase(Locale.ROOT);
        ErrorContext.check(StringUtils.isNotBlank(ext), "File extension cannot be blank");
        ErrorContext.check(settings.maxSectionDepth >= 0, "Max section depth cannot be strictly lower than 0");
        // The (Object) cast is kept on purpose so the same logger overload is selected as before.
        logger.info((Object)DocExtractionUtils.buildMessageLogForDocument(fileName, "Starting structured extraction"));
        switch (ext) {
            case "pdf":
            case "pptx":
            case "docx":
            case "html": {
                String defaultEnvName = DSSInternalCodeEnvsService.getCodeEnvName(DSSInternalCodeEnvsService.DSSInternalCodeEnvType.DOCUMENT_EXTRACTION_CODE_ENV);
                int timeoutInMinutes = ApplicationConfigurator.getParams().getIntParam("dku.docextraction.structured.timeoutInMinutes", Integer.valueOf(240));
                DocExtractionKernelPool.StructuredContentResponse doclingExtractionResult = this.docExtractionKernelPool.structuredExtractWithDocling(authCtx, projectKey, defaultEnvName, settings, fileStream, fileName, timeoutInMinutes);
                if (doclingExtractionResult.ok) {
                    return StructuredExtractor.StructuredExtractionResponseOrError.fromSuccess(doclingExtractionResult.resp);
                }
                return StructuredExtractor.StructuredExtractionResponseOrError.fromError(new Exception("Failed to convert document using docling because of python exception: " + doclingExtractionResult.error));
            }
            case "md": {
                String markdown = this.detectEncodingAndBuildStringFromInputStream(fileStream);
                StructuredContent root = StructuredExtractor.runMarkdownStructuredExtraction(markdown, settings.maxSectionDepth);
                return StructuredExtractor.StructuredExtractionResponseOrError.fromSuccess(root);
            }
            case "txt": {
                String txt = this.detectEncodingAndBuildStringFromInputStream(fileStream);
                return StructuredExtractor.StructuredExtractionResponseOrError.fromSuccess(StructuredExtractor.runTxtStructuredExtraction(txt));
            }
        }
        // Reached only when no case matched: every case above returns.
        throw new UnsupportedMediaTypeStatusException("Cannot perform structured extraction file:" + fileName + " because ." + ext + " files are not supported");
    }

    /**
     * Reads {@code stream} to the end and decodes its bytes to a string, choosing the charset
     * from the content.
     *
     * <p>NOTE(review): the decompiler could not recover the original body of this method (CFR
     * failed with "Started 2 blocks at once") and emitted a placeholder that threw
     * {@link IllegalStateException} unconditionally, which made every {@code md} and {@code txt}
     * extraction fail. This body is a standard-library reconstruction, not the original
     * algorithm; confirm the detection strategy against the original class file, in particular
     * for legacy single-byte or multi-byte encodings.
     *
     * <p>Strategy, in order:
     * <ol>
     *   <li>a Unicode byte-order mark (UTF-8, UTF-32, UTF-16) selects the charset and is stripped;</li>
     *   <li>otherwise the bytes are decoded as strict UTF-8;</li>
     *   <li>if they are not valid UTF-8, ISO-8859-1 is used, which maps every byte to a character
     *       and therefore cannot fail.</li>
     * </ol>
     *
     * <p>The stream is not closed here; the caller owns it.
     *
     * @param stream the bytes to decode
     * @return the decoded text, without any leading byte-order mark
     * @throws IOException if reading the stream fails
     */
    private String detectEncodingAndBuildStringFromInputStream(InputStream stream) throws IOException {
        byte[] data = IOUtils.toByteArray(stream);

        if (hasBytePrefix(data, 0xEF, 0xBB, 0xBF)) {
            return new String(data, 3, data.length - 3, StandardCharsets.UTF_8);
        }
        // UTF-32 must be tested before UTF-16: the UTF-32LE mark (FF FE 00 00) starts with the
        // UTF-16LE mark (FF FE).
        if (hasBytePrefix(data, 0x00, 0x00, 0xFE, 0xFF)) {
            return new String(data, 4, data.length - 4, Charset.forName("UTF-32BE"));
        }
        if (hasBytePrefix(data, 0xFF, 0xFE, 0x00, 0x00)) {
            return new String(data, 4, data.length - 4, Charset.forName("UTF-32LE"));
        }
        if (hasBytePrefix(data, 0xFE, 0xFF)) {
            return new String(data, 2, data.length - 2, StandardCharsets.UTF_16BE);
        }
        if (hasBytePrefix(data, 0xFF, 0xFE)) {
            return new String(data, 2, data.length - 2, StandardCharsets.UTF_16LE);
        }

        try {
            // REPORT makes the decoder fail on the first invalid sequence instead of silently
            // substituting U+FFFD, so invalid UTF-8 is detected rather than corrupted.
            return StandardCharsets.UTF_8.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT)
                    .decode(ByteBuffer.wrap(data))
                    .toString();
        } catch (CharacterCodingException notUtf8) {
            // Deliberate fallback, not an error: the content is simply not UTF-8.
            return new String(data, StandardCharsets.ISO_8859_1);
        }
    }

    /** Tells whether {@code data} starts with the given unsigned byte values. */
    private static boolean hasBytePrefix(byte[] data, int... prefix) {
        if (data.length < prefix.length) {
            return false;
        }
        for (int k = 0; k < prefix.length; k++) {
            if ((data[k] & 0xFF) != prefix[k]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Sends the request's images to a vision-capable LLM and returns one text chunk per query.
     *
     * <p>Steps: validate the settings and build the completion queries, resolve the guardrails
     * settings for the requested LLM, run all queries through an LLM mesh client, report the
     * completion compute resource usage when one is available, emit an audit event for every
     * query/response pair, and finally turn each response into a {@link TextChunk}.
     *
     * <p>Audit events are emitted for all responses before any failed response is turned into
     * an exception.
     *
     * @param authCtx authentication context used for guardrails resolution and the LLM client
     * @param projectKey project key used for guardrails resolution and the LLM client
     * @param request images, prompt, window settings and LLM id
     * @return the text chunks, in the same order as the generated queries
     * @throws IOException if any LLM response is an error
     * @throws Exception if validation fails, the model does not support image inputs, or any
     *         collaborator throws
     */
    public List<TextChunk> runVLMextractor(AuthCtx authCtx, String projectKey, VLMExtractor.VLMExtractorRequest request) throws Exception {
        List<LLMClient.SingleCompletionQuery> queries = this.validateSettingsAndBuildCompletionQueries(authCtx, projectKey, request);
        LLMStructuredRef llmRef = LLMStructuredRef.decodeId(request.settings.llmId);
        GuardrailsPipelineSettings connectionLevelSettings = GuardrailsPipelineUtils.getConnectionAndLLMLevelSettings(authCtx, projectKey, llmRef);
        GuardrailsPipelineSettings effectiveSettings = GuardrailsPipelineUtils.mergeEnforcementSettings(connectionLevelSettings, null);
        try (LLMMeshClient client = LLMMeshClientFactory.get(authCtx, projectKey, llmRef, effectiveSettings, null, queries.size())) {
            EnrichedLLMStructuredRef enrichedRef = client.getEnrichedRef();
            ErrorContext.check(enrichedRef.supportsImageInputs, "Provided model: " + request.settings.llmId + " does not support image inputs");

            List<LLMClient.SimpleCompletionResponseOrError> answers = client.completeQueries(queries, new LLMClient.CompletionSettings());

            ComputeResourceUsage usage = client.getTotalCRU(ComputeResourceUsage.LLMUsageType.COMPLETION);
            if (usage != null) {
                this.cruReportingService.reportComplete(usage);
            }

            // First pass: audit every pair, including failed ones.
            for (int pos = 0; pos < answers.size(); pos++) {
                LLMAuditHelper.emitLLMCompletionAuditFromBackendIfNeeded(this.auditTrailService, enrichedRef, client.getConnection(), queries.get(pos), answers.get(pos));
            }

            // Second pass: collect the texts, failing on the first error response.
            ArrayList<TextChunk> chunks = new ArrayList<TextChunk>();
            for (LLMClient.SimpleCompletionResponseOrError answer : answers) {
                if (answer.ok) {
                    chunks.add(TextChunk.build(answer.text, null));
                    continue;
                }
                throw new IOException("Got error from LLM while processing request: " + answer.errorMessage);
            }
            return chunks;
        }
    }

    /**
     * Builds a function that returns the inline images for an index window {@code [from, to)}
     * of the given images reference.
     *
     * <p>For a managed-folder reference the window of image paths is loaded through
     * {@link #getInlineImages}; for an inline reference the window is a sub-list view of the
     * images already carried by the reference. Any other reference type makes the returned
     * function throw {@link IllegalArgumentException} when invoked.
     *
     * @param authCtx authentication context used when images have to be read from a managed folder
     * @param projectKey project key used when images have to be read from a managed folder
     * @param imagesRef the images reference to slice
     * @return a function taking the start (inclusive) and end (exclusive) indices of the window
     */
    private ExceptionUtils.ThrowingBiFunction<Integer, Integer, List<InputRefs.SingleInlineImage>, Exception> generateInlineImageGetterFromImagesRef(final AuthCtx authCtx, final String projectKey, final InputRefs.ImagesRef imagesRef) {
        return new ExceptionUtils.ThrowingBiFunction<Integer, Integer, List<InputRefs.SingleInlineImage>, Exception>(){

            @Override
            public List<InputRefs.SingleInlineImage> apply(Integer from, Integer to) throws Exception {
                if (imagesRef instanceof InputRefs.ManagedFolderImagesRef) {
                    InputRefs.ManagedFolderImagesRef folderRef = (InputRefs.ManagedFolderImagesRef) imagesRef;
                    List<String> windowPaths = folderRef.imagesPaths.subList(from, to);
                    return DocExtractionService.this.getInlineImages(authCtx, projectKey, folderRef.managedFolderId, windowPaths);
                }
                if (imagesRef instanceof InputRefs.InlineImagesRef) {
                    InputRefs.InlineImagesRef inlineRef = (InputRefs.InlineImagesRef) imagesRef;
                    return inlineRef.inlineImages.subList(from, to);
                }
                throw new IllegalArgumentException("Cannot extract images from ImagesRef");
            }
        };
    }

    /**
     * Validates the windowing settings of a VLM extraction request and builds one completion
     * query per window of images.
     *
     * <p>Window rules:
     * <ul>
     *   <li>at least one image is required;</li>
     *   <li>{@code windowSize} must be {@code -1} or strictly positive; {@code -1} means a single
     *       window containing every image;</li>
     *   <li>otherwise {@code windowOverlap} must be strictly lower than {@code windowSize};</li>
     *   <li>a {@code windowSize} greater than or equal to the image count also yields a single
     *       window, and the overlap is then ignored;</li>
     *   <li>in the remaining case {@code windowOverlap} must be &gt;= 0 and consecutive windows
     *       start {@code windowSize - windowOverlap} images apart.</li>
     * </ul>
     * The last window is truncated to the image count, and iteration stops as soon as a window
     * reaches the last image.
     *
     * @param authCtx authentication context used when images have to be read from a managed folder
     * @param projectKey project key used when images have to be read from a managed folder
     * @param request images reference, window settings and LLM prompt
     * @return the completion queries, one per window, in image order
     * @throws Exception if a validation check fails or the images cannot be loaded
     */
    @VisibleForTesting
    protected List<LLMClient.SingleCompletionQuery> validateSettingsAndBuildCompletionQueries(AuthCtx authCtx, String projectKey, VLMExtractor.VLMExtractorRequest request) throws Exception {
        int windowOverlap;
        int windowSize;
        // Unboxed once; every later comparison is a plain int comparison.
        int imageCount = request.inputs.imagesRef.getImageCount();
        ErrorContext.check(imageCount > 0, "At least one image is required for vlm extraction");
        ErrorContext.check(request.settings.windowSize != 0, "Window size cannot be equal to 0");
        ErrorContext.check(request.settings.windowSize >= -1, "Window size cannot be lower than -1");
        if (request.settings.windowSize == -1) {
            windowSize = imageCount;
            windowOverlap = 0;
        } else {
            ErrorContext.check(request.settings.windowSize > request.settings.windowOverlap, "Window overlap cannot be greater or equal to window size");
            if (request.settings.windowSize >= imageCount) {
                windowSize = imageCount;
                windowOverlap = 0;
            } else {
                ErrorContext.check(request.settings.windowOverlap >= 0, "Window overlap cannot be strictly lower than 0");
                windowSize = request.settings.windowSize;
                windowOverlap = request.settings.windowOverlap;
            }
        }
        List<LLMClient.SingleCompletionQuery> queries = new ArrayList<>();
        ExceptionUtils.ThrowingBiFunction<Integer, Integer, List<InputRefs.SingleInlineImage>, Exception> imagesForWindow = this.generateInlineImageGetterFromImagesRef(authCtx, projectKey, request.inputs.imagesRef);
        // The checks above guarantee windowSize > windowOverlap, so the step is always >= 1.
        int step = windowSize - windowOverlap;
        for (int start = 0; start < imageCount; start += step) {
            int end = Math.min(start + windowSize, imageCount);
            // Typed call: the indices box to Integer and the result is already a typed list.
            List<InputRefs.SingleInlineImage> windowImages = imagesForWindow.apply(start, end);
            queries.add(this.buildVLMSingleCompletionQueryFromInlineImages(windowImages, request.settings.llmPrompt));
            // Stop once the last image is covered, so no trailing window made only of overlap is sent.
            if (end == imageCount) {
                break;
            }
        }
        return queries;
    }

    /**
     * Builds a two-message completion query: a {@code system} message holding the prompt as a
     * single text part, followed by a {@code user} message holding one inline-image part per image.
     *
     * @param inlineImages images to attach, in order; each contributes its content and MIME type
     * @param prompt text of the system message
     * @return a new query whose {@code messages} field is the system message followed by the user message
     */
    private LLMClient.SingleCompletionQuery buildVLMSingleCompletionQueryFromInlineImages(List<InputRefs.SingleInlineImage> inlineImages, String prompt) {
        ArrayList<LLMClient.ChatMessagePart> imageParts = new ArrayList<>();
        for (InputRefs.SingleInlineImage inlineImage : inlineImages) {
            LLMClient.ChatMessagePart imagePart = new LLMClient.ChatMessagePart().withInlineImage(inlineImage.content, inlineImage.mimeType);
            imageParts.add(imagePart);
        }

        LLMClient.ChatMessage systemMessage = new LLMClient.ChatMessage("system", Collections.singletonList(new LLMClient.ChatMessagePart().withText(prompt)));
        LLMClient.ChatMessage userMessage = new LLMClient.ChatMessage("user", imageParts);

        ArrayList<LLMClient.ChatMessage> conversation = new ArrayList<>();
        conversation.add(systemMessage);
        conversation.add(userMessage);

        LLMClient.SingleCompletionQuery result = new LLMClient.SingleCompletionQuery();
        result.messages = conversation;
        return result;
    }

    /**
     * Loads images from a managed folder and returns them as Base64-encoded inline images.
     *
     * <p>The folder is looked up inside a read transaction, then each path is read fully through
     * a handler built with the caller's authentication context. The MIME type of each image is
     * derived from the path's extension and is {@code null} when the extension is not recognised.
     *
     * @param authCtx authentication context used to build the folder handler
     * @param projectKey project key used to resolve {@code folderId}
     * @param folderId managed folder identifier
     * @param imagePaths paths of the images inside the folder
     * @return one inline image per path, in the same order
     * @throws Exception if the folder lookup or any read fails
     */
    protected List<InputRefs.SingleInlineImage> getInlineImages(AuthCtx authCtx, String projectKey, String folderId, List<String> imagePaths) throws Exception {
        ManagedFolder mf;
        ArrayList<InputRefs.SingleInlineImage> res = new ArrayList<>();
        // The transaction only needs to cover the lookup; it is closed before any file I/O starts.
        try (Transaction ignored = this.transactionService.beginRead()) {
            AnyLoc managedFolderLoc = AnyLoc.resolveSmart(projectKey, folderId);
            mf = this.managedFoldersService.getMandatoryUnsafe(managedFolderLoc);
        }
        try (ManagedFolderHandler handler = (ManagedFolderHandler) mf.buildHandler(authCtx)) {
            for (String imagePath : imagePaths) {
                // try-with-resources rather than a finally block containing "continue": a
                // "continue" inside finally discards any exception in flight, which would have
                // hidden read failures whenever the stream was null.
                try (InputStream img = handler.getInputStream(imagePath).rawStream()) {
                    byte[] sourceBytes = IOUtils.toByteArray(img);
                    MimeTypeUtils.MimeType fullMimeType = MimeTypeUtils.fromExtension(FilenameUtils.getExtension(imagePath));
                    String mimeType = fullMimeType == null ? null : fullMimeType.mimeType;
                    res.add(new InputRefs.SingleInlineImage(Base64.getEncoder().encodeToString(sourceBytes), mimeType));
                }
            }
        }
        return res;
    }
}

