/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.embed_documents;

import com.dataiku.dip.dataflow.exec.filter.FilterDesc;
import com.dataiku.dip.docextraction.StructuredExtractor;
import com.dataiku.dip.docextraction.VLMExtractor;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.recipes.nlp.rag_embedding.RAGEmbeddingRecipeCreator;
import com.dataiku.dip.utils.DKULogger;
import javax.annotation.Nullable;

/**
 * One rule of the "embed documents" recipe: a filter selecting files, the action to
 * perform on the selected files, and the settings attached to that action.
 *
 * <p>All state is exposed as public mutable fields. NOTE(review): this looks like a
 * (de)serialized settings bean, so field names and declaration order are kept as-is —
 * confirm before renaming anything.
 */
public class EmbedDocumentsRule {
    /** Filter describing which files this rule applies to. */
    public FilterDesc filter;
    /** Action applied to the files matched by {@link #filter}; defaults to {@link ActionToPerform#VLM}. */
    public ActionToPerform actionToPerform = ActionToPerform.VLM;
    /** Settings for the VLM extractor (presumably used when the action is {@link ActionToPerform#VLM}). */
    public UiVLMExtractorSettings vlmSettings;
    /** Settings for the structured extractor (presumably used when the action is {@link ActionToPerform#STRUCTURED}). */
    public UIStructuredExtractorSettings structuredSettings;
    /** Chunk size / overlap settings; populated by the three-argument constructor. */
    public SplittingSettings splittingSettings;
    /** Kind of content to store in the multimodal column; not assigned anywhere in this class. */
    public MultimodalContentType storeInMultimodalColumn;
    /** Defaults to {@code false}. */
    public boolean reExtractUnmodifiedDocuments = false;
    public static DKULogger logger = DKULogger.getLogger("dku.recipes.nlp.embed_documents.embedDocumentsRule");

    /** Creates a rule holding only the field defaults declared above (no filter, no settings). */
    public EmbedDocumentsRule() {
    }

    /**
     * Creates a rule performing {@code extractor} on files whose extension equals, ignoring
     * case, one of {@code supportedFileExtensions}.
     *
     * @param extractor               action stored in {@link #actionToPerform}
     * @param supportedFileExtensions extensions to match, one filter condition each; must not be
     *                                {@code null} (iterating it would throw)
     * @param embeddingModelRef       optional embedding model reference; when present, its
     *                                {@code maxTokensLimit} is used to derive the default chunk size
     */
    public EmbedDocumentsRule(ActionToPerform extractor, String[] supportedFileExtensions, @Nullable EnrichedLLMStructuredRef embeddingModelRef) {
        this.actionToPerform = extractor;

        FilterDesc.FilterUiData uiData = new FilterDesc.FilterUiData();
        // "||" presumably makes the conditions alternatives (any extension matches) — TODO confirm.
        uiData.mode = "||";

        FilterDesc extensionFilter = new FilterDesc();
        extensionFilter.enabled = true;
        extensionFilter.uiData = uiData;
        this.filter = extensionFilter;

        for (String extension : supportedFileExtensions) {
            uiData.conditions.add(extensionCondition(extension));
        }

        this.splittingSettings = new SplittingSettings(embeddingModelRef);
    }

    /** Builds the filter condition "file extension equals {@code extension}, case-insensitively". */
    private static FilterDesc.FilterUiCondition extensionCondition(String extension) {
        FilterDesc.FilterUiCondition condition = new FilterDesc.FilterUiCondition();
        condition.col = "file name";
        condition.input = "file extension";
        condition.operator = FilterDesc.FilterUiOperator.EQUALS_CASE_INSENSITIVE_STRING.getRepr();
        condition.string = extension;
        return condition;
    }

    /** What to do with the files matched by a rule. */
    public static enum ActionToPerform {
        DONOTEXTRACT,
        VLM,
        STRUCTURED;

    }

    /**
     * Character-based chunking parameters.
     *
     * <p>NOTE(review): this is a non-static inner class, so every instance carries a reference
     * to its enclosing {@link EmbedDocumentsRule}. Left unchanged because making it static
     * would alter the constructor signature seen by other code.
     */
    public class SplittingSettings {
        public int chunkSizeCharacters;
        public int chunkOverlapCharacters;

        /**
         * Derives both values from {@link RAGEmbeddingRecipeCreator} defaults.
         *
         * @param ref embedding model reference, may be {@code null}; in that case {@code null}
         *            is passed as the token limit
         */
        SplittingSettings(EnrichedLLMStructuredRef ref) {
            final int chunkSize = RAGEmbeddingRecipeCreator.adaptDefaultChunkSizeCharacters(
                    ref != null ? ref.maxTokensLimit : null);
            this.chunkSizeCharacters = chunkSize;
            // The overlap default is computed from the chunk size chosen just above.
            this.chunkOverlapCharacters = RAGEmbeddingRecipeCreator.adaptDefaultOverlapCharacters(chunkSize);
        }
    }

    /** UI-side settings for the structured extractor. */
    public static class UIStructuredExtractorSettings {
        /** Defaults to {@link SplitUnit#SECTION}. */
        public SplitUnit splitUnit = SplitUnit.SECTION;
        /** Defaults to 6; only read when {@link #splitUnit} is not {@link SplitUnit#DOCUMENT}. */
        public Integer maxSectionDepth = 6;

        /**
         * Converts these UI settings into extractor settings.
         *
         * @return new settings whose {@code maxSectionDepth} is 0 when splitting by
         *         {@link SplitUnit#DOCUMENT}, and {@link #maxSectionDepth} otherwise
         * @throws NullPointerException if {@link #splitUnit} is {@code null}, or if
         *         {@link #maxSectionDepth} is {@code null} while not splitting by document
         */
        public StructuredExtractor.StructuredExtractorSettings toStructuredExtractorSettings() {
            final boolean splitByDocument = this.splitUnit.equals(SplitUnit.DOCUMENT);
            // Deliberately an int conditional: maxSectionDepth is unboxed here, as before.
            final int depth = splitByDocument ? 0 : this.maxSectionDepth;

            StructuredExtractor.StructuredExtractorSettings extractorSettings = new StructuredExtractor.StructuredExtractorSettings();
            extractorSettings.maxSectionDepth = depth;
            return extractorSettings;
        }

        /** Granularity used by the structured extractor. */
        public static enum SplitUnit {
            DOCUMENT,
            SECTION;

        }
    }

    /** UI-side settings for the VLM extractor. */
    public static class UiVLMExtractorSettings {
        /** Prompt used unless {@link #useCustomPrompt} is set; also the initial value of {@link #customPrompt}. */
        private static final String DEFAULT_PROMPT = "Generate a concise summary, up to 500 words, derived from the screenshot(s) of document page(s) provided. \nBegin with a brief overview and highlight crucial words, facts, or concepts to enhance both semantic and keyword searchability.\nOmit any references to the original source.\n";

        /** Defaults to {@link EmbedDocumentsRule.SplitUnit#PAGE}. */
        public SplitUnit splitUnit = SplitUnit.PAGE;
        /** Window size, read only when {@link #splitUnit} is {@code CUSTOM}. */
        public int customNbPages = 1;
        /** Window overlap, read only when {@link #splitUnit} is {@code CUSTOM}. */
        public int customPagesOverlap = 0;
        public String llmId;
        /** When {@code true}, {@link #customPrompt} is sent instead of the default prompt. */
        public boolean useCustomPrompt = false;
        public String customPrompt = DEFAULT_PROMPT;

        /** Creates settings with all defaults and no LLM id. */
        public UiVLMExtractorSettings() {
        }

        /**
         * Creates settings with all defaults for the given LLM.
         *
         * @param llmId stored in {@link #llmId}
         */
        public UiVLMExtractorSettings(String llmId) {
            this.llmId = llmId;
        }

        /**
         * Converts these UI settings into extractor settings.
         *
         * <ul>
         *   <li>{@code CUSTOM}: window size is {@link #customNbPages}; overlap is
         *       {@link #customPagesOverlap} capped at {@code customNbPages - 1}.</li>
         *   <li>{@code PAGE}: window size 1, overlap 0.</li>
         * </ul>
         *
         * <p>NOTE(review): no lower bound is applied, so a non-positive {@link #customNbPages} or a
         * negative {@link #customPagesOverlap} is forwarded unchanged — confirm the extractor
         * validates these.
         *
         * @return new extractor settings carrying the LLM id, window parameters and prompt
         * @throws NullPointerException if {@link #splitUnit} is {@code null}
         */
        public VLMExtractor.VLMExtractorSettings toVLMExtractorSettings() {
            VLMExtractor.VLMExtractorSettings extractorSettings = new VLMExtractor.VLMExtractorSettings();
            extractorSettings.llmId = this.llmId;

            switch (this.splitUnit) {
                case CUSTOM: {
                    final int pagesPerWindow = this.customNbPages;
                    final int largestAllowedOverlap = pagesPerWindow - 1;
                    extractorSettings.windowSize = pagesPerWindow;
                    extractorSettings.windowOverlap = Math.min(this.customPagesOverlap, largestAllowedOverlap);
                    break;
                }
                case PAGE: {
                    extractorSettings.windowSize = 1;
                    extractorSettings.windowOverlap = 0;
                    break;
                }
                default:
                    // Any other unit leaves the extractor's own window defaults untouched.
                    break;
            }

            if (this.useCustomPrompt) {
                extractorSettings.llmPrompt = this.customPrompt;
            } else {
                extractorSettings.llmPrompt = DEFAULT_PROMPT;
            }
            return extractorSettings;
        }
    }

    /** Granularity used by the VLM extractor. */
    public static enum SplitUnit {
        PAGE,
        CUSTOM;

    }

    /** Kinds of content that can be stored in the multimodal column. */
    public static enum MultimodalContentType {
        PROMPT_OUTPUT,
        CHUNKED_PROMPT_OUTPUT,
        FULL_CONTENT,
        IMAGES;

    }
}

