/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.common.doc_extraction;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.dao.GeneralSettingsDAO;
import com.dataiku.dip.docextraction.StructuredExtractor;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.recipes.RecipeParams;
import com.dataiku.dip.recipes.nlp.common.doc_extraction.DocExtractionRule;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import javax.annotation.Nullable;

/**
 * Parameters of a document-extraction recipe: the extraction mode, the default VLM id,
 * the default image handling mode, the user-defined rules and the catch-all rule applied
 * to all other files.
 *
 * <p>For the two managed modes the rules are not stored; they are rebuilt on demand by
 * {@link #getDefaultRulesFrom}. Only {@link ExtractionMode#CUSTOM_RULES} uses the stored list.
 */
public class DocExtractionRecipesParams extends RecipeParams {
    /** Sentinel VLM id that this class handles the same way as "no VLM selected". */
    public static final String DSS_NO_VLM_SELECTION = "DSS_NO_SELECTION";

    /** Current extraction mode; {@link ExtractionMode#CUSTOM_RULES} unless changed. */
    public ExtractionMode extractionMode = ExtractionMode.CUSTOM_RULES;

    /**
     * Reduced list of file extensions per managed mode. The visual entry is used for the VLM
     * rule when a short list is requested; the text entry is always used for the structured
     * rule that accompanies the VLM rule in visual mode.
     */
    public static final Map<ExtractionMode, String[]> EXTRACTION_EXTENSION_SHORTLIST = Map.ofEntries(
            Map.entry(ExtractionMode.MANAGED_TEXT_ONLY, new String[]{"html", "txt", "md"}),
            Map.entry(ExtractionMode.MANAGED_VISUAL_ONLY, new String[]{"pdf", "docx", "pptx", "jpg", "png"}));

    /** Full list of file extensions per managed mode. */
    public static final Map<ExtractionMode, String[]> EXTRACTION_EXTENSION_LONGLIST = Map.ofEntries(
            Map.entry(ExtractionMode.MANAGED_TEXT_ONLY, new String[]{"pdf", "docx", "pptx", "html", "txt", "md"}),
            Map.entry(ExtractionMode.MANAGED_VISUAL_ONLY, new String[]{"pdf", "odp", "ppt", "odt", "doc", "docx", "xls", "xlsx", "xlsb", "xlsm", "ods", "pptx", "png", "jpeg", "jpg"}));

    /** Image extensions appended to the text-mode extensions when image handling is OCR. */
    public static final String[] OCR_EXTENSIONS_SHORTLIST = new String[]{"png", "jpg", "jpeg"};

    private String defaultVlmId;

    /** Image handling mode applied to the default structured rule; IGNORE unless changed. */
    public StructuredExtractor.ImageHandlingMode defaultImageHandlingMode = StructuredExtractor.ImageHandlingMode.IGNORE;

    private ArrayList<DocExtractionRule> rules = new ArrayList<>();

    // No initializer: stays null until initializeWithDefaultRules (or something outside this
    // class, e.g. deserialization — not visible here) assigns it.
    private DocExtractionRule allOtherRule;

    /**
     * Builds the default catch-all rule: the first default text-only rule with its filter
     * cleared and its action switched to {@code DONOTEXTRACT}.
     *
     * @param embeddingModelRef        embedding model reference forwarded to the rule, may be null
     * @param defaultVlmId             VLM id forwarded to the rule's structured settings, may be null
     * @param defaultImageHandlingMode image handling mode forwarded to the rule, may be null
     * @return a freshly created rule
     */
    public static DocExtractionRule getDefaultAllOtherRule(@Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId, @Nullable StructuredExtractor.ImageHandlingMode defaultImageHandlingMode) {
        DocExtractionRule otherRule = getDefaultRulesFrom(ExtractionMode.MANAGED_TEXT_ONLY, true, embeddingModelRef, defaultVlmId, defaultImageHandlingMode).get(0);
        otherRule.filter = null;
        otherRule.actionToPerform = DocExtractionRule.ActionToPerform.DONOTEXTRACT;
        return otherRule;
    }

    /**
     * Builds the default rules for the given extraction mode.
     *
     * <ul>
     *   <li>{@code MANAGED_TEXT_ONLY}: one structured rule over the text long list
     *       (plus the OCR image extensions when the image handling mode is OCR).</li>
     *   <li>{@code MANAGED_VISUAL_ONLY}: a VLM rule followed by a structured rule over the
     *       text short list.</li>
     *   <li>{@code CUSTOM_RULES}: an empty list.</li>
     * </ul>
     *
     * @param extractionMode           mode to build rules for
     * @param shortList                whether the VLM rule uses the short extension list; it has
     *                                 no effect on the other rules
     * @param embeddingModelRef        embedding model reference passed to every rule, may be null
     * @param defaultVlmId             VLM id, may be null
     * @param defaultImageHandlingMode image handling mode for the text-only structured rule, may be null
     * @return a new mutable list of newly created rules
     * @throws IllegalArgumentException if {@code extractionMode} is null or not one of the modes above
     */
    public static List<DocExtractionRule> getDefaultRulesFrom(ExtractionMode extractionMode, boolean shortList, @Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId, @Nullable StructuredExtractor.ImageHandlingMode defaultImageHandlingMode) {
        if (extractionMode == ExtractionMode.MANAGED_TEXT_ONLY) {
            return buildManagedTextRules(embeddingModelRef, defaultVlmId, defaultImageHandlingMode);
        }
        if (extractionMode == ExtractionMode.MANAGED_VISUAL_ONLY) {
            return buildManagedVisualRules(shortList, embeddingModelRef, defaultVlmId);
        }
        if (extractionMode == ExtractionMode.CUSTOM_RULES) {
            return new ArrayList<>();
        }
        throw new IllegalArgumentException("Unknown extraction mode " + extractionMode);
    }

    /** Builds the single structured rule used by {@link ExtractionMode#MANAGED_TEXT_ONLY}. */
    private static List<DocExtractionRule> buildManagedTextRules(@Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId, @Nullable StructuredExtractor.ImageHandlingMode defaultImageHandlingMode) {
        boolean ocrEnabled = defaultImageHandlingMode == StructuredExtractor.ImageHandlingMode.OCR;

        String[] structuredDocExtensions = copyOfExtensions(EXTRACTION_EXTENSION_LONGLIST, ExtractionMode.MANAGED_TEXT_ONLY);
        if (ocrEnabled) {
            // With OCR, image files are also routed to the structured rule.
            structuredDocExtensions = Stream.of(structuredDocExtensions, OCR_EXTENSIONS_SHORTLIST).flatMap(Stream::of).toArray(String[]::new);
        }

        DocExtractionRule structuredRule = new DocExtractionRule(DocExtractionRule.ActionToPerform.STRUCTURED, structuredDocExtensions, embeddingModelRef);
        structuredRule.structuredSettings = new DocExtractionRule.UIStructuredExtractorSettings();
        structuredRule.structuredSettings.imageHandlingMode = defaultImageHandlingMode;
        // NOTE(review): unlike the visual mode, the VLM id is stored as-is here, including
        // the DSS_NO_SELECTION sentinel — confirm this is intended.
        structuredRule.structuredSettings.llmId = defaultVlmId;
        if (ocrEnabled) {
            structuredRule.structuredSettings.ocrEngine = StructuredExtractor.OCRSettings.OCREngine.AUTO;
        }
        structuredRule.storeInMultimodalColumn = DocExtractionRule.MultimodalContentType.FULL_CONTENT;

        List<DocExtractionRule> textExtractionRules = new ArrayList<>();
        textExtractionRules.add(structuredRule);
        return textExtractionRules;
    }

    /** Builds the VLM rule and the accompanying structured rule used by {@link ExtractionMode#MANAGED_VISUAL_ONLY}. */
    private static List<DocExtractionRule> buildManagedVisualRules(boolean shortList, @Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId) {
        // The sentinel is treated as "no VLM"; a null id stays null.
        String effectiveVlmId = DSS_NO_VLM_SELECTION.equals(defaultVlmId) ? null : defaultVlmId;

        Map<ExtractionMode, String[]> vlmExtensionTable = shortList ? EXTRACTION_EXTENSION_SHORTLIST : EXTRACTION_EXTENSION_LONGLIST;
        String[] vlmDocExtensions = copyOfExtensions(vlmExtensionTable, ExtractionMode.MANAGED_VISUAL_ONLY);

        DocExtractionRule vlmRule = new DocExtractionRule(DocExtractionRule.ActionToPerform.VLM, vlmDocExtensions, embeddingModelRef);
        vlmRule.vlmSettings = new DocExtractionRule.UiVLMExtractorSettings(effectiveVlmId, embeddingModelRef);
        vlmRule.storeInMultimodalColumn = DocExtractionRule.MultimodalContentType.IMAGES;

        // The structured rule always uses the text short list, whatever shortList says.
        String[] structuredDocExtensions = copyOfExtensions(EXTRACTION_EXTENSION_SHORTLIST, ExtractionMode.MANAGED_TEXT_ONLY);
        DocExtractionRule structuredRule = new DocExtractionRule(DocExtractionRule.ActionToPerform.STRUCTURED, structuredDocExtensions, embeddingModelRef);
        structuredRule.structuredSettings = new DocExtractionRule.UIStructuredExtractorSettings();
        structuredRule.storeInMultimodalColumn = DocExtractionRule.MultimodalContentType.FULL_CONTENT;

        List<DocExtractionRule> visualExtractionRules = new ArrayList<>();
        visualExtractionRules.add(vlmRule);
        visualExtractionRules.add(structuredRule);
        return visualExtractionRules;
    }

    /**
     * Returns a private copy of the extension array registered for {@code mode}, so that a
     * rule never shares (and can never modify) the arrays held by the public static tables.
     */
    private static String[] copyOfExtensions(Map<ExtractionMode, String[]> table, ExtractionMode mode) {
        return table.get(mode).clone();
    }

    /**
     * Resets these parameters to their defaults: resolves the default VLM id, derives the
     * extraction mode from it, clears the stored rules and rebuilds the catch-all rule.
     *
     * <p>The VLM id is {@code creationVLMId} when it is non-null; otherwise it is read from the
     * general settings, where a missing value or the {@link #DSS_NO_VLM_SELECTION} sentinel
     * yields null. The mode becomes {@code MANAGED_VISUAL_ONLY} when the resulting id is
     * non-null and {@code MANAGED_TEXT_ONLY} otherwise.
     *
     * @param embeddingModelRef embedding model reference used for the catch-all rule
     * @param creationVLMId     VLM id chosen at creation time, or null to fall back to the general settings
     */
    public void initializeWithDefaultRules(EnrichedLLMStructuredRef embeddingModelRef, String creationVLMId) {
        if (creationVLMId != null) {
            // NOTE(review): the sentinel is not filtered on this path, so passing
            // DSS_NO_SELECTION selects the visual mode — confirm this is intended.
            this.defaultVlmId = creationVLMId;
        } else {
            GeneralSettingsDAO.DocExtractionRecipesSettings embedDocumentsRecipeSettings = ApplicationConfigurator.getGeneralSettingsUnsafeAutoTXN().generativeAISettings.embedDocumentsRecipeSettings;
            this.defaultVlmId = null;
            if (embedDocumentsRecipeSettings != null && embedDocumentsRecipeSettings.defaultVLMId != null && !embedDocumentsRecipeSettings.defaultVLMId.equals(DSS_NO_VLM_SELECTION)) {
                this.defaultVlmId = embedDocumentsRecipeSettings.defaultVLMId;
            }
        }
        this.extractionMode = this.defaultVlmId != null ? ExtractionMode.MANAGED_VISUAL_ONLY : ExtractionMode.MANAGED_TEXT_ONLY;
        this.rules = new ArrayList<>();
        this.allOtherRule = getDefaultAllOtherRule(embeddingModelRef, this.defaultVlmId, this.defaultImageHandlingMode);
    }

    /** @return the stored default VLM id, possibly null */
    public String getDefaultVLMSelection() {
        return this.defaultVlmId;
    }

    /** @param defaultVlmId the default VLM id to store; stored as given, without normalization */
    public void setDefaultVLMSelection(String defaultVlmId) {
        this.defaultVlmId = defaultVlmId;
    }

    /**
     * Returns the rules in effect for the current extraction mode.
     *
     * @param embeddingModelRef embedding model reference used when rules are rebuilt for a managed mode
     * @return the stored rule list itself (not a copy) in {@code CUSTOM_RULES} mode; otherwise a
     *         freshly built list of default rules using the long extension lists
     * @throws IllegalArgumentException if the extraction mode is null
     */
    public List<DocExtractionRule> getRules(EnrichedLLMStructuredRef embeddingModelRef) {
        // '==' rather than equals(): a null mode then reaches the explicit "Unknown extraction
        // mode" error instead of failing with a bare NullPointerException.
        return this.extractionMode == ExtractionMode.CUSTOM_RULES
                ? this.rules
                : getDefaultRulesFrom(this.extractionMode, false, embeddingModelRef, this.defaultVlmId, this.defaultImageHandlingMode);
    }

    /**
     * Returns a new list holding the stored rules followed by the catch-all rule.
     *
     * <p>This reads the stored rules directly, regardless of the extraction mode. The last
     * element is null when the catch-all rule has not been set.
     * NOTE(review): confirm callers expect the stored rules (not the managed defaults) here.
     */
    public List<DocExtractionRule> getAllRules() {
        List<DocExtractionRule> allRules = new ArrayList<>(this.rules);
        allRules.add(this.allOtherRule);
        return allRules;
    }

    /** @return the catch-all rule, or null when it has not been set */
    public DocExtractionRule getAllOtherFilesRule() {
        return this.allOtherRule;
    }

    /** How extraction rules are determined: two managed presets, or user-defined rules. */
    public enum ExtractionMode {
        MANAGED_TEXT_ONLY,
        MANAGED_VISUAL_ONLY,
        CUSTOM_RULES
    }
}

