/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.rag_embedding;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.code.CodeEnvSelection;
import com.dataiku.dip.code.DSSInternalCodeEnvsService;
import com.dataiku.dip.connections.AbstractLLMConnection;
import com.dataiku.dip.coremodel.SerializedRecipe;
import com.dataiku.dip.coremodel.VersionTag;
import com.dataiku.dip.cuspol.CustomFieldsService;
import com.dataiku.dip.cuspol.CustomPolicyHooksRegistry;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.llm.LLMRefEnricherService;
import com.dataiku.dip.llm.PretrainedModelsService;
import com.dataiku.dip.llm.retrieval.RetrievableKnowledge;
import com.dataiku.dip.llm.retrieval.RetrievableKnowledgeDAO;
import com.dataiku.dip.recipes.RecipeMeta;
import com.dataiku.dip.recipes.common.GenericRecipeCreator;
import com.dataiku.dip.recipes.common.RecipeCreator;
import com.dataiku.dip.recipes.nlp.common.EmbeddingRecipePayloadBaseParams;
import com.dataiku.dip.recipes.nlp.rag_embedding.RAGEmbeddingRecipePayloadParams;
import com.dataiku.dip.recipes.services.PDepsFixuper;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.server.notifications.backend.TaggableObjectChangedEvent;
import com.dataiku.dip.server.services.GeneralSettingsService;
import com.dataiku.dip.server.services.ITaggingService;
import com.dataiku.dip.server.services.NeverBuiltComputablesCacheService;
import com.dataiku.dip.server.services.PubSubService;
import com.dataiku.dip.server.services.TaggableObjectsService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.util.SecretKeyGenerator;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.StringTransmogrifier;
import com.google.common.base.Preconditions;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;

public class RAGEmbeddingRecipeCreator
extends GenericRecipeCreator {
    // All collaborators below are field-injected (@Autowired); they are not set by the constructor.

    // Used to list the knowledge banks of a project and to persist the newly created one.
    @Autowired
    private RetrievableKnowledgeDAO retrievableKnowledgeDAO;
    // Fills in default custom fields on the new knowledge bank before it is saved.
    @Autowired
    private CustomFieldsService customFieldsService;
    // Pre-save policy hooks are invoked on the new knowledge bank through this registry.
    @Autowired
    private CustomPolicyHooksRegistry customPolicyHooksRegistry;
    // Publishes the "knowledge bank created" event after the transaction.
    @Autowired
    private PubSubService pubSub;
    // The new knowledge bank is registered here right after creation.
    @Autowired
    private NeverBuiltComputablesCacheService neverBuiltComputablesCacheService;
    // Source of the list of LLMs usable for text embedding in the project.
    @Autowired
    private PretrainedModelsService pretrainedModelsService;
    // Resolves the selected embedding LLM id into an enriched reference (protected: visible to subclasses).
    @Autowired
    protected LLMRefEnricherService llmRefEnricherService;
    // Used to check whether the internal RAG code env is present.
    @Autowired
    private DSSInternalCodeEnvsService dssInternalCodeEnvsService;
    // Not referenced by any method visible in this class.
    @Autowired
    private GeneralSettingsService generalSettingsService;
    // NOTE(review): the name says "tokens", but the same value (3000) is what
    // adaptDefaultChunkSizeCharacters uses as its default/cap for a size in characters - confirm intent.
    public static final int DEFAULT_CHUNK_SIZE_TOKENS = 3000;

    /**
     * Builds a creator bound to the given authentication context and recipe metadata.
     * Both arguments are forwarded unchanged to {@link GenericRecipeCreator}.
     *
     * @param authCtx authentication context of the user creating the recipe
     * @param meta    metadata of the recipe type being created
     */
    public RAGEmbeddingRecipeCreator(AuthCtx authCtx, RecipeMeta meta) {
        super(authCtx, meta);
    }

    /** Matches the characters that are not allowed in a knowledge bank name: {@code .} and {@code /}. */
    private static final Pattern FORBIDDEN_NAME_CHARACTERS = Pattern.compile("[./]");

    /**
     * Creates, saves and announces the knowledge bank that the new recipe writes to.
     *
     * <p>Both validations (name, then embedding LLM) run before any other work so that an
     * invalid request is rejected without listing existing knowledge banks or generating an id.
     *
     * @param recipe       recipe being created; only forwarded to {@link #setRetrievableKnowledgeFields}
     * @param creationData raw creation settings, parsed as {@link RAGEmbeddingRecipeCreationSettings}
     * @param list         embedding models the user may select; the requested LLM id must be one of them
     * @param projectKey   project that will own the knowledge bank
     * @param name         requested display name; made unique among the project's knowledge banks
     * @param zone         flow zone the knowledge bank is attached to
     * @return the saved knowledge bank
     * @throws IllegalArgumentException if {@code name} contains {@code .} or {@code /}, or if the
     *                                  requested embedding LLM is not in {@code list}
     * @throws Exception                propagated from the underlying services
     */
    private RetrievableKnowledge createRetrievableKnowledge(SerializedRecipe recipe, JsonObject creationData, PretrainedModelsService.ModelsList list, String projectKey, String name, String zone) throws Exception {
        if (FORBIDDEN_NAME_CHARACTERS.matcher(name).find()) {
            throw new IllegalArgumentException("Invalid retrievable knowledge name: '" + name + "'");
        }
        RAGEmbeddingRecipeCreationSettings settings = (RAGEmbeddingRecipeCreationSettings)JSON.parse((JsonElement)creationData, RAGEmbeddingRecipeCreationSettings.class);
        if (list.identifiers.stream().noneMatch(i -> i.id.equals(settings.embeddingLLMId))) {
            throw new IllegalArgumentException("Invalid Embedding LLM selected");
        }

        // Seed the name generator with the names already taken in the project so the new name is unique.
        StringTransmogrifier transmogrifier = new StringTransmogrifier(" ");
        for (RetrievableKnowledge existing : this.retrievableKnowledgeDAO.list(projectKey)) {
            transmogrifier.addAlreadyTransmogrifiedAcceptDupes(existing.name);
        }

        RetrievableKnowledge rk = new RetrievableKnowledge();
        rk.projectKey = projectKey;
        rk.id = SecretKeyGenerator.generate(8);
        rk.name = transmogrifier.transmogrify(name);
        // Creation tag and version tag deliberately share the same instance.
        rk.versionTag = rk.creationTag = new VersionTag(this.authCtx.getIdentifier());

        // Pin the configured default code env, except when it is the internal RAG env and that
        // env is not present on this instance.
        String defaultEnvName = ApplicationConfigurator.getGeneralSettingsUnsafeAutoTXN().generativeAISettings.getDefaultRetrievableKnowledgeCodeEnv();
        boolean defaultIsInternalRagEnv = DSSInternalCodeEnvsService.getCodeEnvName(DSSInternalCodeEnvsService.DSSInternalCodeEnvType.RAG_CODE_ENV).equals(defaultEnvName);
        if (!defaultIsInternalRagEnv || this.dssInternalCodeEnvsService.getCodeEnv_NT(DSSInternalCodeEnvsService.DSSInternalCodeEnvType.RAG_CODE_ENV).isPresent()) {
            rk.envSelection = CodeEnvSelection.explicitEnv(defaultEnvName);
        }

        rk.embeddingLLMId = settings.embeddingLLMId;
        rk.vectorStoreType = settings.vectorStoreType;
        rk.connection = settings.connection;
        if (settings.indexName != null) {
            rk.indexName = settings.indexName;
        } else if (settings.vectorStoreType != null && settings.vectorStoreType.hasIndex) {
            // No explicit index name: store the default template (variables are left unexpanded here).
            rk.indexName = "${projectKey}_kb_${knowledgeBankId}";
        }
        if (settings.vectorStoreType == RetrievableKnowledge.VectorStoreType.PINECONE) {
            rk.pineconeIndexName = settings.pineconeIndexName;
        }

        // Subclass hook, then defaults and policy hooks, then persistence.
        this.setRetrievableKnowledgeFields(recipe, creationData, rk);
        this.customFieldsService.enrichWithDefaultCustomFieldsForTaggableObject(rk);
        this.customPolicyHooksRegistry.onPreObjectSave(this.authCtx, null, rk);
        this.retrievableKnowledgeDAO.save(rk);
        this.flowZonesService.attachObjectToZone(zone, projectKey, rk);
        this.neverBuiltComputablesCacheService.add(new TaggableObjectsService.TaggableObjectRef(rk));

        // The event carries the name as requested by the caller, not the de-duplicated rk.name.
        JsonObject details = new JsonObject();
        details.addProperty("objectDisplayName", name);
        this.pubSub.publishAfterTransaction(new TaggableObjectChangedEvent(ITaggingService.TaggableType.RETRIEVABLE_KNOWLEDGE, projectKey, rk.id, this.authCtx, TaggableObjectChangedEvent.ActionType.RETRIEVABLE_KNOWLEDGE_CREATE).withDetails(details));
        return rk;
    }

    /**
     * Extension hook called by {@code createRetrievableKnowledge} after the common fields of
     * {@code rk} have been set and before it is enriched with custom fields and saved.
     * This implementation does nothing.
     *
     * @param recipe       recipe being created
     * @param creationData raw creation settings
     * @param rk           knowledge bank under construction, not yet saved
     * @throws Exception declared for overriding implementations; never thrown here
     */
    protected void setRetrievableKnowledgeFields(SerializedRecipe recipe, JsonObject creationData, RetrievableKnowledge rk) throws Exception {
        // Intentionally empty.
    }

    /**
     * Sets the chunk size and chunk overlap of {@code payload} from the token limit of the
     * selected embedding model.
     *
     * @param payload payload whose chunking fields are overwritten
     * @param ref     enriched reference of the selected model; its {@code maxTokensLimit} is read
     */
    private void adaptDefaultPayloadToSelectedModel(EmbeddingRecipePayloadBaseParams payload, EnrichedLLMStructuredRef ref) {
        final int chunkSize = RAGEmbeddingRecipeCreator.adaptDefaultChunkSizeCharacters(ref.maxTokensLimit);
        payload.chunkSizeCharacters = chunkSize;
        // The overlap is derived from the chunk size that was just computed.
        payload.chunkOverlapCharacters = RAGEmbeddingRecipeCreator.adaptDefaultOverlapCharacters(chunkSize);
    }

    /**
     * Computes the default chunk size, in characters, for a model with the given token limit.
     *
     * <p>The size is three times the token limit, rounded to the nearest multiple of 100 and
     * capped at 3000. When the limit is unknown ({@code null}) the cap itself is returned.
     *
     * @param maxTokensLimit token limit of the embedding model, or {@code null} if unknown
     * @return the default chunk size in characters, never above 3000
     */
    public static int adaptDefaultChunkSizeCharacters(Integer maxTokensLimit) {
        if (maxTokensLimit == null) {
            return 3000;
        }
        // Multiply in long: in int arithmetic, limit * 3 wraps around for limits above ~715 million
        // and could come out negative, which then slipped through the min(…, 3000) cap.
        long estimatedCharacters = 3L * maxTokensLimit;
        if (estimatedCharacters >= 3000L) {
            // Anything at or above the cap rounds to at least 3000, so the cap always wins.
            return 3000;
        }
        // Below the cap. Saturate at Integer.MIN_VALUE so the narrowing cast cannot wrap for
        // (meaningless) hugely negative limits.
        int boundedEstimate = (int)Math.max(estimatedCharacters, (long)Integer.MIN_VALUE);
        return RAGEmbeddingRecipeCreator.roundToNearestHundredth(boundedEstimate);
    }

    /**
     * Computes the default chunk overlap for a given chunk size: one fifth of the chunk size
     * (integer division), but at most 120 characters.
     *
     * @param chunkSizeCharacters chunk size in characters
     * @return the default overlap in characters
     */
    public static int adaptDefaultOverlapCharacters(int chunkSizeCharacters) {
        final int fifthOfChunk = chunkSizeCharacters / 5;
        return fifthOfChunk < 120 ? fifthOfChunk : 120;
    }

    /**
     * Creates the payload object that {@code create_NT} fills in and saves with the recipe.
     * Protected so that subclasses can supply a different payload type.
     *
     * @return a new {@link RAGEmbeddingRecipePayloadParams}
     */
    protected EmbeddingRecipePayloadBaseParams createDefaultPayload() {
        return new RAGEmbeddingRecipePayloadParams();
    }

    /**
     * Rounds {@code value} to the nearest multiple of 100 (despite the "Hundredth" in the name,
     * the result is a whole number of hundreds). Ties follow {@link Math#round(float)}, i.e. they
     * go towards positive infinity: 150 becomes 200, -150 becomes -100.
     *
     * @param value value to round
     * @return the nearest multiple of 100
     */
    private static int roundToNearestHundredth(int value) {
        // int / float promotes the int to float, exactly like an explicit cast would.
        final float hundreds = value / 100.0f;
        return 100 * Math.round(hundreds);
    }

    /**
     * Creates the embedding recipe together with the knowledge bank it outputs.
     *
     * <p>The list of usable embedding models is read in a short read transaction; everything
     * else (knowledge bank creation, zone attachment, recipe save) happens in a single write
     * transaction opened as the logged-in user, which is committed only if every step succeeds.
     *
     * @param recipe       recipe to create; its {@code name}, {@code params} and outputs are set here
     * @param creationData raw creation settings, parsed as {@link RAGEmbeddingRecipeCreationSettings}
     * @return a result whose {@code id} is the final (de-duplicated) recipe name
     * @throws IllegalArgumentException if the recipe has no name, no knowledge bank name was
     *                                  given, or the knowledge bank settings are invalid
     * @throws Exception                propagated from the underlying services
     */
    @Override
    public RecipeCreator.CreationResult create_NT(SerializedRecipe recipe, JsonObject creationData) throws Exception {
        assert (StringUtils.isNotBlank((String)recipe.projectKey));
        assert (this.meta.getType().equals(recipe.type));
        Preconditions.checkArgument((boolean)StringUtils.isNotBlank((String)recipe.name), (Object)"Recipe has no name");

        PretrainedModelsService.ModelsList availableTextEmbeddingModels;
        try (Transaction readTransaction = this.transactionService.beginRead()) {
            availableTextEmbeddingModels = this.pretrainedModelsService.listAvailableLLMs(this.authCtx, recipe.projectKey, AbstractLLMConnection.LLMUsagePurpose.TEXT_EMBEDDING_EXTRACTION);
        }

        // The write transaction gets its own resource variable: the read transaction's variable
        // is out of scope once its try block ends.
        try (Transaction writeTransaction = this.transactionService.beginWriteAsLoggedInUser(this.authCtx)) {
            RAGEmbeddingRecipeCreationSettings settings = (RAGEmbeddingRecipeCreationSettings)JSON.parse((JsonElement)creationData, RAGEmbeddingRecipeCreationSettings.class);
            if (StringUtils.isBlank((String)settings.outputRetrievableKnowledgeName)) {
                throw new IllegalArgumentException("No knowledge bank was specified");
            }
            recipe.name = this.recipeSaveService.transmogrifyName(recipe.projectKey, recipe.name);
            recipe.params = this.makeInitialParams(recipe, creationData);

            // Without an explicit zone, the recipe goes where its input lives.
            if (StringUtils.isBlank((String)settings.zone)) {
                settings.zone = this.flowZonesService.retrieveInputZone(recipe);
            }
            this.flowZonesService.attachObjectToZone(settings.zone, recipe.getProjectKey(), recipe);

            EmbeddingRecipePayloadBaseParams payload = this.createDefaultPayload();
            RetrievableKnowledge rk = this.createRetrievableKnowledge(recipe, creationData, availableTextEmbeddingModels, recipe.getProjectKey(), settings.outputRetrievableKnowledgeName, settings.zone);
            recipe.addOutput("knowledge_bank", rk.id, false);

            // Chunking defaults depend on the token limit of the selected embedding model.
            EnrichedLLMStructuredRef ref = this.llmRefEnricherService.getEnrichedLLMRef(settings.embeddingLLMId, this.authCtx, recipe.projectKey);
            this.adaptDefaultPayloadToSelectedModel(payload, ref);

            new PDepsFixuper().fixupInPlace(recipe);
            this.checkOutputOwnership_T(recipe);
            this.recipeSaveService.create(recipe.projectKey, recipe, JSON.pretty((Object)payload));
            writeTransaction.commitV("Created %s recipe: %s", new Object[]{recipe.type, recipe.getFullId()});

            RecipeCreator.CreationResult ret = new RecipeCreator.CreationResult();
            ret.id = recipe.name;
            return ret;
        }
    }

    /**
     * Creation settings for the embedding recipe, deserialized from the {@code creationData}
     * JSON object by {@code JSON.parse}. Field names are the JSON keys.
     */
    public static class RAGEmbeddingRecipeCreationSettings {
        // Name requested for the output knowledge bank. Required: creation fails when blank.
        public String outputRetrievableKnowledgeName;
        // Id of the embedding LLM; must be one of the embedding models available in the project.
        public String embeddingLLMId;
        // Vector store backing the knowledge bank; may be null.
        public RetrievableKnowledge.VectorStoreType vectorStoreType;
        // Copied as-is to the knowledge bank's connection field.
        public String connection;
        // Explicit index name. When null and the store type has an index, a default
        // "${projectKey}_kb_${knowledgeBankId}" template is stored instead.
        public String indexName;
        // Only copied to the knowledge bank when the store type is PINECONE.
        public String pineconeIndexName;
        // Flow zone for the recipe and knowledge bank. When blank, the recipe's input zone is used.
        public String zone;
    }
}

