(function() {
'use strict';

const app = angular.module('dataiku.recipes');

app.controller("RAGEmbeddingRecipeCreationController", function($scope, Fn, $stateParams, DataikuAPI, $q,Dialogs, DatasetsService, WT1, DatasetUtils, $controller, RecipeComputablesService, Logger, SmartId, RetrievableKnowledgeUtils) {
    // Instantiate the two shared base controllers against this controller's scope.
    $controller("_RecipeCreationControllerBase", {$scope:$scope});
    $controller("_RecipeOutputNewManagedBehavior", {$scope:$scope});

    // Recipe type identifier; also passed to DatasetUtils.listDatasetsUsabilityInAndOut further down.
    $scope.recipeType = "nlp_llm_rag_embedding";

    // // for safety, to use the _RecipeOutputNewManagedBehavior fully (maybe one day)
    // $scope.setErrorInTopScope = function(scope) {
    //     return setErrorInScope.bind($scope);
    // };

    // Wraps the given dataset references into a recipe "main" role:
    // { main: { items: [{ ref }, ...] } }. Falsy references are dropped.
    function makeMainRole(refs) {
        const items = [];
        for (const ref of refs) {
            if (ref) {
                items.push({ ref: ref });
            }
        }
        return { main: { items: items } };
    }

    // Creation settings; a copy of this object is sent to the backend by doCreateRecipe.
    // Only the vector store type has an initial value here (CHROMA).
    $scope.creationSettings = {
        vectorStoreType: 'CHROMA',
    };

    // Builds the recipe creation payload and submits it to the backend.
    // Inputs are taken from $scope.recipe.inputs when present, otherwise a "main" role is built
    // from the selected input dataset. Returns the result of DataikuAPI.flow.recipes.generic.create.
    $scope.doCreateRecipe = function() {
        let inputs;
        if ($scope.recipe && $scope.recipe.inputs) {
            inputs = $scope.recipe.inputs;
        } else {
            inputs = makeMainRole([$scope.io.inputDataset]);
        }

        // Work on a copy so the zone is not written onto the live creation settings
        const settings = angular.copy($scope.creationSettings);
        if ($scope.zone) {
            settings.zone = $scope.zone;
        }

        const recipe = {
            type: "nlp_llm_rag_embedding",
            projectKey: $stateParams.projectKey,
            name: `compute_${$scope.creationSettings.outputRetrievableKnowledgeName}`, //TODO @recipes remove,
            inputs: inputs
        };
        return DataikuAPI.flow.recipes.generic.create(recipe, settings);
    };

    // True once an input dataset has been selected.
    $scope.showOutputPane = function() {
        return Boolean($scope.io.inputDataset);
    };

    // Returns true when the creation settings are complete:
    //  - output name, embedding LLM and vector store type are all set,
    //  - a connection is set when RetrievableKnowledgeUtils.hasConnection says one is needed,
    //  - an index name is set when the vector store type is PINECONE.
    $scope.formIsValid = function() {
        const settings = $scope.creationSettings;

        const hasRequiredFields = settings.outputRetrievableKnowledgeName
            && settings.embeddingLLMId
            && settings.vectorStoreType;
        if (!hasRequiredFields) {
            return false;
        }

        const missingConnection = RetrievableKnowledgeUtils.hasConnection(settings) && !settings.connection;
        const missingPineconeIndex = settings.vectorStoreType === 'PINECONE' && !settings.pineconeIndexName;
        return !(missingConnection || missingPineconeIndex);
    };

    // Refreshes $scope.activeSchema for the currently selected input.
    // Does nothing until both the list of available inputs and a selected input are known.
    // Datasets (and inputs not found in the available list) are fetched through DataikuAPI.datasets,
    // streaming endpoints through DataikuAPI.streamingEndpoints; any other type gets an empty schema.
    function updateInputDatasetSchema() {
        const candidates = $scope.availableInputDatasets;
        if (candidates == null) {
            return;
        }
        const selectedInput = $scope.io.inputDataset;
        if (!selectedInput) {
            return;
        }

        const target = SmartId.resolve(selectedInput, contextProjectKey);
        const storeSchema = function(data) {
            $scope.activeSchema = data.schema;
        };

        // Look the input up first to know which kind of object it is
        const match = candidates.find(candidate => candidate.name == target.id && candidate.projectKey == target.projectKey);
        const kind = match == null ? 'DATASET' : match.type;

        if (kind == 'DATASET') {
            DataikuAPI.datasets.get(target.projectKey, target.id, contextProjectKey)
                .success(storeSchema)
                .error(setErrorInScope.bind($scope));
        } else if (kind == 'STREAMING_ENDPOINT') {
            DataikuAPI.streamingEndpoints.get(target.projectKey, target.id)
                .success(storeSchema)
                .error(setErrorInScope.bind($scope));
        } else {
            // other objects don't have a schema
            $scope.activeSchema = {columns:[]};
        }
    }

    // Index of the available inputs, keyed by each item's `id` property.
    const inputsIndex = {};
    // Load the computables usable by this recipe type and publish the input list on the scope.
    //  - with $scope.filterUsableInputsOn set: every item is kept and annotated with usable / usableReason
    //    for that role;
    //  - else with $scope.inputDatasetsOnly set: only items usable in the 'main' role are kept.
    // A rejected listing is reported through setErrorInScope, like the other backend calls in this
    // controller, instead of being silently dropped.
    DatasetUtils.listDatasetsUsabilityInAndOut($stateParams.projectKey, $scope.recipeType, $scope.datasetsOnly).then(function(data){
        $scope.availableInputDatasets = data[0];
        if ($scope.filterUsableInputsOn) {
            $scope.availableInputDatasets.forEach(function(c) {
                const usability = c.usableAsInput[$scope.filterUsableInputsOn] || {};
                c.usable = usability.usable;
                c.usableReason = usability.reason;
            });
        } else if ($scope.inputDatasetsOnly) {
            $scope.availableInputDatasets = data[0].filter(function(computable){
                return computable.usableAsInput['main'] && computable.usableAsInput['main'].usable;
            });
        }
        $scope.availableInputDatasets.forEach(function(it) {
            inputsIndex[it.id] = it;
        });
        updateInputDatasetSchema(); // if the inputDataset arrived before the availableInputDatasets
    }, setErrorInScope.bind($scope));


    // Fetch the LLMs usable for text embedding and preselect the first one, if any.
    DataikuAPI.pretrainedModels.listAvailableLLMs($stateParams.projectKey, "TEXT_EMBEDDING_EXTRACTION")
        .success(function(data) {
            const embeddingLLMs = data.identifiers;
            $scope.availableEmbeddingLLMs = embeddingLLMs;
            if (embeddingLLMs.length > 0) {
                $scope.creationSettings.embeddingLLMId = embeddingLLMs[0].id;
            }
        })
        .error(setErrorInScope.bind($scope));


    // Project key used to resolve smart ids: the context's project when one is set, otherwise the current project.
    const contextProjectKey = ($scope.context && $scope.context.projectKey) || $stateParams.projectKey;

    // Remember a preselected input and make it the current selection.
    $scope.$on("preselectInputDataset", function(event, preselectedInputDataset) {
        $scope.preselectedInputDataset = preselectedInputDataset;
        $scope.io.inputDataset = preselectedInputDataset;
    });

    // React to a change of the selected input dataset:
    //  - propose a default output name ("<input>_embedded") when none has been set yet,
    //  - forget the zone when the selection differs from the preselected input,
    //  - refresh the active schema.
    $scope.$watch("io.inputDataset", function(nv) {
        if (!nv) return;

        if (!$scope.creationSettings.outputRetrievableKnowledgeName) {
            // Drop the leading "<qualifier>." prefix, if any. The pattern is anchored and accepts digits,
            // underscores and lowercase letters so the whole qualifier is removed: the previous unanchored
            // /[A-Z]*\./ turned "MY_PROJECT.ds" into "MY_ds".
            const niceInputName = nv.replace(/^\w+\./, "");
            $scope.creationSettings.outputRetrievableKnowledgeName = niceInputName + "_embedded";
        }
        if ($scope.preselectedInputDataset && $scope.io.inputDataset != $scope.preselectedInputDataset){
            $scope.zone = null;
        }

        updateInputDatasetSchema();
    });
});

app.controller("RAGEmbeddingRecipeEditionController", function($scope, $rootScope, $controller, $stateParams, DataikuAPI, EmbeddingUtils, DOCUMENT_SPLITTING_METHOD_MAP,
     VECTOR_STORE_UPDATE_METHOD_MAP, VECTOR_STORE_TYPE_MAP, Logger) {
    // Instantiate _NLPLLMRecipeControllerBase against this controller's scope.
    $controller("_NLPLLMRecipeControllerBase", {$scope: $scope});
    // NOTE(review): loadLLMs is not defined in this file; presumably it comes from the base controller
    // and populates $scope.availableLLMs (watched further down) - confirm.
    $scope.loadLLMs("TEXT_EMBEDDING_EXTRACTION");

    // Expose the injected lookup maps on the scope.
    $scope.DOCUMENT_SPLITTING_METHOD_MAP = DOCUMENT_SPLITTING_METHOD_MAP;
    $scope.VECTOR_STORE_UPDATE_METHOD_MAP = VECTOR_STORE_UPDATE_METHOD_MAP;
    $scope.VECTOR_STORE_TYPE_MAP = VECTOR_STORE_TYPE_MAP;

    // Whenever the recipe description changes, default its update method to OVERWRITE if it has none.
    $scope.$watch('desc', function(desc) {
        if (!desc || desc.vectorStoreUpdateMethod) {
            return;
        }
        $scope.desc.vectorStoreUpdateMethod = "OVERWRITE";
    });

    // Documentation link for the "split into chunks" processor, relative to the versioned doc root.
    $scope.splitIntoChunksDocRef = `${$rootScope.versionDocRoot}preparation/processors/split-into-chunks.html`;

    // Stores the chosen document splitting mode on the recipe description.
    $scope.setSplittingMethod = function (splitting_method) {
        $scope.desc.documentSplittingMode = splitting_method;
    };

    // Keep in sync with the backend version RagEmbeddingRecipeRunner::shouldClearKnowledgeBank
    //
    // Returns true when the current settings differ from the previously recorded ones
    // ($scope.previousDesc / $scope.previousRk) in a way that requires clearing the knowledge bank.
    // Returns false while the previous state or the current description is not available.
    // Missing metadataColumns lists are treated as empty, and the embedding-model check is skipped
    // while $scope.retrievableKnowledge is not loaded (same guard as the other checks that read it).
    $scope.shouldClearKnowledgeBank = function() {
        const desc = $scope.desc;
        const previousDesc = $scope.previousDesc;
        const previousRk = $scope.previousRk;
        const retrievableKnowledge = $scope.retrievableKnowledge;

        // If we haven't loaded the previous data yet, or if it doesn't exist, then we shouldn't clear
        if (!previousDesc || !previousRk || !desc || !desc.vectorStoreUpdateMethod) {
            return false;
        }

        const vectorStoreUpdateMethod = $scope.VECTOR_STORE_UPDATE_METHOD_MAP[desc.vectorStoreUpdateMethod];
        // A previous description without an update method is treated as OVERWRITE
        // (the default is written back onto $scope.previousDesc, as before)
        if (!previousDesc.vectorStoreUpdateMethod) {
            previousDesc.vectorStoreUpdateMethod = "OVERWRITE";
        }
        const prevVectorStoreUpdateMethod = $scope.VECTOR_STORE_UPDATE_METHOD_MAP[previousDesc.vectorStoreUpdateMethod];

        // If metadata cols have been added, then we need to clear (removed is fine)
        const previousMetadataCols = new Set((previousDesc.metadataColumns || []).map(col => col.column));
        for (const col of (desc.metadataColumns || [])) {
            if (col.column && !previousMetadataCols.has(col.column)) {
                return true;
            }
        }

        // If we've switched from unmanaged mode to managed mode, then we need to clear
        if (!prevVectorStoreUpdateMethod.isSmart && vectorStoreUpdateMethod.isSmart) {
            return true;
        }

        // If we've stayed in managed mode, but the source ID column has changed, then we need to clear
        if (retrievableKnowledge
            && prevVectorStoreUpdateMethod.isSmart && vectorStoreUpdateMethod.isSmart
            && retrievableKnowledge.sourceIdColumn !== desc.sourceIdColumn) {
            return true;
        }

        // If we've changed the embedding column, then we need to clear
        if (previousDesc.knowledgeColumn !== desc.knowledgeColumn) {
            return true;
        }

        // Clear if we are in a smart mode and the vector store type has changed
        if (retrievableKnowledge
            && vectorStoreUpdateMethod.isSmart
            && retrievableKnowledge.vectorStoreType !== previousRk.vectorStoreType) {
            return true;
        }

        // Clear if the embedding model has changed, except in overwrite mode
        if (retrievableKnowledge
            && desc.vectorStoreUpdateMethod !== "OVERWRITE"
            && previousRk.embeddingLLMId !== retrievableKnowledge.embeddingLLMId) {
            return true;
        }

        // Clear if the document splitting mode params have changed
        if (previousDesc.documentSplittingMode !== desc.documentSplittingMode) {
            return true;
        }
        if (previousDesc.documentSplittingMode === "CHARACTERS_BASED"
            && (previousDesc.chunkSizeCharacters !== desc.chunkSizeCharacters
                || previousDesc.chunkOverlapCharacters !== desc.chunkOverlapCharacters)) {
            return true;
        }

        return false;
    };

    // Returns the reason why the given update method cannot be selected, or "" when it can
    // (or when the knowledge bank is not loaded yet).
    $scope.updateMethodDisabledReason = function(method) {
        // TODO @rag Workaround for issues with PINECONE and smart update modes. Can remove this when RecordManager works with PINECONE
        //           See also rag_embedding_recipe.py main method
        const knowledgeBank = $scope.retrievableKnowledge;
        const isSmartOnPinecone = knowledgeBank
            && VECTOR_STORE_UPDATE_METHOD_MAP[method].isSmart
            && knowledgeBank.vectorStoreType === "PINECONE";
        return isSmartOnPinecone ? "Smart update methods not supported for Pinecone" : "";
    };

    // Recompute the default chunk and overlap sizes whenever the embedding LLM changes.
    $scope.$watch('embeddingLLM', (embeddingLLM) => {
        $scope.defaultChunkSize = EmbeddingUtils.getChunkSizeCharacters(embeddingLLM);
        $scope.defaultOverlapSize = EmbeddingUtils.getOverlapSizeCharacters(embeddingLLM);
    });

    // Rebuilds $scope.retrievalColumnsAlerts: one entry per retrieval column of a retrieval-augmented
    // LLM's active version that is not among the recipe's metadata columns.
    //
    // retrievalAugmentedLLMs: list of models, each read for id, name, activeVersion and inlineVersions.
    //
    // The alert list is left empty when the list is not an array or the recipe description is not
    // loaded yet. Models whose active version cannot be found are logged and skipped. Versions that
    // retrieve from 'EMBEDDING', or that have no retrieval columns (or no settings at all), are
    // skipped without throwing so the remaining models are still checked.
    const checkRetrievalColumnsValidity = function (retrievalAugmentedLLMs) {
        $scope.retrievalColumnsAlerts = [];
        if (!Array.isArray(retrievalAugmentedLLMs) || !$scope.desc) {
            return;
        }

        // Same for every model, so built once
        const metadataColNames = new Set(($scope.desc.metadataColumns || []).map((c) => c.column));

        for (const retrievalAugmentedLLM of retrievalAugmentedLLMs) {
            const activeSmiv = (retrievalAugmentedLLM.inlineVersions || [])
                .find((smiv) => smiv.versionId === retrievalAugmentedLLM.activeVersion);

            if (!activeSmiv) {
                Logger.error(`Invalid active Saved Model Inline version: ${retrievalAugmentedLLM.activeVersion}`);
                continue;
            }

            const ragllmSettings = activeSmiv.ragllmSettings || {};
            const retrievalColumns = ragllmSettings.retrievalColumns || [];
            if (ragllmSettings.retrievalSource === 'EMBEDDING' || retrievalColumns.length === 0) {
                continue;
            }

            for (const retrievalColumn of retrievalColumns) {
                if (!metadataColNames.has(retrievalColumn)) {
                    $scope.retrievalColumnsAlerts.push({
                        label: retrievalAugmentedLLM.name,
                        retrievalColumn: retrievalColumn,
                        fullModelId: `S-${$stateParams.projectKey}-${retrievalAugmentedLLM.id}-${retrievalAugmentedLLM.activeVersion}`,
                        smId: retrievalAugmentedLLM.id,
                    });
                }
            }
        }
    };

    // Deep-watch the metadata columns and re-validate the retrieval columns on every change.
    $scope.$watch("desc.metadataColumns", function() {
        checkRetrievalColumnsValidity($scope.retrievalAugmentedLLMs);
    }, true);

    // Once both the knowledge bank output and the available LLMs are known, load the knowledge bank
    // and everything derived from it:
    //  - $scope.retrievableKnowledge plus a deep copy in $scope.origRetrievableKnowledge,
    //  - $scope.embeddingLLM, looked up in availableLLMs by the knowledge bank's embeddingLLMId,
    //  - the previous recipe params / knowledge bank state and the built flag (current version info),
    //  - the retrieval-augmented LLMs using this knowledge bank, followed by a retrieval column check.
    // Does nothing when the output role has no item or the first item has no ref.
    $scope.$watchGroup(['recipe.outputs.knowledge_bank', 'availableLLMs'], function([knowledgeBank, availableLLMs]) {
        if (!knowledgeBank || !availableLLMs) return;

        // `items` may be missing or empty: avoid dereferencing an undefined first item
        const firstItem = (knowledgeBank.items || [])[0];
        const ref = firstItem && firstItem.ref;
        if (!ref) return;

        DataikuAPI.retrievableknowledge.get($stateParams.projectKey, ref).success(retrievableKnowledge => {
            $scope.retrievableKnowledge = retrievableKnowledge;
            $scope.origRetrievableKnowledge = JSON.parse(JSON.stringify(retrievableKnowledge));
            $scope.embeddingLLM = availableLLMs.find(llm => llm.id === retrievableKnowledge.embeddingLLMId);

            DataikuAPI.retrievableknowledge.getCurrentVersionInfo($stateParams.projectKey, $scope.retrievableKnowledge.id)
                .then(function({data}) {
                    $scope.previousDesc = data.embeddingRecipeParams;
                    $scope.previousRk = data.rkAtVersion;
                    $scope.isRkBuilt = data.isBuilt;
                })
                .catch(setErrorInScope.bind($scope));

            DataikuAPI.retrievableknowledge.getRetrievalAugmentedLLMList($stateParams.projectKey, $scope.retrievableKnowledge.id)
                .success(function (data) {
                    $scope.retrievalAugmentedLLMs = data;
                    checkRetrievalColumnsValidity($scope.retrievalAugmentedLLMs);
                })
                .error(setErrorInScope.bind($scope));
        }).error(setErrorInScope.bind($scope)); // report a failed load like the nested calls do
    });

    // Entry offered at the top of the column list to let the user clear the selection.
    $scope.RESET_VALUE = "None - no column selected";

    // Returns a copy of the input dataset columns with RESET_VALUE prepended,
    // or an empty list while the columns are not available.
    $scope.getInputDatasetColumnsWithEmpty = function() {
        const inputColumns = $scope.inputDatasetColumns;
        if (!inputColumns) {
            return [];
        }
        return [$scope.RESET_VALUE, ...angular.copy(inputColumns)];
    };
    // Turns the RESET_VALUE selection back into "no security tokens column".
    $scope.securityTokensColumnSelectionChanged = function() {
        const selectionWasReset = $scope.desc.securityTokensColumn == $scope.RESET_VALUE;
        if (selectionWasReset) {
            $scope.desc.securityTokensColumn = undefined;
        }
    };

    // When saving the recipe, also save the sourceIdColumn in the knowledge bank.
    // The original save hook runs first; afterwards the knowledge bank is saved only if its
    // sourceIdColumn differs from the one implied by the recipe (undefined unless the update method is smart).
    const recipePayloadSave = $scope.hooks.save;
    $scope.hooks.save = function () {
        return recipePayloadSave().then(function () {
            const updateMethod = $scope.VECTOR_STORE_UPDATE_METHOD_MAP[$scope.desc.vectorStoreUpdateMethod];
            const sourceIdColumn = updateMethod.isSmart ? $scope.desc.sourceIdColumn : undefined;

            const knowledgeBank = $scope.retrievableKnowledge;
            if (!knowledgeBank || knowledgeBank.sourceIdColumn === sourceIdColumn) {
                return;
            }
            knowledgeBank.sourceIdColumn = sourceIdColumn;
            DataikuAPI.retrievableknowledge.save(knowledgeBank).error(setErrorInScope.bind($scope));
        });
    };
});


// Keep this in sync with backend java code at RAGEmbeddingRecipeCreator::adaptDefaultPayloadToSelectedModel
app.service('EmbeddingUtils', function() {
    // Values returned when the embedding model or its maxTokensLimit is not available.
    // They also act as upper bounds: the computed chunk and overlap sizes never exceed them.
    const fallbackChunkSizeCharacters = 3000;
    const fallbackOverlapSizeCharacters = 120;

    // Converts the model's maxTokensLimit into an approximate number of characters.
    // Returns the fallback chunk size when the model or its maxTokensLimit is missing.
    function tokensToCharsOrDefault(embeddingLLM) {
        const maxTokens = embeddingLLM ? embeddingLLM.maxTokensLimit : undefined;
        if (!maxTokens) {
            return fallbackChunkSizeCharacters;
        }
        // conservative (pessimistic) approximation: 3 characters per token
        const approxCharacters = maxTokens * 3;
        // round to the nearest hundred so that the value is not too precise
        return Math.round(approxCharacters / 100) * 100;
    }
    // Default chunk size in characters: the model's approximate capacity, capped at the fallback chunk size.
    function getChunkSizeCharacters(embeddingLLM) {
        const modelCapacity = tokensToCharsOrDefault(embeddingLLM);
        return Math.min(modelCapacity, fallbackChunkSizeCharacters);
    }

    // Default chunk overlap in characters: 20% of the default chunk size, capped at the fallback overlap.
    // Returns the fallback overlap when the model or its maxTokensLimit is missing.
    function getOverlapSizeCharacters(embeddingLLM) {
        const hasTokenLimit = Boolean(embeddingLLM && embeddingLLM.maxTokensLimit);
        if (!hasTokenLimit) {
            return fallbackOverlapSizeCharacters;
        }
        const twentyPercentOfChunk = Math.round(0.2 * getChunkSizeCharacters(embeddingLLM));
        return Math.min(fallbackOverlapSizeCharacters, twentyPercentOfChunk);
    }

    // True when splitting is character-based and the chunk size exceeds the model's approximate capacity.
    function shouldWarnAboutChunkSize(embeddingLLM, documentSplittingMode, chunkSizeCharacters) {
        const isCharacterSplitting = documentSplittingMode === 'CHARACTERS_BASED';
        return isCharacterSplitting && chunkSizeCharacters > tokensToCharsOrDefault(embeddingLLM);
    }


    return {tokensToCharsOrDefault, shouldWarnAboutChunkSize, getChunkSizeCharacters, getOverlapSizeCharacters };
});

}());
