from __future__ import print_function

import copy
import zlib

from six import iteritems
from os import path as osp
import re
import shutil
import random
import string
import os
import json, logging
import sys
from glob import glob, iglob
import time
import datetime
from collections import OrderedDict
import subprocess

import base, install_config
import migration_base, migration_json, migration_app

# Alias for the "text string" base type on the running interpreter:
# ``basestring`` only exists on Python 2, so ``str`` is used on Python 3.
if sys.version_info > (3,):
    dku_basestring_type = str
else:
    dku_basestring_type = basestring

###############################################################################
# V9 / DSS 2.1
###############################################################################

class V9ElasticSearchDatasetParams(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``params.rawCopyColumns`` to ``'*'`` on ElasticSearch dataset files."""

    def __repr__(self):
        return "Upgrades dataset parameters for ElasticSearch datasets"

    def file_patterns(self):
        # Only dataset definition files are visited.
        return ["datasets/*.json"]

    def jsonpath(self):
        # Empty path; transform() reads root-level keys of the document.
        return ""

    def transform(self, obj, filepath=None):
        """Return ``obj``, updated in place when its type is ``ElasticSearch``."""
        if obj.get("type") != "ElasticSearch":
            return obj
        obj['params']['rawCopyColumns'] = '*'
        return obj

class V9RecipeRoles(migration_json.ProjectConfigJsonMigrationOperation):
    """Rewrite recipe ``inputs``/``outputs`` from flat lists into role dictionaries.

    Each old input becomes ``{"ref": <input>, "deps": [...]}`` where ``deps``
    holds the entries of ``partitionDeps`` whose ``"in"`` equals that input.
    Inputs are then spread over the ``main``, ``test`` and ``scriptDeps``
    roles depending on the recipe type; outputs always go to ``main``.
    """

    # Recipe types for which only the first input is the "main" one; all
    # further inputs are stored under the "scriptDeps" role.
    _SINGLE_MAIN_INPUT_TYPES = (
        "shaker",
        "prediction_scoring",
        "clustering_training",
        "clustering_scoring",
        "clustering_cluster",
    )

    def __repr__(self):
        return "Input/Output roles in recipes"

    def transform(self, obj, filepath=None):
        """Convert one recipe definition in place.

        :param obj: parsed recipe JSON.
        :param filepath: path of the recipe file; needed for
            ``prediction_training`` recipes, whose payload is read from the
            sibling ``.prediction_training`` file.
        :returns: ``obj``, or ``None`` when the recipe has no ``type``.
        """
        # Broken recipe, don't do anything.
        # NOTE(review): this returns None rather than obj - confirm that the
        # calling framework treats None as "leave the file untouched".
        if "type" not in obj:
            return

        old_inputs = obj.get("inputs", [])
        old_outputs = obj.get("outputs", [])
        old_pdeps = obj.get("partitionDeps", [])

        new_inputs_all = [
            {
                "ref": old_input,
                "deps": [dep for dep in old_pdeps if dep.get("in", None) == old_input],
            }
            for old_input in old_inputs
        ]

        def first_input_to_main():
            # With no input at all, obj["inputs"] is left as it was.
            if len(new_inputs_all) > 0:
                obj["inputs"] = {
                    "main": {
                        "items": [new_inputs_all[0]]
                    }
                }

        def remaining_inputs_to_script_deps(start):
            # Only called after first_input_to_main(); having more than
            # ``start`` (>= 1) inputs guarantees obj["inputs"] is a role dict.
            if len(new_inputs_all) > start:
                obj["inputs"]["scriptDeps"] = {
                    "items": new_inputs_all[start:]
                }

        def convert_outputs():
            obj["outputs"] = {
                "main": {
                    "items": [{"ref": x} for x in old_outputs]
                }
            }

        recipe_type = obj["type"]

        if recipe_type in self._SINGLE_MAIN_INPUT_TYPES:
            # First input goes to main, others go to "scriptDeps"
            first_input_to_main()
            remaining_inputs_to_script_deps(1)
            convert_outputs()

        elif recipe_type == "prediction_training":
            payload = base.json_loadf(filepath.replace(".json", ".prediction_training"))
            ttPolicy = payload.get("splitParams", {}).get("ttPolicy", None)
            if ttPolicy == "EXPLICIT_FILTERING_TWO_DATASETS":
                if len(new_inputs_all) < 2:
                    print("WARNING: EXPLICIT_FILTERING recipe with only one input - BROKEN")
                    first_input_to_main()
                    convert_outputs()
                    return obj
                # First input goes to main, second to test, others to scriptdeps
                first_input_to_main()
                # Add the "test" role next to "main". Re-assigning the whole
                # obj["inputs"] dict here would discard the main input.
                obj["inputs"]["test"] = {
                    "items": [new_inputs_all[1]]
                }
                remaining_inputs_to_script_deps(2)
            else:
                first_input_to_main()
                remaining_inputs_to_script_deps(1)
            convert_outputs()

        else:
            # Regular behaviour: all inputs and all outputs to main
            obj["inputs"] = {
                "main": {
                    "items": new_inputs_all
                }
            }
            convert_outputs()

        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["recipes/*.json"]


class V9FilterRecipeSelection(migration_json.ProjectConfigJsonMigrationOperation):
    """Replace the JSON-encoded ``params.selection`` string of sampling recipes
    with the decoded object."""

    def __repr__(self):
        return "Inline selection in filter recipe"

    def file_patterns(self):
        return ["recipes/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Decode ``params.selection`` in place for ``sampling`` recipes.

        A selection that cannot be decoded is logged (with traceback) and
        left unchanged. ``obj`` is always returned.
        """
        if obj.get("type", "") == "sampling":
            recipe_params = obj.get("params", {})
            encoded_selection = recipe_params.get("selection", None)
            if encoded_selection is not None:
                try:
                    recipe_params["selection"] = json.loads(encoded_selection)
                except Exception:
                    logging.exception("Failed to migrate sampling recipe %s" % filepath)
        return obj


def v9_transform_chart(chart):
    """Build the V9 representation of a chart definition.

    A new dictionary is assembled from ``chart``: common display options are
    copied (with defaults), filters receive a ``filterType`` derived from
    their ``columnType``, and the old chart ``type`` is mapped to the new
    ``type``/``variant`` pair together with its dimensions and measures.

    ``chart`` itself does not gain any key (a missing ``type`` is treated as
    ``"grouped_bars"`` without being written back). The filter and measure
    objects are shared with the result and are updated in place.

    :param chart: old chart definition (dict).
    :returns: the new chart definition. For an unrecognised ``type`` the
        result carries no ``type``/``variant`` keys.
    """
    newchart = {}

    newchart["name"] = chart.get("name", "Untitled")
    newchart["userEditedName"] = True

    newchart["includeZero"] = chart.get("includeZero", True)
    newchart["showLegend"] = chart.get("showLegend", True)
    newchart["smoothing"] = chart.get("smoothing", True)

    def set_compute_on_measures(arr):
        # The chart-wide compute/x-axis mode becomes a per-measure computeMode.
        cmode = chart.get("computeMode", "NONE")
        xmode = chart.get("xAxisMode", "NONE")

        if cmode == "LIFT_AVG":
            new_mode = "AVG_RATIO"
        elif xmode == "CUMULATIVE":
            new_mode = "CUMULATIVE"
        elif xmode == "DIFFERENCE":
            new_mode = "DIFFERENCE"
        else:
            return
        for measure in arr:
            measure["computeMode"] = new_mode

    if chart.get("yAxisMode", "NORMAL") == "LOG":
        newchart["axis1LogScale"] = True
        newchart["axis2LogScale"] = True

    newchart["colorOptions"] = {
        "singleColor": "#659a88",
        "transparency": 0.9,
        "colorPalette": chart.get("colorPalette", "default")
    }

    if "thumbnailData" in chart:
        newchart["thumbnailData"] = chart["thumbnailData"]

    newchart["filters"] = chart.get("filters", [])
    for f in newchart["filters"]:
        # A filter without "columnType" is kept as is instead of aborting
        # the whole migration with a KeyError.
        column_type = f.get("columnType", None)
        if column_type == "NUMERICAL":
            f["filterType"] = "NUMERICAL_FACET"
        elif column_type == "ALPHANUM":
            f["filterType"] = "ALPHANUM_FACET"
        elif column_type == "DATE":
            f["filterType"] = "DATE_FACET"

    def copy_obj(fromArr, idx, toArr):
        # newchart[toArr] is a list of zero or one element: fromArr[idx] if any.
        if len(fromArr) > idx:
            print("Migrating from %s=%s to %s" % (fromArr, idx, toArr))
            newchart[toArr] = [fromArr[idx]]
        else:
            newchart[toArr] = []

    def do_generic_stdaggr():
        copy_obj(chart.get("dimensions", []), 0, "genericDimension0")
        copy_obj(chart.get("dimensions", []), 1, "genericDimension1")

        newchart["genericMeasures"] = chart.get("genericMeasures", [])
        set_compute_on_measures(newchart["genericMeasures"])

    # Charts without a type are handled as grouped bars; the default lives
    # in a local so that the caller's dict is not modified.
    chart_type = chart.get("type", None)
    if chart_type is None:
        chart_type = "grouped_bars"

    if chart_type == "grouped_bars":

        if len(chart.get("dimensions", [])) == 2:
            newchart["type"] = "grouped_columns"
        else:
            newchart["type"] = "multi_columns_lines"
        newchart["variant"] = "normal"

        do_generic_stdaggr()

    elif chart_type == "scatter_2d":

        newchart["type"] = "binned_xy"
        newchart["variant"] = "normal"
        if chart.get("hexbin", False):
            newchart["variant"] = "binned_xy_hex"
            newchart["hexbinRadius"] = chart.get("hexbinRadius", 20)

        copy_obj(chart.get("dimensions", []), 0, "xDimension")
        copy_obj(chart.get("dimensions", []), 1, "yDimension")

        newchart["colorMeasure"] = chart.get("colorMeasures", [])
        newchart["sizeMeasure"] = chart.get("sizeMeasures", [])

    elif chart_type == "scatter_1d":
        newchart["type"] = "grouped_xy"
        newchart["variant"] = "normal"

        copy_obj(chart.get("dimensions", []), 0, "groupDimension")

        newchart["colorMeasure"] = chart.get("colorMeasures", [])
        newchart["sizeMeasure"] = chart.get("sizeMeasures", [])

        copy_obj(chart.get("genericMeasures", []), 0, "xMeasure")
        copy_obj(chart.get("genericMeasures", []), 1, "yMeasure")

    elif chart_type == "lines":
        newchart["type"] = "lines"
        newchart["variant"] = "normal"

        do_generic_stdaggr()

    elif chart_type == "stacked_bars":
        newchart["type"] = "stacked_columns"
        newchart["variant"] = "normal"

        if chart.get("yAxisMode", "NORMAL") == "PERCENTAGE_STACK":
            newchart["variant"] = "stacked_100"

        do_generic_stdaggr()

    elif chart_type == "stacked_area":
        newchart["type"] = "stacked_area"
        newchart["variant"] = "normal"

        if chart.get("yAxisMode", "NORMAL") == "PERCENTAGE_STACK":
            newchart["variant"] = "stacked_100"

        do_generic_stdaggr()

    elif chart_type == "diminishing_returns":
        newchart["type"] = "lift"
        newchart["variant"] = "normal"

        copy_obj(chart.get("dimensions", []), 0, "groupDimension")
        copy_obj(chart.get("genericMeasures", []), 0, "xMeasure")
        copy_obj(chart.get("genericMeasures", []), 1, "yMeasure")

    elif chart_type == "map":
        newchart["type"] = "admin_map"
        newchart["variant"] = "normal"

        # Equality (not truthiness) is kept on purpose so that only
        # True-equal values select the filled variant, as before.
        if chart.get("filledMap", False) == True:
            newchart["variant"] = "filled_map"

        copy_obj(chart.get("dimensions", []), 0, "geometry")

        newchart["colorMeasure"] = chart.get("typedMeasures", {}).get("mapColor", [])
        newchart["sizeMeasure"] = chart.get("typedMeasures", {}).get("mapSize", [])

    return newchart

class V9AnalysisCharts(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``v9_transform_chart`` on the ``data`` of every chart listed under
    ``script.charts`` in analysis core params files."""

    def __repr__(self):
        return "Update charts in analyses"

    def file_patterns(self):
        return ["analysis/*/core_params.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Convert the charts of ``obj`` in place and return ``obj``."""
        analysis_charts = obj.get("script", {}).get("charts", [])
        for chart in analysis_charts:
            # Charts without a "data" entry are left untouched.
            if "data" not in chart:
                continue
            chart["data"] = v9_transform_chart(chart["data"])
        return obj

class V9DatasetCharts(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``v9_transform_chart`` on ``content.chart`` of insight files whose
    type is ``CHART``."""

    def __repr__(self):
        return "Update charts in datasets"

    def file_patterns(self):
        return ["insights/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Convert the chart of a ``CHART`` insight in place; return ``obj``."""
        if obj.get("type", "UNKNOWN") == "CHART":
            old_chart = obj.get("content", {}).get("chart", None)
            if old_chart is not None:
                obj["content"]["chart"] = v9_transform_chart(old_chart)
        return obj

class V9ShakerRecipeEngine(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``params.engine`` to ``HADOOP_MAPREDUCE`` on shaker recipes that
    have ``params.runOnHadoop`` equal to ``True``."""

    def __repr__(self):
        return "Upgrade preparation scripts on Hadoop"

    def file_patterns(self):
        return ["recipes/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Update ``obj`` in place when applicable and return it."""
        if obj.get("type", None) == "shaker":
            recipe_params = obj.get("params", {})
            if recipe_params.get("runOnHadoop", False) == True:
                recipe_params["engine"] = "HADOOP_MAPREDUCE"
        return obj

class V9APIKeysForWebapps(migration_base.MigrationOperation):
    """Move ``DATASETS_READ`` keys from the global ``config/apikeys.json`` into
    per-project ``apikeys.json`` files; every other key stays global."""

    def __repr__(self):
        return "Migrate webapp API keys to public API keys"

    def execute(self, diphome, simulate=False):
        """Split the global API keys file.

        :param diphome: object whose ``path`` attribute is the data directory.
        :param simulate: when true, the keys are read and classified but no
            file is written.
        """
        keys_file = osp.join(diphome.path, "config/apikeys.json")

        if not osp.isfile(keys_file):
            return

        old_keys_data = base.json_loadf(keys_file)
        new_keys_data = {"keys": []}

        # project key -> list of new-style key entries for that project
        projects_keys = {}

        for old_key in old_keys_data.get("keys", []):
            # NOTE(review): this prints the complete key entry, including the
            # "key" value itself - confirm this is acceptable in the logs.
            print("Migrating old key: %s" % old_key)
            if old_key.get("type", "") != "DATASETS_READ":
                new_keys_data["keys"].append(old_key)
                continue

            datasetNames = []
            pkey = "???"
            for datasetInfo in old_key.get("readableDatasets", []):
                pkey = datasetInfo.get("projectKey", "???")
                datasetNames.append(datasetInfo.get("datasetName", "???"))

            # We only support one project key per key: the project of the
            # last readable dataset wins ("???" when there is none).
            projects_keys.setdefault(pkey, []).append({
                "key": old_key["key"],
                "localDatasets": [
                    {
                        "datasets": datasetNames,
                        "privileges": ["READ_DATA"]
                    }
                ]
            })

        # Nothing below is read-only: stop here for a simulated run, like
        # the other simulate-aware operations of this file do.
        if simulate:
            return

        base.json_dumpf(keys_file, new_keys_data)

        projects_folder = osp.join(diphome.path, "config/projects")

        for (project, keys) in projects_keys.items():
            print("Writing new keys for %s" % project)
            pkeys_file = osp.join(projects_folder, project, "apikeys.json")
            if osp.isdir(osp.join(projects_folder, project)):
                base.json_dumpf(pkeys_file, keys)
            else:
                print("Not writing keys for removed project %s" % project)


class V9RenameArraysCombine(migration_app.ShakerStepMigrationOperation):
    """Rename ``ArraysCombine`` shaker steps to ``ZipArraysProcessor``."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "ArraysCombine")

    def transform_step(self, step):
        """Change the step type in place and return the step."""
        previous_type = step["type"]
        assert previous_type == "ArraysCombine"
        step["type"] = "ZipArraysProcessor"
        return step

class V9ColumnRenamerMultiColumns(migration_app.ShakerStepMigrationOperation):
    """Express the ``inCol``/``outCol`` pair of ``ColumnRenamer`` steps as a
    one-element ``renamings`` list."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "ColumnRenamer")

    def transform_step(self, step):
        """Add ``params.renamings`` and return the step.

        When the step has no ``params`` entry nothing is stored on it.
        """
        settings = step.get("params", {})
        single_renaming = {
            "from": settings.get("inCol", ""),
            "to": settings.get("outCol", ""),
        }
        settings["renamings"] = [single_renaming]
        return step

###############################################################################
# V10 / DSS 2.2
###############################################################################

class V10UpDownFiller(migration_app.ShakerStepMigrationOperation):
    """Turn the single ``column`` of ``UpDownFiller`` steps into a ``columns`` list."""

    def __init__(self):
        super(V10UpDownFiller, self).__init__("UpDownFiller")

    def transform_step(self, step):
        """Add ``params.columns`` (one element) and return the step."""
        settings = step.get("params", {})
        settings["columns"] = [settings.get("column", "")]
        return step

class V10TimestamNoTzInSqlDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``params.readColsWithUnknownTzAsDates`` on SQL datasets so that
    timestamp columns without time zone keep their previous behaviour."""

    # Dataset types flagged with readColsWithUnknownTzAsDates = True.
    _READ_AS_DATES_TYPES = ('PostgreSQL', 'MySQL', 'Vertica', 'Redshift', 'Greenplum', 'Teradata')
    # Dataset types flagged with readColsWithUnknownTzAsDates = False.
    _READ_AS_STRINGS_TYPES = ('Oracle', 'SQLServer')

    def __repr__(self):
        return "Keep old behavior for timestamp columns without time zone"

    def transform(self, obj, filepath=None):
        """Flag the dataset in place and return it.

        Datasets with no ``type`` or with any other type are returned
        unchanged; a missing ``params`` object is created when a flag has to
        be stored.
        """
        # .get(): a dataset file without "type" must not abort the migration.
        dataset_type = obj.get('type', None)
        if dataset_type in self._READ_AS_DATES_TYPES:
            # impossible to know from just the schema whether some columns had no timestamp, but until
            # the user clicks on 'Test' again, the schema won't change. Leave assumed timezone empty for
            # local
            obj.setdefault("params", {})["readColsWithUnknownTzAsDates"] = True
        elif dataset_type in self._READ_AS_STRINGS_TYPES:
            # the old behavior was already to read these dates as strings, keep doing that
            obj.setdefault("params", {})["readColsWithUnknownTzAsDates"] = False
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["datasets/*.json"]

class V10TrueInPluginRecipesConfig(migration_json.ProjectConfigJsonMigrationOperation):
    """Replace the string ``"true"`` with the boolean ``True`` in the params of
    recipes whose type starts with ``CustomCode_``."""

    def __repr__(self):
        return "Update representation of booleans in plugin recipes config"

    def file_patterns(self):
        return ["recipes/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Convert top-level ``"true"`` param values in place; return ``obj``.

        Only the exact string ``"true"`` is converted; other values are kept.
        """
        if not obj.get("type", "").startswith("CustomCode_"):
            return obj
        recipe_params = obj.get("params", {})
        stringly_true = [name for name in recipe_params if recipe_params[name] == "true"]
        for name in stringly_true:
            recipe_params[name] = True
        return obj


###############################################################################
# V11 / DSS 2.3
###############################################################################

class V11InstallIni(migration_base.MigrationOperation):
    """Create ``install.ini`` from the base port found in ``bin/env-default.sh``
    and the node type, then delete the legacy ``install.properties``."""

    def __repr__(self):
        return "Migrate port and nodetype to install.ini configuration file"

    def appliesTo(self):
        return ["design", "api"]

    def execute(self, diphome, simulate=False):
        """Run the migration.

        :param diphome: object whose ``path`` attribute is the data directory.
        :param simulate: when true, stop after reading the port and node type.
        :raises Exception: when no ``export DKU_BASE_PORT`` line is found.
        """
        # Read base port from env-default
        env_default_path = osp.join(diphome.path, "bin", "env-default.sh")
        detected_port = None
        with open(env_default_path) as env_file:
            for env_line in env_file.read().split('\n'):
                # No break: the last matching line decides.
                if "export DKU_BASE_PORT" in env_line:
                    # The value is the text between the first pair of double quotes.
                    detected_port = int(env_line.split('"')[1])
        if detected_port is None:
            raise Exception("Failed to detect DKU_BASE_PORT in %s" % env_default_path)

        # Read node type from install.properties if any
        node_type = migration_base.get_node_type(diphome)

        if simulate:
            return

        # Create install.ini
        install_config.initConfig(diphome, detected_port, node_type, "auto", gitMode='global')

        # Remove legacy install.properties
        legacy_props = osp.join(diphome.path, "install.properties")
        if osp.isfile(legacy_props):
            print("Remove legacy file %s" % legacy_props)
            os.remove(legacy_props)

class V11SQLNotebooks(migration_json.ProjectConfigJsonMigrationOperation):
    """Give SQL notebooks a ``cells`` list built from the ``favorites.json``
    and ``queries.json`` files stored next to the notebook params file."""

    def __repr__(self):
        return "Update SQL notebooks"

    def randId(self):
        """Return a random cell identifier: a decimal string in [0, 10**9]."""
        # Integer bound: random.randint rejects float arguments such as 1e9
        # on recent Python versions.
        return str(random.randint(0, 10 ** 9))

    def transform(self, notebook, filepath=None):
        """Add cells to a notebook that has none and return the notebook.

        Notebooks that already have a non-null ``cells`` entry are returned
        unchanged. Otherwise one ``QUERY`` cell is created per favorite, and,
        when the query history is not empty, one "History from migration"
        cell. Side effect: ``queries.json``, when present, is rewritten so
        that its ``queries`` entry maps the history cell id to the old list.

        :param filepath: path of the notebook params file; its directory is
            where ``favorites.json`` and ``queries.json`` are looked up.
        """
        if notebook.get("cells", None) is not None:
            return notebook

        notebook["cells"] = []

        rep = osp.dirname(filepath)
        fav_file = osp.join(rep, "favorites.json")
        if osp.exists(fav_file):
            fav_data = base.json_loadf(fav_file)
            # .get(): a favorites file without the "favorites" entry yields no cell.
            for fav in fav_data.get("favorites", []):
                cell = {"id": self.randId(), "code": fav.get("sql", ""), "name": fav.get("name", ""), "type": "QUERY"}
                notebook["cells"].append(cell)

        queries_file = osp.join(rep, "queries.json")
        if osp.exists(queries_file):
            history = base.json_loadf(queries_file)
            new_queries = {}
            queries = history.get("queries", [])
            if len(queries) > 0:
                # create a cell for all queries in history
                cell = {
                    "id": self.randId(),
                    "name": "History from migration"
                }
                new_queries[cell["id"]] = queries
                notebook["cells"].append(cell)
            base.json_dumpf(queries_file, {"queries": new_queries})

        return notebook

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["notebooks/sql/*/params.json"]


class V11FillEmptyWithValue(migration_app.ShakerStepMigrationOperation):
    """Switch ``FillEmptyWithValue`` steps to the ``appliesTo``/``columns`` form."""

    def __init__(self):
        super(V11FillEmptyWithValue, self).__init__("FillEmptyWithValue")

    def transform_step(self, step):
        """Mark the step as single-column, list its column, return the step."""
        settings = step.get("params", {})
        target_column = settings.get("column", "")
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [target_column]
        return step

class V11RemoveRowsOnEmpty(migration_app.ShakerStepMigrationOperation):
    """Switch ``RemoveRowsOnEmpty`` steps to the ``appliesTo``/``columns`` form."""

    def __init__(self):
        super(V11RemoveRowsOnEmpty, self).__init__("RemoveRowsOnEmpty")

    def transform_step(self, step):
        """Mark the step as single-column, list its column, return the step."""
        step_params = step.get("params", {})
        old_column = step_params.get("column", "")
        step_params["appliesTo"] = "SINGLE_COLUMN"
        step_params["columns"] = [old_column]
        return step

class V11RoundProcessor(migration_app.ShakerStepMigrationOperation):
    """Add ``appliesTo`` to ``RoundProcessor`` steps from their column count."""

    def __init__(self):
        super(V11RoundProcessor, self).__init__("RoundProcessor")

    def transform_step(self, step):
        """Set ``appliesTo`` to ``COLUMNS`` when more than one column is
        listed, ``SINGLE_COLUMN`` otherwise; return the step."""
        settings = step.get("params", {})
        column_count = len(settings.get("columns", [""]))
        settings["appliesTo"] = "COLUMNS" if column_count > 1 else "SINGLE_COLUMN"
        return step

class V11FindReplace(migration_app.ShakerStepMigrationOperation):
    """Switch ``FindReplace`` steps to the ``appliesTo``/``columns`` form."""

    def __init__(self):
        super(V11FindReplace, self).__init__("FindReplace")

    def transform_step(self, step):
        """Map a truthy ``global`` to ``appliesTo = ALL``; otherwise make the
        step single-column on its ``input`` column. Returns the step."""
        settings = step.get("params", {})

        if settings.get("global", False):
            settings["appliesTo"] = "ALL"
            return step

        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [settings.get("input", "")]
        return step

class V11StringTransformer(migration_app.ShakerStepMigrationOperation):
    """Switch ``StringTransformer`` steps to the ``appliesTo``/``columns`` form
    and upper-case their ``mode``."""

    def __init__(self):
        super(V11StringTransformer, self).__init__("StringTransformer")

    def transform_step(self, step):
        """Update the step params in place and return the step."""
        settings = step.get("params", {})
        target_column = settings.get("column", "")
        upper_mode = settings.get("mode", "").upper()

        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [target_column]
        settings["mode"] = upper_mode
        return step

class V11CellClearer(migration_app.ShakerStepMigrationOperation):
    """Convert ``CellClearer`` steps into ``FilterOnValue`` steps with the
    ``CLEAR_CELL`` action."""

    def __init__(self):
        super(V11CellClearer, self).__init__("CellClearer")

    def transform_step(self, step):
        """Retype the step, wrap ``value``/``column`` into lists, return it."""
        step["type"] = "FilterOnValue"

        settings = step.get("params", {})
        matched_value = settings.get("value", "")
        target_column = settings.get("column", "")

        settings["values"] = [matched_value]
        settings["action"] = "CLEAR_CELL"
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [target_column]
        return step

class V11RowsSelector(migration_app.ShakerStepMigrationOperation):
    """Convert ``RowsSelector`` steps into ``FilterOnValue`` steps."""

    def __init__(self):
        super(V11RowsSelector, self).__init__("RowsSelector")

    def transform_step(self, step):
        """Retype the step; a truthy ``keep`` becomes ``KEEP_ROW``, anything
        else ``REMOVE_ROW``. Returns the step."""
        step["type"] = "FilterOnValue"

        settings = step.get("params", {})
        settings["action"] = "KEEP_ROW" if settings.get("keep", False) else "REMOVE_ROW"
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [settings.get("column", "")]
        return step

class V11ClearCellsOnBadType(migration_app.ShakerStepMigrationOperation):
    """Convert ``ClearCellsOnBadType`` steps into ``FilterOnBadType`` steps
    with the ``CLEAR_CELL`` action."""

    def __init__(self):
        super(V11ClearCellsOnBadType, self).__init__("ClearCellsOnBadType")

    def transform_step(self, step):
        """Retype the step, make it single-column, return it."""
        step["type"] = "FilterOnBadType"

        settings = step.get("params", {})
        target_column = settings.get("column", "")
        settings["action"] = "CLEAR_CELL"
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [target_column]
        return step

class V11RemoveRowsOnBadType(migration_app.ShakerStepMigrationOperation):
    """Convert ``RemoveRowsOnBadType`` steps into ``FilterOnBadType`` steps
    with the ``REMOVE_ROW`` action."""

    def __init__(self):
        super(V11RemoveRowsOnBadType, self).__init__("RemoveRowsOnBadType")

    def transform_step(self, step):
        """Retype the step, make it single-column, return it."""
        step["type"] = "FilterOnBadType"

        step_params = step.get("params", {})
        old_column = step_params.get("column", "")
        step_params["action"] = "REMOVE_ROW"
        step_params["appliesTo"] = "SINGLE_COLUMN"
        step_params["columns"] = [old_column]
        return step

class V11NumericalRangeSelector(migration_app.ShakerStepMigrationOperation):
    """Convert ``NumericalRangeSelector`` steps into ``FilterOnNumericalRange``
    steps."""

    def __init__(self):
        super(V11NumericalRangeSelector, self).__init__("NumericalRangeSelector")

    def transform_step(self, step):
        """Retype the step; a truthy ``keep`` becomes ``KEEP_ROW``, anything
        else ``REMOVE_ROW``. Returns the step."""
        step["type"] = "FilterOnNumericalRange"

        settings = step.get("params", {})
        keep_matching = settings.get("keep", False)
        settings["action"] = "KEEP_ROW" if keep_matching else "REMOVE_ROW"
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [settings.get("column", "")]
        return step

class V11SplitFoldTrimFalse(migration_app.ShakerStepMigrationOperation):
    """Set ``trimSpaces`` to ``False`` on ``SplitFold`` steps."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "SplitFold")

    def transform_step(self, step):
        """Store the flag in the step params (if any) and return the step."""
        step_type = step["type"]
        assert step_type == "SplitFold"
        settings = step.get("params", {})
        settings["trimSpaces"] = False
        return step

class V11JSONFlattenNull(migration_app.ShakerStepMigrationOperation):
    """Set ``nullAsEmpty`` to ``False`` on ``JSONFlattener`` steps."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "JSONFlattener")

    def transform_step(self, step):
        """Store the flag in the step params (if any) and return the step."""
        settings = step.get("params", {})
        settings["nullAsEmpty"] = False
        return step

class V11DateParser(migration_app.ShakerStepMigrationOperation):
    """Switch ``DateParser`` steps to the ``appliesTo``/``columns`` form, using
    their ``inCol`` as the single column."""

    def __init__(self):
        super(V11DateParser, self).__init__("DateParser")

    def transform_step(self, step):
        """Update the step params in place and return the step."""
        settings = step.get("params", {})
        input_column = settings.get("inCol", "")
        settings["appliesTo"] = "SINGLE_COLUMN"
        settings["columns"] = [input_column]
        return step

class V11RemoveShakerFilters(migration_app.ShakerScriptMigrationOperation):
    """Reset ``explorationFilters`` to an empty list on shaker scripts."""

    def __init__(self):
        super(V11RemoveShakerFilters, self).__init__()

    def transform_script(self, script):
        """Empty the filters of ``script`` in place and return it."""
        no_filters = []
        script["explorationFilters"] = no_filters
        return script


class V11RemoveStepsFromInsightCharts(migration_json.ProjectConfigJsonMigrationOperation):
    """Empty ``content.dataView.steps`` in insight files of type ``CHART``."""

    def __repr__(self):
        return "Remove steps from chart insights"

    def file_patterns(self):
        return ["insights/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Reset the steps in place and return ``obj``.

        When ``content`` or ``dataView`` is absent nothing is stored on ``obj``.
        """
        if obj.get("type", "") != "CHART":
            return obj
        data_view = obj.get("content", {}).get("dataView", {})
        data_view["steps"] = []
        return obj


###############################################################################
# V12 / DSS 3.0
###############################################################################


class V12SchedulerToScenario(migration_base.MigrationOperation):
    """Convert the jobs of ``config/scheduler.json`` into one step-based
    scenario file per job, stored in the ``scenarios`` folder of its project."""

    def __repr__(self):
        return "Migrate scheduled builds to scenarios"

    def convert_to_item_specification(self, project_key, dataset_with_partition):
        """Return the build item describing one dataset (and partition) of a job."""
        return {
            'type': 'DATASET',
            'projectKey': project_key,
            'itemId': dataset_with_partition.get('dataset', None),
            'partitionsSpec': dataset_with_partition.get('partition', None),
        }

    def convert_to_step(self, project_key, scheduled_job):
        """Return the single ``build_flowitem`` step building all datasets of the job."""
        step = {'id': 'extracted', 'type': 'build_flowitem', 'name': 'Datasets from scheduled job'}
        step['params'] = {
            'jobType': scheduled_job.get('type', 'NON_RECURSIVE_FORCED_BUILD'),
            'refreshHiveMetastore': False,
            'builds': [
                self.convert_to_item_specification(project_key, dataset)
                for dataset in scheduled_job.get('datasets', [])
            ],
        }
        return step

    def convert_to_trigger(self, scheduled_job):
        """Return the temporal trigger reproducing the job schedule.

        The trigger is active when the job was enabled; ``enabled`` is a
        required key of ``scheduled_job``.
        """
        trigger = {
            'id': 'converted',
            'type': 'temporal',
            'name': 'Job frequency',
            'delay': '30',  # 30s delay
            'active': scheduled_job['enabled'],
        }
        trigger['params'] = {
            'frequency': scheduled_job.get('frequency', 'Daily'),
            'dayOfWeek': scheduled_job.get('dayOfWeek', None),
            'dayOfMonth': scheduled_job.get('dayOfMonth', 1),
            'minute': scheduled_job.get('minute', 0),
            'hour': scheduled_job.get('hour', 0),
        }
        return trigger

    def get_project_owner(self, diphome, project_key):
        """Return the ``owner`` from the project ``params.json``, or ``None``
        when that file does not exist or has no owner."""
        project_file = osp.join(diphome.path, "config/projects/%s/params.json" % project_key)

        if not osp.isfile(project_file):
            return None

        project = base.json_loadf(project_file)

        return project.get('owner', None)

    def execute(self, diphome, simulate=False):
        """Run the conversion.

        Jobs of projects without an owner (or without params file) are
        skipped. Unless ``simulate`` is true, the scenario files are written
        and ``scheduler.json`` is replaced by an empty object.

        :param diphome: object whose ``path`` attribute is the data directory.
        :param simulate: when true, the scenarios are built but nothing is
            written to disk.
        """
        scheduler_file = osp.join(diphome.path, "config/scheduler.json")

        if not osp.isfile(scheduler_file):
            return

        old_scheduler_data = base.json_loadf(scheduler_file)

        scenarios_by_project = {}
        for scheduled_job in old_scheduler_data.get('scheduledJobs', []):
            project_key = scheduled_job['projectKey']

            owner = self.get_project_owner(diphome, project_key)
            if owner is None:
                continue  # project doesn't exist anymore

            scenarios = scenarios_by_project.setdefault(project_key, [])

            # Scenario ids are numbered per project, in job order.
            index_in_project = len(scenarios)
            # build scenario
            scenario = {
                'id': 'scheduled_job_%i' % index_in_project,
                'type': 'step_based',
                'name': 'Converted scheduled job %i' % index_in_project,
                'active': False,
            }
            scenario['versionTag'] = {
                'versionNumber': 1,
                'lastModifiedBy': {'login': 'dss_migration', 'displayName': 'Migration DSS'},
                'lastModifiedOn': int(time.time() * 1000),
            }
            scenario['runAsUser'] = owner
            scenario['triggers'] = [self.convert_to_trigger(scheduled_job)]
            scenario['params'] = {'steps': [self.convert_to_step(project_key, scheduled_job)]}

            # and keep for that project
            scenarios.append(scenario)

            print("converted scenario in %s" % project_key)

        # Everything below writes to disk: stop here for a simulated run,
        # like the other simulate-aware operations of this file do.
        if simulate:
            return

        # dump all these scenarios on disk, in appropriate folder
        for project_key, scenarios in iteritems(scenarios_by_project):
            print("saving scenario in %s" % project_key)
            # The folder only depends on the project: ensure it once.
            scenarios_folder = osp.join(diphome.path, 'config/projects/%s/scenarios' % project_key)
            if not os.path.exists(scenarios_folder):
                os.mkdir(scenarios_folder)  # ensure existence
            for scenario in scenarios:
                scenario_file = osp.join(scenarios_folder, "%s.json" % scenario['id'])
                base.json_dumpf(scenario_file, scenario)
                print("saved scenario in %s" % scenario_file)

        # get rid of old scheduled jobs
        base.json_dumpf(scheduler_file, {})


def migrate_custom_python(modeling):
    """Make ``modeling["custom_python"]`` a list, in place, and return ``modeling``.

    A missing or ``None`` value becomes ``[]``; any other non-list value is
    wrapped in a one-element list. An existing list is kept as is, so running
    the migration twice is harmless.
    """
    current = modeling.get("custom_python", None)
    if not isinstance(current, list):
        modeling["custom_python"] = [] if current is None else [current]
    return modeling


class V12CustomPythonModelsInAnalysisConfig(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply ``migrate_custom_python`` to the ``modeling`` section of analysis
    ML task params files."""

    def __repr__(self):
        return "Update custom python models in analyses config"

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json"]

    def jsonpath(self):
        return "modeling"

    def transform(self, modeling, filepath=None):
        """Return the migrated ``modeling`` object."""
        migrated = migrate_custom_python(modeling)
        return migrated



class V12CustomPythonModelsInAnalysisData(migration_base.ProjectLocalMigrationOperation):
    """Apply ``migrate_custom_python`` to every saved ML task session
    (``<analysis>/<mltask>/sessions/<session>/mltask.json``) of a project."""

    def __repr__(self):
        return "Update custom python models in analyses data"

    def _migrate_session_file(self, session_file):
        # Rewrite one mltask.json; a failure is reported and swallowed so
        # that the remaining sessions are still processed.
        try:
            session_data = base.json_loadf(session_file)
            migrate_custom_python(session_data.get("modeling", {}))
            base.json_dumpf(session_file, session_data)
        except Exception as e:
            print("Model migration FAILED: %s" % e)

    def execute(self, project_paths):
        """Walk ``project_paths.analysis_data`` and migrate each session file.

        Does nothing when the analysis data directory does not exist.
        """
        if not osp.isdir(project_paths.analysis_data):
            return
        # e.g. a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        for analysis_id in os.listdir(project_paths.analysis_data):
            analysis_dir = osp.join(project_paths.analysis_data, analysis_id)
            if not osp.isdir(analysis_dir):
                continue
            for mltask_id in os.listdir(analysis_dir):
                sessions_dir = osp.join(analysis_dir, mltask_id, "sessions")
                if not osp.isdir(sessions_dir):
                    continue
                for session_id in os.listdir(sessions_dir):
                    session_file = osp.join(sessions_dir, session_id, "mltask.json")
                    if osp.isfile(session_file):
                        print("Migrating saved ML Task session: %s %s %s" % (analysis_id, mltask_id, session_id))
                        self._migrate_session_file(session_file)


class V12CustomPythonModelsInSavedModels(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``migrate_custom_python`` on the ``miniTask.modeling`` section of saved models.

    Declares the file pattern ``saved_models/*.json`` and the JSON path
    ``miniTask.modeling``.
    """

    def __repr__(self):
        return "Update custom python models in saved models"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["saved_models/*.json"]

    def jsonpath(self):
        """Return the path of the JSON section handed to ``transform``."""
        return "miniTask.modeling"

    def transform(self, modeling, filepath=None):
        """Return the ``modeling`` object as migrated by ``migrate_custom_python``."""
        migrated = migrate_custom_python(modeling)
        return migrated


class V12AnalysisCharts(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply ``v12_transform_chart`` to the charts stored in analysis core params."""

    def __repr__(self):
        return "Update charts in analyses"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["analysis/*/core_params.json"]

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def transform(self, obj, filepath=None):
        """Migrate the ``data`` entry of every chart under ``script.charts``.

        Charts without a ``data`` entry are left alone. Returns ``obj``.
        """
        charts = obj.get("script", {}).get("charts", [])
        for chart in charts:
            if "data" not in chart:
                continue
            chart["data"] = v12_transform_chart(chart["data"])
        return obj


class V12DatasetCharts(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply ``v12_transform_chart`` to insights whose ``type`` is ``CHART``."""

    def __repr__(self):
        return "Update charts in datasets"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["insights/*.json"]

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def transform(self, obj, filepath=None):
        """Migrate ``content.chart`` of a CHART insight; other objects pass through.

        Returns ``obj`` in every case.
        """
        if obj.get("type", "UNKNOWN") == "CHART":
            chart = obj.get("content", {}).get("chart", None)
            if chart is not None:
                obj["content"]["chart"] = v12_transform_chart(chart)
        return obj


def v12_transform_chart(chart):
    """Give unaggregated DATE dimensions a default ``dateMode`` of ``RANGE``.

    Looks at the dimensions listed under ``uaXDimension`` and ``uaYDimension``.
    A dimension with ``isA == 'ua'`` and ``type == 'DATE'`` that has no
    ``dateMode`` yet gets ``dateMode = 'RANGE'``; an existing ``dateMode`` is
    kept. The chart is modified in place and returned.
    """
    for axis_key in ('uaXDimension', 'uaYDimension'):
        for dim in chart.get(axis_key, []):
            if dim['isA'] == 'ua' and dim['type'] == 'DATE':
                # setdefault only writes when the key is absent
                dim.setdefault('dateMode', 'RANGE')
    return chart


class V12GroupPermissions(migration_base.MigrationOperation):
    """Migrate ``config/users.json`` to the group-based permission system.

    * Users with ``codeAllowed`` set are put in a new ``_may_write_code_``
      group that carries ``mayWriteUnsafeCode``.
    * Users belonging to a group that has an ``ANALYST_READWRITE``,
      ``ANALYST_READONLY`` or ``ADMIN`` permission on any project get the
      ``DATA_SCIENTIST`` profile; every other user gets ``READER``.
    """

    def __repr__(self,):
        return "Migrate to new group permission system"

    def execute(self, diphome, simulate=False):
        """Rewrite the users file below ``diphome.path``.

        :param diphome: object exposing the data directory as ``path``
        :param simulate: when true, everything is computed but the users file
            is not written (same convention as the other operations here)
        """
        code_group_name = "_may_write_code_"
        projects_path = osp.join(diphome.path, "config/projects")
        usersf = osp.join(diphome.path, "config/users.json")
        users_data = base.json_loadf(usersf)
        users = users_data["users"]

        # First, find all users who had the "may write code" permissions
        users_who_can_code = set()
        for user in users:
            if "login" not in user:
                continue
            if user.get("codeAllowed", False) == True:
                users_who_can_code.add(user["login"])

        # And create a new group for them (tolerating a missing "groups" list,
        # and not duplicating the group if this step is run again)
        groups = users_data.setdefault("groups", [])
        if not any(group.get("name", None) == code_group_name for group in groups):
            groups.append({
                "name" : code_group_name,
                "description" : "Users who may write unsafe code (migrated from DSS 2.X)",
                "mayWriteUnsafeCode" : True,
                "sourceType" : "LOCAL"
            })
        for user in users:
            if "login" in user and user["login"] in users_who_can_code:
                user_groups = user.setdefault("groups", [])
                if code_group_name not in user_groups:
                    user_groups.append(code_group_name)

        # Find all groups that currently hold an analyst-level permission
        analyst_permissions = ("ANALYST_READWRITE", "ANALYST_READONLY", "ADMIN")
        groups_who_are_analysts = set()

        if osp.isdir(projects_path):
            for project in os.listdir(projects_path):
                project_paramsf = osp.join(projects_path, project, "params.json")
                if not osp.isfile(project_paramsf):
                    continue
                project_params = base.json_loadf(project_paramsf)
                for permission in project_params.get("permissions", []):
                    group = permission.get("group", None)
                    if group is None:
                        continue
                    if permission.get("type", "READER") in analyst_permissions:
                        groups_who_are_analysts.add(group)

        print("Groups who are analysts: %s" % groups_who_are_analysts)

        # Users without a login or without groups are skipped instead of
        # raising KeyError: they can never match an analyst group.
        users_who_are_analysts = set()
        for user in users:
            if "login" not in user:
                continue
            for group in user.get("groups", []):
                if group in groups_who_are_analysts:
                    users_who_are_analysts.add(user["login"])
        print("Users who are analysts: %s" % users_who_are_analysts)

        # And make their profile DATA_SCIENTIST
        for user in users:
            if "login" in user and user["login"] in users_who_are_analysts:
                user["userProfile"] = "DATA_SCIENTIST"
            else:
                user["userProfile"] = "READER"

        if simulate:
            return

        print("Writing new users file")
        base.json_dumpf(usersf, users_data)


class V12AddGitMode(migration_base.MigrationOperation):
    """Set the ``git``/``mode`` install option to ``global`` when it is not set."""

    def __repr__(self):
        return "Add mandatory git mode to install.ini configuration file"

    def appliesTo(self):
        """Return the node types this operation applies to."""
        return [ "design", "api" ]

    def execute(self, diphome, simulate=False):
        """Add the option and save the install config unless ``simulate`` is true."""
        install_cfg = diphome.get_install_config()
        if install_cfg.getOption('git', 'mode'):
            # already configured: nothing to add, nothing to save
            return
        install_cfg.addOption('git', 'mode', 'global')
        if not simulate:
            install_cfg.save()


class V12ConnectionParams(migration_json.JsonMigrationOperation):
    """Normalize connection params in ``config/connections.json``.

    String-encoded booleans and JSON values are decoded, empty-string numeric
    params are removed, ``dialect`` is copied to ``dialectName`` and
    ``properties`` are no longer a (map str->str) but a (list {name, value}).
    """

    def __repr__(self):
        return "Update connections models"

    def paramToBool(self, params, key):
        """Replace a string value by the boolean ``value == "true"``."""
        value = params.get(key, None)
        if isinstance(value, dku_basestring_type):
            params[key] = (value == "true")

    def paramToObj(self, params, key): #obj or array
        """Replace a string value by the result of parsing it as JSON."""
        value = params.get(key, None)
        if isinstance(value, dku_basestring_type):
            print("DO MIGRATE ", key)
            params[key] = json.loads(value)

    def paramToInt(self, params, key):
        """Drop the param when it holds an empty string; other values are kept."""
        if params.get(key, None) == "":
            params.pop(key)

    def transform(self, connections, filepath=None):
        """Migrate the ``params`` of every connection in place and return the mapping."""
        int_keys = (
            "port",
            "readTimeout", #cassandra
            "connectionLimit", #ftp
        )
        bool_keys = (
            "useTruncate",
            "ssl", #cassandra
            "passive", #ftp
            "useURL", #oracle
            "usePublicKey", #ssh
        )
        for _name, conn in iteritems(connections):
            params = conn.get("params", None)
            if params is None:
                continue

            for key in int_keys:
                self.paramToInt(params, key)
            for key in bool_keys:
                self.paramToBool(params, key)
            self.paramToObj(params, "datanodeFqns") #impala

            params['dialectName'] = params.get('dialect', None) # renamed param

            properties = params.get("properties", None)
            # a list means the properties were already migrated (idempotence)
            if properties is None or isinstance(properties, list):
                continue
            params["properties"] = [
                {"name": prop_name, "value": prop_value}
                for prop_name, prop_value in iteritems(properties)
            ]
        return connections

    def jsonpath(self):
        """Return the path of the JSON section handed to ``transform``."""
        return "connections"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["config/connections.json"]

class V12ColumnsSelector(migration_app.ShakerStepMigrationOperation):
    """De-duplicate the ``columns`` of ColumnsSelector steps and set ``appliesTo``."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "ColumnsSelector")

    def transform_step(self, step):
        """Update the step params in place and return the step.

        ``appliesTo`` becomes ``COLUMNS`` when more than one distinct column
        remains, ``SINGLE_COLUMN`` otherwise.
        """
        # setdefault (not get) so that params created here stay attached to
        # the step instead of being silently discarded
        params = step.setdefault("params", {})
        columns = params.get("columns", [""])

        # remove duplicates from the list, keeping first-occurrence order
        params["columns"] = list(OrderedDict.fromkeys(columns))

        if len(params["columns"]) > 1:
            params["appliesTo"] = "COLUMNS"
        else:
            params["appliesTo"] = "SINGLE_COLUMN"

        return step


class V12NestProcessor(migration_app.ShakerStepMigrationOperation):
    """Rename ``inputColumns`` to ``columns`` in NestProcessor steps and set ``appliesTo``."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "NestProcessor")

    def transform_step(self, step):
        """Update the step params in place and return the step.

        ``appliesTo`` becomes ``COLUMNS`` when there is more than one input
        column, ``SINGLE_COLUMN`` otherwise.
        """
        # setdefault (not get) so that params created here stay attached to
        # the step instead of being silently discarded
        params = step.setdefault("params", {})
        columns = params.get("inputColumns", [""])

        if len(columns) > 1:
            params["appliesTo"] = "COLUMNS"
        else:
            params["appliesTo"] = "SINGLE_COLUMN"

        params["columns"] = columns
        # the old key is replaced by "columns"
        params.pop("inputColumns", None)

        return step


class V12NumericalCombinator(migration_app.ShakerStepMigrationOperation):
    """Set ``appliesTo = "COLUMNS"`` on NumericalCombinator steps."""

    def __init__(self):
        migration_app.ShakerStepMigrationOperation.__init__(self, "NumericalCombinator")

    def transform_step(self, step):
        """Update the step params in place and return the step."""
        # setdefault (not get) so that the flag is kept even when the step had
        # no "params" entry yet
        step.setdefault("params", {})["appliesTo"] = "COLUMNS"
        return step


# Must be idempotent as it was actually applied starting with DSS 2.3.4 without version bump
class V12DkuSparkHome(migration_base.MigrationOperation):
    """Rename ``export SPARK_HOME=`` to ``export DKU_SPARK_HOME=`` in ``bin/env-spark.sh``."""

    def __repr__(self):
        return "Migrate SPARK_HOME to DKU_SPARK_HOME in env-spark.sh"

    def appliesTo(self):
        """Return the node types this operation applies to."""
        return [ "design", "api" ]

    def execute(self, diphome, simulate=False):
        """Rewrite the file below ``diphome.path``; nothing is written when ``simulate`` is true."""
        spark_env = osp.join(diphome.path, "bin", "env-spark.sh")
        if not osp.isfile(spark_env):
            return

        old_prefix = 'export SPARK_HOME='
        new_prefix = 'export DKU_SPARK_HOME='
        with open(spark_env) as f:
            # only lines that start with the old export are touched, and only
            # their first occurrence of it
            migrated = [
                line.replace(old_prefix, new_prefix, 1) if line.startswith(old_prefix) else line
                for line in f
            ]

        if simulate:
            return

        with open(spark_env, 'w') as f:
            f.writelines(migrated)

class V12SetupDefaultMetrics(migration_json.ProjectConfigJsonMigrationOperation):
    """Install the default metric probes and displayed metrics on datasets."""

    def __repr__(self):
        return "Activate default metrics on datasets"

    def transform(self, obj, filepath=None):
        """Fill ``obj['metrics']`` with the default probes and return ``obj``.

        A ``basic`` and a ``records`` probe are always set; a ``partitioning``
        probe is set when the dataset has at least one partitioning dimension.
        A probe replaces any existing probe of the same type. Displayed
        metrics are only appended when not already listed.
        """
        metrics = obj.get('metrics', {})
        # make sure the fields we are going to fill are there
        probes = metrics.setdefault('probes', [])
        displayed_state = metrics.setdefault('displayedState', {})
        displayed_metrics = displayed_state.setdefault('metrics', [])

        def set_probe(probe):
            # drop every probe of the same type, then add the new one at the end
            probes[:] = [p for p in probes if p.get('type', '') != probe['type']]
            probes.append(probe)

        def display(metric_id):
            if metric_id not in displayed_metrics:
                displayed_metrics.append(metric_id)

        is_partitioned = 'partitioning' in obj and len(obj['partitioning'].get('dimensions', [])) > 0
        if is_partitioned:
            # partitioned dataset, activate partition list and count
            set_probe({ "type": "partitioning", "computeOnBuildMode": "WHOLE_DATASET", "enabled":True, "configuration": { } })
            displayed_state['partition'] = 'ALL'
        else:
            displayed_state['partition'] = 'NP'

        set_probe({ "type": "basic", "computeOnBuildMode": "PARTITION", "enabled":True,  "configuration": { } })
        display("basic:COUNT_COLUMNS")

        if obj.get('type', '') in ["Filesystem", "HDFS", "S3", "FTP", "UploadedFiles", "RemoteFiles", "Twitter"]:
            # file-based dataset types also show file count and size
            display("basic:COUNT_FILES")
            display("basic:SIZE")

        set_probe({ "type": "records", "computeOnBuildMode": "NO", "enabled":True, "configuration": { } })
        display("records:COUNT_RECORDS")

        # set the metrics setup on the dataset
        obj['metrics'] = metrics
        return obj

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["datasets/*.json"]


###############################################################################
# V13 / DSS 3.0.2
###############################################################################

class V13EnableMetrics(migration_json.ProjectConfigJsonMigrationOperation):
    """Set the ``enabled`` flag on dataset metric probes according to their type."""

    def __repr__(self):
        return "Propagate metrics enabled flags"

    def transform(self, obj, filepath=None):
        """Enable probes in ``obj['metrics']['probes']`` and return ``obj``.

        * basic / records / python / SQL / plugin probes: always enabled
        * partitioning probe: enabled when the dataset has partitioning dimensions
        * column statistics probes: enabled when at least one aggregate is configured

        Probes matching none of these rules are left untouched.
        """
        always_enabled = ("basic", "records", "python", "sql_query", "py_plugin", "sql_plugin")
        aggregate_based = ("col_stats", "adv_col_stats", "verify_col")

        metrics = obj.setdefault('metrics', {})
        probes = metrics.setdefault('probes', [])

        for probe in probes:
            pt = probe.get("type", "???")

            if pt in always_enabled:
                probe["enabled"] = True
            elif pt == "partitioning":
                if 'partitioning' in obj and len(obj['partitioning'].get('dimensions', [])) > 0:
                    probe["enabled"] = True
            elif pt in aggregate_based:
                aggregates = probe.get("configuration", {}).get("aggregates", [])
                if len(aggregates) > 0:
                    probe["enabled"] = True

        return obj

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["datasets/*.json"]



###############################################################################
# V14 / DSS 3.1
###############################################################################

def transformRecipeInput(obj, filepath, recipeType):
    """Replace the ``name`` of each virtual input by its ``index`` in the recipe inputs.

    The recipe definition is read from the ``.json`` file sitting next to the
    payload file: ``filepath`` with its trailing ``recipeType`` extension
    replaced by ``json``. The index is the position of the input name in the
    ``main`` role items of that recipe.

    :param obj: payload object, modified in place
    :param filepath: path of the payload file, ending with ``recipeType``
    :param recipeType: extension of the payload file (for instance ``join``)
    :return: ``obj``
    """
    virtualInputs = obj.get("virtualInputs", None)
    recipe_filepath = filepath[0:-len(recipeType)] + "json"

    if not osp.exists(recipe_filepath):
        # Recipe does not actually exist, so do nothing
        return obj

    recipe_data = base.json_loadf(recipe_filepath)
    # A recipe without "inputs" or without a "main" role yields no input
    # names, so its virtual inputs are reported as inconsistent below instead
    # of failing on an unbound variable.
    main_role = (recipe_data.get("inputs", None) or {}).get("main", None) or {}
    # .get keeps one entry per item so that positions (and thus indexes) are preserved
    input_names = [item.get("ref", None) for item in main_role.get("items", None) or []]

    if virtualInputs is not None:
        for vi in virtualInputs:
            if vi.get("index", None) is None:
                if vi.get("name", None) is not None and vi["name"] in input_names:
                    vi["index"] = input_names.index(vi["name"])
                    del vi["name"]
                else:
                    print("WARNING: recipe file is broken: inputs are inconsistent. File: " + filepath)
    else:
        print("WARNING: recipe file is broken:" + filepath)

    return obj


class V14JoinRecipesInputs(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``transformRecipeInput`` on join recipe payloads (``recipes/*.join``)."""

    def __repr__(self):
        return "Join recipes inputs representation"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["recipes/*.join"]

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def transform(self, obj, filepath):
        """Return the payload migrated by ``transformRecipeInput`` for the ``join`` extension."""
        migrated = transformRecipeInput(obj, filepath, "join")
        return migrated

class V14JoinRecipesJoinType(migration_json.ProjectConfigJsonMigrationOperation):
    """Convert the ``type`` / ``outerJoin`` pair of each join to the new join types."""

    def __repr__(self):
        return "Join recipes join types"

    def getNewJoinType(self, join):
        """Return the new join type for a join still in the old format.

        Note: the new LEFT mode is a restriction of the (ASYMMETRIC, outer) mode which was shown as LEFT and is now 'ADVANCED'
        """
        if not join.get('outerJoin', False):
            # not an outer join: INNER whatever the old type was
            return 'INNER'
        if join.get('type', None) == 'ASYMMETRIC':
            return 'ADVANCED'
        return 'FULL'

    def transform(self, obj, filepath):
        """Migrate every entry of ``obj['joins']`` in place and return ``obj``.

        ``outerJoin`` is replaced by ``outerJoinOnTheLeft`` and
        ``conditionsMode`` is set to ``AND``.
        """
        for join in obj.get("joins", []):
            # the new type depends on the old flag, so compute it before removing the flag
            new_type = self.getNewJoinType(join)
            was_outer = join.pop('outerJoin', False)
            join['type'] = new_type
            join['outerJoinOnTheLeft'] = was_outer
            join['conditionsMode'] = 'AND'
        return obj

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["recipes/*.join"]

class V14StackRecipesInputs(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``transformRecipeInput`` on stack recipe payloads (``recipes/*.vstack``)."""

    def __repr__(self):
        return "Stack recipes inputs representation"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["recipes/*.vstack"]

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def transform(self, obj, filepath):
        """Return the payload migrated by ``transformRecipeInput`` for the ``vstack`` extension."""
        migrated = transformRecipeInput(obj, filepath, "vstack")
        return migrated

class V14HideHiveDkuUdf(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``params.addDkuUdf = True`` on hive recipes.

    dataiku-hive-udf.jar is now optional
    """

    def __repr__(self):
        return "Make dataiku-hive-udf.jar optional in Hive recipes"

    def transform(self, obj, filepath=None):
        """Add the new parameter to hive recipes; other recipes pass through unchanged."""
        # only touch hive recipes
        if obj.get("type", "") == "hive":
            # setdefault creates and attaches params in case they didn't exist before
            obj.setdefault("params", {})["addDkuUdf"] = True
        return obj

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["recipes/*.json"]


def migrate_scatter_data(model_folder):
    """Convert ``scatter_sample.pkl`` of a model folder into ``scatter_sample.zip``.

    The pickled DataFrame is dumped as one text file per series (one value per
    line) into a temporary ``scatter`` sub-folder, which is then zipped and
    removed:

    * ``header``: names of the numeric columns
    * ``c``: cluster index of each row, taken from the ``cluster_<i>`` label;
      ``cluster_outliers`` maps to the last index
    * ``cluster``: cluster label of each row
    * ``0``, ``1``, ...: values of each numeric column, in header order

    :param model_folder: folder containing ``scatter_sample.pkl``
    """
    import pandas as pd
    # NOTE(review): read_pickle can execute arbitrary code; presumably this
    # file was written by the product itself - confirm it is never user-supplied.
    df = pd.read_pickle(osp.join(model_folder, "scatter_sample.pkl"))
    # create temp folder
    scatter_folder = osp.join(model_folder, "scatter")
    filtered = df._get_numeric_data()
    if not os.path.exists(scatter_folder):
        os.makedirs(scatter_folder)

    def write(name, items):
        newitems = []
        for item in items:
            # Python 2 only: unicode values must be encoded before being written
            if sys.version_info < (3,) and isinstance(item, unicode):
                newitems.append(item.encode("utf8"))
            else:
                newitems.append(item)
        # the context manager closes the file even when the write fails
        with open(osp.join(scatter_folder, name), 'w') as f:
            f.write("\n".join(newitems))

    try:
        header = filtered.columns.values
        write('header', header)

        n_clusters = len(df['cluster_labels'].unique())
        write('c', df['cluster_labels'].map(
            lambda c: int(c.split("_")[1]) if c != 'cluster_outliers' else n_clusters - 1).astype(
            str).tolist())
        write('cluster', df['cluster_labels'].astype(str).tolist())
        for i, column in enumerate(header):
            write(str(i), filtered[column].astype(str).tolist())

        shutil.make_archive(osp.join(model_folder, "scatter_sample"), 'zip', scatter_folder)
    finally:
        # never leave the temporary folder behind, even on failure
        shutil.rmtree(scatter_folder)


class V14ClusteringScatterplot(migration_base.ProjectLocalMigrationOperation):
    """Run ``migrate_scatter_data`` on every model folder holding a ``scatter_sample.pkl``.

    Model folders are looked up in saved models
    (``<saved_models>/<model>/versions/<version>``) and in analysis data
    (``<analysis_data>/<analysis>/<mltask>/sessions/<session>/pp*/m*``).
    """

    def __repr__(self):
        return "Update scatter plot data in trained clustering models"

    def execute(self, project_paths):
        """Migrate analysis data first, then saved models, when the folders exist."""
        if osp.isdir(project_paths.analysis_data):
            self.execute_analysis_data(project_paths.analysis_data)
        if osp.isdir(project_paths.saved_models):
            self.execute_saved_models(project_paths.saved_models)

    def execute_saved_models(self, saved_models):
        """Migrate each saved model version; failures are printed and skipped."""
        for mod in os.listdir(saved_models):
            versions_dir = osp.join(saved_models, mod, "versions")
            if not osp.isdir(versions_dir):
                continue
            for version in os.listdir(versions_dir):
                model_folder = osp.join(versions_dir, version)
                if not osp.isdir(model_folder):
                    continue
                if "scatter_sample.pkl" not in os.listdir(model_folder):
                    continue
                print("Migrating scatter plot in saved model : %s %s" % (mod,version))
                try :
                    migrate_scatter_data(model_folder)
                except Exception as e:
                    print("Saved model migration failed %s" % e)

    def execute_analysis_data(self, analysis_data):
        """Migrate each trained model of each session; failures are printed and skipped."""
        for anl in os.listdir(analysis_data):
            anl_dir = osp.join(analysis_data, anl)
            if not osp.isdir(anl_dir):
                continue
            for mltask in os.listdir(anl_dir):
                sessions_dir = osp.join(anl_dir, mltask, "sessions")
                if not osp.isdir(sessions_dir):
                    continue
                for session in os.listdir(sessions_dir):
                    session_dir = osp.join(sessions_dir, session)
                    if not osp.isdir(session_dir):
                        continue
                    for in_session in os.listdir(session_dir):
                        in_session_dir = osp.join(session_dir, in_session)
                        # only "pp*" sub-folders are inspected
                        if not (osp.isdir(in_session_dir) and in_session.startswith("pp")):
                            continue
                        for in_pp in os.listdir(in_session_dir):
                            in_pp_dir = osp.join(in_session_dir, in_pp)
                            # only "m*" sub-folders are inspected
                            if not (osp.isdir(in_pp_dir) and in_pp.startswith("m")):
                                continue
                            if "scatter_sample.pkl" not in os.listdir(in_pp_dir):
                                continue
                            try:
                                print("Migrating scatter plot in analysis : %s %s %s %s %s" % (anl, mltask, session,in_session,in_pp))
                                migrate_scatter_data(in_pp_dir)
                            except Exception as e:
                                print("Analysis model migration failed %s" % e)


class V14NormalizeDoubles(migration_json.ProjectConfigJsonMigrationOperation):
    """Default ``normalizeDoubles`` to ``False`` on CSV and SQL datasets."""

    SQL_DATASET_TYPES = ["PostgreSQL", "MySQL", "Vertica", "Redshift", "JDBC", "Greenplum", "Teradata", "Oracle", "SQLServer"]

    def __repr__(self):
        return "Normalize doubles"

    def transform(self, obj, filepath):
        """Add the flag where missing and return ``obj``.

        CSV datasets get it in ``formatParams``; otherwise datasets whose type
        is in ``SQL_DATASET_TYPES`` get it in ``params``. An existing value is
        kept, and nothing is done when the target section is absent.
        """
        if obj.get("formatType") == "csv" and "formatParams" in obj:
            obj["formatParams"].setdefault("normalizeDoubles", False)
        elif obj.get("type") in self.SQL_DATASET_TYPES and "params" in obj:
            obj["params"].setdefault("normalizeDoubles", False)
        return obj

    def jsonpath(self):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["datasets/*.json"]


class V14DefaultProjectStatus(migration_json.JsonMigrationOperation):
    """Add the default ``projectStatusList`` to ``config/general-settings.json``."""

    def __repr__(self,):
        return "Add default project status in project-settings.json"

    def transform(self, obj, filepath=None):
        """Set the default status list when none is defined yet and return ``obj``.

        Empty or ``None`` settings are returned untouched.
        """
        # obj is a dict: test for the key. hasattr() looks up attributes, never
        # finds dict keys, and therefore always overwrote an existing list.
        if obj and "projectStatusList" not in obj:
            obj["projectStatusList"] = [
                {
                  "name": "Sandbox",
                  "color": "#4285F4"
                },
                {
                  "name" : "Draft",
                  "color": "#77bec2"
                },
                {
                  "name": "In use",
                  "color": "#94BF51"
                },
                {
                  "name": "In production",
                  "color": "#ee874a"
                },
                {
                  "name": "Archived",
                  "color": "#CCCCCC"
                }
            ]
        return obj

    def jsonpath(self,):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def file_patterns(self,):
        """Return the glob patterns of the files this operation targets."""
        return ["config/general-settings.json"]


class V14RenameProjectPayloadFiles(migration_base.ProjectLocalMigrationOperation):
    """Rename recipe payload files to their new extensions.

    ``.spark_sql_query`` becomes ``.sql``, ``.pyspark`` becomes ``.py`` and
    ``.sparkr`` becomes ``.r``.
    """

    def __repr__(self):
        return "Update recipe payload file names"

    def execute(self, project_paths):
        """Move every matching file of ``<config>/recipes`` to its new name."""
        recipes_folder = osp.join(project_paths.config, "recipes")
        if not osp.isdir(recipes_folder):
            return

        # (old suffix, pattern matching that suffix, new suffix)
        renames = (
            (".spark_sql_query", r"\.spark_sql_query$", ".sql"),
            (".pyspark", r"\.pyspark$", ".py"),
            (".sparkr", r"\.sparkr$", ".r"),
        )
        for f in os.listdir(recipes_folder):
            nf = None
            for old_suffix, pattern, new_suffix in renames:
                if f.endswith(old_suffix):
                    nf = re.sub(pattern, new_suffix, f)

            if nf is None:
                continue
            print("Moving %s to %s" % (f, nf))
            shutil.move(osp.join(recipes_folder, f), osp.join(recipes_folder, nf))


###############################################################################
# V15 / DSS 4.0
###############################################################################


class V15ClusteringHeatmap(migration_base.ProjectLocalMigrationOperation):
    """Convert ``heatmap.json`` of trained clustering models to the new layout.

    Heatmap files are looked up in saved models
    (``<saved_models>/<model>/versions/<version>``) and in analysis data
    (``<analysis_data>/<analysis>/<mltask>/sessions/<session>/pp*/m*``). Each
    file is handed to ``migration_json.migrate_json_file`` together with this
    operation, which provides ``transform`` and ``jsonpath``.
    """

    def __repr__(self,):
        # was a copy of the scatter plot migration description
        return "Update heatmap data in trained clustering models"

    def transform(self, old_heatmap, filepath=None):
        """Build the new heatmap structure from the old per-cluster statistics.

        Global means and standard deviations are read from the feature stats
        of the first cluster; per-cluster values are collected for every
        cluster. The categorical fields of the new layout are left empty.

        :raises KeyError, IndexError: when the old structure is missing a field
            or has no cluster (callers in this class catch and report it)
        """
        clusters = old_heatmap["clusters_stats"]
        reference_stats = clusters[0]["feature_stats"]

        return {
            "cluster_labels": old_heatmap["cluster_labels"],
            "cluster_sizes": [cs["size"] for cs in clusters],
            "total_size": old_heatmap["nb_rows"],
            "num_names": old_heatmap["variable_names"],
            "num_averages": [fs["global_mean"] for fs in reference_stats],
            "cluster_num_averages": [[fs["mean"] for fs in cs["feature_stats"]] for cs in clusters],
            "num_std_devs": [fs["global_std"] for fs in reference_stats],
            "cluster_num_std_devs": [[fs["std"] for fs in cs["feature_stats"]] for cs in clusters],
            "cat_names": [],
            "levels": [],
            "proportions": [],
            "cluster_proportions": []
        }

    def jsonpath(self,):
        """Return the JSON path handed to ``transform`` (empty string here)."""
        return ""

    def execute(self, project_paths):
        """Migrate analysis data first, then saved models, when the folders exist."""
        if osp.isdir(project_paths.analysis_data):
            self.execute_analysis_data(project_paths.analysis_data)
        if osp.isdir(project_paths.saved_models):
            self.execute_saved_models(project_paths.saved_models)

    def execute_saved_models(self, saved_models):
        """Migrate the heatmap of each saved model version; failures are printed and skipped."""
        for mod in os.listdir(saved_models):
            versions_dir = osp.join(saved_models, mod, "versions")
            if not osp.isdir(versions_dir):
                continue
            for version in os.listdir(versions_dir):
                model_folder = osp.join(versions_dir,version)
                if not osp.isdir(model_folder):
                    continue
                if "heatmap.json" in os.listdir(model_folder):
                    print("Migrating heatmap in saved model : %s %s" % (mod,version))
                    try:
                        migration_json.migrate_json_file(self, osp.join(model_folder, "heatmap.json"))
                    except Exception as e:
                        print("Saved model migration failed %s" % e)

    def execute_analysis_data(self, analysis_data):
        """Migrate the heatmap of each trained model of each session; failures are printed and skipped."""
        for anl in os.listdir(analysis_data):
            anl_dir = osp.join(analysis_data, anl)
            if not osp.isdir(anl_dir):
                continue
            for mltask in os.listdir(anl_dir):
                sessions_dir = osp.join(anl_dir, mltask, "sessions")
                if not osp.isdir(sessions_dir):
                    continue
                for session in os.listdir(sessions_dir):
                    session_dir = osp.join(sessions_dir,session)
                    if not osp.isdir(session_dir):
                        continue
                    for in_session in os.listdir(session_dir):
                        in_session_dir = osp.join(session_dir,in_session)
                        # only "pp*" sub-folders are inspected
                        if not (osp.isdir(in_session_dir) and in_session.startswith("pp")):
                            continue
                        for in_pp in os.listdir(in_session_dir):
                            in_pp_dir = osp.join(in_session_dir,in_pp)
                            # only "m*" sub-folders are inspected
                            if not (osp.isdir(in_pp_dir) and in_pp.startswith("m")):
                                continue
                            if "heatmap.json" in os.listdir(in_pp_dir):
                                try:
                                    migration_json.migrate_json_file(self, osp.join(in_pp_dir, "heatmap.json"))
                                except Exception as e:
                                    print("Analysis model migration failed %s" % e)

class V15JsonFlattenerWithCustomSeparator(migration_app.ShakerStepMigrationOperation):
    """
    JSONFlattener used '.' separator in output column names to separate hierarchical levels
    But it is best to avoid '.' in column names.

    Existing steps get an explicit ``separator`` of '.'.
    """

    def __init__(self):
        super(V15JsonFlattenerWithCustomSeparator, self).__init__("JSONFlattener")

    def __repr__(self,):
        return "Enable custom separators in JSONFlattener (unnest object) processor"

    def transform_step(self, step):
        """Set ``params.separator`` to '.' and return the step."""
        # setdefault (not get) so that the separator is kept even when the step
        # had no "params" entry yet
        step.setdefault("params", {})["separator"] = '.'
        return step

class V15RoundProcessor(migration_app.ShakerStepMigrationOperation):
    """
    RoundProcessor has new 'precision' & 'places' parameters.
    """

    def __init__(self):
        super(V15RoundProcessor, self).__init__("RoundProcessor")

    def __repr__(self,):
        return "Add precision & places parameters to round processor"

    def transform_step(self, step):
        """Set ``precision`` and ``places`` to 0 in the step params and return the step.

        A step without params gets ``{"mode": "ROUND"}`` as its base params.
        """
        # setdefault (not get) so that the default params are attached to the
        # step instead of being filled in and then discarded
        params = step.setdefault("params", {"mode": "ROUND"})
        params["precision"] = 0
        params["places"] = 0
        return step


class V15RefreshNotebookInsightScenarioStep(migration_json.ProjectConfigJsonMigrationOperation):
    """Turn ``refresh_notebook_insight`` scenario steps into ``create_jupyter_export`` steps."""

    def __repr__(self):
        return "Update refresh_notebook_insight scenario step"

    def transform(self, step, filepath):
        """Migrate one step and return it; steps of other types pass through.

        The ``notebookId`` param is derived from the ``notebook`` field of the
        insight file referenced by ``params.insightId`` (dots replaced by
        underscores) when that file exists. The type is changed in any case.
        """
        if step.get('type', None) != 'refresh_notebook_insight':
            return step

        # Read the notebookId from the insight file, located in the
        # "insights" folder two levels above the scenario file
        project_dir = osp.dirname(osp.dirname(filepath))
        insight_id = step.get('params', {}).get('insightId', 'none')
        insight_file = osp.join(project_dir, 'insights', insight_id + '.json')
        if osp.isfile(insight_file):
            insight = base.json_loadf(insight_file)
            notebook_id = insight.get('notebook', '').replace('.', '_')
            step.get('params', {})['notebookId'] = notebook_id

        step['type'] = 'create_jupyter_export'
        return step

    def jsonpath(self):
        """Return the path of the JSON section handed to ``transform``."""
        return "params.steps"

    def file_patterns(self):
        """Return the glob patterns of the files this operation targets."""
        return ["scenarios/*.json"]

def v15_add_to_dashboard_authorizations(dashboard_authorizations, object_type, object_smart_name):
    """Append a READ authorization for an object unless one is already listed.

    The smart name is turned into an object reference: a name without dot is
    a plain ``objectId``; otherwise the first dot-separated part is the
    ``projectKey`` and the second one the ``objectId``. The list is modified
    in place; nothing is returned.
    """
    # Turn smart name into smart ref
    parts = object_smart_name.split(".")
    object_ref = {'objectType': object_type}
    if len(parts) == 1:
        object_ref['objectId'] = object_smart_name
    else:
        object_ref['projectKey'] = parts[0]
        object_ref['objectId'] = parts[1]

    already_listed = any(object_ref == auth['objectRef'] for auth in dashboard_authorizations)
    if not already_listed:
        dashboard_authorizations.append({'objectRef': object_ref, 'modes': ['READ']})


def v15_migrate_chart_def(chart_def):
    """Set ``hexbinRadiusMode`` to ``ABSOLUTE`` on hexagonal binned XY charts.

    Other chart definitions are left unchanged. The definition is modified in
    place and returned.
    """
    if chart_def.get('type', None) == 'binned_xy':
        if chart_def.get('variant', None) == 'binned_xy_hex':
            chart_def['hexbinRadiusMode'] = 'ABSOLUTE'
    return chart_def

def v15_migrate_chart_insight(insight, insight_id, is_pinned, dashboard_authorizations, project_paths):
    """Migrate an old chart insight.

    The chart is always appended to the ``explore/<dataset>.json`` file of its
    dataset. When the insight was pinned, a new ``chart`` insight file is also
    written, the dataset is added to ``dashboard_authorizations`` and the
    dashboard tile is returned; otherwise the function returns ``None``.
    """
    dataset_smart_name = insight['content']['dataset']
    config_dir = project_paths.config

    def sampling_and_chart_def():
        # Sampling settings and migrated chart definition of the old insight
        old_content = insight.get('content', {})
        sampling = old_content.get('dataView', {}).get("sampling", {})
        return sampling, v15_migrate_chart_def(old_content.get('chart', {}))

    # --- Chart stored in the dataset's explore file ---
    selection, chart_def = sampling_and_chart_def()
    dataset_chart = {'refreshableSelection': selection, 'def': chart_def}

    # The dataset definition is only looked up for names without a project prefix
    dataset = None
    if dataset_smart_name.find(".") < 0:
        dataset_file = osp.join(config_dir, "datasets", "%s.json" % dataset_smart_name)
        if osp.isfile(dataset_file):
            dataset = base.json_loadf(dataset_file)
        else:
            print("No dataset (file %s does not exist) ..." % dataset_file)

    maybe_set_chart_engine(dataset_chart, dataset, selection, chart_def)

    base.create_dir_if_needed(osp.join(config_dir, 'explore'))
    explore_file = osp.join(config_dir, 'explore', dataset_smart_name + '.json')
    if osp.isfile(explore_file):
        explore = base.json_loadf(explore_file)
    else:
        explore = {}

    # A missing or null "charts" entry is replaced by a fresh list
    if explore.get('charts', None) is None:
        explore["charts"] = []
    explore["charts"].append(dataset_chart)
    base.json_dumpf(explore_file, explore)

    if not is_pinned:
        return

    # --- Insight file (pinned insights only) ---
    new_insight = v15_migrate_insight_common(insight)
    new_insight['type'] = 'chart'
    selection, chart_def = sampling_and_chart_def()
    new_insight['params'] = {
        'datasetSmartName': dataset_smart_name,
        'refreshableSelection': selection,
        'def': chart_def
    }

    # NOTE(review): the engine type is set on the insight itself, not on its
    # "params" (which holds the selection and def) -- confirm this is intended.
    maybe_set_chart_engine(new_insight, dataset, selection, chart_def)

    new_insight_file = osp.join(config_dir, 'insights', insight_id + '.json')
    print("Write new insight to %s" % new_insight_file)
    base.json_dumpf(new_insight_file, new_insight)

    v15_add_to_dashboard_authorizations(dashboard_authorizations, 'DATASET', dataset_smart_name)

    # 'chart' was the minichart, 'full_chart' was the detailed chart
    minimal = insight.get('miniature', {}).get('type', None) == 'chart'
    chart_type = chart_def.get('type', None)

    tile = {
        'tileType': "INSIGHT",
        'insightType': "chart",
        'insightId': insight_id,
        'tileParams': {
            'showXAxis': (not minimal) and (chart_type != 'stacked_bars'),
            'showYAxis': (not minimal) or (chart_type == 'stacked_bars'),
            'showTooltips': True,
            'showLegend': (not minimal) or (chart_type == 'pie')
        }
    }
    return v15_create_tile_common(tile, insight, project_paths)

def v15_migrate_web_app_insight(insight, insight_id, is_pinned, dashboard_authorizations, project_paths):
    """Migrate an old HTML app insight into a web app.

    The web app (JSON file plus a copy of the old insight folder) is always
    created under ``web_apps``, reusing the insight id. When the insight was
    pinned, a ``web_app`` insight is also written, the web app is added to
    ``dashboard_authorizations`` and the dashboard tile is returned;
    otherwise the function returns ``None``.
    """
    config_dir = project_paths.config
    base.create_dir_if_needed(osp.join(config_dir, "web_apps"))

    # Web app definition: common metadata plus backend settings
    web_app = v15_migrate_insight_common(insight)
    backend_defaults = (
        ('pyBackendEnabled', False),
        ('pyBackendMustRun', False),
        ('apiKey', ""),
        ('libraries', []),
    )
    for key, default in backend_defaults:
        web_app[key] = insight.get(key, default)

    web_app_id = insight_id
    web_app_file = osp.join(config_dir, 'web_apps', web_app_id + '.json')
    old_insight_dir = osp.join(config_dir, 'insights_old', insight_id)
    shutil.copytree(old_insight_dir, osp.join(config_dir, 'web_apps', web_app_id))

    base.json_dumpf(web_app_file, web_app)

    if not is_pinned:
        return

    # Insight pointing at the web app
    new_insight = v15_migrate_insight_common(insight)
    new_insight['type'] = 'web_app'
    new_insight['params'] = {'webAppSmartId': web_app_id}
    base.json_dumpf(osp.join(config_dir, 'insights', insight_id + '.json'), new_insight)

    v15_add_to_dashboard_authorizations(dashboard_authorizations, 'WEB_APP', web_app_id)

    tile = {
        'insightType': "web_app",
        'insightId': insight_id,
        'tileParams': {}
    }
    return v15_create_tile_common(tile, insight, project_paths)

def v15_migrate_dataset_insight(insight, insight_id, is_pinned, dashboard_authorizations, project_paths):
    """Migrate an old dataset insight into a ``dataset_table`` insight.

    The insight file is always written. When the insight was pinned, the
    dataset is added to ``dashboard_authorizations`` and the dashboard tile
    is returned; otherwise the function returns ``None``.
    """
    config_dir = project_paths.config
    base.create_dir_if_needed(osp.join(config_dir, "explore"))

    # NOTE(review): the insight params read the 'dataset' key while the
    # authorization below reads 'odbId' -- confirm both keys are expected.
    new_insight = v15_migrate_insight_common(insight)
    new_insight['type'] = 'dataset_table'
    new_insight['params'] = {'datasetSmartName': insight.get('dataset', '')}
    base.json_dumpf(osp.join(config_dir, 'insights', insight_id + '.json'), new_insight)

    if not is_pinned:
        return

    v15_add_to_dashboard_authorizations(dashboard_authorizations, 'DATASET', insight.get('odbId', ''))

    tile = {
        'insightType': "dataset_table",
        'insightId': insight_id,
        'tileParams': {}
    }
    return v15_create_tile_common(tile, insight, project_paths)

def v15_migrate_folder_insight(insight, insight_id, is_pinned, dashboard_authorizations, project_paths):
    """Migrate an old managed folder insight into a ``managed-folder_content`` insight.

    The insight file is always written. When the insight was pinned, the
    folder is added to ``dashboard_authorizations`` and the dashboard tile is
    returned; otherwise the function returns ``None``.
    """
    new_insight = v15_migrate_insight_common(insight)
    new_insight['type'] = 'managed-folder_content'
    new_insight['params'] = {
        'folderSmartId': insight.get('odbId', None),
        'itemPath': insight.get('itemPath', None)
    }
    target_file = osp.join(project_paths.config, 'insights', insight_id + '.json')
    base.json_dumpf(target_file, new_insight)

    if not is_pinned:
        return

    v15_add_to_dashboard_authorizations(dashboard_authorizations, 'MANAGED_FOLDER', insight.get('odbId', ''))

    tile = {
        'insightType': "managed-folder_content",
        'insightId': insight_id,
        'tileParams': {}
    }
    return v15_create_tile_common(tile, insight, project_paths)


def rename_jupyter_notebook(old_name):
    """Replace the dots of a notebook name by underscores.

    A trailing ``.ipynb`` extension, when present, is kept as is.
    """
    extension = ".ipynb"
    if not old_name.endswith(extension):
        # Should not happen ...
        return old_name.replace(".", "_")

    stem = old_name[:-len(extension)]
    return stem.replace(".", "_") + extension

def v15_migrate_notebook_insight(insight, insight_id, is_pinned, dashboard_authorizations, project_paths):
    """Migrate an old notebook insight into a ``jupyter`` insight.

    An existing HTML export of the old insight is moved into the
    ``jupyter_exports`` folder of the (renamed) notebook. The insight file is
    always written and the notebook always added to
    ``dashboard_authorizations``; a dashboard tile is returned only when the
    insight was pinned (``None`` otherwise).
    """
    exports_root = project_paths.jupyter_exports
    config_dir = project_paths.config
    base.create_dir_if_needed(exports_root)

    old_notebook_name = insight.get("notebook", None)
    if old_notebook_name is None:
        notebook_name = "__unknown_notebook__"
    else:
        notebook_name = rename_jupyter_notebook(old_notebook_name)

    new_insight = v15_migrate_insight_common(insight)

    # Move the existing notebook export, named after its refresh timestamp
    # (falling back to the current time in milliseconds)
    export_file_src = osp.join(config_dir, 'insights_old', insight_id + '.ipython.html')
    if osp.isfile(export_file_src):
        notebook_exports_dir = osp.join(exports_root, notebook_name)
        base.create_dir_if_needed(notebook_exports_dir)
        now_ms = int(round(time.time() * 1000))
        export_name = str(insight.get('refreshedOn', now_ms)) + '.html'
        os.rename(export_file_src, osp.join(exports_root, notebook_name, export_name))

    new_insight['type'] = 'jupyter'
    new_insight['params'] = {
        'notebookSmartName': notebook_name,
        'loadLast': True
    }
    base.json_dumpf(osp.join(config_dir, 'insights', insight_id + '.json'), new_insight)

    v15_add_to_dashboard_authorizations(dashboard_authorizations, 'JUPYTER_NOTEBOOK', notebook_name)

    if not is_pinned:
        return

    tile = {
        'insightType': "jupyter",
        'insightId': insight_id,
        'tileParams': {}
    }
    return v15_create_tile_common(tile, insight, project_paths)

def v15_migrate_insight_common(insight):
    """Build the fields shared by all migrated insights from an old insight.

    The old creator becomes both the owner and the ``lastModifiedBy`` login
    of the creation tag; the result is always ``listed``.
    """
    creator = insight.get('createdBy')
    creation_tag = {
        'lastModifiedBy': {'login': creator},
        'lastModifiedOn': insight.get('createdOn')
    }

    migrated = {'name': insight.get('name')}
    migrated['tags'] = insight.get('tags', [])
    migrated['description'] = insight.get('description', '')
    migrated['checklists'] = insight.get('checklists', {})
    migrated['creationTag'] = creation_tag
    migrated['shortDesc'] = insight.get('shortDesc', '')
    migrated['listed'] = True
    migrated['owner'] = creator
    return migrated

def v15_create_tile_common(tile, insight, project_paths):
    """Complete a dashboard tile with the settings common to all insight types.

    The display settings are derived from the ``miniature`` of the old
    insight. When a picture exists for the insight, its folder is moved to the
    corresponding ``DASHBOARD_TILE-`` folder and referenced from the tile.

    :param tile: partially filled tile (expected to carry ``insightId``); modified in place
    :param insight: the old insight the tile is created from
    :param project_paths: object whose ``config`` attribute is the project config directory
    :return: the same ``tile`` dict
    """
    miniature = insight.get('miniature', {})
    miniature_type = miniature.get('type', None)

    tile['tileType'] = 'INSIGHT'
    tile['showTitle'] = 'YES' if miniature.get('showTitle', False) else 'NO'

    # Migrate insight image if it exists
    insight_id = tile.get('insightId', '')
    pictures_dir = osp.join(project_paths.config, 'pictures')
    insight_image_folder = osp.join(pictures_dir, 'INSIGHT-' + insight_id)
    if osp.isfile(osp.join(insight_image_folder, 'original.png')):
        shutil.move(insight_image_folder, osp.join(pictures_dir, 'DASHBOARD_TILE-' + insight_id))
        tile['imageId'] = insight_id

    # The miniature settings live on the old insight; the tile never has a
    # 'miniature' entry, so reading it from the tile always gave True.
    tile['resizeImage'] = miniature.get('fullImage', True)

    if miniature_type == 'picture':
        tile['displayMode'] = 'IMAGE'
        tile['showTitle'] = 'MOUSEOVER'
        tile['clickAction'] = 'OPEN_INSIGHT'
    elif miniature_type == 'description':
        tile['displayMode'] = 'INSIGHT_DESC'
        tile['clickAction'] = 'OPEN_INSIGHT'
    elif miniature_type == 'description_and_picture':
        tile['displayMode'] = 'IMAGE_AND_INSIGHT_DESC'
        tile['clickAction'] = 'OPEN_INSIGHT'
    else:
        tile['displayMode'] = 'INSIGHT'
        tile['clickAction'] = 'DO_NOTHING'

    return tile

class V15Insights(migration_base.ProjectLocalMigrationOperation):
    """Convert the old insights and pinboard of a project into new insights and a dashboard."""

    def __repr__(self,):
        return "Move insights"

    def execute(self, project_paths):
        """Migrate every old insight, then build the default dashboard.

        The existing ``insights`` folder is renamed to ``insights_old``, each
        of its JSON files is handed to the migration function of its type,
        and one dashboard page is created per pinboard section. The collected
        authorizations are stored in ``params.json``; ``insights_old`` and
        ``pinboard.json`` are removed at the end.

        :param project_paths: object exposing the project directories (``config`` is used here)
        :raises KeyError: when an insight has a type with no migration function
        """
        insight_migrations = {
            'CHART': v15_migrate_chart_insight,
            'HTML_APP': v15_migrate_web_app_insight,
            'DATASET': v15_migrate_dataset_insight,
            'MANAGED_FOLDER': v15_migrate_folder_insight,
            'IPYTHON': v15_migrate_notebook_insight
        }
        config_dir = project_paths.config

        base.create_dir_if_needed(osp.join(config_dir, "dashboards"))
        base.create_dir_if_needed(osp.join(config_dir, "insights"))

        # Ids of the insights referenced by the pinboard. The value is True
        # until the insight is migrated, then the tile returned by the migration.
        pinned_insights = {}
        pinboard_file = osp.join(config_dir, "pinboard.json")
        if osp.isfile(pinboard_file):
            pinboard = base.json_loadf(pinboard_file)
            for section in pinboard.get('sections', []):
                for pinlet in section.get('pinlets', []):
                    pinned_insights[pinlet.get('insightId', '')] = True
        else:
            pinboard = {}

        print("Insights that were pinned: %s" % json.dumps(pinned_insights))

        insights_dir = osp.join(config_dir, "insights")
        if not osp.isdir(insights_dir):
            return

        # Set the old insights aside so that the new ones can reuse the folder name
        insights_old_dir = osp.join(config_dir, "insights_old")
        if osp.isdir(insights_old_dir):
            shutil.rmtree(insights_old_dir)

        os.rename(insights_dir, insights_old_dir)
        base.create_dir_if_needed(osp.join(config_dir, "insights"))

        dashboard_authorizations = []

        for file_name in os.listdir(insights_old_dir):
            insight_id, extension = os.path.splitext(file_name)
            was_pinned = insight_id in pinned_insights
            print("Migrate old insight: %s (%s) was_pinned: %s" % (insight_id, extension, was_pinned))
            if extension != '.json':
                continue
            insight = base.json_loadf(osp.join(insights_old_dir, file_name))
            migrate = insight_migrations[insight['type'].upper()]
            pinned_insights[insight_id] = migrate(insight, insight_id, was_pinned, dashboard_authorizations, project_paths)

        dashboard = {
            'pages': [],
            'owner': 'admin', # TODO @dashboards project owner
            'listed': True,
            'name': 'Default dashboard'
        }

        for section in pinboard.get('sections', []):
            tiles = []
            for pinlet in section.get('pinlets', []):
                tile = pinned_insights[pinlet.get('insightId', '')]
                # No tile was produced (insight file missing, or nothing returned)
                if not isinstance(tile, dict):
                    continue
                # Work on a copy: the same insight may be pinned by several
                # pinlets, each of which needs its own box.
                tile = dict(tile)
                box = pinlet.get('box', {})
                tile['box'] = {
                    'left': box.get('left', 1) * 2 + 3,
                    'width': box.get('width', 1) * 2,
                    'top': box.get('top', 1) * 2,
                    'height': box.get('height', 1) * 2
                }
                tiles.append(tile)

            dashboard['pages'].append({
                'id': base.generate_random_id(7),
                'title': section.get('title', ''),
                'grid': {'tiles': tiles}
            })

        # A dashboard always has at least one (possibly empty) page
        if len(dashboard['pages']) == 0:
            dashboard['pages'].append({
                'id': base.generate_random_id(7),
                'grid': {'tiles': []}
            })

        dashboard_id = base.generate_random_id(7)
        dashboard_file = osp.join(config_dir, 'dashboards', dashboard_id + '.json')
        base.json_dumpf(dashboard_file, dashboard)

        params_file = osp.join(config_dir, "params.json")
        params = base.json_loadf(params_file)
        params['dashboardAuthorizations'] = {'allAuthorized': False, 'authorizations': dashboard_authorizations}
        base.json_dumpf(params_file, params)

        # Delete insights_old & pinboard.json
        if osp.isdir(insights_old_dir):
            shutil.rmtree(insights_old_dir)
        if osp.isfile(pinboard_file):
            os.remove(pinboard_file)


class V15JupyterExportsDir(migration_base.MigrationOperation):
    """Make sure the ``jupyter_exports`` directory exists in the data directory."""

    def __repr__(self):
        return "Create jupyter_exports directory"

    def execute(self, diphome, simulate=False):
        # Nothing is created in simulation mode
        if simulate:
            return
        exports_dir = osp.join(diphome.path, "jupyter_exports")
        base.create_dir_if_needed(exports_dir)


class V15ProjectSettingsExposed(migration_json.ProjectConfigJsonMigrationOperation):
    """Move ``settings.exposedComputables`` to ``exposedObjects.objects`` in project params."""

    def __repr__(self):
        return "Move exposedComputables to exposedObjects"

    def transform(self, obj, filepath=None):
        """Create ``exposedObjects`` (empty when nothing was exposed) and drop the old key."""
        settings = obj.get("settings", {})
        exposed = settings.get("exposedComputables", [])
        obj['exposedObjects'] = {"objects": exposed}
        # No-op when the key is absent
        settings.pop("exposedComputables", None)
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["params.json"]

class V15HProxyRemovalInRecipes(migration_json.ProjectConfigJsonMigrationOperation):
    """Derive the Hive ``executionEngine`` of recipes from their ``useGlobalMetastore`` flag."""

    # Recipe types whose Hive settings are stored in the payload file next to the recipe
    MULTI_ENGINE_RECIPE_TYPES = ["grouping", "join", "window", "vstack"]

    def __repr__(self,):
        return "Switch to Hiveserver2 for Hive recipes"

    @staticmethod
    def _convert_flag(use_global_metastore):
        """Map the old ``useGlobalMetastore`` flag to an execution engine name."""
        return "HIVECLI_GLOBAL" if use_global_metastore else "HIVECLI_LOCAL"

    def transform(self, obj, filepath):
        """Set ``executionEngine`` on Hive recipes and on multi-engine recipe payloads.

        For ``hive`` recipes the recipe params are updated. For multi-engine
        recipes the payload file (same path, with the recipe type as
        extension) is loaded, updated and written back when it holds the flag.

        :param obj: the recipe definition
        :param filepath: path of the recipe JSON file (text or UTF-8 bytes)
        :return: ``obj``
        """
        recipe_type = obj.get("type", "")

        if recipe_type == "hive":
            if obj.get("params", {}).get("useGlobalMetastore", None) is not None:
                obj["params"]["executionEngine"] = self._convert_flag(obj["params"]["useGlobalMetastore"])
        elif recipe_type in self.MULTI_ENGINE_RECIPE_TYPES:
            # Only byte strings need decoding (a Python 3 str has no decode())
            if isinstance(filepath, bytes):
                filepath = filepath.decode("utf8")
            # Swap the extension only, leaving any other ".json" in the path alone
            json_suffix = ".json"
            stem = filepath[:-len(json_suffix)] if filepath.endswith(json_suffix) else filepath
            payloadf = "%s.%s" % (stem, recipe_type)

            payload = base.json_loadf(payloadf)
            if payload.get("engineParams", {}).get("hive", {}).get("useGlobalMetastore", None) is not None:
                hive_params = payload["engineParams"]["hive"]
                hive_params["executionEngine"] = self._convert_flag(hive_params["useGlobalMetastore"])
                base.json_dumpf(payloadf, payload)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.json"]


class V15HProxyRemovalInNotebooks(migration_json.ProjectConfigJsonMigrationOperation):
    """Point SQL notebooks using the ``hive-hproxy`` virtual connection to ``hive-jdbc``."""

    def __repr__(self):
        return "Switch to Hiveserver2 for Hive notebooks"

    def transform(self, obj, filepath):
        """Swap the connection prefix, keeping whatever follows it."""
        legacy_prefix = "@virtual(hive-hproxy)"
        current = obj.get("connection", "")
        if current.startswith(legacy_prefix):
            remainder = current[len(legacy_prefix):]
            obj["connection"] = "@virtual(hive-jdbc)" + remainder
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["notebooks/sql/*/params.json"]

class V15HProxyRemovalInScenarios(migration_json.ProjectConfigJsonMigrationOperation):
    """Point scenario triggers and steps using ``hive-hproxy`` to ``hive-jdbc``."""

    def __repr__(self):
        return "Switch to Hiveserver2 for scenarios"

    def transform(self, obj, filepath):
        """Swap the connection prefix in the params of every trigger, then every step."""
        legacy_prefix = "@virtual(hive-hproxy)"
        replacement_prefix = "@virtual(hive-jdbc)"

        def switch_connections(items):
            for item in items:
                if item.get("params", {}).get("connection", "").startswith(legacy_prefix):
                    remainder = item["params"]["connection"][len(legacy_prefix):]
                    item["params"]["connection"] = replacement_prefix + remainder

        switch_connections(obj.get("triggers", []))
        switch_connections(obj.get("params", {}).get("steps", []))
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["scenarios/*.json"]


class V15DenormalizeMessagingChannels(migration_base.MigrationOperation):
    """Copy webhook/dataset channel settings into the scenario messagings that use them.

    The ``webhook`` and ``dataset`` channels are removed from
    ``config/messaging-channels.json``; selected properties of their
    configuration are copied into the reporters and ``send_report`` steps of
    every scenario referencing them.
    """

    def __repr__(self,):
        return "Denormalize dataset/webhook messaging channels setup"

    def execute(self, diphome, simulate=False):
        """Rewrite the channels file, then every scenario file.

        NOTE(review): ``simulate`` is not consulted here, files are rewritten
        regardless -- confirm whether this is intended.
        """
        channels_file = osp.join(diphome.path, "config/messaging-channels.json")

        if not osp.isfile(channels_file):
            return

        channels_data = base.json_loadf(channels_file)
        old_channels = channels_data.get('channels', [])
        new_channels = [c for c in old_channels if c.get('type', '') not in ['webhook', 'dataset']]
        base.json_dumpf(channels_file, {'channels': new_channels})

        def get_channel_by_id(channel_id):
            # Looked up among the old channels, which still include the removed ones
            for channel_data in old_channels:
                if channel_data.get('id', None) == channel_id:
                    return channel_data
            return None

        def update_messaging(messaging):
            if messaging is None:
                return
            messaging_type = messaging.get('type', None)
            if messaging_type == 'webhook':
                keys_to_copy = ['useProxy']
            elif messaging_type == 'dataset':
                keys_to_copy = ['projectKey', 'datasetName']
            else:
                return

            channel_id = messaging.get('channelId', None)
            channel_data = get_channel_by_id(channel_id)
            if channel_data is None:
                print('Messaging channel not found : %s' % channel_id)
                return

            # A messaging without configuration gets a fresh one; defaulting
            # to None made the assignments below fail.
            configuration = messaging.get('configuration', None)
            if configuration is None:
                configuration = {}
            messaging['configuration'] = configuration
            channel_configuration = channel_data.get('configuration', {})
            for prop in keys_to_copy:
                configuration[prop] = channel_configuration.get(prop, None)

        for scenario_file in glob(osp.join(diphome.path, 'config/projects/*/scenarios/*json')):
            scenario = base.json_loadf(scenario_file)

            for reporter in scenario.get('reporters', []):
                update_messaging(reporter.get('messaging', None))
            for step in scenario.get('params', {}).get('steps', []):
                if step.get('type', None) == 'send_report':
                    update_messaging(step.get('params', {}).get('messaging', None))

            base.json_dumpf(scenario_file, scenario)

class V15RetypeChannels(migration_json.JsonMigrationOperation):
    """Rename the ``aws`` messaging channel type to ``aws-ses-mail``."""

    def __repr__(self):
        return "Change types in messaging channels configuration"

    def transform(self, channel, filepath=None):
        """Retype the channel in place when it is an ``aws`` channel, and return it."""
        is_aws_channel = channel.get('type', None) == 'aws'
        if is_aws_channel:
            channel['type'] = 'aws-ses-mail'
        return channel

    def jsonpath(self):
        return "channels"

    def file_patterns(self):
        return ["config/messaging-channels.json"]

# Old messaging type -> new scenario messaging type.
# Both "smtp" and "aws" map to "mail-scenario".
messaging_type_mapping = {
    "smtp": "mail-scenario",
    "aws": "mail-scenario",
    "hipchat": "hipchat-scenario",
    "slack": "slack-scenario",
    "webhook": "webhook-scenario",
    "twilio": "twilio-scenario",
    "shell": "shell-scenario",
    "dataset": "dataset-scenario",
}
def update_messaging(messaging):
    """Retype a messaging and move its channel id into its configuration.

    The type is translated through ``messaging_type_mapping`` (unknown types
    become ``None``), the configuration is created when missing, and
    ``hipchat-scenario`` messagings are flagged as using the global channel.
    Nothing happens when ``messaging`` is ``None``.
    """
    if messaging is None:
        return

    new_type = messaging_type_mapping.get(messaging.get('type', ''), None)
    messaging['type'] = new_type

    configuration = messaging.get('configuration', {})
    messaging['configuration'] = configuration
    configuration['channelId'] = messaging.get('channelId', None)

    if new_type == "hipchat-scenario":
        configuration['useGlobalChannel'] = True

class V15RetypeMessagings(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply ``update_messaging`` to the reporters and ``send_report`` steps of scenarios."""

    def __repr__(self):
        return "Update type and channelId in messaging configurations"

    def transform(self, scenario, filepath):
        """Update the messaging of every reporter, then of every ``send_report`` step."""
        for reporter in scenario.get('reporters', []):
            update_messaging(reporter.get('messaging', None))

        for step in scenario.get('params', {}).get('steps', []):
            if step.get('type', None) != 'send_report':
                continue
            update_messaging(step.get('params', {}).get('messaging', None))

        return scenario

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["scenarios/*.json"]


class V15RetypeMessagingsInScenarioRuns(migration_json.JsonMigrationOperation):
    """Apply the messaging retyping to the scenario snapshot stored in scenario runs."""

    def __repr__(self):
        return "Update type and channelId in messaging configurations of scenario runs"

    def transform(self, obj, filepath):
        """Update the embedded scenario's messagings and the reporters' states.

        The ``messagingType`` of each reporter state goes through
        ``messaging_type_mapping`` (unknown types become ``None``).
        """
        scenario = obj.get("scenario", {})

        for reporter in scenario.get('reporters', []):
            update_messaging(reporter.get('messaging', None))

        for step in scenario.get('params', {}).get('steps', []):
            if step.get('type', None) != 'send_report':
                continue
            update_messaging(step.get('params', {}).get('messaging', None))

        for reporter_state in obj.get('reportersStates', []):
            old_type = reporter_state.get('messagingType', '')
            reporter_state['messagingType'] = messaging_type_mapping.get(old_type, None)

        return obj

    def file_patterns(self):
        return ["scenarios/*/*/*/run.json"]

    def jsonpath(self):
        return ""

class V15FixupAuthCtxInScenarioRuns(migration_json.JsonMigrationOperation):
    """Mark the ``runAsUser`` of scenario runs as coming from the UI."""

    def __repr__(self):
        return "Update authentication context of scenario runs"

    def transform(self, obj, filepath):
        """Set ``authSource`` on ``runAsUser``; runs without ``runAsUser`` are left as is."""
        if "runAsUser" in obj:
            obj["runAsUser"]['authSource'] = 'USER_FROM_UI'
        return obj

    def file_patterns(self):
        return ["scenarios/*/*/*/run.json"]

    def jsonpath(self):
        return ""


class V15AddGridSearchRFGBTETInAnalysisConfig(migration_json.ProjectConfigJsonMigrationOperation):
    """Update the ``modeling`` section of analysis ML tasks and prediction training recipes."""

    def __repr__(self):
        return "Update ML parameters in analysis config"

    def transform(self, modeling, filepath=None):
        """Listify the grid-search parameters, then add the missing modeling keys."""
        # Both helpers modify the dict in place and return it
        listified = v15addgridsearch_do_update_dict(modeling)
        return v15_add_missing_modeling_params(listified)

    def jsonpath(self):
        return "modeling"

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json", "recipes/*.prediction_training"]


class V15AddGridSearchRFGBTETInAnalysisData(migration_base.ProjectLocalMigrationOperation):
    """Update the ML parameters stored with the sessions of analysis ML tasks."""

    def __repr__(self):
        return "Update ML parameters in analysis data"

    def execute(self, project_paths):
        """Rewrite the ML task, rmodeling and actual params files of every session.

        A failure on one file is printed and does not stop the migration.
        """
        # (glob pattern, progress message, key holding the modeling params or
        # None when the file itself is the modeling params)
        file_kinds = (
            # a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
            ("%s/*/*/sessions/*/mltask.json",
             "Migrating saved ML Task session: %s ", "modeling"),
            # a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
            ("%s/*/*/sessions/*/*/*/rmodeling_params.json",
             "Migrating saved ML Task rmodeling file: %s", None),
            # a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/actual_params.json
            ("%s/*/*/sessions/*/*/*/actual_params.json",
             "Migrating saved ML Task actualparams file: %s", "resolved"),
        )

        for pattern, message, section in file_kinds:
            for json_file in glob(pattern % project_paths.analysis_data):
                print(message % json_file)
                try:
                    data = base.json_loadf(json_file)
                    modeling = data if section is None else data.get(section, {})
                    v15addgridsearch_do_update_dict(modeling)
                    v15_add_missing_modeling_params(modeling)
                    base.json_dumpf(json_file, data)
                except Exception as e:
                    print("Model migration FAILED: %s" % e)


class V15AddGridSearchRFGBTETInRootSavedData(migration_base.ProjectLocalMigrationOperation):
    """Update the ML parameters stored with the versions of saved models."""

    def __repr__(self,):
        return "Update ML parameters in root saved models"

    def execute(self, project_paths):
        """Rewrite the rmodeling and actual params files of every saved model version.

        A failure on one file is printed and does not stop the migration.

        :param project_paths: object exposing the project directories (``saved_models`` is used here)
        """
        # (glob pattern, progress message, key holding the modeling params or
        # None when the file itself is the modeling params)
        file_kinds = (
            # modelid/versions/vid/rmodeling_params.json
            ("%s/*/versions/*/rmodeling_params.json",
             "Migrating saved ML Task rmodeling file: %s", None),
            # modelid/versions/vid/actual_params.json
            # (this message used to say "rmodeling file" as well)
            ("%s/*/versions/*/actual_params.json",
             "Migrating saved ML Task actualparams file: %s", "resolved"),
        )

        for pattern, message, section in file_kinds:
            for json_file in glob(pattern % project_paths.saved_models):
                print(message % json_file)
                try:
                    data = base.json_loadf(json_file)
                    modeling = data if section is None else data.get(section, {})
                    v15addgridsearch_do_update_dict(modeling)
                    v15_add_missing_modeling_params(modeling)
                    base.json_dumpf(json_file, data)
                except Exception as e:
                    print("Model migration FAILED: %s" % e)


class V15AddGridSearchRFGBTETInSavedModels(migration_json.ProjectConfigJsonMigrationOperation):
    """Update the ``miniTask.modeling`` section of saved model definitions."""

    def __repr__(self):
        return "Update ML parameters in saved models"

    def transform(self, modeling, filepath=None):
        """Listify the grid-search parameters, then add the missing modeling keys."""
        # Both helpers modify the dict in place and return it
        listified = v15addgridsearch_do_update_dict(modeling)
        return v15_add_missing_modeling_params(listified)

    def jsonpath(self):
        return "miniTask.modeling"

    def file_patterns(self):
        return ["saved_models/*.json"]


def v15addgridsearch_do_update_dict(modeling):
    """Convert tree-based algorithm parameters to their grid-search (list) form.

    * for ``EXTRA_TREES``, a non-empty random forest grid is moved to
      ``extra_trees_grid``;
    * ``rf_estimators`` equal to 0 becomes 100;
    * in each algorithm section present in ``modeling``, selected parameters
      are wrapped into a list (empty or missing ones get a default), and
      ``n_estimators == [0]`` becomes ``[100]`` for random forests.

    ``modeling`` is modified in place and returned.
    """
    if modeling.get('algorithm') == 'EXTRA_TREES':
        # The classifier grid, handled last, wins when both are present
        for rf_grid_key in ('rf_regressor_grid', 'rf_classifier_grid'):
            if modeling.get(rf_grid_key):
                modeling['extra_trees_grid'] = modeling.pop(rf_grid_key)

    if modeling.get('rf_estimators') == 0:
        modeling['rf_estimators'] = 100

    tree_defaults = (('max_tree_depth', 100), ('min_samples_leaf', 8), ('max_features', 0))
    gbt_defaults = (('learning_rate', 0.1), ('max_depth', 3), ('min_samples_leaf', 3), ('max_features', 0))

    # (section keys, parameters to listify with their default, fix n_estimators?)
    section_groups = (
        (('extra_trees', 'extra_trees_grid'), tree_defaults, False),
        (('random_forest_classification', 'random_forest_regression',
          'rf_regressor_grid', 'rf_classifier_grid'), tree_defaults, True),
        (('gbt_regression', 'gbt_classification',
          'gbt_classifier_grid', 'gbt_regressor_grid'), gbt_defaults, False),
    )

    for section_keys, defaults, fix_auto_estimators in section_groups:
        for section_key in section_keys:
            # For an absent section, the changes land in a throw-away dict
            section = modeling.get(section_key, dict())
            for prop, prop_default in defaults:
                current = section.get(prop, None)
                if not current:
                    section[prop] = [prop_default]
                elif not isinstance(current, list):
                    section[prop] = [current]
                # lists are kept as is: makes the migration idempotent, just in case
            if fix_auto_estimators and section.get('n_estimators') == [0]:
                section['n_estimators'] = [100]

    return modeling

def v15_add_missing_modeling_params(modeling):
    """Fill in default values for modeling keys that are absent or falsy.

    ``modeling`` is modified in place and returned.
    """
    defaults = (
        ("gbt_max_features", 0),
        ("gbt_min_samples_leaf", 3),
        ("gbt_max_feature_prop", 0.1),
        ("gbt_selection_mode", "auto"),
        ("rf_selection_mode", "auto"),
    )
    for key, default in defaults:
        # Falsy values (0, "", None, ...) count as missing
        if modeling.get(key):
            continue
        modeling[key] = default
    return modeling


def unsecure_random_string(N=16):
    """Return ``N`` characters drawn from uppercase ASCII letters and digits.

    Uses the ``random`` module, hence the "unsecure" in the name.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(N)]
    return ''.join(picked)

def maybe_set_chart_engine(engine_container, dataset, refreshable_selection, chart_def):
    """Set ``engineType`` to ``SQL`` on ``engine_container`` when the chart qualifies.

    All of the following must hold: the dataset is known and of one of the
    listed types, the selection uses the ``FULL`` sampling method, the chart
    asks for live processing, and its type is one of the listed chart types.
    Otherwise nothing is changed.
    """
    if dataset is None:
        return

    dataset_type = dataset.get("type", "???")
    sql_dataset_types = ("MySQL", "PostgreSQL", "Greenplum", "Vertica", "Oracle", "Netezza",
                         "Redshift", "SAPHANA", "HDFS")
    if dataset_type not in sql_dataset_types:
        return

    sampling_method = refreshable_selection.get("selection", {}).get("samplingMethod", "???")
    if sampling_method != "FULL":
        return

    if chart_def.get("useLiveProcessingIfAvailable", False) != True:
        return

    sql_chart_types = ("multi_columns_lines", "grouped_columns", "stacked_columns", "stacked_bars",
                       "lines", "stacked_area", "pivot_table", "pie", "grouped_xy")
    if chart_def["type"] in sql_chart_types:
        print("Setting a chart as SQL engine (dataset %s)" % (dataset_type))
        engine_container["engineType"] = "SQL"

class V15ChartsInExplore(migration_json.ProjectConfigJsonMigrationOperation):
    """Wraps each explore file into an object with "script" and "charts" entries."""

    def __repr__(self,):
        return "Refactor charts in explore"

    def transform(self, obj, filepath):
        # the former content becomes the "script" entry, charts are rebuilt aside
        migrated_charts = []
        explore = {
            "script" : obj,
            "charts" : migrated_charts
        }

        # This probably does not exist, so we don't bother migrating engines ...
        for old_chart in obj.get("charts", []):
            entry = {
                "def" : v15_migrate_chart_def(old_chart.get("data", {}))
            }

            sampling = obj.get("vizSampling", {})
            if sampling.get("selection", None) is not None:
                entry["refreshableSelection"] = sampling
                # no dataset is passed here, so the engine is left untouched
                maybe_set_chart_engine(entry, None, entry["refreshableSelection"], entry["def"])
            else:
                entry["copySelectionFromScript"] = True

            migrated_charts.append(entry)

        return explore

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["explore/*.json"]


class V15ChartsInAnalysis(migration_json.ProjectConfigJsonMigrationOperation):
    """Builds the analysis-level "charts" list from the charts held by its script."""

    def __repr__(self,):
        return "Refactor charts in analysis"

    def transform(self, obj, filepath):
        script = obj.get("script", {})
        migrated_charts = []
        obj["charts"] = migrated_charts

        for old_chart in script.get("charts", []):
            entry = {
                "def" : v15_migrate_chart_def(old_chart.get("data", {}))
            }

            # charts either reuse the script selection or keep their own sampling
            sampling = script.get("vizSampling", {})
            if sampling.get("selection", None) is not None:
                entry["refreshableSelection"] = sampling
            else:
                entry["copySelectionFromScript"] = True

            migrated_charts.append(entry)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["analysis/*/core_params.json"]


class V15ChartsInAnalysisModels(migration_json.ProjectConfigJsonMigrationOperation):
    """Builds "predictionDisplayCharts" from the charts of the prediction display script."""

    def __repr__(self,):
        return "Refactor charts in analysis predicted data"

    def transform(self, obj, filepath):
        display_script = obj.get("predictionDisplayScript", {})
        display_charts = []
        obj["predictionDisplayCharts"] = display_charts

        for old_chart in display_script.get("charts", []):
            migrated_def = v15_migrate_chart_def(old_chart.get("data", {}))
            display_charts.append({"def" : migrated_def})

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["analysis/*/ml/*/params.json"]

class V15PrepareRecipeEngine(migration_json.ProjectConfigJsonMigrationOperation):
    """Replaces params.engine by params.engineType in recipes of type 'shaker'."""

    def __repr__(self,):
        return "Update engine in Prepare recipes"

    def transform(self, obj, filepath=None):
        if obj.get('type', None) != 'shaker':
            return obj

        params = obj.setdefault('params', {})

        # unknown or missing legacy engines fall back to 'DSS'
        engine_mapping = {'DSS_STREAM':'DSS','SPARK':'SPARK','HADOOP_MAPREDUCE':'HADOOP_MAPREDUCE'}
        previous_engine = params.get('engine', None)
        params['engineType'] = engine_mapping.get(previous_engine, 'DSS')
        params.pop('engine', None)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.json"]

class V15SelectDSSSyncRecipeEngine(migration_json.ProjectConfigJsonMigrationOperation):
    """Sets engineType to 'DSS' on sync recipes unless the single main output is a Redshift dataset."""

    def __repr__(self,):
        return "Set engine to DSS stream in sync recipes"

    def transform(self, obj, filepath=None):
        if obj.get('type', None) != 'sync':
            return obj

        obj.setdefault('params', {})

        detected_type = "???"

        items = obj.get("outputs", {}).get("main", {}).get("items", [])
        if len(items) == 1:
            ref = items[0].get("ref", "???")

            # the dataset definition is looked up in the sibling "datasets" folder
            recipes_dir = osp.dirname(filepath)
            candidate = osp.join(recipes_dir, "..", "datasets", "%s.json" % ref)
            if not osp.isfile(candidate):
                print(" Dataset file not found")
            else:
                detected_type = base.json_loadf(candidate).get("type", "???")

        print("Output dataset type is %s" % detected_type)
        if detected_type != "Redshift":
            obj['params']['engineType'] = 'DSS'

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.json"]

class V15SelectDSSRecipeEngine(migration_json.ProjectConfigJsonMigrationOperation):
    """Forces engineType to 'DSS' in the *.split and *.sampling recipe payloads."""

    def __repr__(self,):
        return "Set engine to DSS stream in split and sampling recipes"

    def transform(self, obj, filepath=None):
        # unconditional: any existing engineType is overwritten
        obj.update(engineType='DSS')
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.split", "recipes/*.sampling"]

class JavaPropertiesFile(object):
    """Minimal reader/writer for `key=value` properties files.

    Only lines matching ``key=value`` are loaded; everything from a ``#``
    onwards is dropped. `save` rewrites the file from the loaded properties
    only, so comments and non-matching lines are not preserved.
    """

    # spellings that get_as_bool() reads as True (compared lower-cased)
    _TRUE_VALUES = ('true', 't', 'yes', 'y', 'oui', 'o')

    def __init__(self, path):
        """Load the properties found in the file at `path`."""
        self.path = path
        self.props = {}
        line_pattern = re.compile(r'^([^=#]+)=([^#]*)(#.*)?$')
        with open(path, 'r') as f:
            for line in f:
                m = line_pattern.search(line.strip())
                if m is not None:
                    self.props[m.group(1).strip()] = m.group(2).strip()

    def get(self, key, default=None):
        """Return the raw string value of `key`, or `default` when absent."""
        return self.props.get(key, default)

    def get_as_bool(self, key, default=None):
        """Return the value of `key` interpreted as a boolean.

        A bool `default` is returned as-is when the key is absent. When the
        key is absent and no default is given, False is returned (this used
        to fail with an AttributeError on None).
        """
        v = self.get(key, default)
        if v is None:
            return False
        if isinstance(v, bool):
            return v
        return v.lower() in self._TRUE_VALUES

    def save(self):
        """Overwrite the file with one `key=value` line per loaded property."""
        with open(self.path, "w") as f:
            ini_text = "\n".join([str(k) + "=" + str(v) for (k, v) in self.props.items()]) + "\n"
            f.write(ini_text)

class V15MoveKerberosSettings(migration_base.MigrationOperation):
    """Copies the Kerberos properties of config/dip.properties into general-settings.json."""

    def __repr__(self,):
        return "Move old Kerberos settings"

    def execute(self, diphome, simulate=False):
        """Fill `hadoopSettings` of general-settings.json from dip.properties.

        Nothing is done when either file is missing. With `simulate` set,
        the new settings are computed but the file is not written.
        """
        settings_file = osp.join(diphome.path, "config/general-settings.json")
        if not osp.isfile(settings_file):
            return

        # read old properties
        props_file = osp.join(diphome.path, "config", "dip.properties")
        if not osp.isfile(props_file):
            # no legacy properties file, hence no Kerberos settings to move
            return
        props = JavaPropertiesFile(props_file)

        # put the relevant ones in the general-settings.json
        settings = base.json_loadf(settings_file)

        hadoop_settings = settings.get('hadoopSettings', {})
        settings['hadoopSettings'] = hadoop_settings

        hadoop_settings['kerberosLoginEnabled'] = props.get_as_bool('hadoop.security.kerberos', False)
        hadoop_settings['dssPrincipal'] = props.get('hadoop.kerberos.principal', None)
        hadoop_settings['dssKeytabPath'] = props.get('hadoop.kerberos.keytab', None)

        if simulate:
            # dry run: leave the file on disk untouched
            return
        base.json_dumpf(settings_file, settings)

def is_blank(obj, field):
    """Tell whether `field` is missing from `obj`, is None, or is empty.

    The previous implementation returned the opposite (True for a filled
    field), so callers guarding `obj[field]` with `not is_blank(obj, field)`
    would read a missing key.
    """
    return field not in obj or obj[field] is None or len(obj[field]) == 0

class V15ConnectionNamingRule(migration_json.JsonMigrationOperation):
    """Adds a `namingRule` to SQL and HDFS connections of config/connections.json."""

    def __repr__(self,):
        return "Add naming rules to connections"

    def transform(self, obj, filepath):
        """Derive `namingRule` (and `defaultDatabase`) from the legacy connection params."""
        connections = obj.get("connections", {})
        # Other migrations of this file handle "connections" as a
        # name -> definition mapping; iterate the definitions, not the names.
        if isinstance(connections, dict):
            connections = list(connections.values())

        for conn in connections:
            cp = conn.get("params", {})
            ct = conn.get("type", "????")

            # only non-empty legacy values are carried over
            default_schema = cp.get("defaultSchemaForManagedDatasets", None) or None
            database = cp.get("database", None) or None

            # Special case for Teradata: split of default database and default schema for managed datasets
            if ct == "Teradata":
                if default_schema is not None:
                    cp["defaultDatabase"] = default_schema

            if ct in ['PostgreSQL', 'MySQL', 'Vertica', 'Redshift', 'Greenplum', 'Teradata', 'Oracle', 'SQLServer',
                      'BigQuery', 'JDBC', 'Netezza', 'SAPHANA']:

                cp["namingRule"] = {}
                if default_schema is not None:
                    cp["namingRule"]["schemaName"] = default_schema

            if ct == "HDFS":
                if database is not None:
                    cp["defaultDatabase"] = database
                cp["namingRule"] = {
                    "hdfsPathDatasetNamePrefix" : "${projectKey}/"
                }
                if database is not None:
                    cp["namingRule"]["hiveDatabaseName"] = database

        return obj


    def file_patterns(self,):
        return ["config/connections.json"]

    def jsonpath(self,):
        return ""


def migrate_project_level_permissions(from_perms):
    """Convert a list of legacy privilege names into a dict of boolean flags.

    READ_* names set "readProjectContent", WRITE_* names set
    "writeProjectContent" and ADMIN sets "admin". Other names are ignored.
    """
    read_privileges = ("READ_DATA", "READ_METADATA", "READ_SCHEMA")
    write_privileges = ("WRITE_DATA", "WRITE_METADATA", "WRITE_SCHEMA")

    to_perms = {}
    for privilege in from_perms:
        if privilege in read_privileges:
            flag = "readProjectContent"
        elif privilege in write_privileges:
            flag = "writeProjectContent"
        elif privilege == "ADMIN":
            flag = "admin"
        else:
            continue
        to_perms[flag] = True
    return to_perms

class V15ProjectAPIKeys(migration_json.ProjectConfigJsonMigrationOperation):
    """Converts the privileges of each project API key and assigns it a new random id."""

    def __repr__(self,):
        return "Update project-specific API keys"

    def transform(self, obj, filepath=None):
        if obj is None:
            return None

        for api_key in obj:
            legacy_privileges = api_key.get("projectPrivileges", [])
            # EXEC_SQLIKE has no counterpart in the new flags, it is kept on the key itself
            if "EXEC_SQLIKE" in legacy_privileges:
                api_key["execSQLLike"] = True
            api_key["projectPrivileges"] = migrate_project_level_permissions(legacy_privileges)
            api_key["id"] = base.generate_random_id(16)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["apikeys.json"]

class V15ProjectAccessLevels(migration_json.ProjectConfigJsonMigrationOperation):
    """Adds the boolean permission flag matching the legacy `type` of each project permission."""

    def __repr__(self,):
        return "Update project access levels"

    def transform(self, obj, filepath=None):
        """Set one flag per permission entry; entries with an unknown type are left as-is."""
        for perm in obj.get("permissions", []):
            pt = perm.get("type", "????")

            if pt == "ADMIN":
                perm["admin"] = True
            elif pt == "ANALYST_READWRITE":
                perm["writeProjectContent"] = True
            elif pt == "ANALYST_READONLY":
                perm["readProjectContent"] = True
            elif pt == "READER":
                # a read-only level must not be granted a write permission
                perm["readDashboards"] = True

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["params.json"]


class V15GlobalAPIKeys(migration_json.JsonMigrationOperation):
    """Converts the per-project privileges of each global API key and assigns it a new random id."""

    def __repr__(self,):
        return "Update global API keys"

    def transform(self, obj, filepath):
        """Migrate every key of the list `obj` in place and return it."""
        if obj is None:
            return None

        for key in obj:
            new_projects = {}
            for (project_key, perm) in key.get("projects", {}).items():
                new_projects[project_key] = migrate_project_level_permissions(perm)
                if "EXEC_SQLIKE" in perm:
                    # the flag belongs to the key; `obj` is the list of keys
                    key["execSQLLike"] = True
            key["projects"] = new_projects
            key["id"] = base.generate_random_id(16)
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/public-apikeys.json"]


class V15SplitRecipesOutput(migration_json.ProjectConfigJsonMigrationOperation):
    """Makes split recipe payloads reference their outputs by index instead of by dataset name."""

    def __repr__(self,):
        return "Change split recipes outputs representation"

    @staticmethod
    def _index_split_outputs(splits, output_names, filepath):
        """Replace `outputDataset` by `outputIndex` in every split lacking an index.

        Splits whose dataset is not one of `output_names` get index -1 and a warning.
        """
        for v in splits or []:
            if v.get("outputIndex", None) is not None:
                continue
            dataset_name = v.get("outputDataset", None)
            if dataset_name is not None and dataset_name in output_names:
                v["outputIndex"] = output_names.index(dataset_name)
                del v["outputDataset"]
            else:
                v["outputIndex"] = -1
                print("WARNING: recipe file is broken: outputs are inconsistent. File: " + filepath)

    def transform(self, obj, filepath):
        """Rewrite filter splits, value splits and the default output of a *.split payload."""
        defaultOutputDataset = obj.get("defaultOutputDataset", None)

        # "<name>.split" -> "<name>.json": the recipe definition next to the payload
        recipe_filepath = filepath[0:-5] + "json"

        if not osp.exists(recipe_filepath):
            # Payload without its recipe definition: nothing to resolve names against
            return obj

        recipe_data = base.json_loadf(recipe_filepath)
        main_items = recipe_data.get("outputs", {}).get("main", {}).get("items", None)
        if main_items is None:
            return obj #give up too
        output_names = [output["ref"] for output in main_items]

        self._index_split_outputs(obj.get("filterSplits", None), output_names, filepath)
        self._index_split_outputs(obj.get("valueSplits", None), output_names, filepath)

        if defaultOutputDataset is not None:
            obj["defaultOutputIndex"] = -1
            if defaultOutputDataset in output_names:
                obj["defaultOutputIndex"] = output_names.index(defaultOutputDataset)
                del obj["defaultOutputDataset"]

        else:
            # NOTE(review): this fires when no default output is declared at all,
            # not when the declared one is unknown -- confirm this is intended
            print("WARNING: recipe file is broken:" + filepath)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.split"]


class V15AddInstallId(migration_base.MigrationOperation):
    """Generates the `installid` option of the [general] section when it is not set."""

    def __repr__(self,):
        return "Add mandatory installation id to install.ini configuration file"

    def execute(self, diphome, simulate=False):
        config = diphome.get_install_config()
        if config.getOption('general', 'installid'):
            # an id is already configured
            return

        config.addOption('general', 'installid', base.generate_random_id(24))
        if not simulate:
            config.save()


class V15HiveDefaultDatabase(migration_json.JsonMigrationOperation):
    """Renames the `database` param of HDFS connections to `defaultDatabase`."""

    def __repr__(self,):
        return "Change defaultDatabase parameter name in connections"

    def transform(self, obj, filepath=None):
        for conn_data in obj.get("connections", {}).values():
            print("Conndata: %s" % conn_data)
            if conn_data.get("type", "??") != "HDFS":
                continue

            print("It is HDFS")
            cp = conn_data.get("params", {})
            if "database" not in cp:
                continue

            print("DB is %s" % cp["database"])
            # move the value under its new name
            cp["defaultDatabase"] = cp.pop("database")
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/connections.json"]

class V15HiveOverrideDatabase(migration_json.JsonMigrationOperation):
    """Sets hiveSettings.overrideDatabaseInLocalMetastore to True when it is not defined."""

    def __repr__(self,):
        return "Set previous value for Hive override DB setting"

    def transform(self, obj, filepath=None):
        """Add the flag to `hiveSettings`, creating that section when absent."""
        hs = obj.get("hiveSettings", None)
        if hs is None:
            # attach the section to obj, otherwise the flag would be set on a
            # throw-away dict and never saved
            hs = {}
            obj["hiveSettings"] = hs
        if not "overrideDatabaseInLocalMetastore" in hs:
            hs["overrideDatabaseInLocalMetastore"] = True
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]

class V15HiveJobCompressionCommands(migration_json.JsonMigrationOperation):
    """Sets hiveSettings.addCompressionCommands to True when it is not defined."""

    def __repr__(self,):
        return "Activate adding compression commands to Hive jobs"

    def transform(self, obj, filepath=None):
        """Add the flag to `hiveSettings`, creating that section when absent."""
        hs = obj.get("hiveSettings", None)
        if hs is None:
            # attach the section to obj, otherwise the flag would be set on a
            # throw-away dict and never saved
            hs = {}
            obj["hiveSettings"] = hs
        if not "addCompressionCommands" in hs:
            hs["addCompressionCommands"] = True
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]


class V15HiveExecutionConfig(migration_json.JsonMigrationOperation):
    """Ensures hiveSettings.executionConfigs holds at least a config named "default"."""

    def __repr__(self,):
        return "Add a default Hive execution config"

    def transform(self, obj, filepath=None):
        """Add the default execution config, creating `hiveSettings` when absent."""
        hs = obj.get("hiveSettings", None)
        if hs is None:
            # attach the section to obj, otherwise the config would be added to
            # a throw-away dict and never saved
            hs = {}
            obj["hiveSettings"] = hs
        # a missing, null or empty list all mean "no execution config yet"
        if not hs.get("executionConfigs", None):
            hs["executionConfigs"] = [{ "name" : "default" }]
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]


class V15RenameJupyterNotebooks(migration_base.ProjectLocalMigrationOperation):
    """Renames the *.ipynb files of a project according to rename_jupyter_notebook()."""

    def __repr__(self,):
        return "Rename Jupyter notebooks"

    def execute(self, project_paths):
        notebooks_dir = project_paths.jupyter_notebooks
        print("Renaming Jupyter notebooks from %s" % notebooks_dir)
        if not osp.isdir(notebooks_dir):
            return

        for old_name in os.listdir(notebooks_dir):
            if not old_name.endswith(".ipynb"):
                continue
            new_name = rename_jupyter_notebook(old_name)
            print("Renaming Jupyter notebook: %s -> %s" % (old_name, new_name))
            if new_name == old_name:
                # name is already fine, nothing to move
                continue
            source = osp.join(notebooks_dir, old_name)
            target = osp.join(notebooks_dir, new_name)
            shutil.move(source, target)


class V15MoveDatabases(migration_base.MigrationOperation):
    """Moves the H2 database files from the data dir root into its "databases" folder."""

    def __repr__(self,):
        return "Move internal databases"

    def execute(self, diphome):
        db_folder = osp.join(diphome.path, "databases")
        if not osp.isdir(db_folder):
            os.makedirs(db_folder)

        # (file name at the root, file name in the databases folder)
        relocations = (
            ("jobs_database.h2.h2.db", "jobs.h2.db"),
            ("flow_state.h2.db", "flow_state.h2.db"),
            ("statsdb.h2.db", "dss_usage.h2.db"),
        )
        for old_name, new_name in relocations:
            source = osp.join(diphome.path, old_name)
            if osp.isfile(source):
                shutil.move(source, osp.join(db_folder, new_name))


class V15DKUCommand(migration_base.MigrationOperation):
    """Runs `dku __migrate_v15` once the other steps are done."""

    def __repr__(self,):
        return "Final operations (migrate timelines)"

    def execute(self, diphome):
        # all the work happens in post_execute
        pass

    def post_execute(self, diphome):
        tmp_folder = osp.join(diphome.path, "tmp")
        if not osp.isdir(tmp_folder):
            os.makedirs(tmp_folder)

        clean_h2_timestamps(diphome)

        # DKUBIN, when set, overrides the launcher shipped in the data dir
        dkupath = os.getenv("DKUBIN", diphome.path + "/bin/dku")
        command = '"%s" __migrate_v15' % dkupath
        subprocess.check_call(command, shell=True)


class V15FixScoringRecipes(migration_base.ProjectLocalMigrationOperation):
    """Splits the inputs of scoring recipes into "main", "model" and "scriptDeps" roles."""

    def __repr__(self):
        return "Rewire scoring recipe inputs"

    def extract_datasets(self, project_paths):
        """Return the set of dataset names (the *.json files of the datasets folder)."""
        datasets_folder = osp.join(project_paths.config, "datasets")
        if not osp.isdir(datasets_folder):
            return set()
        split_names = [os.path.splitext(p) for p in os.listdir(datasets_folder)]
        return {n for (n, ext) in split_names if ext == ".json"}

    def extract_recipe_parameter_paths(self, project_paths):
        """Return the paths of the *.json files of the recipes folder."""
        recipe_folder = osp.join(project_paths.config, "recipes")
        if not osp.isdir(recipe_folder):
            return []
        split_names = [os.path.splitext(p) for p in os.listdir(recipe_folder)]
        return [osp.join(recipe_folder, n + ext) for (n, ext) in split_names if ext == ".json"]

    def execute(self, project_paths):
        """Rewrite the inputs of every prediction/clustering scoring recipe on disk."""
        types = {"prediction_scoring", "clustering_scoring"}
        datasets = self.extract_datasets(project_paths)
        for path in self.extract_recipe_parameter_paths(project_paths):
            par = base.json_loadf(path)
            # broken recipes may have no type: leave them alone
            if par.get("type", None) not in types:
                continue

            dataset_items = []
            model_item = None
            for items in par.get("inputs", {}).values():
                for item in items.get("items", []):
                    chunks = item["ref"].split(".")
                    if len(chunks) == 1:
                        local_name = chunks[0]
                    else:
                        local_name = chunks[1]
                    might_be_a_model = len(local_name) == 8

                    if not local_name in datasets and might_be_a_model:
                        # has to be the model
                        model_item = item
                    else:
                        dataset_items.append(item)

            inputs = {
                # slicing keeps "main" empty (instead of failing) when no dataset input was found
                "main": {"items" : dataset_items[:1]},
                "model": {"items": [] if model_item is None else [model_item]}
            }

            if len(dataset_items) > 1:
                inputs["scriptDeps"] = {"items" : dataset_items[1:]}
            par["inputs"] = inputs
            base.json_dumpf(path, par)


###############################################################################
# V16 / DSS 4.0.5
###############################################################################

class V16DKUCommand(migration_base.MigrationOperation):
    """Runs `dku __migrate_v16` once the other steps are done."""

    def __repr__(self,):
        return "Switch timestamps to longs in databases"

    def execute(self, diphome):
        # all the work happens in post_execute
        pass

    def post_execute(self, diphome):
        tmp_folder = osp.join(diphome.path, "tmp")
        if not osp.isdir(tmp_folder):
            os.makedirs(tmp_folder)

        clean_h2_timestamps(diphome)

        # DKUBIN, when set, overrides the launcher shipped in the data dir
        dkupath = os.getenv("DKUBIN", diphome.path + "/bin/dku")
        command = '"%s" __migrate_v16' % dkupath
        subprocess.check_call(command, shell=True)

class V16UpdateWeeklyTriggers(migration_json.ProjectConfigJsonMigrationOperation):
    """Adds a `daysOfWeek` list, built from `dayOfWeek`, to weekly temporal triggers."""

    def __repr__(self,):
        return "Update time-based trigger with weekly frequency"

    def transform(self, trigger, filepath):
        if trigger.get('type', None) != 'temporal':
            return trigger

        params = trigger.get('params', {})
        if params.get('frequency', None) != 'Weekly':
            return trigger

        print("Handling trigger %s" % trigger.get('name', ''))
        # move the dayOfWeek to the list of daysOfWeek
        single_day = params.get('dayOfWeek', '')
        params['daysOfWeek'] = [single_day] if single_day != '' else []
        return trigger

    def jsonpath(self,):
        return "triggers"

    def file_patterns(self,):
        return ["scenarios/*.json"]



###############################################################################
# V17 / DSS 4.1
###############################################################################


class V17UpdateMailAttachment(migration_json.ProjectConfigJsonMigrationOperation):
    """Converts the attachLog / attachedDataset / format settings of mail messagings into an `attachments` list."""

    def __repr__(self,):
        return "Update parametrization of attachments on scenario mail reporters"

    @staticmethod
    def _export_params(current_format):
        """Return the export parameters for a legacy format ('TSV' or 'EXCEL'), {} otherwise."""
        if current_format == 'TSV':
            originating_option = "tsv-excel-header-gz"
            export_format = {
                "type": "csv",
                "params": {
                    "style": "excel",
                    "charset": "utf8",
                    "separator": ",",
                    "quoteChar": "\"",
                    "escapeChar": "\\",
                    "dateSerializationFormat": "ISO",
                    "arrayMapFormat": "json",
                    "skipRowsBeforeHeader": 0,
                    "parseHeaderRow": True,
                    "skipRowsAfterHeader": 0,
                    "normalizeBooleans": False,
                    "normalizeDoubles": True,
                    "compress": "gz"
                }
            }
        elif current_format == 'EXCEL':
            originating_option = "excel"
            export_format = {
                "type": "excel",
                "params": {
                    "xlsx": True,
                    "preserveNumberFormatting": False,
                    "parseDatesToISO": False,
                    "skipRowsBeforeHeader": 0,
                    "parseHeaderRow": False,
                    "skipRowsAfterHeader": 0
                }
            }
        else:
            return {}

        return {
            "destinationType": "DOWNLOAD",
            "selection": {
                "samplingMethod": "FULL",
                "partitionSelectionMethod": "ALL",
                "selectedPartitions": []
            },
            "advancedMode": False,
            "originatingOptionId": originating_option,
            "format": export_format,
            "destinationDatasetConnection": "filesystem_managed"
        }

    def transform(self, obj, filepath):
        """Set `attachments` on the mail messagings of the reporters and send_report steps of a scenario."""
        def handle_messaging(messaging):
            if messaging.get('type', None) != 'mail-scenario':
                return
            configuration = messaging.get('configuration', {})
            # both fields may be present but null in the file: treat that as empty
            message = configuration.get('message', None) or ''
            attached_dataset = configuration.get('attachedDataset', None) or ''

            attachments = []
            if configuration.get('attachLog', False):
                attachments.append({'type':'LOG'})
            if len(attached_dataset) > 0:
                export_params = self._export_params(configuration.get('format', None))
                html_variable_modes = [False]
                if 'datasetHtml' in message:
                    # add a second time, for the variable
                    html_variable_modes.append(True)
                for as_html_variable in html_variable_modes:
                    attachment_params = {'attachedDataset':attached_dataset, 'addAsHtmlVariable':as_html_variable, 'exportParams':export_params}
                    attachments.append({'type':'DATASET', 'params':attachment_params})
            configuration['attachments'] = attachments

        def handle_step(step):
            if step.get("type", "") == 'send_report':
                handle_messaging(step.get('params', {}).get('messaging', {}))

        def handle_reporter(reporter):
            handle_messaging(reporter.get('messaging', {}))

        for reporter in obj.get("reporters", []):
            handle_reporter(reporter)
        for step in obj.get("params", {}).get("steps", []):
            handle_step(step)

        return obj

    def file_patterns(self,):
        return ["scenarios/*.json"]


class V17DKUCommand(migration_base.MigrationOperation):
    """Runs `dku __migrate_v17` once the other steps are done."""

    def __repr__(self,):
        return "Set partition information on folder metrics and checks"

    def execute(self, diphome):
        # all the work happens in post_execute
        pass

    def post_execute(self, diphome):
        """Ensure the tmp folder exists, then run the dku launcher.

        Raises subprocess.CalledProcessError when the command exits with a
        non-zero status.
        """
        tmp_folder = osp.join(diphome.path, "tmp")
        if not osp.isdir(tmp_folder):
            os.makedirs(tmp_folder)

        import subprocess

        # DKUBIN, when set, overrides the launcher shipped in the data dir
        dkupath = os.getenv("DKUBIN", diphome.path + "/bin/dku")
        # argv list instead of a shell string: the path was concatenated
        # unquoted, which broke on paths containing spaces
        subprocess.check_call([dkupath, "__migrate_v17"])


def v17_find_and_replace_compute_dims(obj):
    """Flip every `computeModeDim` value (0 <-> 1) found in a nested structure.

    Dicts and lists are walked recursively and modified in place; other
    values are ignored. Dict and list subclasses (e.g. OrderedDict) are
    handled like their base types.
    """
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == 'computeModeDim':
                obj[key] = (value + 1) % 2 # 0 becomes 1, 1 becomes 0
            else:
                v17_find_and_replace_compute_dims(value)

    elif isinstance(obj, list):
        for value in obj:
            v17_find_and_replace_compute_dims(value)

def v17_add_empty_bins_mode(obj):
    """Default `numParams.emptyBinsMode` to "ZEROS" on the generic dimensions of a chart.

    `obj` is a chart dict (dimensions are read from obj["def"]) or a list of
    charts. Existing `emptyBinsMode` values are kept. Changes are made in
    place. Dict and list subclasses (e.g. OrderedDict) are handled like
    their base types.
    """
    if isinstance(obj, dict):
        if "def" in obj:
            obj_def = obj["def"]
            for dimension_key in ("genericDimension0", "genericDimension1"):
                if dimension_key not in obj_def:
                    continue
                for dim in obj_def[dimension_key]:
                    dim["numParams"] = dim.get("numParams", {})
                    dim["numParams"]["emptyBinsMode"] = dim["numParams"].get("emptyBinsMode", "ZEROS")
    elif isinstance(obj, list):
        for value in obj:
            v17_add_empty_bins_mode(value)


def v17_migrate_chart(chart):
    """Run the V17 chart fix-ups on `chart` and return it."""
    fixups = (v17_find_and_replace_compute_dims, v17_add_empty_bins_mode)
    for apply_fixup in fixups:
        apply_fixup(chart)
    return chart


class V17ChartsInExplore(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies v17_migrate_chart to the "charts" entry of explore files."""

    def __repr__(self,):
        return "Migrate charts in explore"

    def transform(self, obj, filepath):
        migrated = v17_migrate_chart(obj)
        return migrated

    def jsonpath(self,):
        return "charts"

    def file_patterns(self,):
        return ["explore/*.json"]


class V17ChartsInAnalysis(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies v17_migrate_chart to the "charts" entry of analysis core params."""

    def __repr__(self, ):
        return "Migrate charts in analysis"

    def transform(self, obj, filepath):
        migrated = v17_migrate_chart(obj)
        return migrated

    def jsonpath(self, ):
        return "charts"

    def file_patterns(self, ):
        return ["analysis/*/core_params.json"]


class V17ChartsInAnalysisModels(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies v17_migrate_chart to the "predictionDisplayCharts" entry of ML task params."""

    def __repr__(self, ):
        return "Migrate charts in analysis predicted data"

    def transform(self, obj, filepath):
        migrated = v17_migrate_chart(obj)
        return migrated

    def jsonpath(self, ):
        return "predictionDisplayCharts"

    def file_patterns(self, ):
        return ["analysis/*/ml/*/params.json"]

class V17ChartsInInsights(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies v17_migrate_chart to the params of insights whose type is "chart"."""

    def __repr__(self, ):
        return "Migration charts in insights data"

    def transform(self, obj, filepath):
        if obj.get("type", "") != "chart":
            # other insight types are left untouched
            return obj
        obj["params"] = v17_migrate_chart(obj["params"])
        return obj

    def jsonpath(self, ):
        return ""

    def file_patterns(self, ):
        return ["insights/*.json"]

class V17AddManagedFoldersConnection(migration_json.JsonMigrationOperation):
    """Registers a 'filesystem_folders' Filesystem connection rooted at ${dip.home}/managed_folders."""

    def __repr__(self,):
        return "Add connection to the managed_folders in $DIP_HOME"

    def transform(self, connections, filepath=None):
        folders_connection = {
            "params": {
                "root": "${dip.home}/managed_folders"
            },
            "allowManagedDatasets":False,
            "type": "Filesystem"
        }
        # any existing entry with that name is replaced
        connections['filesystem_folders'] = folders_connection
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]


class V17FoldersOnProviders(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Rewrites managed folder definitions so they point at a connection.

    Each folder gets ``type`` set to ``Filesystem`` and a ``params`` dict holding a
    connection name and a path derived from the legacy ``path`` field (which is
    itself left in place). The connections referenced are remembered in
    ``self.connections_used`` so that ``get_manifest_additions`` can declare them.
    """

    def __init__(self):
        # connection name -> connection type, filled in by transform()
        self.connections_used = {}

    def __repr__(self):
        return "Update folders to handle several FS backends"

    def transform(self, folder, filepath=None):
        """
        Convert one folder definition in place and return it.

        Paths under ``${env:DIP_HOME}/managed_folders/`` or ``${dip.home}/managed_folders/``
        become paths relative to the ``filesystem_folders`` connection; any other
        path is kept as is on the ``filesystem_root`` connection.
        """
        folder['type'] = 'Filesystem'
        # A missing or null 'path' means the folder was non functional => it's ok
        # to put a dummy path. (A null value used to crash on .startswith.)
        old_path = folder.get('path', None)
        if old_path is None:
            old_path = '/path/not/found'

        expected_starts = ['${env:DIP_HOME}/managed_folders/', '${dip.home}/managed_folders/']
        for es in expected_starts:
            if old_path.startswith(es):
                folder['params'] = {'connection' : 'filesystem_folders', 'path' : old_path[len(es):]}
                break
        else:
            # no known prefix matched
            folder['params'] = {'connection' : 'filesystem_root', 'path' : old_path}

        self.connections_used[folder['params']['connection']] = 'Filesystem'
        return folder

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["managed_folders/*.json"]

    def get_manifest_additions(self, additions, project_paths):
        """
        Add the connections recorded by ``transform`` to ``additions['requiredConnections']``.

        Existing entries are never overwritten; a type mismatch is only reported
        on stdout. Does nothing when no folder was transformed.
        """
        if not self.connections_used:
            return # no folder was changed
        required_connections = additions.get('requiredConnections', {})
        for connection_name, connection_type in iteritems(self.connections_used):
            if connection_name in required_connections:
                # .get() here too: the entry may have no 'type' at all, which is
                # precisely a case where the types differ.
                existing_type = required_connections[connection_name].get('type', None)
                if connection_type != existing_type:
                    print('Additional required connection %s with different type (%s instead of %s)' % (connection_name, existing_type, connection_type))
            else:
                required_connections[connection_name] = {'name':connection_name, 'type':connection_type}
        additions['requiredConnections'] = required_connections

class V17UpdatePluginSettings(migration_json.JsonMigrationOperation):
    """Nests the whole content of each plugin settings file under a ``config`` key."""

    def file_patterns(self):
        return ["config/plugins/*/settings.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Return a new dict whose single ``config`` entry is the former content."""
        wrapped = {}
        wrapped['config'] = obj
        return wrapped

    def __repr__(self):
        return "Update the plugin settings files"

class V17WebApps(migration_json.ProjectConfigJsonMigrationOperation):
    """Converts untyped web app definitions into ``STANDARD`` web apps with a ``params`` dict."""

    def file_patterns(self):
        return ["web_apps/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """
        Convert ``obj`` in place and return it.

        Web apps that already carry a non-null ``type`` are returned unchanged.
        The legacy top-level fields are copied into ``params``; they are not
        removed from ``obj``.
        """
        if obj.get("type") is not None:
            return obj

        backend_enabled = obj.get("pyBackendEnabled", False)
        params = {
            "html": obj.get("html", None),
            "css": obj.get("css", None),
            "js": obj.get("js", None),
            "python": obj.get("pyBackendCode", None),

            "backendEnabled": backend_enabled,
            "autoStartBackend": obj.get("autoStartPyBackend", False),

            "libraries": obj.get("libraries", None)
        }

        # The legacy-URL flag mirrors whether the old Python backend was enabled.
        obj["hasLegacyBackendURL"] = backend_enabled
        obj['type'] = 'STANDARD'
        obj['params'] = params
        return obj

    def __repr__(self):
        return "Transform webapps to 4.1 format"


class V17WebAppsSnippets(migration_base.MigrationOperation):
    """Renames ``config/code-snippets/webapp`` to ``config/code-snippets/webapp_standard``."""

    def __repr__(self):
        return "Move web apps code snippets"

    def execute(self, diphome, simulate=False):
        """
        Perform the rename below ``diphome.path``.

        Nothing is done when ``simulate`` is true or when the legacy folder does
        not exist. Failures are logged and never propagated (best effort).
        """
        if simulate:
            return
        try:
            snippets_root = osp.join(diphome.path, "config", "code-snippets")
            old = osp.join(snippets_root, "webapp")
            new = osp.join(snippets_root, "webapp_standard")
            if not osp.exists(old):
                # No web app snippets to move: not an error, so don't log a
                # traceback for it.
                return
            os.rename(old, new)
        except Exception as e:
            logging.exception("Failed to move code snippets for webapps: %s", e)



class V17WebAppsInsights(migration_json.ProjectConfigJsonMigrationOperation):
    """Tags ``web_app`` insights with ``params.webAppType = "STANDARD"``."""

    def __repr__(self):
        return "Transform webapps insights to 4.1 format"

    def transform(self, obj, filepath):
        """Set the web app type on ``web_app`` insights, in place; other insights are untouched."""
        if obj.get("type", "???") == "web_app":
            # setdefault() rather than get(): with get(), a missing "params"
            # yields a temporary dict and the assignment is silently lost.
            obj.setdefault("params", {})["webAppType"] = "STANDARD"
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["insights/*.json"]

class V17SplitUnfoldProcessor(migration_app.ShakerStepMigrationOperation):
    """Adds the ``limit`` and ``overflowAction`` parameters to ``SplitUnfold`` steps."""

    def __init__(self):
        super(V17SplitUnfoldProcessor, self).__init__("SplitUnfold")

    def transform_step(self, step):
        """Set ``limit`` to 0 and ``overflowAction`` to ``KEEP`` on the step params, if any."""
        assert step["type"] == "SplitUnfold"
        step_params = step.get('params', None)
        if step_params is None:
            # No params on this step: nothing to initialise.
            return step
        step_params["limit"] = 0
        step_params["overflowAction"] = "KEEP"
        return step


class V17UnfoldProcessor(migration_app.ShakerStepMigrationOperation):
    """Migrates ``Unfold`` steps: explicit prefix string plus ``limit`` / ``overflowAction``."""

    def __init__(self):
        super(V17UnfoldProcessor, self).__init__("Unfold")

    def transform_step(self, step):
        """
        Update the step params in place, if any, and return the step.

        The former ``prefix`` value is removed; when it was truthy and a
        ``column`` is set, ``prefix`` becomes ``<column>_``.
        """
        assert step["type"] == "Unfold"
        step_params = step.get('params', None)
        if step_params is None:
            return step

        had_prefix = step_params.pop("prefix", False)
        source_column = step_params.get('column', None)
        if had_prefix and source_column:
            step_params['prefix'] = source_column + "_"

        step_params["limit"] = 0
        step_params["overflowAction"] = "KEEP"
        return step

# instance migration, able to keep the files from the datasets
class V17ChangeRemoteFilesDataset(migration_base.MigrationOperation):
    """
    Instance-level conversion of ``RemoteFiles`` datasets.

    For every ``RemoteFiles`` dataset found under ``config/projects/<key>/datasets``
    this writes a managed folder definition and a ``download`` recipe, then
    rewrites the dataset itself as a ``FilesInFolder`` dataset reading from that
    folder. When the dataset stored its files on a Filesystem connection, the
    files are moved under ``<dip_home>/managed_folders``.
    """

    def __repr__(self):
        return "Change remote files datasets into download recipes (instance level)"

    def execute(self, diphome, simulate=False):
        """
        Convert the datasets of every project folder that has a ``params.json``.

        ``simulate`` defaults to False and mirrors the signature of the other
        instance-level operations in this file; when true, nothing is touched.
        """
        if simulate:
            return
        config_folder = osp.join(diphome.path, "config")
        projects_folder = osp.join(config_folder, "projects")
        if not osp.isdir(projects_folder):
            return

        for project_key in os.listdir(projects_folder):
            if not osp.isfile(osp.join(projects_folder, project_key, "params.json")):
                continue

            logging.info("Applying migration of RemoteFiles datasets on project %s" % project_key)
            self.convert_project(config_folder, projects_folder, project_key, diphome.path)

    @staticmethod
    def _write_json(file_path, obj):
        # Serialise obj the way every file of this migration is written.
        with open(file_path, 'w') as f:
            f.write(json.dumps(obj, indent=2, sort_keys=True))

    def convert_project(self, config_folder, projects_folder, project_key, dip_home):
        """Convert every ``RemoteFiles`` dataset definition of one project."""
        project_folder = osp.join(projects_folder, project_key)
        datasets_folder = osp.join(project_folder, "datasets")
        if not osp.isdir(datasets_folder):
            return
        for dataset_file_name in os.listdir(datasets_folder):
            # Dataset definitions are the *.json files; convert_dataset() also
            # relies on that extension to derive the dataset name.
            if not dataset_file_name.endswith('.json'):
                continue
            dataset_file = osp.join(datasets_folder, dataset_file_name)
            with open(dataset_file, 'r') as f:
                dataset = json.load(f)
            # The file is closed at this point: convert_dataset() rewrites it.
            if dataset.get('type', None) == 'RemoteFiles':
                print("Migrating %s" % dataset_file)
                self.convert_dataset(dataset, dataset_file, project_folder, project_key, config_folder, dip_home)

    def convert_dataset(self, dataset, dataset_file, project_folder, project_key, config_folder, dip_home):
        """
        Convert one dataset: write the folder and the recipe, rewrite the dataset file.

        ``dataset`` is the parsed content of ``dataset_file`` and is modified in place.
        """
        folders_folder = osp.join(project_folder, "managed_folders")
        if not osp.exists(folders_folder):
            os.mkdir(folders_folder) # ensure existence
        recipes_folder = osp.join(project_folder, "recipes")
        if not osp.exists(recipes_folder):
            os.mkdir(recipes_folder) # ensure existence

        dataset_file_name = osp.basename(dataset_file)
        dataset_name = dataset.get('name', dataset_file_name[:-5]) # to remove the .json extension

        # find the connection used by the dataset to store files
        connections_file = osp.join(config_folder, 'connections.json')
        if osp.exists(connections_file):
            with open(connections_file, 'r') as f:
                connections = json.load(f).get('connections', {})
        else:
            connections = {}

        dataset_params = dataset.get('params', {})
        storage_connection = dataset_params.get('connection', 'filesystem_managed')
        storage_connection_def = connections.get(storage_connection, {})
        storage_connection_type = storage_connection_def.get('type', None)
        storage_connection_root = storage_connection_def.get('params', {}).get('root', '')

        folder_id = 'v17conversion_%s' % dataset_name

        # if data was cached on filesystem, move it to the managed_folders connection
        storage_path = dataset_params.get('tmpPath', '%s.%s' % (project_key, dataset_name))
        if storage_connection_type == 'Filesystem':
            # drop the leading '/' so that osp.join() keeps the connection root
            clean_storage_path = storage_path[1:] if storage_path.startswith('/') else storage_path
            src_path = osp.join(storage_connection_root, clean_storage_path)
            new_storage_path = '%s.%s' % (project_key, folder_id)
            dst_path = osp.join(dip_home, 'managed_folders', new_storage_path)
            if osp.exists(src_path):
                shutil.move(src_path, dst_path)
                storage_path = new_storage_path
                storage_connection = 'filesystem_folders'

        # create a managed folder to hold the files
        folder = {
                    'type' : storage_connection_type,
                    'name' : '%s_storage' % dataset_name,
                    'params' : {
                        'connection' : storage_connection,
                        'path' : storage_path
                    },
                    'partitioning' : dataset.get('partitioning', {'filePathPattern':'', 'dimensions':[]}),
                    'selection' : {'partitionSelectionMethod':'ALL'},
                    'metrics' : {},
                    'checks' : {},
                    'flowOptions' : {}
                }
        self._write_json(osp.join(folders_folder, folder_id + '.json'), folder)

        # create a recipe to replace the sync logic
        recipe = {
                    'type' : 'download',
                    'params' : {
                        'deleteExtraFiles' : True,
                        'copyEvenUpToDateFiles' : False,
                        'sources' : [self.convert_source(source, connections) for source in dataset.get('remoteFiles', {}).get('sources', [])]
                    },
                    'neverRecomputeExistingPartitions' : False,
                    'optionalDependencies' : False,
                    'redispatchPartitioning' : False,
                    'doc' : 'Automatically converted from the RemoteFiles dataset %s' % dataset_name,
                    'overrideTable' : {},
                    'customMeta' : {},
                    'inputs' : {},
                    'outputs' : {'main':{'items':[{'ref':folder_id, 'appendMode':False}]}}
                }
        recipe_name = 'download_v17conversion_%s' % dataset_name
        self._write_json(osp.join(recipes_folder, recipe_name + '.json'), recipe)

        # change the old dataset to become a FilesInFolder one
        dataset['type'] = 'FilesInFolder'
        dataset['remoteFiles'] = {}
        dataset['params'] = {
                                'folderSmartId' : folder_id,
                                'itemPathPattern' : '.*',
                                'previewPartition': ''
                            }
        self._write_json(dataset_file, dataset)

    def convert_source(self, source, connections):
        """
        Translate one RemoteFiles source into a download recipe source.

        Sources without ``useConnection`` become ``URL`` sources. Otherwise the
        provider type is the type of the connection found in ``connections``,
        except for ``SSH`` connections where the source ``protocol`` (default
        ``SCP``) is used.
        """
        converted = {'useGlobalProxy' : source.get('useGlobalProxy', False)}
        if not source.get('useConnection', False):
            converted['providerType'] = 'URL'
            converted['params'] = {'path':source.get('url', ''), 'timeout':10000}
        else:
            connection_name = source.get('connection', '')
            print('get connection %s' %  connection_name)
            connection = connections.get(connection_name, {})
            connection_type = connection.get('type', '')
            if connection_type == 'SSH':
                converted['providerType'] = source.get('protocol', 'SCP')
            else:
                converted['providerType'] = connection_type
            converted['params'] = {'connection':connection_name, 'path':source.get('path', ''), 'timeout':10000}

        return converted

# project migration, will do best effort to keep files, and leave the code to guess the providerType based on connection name
class V17ChangeRemoteFilesDatasetInProject(migration_base.ProjectLocalMigrationOperation):
    """
    Project-level conversion of ``RemoteFiles`` datasets.

    For every ``RemoteFiles`` dataset of the project this writes a managed folder
    definition and a ``download`` recipe, then rewrites the dataset as a
    ``FilesInFolder`` dataset. Connection definitions are not read here, so
    folder and connection types are guessed from names and protocols; the
    connections referenced by sources are recorded for ``get_manifest_additions``.
    """

    def __init__(self):
        # connection name -> guessed connection type, filled in by convert_source()
        self.connections_used = {}

    def __repr__(self):
        return "Change remote files datasets into download recipes (project-level)"

    @staticmethod
    def _write_json(file_path, obj):
        # Serialise obj the way every file of this migration is written.
        with open(file_path, 'w') as f:
            f.write(json.dumps(obj, indent=2, sort_keys=True))

    def execute(self, project_paths):
        """Convert every ``RemoteFiles`` dataset found under ``<project_paths.config>/datasets``."""
        project_folder = project_paths.config
        datasets_folder = osp.join(project_folder, "datasets")
        if not osp.isdir(datasets_folder):
            return
        for dataset_file_name in os.listdir(datasets_folder):
            # Dataset definitions are the *.json files; convert_dataset() also
            # relies on that extension to derive the dataset name.
            if not dataset_file_name.endswith('.json'):
                continue
            dataset_file = osp.join(datasets_folder, dataset_file_name)
            with open(dataset_file, 'r') as f:
                dataset = json.load(f)
            # The file is closed at this point: convert_dataset() rewrites it.
            if dataset.get('type', None) == 'RemoteFiles':
                print("Migrating %s" % dataset_file)
                self.convert_dataset(dataset, dataset_file, project_folder)

    def convert_dataset(self, dataset, dataset_file, project_folder):
        """
        Convert one dataset: write the folder and the recipe, rewrite the dataset file.

        ``dataset`` is the parsed content of ``dataset_file`` and is modified in place.
        """
        folders_folder = osp.join(project_folder, "managed_folders")
        if not osp.exists(folders_folder):
            os.mkdir(folders_folder) # ensure existence
        recipes_folder = osp.join(project_folder, "recipes")
        if not osp.exists(recipes_folder):
            os.mkdir(recipes_folder) # ensure existence

        dataset_file_name = osp.basename(dataset_file)
        dataset_name = dataset.get('name', dataset_file_name[:-5]) # to remove the .json extension

        dataset_params = dataset.get('params', {})
        storage_connection = dataset_params.get('connection', 'filesystem_managed')
        storage_path = dataset_params.get('tmpPath', '${projectKey}.%s' % (dataset_name))

        folder_id = 'v17conversion_%s' % dataset_name

        # create a managed folder to hold the files
        folder = {
                    'type' : ('HDFS' if 'hdfs' in storage_connection.lower() else 'Filesystem'), # might not be filesystem, but null is not an option
                    'name' : '%s_storage' % dataset_name,
                    'params' : {
                        'connection' : storage_connection,
                        'path' : storage_path
                    },
                    'partitioning' : dataset.get('partitioning', {'filePathPattern':'', 'dimensions':[]}),
                    'selection' : {'partitionSelectionMethod':'ALL'},
                    'metrics' : {},
                    'checks' : {},
                    'flowOptions' : {}
                }
        self._write_json(osp.join(folders_folder, folder_id + '.json'), folder)

        # create a recipe to replace the sync logic
        recipe = {
                    'type' : 'download',
                    'params' : {
                        'deleteExtraFiles' : True,
                        'copyEvenUpToDateFiles' : False,
                        'sources' : [self.convert_source(source) for source in dataset.get('remoteFiles', {}).get('sources', [])]
                    },
                    'neverRecomputeExistingPartitions' : False,
                    'optionalDependencies' : False,
                    'redispatchPartitioning' : False,
                    'doc' : 'Automatically converted from the RemoteFiles dataset %s' % dataset_name,
                    'overrideTable' : {},
                    'customMeta' : {},
                    'inputs' : {},
                    'outputs' : {'main':{'items':[{'ref':folder_id, 'appendMode':False}]}}
                }
        recipe_name = 'download_v17conversion_%s' % dataset_name
        self._write_json(osp.join(recipes_folder, recipe_name + '.json'), recipe)

        # change the old dataset to become a FilesInFolder one
        dataset['type'] = 'FilesInFolder'
        dataset['remoteFiles'] = {}
        dataset['params'] = {
                                'folderSmartId' : folder_id,
                                'itemPathPattern' : '.*',
                                'previewPartition': ''
                            }
        self._write_json(dataset_file, dataset)

    def convert_source(self, source):
        """
        Translate one RemoteFiles source into a download recipe source.

        Sources without ``useConnection`` become ``URL`` sources. Otherwise a
        non-empty ``protocol`` is used as provider type and the connection is
        recorded as ``SSH``; without protocol both are ``FTP``.
        """
        converted = {'useGlobalProxy' : source.get('useGlobalProxy', False)}
        if not source.get('useConnection', False):
            converted['providerType'] = 'URL'
            converted['params'] = {'path':source.get('url', ''), 'timeout':10000}
        else:
            protocol = source.get('protocol', None)
            has_protocol = protocol is not None and len(protocol) > 0
            connection_name = source.get('connection', '')
            connection_type = "SSH" if has_protocol else "FTP"
            converted['providerType'] = protocol if has_protocol else "FTP"
            converted['params'] = {'connection':connection_name, 'path':source.get('path', ''), 'timeout':10000}
            if connection_name is not None and len(connection_name) > 0:
                self.connections_used[connection_name] = connection_type

        return converted

    def get_manifest_additions(self, additions, project_paths):
        """
        Add the connections recorded by ``convert_source`` to ``additions['requiredConnections']``.

        Existing entries are never overwritten; a type mismatch is only reported
        on stdout. Does nothing when no connection was recorded.
        """
        if not self.connections_used:
            return # no dataset was changed
        required_connections = additions.get('requiredConnections', {})
        for connection_name, connection_type in iteritems(self.connections_used):
            if connection_name in required_connections:
                # .get() here too: the entry may have no 'type' at all, which is
                # precisely a case where the types differ.
                existing_type = required_connections[connection_name].get('type', None)
                if connection_type != existing_type:
                    print('Additional required connection %s with different type (%s instead of %s)' % (connection_name, existing_type, connection_type))
            else:
                required_connections[connection_name] = {'name':connection_name, 'type':connection_type}
        additions['requiredConnections'] = required_connections

# MLLib algorithms whose flat "<algorithm>_<param>" keys are regrouped by v17_make_grids
v17_grid_names = [
    "mllib_logit",
    "mllib_naive_bayes",
    "mllib_linreg",
    "mllib_rf",
    "mllib_gbt",
    "mllib_dt"
]

# Per algorithm, the parameters whose value must be a list after migration
v17_to_gridify = {
    "mllib_logit" : ["reg_param", "enet_param"],
    "mllib_naive_bayes" : ["lambda"],
    "mllib_linreg": ["reg_param", "enet_param"],
    "mllib_rf": ["max_depth", "step_size", "num_trees"],
    "mllib_gbt": ["max_depth", "step_size", "num_trees"],
    "mllib_dt": ["max_depth"]
}


def _v17_listify_grid_params(grids):
    """
    Wrap scalar grid parameters into single-element lists, in place.

    ``grids`` maps algorithm names to parameter dicts; only the parameters
    listed in ``v17_to_gridify`` are touched. Anything that is not a dict
    (missing or null entries) is skipped.
    """
    if not isinstance(grids, dict):
        return
    for grid_name, par_names in v17_to_gridify.items():
        grid = grids.get(grid_name)
        if not isinstance(grid, dict):
            continue
        for par in par_names:
            if par in grid and not isinstance(grid[par], list):
                grid[par] = [grid[par]]


def v17_make_grids(obj):
    """
    Build ``obj["mllib_grids"]`` from the flat ``<algorithm>_<param>`` keys of ``obj``.

    Every algorithm of ``v17_grid_names`` gets an entry (possibly empty); a
    ``custom_mllib`` entry is copied over when present. The flat keys are left
    in place. Returns ``obj``.
    """
    mllib_grid = {}
    if "custom_mllib" in obj:
        mllib_grid["custom_mllib"] = obj["custom_mllib"]
    for grid_name in v17_grid_names:
        # Match on the full "<algorithm>_" prefix: a key equal to the algorithm
        # name, or merely sharing its first characters, is not a parameter of it
        # (splitting such a key used to raise IndexError).
        prefix = grid_name + "_"
        mllib_grid[grid_name] = dict(
            (key[len(prefix):], value) for key, value in obj.items() if key.startswith(prefix)
        )
    obj["mllib_grids"] = mllib_grid
    return obj

def v17_gridify(obj):
    """Turn the gridifiable parameters under ``obj["mllib_grids"]`` into lists; returns ``obj``."""
    _v17_listify_grid_params(obj.get("mllib_grids", {}))
    return obj

def v17_migrate_mltask(obj):
    """Turn the gridifiable parameters under ``obj["modeling"]`` into lists; returns ``obj``."""
    _v17_listify_grid_params(obj.get("modeling", {}))
    return obj

def v17_migrate_resolved(obj):
    """Regroup then gridify resolved MLLib parameters; clustering params are returned untouched."""
    if "ts_kmeans_k" in obj:  # don't do it for clustering
        return obj
    else:
        return v17_gridify(v17_make_grids(obj))

class V17MLLibUnresolvedGridsInSM(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies ``v17_migrate_mltask`` to the ``miniTask`` of prediction saved models."""

    def __init__(self):
        pass

    def file_patterns(self):
        return ["saved_models/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Migrate ``obj["miniTask"]`` when it exists and is a PREDICTION task; return ``obj``."""
        mini_task = obj.get("miniTask", None)
        if mini_task is None or mini_task.get("taskType", None) != "PREDICTION":
            return obj
        obj["miniTask"] = v17_migrate_mltask(mini_task)
        return obj

    def __repr__(self):
        return "Move SavedModel MLLib parameters to gridified versions"

class V17MLLibUnresolvedGridsInAnalysis(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies ``v17_migrate_mltask`` to the params of prediction ML tasks in analyses."""

    def __init__(self):
        pass

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Return the migrated task for PREDICTION tasks, and ``obj`` unchanged otherwise."""
        is_prediction = obj.get("taskType", None) == "PREDICTION"
        return v17_migrate_mltask(obj) if is_prediction else obj

    def __repr__(self):
        return "Move analysis MLLib parameters to gridified versions"

class V17MLLibResolvedGrids(migration_base.ProjectLocalMigrationOperation):
    """
    Migrates ResolvedPredictionModelingParameters

    Rewrites, in place, the ``mltask.json``, ``rmodeling_params.json`` and
    ``actual_params.json`` files found under ``project_paths.analysis_data`` and
    ``project_paths.saved_models``. Each file is handled on a best-effort basis:
    a failure is printed and the next file is processed.
    """
    def __init__(self):
        pass

    def __repr__(self):
        return "Move resolved mllib parameters back into their gridified versions"

    @staticmethod
    def _migrate_actual_params(data):
        # In actual_params files, the resolved parameters live under "resolved".
        data["resolved"] = v17_migrate_resolved(data.get("resolved", {}))
        return data

    @staticmethod
    def _migrate_files(pattern, message, migrate):
        # Load, migrate and write back every file matching the glob pattern.
        # `message` is a %-format announcing the file being processed.
        for json_file in glob(pattern):
            print(message % json_file)
            try:
                data = base.json_loadf(json_file)
                data = migrate(data)
                base.json_dumpf(json_file, data)
            except Exception as e:
                # best effort: report and carry on with the other files
                print("Model migration FAILED: %s" % e)

    def execute(self, project_paths):
        """Run the migration over the analysis data first, then over the saved models."""
        analysis_data = project_paths.analysis_data
        self._migrate_files("%s/*/*/sessions/*/mltask.json" % analysis_data,
                            "Migrating saved ML Task session: %s ",
                            v17_migrate_mltask)
        self._migrate_files("%s/*/*/sessions/*/*/*/rmodeling_params.json" % analysis_data,
                            "Migrating saved ML Task rmodeling file: %s",
                            v17_migrate_resolved)
        self._migrate_files("%s/*/*/sessions/*/*/*/actual_params.json" % analysis_data,
                            "Migrating saved ML Task actualparams file: %s",
                            self._migrate_actual_params)

        saved_models = project_paths.saved_models
        # modelid/versions/vid/rmodeling_params.json
        self._migrate_files("%s/*/versions/*/rmodeling_params.json" % saved_models,
                            "Migrating saved ML Task rmodeling file: %s",
                            v17_migrate_resolved)
        # modelid/versions/vid/actual_params.json
        # (this step used to announce these files as "rmodeling" files)
        self._migrate_files("%s/*/versions/*/actual_params.json" % saved_models,
                            "Migrating saved ML Task actualparams file: %s",
                            self._migrate_actual_params)

class V17ComputedColumnsGroupingRecipe(migration_json.ProjectConfigJsonMigrationOperation):
    """Turns custom grouping keys (keys without a ``column``) into SQL computed columns."""

    def __repr__(self):
        return "Grouping recipe: move the custom grouping key to a computed column"

    def file_patterns(self):
        return ["recipes/*.grouping"]

    def jsonpath(self):
        return ""

    def transform(self, grouping, filepath=None):
        """
        Update ``grouping`` in place and return it.

        Each key whose ``column`` is missing or null yields one computed column
        built from its ``customName`` / ``colType`` / ``customExpr``; the key is
        then pointed at that column and its aggregation flags default to False.
        """
        grouping.setdefault('computedColumns', [])
        computed_columns = grouping['computedColumns']
        custom_keys_seen = 0
        for gkey in grouping['keys']:
            if gkey.get('column', None) is not None:
                continue  # regular key, already bound to a column

            custom_keys_seen += 1
            column_name = gkey.get('customName', 'newComputedColumn' + str(custom_keys_seen))
            column_type = gkey.get('colType', 'DOUBLE').lower()
            computed_columns.append({
                "name": column_name,
                "type": column_type,
                "expr": gkey.get('customExpr', ''),
                "mode": "SQL"
            })

            # Same key order as the historical implementation.
            gkey.setdefault('last', False)
            gkey.setdefault('max', False)
            gkey['column'] = column_name
            gkey.setdefault('count', False)
            gkey.setdefault('sum', False)
            gkey['type'] = column_type
            for flag in ('sum2', 'min', 'countDistinct', 'avg', 'stddev', 'first'):
                gkey.setdefault(flag, False)

            for legacy_field in ('customName', 'customExpr', 'colType'):
                gkey.pop(legacy_field, None)

        return grouping

class V17ComputedColumnsJoinRecipe(migration_json.ProjectConfigJsonMigrationOperation):
    """Renames the fields of join recipe computed columns to ``expr`` / ``mode``."""

    def __repr__(self):
        return "Join recipe: migrate the computed column to the new version"

    def file_patterns(self):
        return ["recipes/*.join"]

    def jsonpath(self):
        return ""

    def transform(self, join, filepath=None):
        """
        Update ``join`` in place and return it.

        For every computed column: ``expression`` becomes ``expr``, ``language``
        becomes ``mode`` (``DSS`` is mapped to ``GREL``, which is also the
        default), and ``type`` is lower-cased (default ``double``).
        """
        join.setdefault('computedColumns', [])
        for column in join['computedColumns']:
            language = column.get('language', 'GREL')
            column['name'] = column.get('name', '')
            column['expr'] = column.get('expression', '')
            column['mode'] = 'GREL' if language == 'DSS' else language
            column['type'] = column.get('type', 'double').lower()
            for legacy_field in ('expression', 'language'):
                column.pop(legacy_field, None)

        return join

class V17GlobalAPIKeys(migration_json.JsonMigrationOperation):
    """Gives global-admin API keys an explicit ``globalPermissions`` entry."""

    def __repr__(self):
        return "Update global API keys"

    def file_patterns(self):
        return ["config/public-apikeys.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Set ``globalPermissions`` to ``{"admin": True}`` on every item flagged ``globalAdmin``."""
        for api_key in obj:
            if not api_key.get("globalAdmin", False):
                continue
            api_key["globalPermissions"] = {"admin": True}
        return obj

class V17Meanings(migration_json.JsonMigrationOperation):
    """Converts user-defined meanings to the entry-based format."""

    def __repr__(self):
        return "Update user-defined meanings"

    def transform(self, obj, filepath):
        """
        Update ``obj`` in place and return it.

        * ``VALUES_LIST``: ``values`` is replaced by ``entries``, one
          ``{"value": v}`` per former value.
        * ``VALUES_MAPPING``: each mapping keeps its ``from`` and gets its ``to``
          wrapped as ``{"value": to}``.

        Meanings of any other type are left untouched.
        """
        meaning_type = obj.get("type", None)
        if meaning_type == "VALUES_LIST":
            obj["entries"] = [{"value": v} for v in obj.get("values", [])]
            # pop() rather than del: a meaning without "values" is accepted by
            # the line above and must not fail here.
            obj.pop("values", None)
        elif meaning_type == "VALUES_MAPPING":
            obj["mappings"] = [
                {"from": m.get("from", None), "to": {"value": m.get("to", None)}}
                for m in obj.get("mappings", [])
            ]

        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/meanings/*.json"]

class V17ConvertVariablesToComputedColumnsSplitRecipe(migration_json.ProjectConfigJsonMigrationOperation):
    """Replaces the ``variables`` of split recipes by GREL computed columns."""

    def __repr__(self):
        return "Convert Variables to Computed Columns in Split recipe"

    def file_patterns(self):
        return ["recipes/*.split"]

    def jsonpath(self):
        return ""

    def transform(self, params, filepath=None):
        """
        Update ``params`` in place and return it.

        ``computedColumns`` is rebuilt from ``variables`` (any previous value is
        discarded), ``writeVariablesInOutput`` is renamed to
        ``writeComputedColumnsInOutput``, and the variable-related keys are removed.
        """
        params["computedColumns"] = [
            {
                "name" : variable.get("name", ""),
                "expr" : variable.get("expression", ""),
                "type" : variable.get("type", "double").lower(),
                "mode" : "GREL"
            }
            for variable in params.get("variables", [])
        ]
        params["writeComputedColumnsInOutput"] = params.pop("writeVariablesInOutput", False)
        for legacy_key in ("variables", "enableVariables"):
            params.pop(legacy_key, None)
        return params

class V17ConvertFilesInFolderSelectionPattern(migration_json.ProjectConfigJsonMigrationOperation):
    """Gives ``FilesInFolder`` datasets a ``filesSelectionRules`` built from ``itemPathPattern``."""

    def __repr__(self):
        return "Updates Files-in-Folder datasets' settings"

    def file_patterns(self):
        return ["datasets/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """
        Update ``obj`` in place and return it; other dataset types are untouched.

        The rules start with no include rule; a non-null ``itemPathPattern`` adds
        one REGEXP rule on the full path, anchored and with an optional leading ``/``.
        """
        if obj.get("type") != "FilesInFolder":
            return obj

        params = obj['params']
        include_rules = []
        params["filesSelectionRules"] = {"mode": "RULES_INCLUDED_ONLY", "excludeRules": [], "explicitFiles": [], "includeRules": include_rules}

        pattern = params.get("itemPathPattern", None)
        if pattern is None:
            return obj

        # The leading slash is made optional by the generated expression itself.
        if pattern and pattern[0] == '/':
            pattern = pattern[1:]
        include_rules.append({ "matchingMode": "FULL_PATH", "mode": "REGEXP", "expr": "^/?%s$" % pattern})
        return obj


class V17EngineCreationSettings(migration_json.JsonMigrationOperation):
    """Initialises ``engineCreationSettings`` for Hive and Impala in the general settings."""

    def __repr__(self):
        return "Set new engine creation settings"

    def file_patterns(self):
        return ["config/general-settings.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """
        Update ``obj`` in place and return it.

        The Hive engine is seeded from ``defaultRecipesExecutionEngine`` (default
        ``HIVECLI_LOCAL``), the Impala stream mode from ``createRecipeInStreamMode``
        (default True). Missing settings sections are created.
        """
        hive = obj.get("hiveSettings", {})
        default_engine = hive.get("defaultRecipesExecutionEngine", "HIVECLI_LOCAL")
        hive["engineCreationSettings"] = {"executionEngine" : default_engine}
        obj["hiveSettings"] = hive

        impala = obj.get("impalaSettings", {})
        stream_mode = impala.get("createRecipeInStreamMode", True)
        impala["engineCreationSettings"] = {"streamMode" : stream_mode}
        obj["impalaSettings"] = impala

        return obj

class V17MoveJupyterExports(migration_base.MigrationOperation):
    """Moves ``jupyter_exports`` to ``exports/jupyter-notebooks`` under the data directory."""

    def __repr__(self):
        return "Move Jupyter notebook exports"

    def execute(self, diphome, simulate=False):
        """Create ``exports`` if needed, then rename the legacy folder when it exists; no-op when simulating."""
        if simulate:
            return

        home = diphome.path
        exports_dir = osp.join(home, "exports")
        legacy_dir = osp.join(home, "jupyter_exports")
        target_dir = osp.join(exports_dir, "jupyter-notebooks")

        # The parent folder is created even when there is nothing to move.
        if not osp.isdir(exports_dir):
            os.makedirs(exports_dir)

        if osp.isdir(legacy_dir):
            os.rename(legacy_dir, target_dir)

class V17InitGraceDelays(migration_json.ProjectConfigJsonMigrationOperation):
    """Give every scenario trigger a "graceDelaySettings" entry derived from its "delay"."""

    def __repr__(self,):
        return "Init grace delays in scenario triggers"

    @staticmethod
    def _coerce_delay(raw_delay):
        """Return raw_delay as a number, using 0 when it is null or an unparsable string."""
        if raw_delay is None:
            # An explicit JSON null would make max(1, None) raise TypeError on Python 3.
            return 0
        if isinstance(raw_delay, dku_basestring_type):
            try:
                return int(raw_delay)
            except ValueError:
                return 0
        return raw_delay

    def transform(self, obj, filepath=None):
        """Set trigger["graceDelaySettings"] on each trigger of obj and return obj.

        The grace delay is half of the trigger delay (the delay is raised to at
        least 1 before halving), truncated to an int. A missing "triggers"
        list, or a null one, is treated as empty.
        """
        for trigger in obj.get("triggers") or []:
            delay = self._coerce_delay(trigger.get('delay', 0))
            trigger["graceDelaySettings"] = {
                'checkAgainAfterGraceDelay': False,
                'delay': int(max(1, delay) / 2),
            }
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["scenarios/*.json"]


###############################################################################
# V18 / DSS 4.2
###############################################################################

class V18MigrateDashboardImageResizeSetting(migration_json.ProjectConfigJsonMigrationOperation):
    """Replace the boolean "resizeImage" tile flag with the "resizeImageMode" setting."""

    def __repr__(self,):
        return "Upgrade the y/n resize-to-fit setting of dashboard images to new multi-option setting "

    def transform(self, obj, filepath=None):
        """Migrate every tile of every page grid in obj and return obj.

        "resizeImage" is removed from each tile. A tile without
        "resizeImageMode" gets "ORIG_SIZE" when the removed flag was false,
        and "FIT_SIZE" otherwise. An existing "resizeImageMode" is kept.
        """
        if "pages" not in obj:
            return obj

        for page in obj['pages']:
            if "grid" not in page:
                continue
            grid = page["grid"]
            if "tiles" not in grid:
                continue
            for tile in grid["tiles"]:
                legacy_flag = tile.pop("resizeImage", None)
                # An absent flag pops as None, which does not compare equal to False.
                mode = "ORIG_SIZE" if legacy_flag == False else "FIT_SIZE"
                tile.setdefault("resizeImageMode", mode)

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["dashboards/*.json"]

class V18UpdateSQLDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Align the first partitioning dimension of SQL table datasets with their partitioning column."""

    def __repr__(self,):
        return "Clean up the partitioning of SQL datasets"

    def transform(self, obj, filepath=None):
        """Ensure the partitioning column of a partitioned SQL table dataset is in its partitioning scheme.

        Only datasets of a SQL type are touched. Their "params" entry is
        created if missing. When the dataset is partitioned, in 'table' mode
        and has a non-empty 'partitioningColumn', the first dimension is
        renamed to that column, or a 'value' dimension is created if there is
        none. Returns the mutated obj.
        """
        sql_types = (
            'PostgreSQL', 'MySQL', 'Vertica', 'Redshift', 'Greenplum', 'Teradata',
            'Oracle', 'SQLServer', 'BigQuery', 'JDBC', 'Netezza', 'SAPHANA',
        )
        if obj.get("type", '') not in sql_types:
            return obj

        params = obj.setdefault('params', {})
        if not (params.get('partitioned', False) and params.get('mode', '') == 'table'):
            return obj

        # Until now it was the frontend's responsibility to keep the scheme in sync.
        column = params.get('partitioningColumn', None)
        if column is None or len(column) == 0:
            return obj

        scheme = obj.setdefault('partitioning', {})
        dims = scheme.setdefault('dimensions', [])
        if len(dims) == 0:
            dims.append({'name': column, 'type': 'value', 'params': {}})
        else:
            dims[0]['name'] = column
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["datasets/*.json"]


class V18CleanupMLResolvedParams(migration_base.ProjectLocalMigrationOperation):
    """
    Migrates ResolvedPredictionModelingParameters in rmodeling_params and actual_params
    (in analysis_data and saved_models data)

    Pre-train data (rmodeling_params.json files and the "modeling" section of
    *.prediction_training recipe payloads) gets its grids un-nested, renamed
    with a "_grid" suffix and, when missing, rebuilt from the legacy flat keys.
    Post-train data (the "resolved" section of actual_params.json files) gets
    its flat parameters copied under a per-algorithm key.
    """
    def __init__(self):
        pass

    def __repr__(self,):
        return "Cleanup trained models parameters"

    def pretrain_unnest_mllib_grids(self, old_resolved):
        """In pre-train, mllib grids were below a weird prefix

        Copies each known entry of old_resolved["mllib_grids"] to a top-level
        "<name>_grid" key, copies "custom_mllib" to "custom_mllib_grid" (the
        original key is kept) and finally removes "mllib_grids".
        Mutates old_resolved in place.
        """
        mllib_grids = old_resolved.get("mllib_grids", {})

        for grid_key in ("mllib_logit", "mllib_naive_bayes", "mllib_linreg",
                         "mllib_rf", "mllib_gbt", "mllib_dt"):
            if grid_key in mllib_grids:
                old_resolved[grid_key + "_grid"] = mllib_grids[grid_key]

        if "custom_mllib" in old_resolved:
            old_resolved["custom_mllib_grid"] = old_resolved["custom_mllib"]

        old_resolved.pop("mllib_grids", None)

    def pretrain_call_everything_grid(self, old_pretrain):
        """In pre-train, some grids were not called _grid

        Renames each of the listed keys present in old_pretrain to
        "<key>_grid". Mutates old_pretrain in place.
        """
        for key in ("least_squares", "xgboost", "deep_learning_sparkling",
                    "gbm_sparkling", "glm_sparkling", "rf_sparkling", "nb_sparkling"):
            if key in old_pretrain:
                old_pretrain[key + "_grid"] = old_pretrain.pop(key)

    def pretrain_regridify_if_needed(self, pretrain):
        """Pre-train was not always gridified, so gridify if needed

        Looks at pretrain["algorithm"]:
          * 'SCIKIT_MODEL' is left untouched;
          * for algorithms listed in INGRIDS, when the grid key is not already
            present it is built from the legacy flat keys that exist in
            pretrain, following the algorithm's replace strategy;
          * the two Vertica algorithms get a grid built from the
            "vertica_reg_*" keys, with defaults;
          * any other algorithm is left untouched.
        Mutates pretrain in place.

        A replace strategy maps a legacy key to one of:
          * None: the key is dropped;
          * a destination key: the value is copied as is;
          * (destination key, _listify): the value is wrapped in a list;
          * (list of destination keys, _expand_value): the legacy value is a
            whitespace-separated string and each destination key receives
            True/False depending on whether it appears in it.
        """
        def _identity(value=None, col=None):
            return value

        def _listify(value=None, col=None):
            if isinstance(value, list):
                return value
            else:
                return [value]

        def _expand_value(value=None, col=None):
            return col in value.split()

        # Strategies shared by several algorithms are declared once.
        random_forest_strategy = {
            "rf_estimators": ("n_estimators", _listify),
            "rf_njobs": "n_jobs",
            "rf_max_tree_depth": ("max_tree_depth", _listify),
            "rf_min_samples_leaf": ("min_samples_leaf", _listify),
            "rf_selection_mode": "selection_mode",
            "rf_max_features": ("max_features", _listify),
            "rf_max_feature_prop": "max_feature_prop",
        }

        def _gbt_strategy(losses):
            # Classification and regression only differ by their loss names.
            return {
                "gbt_n_estimators": ("n_estimators", _listify),
                "gbt_learning_rate": ("learning_rate", _listify),
                "gbt_max_depth": ("max_depth", _listify),
                "gbt_max_features": ("max_features", _listify),
                "gbt_min_samples_leaf": ("min_samples_leaf", _listify),
                "gbt_max_feature_prop": "max_feature_prop",
                "gbt_selection_mode": "selection_mode",
                "gbt_loss": (losses, _expand_value),
            }

        decision_tree_strategy = {
            "dtc_max_depth": ("max_depth", _listify),
            "dtc_min_samples_leaf": ('min_samples_leaf', _listify),
            "dtc_criterion": (['gini', 'entropy'], _expand_value),
            "dtc_splitter": (['random', 'best'], _expand_value),
        }

        svm_strategy = {
            'tol': 'tol',
            'C': ('C', _listify),
            'gamma': ('gamma', _listify),
            'kernel': (['linear', 'poly', 'sigmoid', 'rbf'], _expand_value),
            'coef0': 'coef0',
            'max_iter': 'max_iter',
        }

        xgboost_strategy = {
            'xgboost_max_depth': ('max_depth', _listify),
            'xgboost_learning_rate': ('learning_rate', _listify),
            'xgboost_n_estimators': 'n_estimators',
            'xgboost_nthread': 'nthread',
            'xgboost_gamma': ('gamma', _listify),
            'xgboost_min_child_weight': ('min_child_weight', _listify),
            'xgboost_max_delta_step': ('max_delta_step', _listify),
            'xgboost_subsample': ('subsample', _listify),
            'xgboost_colsample_bytree': ('colsample_bytree', _listify),
            'xgboost_colsample_bylevel': ('colsample_bylevel', _listify),
            'xgboost_alpha': ('alpha', _listify),
            'xgboost_lambda': ('lambda', _listify),
            'xgboost_seed': 'seed',
            'xgboost_impute_missing': 'impute_missing',
            'xgboost_missing': 'missing',
            'xgboost_base_score': 'base_score',
            'xgboost_scale_pos_weight': 'scale_pos_weight',
            'xgboost_enable_early_stopping': 'enable_early_stopping',
            'xgboost_early_stopping_rounds': 'early_stopping_rounds',
            'xgboost_silent': None,
            'xgboost_objective': None,
        }

        INGRIDS = {
            'RANDOM_FOREST_REGRESSION': {
                'grid_name': 'rf_regressor_grid',
                'replace_strategy': random_forest_strategy,
            },
            'RANDOM_FOREST_CLASSIFICATION': {
                'grid_name': 'rf_classifier_grid',
                'replace_strategy': random_forest_strategy,
            },
            'EXTRA_TREES': {
                'grid_name': 'extra_trees_grid',
                'replace_strategy': random_forest_strategy,
            },
            'GBT_CLASSIFICATION': {
                'grid_name': 'gbt_classifier_grid',
                'replace_strategy': _gbt_strategy(['deviance', 'exponential', 'huber']),
            },
            'GBT_REGRESSION': {
                'grid_name': 'gbt_regressor_grid',
                'replace_strategy': _gbt_strategy(['ls', 'lad', 'huber']),
            },
            'DECISION_TREE_CLASSIFICATION': {
                'grid_name': 'dtc_classifier_grid',
                'replace_strategy': decision_tree_strategy,
            },
            # NOTE(review): regression reuses the classifier grid name and
            # criteria, exactly as before -- confirm this is intended.
            'DECISION_TREE_REGRESSION': {
                'grid_name': 'dtc_classifier_grid',
                'replace_strategy': decision_tree_strategy,
            },
            'LOGISTIC_REGRESSION': {
                'grid_name': 'logit_grid',
                'replace_strategy': {
                    "logit_penalty": (['l1', 'l2'], _expand_value),
                    'C': ('C', _listify),
                    'n_jobs': 'n_jobs',
                    'logit_multi_class': 'multi_class',
                },
            },
            'SVM_REGRESSION': {
                'grid_name': 'svr_grid',
                'replace_strategy': svm_strategy,
            },
            'SVC_CLASSIFICATION': {
                'grid_name': 'svc_grid',
                'replace_strategy': svm_strategy,
            },
            'SGD_REGRESSION': {
                'grid_name': 'sgd_reg_grid',
                'replace_strategy': {
                    'l1_ratio': 'l1_ratio',
                    'penalty': (['l1', 'l2', 'elasticnet'], _expand_value),
                    'alpha': ('alpha', _listify),
                    'n_jobs': 'n_jobs',
                    'max_iter': 'n_iter',
                    'loss': (['squared_loss', 'huber'], _expand_value),
                },
            },
            'SGD_CLASSIFICATION': {
                'grid_name': 'sgd_grid',
                'replace_strategy': {
                    'l1_ratio': 'l1_ratio',
                    "penalty": (['l1', 'l2', 'elasticnet'], _expand_value),
                    'alpha': ('alpha', _listify),
                    'max_iter': 'n_iter',
                    "loss": (['log', 'modified_huber'], _expand_value),
                },
            },
            'RIDGE_REGRESSION': {
                'grid_name': 'ridge_grid',
                'replace_strategy': {
                    'ridge_alphamode': 'alphaMode',
                    'alpha': ('alpha', _listify),
                },
            },
            'LASSO_REGRESSION': {
                'grid_name': 'lasso_grid',
                'replace_strategy': {
                    'lasso_alphamode': 'alphaMode',
                    'alpha': ('alpha', _listify),
                },
            },
            'KNN': {
                'grid_name': 'knn_grid',
                'replace_strategy': {
                    'knn_k': ('k', _listify),
                    'knn_distance_weighting': 'distance_weighting',
                    'knn_algorithm': 'algorithm',
                    'knn_p': 'p',
                    'knn_leaf_size': 'leaf_size',
                },
            },
            'XGBOOST_CLASSIFICATION': {
                'grid_name': 'xgboost_grid',
                'replace_strategy': xgboost_strategy,
            },
            'XGBOOST_REGRESSION': {
                'grid_name': 'xgboost_grid',
                'replace_strategy': xgboost_strategy,
            },
            'LEASTSQUARE_REGRESSION': {
                'grid_name': 'least_squares_grid',
                'replace_strategy': {
                    'n_jobs': 'n_jobs',
                },
            },
            'NEURAL_NETWORK': {
                'grid_name': 'neural_network_grid'
            },
            'LARS': {
                'grid_name': "lars_grid",
                "replace_strategy": {
                    "lars_max_features": "max_features",
                    "lars_K": "K"
                }
            }
        }

        def _vertica_grid():
            return {
                "optimizer": pretrain.get("vertica_reg_optimizer", "BFGS"),
                "maxIterations": pretrain.get("vertica_reg_iterations", 200),
                "epsilon": pretrain.get("vertica_reg_epsilon", 0.000001),
            }

        algorithm = pretrain.get("algorithm", "unknown")
        if algorithm == 'SCIKIT_MODEL':
            return

        if algorithm in INGRIDS:
            grid_descriptor = INGRIDS[algorithm]
            grid_name = grid_descriptor['grid_name']
            if grid_name in pretrain:
                # Already gridified, do nothing
                return

            ingrid = {}
            # NEURAL_NETWORK declares no strategy: it gets an empty grid instead
            # of a KeyError that would abort the migration of the whole file.
            for old_key, strategy in grid_descriptor.get('replace_strategy', {}).items():
                if not strategy or old_key not in pretrain:
                    continue
                if isinstance(strategy, tuple):
                    dest_col_names, clean_func = strategy
                else:
                    dest_col_names, clean_func = strategy, _identity
                if not isinstance(dest_col_names, list):
                    dest_col_names = [dest_col_names]
                for dest_col_name in dest_col_names:
                    ingrid[dest_col_name] = clean_func(col=dest_col_name, value=pretrain[old_key])

            pretrain[grid_name] = ingrid

        elif algorithm == "VERTICA_LINEAR_REGRESSION":
            pretrain["vertica_linreg_grid"] = _vertica_grid()

        # The historical misspelling is still accepted, in case some data carries it.
        elif algorithm in ("VERTICA_LOGISTIC_REGRESSION", "VERTICA_LOGISTIC_RERESSION"):
            pretrain["vertica_logit_grid"] = _vertica_grid()

    def posttrain_nest(self, old_resolved):
        """In post-train, put all parameters that were at top-level behind their personal algorithm key

        Depending on old_resolved["algorithm"], a nested dict is created (any
        existing value under that key is replaced) and filled with either:
          * the top-level keys starting with a prefix, with the prefix removed;
          * an explicit list of top-level keys;
          * for MLLib, entries of old_resolved["mllib_grids"][<grid>], where
            single-element lists are unwrapped for the gridified parameters.
        The top-level keys themselves are left in place. Unknown algorithms
        only produce a warning on stdout. Mutates old_resolved in place.
        """
        d = old_resolved
        a = old_resolved.get("algorithm", "unknown")

        # algorithm -> (nested key, prefix of the top-level keys to copy)
        # NOTE(review): SVM_REGRESSION and EXTRA_TREES have no entry in any of
        # the tables below and end up in the warning branch, as before --
        # confirm whether that is intended.
        prefixed = {
            "RANDOM_FOREST_CLASSIFICATION": ("rf", "rf_"),
            "RANDOM_FOREST_REGRESSION": ("rf", "rf_"),
            "GBT_CLASSIFICATION": ("gbt", "gbt_"),
            "GBT_REGRESSION": ("gbt", "gbt_"),
            "DECISION_TREE_CLASSIFICATION": ("dt", "dtc_"),
            "DECISION_TREE_REGRESSION": ("dt", "dtc_"),
            "LOGISTIC_REGRESSION": ("logit", "logit_"),
            "LARS": ("lars", "lars_"),
            "XGBOOST_CLASSIFICATION": ("xgboost", "xgboost_"),
            "XGBOOST_REGRESSION": ("xgboost", "xgboost_"),
            "KNN": ("knn", "knn_"),
        }

        # algorithm -> (nested key, top-level keys to copy)
        sgd_keys = ("alpha", "l1_ratio", "loss", "penalty", "n_jobs")
        explicit = {
            "SVC_CLASSIFICATION": ("svm", ("C", "gamma", "kernel", "coef0", "tol", "max_iter")),
            "SGD_CLASSIFICATION": ("sgd", sgd_keys),
            "SGD_REGRESSION": ("sgd", sgd_keys),
            "RIDGE_REGRESSION": ("ridge", ("alpha",)),
            "LASSO_REGRESSION": ("lasso", ("alpha",)),
            "LEASTSQUARE_REGRESSION": ("least_squares", ()),
        }

        # algorithm -> (grid name, params copied as is, params to un-gridify)
        mllib_tree_plain = ["impurity", "max_bins", "min_info_gain", "min_instance_per_node",
                            "subsampling_rate", "subset_strategy"]
        mllib_tree_grid = ["max_depth", "num_trees", "step_size"]
        mllib = {
            "MLLIB_LOGISTIC_REGRESSION": ("mllib_logit", ["max_iter"], ["reg_param", "enet_param"]),
            "MLLIB_DECISION_TREE": ("mllib_dt", ["max_bins", "min_info_gain", "min_instance_per_node"], ["max_depth"]),
            "MLLIB_LINEAR_REGRESSION": ("mllib_linreg", ["max_iter"], ["reg_param", "enet_param"]),
            "MLLIB_NAIVE_BAYES": ("mllib_naive_bayes", [], ["lambda"]),
            "MLLIB_RANDOM_FOREST": ("mllib_rf", mllib_tree_plain, mllib_tree_grid),
            "MLLIB_GBT": ("mllib_gbt", mllib_tree_plain, mllib_tree_grid),
        }

        if a in prefixed:
            nested_key, prefix = prefixed[a]
            d[nested_key] = dict(
                (key[len(prefix):], value) for key, value in d.items() if key.startswith(prefix)
            )

        elif a in explicit:
            nested_key, wanted = explicit[a]
            d[nested_key] = dict((key, d[key]) for key in wanted if key in d)

        elif a in mllib:
            # MLLib specific handling because mllib was already gridified
            grid_name, already_ungridified, to_ungridify = mllib[a]
            grid = d.get("mllib_grids", {}).get(grid_name, {})
            nested = {}
            d[grid_name] = nested

            for p in already_ungridified:
                v = grid.get(p, None)
                if v is not None:
                    nested[p] = v

            for p in to_ungridify:
                v = grid.get(p, None)
                # Only single-value grids can be turned back into a scalar.
                if v is not None and len(v) == 1:
                    nested[p] = v[0]

        else:
            print(" ** WARNING: Unhandled algorithm: %s" % a)

    def _migrate_pretrain(self, modeling):
        """Run the three pre-train clean-up steps, in order, on a modeling dict."""
        self.pretrain_unnest_mllib_grids(modeling)
        self.pretrain_call_everything_grid(modeling)
        self.pretrain_regridify_if_needed(modeling)

    def _migrate_posttrain(self, data):
        """Nest the "resolved" section of an actual_params document."""
        self.posttrain_nest(data.get("resolved", {}))

    def _migrate_train_recipe(self, data):
        """Clean up the "modeling" section of a train recipe payload."""
        self._migrate_pretrain(data.get("modeling", {}))

    def _migrate_json_file(self, json_file, description, migrate):
        """Load json_file, apply migrate to its content and write it back.

        Best effort: any failure is printed with its traceback and the file is
        left as it was, so that one broken model does not stop the migration.
        """
        print("Migrating %s: %s" % (description, json_file))
        try:
            data = base.json_loadf(json_file)
            migrate(data)
            base.json_dumpf(json_file, data)
        except Exception as e:
            import traceback
            traceback.print_exc()
            print("Model migration FAILED: %s" % e)

    def execute(self, project_paths):
        """Migrate every model parameter file of the project.

        Covers the rmodeling_params.json / actual_params.json files of the
        analysis data and of the saved models, and the *.prediction_training
        recipe payloads of the project config.
        """
        def run(pattern, description, migrate):
            for json_file in glob(pattern):
                self._migrate_json_file(json_file, description, migrate)

        run("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data,
            "analysis-data MS rmodeling file", self._migrate_pretrain)
        run("%s/*/*/sessions/*/*/*/actual_params.json" % project_paths.analysis_data,
            "analysis-data MS actualparams file", self._migrate_posttrain)

        # modelid/versions/vid/rmodeling_params.json
        run("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models,
            "saved-model rmodeling file", self._migrate_pretrain)
        # modelid/versions/vid/actual_params.json
        run("%s/*/versions/*/actual_params.json" % project_paths.saved_models,
            "saved-model actualparams file", self._migrate_posttrain)

        # recipes/*.prediction_training
        run("%s/*/*.prediction_training" % project_paths.config,
            "train recipe config", self._migrate_train_recipe)


class V18FeatureGenerationParams(migration_base.ProjectLocalMigrationOperation):
    """
    Migrates to clean FeatureGenerationParams
    """
    def __init__(self):
        pass

    def __repr__(self,):
        return "Nest feature generation parameters"

    def process_file(self, the_file):
        """Rewrite a JSON file whose top level is a preprocessing params object."""
        content = base.json_loadf(the_file)
        self.process_preprocessing(content)
        base.json_dumpf(the_file, content)

    def process_preprocessing(self, data):
        """Build data["feature_generation"] from the legacy preprocessing keys.

        Any existing "feature_generation" entry is replaced. The two
        "numerical_combinations" flags become a "behavior" of "ENABLED_MANUAL"
        or "DISABLED", and "feature_interactions" is copied under
        "manual_interactions". The legacy keys are left in place.
        """
        generation = {}
        data["feature_generation"] = generation

        # (legacy flag in "numerical_combinations", new feature generation key)
        flag_mapping = (
            ("pairwiseLinear", "pairwise_linear"),
            ("polynomialInteraction", "polynomial_combinations"),
        )
        for legacy_flag, new_key in flag_mapping:
            if data.get("numerical_combinations", {}).get(legacy_flag, False):
                behavior = "ENABLED_MANUAL"
            else:
                behavior = "DISABLED"
            generation[new_key] = { "behavior": behavior }

        generation["manual_interactions"] = {
            "interactions" : data.get("feature_interactions", [])
        }

    def execute(self, project_paths):
        """Migrate the preprocessing params of the analysis data, saved models, train recipes and analyses."""
        def rewrite_preprocessing_section(json_path):
            # These documents hold the preprocessing params under "preprocessing".
            content = base.json_loadf(json_path)
            self.process_preprocessing(content.get("preprocessing", {}))
            base.json_dumpf(json_path, content)

        for rpp_path in glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data):
            print("Migrating analysis-data rpreprocessing file: %s" % rpp_path)
            self.process_file(rpp_path)

        for mltask_path in glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data):
            print("Migrating analysis-data mltask file: %s" % mltask_path)
            rewrite_preprocessing_section(mltask_path)

        for rpp_path in glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models):
            print("Migrating saved-model rpreprocessing file: %s" % rpp_path)
            self.process_file(rpp_path)

        for recipe_path in glob("%s/recipes/*.prediction_training" % project_paths.config):
            print("Migrating train recipe config: %s" % recipe_path)
            rewrite_preprocessing_section(recipe_path)

        for params_path in glob("%s/analysis/*/ml/*/params.json" % project_paths.config):
            print("Migrating analysis mltask config: %s" % params_path)
            content = base.json_loadf(params_path)
            if content is None:
                # Only this family of files is checked for a None document.
                print("Analysis MLTask file is corrupted: %s, ignoring" % params_path)
                continue
            self.process_preprocessing(content.get("preprocessing", {}))
            base.json_dumpf(params_path, content)

###############################################################################
# V19 / DSS 4.3
###############################################################################

# Nothing to do !

###############################################################################
# V20 / DSS 5.0
###############################################################################

class V20AddParamsToMLRecipes(migration_json.ProjectConfigJsonMigrationOperation):
    """Make sure every ML recipe has a "params" entry."""

    def __repr__(self,):
        return "Add params to ML recipes"

    def transform(self, obj, filepath=None):
        """Add an empty 'params' dict to ML recipes that have none; return obj."""
        ml_recipe_types = (
            'prediction_training', 'prediction_scoring', 'evaluation',
            'clustering_training', 'clustering_scoring', 'clustering_cluster',
        )
        if obj.get('type', '') in ml_recipe_types:
            # An existing value, whatever it is, is kept untouched.
            obj.setdefault('params', {})
        return obj

    def file_patterns(self,):
        return ["recipes/*.json"]


class V20TransformCommentsInsightsToDiscussionsInsights(migration_json.ProjectConfigJsonMigrationOperation):
    """Turn insights of type "comments" into insights of type "discussions"."""

    def __repr__(self,):
        return "Transform comments insight to discussions insights"

    def transform(self, obj, filepath=None):
        """Retype a comments insight and rename its "objectSmartId" param to "objectId".

        Insights of any other type are returned unchanged. The rename only
        happens when "objectSmartId" is present and not None.
        """
        if obj.get("type", "") == "comments":
            obj["type"] = "discussions"
            insight_params = obj.get("params", {})
            if insight_params.get("objectSmartId", None) is not None:
                insight_params["objectId"] = insight_params.pop("objectSmartId")
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["insights/*.json"]

class V20TransformCommentsInsightsToDiscussionsInsightsInDashboards(migration_json.ProjectConfigJsonMigrationOperation):
    """Retype dashboard tiles that point at "comments" insights."""

    def __repr__(self,):
        return "Transform dashboard by changing comments insight to discussions insights"

    def transform(self, obj, filepath=None):
        """Set "insightType" to "discussions" on every tile whose type is "comments"; return obj."""
        if "pages" not in obj:
            return obj

        for page in obj["pages"]:
            if "grid" not in page:
                continue
            grid = page["grid"]
            if "tiles" not in grid:
                continue
            for tile in grid["tiles"]:
                if tile.get("insightType", "") == "comments":
                    tile["insightType"] = "discussions"
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["dashboards/*.json"]

class V20DKUCommand(migration_base.MigrationOperation):
    """Run the "__migrate_v20" command of the dku binary once the migration is done."""

    def __repr__(self,):
        return "Update project timelines for discussions"

    def execute(self, diphome):
        """Nothing to do at this stage: the work happens in post_execute."""
        pass

    def post_execute(self, diphome):
        """Make sure <home>/tmp exists, then run "dku __migrate_v20".

        The binary is taken from the DKUBIN environment variable, falling back
        to <home>/bin/dku.

        Raises:
            subprocess.CalledProcessError: when the command exits with a
                non-zero status.
        """
        tmp_folder = osp.join(diphome.path, "tmp")
        if not osp.isdir(tmp_folder):
            os.makedirs(tmp_folder)

        import subprocess

        dkupath = os.getenv("DKUBIN", diphome.path + "/bin/dku")
        # An argv list avoids going through a shell, so a path containing
        # spaces or shell metacharacters is passed to the binary verbatim.
        subprocess.check_call([dkupath, "__migrate_v20"])

###############################################################################
# V21 / DSS 5.0.2
###############################################################################

def migrate_ml_spark_params(obj):
    """
    Move the top-level Spark settings of `obj` into a nested "sparkParams" dict.

    Each known Spark property is removed from `obj` when present and stored
    under obj["sparkParams"]; absent properties are stored as None. Any
    existing "sparkParams" entry is overwritten. `obj` is modified in place
    and returned.
    """
    moved_props = ("sparkConf", "sparkPreparedDFStorageLevel", "sparkRepartitionNonHDFS", "pipelineAllowStart", "pipelineAllowMerge")
    obj["sparkParams"] = dict((prop, obj.pop(prop, None)) for prop in moved_props)
    return obj

class V21RegoupMLSparkParamsInRecipes(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Migrates spark params in recipes into a common object

    For ML recipes, the spark params are read from and written back to the
    payload file that sits next to the recipe JSON file and uses the recipe
    type as its extension (<recipe>.json -> <recipe>.<type>). The recipe JSON
    object itself is returned unchanged.
    """
    def __repr__(self,):
        return "Regroup spark params in recipes"

    def transform(self, obj, filepath=None):
        # Broken recipe without a type: .get() yields None, which matches no ML type below
        recipe_type = obj.get('type')
        if recipe_type in ['clustering_training', 'clustering_scoring', 'clustering_cluster', 'prediction_training', 'prediction_scoring', 'evaluation']:
            # Swap only the trailing extension: str.replace would also rewrite
            # any ".json" appearing earlier in the path (e.g. in a directory name)
            payload_file = osp.splitext(filepath)[0] + ".%s" % recipe_type
            if not osp.isfile(payload_file):
                # No payload to migrate; do not fail the whole migration for it
                print("Recipe payload file not found, skipping: %s" % payload_file)
                return obj
            payload = base.json_loadf(payload_file)
            payload = migrate_ml_spark_params(payload)
            base.json_dumpf(payload_file, payload)
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["recipes/*.json"]

class V21RegoupMLSparkParamsInAnalysesMLTasks(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Applies migrate_ml_spark_params to the params.json of each analysis ML task.
    """
    def __repr__(self):
        return "Migrates spark params in analyses' ml tasks into a common object"

    def transform(self, modeling, filepath=None):
        migrated = migrate_ml_spark_params(modeling)
        return migrated

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json"]

class V21RegoupMLSparkParamsInAnalysisDataMLTasks(migration_base.ProjectLocalMigrationOperation):
    """
    Applies migrate_ml_spark_params to every saved ML task session file found
    under the project's analysis data directory.
    """
    def __repr__(self):
        return "Migrates spark params in analyses' models into a common object"

    def execute(self, project_paths):
        root = project_paths.analysis_data
        if not osp.isdir(root):
            return
        # Layout example: a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        for analysis_id in os.listdir(root):
            analysis_dir = osp.join(root, analysis_id)
            mltask_ids = os.listdir(analysis_dir) if osp.isdir(analysis_dir) else []
            for mltask_id in mltask_ids:
                sessions_dir = osp.join(analysis_dir, mltask_id, "sessions")
                session_ids = os.listdir(sessions_dir) if osp.isdir(sessions_dir) else []
                for session_id in session_ids:
                    mltask_file = osp.join(sessions_dir, session_id, "mltask.json")
                    if osp.isfile(mltask_file):
                        print("Migrating saved ML Task session: %s %s %s" % (analysis_id, mltask_id, session_id))
                        # Best effort: a failing session is reported and the others still get migrated
                        try:
                            session_data = base.json_loadf(mltask_file)
                            session_data = migrate_ml_spark_params(session_data)
                            base.json_dumpf(mltask_file, session_data)
                        except Exception as e:
                            print("Model migration FAILED: %s" % e)

class V21RegoupMLSparkParamsInSavedModelsMLTasks(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Applies migrate_ml_spark_params to the "miniTask" element of saved model files.
    """
    def __repr__(self):
        return "Migrates spark params in saved models into a common object"

    def transform(self, modeling, filepath=None):
        migrated = migrate_ml_spark_params(modeling)
        return migrated

    def jsonpath(self):
        return "miniTask"

    def file_patterns(self):
        return ["saved_models/*.json"]

###############################################################################
# V22 / DSS 5.0.3
###############################################################################

class V22GiveNPSSurveySettingsToUsers(migration_base.MigrationOperation):
    """
    Adds an "npsSurveySettings" entry to every user of config/users.json,
    with state INITIAL and a next action scheduled 30 days from now.
    """
    def __repr__(self,):
        return "Give NPS Survey Settings to each user"

    def execute(self, diphome):
        usersf = osp.join(diphome.path, "config/users.json")
        users_data = base.json_loadf(usersf)

        # Epoch milliseconds, 30 days from now. strftime("%s") is a platform-specific
        # extension (not available everywhere); time.mktime converts the same local
        # time to epoch seconds portably.
        next_action = datetime.datetime.now() + datetime.timedelta(days=30)
        next_action_ms = int(time.mktime(next_action.timetuple())) * 1000

        for user in users_data.get("users", []):
            # One dict per user, so that users do not share a single settings object
            user["npsSurveySettings"] = {
                "state": "INITIAL",
                "nextAction": next_action_ms
            }

        print("Writing users file with NPS survey settings")
        base.json_dumpf(usersf, users_data)


###############################################################################
# V23 / DSS 5.1.0
###############################################################################

class V23MigrateH2Databases(migration_base.MigrationOperation):
    """
    Runs the `dku __migrate_v23` command from the post_execute hook.
    """
    def __repr__(self):
        return "Upgrade structure of dss_schema_info in H2 databases"

    def execute(self, diphome):
        # Nothing to do here: all the work is done in post_execute
        pass

    def post_execute(self, diphome):
        """Create <diphome>/tmp if needed, then run the dku command through the shell."""
        import subprocess

        tmp_dir = osp.join(diphome.path, "tmp")
        if not osp.isdir(tmp_dir):
            os.makedirs(tmp_dir)

        # The DKUBIN environment variable overrides the default launcher location
        default_launcher = diphome.path + "/bin/dku"
        command = os.getenv("DKUBIN", default_launcher) + " __migrate_v23"
        subprocess.check_call(command, shell=True)


class V23MakeClassWeightTheDefaultForClassifications(migration_base.ProjectLocalMigrationOperation):
    """
    Sets an explicit weighting strategy in classification ML params files.

    For the PY_MEMORY backend, a missing weight configuration or a NO_WEIGHTING
    method becomes CLASS_WEIGHT. For any other backend, only a missing weight
    configuration is filled in, with NO_WEIGHTING.
    """

    def __repr__(self):
        return "Make CLASS_WEIGHT the default weighting strategy for classification saved models"

    def must_process(self, data):
        """Tell whether `data` describes a binary or multiclass prediction task."""
        task_type = data.get("taskType") or data.get("task_type")
        if task_type != "PREDICTION":
            return False
        prediction_type = data.get("predictionType") or data.get("prediction_type")
        return prediction_type in {"BINARY_CLASSIFICATION", "MULTICLASS"}

    def _process_sibling_core_params(self, the_file, data):
        # Migrates the core_params.json located next to `the_file`, if any,
        # using the backend type declared in `data`
        backend_type = data.get("backendType") or data.get("backend_type")
        core_params_file = osp.join(osp.dirname(the_file), "core_params.json")
        if osp.isfile(core_params_file):
            core_params_data = base.json_loadf(core_params_file)
            core_params_data = self.process_weight(core_params_data, backend_type=backend_type)
            base.json_dumpf(core_params_file, core_params_data)

    def process_file(self, the_file, field=None, also_core_params=False):
        """
        Migrate one params file.

        Without `field`, the whole document is migrated when must_process()
        accepts it. With `field`, only data[field] is migrated, and only for
        PY_MEMORY classification tasks (prediction type read from data["core"]).
        When `also_core_params` is set, the neighbouring core_params.json is
        migrated as well (in the no-field case, only if the document itself was).
        """
        data = base.json_loadf(the_file)
        if field is None:
            if not self.must_process(data):
                return
            data = self.process_weight(data)
            base.json_dumpf(the_file, data)
            if also_core_params:
                self._process_sibling_core_params(the_file, data)
            return

        backend_type = data.get("backendType")
        prediction_type = data["core"].get("prediction_type")
        if backend_type == "PY_MEMORY" and prediction_type in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
            data[field] = self.process_weight(data[field], backend_type=backend_type)
            base.json_dumpf(the_file, data)
        if also_core_params:
            self._process_sibling_core_params(the_file, data)

    def process_weight(self, data, backend_type=None):
        """
        Set or upgrade data["weight"] in place and return `data`.

        The backend type is taken from the argument when given, else from the
        "backendType" / "backend_type" keys of `data`.
        """
        effective_backend = backend_type or data.get("backendType") or data.get("backend_type")
        in_memory = effective_backend == "PY_MEMORY"
        current_weight = data.get("weight")
        if current_weight is None:
            data["weight"] = {"weightMethod": "CLASS_WEIGHT" if in_memory else "NO_WEIGHTING"}
        elif in_memory and current_weight.get("weightMethod") == "NO_WEIGHTING":
            current_weight["weightMethod"] = "CLASS_WEIGHT"
        return data

    def execute(self, project_paths):
        # (glob pattern, log message, extra process_file arguments), processed in this order
        migrations = [
            ("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data,
             "Migrating analysis-data mltask.json file (and related core_params.json): %s",
             {"also_core_params": True}),
            ("%s/*/versions/*/core_params.json" % project_paths.saved_models,
             "Migrating saved-model core_params file: %s",
             {}),
            ("%s/analysis/*/ml/*/params.json" % project_paths.config,
             "Migrating analysis params config: %s",
             {}),
            ("%s/recipes/*.prediction_training" % project_paths.config,
             "Migrating train recipe config: %s",
             {"field": "core"}),
        ]
        for pattern, message, extra_args in migrations:
            for pfile in glob(pattern):
                print(message % pfile)
                self.process_file(pfile, **extra_args)


class V23TransferKernelSpecEnvName(migration_base.MigrationOperation):
    """
    Fills the "metadata" of code env Jupyter kernel specs (envName, projectKey,
    bundleId) from the information encoded in each kernel's display name.

    Only kernels whose directory name starts with 'py-dku-venv-' or
    'r-dku-venv-' are considered. A kernel.json is rewritten only when its
    display name matches the expected pattern.
    """
    def __repr__(self,):
        return "Setup metadata for code envs in Jupyter kernels' specs"

    def execute(self, diphome, simulate=False):
        kernels_dir = osp.join(diphome.path, "jupyter-run", "jupyter", "kernels")

        if not osp.isdir(kernels_dir):
            return

        for kernel_file in glob(osp.join(kernels_dir, '*', 'kernel.json')):
            # The kernel name is the name of the directory holding kernel.json
            kernel_name = osp.basename(osp.dirname(kernel_file))

            if not kernel_name.startswith(('py-dku-venv-', 'r-dku-venv-')):
                # not a code env kernel
                continue
            with open(kernel_file, 'r') as f:
                kernel = json.load(f)

            kernel_metadata = kernel.get('metadata', {})
            kernel['metadata'] = kernel_metadata

            display_name = kernel.get('display_name', kernel_name)
            # Raw string: the pattern contains regex escapes (\( \. \)) that are
            # invalid escape sequences in a regular string literal.
            # Groups: 3 = project key, 4 = bundle id (both None without the
            # optional "version ... of" part), 5 = env name.
            m = re.search(r'(Python|R) \((version (.*)\.(.*) of )?env (.*)\)', display_name)
            if m is not None:
                kernel_metadata["envName"] = m.group(5)
                kernel_metadata["projectKey"] = m.group(3)
                kernel_metadata["bundleId"] = m.group(4)

                print("Updated kernel : %s" % display_name)
                with open(kernel_file, 'w') as f:
                    json.dump(kernel, f, indent=2)

            else:
                print("Kernel from DSS with unexpected display name : %s" % display_name)


class V23DefaultGitURLWhitelist(migration_json.JsonMigrationOperation):
    """
    Replaces the "git" entry of the general settings with a single enforced
    configuration rule holding a default remote URL whitelist.
    """
    def __repr__(self,):
        return "Sets a default Git URL whitelist for clones/pulls"

    def transform(self, obj, filepath=None):
        # Raw string: the regex uses backslash escapes (\w, \/, \., \#, \d) that are
        # invalid escape sequences in a regular string literal. The resulting
        # pattern text is unchanged.
        obj["git"] = {
            "enforcedConfigurationRules": [{
                "remoteWhitelist": [r"^(?:git|ssh|https?|git@[-\w.]+):(\/\/)?(.*?)(\.git)?(\/?|\#[-\d\w._]+?)$"]
            }]
        }
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]


class V23UseSmartnameInRefreshChartsStep(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Migrates dashboard references from (projectKey,id) to smartName in refresh charts step

    A dashboard of the current project (or without project key) gets its id as
    smartName; a dashboard of another project gets "<projectKey>.<id>".
    Dashboard entries without an id are dropped.
    """
    def __repr__(self,):
        return "Use smartName in refresh charts steps"

    def transform(self, step, filepath):
        if step is not None and step.get('type', None) == 'refresh_chart_cache':
            # Current project key: taken from the environment when set, else from
            # the name of the directory two levels above the scenario file
            project_key = os.environ.get('DKU_ORIGINAL_PROJECT_KEY', osp.basename(osp.dirname(osp.dirname(filepath))))
            step_params = step.get('params', {})
            dashboards = step_params.get('dashboards', [])
            smart_dashboards = []
            for dashboard in dashboards:
                dashboard_project_key = dashboard.get('projectKey', None)
                dashboard_id = dashboard.get('id', None)
                if dashboard_id is None:
                    # simply ignore, no point in keeping an empty slot
                    continue
                if dashboard_project_key == project_key or dashboard_project_key is None:
                    dashboard['smartName'] = dashboard_id
                else:
                    # this is not supposed to happen unless the user has edited the scenario manually or via the public API
                    dashboard['smartName'] = '%s.%s' % (dashboard_project_key, dashboard_id)
                # projectKey may legitimately be absent (handled above), so a plain
                # `del` would raise KeyError here
                dashboard.pop('projectKey', None)
                del dashboard['id']
                smart_dashboards.append(dashboard)
            step_params['dashboards'] = smart_dashboards
        return step

    def jsonpath(self,):
        return "params.steps"

    def file_patterns(self,):
        return ["scenarios/*.json"]


class V23SkipExpensiveReportsInMLTasks(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Move ml task param 'skipExpensiveReports' in 'modeling'
    """
    def __repr__(self):
        return "Move ml task param skipExpensiveReports in modeling"

    def transform(self, obj, filepath):
        # The flag is only moved when both the flag and its destination exist
        has_flag = "skipExpensiveReports" in obj
        if has_flag and "modeling" in obj:
            modeling = obj["modeling"]
            modeling["skipExpensiveReports"] = obj["skipExpensiveReports"]
            del obj["skipExpensiveReports"]
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json"]

###############################################################################
# V24 / DSS 5.1.1
###############################################################################

class V24UseSmartnameInArticleAttachments(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Replaces the (projectKey, id) pair of each article attachment by a "smartId".

    A DSS object attachment that is not a project and belongs to another
    project gets "<projectKey>.<id>"; every other attachment gets its id.
    """
    def __repr__(self):
        return "Migrating article attachments to use smart IDs"

    def transform(self, obj, filepath=None):
        # Key of the project owning the article: from the environment when set,
        # else the name of the directory three levels above the article file
        context_pkey = os.environ.get('DKU_ORIGINAL_PROJECT_KEY', osp.basename(osp.dirname(osp.dirname(osp.dirname(filepath)))))
        if "attachments" not in obj:
            return obj
        for att in obj.get("attachments"):
            obj_id = att.get("id", None)
            smart_id = obj_id
            if att.get("attachmentType", None) == "DSS_OBJECT":
                pkey = att.get("projectKey", None)
                is_foreign = pkey is not None and pkey != context_pkey
                if att.get("taggableType", None) != "PROJECT" and is_foreign:
                    smart_id = pkey + '.' + obj_id
            att["smartId"] = smart_id
            # The legacy keys are dropped once the smart id is set
            att.pop("id", None)
            att.pop("projectKey", None)
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["wiki/articles/*.json"]

###############################################################################
# V6000 / DSS 6.0.0
###############################################################################

class V6000SetEncoder(json.JSONEncoder):
    """JSON encoder that serializes sets as JSON arrays."""
    def default(self, obj):
        if not isinstance(obj, set):
            # Anything else is left to the stock encoder
            return super(V6000SetEncoder, self).default(obj)
        return list(obj)


class V6000MigrateProjectPathToProjectFolder(migration_base.MigrationOperation):
    """
    Converts the "path" stored in each project's settings into project folders.

    For every config/projects/*/params.json, the folders along settings.path
    are created (or reused), linked to each other, the project key is added to
    the deepest folder, and folder permissions are derived from the project
    permissions. The "path" setting is then removed from params.json, and each
    folder is written to config/project_folders/<folder id>.json.
    """
    def __repr__(self,):
        return "Migrating project path to project folder"

    def execute(self, diphome):
        # Maps a cleaned folder path ('' for the root, then '/a', '/a/b', ...)
        # to {'id': <folder id>, 'object': <folder JSON content>}
        folders = {}
        for fp in glob(osp.join(diphome.path, "config/projects/*/params.json")):
            print("Migrating %s" % fp)
            # Read and close the file before it is rewritten below
            with open(fp, 'r') as f:
                obj = json.load(f)
            path = obj.get(self.jsonpath(), {}).get('path', '/')
            project_permissions = obj.get("permissions", [])
            project_key = osp.basename(osp.dirname(fp))
            folders_in_path = self.cleanup_path(path).split('/')
            last_index = len(folders_in_path) - 1
            # Deepest folder first, so that a child always exists when its parent links to it
            for idx, name in reversed(list(enumerate(folders_in_path))):
                key = '/'.join(folders_in_path[:idx + 1])
                folder = folders.get(key)
                if folder is None:
                    # Only generate an id when the folder really has to be created
                    folder_id = 'ROOT' if idx == 0 else self.generate_small_id([known['id'] for known in folders.values()])
                    folder = {'id': folder_id, 'object': {'parentId': '', 'name': name, 'projectKeys': set([]), 'childrenIds': set([]), 'permissions': [], 'owner': ''}}
                    folders[key] = folder
                if idx < last_index:
                    child_key = '/'.join(folders_in_path[:idx + 2])
                    child = folders[child_key]
                    child['object']['parentId'] = folder['id']
                    folder['object']['childrenIds'].add(child['id'])
                else:
                    folder['object']['projectKeys'].add(project_key)
                if idx > 0: # root does not need permissions
                    for permission in project_permissions:
                        group = permission.get('group')
                        admin = permission.get('admin', False)
                        read = self.has_any_access(permission)
                        if group is None:
                            continue
                        # 'admin' is only granted on the folder directly holding the project
                        new_permission = { 'group': group, 'admin': admin if idx == last_index else False, 'writeContents': admin, 'read': read }
                        existing_permission = self.get_permission(folder['object']['permissions'], group)
                        if existing_permission is not None:
                            self.update_permission(existing_permission, new_permission, 'admin')
                            self.update_permission(existing_permission, new_permission, 'writeContents')
                            self.update_permission(existing_permission, new_permission, 'read')
                        else:
                            folder['object']['permissions'].append(new_permission)
            if obj.get(self.jsonpath(), {}).get('path') is not None:
                del obj[self.jsonpath()]['path']
            with open(fp, 'w') as f:
                f.write(json.dumps(obj, indent=2, sort_keys=True))
        for entry in folders.values():
            file_path_id = osp.join(diphome.path, 'config', 'project_folders', entry['id'] + '.json')
            # The folder object holds sets, hence the dedicated encoder
            folder_json = json.dumps(entry['object'], indent=2, sort_keys=True, cls=V6000SetEncoder)
            print("Create file %s" % file_path_id)
            directory = osp.dirname(file_path_id)
            if not osp.exists(directory):
                os.makedirs(directory)
            with open(file_path_id, 'w+') as f:
                f.write(folder_json)

    def has_any_access(self, project_permission):
        """Tell whether the project permission grants at least one of the listed rights."""
        # the executeApp is left out of this one
        return (project_permission.get('admin', False) or
                project_permission.get('readProjectContent', False) or
                project_permission.get('writeProjectContent', False) or
                project_permission.get('exportDatasetsData', False) or
                project_permission.get('readDashboards', False) or
                project_permission.get('writeDashboards', False) or
                project_permission.get('moderateDashboards', False) or
                project_permission.get('runScenarios', False) or
                project_permission.get('manageDashboardAuthorizations', False) or
                project_permission.get('manageExposedElements', False) or
                project_permission.get('manageAdditionalDashboardUsers', False))

    def cleanup_path(self, path):
        """
        Normalize a project path: one leading slash per folder name, no trailing
        slash, no empty folder name. The root (and an empty or null path) is ''.

        Examples: '/' -> '', 'a/b/' -> '/a/b', '/a//b' -> '/a/b', '' -> ''.
        """
        # Splitting and dropping empty segments handles missing leading slashes,
        # trailing slashes and duplicated slashes in one go, and does not crash
        # on an empty or null path
        segments = [segment for segment in (path or '').split('/') if segment]
        return ''.join('/' + segment for segment in segments)

    def remove_duplicates_slash(self, path):
        """Collapse consecutive slashes of a non-empty path into a single one."""
        new_path = path[0]
        for char in path[1:]:
            if char == '/' and new_path[-1] == '/':
                continue
            new_path += char
        return new_path

    def update_permission(self, existing_permission, new_permission, name):
        """Grant right `name` on the existing permission if either permission grants it."""
        existing_permission[name] = existing_permission[name] or new_permission[name]

    def get_permission(self, permissions, group_name):
        """Return the permission of the given group, or None when there is none."""
        for permission in permissions:
            if permission.get('group') == group_name:
                return permission
        return None

    def generate_small_id(self, existing_ids):
        """Return a random 7-character alphanumeric id that is not in `existing_ids`."""
        alphabet = string.ascii_letters + string.digits
        while True:
            new_id = ''.join(alphabet[random.randint(0, len(alphabet)-1)] for i in range(7))
            if new_id not in existing_ids:
                return new_id

    def jsonpath(self,):
        return "settings"


class V6000MigrateHomeSettings(migration_json.JsonMigrationOperation):
    """
    Renames the 'all-projects-expanded' home behavior to 'project-list'
    in the home settings of each user.
    """
    def __repr__(self):
        return "Change project home behavior in user-settings.json"

    def transform(self, obj, filepath=None):
        # A missing behavior counts as 'home' and is left alone
        if obj.get('behavior', 'home') == 'all-projects-expanded':
            obj['behavior'] = 'project-list'
        return obj

    def jsonpath(self):
        return "userSettings.*.home"

    def file_patterns(self):
        return ["config/user-settings.json"]


class V6000UseNumericIdsForArticle(migration_base.ProjectLocalMigrationOperation):
    """
    Renames wiki articles from <name>.json/.md to <numeric id>.json/.md.

    Numeric ids are assigned while walking the wiki taxonomy breadth-first,
    starting at 1. The taxonomy and its home article reference are rewritten
    with the new ids, and the former name is stored in each article JSON.
    """
    def __repr__(self,):
        return "Migrating article IDs to numeric IDs instead of names"

    # Transforming numbers into strings to protect from weird deserialization
    def name_str(self, name):
        if sys.version_info < (3,):
            if isinstance(name, (int, long, float)):
                return str(name)
        else:
            if isinstance(name, (int, float)):
                return str(name)
        return name

    def process_articles(self, project_paths, mapping):
        """
        Move each mapped article file to its numeric name, then replace the
        articles directory by the migrated one. Article files that are not in
        `mapping` are removed along with the old directory.
        """
        articles_base = osp.join(project_paths.config, "wiki", "articles")
        articles_migrated = osp.join(project_paths.config, "wiki", "articles_migrated")

        if not osp.exists(articles_migrated):
            os.makedirs(articles_migrated)

        for name, article_id in iteritems(mapping):
            for ext in [".json", ".md"]:
                origin = osp.join(articles_base, name + ext)
                target = osp.join(articles_migrated, str(article_id) + ext)
                if not osp.isfile(origin):
                    continue

                if ext == ".json":
                    # Keep track of the former name inside the article itself
                    data = base.json_loadf(origin)
                    data['name'] = name
                    base.json_dumpf(origin, data)

                shutil.move(origin, target)

        shutil.rmtree(articles_base, ignore_errors=True)
        shutil.move(articles_migrated, articles_base)

    def transform_taxonomy(self, project_paths):
        """
        Replace article names by numeric ids in wiki/taxonomy.json.

        Returns the {article name: numeric id} mapping, empty when there is no
        taxonomy file or no 'taxonomy' entry in it.
        """
        taxonomy_file = osp.join(project_paths.config, "wiki", "taxonomy.json")
        mapping = {}

        if osp.isfile(taxonomy_file):
            data = base.json_loadf(taxonomy_file)
            if 'taxonomy' in data:
                # `[] +` needed to duplicate the list (we don't want to edit it)
                queue = [] + data['taxonomy']
                next_id = 1

                while len(queue) != 0:
                    node = queue.pop(0)
                    article_name = self.name_str(node['id'])

                    if article_name not in mapping:
                        mapping[article_name] = next_id
                        next_id += 1

                    node['id'] = mapping[article_name]
                    # A leaf node may come without a 'children' entry
                    queue.extend(node.get('children') or [])

                # Only remap the home article when the taxonomy declares one;
                # an unknown name maps to None
                if 'homeArticleId' in data:
                    data['homeArticleId'] = mapping.get(self.name_str(data['homeArticleId']), None)

                base.json_dumpf(taxonomy_file, data)

        return mapping

    def execute(self, project_paths):
        mapping = self.transform_taxonomy(project_paths)
        self.process_articles(project_paths, mapping)




class V6000MigrateHomepagesArticles(migration_base.MigrationOperation):
    """
    Replaces article names by article ids in the personal home pages articles
    of config/general-settings.json.

    The name -> id mapping of each project is rebuilt from the article files:
    the id is the file name without extension, the name is read from the
    "name" field of the article JSON.
    """
    def __repr__(self,):
        return "Migrating homepage articles to ids"

    def execute(self, diphome):
        general_settings = osp.join(diphome.path, "config", "general-settings.json")
        # {project key: {article name: article id}}
        mapping = {}

        for fp in glob(osp.join(diphome.path, "config", "projects", "*", "wiki", "articles", "*.json")):
            try:
                article = base.json_loadf(fp)
                identifier = osp.basename(fp)
                project_key = osp.basename(osp.dirname(osp.dirname(osp.dirname(fp))))
                articles = mapping.setdefault(project_key, {})
                articles[article["name"]] = identifier[0:identifier.index(".")]
            except Exception:
                # Best effort: an unreadable article is logged and skipped. Unlike a
                # bare except, this lets KeyboardInterrupt / SystemExit go through.
                logging.exception("Unexpected error when trying to read: %s" % fp)

        data_modified = False
        data = base.json_loadf(general_settings)
        for article in data.get("personalHomePages", {}).get("articles", []):
            if "projectKey" in article and "id" in article:
                project_mapping = mapping.get(article["projectKey"], {})
                article_id = project_mapping.get(article["id"], None)
                if article_id is not None:
                    article["id"] = article_id # Replace name with the new Id
                    data_modified = True
        # The settings file is only rewritten when something actually changed
        if data_modified:
            base.json_dumpf(general_settings, data)

class V6000MigrateDashboardArticles(migration_base.ProjectLocalMigrationOperation):
    """
    Replaces article names by article ids in article insights and in the
    dashboard authorizations of the project's params.json.

    The old (name) <-> new (file name without extension) correspondence is
    rebuilt from the article files of the project wiki.
    """
    def __repr__(self,):
        return "Migrating dashboard articles to ids"

    def execute(self, project_paths):
        article_id_old_to_new = {}
        article_id_new_to_old = {}
        for article_file in glob(osp.join(project_paths.config, "wiki", "articles", "*.json")):
            article = base.json_loadf(article_file)
            old_id = article.get("name", None)
            new_id = osp.basename(article_file)[:-5] # trim the .json extension
            if old_id is not None:
                article_id_old_to_new[old_id] = new_id
                article_id_new_to_old[new_id] = old_id

        def convert_id(article_id):
            if article_id in article_id_new_to_old:
                # id is already a new id, keep it (would go wrong if you used numeric names for articles pre-6.0. but then you're insane)
                return article_id
            else:
                # unknown ids are kept as they are
                return article_id_old_to_new.get(article_id, article_id)

        for insight_file in glob(osp.join(project_paths.config, "insights", "*.json")):
            insight = base.json_loadf(insight_file)
            if insight.get('type', None) != "article":
                continue
            insight_params = insight.get('params')
            if insight_params is None:
                # No params, hence no article reference to migrate: writing into
                # insight['params'] would raise KeyError
                print('no params in article insight %s, nothing to migrate' % insight_file)
                continue
            article_id = insight_params.get('articleId', None)
            print('migrate article id %s in insight %s' % (article_id, insight_file))
            insight_params['articleId'] = convert_id(article_id)
            base.json_dumpf(insight_file, insight)

        params_file = osp.join(project_paths.config, "params.json")
        params = base.json_loadf(params_file)
        for authorization in params.get('dashboardAuthorizations', {}).get('authorizations', []):
            object_ref = authorization.get('objectRef', {})
            if object_ref.get('objectType', None) != 'ARTICLE':
                continue
            print('migrate article %s in dashboard authorizations' % object_ref.get('objectId', ''))
            article_id = object_ref.get('objectId', '')
            object_ref['objectId'] = convert_id(article_id)
        base.json_dumpf(params_file, params)


class V6000UpgradeWikiTimelineNumericIds(migration_base.MigrationOperation):
    """
    Runs the `dku __migrate_v6000` command from the post_execute hook.
    """
    def __repr__(self):
        return "Upgrade Wiki timeline with numeric article IDs"

    def execute(self, diphome, simulate=False):
        # Nothing to do here: all the work is done in post_execute
        pass

    def post_execute(self, diphome):
        """Run the dku command through the shell."""
        import subprocess

        # The DKUBIN environment variable overrides the default launcher location
        default_launcher = diphome.path + "/bin/dku"
        command = os.getenv("DKUBIN", default_launcher) + " __migrate_v6000"
        subprocess.check_call(command, shell=True)

class V6000UpgradeEC2Connections(migration_json.JsonMigrationOperation):
    """
    Sets params.credentialsMode on EC2 connections: ENVIRONMENT when
    useDefaultCredentials is set, KEYPAIR otherwise.
    """
    def __repr__(self):
        return "Change credentials mode in AWS connections"

    def transform(self, obj, filepath=None):
        for name, conn in obj.get("connections", {}).items():
            if conn.get("type", "?") != "EC2":
                continue
            print("Upgrading credentials for EC2 connection: %s" % name)
            conn_params = conn.get("params", {})
            use_default = conn_params.get("useDefaultCredentials", False)
            conn_params["credentialsMode"] = "ENVIRONMENT" if use_default else "KEYPAIR"
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/connections.json"]

class V6000PrePushHookGeneralSettings(migration_json.JsonMigrationOperation):
    """
    Sets prePushMode on container execution configs and on the Kubernetes
    settings of Spark execution configs: CUSTOM when a non-empty prePushScript
    is defined, NONE otherwise.
    """
    def __repr__(self):
        return "Upgrade pre-push hook in general settings"

    def transform(self, obj, filepath=None):
        def set_pre_push_mode(holder):
            # `holder` is the dict carrying prePushScript; it receives prePushMode
            script = holder.get("prePushScript", None)
            holder["prePushMode"] = "NONE" if script is None or script == "" else "CUSTOM"

        container_configs = obj.get("containerSettings", {}).get("executionConfigs", [])
        for exec_config in container_configs:
            set_pre_push_mode(exec_config)

        spark_configs = obj.get("sparkSettings", {}).get("executionConfigs", [])
        for exec_config in spark_configs:
            k8s_settings = exec_config.get("kubernetesSettings", None)
            if k8s_settings is not None:
                set_pre_push_mode(k8s_settings)
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/general-settings.json"]

class V6000PrePushHookInAPIDeployerInfras(migration_json.JsonMigrationOperation):
    """
    Sets prePushMode on API deployer infrastructures: CUSTOM when a non-empty
    prePushScript is defined, NONE otherwise.
    """
    def __repr__(self):
        return "Upgrade pre-push hook in API deployer"

    def transform(self, obj, filepath=None):
        script = obj.get("prePushScript", None)
        no_script = script is None or script == ""
        obj["prePushMode"] = "NONE" if no_script else "CUSTOM"
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/api-deployer/infras/*.json"]


class V6000MigrateDoctorExecutionParams(migration_base.ProjectLocalMigrationOperation):
    """Stores backend type, task type and execution params in the core params of trained models.

    For every visual ML session found under the project's analysis data and for every saved
    model version, ``core_params.json`` is (re)written with ``taskType``, ``backendType`` and
    an ``executionParams`` dict. The execution params are built from the matching ML task
    when one can be found, and from ``default_execution_params()`` otherwise.
    """

    def __repr__(self):
        return "Migrating visual ML execution params"

    def default_execution_params(self):
        """Return a new dict with the execution params used when none can be read from a task."""
        # Relies on the fact that current (in DSS<=5.1) default value for envSelection is Inherit
        return {
            "sparkCheckpoint": "NONE",
            "sparkParams": {
                "sparkConf": {
                    "inheritConf": "default",
                    "conf": []
                },
                "sparkUseGlobalMetastore": False,
                "sparkPreparedDFStorageLevel": "MEMORY_AND_DISK",
                "sparkRepartitionNonHDFS": 1,
                "pipelineAllowStart": True,
                "pipelineAllowMerge": True,
                "sparkExecutionEngine": "SPARK_SUBMIT"
            },
            "containerSelection": {
                "containerMode": "INHERIT"
            },
            "envSelection": {
                "envMode": "INHERIT"
            }
        }

    def migrate_trained_session(self, session_dir, project_code_env_name):
        """Write task type, backend type and execution params into the session's core params.

        Values come from ``mltask.json`` in ``session_dir`` when that file exists; otherwise
        the defaults (``PY_MEMORY`` / ``PREDICTION`` / default execution params) are used.
        """
        mltask_file = osp.join(session_dir, "mltask.json")

        backend_type = "PY_MEMORY"
        task_type = "PREDICTION"
        if osp.isfile(mltask_file):
            mltask_data = base.json_loadf(mltask_file)
            execution_params = self.get_execution_params_from_mltask(mltask_data, project_code_env_name)
            backend_type = mltask_data.get("backendType", "PY_MEMORY")
            task_type = mltask_data.get("taskType", "PREDICTION")
        else:
            execution_params = self.default_execution_params()

        core_params_file = osp.join(session_dir, "core_params.json")
        if osp.isfile(core_params_file):
            core_params = base.json_loadf(core_params_file)
        else:  # for clustering
            core_params = {}

        core_params["taskType"] = task_type
        core_params["backendType"] = backend_type
        core_params["executionParams"] = execution_params
        base.json_dumpf(core_params_file, core_params)

    def _find_origin_mltask_file(self, sm_version_dir, project_paths):
        """Return the path of the ``mltask.json`` of the analysis session a saved model version
        originates from, or None when it cannot be located."""
        sm_origin_file = osp.join(sm_version_dir, "sm_origin.json")
        if not osp.isfile(sm_origin_file):
            return None

        origin_full_model_id = base.json_loadf(sm_origin_file).get("fullModelId", None)
        if origin_full_model_id is None:
            return None

        fmi_info = self.partial_parse_analysis_full_model_id(origin_full_model_id)
        if fmi_info is None:
            return None

        mltask_dir = osp.join(project_paths.analysis_data, fmi_info["analysis_id"], fmi_info["mltask_id"])
        # execute() walks sessions as <analysis>/<mltask>/sessions/<session>, so that layout is
        # tried first. The path without the "sessions" level (the one originally built here)
        # is kept as a fallback.
        candidate_session_dirs = (
            osp.join(mltask_dir, "sessions", fmi_info["session_id"]),
            osp.join(mltask_dir, fmi_info["session_id"]),
        )
        for origin_session_folder in candidate_session_dirs:
            # The session folder is a directory: testing it with isfile() could never succeed,
            # so the presence of mltask.json inside it is what gets checked.
            mltask_file = osp.join(origin_session_folder, "mltask.json")
            if osp.isfile(mltask_file):
                return mltask_file
        return None

    def migrate_saved_model_version(self, sm_version_dir, sm_id, project_paths, project_code_env_name):
        """Write backend type, task type and execution params into a saved model version's core params.

        The ML task of the originating analysis session is used when it can be found, then the
        ``miniTask`` of the saved model config, then the defaults.
        """
        execution_params = None
        backend_type = "PY_MEMORY"
        task_type = "PREDICTION"

        # First need to retrieve the corresponding MLTask
        origin_mltask_file = self._find_origin_mltask_file(sm_version_dir, project_paths)
        if origin_mltask_file is not None:
            mltask_data = base.json_loadf(origin_mltask_file)
            execution_params = self.get_execution_params_from_mltask(mltask_data, project_code_env_name)
            backend_type = mltask_data.get("backendType", "PY_MEMORY")
            task_type = mltask_data.get("taskType", "PREDICTION")

        if execution_params is None:
            # Try to fetch params from Saved Model miniTask, which is how it behaved previously
            sm_config_file = osp.join(project_paths.config, "saved_models", "{}.json".format(sm_id))

            if osp.isfile(sm_config_file):
                sm_config = base.json_loadf(sm_config_file)
                minitask_data = sm_config.get("miniTask", None)
                if minitask_data is not None:
                    execution_params = self.get_execution_params_from_mltask(minitask_data, project_code_env_name)
                    backend_type = minitask_data.get("backendType", "PY_MEMORY")
                    task_type = minitask_data.get("taskType", "PREDICTION")

        if execution_params is None:
            execution_params = self.default_execution_params()

        core_params_file = osp.join(sm_version_dir, "core_params.json")
        if osp.isfile(core_params_file):
            core_params = base.json_loadf(core_params_file)
        else:  # for clustering
            core_params = {}

        core_params["backendType"] = backend_type
        core_params["taskType"] = task_type
        core_params["executionParams"] = execution_params
        base.json_dumpf(core_params_file, core_params)

    def get_execution_params_from_mltask(self, mltask_data, project_code_env_name):
        """Build execution params from an ML task dict, starting from the defaults.

        ``envSelection`` (and ``envName`` when it can be resolved) are taken from the task, and
        the container/Spark settings present in the task override the default ones.
        """
        execution_params = self.default_execution_params()

        env_selection_to_migrate = mltask_data.get("envSelection", {})
        env_name, env_selection = self.resolve_env_selection(env_selection_to_migrate, project_code_env_name)

        if env_selection is not None:
            execution_params["envSelection"] = env_selection

        if env_name is not None:
            execution_params["envName"] = env_name

        for k in ("containerSelection", "sparkParams", "sparkCheckpoint", "sparkCheckpointDir"):
            if k in mltask_data:
                execution_params[k] = mltask_data[k]

        return execution_params

    def resolve_env_selection(self, env_selection, project_code_env_name):
        """Return ``(env_name, env_selection)``.

        ``env_name`` is the project code env for ``INHERIT``, the selection's own ``envName``
        for ``EXPLICIT_ENV`` and None for any other mode. ``env_selection`` is returned as is.
        """
        env_mode = env_selection.get("envMode", None)
        if env_mode == "INHERIT":
            env_name = project_code_env_name
        elif env_mode == "EXPLICIT_ENV":
            env_name = env_selection.get("envName", None)
        else:
            env_name = None
        return env_name, env_selection

    def fetch_project_code_env(self, project_paths):
        """Return the name of the project's Python code env, or None.

        None is returned when ``params.json`` is missing or when the project uses the builtin
        env (``useBuiltinEnv``, which defaults to True here).
        """
        if not osp.isdir(project_paths.config):
            return None
        project_params_file = osp.join(project_paths.config, "params.json")
        if not osp.isfile(project_params_file):
            return None

        project_params = base.json_loadf(project_params_file)
        project_py_code_envs_params = project_params.get("settings", {}).get("codeEnvs", {}).get("python", {})
        if project_py_code_envs_params.get("useBuiltinEnv", True):
            return None
        return project_py_code_envs_params.get("envName", None)

    def partial_parse_analysis_full_model_id(self, full_model_id_str):
        """
            Extract analysis, ML task and session ids from an analysis full model id.

            Returns None unless the id is made of exactly 7 dash-separated elements, the
            first one being "A".

            Example: A-TOTO-RwMNEg5m-CDX4wKoq-s2-pp1-m1
        """
        elements = full_model_id_str.split("-")

        if elements[0] != "A" or len(elements) != 7:
            return None

        return {
            "analysis_id": elements[2],
            "mltask_id": elements[3],
            "session_id": elements[4]
        }

    def execute(self, project_paths):
        """Migrate every ML task session and saved model version of the project.

        A failure on one session or version is printed and does not stop the others.
        """
        # Fetch project code env for future use
        project_code_env_name = self.fetch_project_code_env(project_paths)

        # Migrating analysis data
        if osp.isdir(project_paths.analysis_data):
            for anl in os.listdir(project_paths.analysis_data):
                anl_dir = osp.join(project_paths.analysis_data, anl)
                if not osp.isdir(anl_dir):
                    continue
                for mltask in os.listdir(anl_dir):
                    sessions_dir = osp.join(anl_dir, mltask, "sessions")
                    if not osp.isdir(sessions_dir):
                        continue
                    for session in os.listdir(sessions_dir):
                        session_dir = osp.join(sessions_dir, session)
                        if not osp.isdir(session_dir):
                            continue
                        print("Migrating trained ML Task session: %s %s %s" % (anl, mltask, session))
                        try:
                            self.migrate_trained_session(session_dir, project_code_env_name)
                        except Exception as e:
                            print("Trained model '%s %s %s' migration FAILED: %s" % (anl, mltask, session, e))

        # Migrating saved model data
        if osp.isdir(project_paths.saved_models):
            for sm in os.listdir(project_paths.saved_models):
                sm_versions_dir = osp.join(project_paths.saved_models, sm, "versions")
                if not osp.isdir(sm_versions_dir):
                    continue
                for sm_version in os.listdir(sm_versions_dir):
                    sm_version_dir = osp.join(sm_versions_dir, sm_version)
                    if not osp.isdir(sm_version_dir):
                        continue
                    print("Migrating saved model session: %s %s" % (sm, sm_version))
                    try:
                        self.migrate_saved_model_version(sm_version_dir, sm, project_paths, project_code_env_name)
                    except Exception as e:
                        print("Saved model '%s %s' migration FAILED: %s" % (sm, sm_version, e))

class V6000MigrateKerasModelListedInCodeEnv(migration_base.ProjectLocalMigrationOperation):
    """Lists, in the manifest additions, the code envs used by visual deep-learning (KERAS) models.

    Code envs are collected from ML tasks with an explicit env and from prediction training
    recipes, then merged into ``usedCodeEnvRefs``.

    NOTE(review): ``used_code_envs`` lives on the instance and is never reset, so if one
    instance is used for several projects the collected envs accumulate -- confirm against
    the migration runner.
    """

    def __repr__(self):
        return "Migrating listing of code-envs used in visual deep-learning"

    def __init__(self):
        self.used_code_envs = []

    def execute(self, project_paths):
        # Nothing to change on disk: all the work happens in get_manifest_additions()
        pass

    def find_dl_code_env_usage_in_mltask(self, mltask_file):
        """Record the explicit code env of a KERAS ML task params file, if it has one."""
        mltask_data = base.json_loadf(mltask_file)
        if mltask_data.get("backendType", "PY_MEMORY") != "KERAS":
            return
        env_selection = mltask_data.get("envSelection", None)
        if env_selection is None or env_selection.get("envMode", None) != "EXPLICIT_ENV":
            return
        env_name = env_selection.get("envName", None)
        if env_name is not None:
            self.used_code_envs.append({
                "envName": env_name,
                "envLang": "PYTHON"
            })

    def find_dl_code_env_usage_in_recipe(self, recipe_file):
        """Record the ``envName`` of a KERAS prediction training recipe, if it has one."""
        recipe_data = base.json_loadf(recipe_file)
        if recipe_data.get("backendType", "PY_MEMORY") != "KERAS":
            return
        env_name = recipe_data.get("envName", None)
        if env_name is not None:
            self.used_code_envs.append({
                "envName": env_name,
                "envLang": "PYTHON"
            })

    def get_manifest_additions(self, additions, project_paths):
        """Scan the project config and add the code envs found to ``additions["usedCodeEnvRefs"]``.

        ``additions`` is modified in place; entries already present (same ``envName`` and
        ``envLang``) are not duplicated. Files that cannot be read are reported and skipped.
        """
        if osp.isdir(project_paths.config):
            # Looking for all DL MLTasks with explicit code-env
            analysis_data_dir = osp.join(project_paths.config, "analysis")
            if osp.isdir(analysis_data_dir):
                for anl in os.listdir(analysis_data_dir):
                    anl_dir = osp.join(analysis_data_dir, anl, "ml")
                    if not osp.isdir(anl_dir):
                        continue
                    for mltask in os.listdir(anl_dir):
                        mltask_file = osp.join(anl_dir, mltask, "params.json")
                        # isfile() on params.json also covers "mltask is a directory"
                        if not osp.isfile(mltask_file):
                            continue
                        try:
                            self.find_dl_code_env_usage_in_mltask(mltask_file)
                        except Exception:
                            print("Cannot find code-env usage of Mltask: %s %s" % (anl, mltask))

            # Looking for all DL ML train recipes to find code env usages
            recipes_dir = osp.join(project_paths.config, "recipes")
            if osp.isdir(recipes_dir):
                # glob() already returns paths that include recipes_dir: joining them with
                # recipes_dir again would duplicate the directory part for relative paths
                for train_recipe_file in glob(osp.join(recipes_dir, "*.prediction_training")):
                    if not osp.isfile(train_recipe_file):
                        continue
                    try:
                        self.find_dl_code_env_usage_in_recipe(train_recipe_file)
                    except Exception:
                        print("Cannot find code-env usage of train recipe: %s" % train_recipe_file)

        # Add used code-envs if any
        if len(self.used_code_envs) > 0:
            used_code_envs = additions.get("usedCodeEnvRefs", [])
            for used_code_env in self.used_code_envs:
                already_in_list = any(
                    c.get("envName", None) == used_code_env["envName"]
                    and c.get("envLang", None) == used_code_env["envLang"]
                    for c in used_code_envs
                )
                if not already_in_list:
                    used_code_envs.append(used_code_env)
            additions["usedCodeEnvRefs"] = used_code_envs


class V6000MigrateEvaluationRecipeMetricsOutputs(migration_json.ProjectConfigJsonMigrationOperation):
    """Rewrites the metrics and outputs configuration of evaluation recipes.

    The prediction type of the evaluated model is looked up through the recipe files sitting
    next to the ``.evaluation`` file, and is used to choose the lists written to ``metrics``
    and ``outputs``.
    """

    def __repr__(self, ):
        return "Upgrade evaluation recipe metrics and ouputs configurations"

    def get_evaluated_recipe_ref(self, obj, filepath):
        """Return the ``ref`` of the first ``model`` input of the evaluation recipe, or None.

        The inputs are read from the ``.json`` file that shares its base name with the
        ``.evaluation`` file at ``filepath``.
        """
        eval_recipe_suffix = ".evaluation"
        eval_json_file_path = filepath[:-len(eval_recipe_suffix)] + ".json"
        if not osp.isfile(eval_json_file_path):
            return None

        with open(eval_json_file_path) as eval_json_file:
            eval_json = json.load(eval_json_file)
        if not eval_json or not eval_json.get("inputs", None):
            return None

        model_items = eval_json["inputs"].get("model", {}).get("items", [])
        if len(model_items) > 0 and model_items[0].get("ref", None):
            return model_items[0]["ref"]
        return None

    def _read_prediction_type(self, prediction_training_filename):
        """Return ``core.prediction_type`` of a ``.prediction_training`` file, or None if absent."""
        if not osp.isfile(prediction_training_filename):
            return None
        with open(prediction_training_filename) as prediction_training_file:
            prediction_training = json.load(prediction_training_file)
        core = prediction_training.get("core", None)
        if core and core.get("prediction_type", None):
            return core["prediction_type"]
        return None

    def get_evaluated_recipe_type(self, obj, filepath):
        """Return the prediction type of the recipe that outputs the evaluated model, or None.

        Every ``*.json`` recipe of the directory holding ``filepath`` is scanned for a main
        output matching the evaluated model ref; the prediction type is then read from the
        ``.prediction_training`` file with the same base name.
        """
        searched_ref = self.get_evaluated_recipe_ref(obj, filepath)
        if not searched_ref:
            return None

        recipes_dir = osp.dirname(filepath)
        if not osp.isdir(recipes_dir):
            return None

        json_suffix = ".json"
        for recipe_file_name in os.listdir(recipes_dir):
            # Check the full extension: the recipe name below is obtained by stripping it
            if not recipe_file_name.endswith(json_suffix):
                continue
            recipe_path = osp.join(recipes_dir, recipe_file_name)
            if not osp.isfile(recipe_path):
                continue

            with open(recipe_path) as recipe_file:
                recipe = json.load(recipe_file)
            if not recipe:
                continue
            output_items = recipe.get("outputs", {}).get("main", {}).get("items", None)
            if not output_items:
                continue

            prediction_training_filename = osp.join(
                recipes_dir, "%s.prediction_training" % recipe_file_name[:-len(json_suffix)])
            for item in output_items:
                if searched_ref != item.get("ref", None):
                    continue
                prediction_type = self._read_prediction_type(prediction_training_filename)
                if prediction_type:
                    return prediction_type
        return None

    def add_all_metrics(self, obj, evaluated_recipe_type):
        """Set ``obj["metrics"]`` to the full list for the given prediction type.

        ``obj`` is left untouched when the type is none of the three handled ones.
        """
        if "REGRESSION" == evaluated_recipe_type:
            obj["metrics"] = ["evs", "mae", "mse", "mape", "rmse", "rmsle", "r2", "pearson", "customScore"]
        elif "BINARY_CLASSIFICATION" == evaluated_recipe_type:
            obj["metrics"] = ["precision", "recall", "auc", "f1", "accuracy", "mcc", "hammingLoss", "logLoss", "lift", "calibrationLoss", "customScore"]
        elif "MULTICLASS" == evaluated_recipe_type:
            obj["metrics"] = ["mrocAUC", "recall", "precision", "accuracy", "logLoss", "hammingLoss", "mcalibrationLoss", "customScore"]

    def add_all_prior_outputs(self, obj, evaluated_recipe_type):
        """Set ``obj["outputs"]``: the error columns for regression, ``prediction_correct`` otherwise."""
        if "REGRESSION" == evaluated_recipe_type:
            obj["outputs"] = ["error", "error_decile", "abs_error_decile"]
        else:
            obj["outputs"] = ["prediction_correct"]

    def transform(self, obj, filepath=None):
        """Migrate one evaluation recipe payload in place and return it."""
        evaluated_recipe_type = self.get_evaluated_recipe_type(obj, filepath)
        filter_metrics = obj.get("filterMetrics", None)
        # no metrics filtering was defined. Let's add all metrics
        if not filter_metrics:
            self.add_all_metrics(obj, evaluated_recipe_type)
        else:
            del obj["filterMetrics"]

        self.add_all_prior_outputs(obj, evaluated_recipe_type)
        return obj

    def file_patterns(self,):
        return ["recipes/*.evaluation"]

###############################################################################
# V6020 / DSS 6.0.2
###############################################################################

class V6020FixArticleIdMigration(migration_base.ProjectLocalMigrationOperation):
    """Replaces old article ids by the new ids in article insights and dashboard authorizations."""

    def __repr__(self,):
        return "Migrating article IDs to numeric IDs in dashboards"

    def execute(self, project_paths):
        """Build the old id -> new id mapping from the wiki articles and apply it.

        The mapping is applied to the ``articleId`` of insights of type ``article`` and to the
        ``ARTICLE`` object refs of the dashboard authorizations in the project's ``params.json``.
        """
        # V6000UseNumericIdsForArticle was incomplete and didn't handle the links to articles
        # that can be found in insights and dashboards. So:
        # - build a mapping of old articleId -> new numericId by going over the articles' jsons
        # - apply the mapping to:
        #   * insights
        #   * reader authorizations
        # - change the V6000UseNumericIdsForArticle to handle dashboards too (actually by
        #   adding a V6000MigrateDashboardArticles step)

        article_id_old_to_new = {}
        article_id_new_to_old = {}
        for article_file in glob(osp.join(project_paths.config, "wiki", "articles", "*.json")):
            article = base.json_loadf(article_file)
            old_id = article.get("name", None)
            new_id = osp.basename(article_file)[:-5] # trim the .json extension
            if old_id is not None:
                article_id_old_to_new[old_id] = new_id
                article_id_new_to_old[new_id] = old_id

        def convert_id(article_id):
            if article_id in article_id_new_to_old:
                # id is already a new id, keep it (would go wrong if you used numeric names for articles pre-6.0. but then you're insane)
                return article_id
            else:
                return article_id_old_to_new.get(article_id, article_id) # keep article_id if it's already numeric

        for insight_file in glob(osp.join(project_paths.config, "insights", "*.json")):
            insight = base.json_loadf(insight_file)
            if insight.get('type', None) != "article":
                continue
            insight_params = insight.get('params', None)
            if not isinstance(insight_params, dict) or insight_params.get('articleId', None) is None:
                # No article id to convert: skip this insight instead of failing on the
                # missing 'params' (which would abort the whole operation for the project)
                continue
            article_id = insight_params['articleId']
            print('migrate article id %s in insight %s' % (article_id, insight_file))
            insight_params['articleId'] = convert_id(article_id)
            base.json_dumpf(insight_file, insight)

        params_file = osp.join(project_paths.config, "params.json")
        params = base.json_loadf(params_file)
        for authorization in params.get('dashboardAuthorizations', {}).get('authorizations', []):
            object_ref = authorization.get('objectRef', {})
            if object_ref.get('objectType', None) != 'ARTICLE':
                continue
            article_id = object_ref.get('objectId', '')
            print('migrate article %s in dashboard authorizations' % article_id)
            object_ref['objectId'] = convert_id(article_id)
        base.json_dumpf(params_file, params)

###############################################################################
# V6030 / DSS 6.0.3
###############################################################################

class V6030FixMicrosoftTeamsIntegrationMigration(migration_base.ProjectLocalMigrationOperation):
    """Renames the configuration fields of Microsoft Teams reporters in scenarios."""

    def __repr__(self,):
        return "Migrating Microsoft Teams scenarios reporters"

    def execute(self, project_paths):
        """Rewrite every scenario file that holds a Microsoft Teams reporter configuration.

        A scenario that cannot be processed is logged with its traceback and skipped.
        """
        # For Microsoft Teams reporters:
        # - rename 'url' into 'webhookUrl'
        # - rename 'payload' into 'message'
        default_message = "${if(outcome == 'SUCCESS', '&#x2705;', '')}${if(outcome == 'FAILED', '&#x1F534;', '')}${if(outcome == 'WARNING', '&#x1F536;', '')}${if(outcome == '' || outcome == 'N/A', '&#x1F514;', '')} DSS Scenario [${scenarioName}](${scenarioRunURL}) triggered by ${triggerName}: **${outcome}**"

        for fp in glob(osp.join(project_paths.config, "scenarios", "*.json")):
            try:
                scenario_config = base.json_loadf(fp)
                scenario_config_modified = False
                for reporter in scenario_config.get("reporters", []):
                    messaging = reporter.get("messaging", None)
                    if messaging is None or messaging.get("type", None) != "msft-teams-scenario":
                        continue
                    configuration = messaging.get("configuration", None)
                    if configuration is None:
                        continue

                    webhook_url = configuration.pop("url", None)
                    if webhook_url is not None:
                        configuration["webhookUrl"] = webhook_url

                    # A reporter without payload gets the default message
                    message = configuration.pop("payload", None)
                    configuration["message"] = message if message is not None else default_message

                    configuration["useGlobalChannel"] = False
                    scenario_config_modified = True

                if scenario_config_modified:
                    base.json_dumpf(fp, scenario_config)
            except Exception:
                # Not a bare except: KeyboardInterrupt / SystemExit must still stop the migration
                logging.exception("Unexpected error when trying to read: %s" % fp)

###############################################################################
# V7000 / DSS 7.0.0
###############################################################################

class V7000UserCredentialsRenaming(migration_base.MigrationOperation):
    """Renames the ``connectionCredentials`` field of each user to ``credentials``."""

    def __repr__(self,):
        return "Renaming connectionCredentials to credentials for each user"

    def execute(self, diphome):
        """Load ``config/users.json`` under ``diphome.path``, rename the field and write it back."""
        legacy_key = "connectionCredentials"
        users_file = osp.join(diphome.path, "config/users.json")
        users_data = base.json_loadf(users_file)

        for user in users_data.get("users", []):
            if legacy_key not in user:
                continue
            user["credentials"] = user.pop(legacy_key)

        print("Writing users file with renamed credentials field")
        base.json_dumpf(users_file, users_data)


class V7000ExpositionkInAPIDeployerInfras(migration_json.JsonMigrationOperation):
    """Converts ``defaultServiceExposition`` of API deployer infras to the ``type``/``params`` form."""

    def __repr__(self, ):
        return "Convert expositions in infras of API deployer"

    def transform(self, obj, filepath=None):
        """Replace ``defaultServiceExposition`` in ``obj`` by its converted form and return ``obj``.

        An unknown or missing ``serviceType`` maps to ``cluster_ip``; a missing ``port`` to -1.
        """
        legacy = obj.get("defaultServiceExposition", {})
        new_type_by_service_type = {
            'CLUSTER_IP': 'cluster_ip',
            'NODE_PORT': 'node_port',
            'LOAD_BALANCER': 'load_balancer',
            'INGRESS': 'ingress',
        }

        converted = {}
        converted['type'] = new_type_by_service_type.get(legacy.get('serviceType', ''), 'cluster_ip')
        converted['params'] = {'port': legacy.get('port', -1)}

        obj["defaultServiceExposition"] = converted
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/api-deployer/infras/*.json"]

class V7000ExpositionkInAPIDeployerDeployments(migration_json.JsonMigrationOperation):
    """Converts ``serviceExposition`` of API deployer deployments to the ``type``/``params`` form."""

    def __repr__(self, ):
        return "Convert expositions in deployments of API deployer"

    def transform(self, obj, filepath=None):
        """Replace ``serviceExposition`` in ``obj`` by its converted form and return ``obj``.

        An unknown or missing ``serviceType`` maps to ``cluster_ip``; a missing ``port`` to -1.
        """
        legacy = obj.get("serviceExposition", {})
        new_type_by_service_type = {
            'CLUSTER_IP': 'cluster_ip',
            'NODE_PORT': 'node_port',
            'LOAD_BALANCER': 'load_balancer',
            'INGRESS': 'ingress',
        }

        converted = {}
        converted['type'] = new_type_by_service_type.get(legacy.get('serviceType', ''), 'cluster_ip')
        converted['params'] = {'port': legacy.get('port', -1)}

        obj["serviceExposition"] = converted
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/api-deployer/deployments/*.json"]

class V7000RemoveHipchatReporters(migration_json.ProjectConfigJsonMigrationOperation):
    """Drops the reporters whose messaging type is ``hipchat-scenario`` from scenarios."""

    def __repr__(self):
        return "Removing existing Hipchat Reporters"

    def transform(self, obj, filepath=None):
        """Rewrite ``obj["reporters"]`` without the Hipchat ones and return ``obj`` (None is passed through)."""
        if obj is None:
            return obj

        kept_reporters = []
        for reporter in obj.get("reporters", []):
            if self.isHipchatReporter(reporter):
                continue
            kept_reporters.append(reporter)
        obj["reporters"] = kept_reporters
        return obj

    def isHipchatReporter(self, reporter):
        """Tell whether ``reporter`` has a messaging of type ``hipchat-scenario``."""
        messaging = reporter.get("messaging")
        if messaging is None:
            return False
        return messaging.get("type") == "hipchat-scenario"

    def file_patterns(self,):
        return ["scenarios/*.json"]


class V7000RemoveHipchatChannels(migration_json.JsonMigrationOperation):
    """Drops the channels of type ``hipchat`` from the messaging channels file."""

    def __repr__(self):
        return "Removing existing Hipchat Channels"

    def transform(self, obj, filepath=None):
        """Rewrite ``obj["channels"]`` without the Hipchat ones and return ``obj`` (None is passed through)."""
        if obj is None:
            return obj

        kept_channels = []
        for channel in obj.get("channels", []):
            if channel.get('type', None) != 'hipchat':
                kept_channels.append(channel)
        obj["channels"] = kept_channels
        return obj

    def file_patterns(self,):
        return ["config/messaging-channels.json"]

class V7000RemoveHipchatIntegrations(migration_json.ProjectConfigJsonMigrationOperation):
    """Drops the integrations whose hook type is ``hipchat-project`` from project params."""

    def __repr__(self):
        return "Removing existing Hipchat Integrations"

    def transform(self, obj, filepath=None):
        """Filter ``settings.integrations.integrations`` in place and return ``obj``.

        Nothing is changed when ``obj`` is None or has no ``settings.integrations``.
        """
        if obj is None:
            return obj

        integrations = obj.get("settings", {}).get("integrations", None)
        if integrations is None:
            return obj

        integrations["integrations"] = [
            integration
            for integration in integrations.get("integrations", [])
            if integration.get('hook', {}).get('type', None) != 'hipchat-project'
        ]
        return obj

    def file_patterns(self,):
        return ["params.json"]

class V7000MigrateSamlSPConfig(migration_json.JsonMigrationOperation):
    """Replaces the SAML SP metadata XML of the SSO settings by explicit SP params.

    Only applies when SSO is enabled with the ``SAML`` protocol, ``samlSPMetadata`` is set
    and ``samlSPParams`` is not.
    """

    def __repr__(self):
        return "Migrating SAML SP configuration"

    def _parse_sp_metadata(self, metadata_xml):
        """Return ``(entityID, acsURL)`` read from the SP metadata XML.

        Raises Exception when the root tag is not an EntityDescriptor, when the entity id or
        the ACS location is missing, or when there is not exactly one AssertionConsumerService.
        """
        import xml.etree.ElementTree as ET

        logging.info("Parsing SAML SP Metadata")
        root = ET.fromstring(metadata_xml)
        if root.tag != '{urn:oasis:names:tc:SAML:2.0:metadata}EntityDescriptor':
            raise Exception("Invalid XML tag for SP metadata : %s" % root.tag)

        entity_id = root.get('entityID')
        if not entity_id:
            raise Exception("entityID not found in SP metadata")
        logging.info("Found entityID = %s" % entity_id)

        namespaces = { 'md' : 'urn:oasis:names:tc:SAML:2.0:metadata' }
        consumer_services = root.findall('./md:SPSSODescriptor/md:AssertionConsumerService', namespaces)
        if not consumer_services:
            raise Exception("AssertionConsumerService node not found in SP metadata")
        if len(consumer_services) > 1:
            raise Exception("Multiple AssertionConsumerService nodes found in SP metadata - not supported")

        acs_url = consumer_services[0].get('Location')
        if not acs_url:
            raise Exception("ACS URL not found in SP metadata")
        logging.info("Found ACS URL = %s" % acs_url)

        return entity_id, acs_url

    def transform(self, obj, filepath=None):
        """Convert ``ssoSettings`` in place when applicable and return ``obj``."""
        sso = obj.get("ssoSettings")
        if not sso or not sso.get("enabled", False) or sso.get("protocol") != "SAML":
            return obj
        if not sso.get("samlSPMetadata") or sso.get("samlSPParams"):
            return obj

        entity_id, acs_url = self._parse_sp_metadata(sso['samlSPMetadata'])

        # The metadata is only dropped once it has been parsed successfully
        del sso['samlSPMetadata']
        sso['samlSPParams'] = {
            'entityId': entity_id,
            'acsURL': acs_url
        }
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]

class V7000MigrateAlgorithmsParamsStructure(migration_base.ProjectLocalMigrationOperation):
    """Restructure the algorithm params of ML tasks, training recipes and trained models.

    Numerical params are wrapped into ``{"values": ..., "gridMode": "EXPLICIT"}`` and
    groups of boolean flags are gathered under a single dimension as
    ``{"values": {flag: {"enabled": ...}}}``.

    Files are migrated on a best-effort basis: a failure on one file is printed
    and does not prevent the migration of the other files.
    """

    def __repr__(self):
        return "Migrating algorithms params structure"

    # Per dimension migration

    def migrate_numerical_params(self, algo_params, dimension):
        """Wrap ``algo_params[dimension]`` into the explicit grid structure.

        Nothing is done when ``algo_params`` is not a dict, when it has no
        ``dimension`` key, or when the stored value is already a dict holding a
        ``"values"`` entry (wrapping it again would nest the structure).
        """
        if not isinstance(algo_params, dict) or dimension not in algo_params:
            return

        previous_values = algo_params[dimension]
        if isinstance(previous_values, dict) and "values" in previous_values:
            return  # already migrated, not doing it again

        algo_params[dimension] = {
            "values": previous_values,
            "gridMode": "EXPLICIT"
        }

    def migrate_categorical_params(self, algo_params, dimension, boolean_fields):
        """Gather the flags named in ``boolean_fields`` under ``algo_params[dimension]``.

        Each flag becomes ``{"enabled": <value>}`` in the ``"values"`` dict of the
        dimension and its legacy key is removed from ``algo_params``. A missing
        or null flag is stored as disabled. If no element of ``boolean_fields``
        is found in the original model, the first one is enabled.

        Nothing is done when ``algo_params`` is not a dict or when the dimension
        already is a dict holding a ``"values"`` entry (already migrated).
        """
        if not isinstance(algo_params, dict):
            return

        current = algo_params.get(dimension)
        if isinstance(current, dict) and "values" in current:
            return  # already migrated, re-running would reset the choices to the default

        # pop() also removes legacy keys that are present with a null value
        legacy_flags = [(field, algo_params.pop(field, None)) for field in boolean_fields]
        all_fields_none = all(flag is None for _, flag in legacy_flags)

        values = {}
        for field, flag in legacy_flags:
            values[field] = {"enabled": False if flag is None else flag}

        if all_fields_none and boolean_fields:
            values[boolean_fields[0]]["enabled"] = True

        algo_params[dimension] = {"values": values}

    # List of all algorithms to migrate

    def migrate_tree_based(self, algo_params):
        """Migrate the numerical params used by random forests and extra trees."""
        self.migrate_numerical_params(algo_params, "n_estimators")
        self.migrate_numerical_params(algo_params, "max_tree_depth")
        self.migrate_numerical_params(algo_params, "min_samples_leaf")
        self.migrate_numerical_params(algo_params, "max_features")

    def migrate_gbt_classification(self, algo_params):
        """Migrate gradient boosted trees params (classification losses)."""
        self.migrate_numerical_params(algo_params, "n_estimators")
        self.migrate_numerical_params(algo_params, "max_depth")
        self.migrate_numerical_params(algo_params, "min_samples_leaf")
        self.migrate_numerical_params(algo_params, "max_features")
        self.migrate_numerical_params(algo_params, "learning_rate")
        self.migrate_categorical_params(algo_params, "loss", ["deviance", "exponential"])

    def migrate_gbt_regression(self, algo_params):
        """Migrate gradient boosted trees params (regression losses)."""
        self.migrate_numerical_params(algo_params, "n_estimators")
        self.migrate_numerical_params(algo_params, "max_depth")
        self.migrate_numerical_params(algo_params, "min_samples_leaf")
        self.migrate_numerical_params(algo_params, "max_features")
        self.migrate_numerical_params(algo_params, "learning_rate")
        self.migrate_categorical_params(algo_params, "loss", ["ls", "lad", "huber"])

    def migrate_decision_tree(self, algo_params):
        """Migrate decision tree params."""
        self.migrate_numerical_params(algo_params, "max_depth")
        self.migrate_numerical_params(algo_params, "min_samples_leaf")
        self.migrate_categorical_params(algo_params, "criterion", ["gini", "entropy"])
        self.migrate_categorical_params(algo_params, "splitter", ["best", "random"])

    def migrate_logistic_regression(self, algo_params):
        """Migrate logistic regression params."""
        self.migrate_numerical_params(algo_params, "C")
        self.migrate_categorical_params(algo_params, "penalty", ["l2", "l1"])

    def migrate_neural_network(self, algo_params):
        """Migrate neural network params."""
        self.migrate_numerical_params(algo_params, "layer_sizes")

    def migrate_svm(self, algo_params):
        """Migrate SVM params."""
        self.migrate_numerical_params(algo_params, "C")
        self.migrate_numerical_params(algo_params, "gamma")
        self.migrate_categorical_params(algo_params, "kernel", ["rbf", "linear", "poly", "sigmoid"])

    def migrate_sgd_classif(self, algo_params):
        """Migrate SGD classifier params."""
        self.migrate_numerical_params(algo_params, "alpha")
        self.migrate_categorical_params(algo_params, "loss", ["log", "modified_huber"])
        self.migrate_categorical_params(algo_params, "penalty", ["l1", "l2", "elasticnet"])

    def migrate_sgd_regression(self, algo_params):
        """Migrate SGD regression params."""
        self.migrate_numerical_params(algo_params, "epsilon")
        self.migrate_numerical_params(algo_params, "alpha")
        self.migrate_categorical_params(algo_params, "loss", ["squared_loss", "huber"])
        self.migrate_categorical_params(algo_params, "penalty", ["l1", "l2", "elasticnet"])

    def migrate_ridge_regression(self, algo_params):
        """Migrate ridge regression params."""
        self.migrate_numerical_params(algo_params, "alpha")

    def migrate_lasso(self, algo_params):
        """Migrate lasso regression params."""
        self.migrate_numerical_params(algo_params, "alpha")

    def migrate_knn(self, algo_params):
        """Migrate KNN params."""
        self.migrate_numerical_params(algo_params, "k")

    def migrate_xgboost(self, algo_params):
        """Migrate XGBoost params."""
        self.migrate_numerical_params(algo_params, "max_depth")
        self.migrate_numerical_params(algo_params, "learning_rate")
        self.migrate_numerical_params(algo_params, "gamma")
        self.migrate_numerical_params(algo_params, "min_child_weight")
        self.migrate_numerical_params(algo_params, "max_delta_step")
        self.migrate_numerical_params(algo_params, "subsample")
        self.migrate_numerical_params(algo_params, "colsample_bytree")
        self.migrate_numerical_params(algo_params, "colsample_bylevel")
        self.migrate_numerical_params(algo_params, "alpha")
        self.migrate_numerical_params(algo_params, "lambda")
        self.migrate_categorical_params(algo_params, "booster", ["gbtree", "dart"])
        self.migrate_categorical_params(algo_params, "objective", ["reg_linear", "reg_logistic", "reg_gamma", "binary_logistic", "multi_softprob"])

    def migrate_mllib_logit(self, algo_params):
        """Migrate MLlib logistic regression params."""
        self.migrate_numerical_params(algo_params, "reg_param")
        self.migrate_numerical_params(algo_params, "enet_param")

    def migrate_mllib_decision_tree(self, algo_params):
        """Migrate MLlib decision tree params."""
        self.migrate_numerical_params(algo_params, "max_depth")

    def migrate_mllib_naive_bayes(self, algo_params):
        """Migrate MLlib naive Bayes params."""
        self.migrate_numerical_params(algo_params, "lambda")

    def migrate_mllib_linear_regression(self, algo_params):
        """Migrate MLlib linear regression params."""
        self.migrate_numerical_params(algo_params, "reg_param")
        self.migrate_numerical_params(algo_params, "enet_param")

    def migrate_mllib_random_forest(self, algo_params):
        """Migrate MLlib random forest params (also applied to MLlib GBT)."""
        self.migrate_numerical_params(algo_params, "max_depth")
        self.migrate_numerical_params(algo_params, "num_trees")
        self.migrate_numerical_params(algo_params, "step_size")

    def migrate_algo_params_in_mltask(self, mltask_data):
        """Migrate every algorithm entry found under ``mltask_data["modeling"]``.

        A missing or null ``"modeling"`` entry is treated as empty.
        """
        modeling_params = mltask_data.get("modeling") or {}

        migrations = (
            # PY_MEMORY ALGOS
            ("random_forest_regression", self.migrate_tree_based),
            ("random_forest_classification", self.migrate_tree_based),
            ("extra_trees", self.migrate_tree_based),
            ("gbt_classification", self.migrate_gbt_classification),
            ("gbt_regression", self.migrate_gbt_regression),
            ("decision_tree_classification", self.migrate_decision_tree),
            ("decision_tree_regression", self.migrate_decision_tree),
            ("ridge_regression", self.migrate_ridge_regression),
            ("lasso_regression", self.migrate_lasso),
            # no need to migrate "leastsquare_regression", no grid search params
            ("sgd_regression", self.migrate_sgd_regression),
            ("knn", self.migrate_knn),
            ("logistic_regression", self.migrate_logistic_regression),
            ("neural_network", self.migrate_neural_network),
            ("svc_classifier", self.migrate_svm),
            ("svm_regression", self.migrate_svm),
            ("sgd_classifier", self.migrate_sgd_classif),
            # no need to migrate "lars_params", no grid search params
            ("xgboost", self.migrate_xgboost),
            # MLlib ALGOS
            ("mllib_logit", self.migrate_mllib_logit),
            ("mllib_naive_bayes", self.migrate_mllib_naive_bayes),
            ("mllib_linreg", self.migrate_mllib_linear_regression),
            ("mllib_rf", self.migrate_mllib_random_forest),
            ("mllib_gbt", self.migrate_mllib_random_forest),
            ("mllib_dt", self.migrate_mllib_decision_tree),
        )
        for algo_key, migrate in migrations:
            migrate(modeling_params.get(algo_key, {}))

    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Migrate every ``*_grid`` entry of ``rmodeling_data``.

        The modeling params of the origin models of an ensemble
        (``ensemble_params.modeling_params``) are migrated recursively. A
        missing or null ``ensemble_params`` / ``modeling_params`` is treated as
        empty and entries that are not dicts are ignored.
        """
        migrations = (
            # PY_MEMORY ALGOS
            ("rf_regressor_grid", self.migrate_tree_based),
            ("rf_classifier_grid", self.migrate_tree_based),
            ("extra_trees_grid", self.migrate_tree_based),
            ("gbt_classifier_grid", self.migrate_gbt_classification),
            ("gbt_regressor_grid", self.migrate_gbt_regression),
            ("dtc_classifier_grid", self.migrate_decision_tree),
            ("logit_grid", self.migrate_logistic_regression),
            ("neural_network_grid", self.migrate_neural_network),
            ("svc_grid", self.migrate_svm),
            ("svr_grid", self.migrate_svm),
            # no need to migrate "least_squares_grid", no grid search params
            ("sgd_grid", self.migrate_sgd_classif),
            ("sgd_reg_grid", self.migrate_sgd_regression),
            ("ridge_grid", self.migrate_ridge_regression),
            ("lasso_grid", self.migrate_lasso),
            # no need to migrate "lars_grid", no grid search params
            ("knn_grid", self.migrate_knn),
            ("xgboost_grid", self.migrate_xgboost),
            # MLlib ALGOS
            ("mllib_logit_grid", self.migrate_mllib_logit),
            ("mllib_dt_grid", self.migrate_mllib_decision_tree),
            ("mllib_naive_bayes_grid", self.migrate_mllib_naive_bayes),
            ("mllib_linreg_grid", self.migrate_mllib_linear_regression),
            ("mllib_rf_grid", self.migrate_mllib_random_forest),
            ("mllib_gbt_grid", self.migrate_mllib_random_forest),
        )
        for grid_key, migrate in migrations:
            migrate(rmodeling_data.get(grid_key, {}))

        # Ensemble ALGOS
        # "or" rather than a .get() default: the default is not used for a key present with a null value
        ensemble_params = rmodeling_data.get("ensemble_params") or {}
        for origin_model_mp in ensemble_params.get("modeling_params") or []:
            if isinstance(origin_model_mp, dict):
                self.migrate_algo_params_in_modeling(origin_model_mp)

    def _migrate_json_files(self, json_files, start_message, failure_message, migrate):
        """Load, migrate in place and rewrite each file of ``json_files``.

        ``start_message`` and ``failure_message`` are ``%s`` templates printed
        with the file path and with the raised exception respectively. A file
        whose migration fails is left as it is on disk and the next one is
        processed.
        """
        for json_file in json_files:
            print(start_message % json_file)
            try:
                data = base.json_loadf(json_file)
                migrate(data)
                base.json_dumpf(json_file, data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Migrate all the files of the project that hold algorithm params."""

        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        self._migrate_json_files(
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating algorithms params in MLTask: %s",
            "Algo params in mltask migration FAILED: %s",
            self.migrate_algo_params_in_mltask)

        # config/projects/PROJECT_KEY/recipes/*.prediction_training
        self._migrate_json_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating algorithms params in training recipe: %s ",
            "Algo params in train recipe migration FAILED: %s",
            lambda recipe: self.migrate_algo_params_in_modeling(recipe.get("modeling") or {}))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/rmodeling_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in rmodeling file: %s ",
            "Algo params in trained model rmodeling migration FAILED: %s",
            self.migrate_algo_params_in_modeling)

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        self._migrate_json_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating algorithms params in saved model miniTask: %s ",
            "Algo params in saved model miniTask migration FAILED: %s",
            lambda saved_model: self.migrate_algo_params_in_mltask(saved_model.get("miniTask") or {}))


class V7000MigratePosttrainComputationParams(migration_base.ProjectLocalMigrationOperation):
    """Rename post-training computation fields in ``iperf.json`` and ``modality.json`` files.

    In partial dependence results ``onFullTestSet`` is replaced by its negation
    ``onSample``; in subpopulation modality files ``totalRows`` and
    ``weightedTotalRows`` become ``nbRecords`` and ``weightedNbRecords``.
    """

    def __repr__(self):
        return "Migrating posttrain computation params"

    def migrate_pdp_params(self, iperf_file):
        """Rewrite ``iperf_file`` with ``onSample`` in place of ``onFullTestSet``."""
        iperf_data = base.json_loadf(iperf_file)
        for entry in iperf_data.get("partialDependencies", []):
            if not isinstance(entry, dict) or "onFullTestSet" not in entry:
                continue
            # The new flag has the opposite meaning of the removed one
            entry["onSample"] = not entry.pop("onFullTestSet")
        base.json_dumpf(iperf_file, iperf_data)

    def migrate_subpopulation_params(self, modality_file):
        """Rewrite ``modality_file`` with the renamed record-count keys.

        The file is written back even when its content is not a dict.
        """
        renamed_keys = (
            ("totalRows", "nbRecords"),
            ("weightedTotalRows", "weightedNbRecords"),
        )
        modality_data = base.json_loadf(modality_file)
        if isinstance(modality_data, dict):
            for old_key, new_key in renamed_keys:
                if old_key in modality_data:
                    modality_data[new_key] = modality_data.pop(old_key)

        base.json_dumpf(modality_file, modality_data)

    def execute(self, project_paths):
        """Migrate every iperf and modality file of the project, printing failures."""
        saved_models_dir = project_paths.saved_models
        analysis_data_dir = project_paths.analysis_data

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/iperf.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/iperf.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/iperf.json
        iperf_files = []
        iperf_files.extend(glob("%s/*/versions/*/iperf.json" % saved_models_dir))
        iperf_files.extend(glob("%s/*/pversions/*/*/iperf.json" % saved_models_dir))
        iperf_files.extend(glob("%s/*/*/sessions/*/*/*/iperf.json" % analysis_data_dir))
        for iperf_file in iperf_files:
            try:
                print("Migrating partial dependencies params in iperf file: %s" % iperf_file)
                self.migrate_pdp_params(iperf_file)
            except Exception as e:
                print("Migration of partial dependencies params failed: %s" % e)

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/posttrain/subpop-8da8b33a3b6a367b885b54caf27703b7/modality.json (regular models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/posttrain/subpop-8da8b33a3b6a367b885b54caf27703b7/modality.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/posttrain/subpop-8da8b33a3b6a367b885b54caf27703b7/modality.json
        modality_files = []
        modality_files.extend(glob("%s/*/versions/*/posttrain/*/modality.json" % saved_models_dir))
        modality_files.extend(glob("%s/*/pversions/*/*/posttrain/*/modality.json" % saved_models_dir))
        modality_files.extend(glob("%s/*/*/sessions/*/*/*/posttrain/*/modality.json" % analysis_data_dir))
        for modality_file in modality_files:
            print("Migrating subpopulation params in modality file: %s" % modality_file)
            try:
                self.migrate_subpopulation_params(modality_file)
            except Exception as e:
                print("Migration of subpopulation params failed: %s" % e)


class V7020MigrateExplanationsScoringRecipeParams(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``drawInScoredSet`` to True in scoring recipes whose explanation params lack it."""

    def __init__(self):
        # No state to set up; the parent initializer is not invoked here
        pass

    def __repr__(self, ):
        return "Migrate scoring recipe explanation params"

    def transform(self, obj, filepath=None):
        """Return ``obj`` after defaulting ``individualExplanationParams.drawInScoredSet``.

        Objects that are not dicts, or that have no (or null)
        ``individualExplanationParams``, are returned untouched.
        """
        if not isinstance(obj, dict):
            return obj

        explanation_params = obj.get("individualExplanationParams", None)
        if explanation_params is None:
            return obj

        # A key present with a null value is treated like a missing key
        if explanation_params.get("drawInScoredSet") is None:
            explanation_params["drawInScoredSet"] = True
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["recipes/*.prediction_scoring"]



###############################################################################
# V8000 / DSS 8.0.0
###############################################################################


class V8000MigrateAlgorithmsSVMParamsStructure(migration_base.ProjectLocalMigrationOperation):
    """Split the SVM ``gamma`` param into a categorical mode and a ``custom_gamma`` value.

    In grid params, ``gamma`` becomes a categorical dimension with the
    ``scale`` / ``auto`` / ``custom`` modes and the positive values move to the
    numerical ``custom_gamma`` dimension. In the resolved params of trained
    models, a numerical ``gamma`` becomes ``"auto"`` or ``"custom"``.

    Files are migrated on a best-effort basis: a failure on one file is printed
    and does not prevent the migration of the other files.
    """

    def __repr__(self):
        return "Migrating SVM algorithms params structure"

    def migrate_svm(self, algo_params):
        """Migrate the ``gamma`` grid dimension of ``algo_params`` in place.

        Previous values ``<= 0`` enable the ``auto`` mode; positive values enable
        the ``custom`` mode and are moved to ``custom_gamma``, which gets
        ``[0.001]`` when there is no positive value. ``scale`` is never enabled.

        Nothing is done when ``algo_params`` is not a dict or already holds a
        ``custom_gamma`` entry. A missing or null ``gamma`` (or ``values``) is
        treated as having no previous value.
        """
        if not isinstance(algo_params, dict):
            return

        if "custom_gamma" in algo_params:  # already migrated, not doing it again
            return

        # "or" rather than a .get() default: the default is not used for a key present with a null value
        previous_gamma = algo_params.get("gamma") or {}

        auto_gamma = False
        custom_gamma_values = []
        for prev_gamma in previous_gamma.get("values") or []:
            if prev_gamma <= 0:
                auto_gamma = True
            else:
                custom_gamma_values.append(prev_gamma)
        custom_gamma = len(custom_gamma_values) > 0

        algo_params["gamma"] = {
            "values": {
                "scale": {"enabled": False},
                "auto": {"enabled": auto_gamma},
                "custom": {"enabled": custom_gamma}
            }
        }

        if not custom_gamma:  # putting default value if no other values
            custom_gamma_values = [0.001]

        algo_params["custom_gamma"] = {
            "values": custom_gamma_values,
            "gridMode": "EXPLICIT"
        }

    def migrate_algo_params_in_mltask(self, mltask_data):
        """Migrate the SVM entries found under ``mltask_data["modeling"]`` (missing or null: nothing to do)."""
        modeling_params = mltask_data.get("modeling") or {}
        self.migrate_svm(modeling_params.get("svc_classifier"))
        self.migrate_svm(modeling_params.get("svm_regression"))

    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Migrate the SVM grids of ``rmodeling_data`` and, recursively, of its ensemble origin models."""
        self.migrate_svm(rmodeling_data.get("svc_grid"))
        self.migrate_svm(rmodeling_data.get("svr_grid"))

        # Ensemble ALGOS
        ensemble_params = rmodeling_data.get("ensemble_params") or {}
        for origin_model_mp in ensemble_params.get("modeling_params") or []:
            if isinstance(origin_model_mp, dict):
                self.migrate_algo_params_in_modeling(origin_model_mp)

    def migrate_actual_params(self, actual_params):
        """Migrate the resolved ``gamma`` of a trained SVM model in place.

        Only applies when ``resolved.algorithm`` is ``SVC_CLASSIFICATION`` or
        ``SVM_REGRESSION`` and ``resolved.svm`` is a dict. A ``gamma`` that is
        already one of ``"scale"``, ``"auto"`` or ``"custom"`` is left as is; a
        missing or null ``gamma`` is handled like ``0.0``.
        """
        if not isinstance(actual_params, dict):
            return

        resolved = actual_params.get("resolved") or {}
        if resolved.get("algorithm") not in ["SVC_CLASSIFICATION", "SVM_REGRESSION"]:
            return

        svm_params = resolved.get("svm")
        if not isinstance(svm_params, dict):
            return

        gamma = svm_params.get("gamma")
        if gamma is None:  # should always be there, putting default value just in case
            gamma = 0.0

        # already migrated, not doing it again ("scale" is a valid mode of the new
        # structure and must not be compared to a number)
        if gamma in ("scale", "auto", "custom"):
            return

        if gamma <= 0:
            svm_params["gamma"] = "auto"
        else:
            svm_params["gamma"] = "custom"
            svm_params["custom_gamma"] = gamma

    def _migrate_json_files(self, json_files, start_message, failure_message, migrate):
        """Load, migrate in place and rewrite each file of ``json_files``.

        ``start_message`` and ``failure_message`` are ``%s`` templates printed
        with the file path and with the raised exception respectively. A file
        whose migration fails is left as it is on disk and the next one is
        processed.
        """
        for json_file in json_files:
            print(start_message % json_file)
            try:
                data = base.json_loadf(json_file)
                migrate(data)
                base.json_dumpf(json_file, data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Migrate all the files of the project that hold SVM params."""

        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        self._migrate_json_files(
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating algorithms params in MLTask: %s",
            "Algo params in mltask migration FAILED: %s",
            self.migrate_algo_params_in_mltask)

        # config/projects/PROJECT_KEY/recipes/*.prediction_training
        self._migrate_json_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating algorithms params in training recipe: %s ",
            "Algo params in train recipe migration FAILED: %s",
            lambda recipe: self.migrate_algo_params_in_modeling(recipe.get("modeling") or {}))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/rmodeling_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in rmodeling file: %s ",
            "Algo params in trained model rmodeling migration FAILED: %s",
            self.migrate_algo_params_in_modeling)

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        self._migrate_json_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating algorithms params in saved model miniTask: %s ",
            "Algo params in saved model miniTask migration FAILED: %s",
            lambda saved_model: self.migrate_algo_params_in_mltask(saved_model.get("miniTask") or {}))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/actual_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/actual_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/actual_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/actual_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in actual params file: %s ",
            "Algo params in trained model actual params migration FAILED: %s",
            self.migrate_actual_params)


class V8000MigratePredictionAlgorithmsRanges(V7000MigrateAlgorithmsParamsStructure):

    min_positive = 1e-23

    def __repr__(self):
        return "Migrating prediction algorithms params structure to add range"

    # Per dimension migration
    def migrate_numerical_params(self, algo_params, dimension, limit_min=None, limit_max=None, range_min=None, range_max=None, scaling="LINEAR"):
        """Add the range settings to the structured numerical dimension ``dimension``.

        Sets ``randomMode`` to ``"RANGE"`` and writes the ``range`` entry
        (``min``, ``max``, ``scaling`` and ``nbValues`` fixed to 3) and the
        ``limit`` entry (``min``, ``max``) on ``algo_params[dimension]``.

        Nothing is done when ``algo_params`` is not a dict, when it has no
        ``dimension`` key, or when the stored value is not a dict (for instance
        null): such a dimension is skipped instead of raising, so that the
        other dimensions of the same file are still migrated.
        """
        if not isinstance(algo_params, dict):
            return

        dimension_params = algo_params.get(dimension)
        if not isinstance(dimension_params, dict):
            return

        dimension_params["randomMode"] = "RANGE"
        dimension_params["range"] = {"min": range_min, "max": range_max, "scaling": scaling, "nbValues": 3}
        dimension_params["limit"] = {"min": limit_min, "max": limit_max}

    # List of all algorithms to migrate
    def migrate_rf(self, algo_params):
        """Add the range settings of the random forest numerical dimensions (all with linear scaling)."""
        # (dimension, limit_min, limit_max, range_min, range_max)
        dimensions = (
            ("n_estimators", 1, None, 80, 200),
            ("max_tree_depth", 1, None, 5, 10),
            ("min_samples_leaf", 1, None, 3, 20),
            ("max_features", 1, None, 1, 20),
            ("max_feature_prop", self.min_positive, 1, 0.1, 0.7),
        )
        for name, lower_limit, upper_limit, lower_range, upper_range in dimensions:
            self.migrate_numerical_params(algo_params, name, limit_min=lower_limit, limit_max=upper_limit,
                                          range_min=lower_range, range_max=upper_range, scaling="LINEAR")

    def migrate_extra_trees(self, algo_params):
        """Apply the random forest settings, then write the extra trees ``n_estimators`` settings."""
        self.migrate_rf(algo_params)
        # Second pass on "n_estimators": replaces the settings written by migrate_rf
        n_estimators_settings = dict(limit_min=1, limit_max=None, range_min=10, range_max=50, scaling="LINEAR")
        self.migrate_numerical_params(algo_params, "n_estimators", **n_estimators_settings)

    def migrate_gbt(self, algo_params):
        """Add the range settings of the gradient boosted trees numerical dimensions (all with linear scaling)."""
        # (dimension, limit_min, limit_max, range_min, range_max)
        dimensions = (
            ("n_estimators", 1, None, 80, 200),
            ("max_depth", 1, None, 3, 8),
            ("min_samples_leaf", 1, None, 1, 20),
            ("max_features", 1, None, 1, 20),
            ("max_feature_prop", self.min_positive, 1, 0.1, 0.7),
            ("learning_rate", self.min_positive, 1., 0.05, 0.5),
        )
        for name, lower_limit, upper_limit, lower_range, upper_range in dimensions:
            self.migrate_numerical_params(algo_params, name, limit_min=lower_limit, limit_max=upper_limit,
                                          range_min=lower_range, range_max=upper_range, scaling="LINEAR")

    def migrate_decision_tree(self, algo_params):
        """Add the range settings of the decision tree numerical dimensions (linear scaling)."""
        # dimension -> (range_min, range_max); both have a lower limit of 1 and no upper limit
        for name, (lower_range, upper_range) in (("max_depth", (3, 8)), ("min_samples_leaf", (1, 20))):
            self.migrate_numerical_params(algo_params, name, limit_min=1, limit_max=None,
                                          range_min=lower_range, range_max=upper_range, scaling="LINEAR")

    def migrate_logistic_regression(self, algo_params):
        """Add the range settings of the logistic regression ``C`` dimension (logarithmic scaling)."""
        c_settings = dict(limit_min=self.min_positive, limit_max=None, range_min=0.01, range_max=100., scaling="LOGARITHMIC")
        self.migrate_numerical_params(algo_params, "C", **c_settings)

    def migrate_neural_network(self, algo_params):
        """Add the range settings of the neural network ``layer_sizes`` dimension (linear scaling)."""
        layer_settings = dict(limit_min=1, limit_max=None, range_min=8, range_max=16, scaling="LINEAR")
        self.migrate_numerical_params(algo_params, "layer_sizes", **layer_settings)

    def migrate_svm(self, algo_params):
        """Add the range settings of the SVM ``C`` and ``custom_gamma`` dimensions (logarithmic scaling)."""
        # dimension -> (range_min, range_max); both are strictly positive with no upper limit
        for name, (lower_range, upper_range) in (("C", (0.1, 10.)), ("custom_gamma", (0.0001, 1.))):
            self.migrate_numerical_params(algo_params, name, limit_min=self.min_positive, limit_max=None,
                                          range_min=lower_range, range_max=upper_range, scaling="LOGARITHMIC")

    def migrate_sgd_classif(self, algo_params):
        """Add the range settings of the SGD classifier ``alpha`` dimension (logarithmic scaling)."""
        alpha_settings = dict(limit_min=self.min_positive, limit_max=None, range_min=0.00001, range_max=0.001, scaling="LOGARITHMIC")
        self.migrate_numerical_params(algo_params, "alpha", **alpha_settings)

    def migrate_sgd_regression(self, algo_params):
        """Add the range settings of the SGD regression ``epsilon`` and ``alpha`` dimensions (logarithmic scaling)."""
        # dimension -> (range_min, range_max); both are strictly positive with no upper limit
        for name, (lower_range, upper_range) in (("epsilon", (0.01, 0.1)), ("alpha", (0.00001, 0.001))):
            self.migrate_numerical_params(algo_params, name, limit_min=self.min_positive, limit_max=None,
                                          range_min=lower_range, range_max=upper_range, scaling="LOGARITHMIC")

    def migrate_ridge_regression(self, algo_params):
        """Add the range settings of the ridge regression ``alpha`` dimension (logarithmic scaling)."""
        alpha_settings = dict(limit_min=self.min_positive, limit_max=None, range_min=0.1, range_max=3., scaling="LOGARITHMIC")
        self.migrate_numerical_params(algo_params, "alpha", **alpha_settings)

    def migrate_lasso(self, algo_params):
        """Add the range settings of the lasso ``alpha`` dimension (logarithmic scaling)."""
        alpha_settings = dict(limit_min=self.min_positive, limit_max=None, range_min=0.1, range_max=10., scaling="LOGARITHMIC")
        self.migrate_numerical_params(algo_params, "alpha", **alpha_settings)

    def migrate_knn(self, algo_params):
        """Add the range settings of the KNN ``k`` dimension (linear scaling)."""
        k_settings = dict(limit_min=1, limit_max=None, range_min=3, range_max=7, scaling="LINEAR")
        self.migrate_numerical_params(algo_params, "k", **k_settings)

    def migrate_xgboost(self, algo_params):
        """Add the range settings of the XGBoost numerical dimensions (all with linear scaling)."""
        smallest = self.min_positive
        # (dimension, limit_min, limit_max, range_min, range_max)
        dimensions = (
            ("max_depth", 1, None, 2, 5),
            ("learning_rate", smallest, 1., 0.1, 0.5),
            ("gamma", 0., None, 0., 1.),
            ("min_child_weight", 0., None, 0., 1.),
            ("max_delta_step", 0., None, 0., 1.),
            ("subsample", smallest, 1., 0.5, 1.),
            ("colsample_bytree", smallest, 1., 0.5, 1.),
            ("colsample_bylevel", smallest, 1., 0.5, 1.),
            ("alpha", 0., 1., 0., 0.1),
            ("lambda", smallest, 1., 0.5, 1.),
        )
        for name, lower_limit, upper_limit, lower_range, upper_range in dimensions:
            self.migrate_numerical_params(algo_params, name, limit_min=lower_limit, limit_max=upper_limit,
                                          range_min=lower_range, range_max=upper_range, scaling="LINEAR")

    def migrate_mllib_linear(self, algo_params):
        """Add the range settings of the MLlib ``reg_param`` (logarithmic) and ``enet_param`` (linear) dimensions."""
        # (dimension, range_min, range_max, scaling); both have a lower limit of 0 and no upper limit
        dimensions = (
            ("reg_param", 0.001, 10, "LOGARITHMIC"),
            ("enet_param", 0., 1., "LINEAR"),
        )
        for name, lower_range, upper_range, scale in dimensions:
            self.migrate_numerical_params(algo_params, name, limit_min=0., limit_max=None,
                                          range_min=lower_range, range_max=upper_range, scaling=scale)

    def migrate_mllib_naive_bayes(self, algo_params):
        """Add the range settings of the MLlib naive Bayes ``lambda`` dimension (logarithmic scaling)."""
        lambda_settings = dict(limit_min=self.min_positive, limit_max=None, range_min=0.1, range_max=10., scaling="LOGARITHMIC")
        self.migrate_numerical_params(algo_params, "lambda", **lambda_settings)

    def migrate_mllib_decision_tree(self, algo_params):
        """Add the range settings of the MLlib decision tree ``max_depth`` dimension (linear scaling)."""
        depth_settings = dict(limit_min=1, limit_max=None, range_min=3, range_max=8, scaling="LINEAR")
        self.migrate_numerical_params(algo_params, "max_depth", **depth_settings)

    def migrate_mllib_random_forest(self, algo_params):
        """Add the range settings of the MLlib ``max_depth``, ``num_trees`` and ``step_size`` dimensions (linear scaling)."""
        # (dimension, limit_min, limit_max, range_min, range_max)
        dimensions = (
            ("max_depth", 1, None, 3, 8),
            ("num_trees", 1, None, 10, 50),
            ("step_size", self.min_positive, 1, 0.05, 0.5),
        )
        for name, lower_limit, upper_limit, lower_range, upper_range in dimensions:
            self.migrate_numerical_params(algo_params, name, limit_min=lower_limit, limit_max=upper_limit,
                                          range_min=lower_range, range_max=upper_range, scaling="LINEAR")

    def migrate_algo_params_in_mltask(self, mltask_data):
        """Run the per-algorithm migration on every algorithm block of an MLTask.

        The algorithm blocks are read from the ``modeling`` section of ``mltask_data``;
        an absent block is replaced by a fresh empty dict before being handed to its
        migration method.
        """
        modeling_params = mltask_data.get("modeling", {})

        # (migration method name, key of the algorithm block in "modeling"), in execution order
        migrations = (
            # PY_MEMORY ALGOS
            ("migrate_rf", "random_forest_regression"),
            ("migrate_rf", "random_forest_classification"),
            ("migrate_extra_trees", "extra_trees"),
            ("migrate_gbt", "gbt_classification"),
            ("migrate_gbt", "gbt_regression"),
            ("migrate_decision_tree", "decision_tree_classification"),
            ("migrate_decision_tree", "decision_tree_regression"),
            ("migrate_ridge_regression", "ridge_regression"),
            ("migrate_lasso", "lasso_regression"),
            # "leastsquare_regression" is skipped: no grid search params
            ("migrate_sgd_regression", "sgd_regression"),
            ("migrate_knn", "knn"),
            ("migrate_logistic_regression", "logistic_regression"),
            ("migrate_neural_network", "neural_network"),
            ("migrate_svm", "svc_classifier"),
            ("migrate_svm", "svm_regression"),
            ("migrate_sgd_classif", "sgd_classifier"),
            # "lars_params" is skipped: no grid search params
            ("migrate_xgboost", "xgboost"),
            # MLlib ALGOS
            ("migrate_mllib_naive_bayes", "mllib_naive_bayes"),
            ("migrate_mllib_linear", "mllib_logit"),
            ("migrate_mllib_linear", "mllib_linreg"),
            ("migrate_mllib_random_forest", "mllib_rf"),
            ("migrate_mllib_random_forest", "mllib_gbt"),
            ("migrate_mllib_decision_tree", "mllib_dt"),
        )
        for migration_name, algo_key in migrations:
            migrate = getattr(self, migration_name)
            migrate(modeling_params.get(algo_key, {}))


    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Run the per-algorithm migration on every ``*_grid`` block of resolved modeling params.

        An absent grid block is replaced by a fresh empty dict before being handed to
        its migration method. Each entry of ``ensemble_params.modeling_params`` is then
        migrated recursively through this same method.
        """
        # (migration method name, key of the grid block), in execution order
        migrations = (
            # PY_MEMORY ALGOS
            ("migrate_rf", "rf_regressor_grid"),
            ("migrate_rf", "rf_classifier_grid"),
            ("migrate_extra_trees", "extra_trees_grid"),
            ("migrate_gbt", "gbt_classifier_grid"),
            ("migrate_gbt", "gbt_regressor_grid"),
            ("migrate_decision_tree", "dtc_classifier_grid"),
            ("migrate_logistic_regression", "logit_grid"),
            ("migrate_neural_network", "neural_network_grid"),
            ("migrate_svm", "svc_grid"),
            ("migrate_svm", "svr_grid"),
            # "least_squares_grid" is skipped: no grid search params
            ("migrate_sgd_classif", "sgd_grid"),
            ("migrate_sgd_regression", "sgd_reg_grid"),
            ("migrate_ridge_regression", "ridge_grid"),
            ("migrate_lasso", "lasso_grid"),
            # "lars_grid" is skipped: no grid search params
            ("migrate_knn", "knn_grid"),
            ("migrate_xgboost", "xgboost_grid"),
            # MLlib ALGOS
            ("migrate_mllib_naive_bayes", "mllib_naive_bayes_grid"),
            ("migrate_mllib_linear", "mllib_logit_grid"),
            ("migrate_mllib_linear", "mllib_linreg_grid"),
            ("migrate_mllib_decision_tree", "mllib_dt_grid"),
            ("migrate_mllib_random_forest", "mllib_rf_grid"),
            ("migrate_mllib_random_forest", "mllib_gbt_grid"),
        )
        for migration_name, grid_key in migrations:
            migrate = getattr(self, migration_name)
            migrate(rmodeling_data.get(grid_key, {}))

        # Ensemble ALGOS: every origin model carries its own modeling params
        ensemble_params = rmodeling_data.get("ensemble_params", {})
        for origin_model_mp in ensemble_params.get("modeling_params", []):
            self.migrate_algo_params_in_modeling(origin_model_mp)


class V8000MigrateAuditConfig(migration_json.JsonMigrationOperation):
    """Writes the audit trail settings into ``config/general-settings.json``."""

    def __repr__(self,):
        return "Initialize audit settings"

    def file_patterns(self,):
        return ["config/general-settings.json"]

    def transform(self, general_settings, filepath=None):
        """Replace ``auditTrailSettings`` with a single LOG4J target and return the settings."""
        log4j_target = {
            "type": "LOG4J",
            "appendTopicToLogger": True,
            "topicsFiltering": "ALL",
            "routingKeysFiltering": "ALL",
        }
        # Any previously stored value under this key is overwritten
        general_settings["auditTrailSettings"] = {"targets": [log4j_target]}
        return general_settings


class V8000MigrateAuditConfigAPINode(migration_json.JsonMigrationOperation):
    """Writes the audit log settings into ``config/server.json`` (declared for the "api" node type)."""

    def __repr__(self,):
        return "Initialize audit settings (API node)"

    def appliesTo(self):
        return [ "api" ]

    def file_patterns(self,):
        return ["config/server.json"]

    def transform(self, server_config, filepath=None):
        """Set ``auditLog.settings`` to a single LOG4J target and return the server config.

        Other keys already present under ``auditLog`` are kept; the section is created
        when absent.
        """
        log4j_target = {
            "type": "LOG4J",
            "appendTopicToLogger": True,
            "topicsFiltering": "ALL",
            "routingKeysFiltering": "ALL",
        }
        audit_section = server_config.setdefault("auditLog", {})
        audit_section["settings"] = {"targets": [log4j_target]}
        return server_config


class V8000MigrateGridLengthForNonSearchableAlgos(migration_base.ProjectLocalMigrationOperation):
    """Sets ``gridLength`` to 1 in every ``rmodeling_params.json`` where it is missing or 0."""

    def __repr__(self):
        return "Migrating grid length for non-searchable algos"

    def migrate_grid_length(self, rmodeling_file):
        """Load ``rmodeling_file``, normalise its ``gridLength`` and write the file back."""
        content = base.json_loadf(rmodeling_file)
        # Both a missing gridLength (should not happen, maybe for very old models)
        # and a gridLength of 0 represent cases where no grid search has been
        # performed: they become 1.
        no_grid_search = content.get("gridLength", 0) == 0
        if no_grid_search:
            content["gridLength"] = 1
        # The file is written back whether or not the value changed
        base.json_dumpf(rmodeling_file, content)

    def execute(self, project_paths):
        """Migrate every rmodeling_params.json of the project; a failure on one file is only printed."""
        saved_models_dir = project_paths.saved_models

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models, partitioned base models)
        candidates = glob("%s/*/versions/*/rmodeling_params.json" % saved_models_dir)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/rmodeling_params.json (model partitions)
        candidates.extend(glob("%s/*/pversions/*/*/rmodeling_params.json" % saved_models_dir))
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json (regular models, partitioned base models)
        candidates.extend(glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data))

        for rmodeling_file in candidates:
            try:
                print("Migrating grid length in rmodeling_params file: %s" % rmodeling_file)
                self.migrate_grid_length(rmodeling_file)
            except Exception as e:
                print("Migration of grid length failed: %s" % e)


class V8000MigrateMaxFeaturePropStructure(migration_base.ProjectLocalMigrationOperation):
    """Wraps scalar ``max_feature_prop`` settings into a ``{"values": [...], "gridMode": "EXPLICIT"}`` dict.

    The migration is applied to MLTask files, prediction training recipes, resolved
    modeling params of trained models and the miniTask of saved models.
    """

    def __repr__(self):
        return "Migrating structure of 'proportion of features to sample'"

    def migrate_max_feature_prop(self, algo_params):
        """Wrap ``algo_params["max_feature_prop"]`` in place, unless it is already a dict.

        Nothing happens when ``algo_params`` is not a dict or does not hold the key.
        """
        key = "max_feature_prop"
        if isinstance(algo_params, dict) and key in algo_params:
            current = algo_params[key]
            if not isinstance(current, dict):
                algo_params[key] = {
                    "values": [current],
                    "gridMode": "EXPLICIT"
                }

    # List of all algorithms to migrate
    def migrate_algo_params_in_mltask(self, mltask_data):
        """Migrate the tree ensemble blocks found in the ``modeling`` section of an MLTask."""
        modeling_params = mltask_data.get("modeling", {})
        # PY_MEMORY ALGOS
        for algo_key in ("random_forest_regression",
                         "random_forest_classification",
                         "extra_trees",
                         "gbt_classification",
                         "gbt_regression"):
            self.migrate_max_feature_prop(modeling_params.get(algo_key, {}))

    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Migrate the tree ensemble grids of resolved modeling params, recursing into ensembles."""
        # PY_MEMORY ALGOS
        for grid_key in ("rf_regressor_grid",
                         "rf_classifier_grid",
                         "extra_trees_grid",
                         "gbt_classifier_grid",
                         "gbt_regressor_grid"):
            self.migrate_max_feature_prop(rmodeling_data.get(grid_key, {}))

        # Ensemble ALGOS
        ensemble_params = rmodeling_data.get("ensemble_params", {})
        for origin_model_mp in ensemble_params.get("modeling_params", []):
            self.migrate_algo_params_in_modeling(origin_model_mp)

    def _migrate_json_file(self, json_file, start_message, failure_message, migrate):
        """Print ``start_message``, then load, migrate and rewrite one JSON file.

        Any exception raised while loading, migrating or writing is printed with
        ``failure_message`` and not propagated.
        """
        print(start_message % json_file)
        try:
            content = base.json_loadf(json_file)
            migrate(content)
            base.json_dumpf(json_file, content)
        except Exception as e:
            print(failure_message % e)

    def execute(self, project_paths):
        """Apply the migration to every relevant file of the project."""
        # MLTasks: design-time params and per-session snapshots
        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            self._migrate_json_file(
                mltask_file,
                "Migrating structure of 'proportion of features to sample' in MLTask: %s",
                "Structure of 'proportion of features to sample' in mltask migration FAILED: %s",
                self.migrate_algo_params_in_mltask)

        # Prediction training recipes: the grids live under their "modeling" key
        def migrate_train_recipe(train_recipe_data):
            self.migrate_algo_params_in_modeling(train_recipe_data.get("modeling", {}))

        for train_recipe_params_file in glob("%s/recipes/*.prediction_training" % project_paths.config):
            self._migrate_json_file(
                train_recipe_params_file,
                "Migrating structure of 'proportion of features to sample' in training recipe: %s ",
                "Structure of 'proportion of features to sample' in train recipe migration FAILED: %s",
                migrate_train_recipe)

        # Resolved modeling params of saved model versions, partitions and analysis models
        rmodeling_files = (glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
                           + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
                           + glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data))
        for rm_file in rmodeling_files:
            self._migrate_json_file(
                rm_file,
                "Migrating structure of 'proportion of features to sample' in rmodeling file: %s ",
                "Structure of 'proportion of features to sample' in trained model rmodeling migration FAILED: %s",
                self.migrate_algo_params_in_modeling)

        # Saved models: the MLTask-like structure lives under their "miniTask" key
        def migrate_saved_model(sm_data):
            self.migrate_algo_params_in_mltask(sm_data.get("miniTask", {}))

        for saved_model_file in glob("%s/saved_models/*.json" % project_paths.config):
            self._migrate_json_file(
                saved_model_file,
                "Migrating structure of 'proportion of features to sample' in saved model miniTask: %s ",
                "Structure of 'proportion of features to sample' in saved model miniTask migration FAILED: %s",
                migrate_saved_model)


class V8000MigrateCodeEnvSelection(migration_json.ProjectConfigJsonMigrationOperation):
    """Adds a ``mode`` to the per-language code env selection stored in ``params.json``."""

    def __init__(self):
        """Takes no argument; the parent initializer is intentionally not invoked here."""

    def __repr__(self, ):
        return "Migrate code env selection"

    def file_patterns(self, ):
        return ["params.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Set ``mode`` for the python, r and julia entries of ``settings.codeEnvs``.

        ``useBuiltinEnv`` (treated as true when absent) maps to "INHERIT", anything
        falsy maps to "EXPLICIT_ENV". A language with no entry is left absent.
        """
        code_envs = obj.get("settings", {}).get("codeEnvs", {})
        for language in ("python", "r", "julia"):
            # The fallback dict is a throwaway: writing to it does not touch obj
            selection = code_envs.get(language, {})
            selection["mode"] = "INHERIT" if selection.get("useBuiltinEnv", True) else "EXPLICIT_ENV"
        return obj

class V8020MigrateTreeBasedMLResults(migration_base.ProjectLocalMigrationOperation):
    """Updates stored tree visualisations (tree.json / trees.json) according to the weighting strategy.

    The weighting strategy is read from ``weight.weightMethod`` in the
    ``core_params.json`` associated with each tree file.
    """

    def __repr__(self):
        return "Migrating tree visualisations to take into account difference between class and sample weights"

    def update_tree(self, tree_data, weighting_strategy):
        """Update one tree dict in place for the given weighting strategy.

        - NO_WEIGHTING: left untouched.
        - SAMPLE_WEIGHT: ``nSamples`` is renamed to ``nSamplesWeighted``.
        - CLASS_WEIGHT / CLASS_AND_SAMPLE_WEIGHT: a ``warningMessage`` is added.
        An empty tree or an unknown strategy is only reported on stdout.
        """
        if tree_data is None or tree_data == {}:
            print("Failed to update tree visualization: empty tree data")
            return

        if weighting_strategy == "NO_WEIGHTING":
            # nSamples is the unweighted count of samples, nothing to do
            pass
        elif weighting_strategy == "SAMPLE_WEIGHT":
            # nSamples is the weighted count of samples, need to delete nSamples and add the nSamplesWeighted field
            if "nSamples" in tree_data:
                tree_data["nSamplesWeighted"] = tree_data.pop("nSamples")
        elif weighting_strategy == "CLASS_WEIGHT":
            # nSamples is the weighted count of samples, which is corrupted by class weights
            # Cannot compute the proper nSamples exactly so encourage the user to retrain
            # (the two sentences are separated by a space)
            tree_data["warningMessage"] = "Tree samples count may be invalid due to target class weighting. " \
                                          "Please retrain this model to update the visualization."
        elif weighting_strategy == "CLASS_AND_SAMPLE_WEIGHT":
            # nSamples is the weighted count of samples, which is corrupted by class weights
            # Cannot compute the proper nSamples and nSamplesWeighted so encourage the user to retrain
            tree_data["warningMessage"] = "Tree samples count may be invalid due to interaction between target class " \
                                          "and sample weighting. Please retrain this model to update the visualization."
        else:
            print("Failed to update tree visualization: unknown weighting strategy \"%s\"" % weighting_strategy)

    def migrate_tree_viz(self, tree_viz, weighting_strategy):
        """Update the single tree (``tree`` key) or every tree (``trees`` key) of a visualisation."""
        if "tree" in tree_viz:
            # Decision tree
            self.update_tree(tree_viz["tree"], weighting_strategy)
        elif "trees" in tree_viz:
            # RF, ET or GBT
            for tree in tree_viz["trees"]:
                self.update_tree(tree, weighting_strategy)
        else:
            print("Failed to update tree visualization: data has no \"tree\" nor \"trees\" field")

    def _migrate_tree_file(self, tree_file, core_params_file):
        """Migrate one tree file using the weighting strategy found in ``core_params_file``.

        The tree file is rewritten only when the core params file exists and declares
        ``weight.weightMethod``. Any exception is printed and not propagated.
        """
        print("Attempting migration of file %s" % tree_file)
        try:
            tree_data = base.json_loadf(tree_file)
            if not osp.isfile(core_params_file):
                return
            core_params_data = base.json_loadf(core_params_file)
            if "weight" in core_params_data and "weightMethod" in core_params_data["weight"]:
                weighting_strategy = core_params_data["weight"]["weightMethod"]
                self.migrate_tree_viz(tree_data, weighting_strategy)
                base.json_dumpf(tree_file, tree_data)
        except Exception as e:
            print("Failed to migrate tree visualization -- Error : %s" % str(e))

    def execute(self, project_paths):
        """Migrate the tree visualisations of analysis sessions, then those of saved models."""
        # Analysis models: core_params.json is looked up two directories above the tree file
        for tree_file in (glob("%s/*/*/sessions/*/*/*/tree.json" % project_paths.analysis_data)
                          + glob("%s/*/*/sessions/*/*/*/trees.json" % project_paths.analysis_data)):
            core_params_file = osp.join(osp.dirname(tree_file), "..", "..", "core_params.json")
            self._migrate_tree_file(tree_file, core_params_file)

        # Saved model versions and partitions: core_params.json is next to the tree file
        for tree_file in (glob("%s/*/versions/*/tree.json" % project_paths.saved_models)
                          + glob("%s/*/versions/*/trees.json" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/tree.json" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/trees.json" % project_paths.saved_models)):
            core_params_file = osp.join(osp.dirname(tree_file), "core_params.json")
            self._migrate_tree_file(tree_file, core_params_file)

class V8020UpdateGlobalTagsStructure(migration_json.JsonMigrationOperation):
    """Renames the fields of global tags categories in ``config/general-settings.json``.

    For each category: ``globalTagsList`` becomes ``globalTags`` (and each tag's
    ``updatedTagName`` becomes ``name``), ``applyTo`` becomes ``appliesTo`` and its
    coarse object types are expanded into finer-grained ones.
    """

    def __repr__(self,):
        return "Migrating structure of global tags categories"

    def transform(self, obj, filepath=None):
        """Migrate every entry of ``globalTagsCategories`` in place and return ``obj``."""
        # "or []" tolerates an explicit null for the categories list
        for category in obj.get("globalTagsCategories", []) or []:
            legacy_tags = category.pop("globalTagsList", None)
            if legacy_tags:
                for tag in legacy_tags:
                    if tag.get("updatedTagName"):
                        tag["name"] = tag.pop("updatedTagName")
                category["globalTags"] = legacy_tags
            else:
                # No tags to carry over: initialise the NEW field, not the legacy
                # "globalTagsList" one, and keep an existing "globalTags" as it is.
                category.setdefault("globalTags", [])

            if category.get("applyTo"):
                applies_to = category.pop("applyTo")
                category["appliesTo"] = applies_to
                if "FLOW" in applies_to:
                    applies_to.remove("FLOW")
                    applies_to.extend(["DATASET", "RECIPE", "MANAGED_FOLDER", "FLOW_ZONE", "STREAMING_ENDPOINT"])
                if "MODELS" in applies_to:
                    applies_to.remove("MODELS")
                    applies_to.extend(["SAVED_MODEL", "ANALYSIS"])
                if "NOTEBOOK" in applies_to:
                    applies_to.remove("NOTEBOOK")
                    applies_to.extend(["SQL_NOTEBOOK", "JUPYTER_NOTEBOOK"])
                if "DASHBOARD" in applies_to:
                    # DASHBOARD is kept and INSIGHT is added alongside it
                    applies_to.extend(["INSIGHT"])

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["config/general-settings.json"]

class V8020RenameHashSizeField(migration_base.ProjectLocalMigrationOperation):
    """Replaces ``hashSVDHashSize`` by ``hashSize`` in the preprocessing params of TEXT features."""

    def __repr__(self):
        return "Rename 'hashSVDhashSize' field to 'hashSize' for text features"

    @staticmethod
    def process_file(the_file, preprocessing_access_func):
        """Load ``the_file``, migrate the preprocessing section it holds and write it back.

        ``preprocessing_access_func`` receives the loaded JSON and returns the
        preprocessing dict to migrate in place.
        """
        content = base.json_loadf(the_file)
        V8020RenameHashSizeField.process_preprocessing(preprocessing_access_func(content))
        base.json_dumpf(the_file, content)

    @staticmethod
    def process_preprocessing(data):
        """Set ``hashSize`` on every TEXT feature of ``data["per_feature"]`` that lacks it.

        The legacy ``hashSVDHashSize`` is removed; its value (200000 when absent) is
        reused, except with TOKENIZE_HASHING text handling where 2**20 is used.
        """
        for feature_params in data.get("per_feature", {}).values():
            if feature_params.get("type") != "TEXT" or "hashSize" in feature_params:
                continue
            legacy_size = feature_params.pop("hashSVDHashSize", 200000)
            uses_token_hashing = feature_params.get("text_handling") == "TOKENIZE_HASHING"
            feature_params["hashSize"] = 2 ** 20 if uses_token_hashing else legacy_size

    def execute(self, project_paths):
        """Apply the renaming to MLTasks, ML recipes and resolved preprocessing params files."""

        def preprocessing_section(data):
            # MLTasks and recipes keep the params under their "preprocessing" key
            return data.get("preprocessing", {})

        def whole_document(data):
            # rpreprocessing files are the preprocessing params themselves
            return data

        rename_in_file = V8020RenameHashSizeField.process_file

        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Renaming 'hashSVDhashSize' of text features in MLTask: %s" % mltask_file)
            try:
                rename_in_file(mltask_file, preprocessing_section)
            except Exception as e:
                print("Renaming of 'hashSVDhashSize' in MLTask FAILED: %s" % e)

        recipe_files = (glob("%s/recipes/*.prediction_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_cluster" % project_paths.config))
        for train_recipe_params_file in recipe_files:
            print("Renaming 'hashSVDhashSize' of text features in recipe: %s " % train_recipe_params_file)
            try:
                rename_in_file(train_recipe_params_file, preprocessing_section)
            except Exception as e:
                print("Renaming of 'hashSVDhashSize' in train recipe migration FAILED: %s" % e)

        rpreprocessing_files = (glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
                                + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models)
                                + glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data))
        for rp_file in rpreprocessing_files:
            print("Renaming 'hashSVDhashSize' of text features in rpreprocessing file: %s " % rp_file)
            try:
                rename_in_file(rp_file, whole_document)
            except Exception as e:
                print("Renaming of 'hashSVDhashSize' in trained model rpreprocessing migration FAILED: %s" % e)

###############################################################################
# V9000 / DSS 9.0.0
###############################################################################

class V9000MigrateCategoricalHashingMethod(migration_base.ProjectLocalMigrationOperation):
    """Adds ``hash_whole_categories = False`` to CATEGORY features that do not define it."""

    def __repr__(self):
        return "Migrate categorical hashing method"

    @staticmethod
    def process_file(the_file, preprocessing_access_func):
        """Load ``the_file``, migrate the preprocessing section it holds and write it back.

        ``preprocessing_access_func`` receives the loaded JSON and returns the
        preprocessing dict to migrate in place.
        """
        content = base.json_loadf(the_file)
        V9000MigrateCategoricalHashingMethod.process_preprocessing(preprocessing_access_func(content))
        base.json_dumpf(the_file, content)

    @staticmethod
    def process_preprocessing(data):
        """Default ``hash_whole_categories`` to False on each CATEGORY feature of ``data["per_feature"]``."""
        for feature_params in data.get("per_feature", {}).values():
            if feature_params.get("type") == "CATEGORY":
                # An existing value, whatever it is, is preserved
                feature_params.setdefault("hash_whole_categories", False)

    @staticmethod
    def _process_files(files, start_message, failure_message, preprocessing_access_func):
        """Run ``process_file`` on each file, printing progress; failures are printed and skipped."""
        for json_file in files:
            print(start_message % json_file)
            try:
                V9000MigrateCategoricalHashingMethod.process_file(json_file, preprocessing_access_func)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Apply the migration to MLTasks, analysis models, ML recipes and saved models, in that order."""

        def preprocessing_section(data):
            # MLTasks and recipes keep the params under their "preprocessing" key
            return data.get("preprocessing", {})

        def whole_document(data):
            # rpreprocessing files are the preprocessing params themselves
            return data

        run = V9000MigrateCategoricalHashingMethod._process_files

        run(glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating categorical hashing method in MLTask: %s",
            "Migrating categorical hashing method in MLTask FAILED: %s",
            preprocessing_section)

        run(glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data),
            "Migrating categorical hashing method in analysis rpreprocessing file: %s ",
            "Migrating categorical hashing method in analysis rpreprocessing file FAILED: %s",
            whole_document)

        run(glob("%s/recipes/*.prediction_training" % project_paths.config)
            + glob("%s/recipes/*.clustering_training" % project_paths.config)
            + glob("%s/recipes/*.clustering_cluster" % project_paths.config),
            "Migrating categorical hashing method in recipe: %s ",
            "Migrating categorical hashing method in train recipe FAILED: %s",
            preprocessing_section)

        run(glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models),
            "Migrating categorical hashing method in saved model rpreprocessing file: %s ",
            "Migrating categorical hashing method in saved model rpreprocessing file FAILED: %s",
            whole_document)


class V9000FilterAndFlagOnDateRangeProcessor(migration_app.ShakerStepMigrationOperation):
    """Renames a date-range shaker step type and fills in the parameters of the new step type."""

    def __init__(self, original_name, new_name):
        """Record the step type to migrate (``original_name``) and the type replacing it (``new_name``)."""
        super(V9000FilterAndFlagOnDateRangeProcessor, self).__init__(original_name)
        self.original_name = original_name
        self.new_name = new_name

    def transform_step(self, step):
        """Retype ``step`` and, when it has params, set the range-filter parameters in place.

        The ``min`` and ``max`` bounds are padded to a full timestamp, ``timezone_id``
        defaults to "UTC" when absent, and the other fields are set to fixed values.
        Returns the same ``step`` object.
        """
        assert step["type"] == self.original_name
        step["type"] = self.new_name
        params = step.get('params', None)
        if params is not None:
            params["filterType"] = "RANGE"
            self.fix_incomplete_date(params, "min")
            self.fix_incomplete_date(params, "max")
            params["timezone_id"] = params.get("timezone_id", "UTC")
            params["part"] = "YEAR"
            params["option"] = "THIS"
            params["relativeMin"] = 1
            params["relativeMax"] = 1
        return step

    def fix_incomplete_date(self, params, value_name):
        """Pad ``params[value_name]`` to the length of a full ``YYYY-MM-DDTHH:MM:SS.mmm`` timestamp.

        The missing tail is copied from "1970-01-01T00:00:00.000". A missing, null or
        empty bound is left untouched.
        """
        value = params.get(value_name)
        if not value:
            # Covers an absent key, an explicit null (len() would raise on it) and ""
            return
        full_date = "1970-01-01T00:00:00.000"
        value_length = len(value)
        if value_length < len(full_date):
            params[value_name] = value + full_date[value_length:]

class V9000MigrateNotebook(migration_base.ProjectLocalMigrationOperation):
    """Moves the Jupyter notebooks of a project into ``<project config>/ipython_notebooks``."""

    def __repr__(self):
        return "Migrating of the jupyter storage path to project"

    def execute(self, project_paths):
        """Move every file under ``project_paths.jupyter_notebooks`` and try to commit the result.

        Nothing is done when the source directory does not exist. The relative layout
        of the files is preserved in the destination.
        """
        notebooks_root = project_paths.jupyter_notebooks
        if not osp.exists(notebooks_root):
            return

        project_notebooks = osp.join(project_paths.config, "ipython_notebooks")
        if not osp.exists(project_notebooks):
            os.makedirs(project_notebooks)

        for current_dir, _subdirs, file_names in os.walk(notebooks_root):
            for file_name in file_names:
                src = osp.join(current_dir, file_name)
                dest = osp.join(project_notebooks, osp.relpath(src, notebooks_root))
                target_dir = osp.dirname(dest)
                if not osp.exists(target_dir):
                    os.makedirs(target_dir)
                print("Migrate notebook %s to %s" % (src, dest))
                shutil.move(src, dest)

        import subprocess
        # The migration tasks may be called when we migrate a full DSS or when we import a project.
        # On a full DSS, we are directly on the final git directory, and we need to add the jupyter notebooks to git.
        # On a simple import, we are on a temporary directory (which is not a git one) but the git add will be done
        # automatically, so we check that we are inside a git repository before adding the files to git.
        git_command = ("if git rev-parse --git-dir > /dev/null 2>&1; then git add . && git -c user.name='DSS' -c user.email='noreply@dataiku.com' "
                       "commit -m 'Migration task: adding Jupyter notebooks to git repository'; fi")
        try:
            subprocess.check_call(git_command, cwd=project_notebooks, shell=True)
        except subprocess.CalledProcessError as e:
            print(e)
            print("WARNING: Jupyter Notebooks will not be added to the remote control of this project.")

class V9000MigrateTimeTriggers(migration_json.ProjectConfigJsonMigrationOperation):
    """Migrates the parameters of 'temporal' scenario triggers (``scenarios/*.json``, ``triggers`` path)."""

    def __repr__(self):
        return "Migrating time triggers"

    def transform(self, trigger, filepath=None):
        """Migrate one trigger in place and return it; non-temporal triggers are returned untouched.

        For a temporal trigger:
        - ``repeatFrequency`` takes the former ``count`` in 'Minutely' mode, 1 otherwise;
        - ``startingFrom`` is set to local midnight of the upgrade day, moved to the
          former ``dayOfMonth`` of January in 'Monthly' mode;
        - ``monthlyRunOn`` and ``timezone`` are set to fixed values;
        - ``count`` and ``dayOfMonth`` are removed.
        """
        if trigger.get('type', None) != 'temporal':
            return trigger

        # Imported here so that non-temporal triggers do not require dateutil
        from datetime import datetime
        from dateutil.tz import tzlocal

        params = trigger.get('params')
        if params is None:
            # Missing or null params: attach a dict so the migrated values are not lost
            params = {}
            trigger['params'] = params

        frequency = params.get('frequency', None)
        if frequency == 'Minutely':
            # Minutely mode -> rename count into repeatFrequency
            params['repeatFrequency'] = params.get('count', 1)
        else:
            params['repeatFrequency'] = 1

        # Use the current date (at midnight) as a starting point
        start_fields = dict(hour=0, minute=0, second=0, microsecond=0)
        if frequency == 'Monthly':
            # Monthly mode -> the start date should use the day of "dayOfMonth"
            # Since the month does not really matter, we force January, which has 31 days,
            # so that any day from 1 to 31 is a valid date
            day_of_month = params.get('dayOfMonth', 1)
            if not isinstance(day_of_month, int) or day_of_month < 1 or day_of_month > 31:
                day_of_month = 1
            start_fields.update(month=1, day=day_of_month)
        upgrade_date = datetime.now(tzlocal()).replace(**start_fields)
        # Millisecond precision (microseconds truncated to 3 digits) followed by the UTC offset
        params['startingFrom'] = upgrade_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + upgrade_date.strftime('%z')

        params['monthlyRunOn'] = "ON_THE_DAY"
        params['timezone'] = "SERVER"

        # Finally, remove unnecessary elements
        params.pop('count', None)
        params.pop('dayOfMonth', None)
        return trigger

    def jsonpath(self,):
        return "triggers"

    def file_patterns(self,):
        return ["scenarios/*.json"]

class V9000AddEvaluationRecipeParameters(migration_json.ProjectConfigJsonMigrationOperation):
    """Sets ``selection`` and ``modelVersionId`` on evaluation recipes (``recipes/*.evaluation``)."""

    def __repr__(self,):
        return "Add parameters to evaluation recipes"

    def file_patterns(self,):
        return ["recipes/*.evaluation"]

    def jsonpath(self,):
        return ""

    def transform(self, obj, filepath=None):
        """Set full sampling and an empty model version id on ``obj``, then return it.

        Existing values under these two keys are overwritten.
        """
        full_sampling = {"samplingMethod": "FULL"}
        obj['selection'] = full_sampling
        obj['modelVersionId'] = ''
        return obj

class V9000MigrateAPIServiceParamsWithType(migration_json.JsonMigrationOperation):
    """Tags every published service of the API deployer with ``type = "API_SERVICE"``."""

    def __init__(self):
        """Takes no argument; the parent initializer is intentionally not invoked here."""

    def __repr__(self, ):
        return "Migrate API service params to use type parameter"

    def file_patterns(self, ):
        return ["config/api-deployer/published-services/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, service_params, filepath=None):
        """Set (or overwrite) the ``type`` key of ``service_params`` and return the same dict."""
        service_params.__setitem__("type", "API_SERVICE")
        return service_params


class V9000MigrateDeployerSettings(migration_json.JsonMigrationOperation):
    """Move API-deployer-specific general settings to their generic "deployer" names."""

    def __init__(self):
        # Deliberately empty: the base-class initializer is not called.
        pass

    def __repr__(self, ):
        return "Migrating API Deployer settings to be Deployer settings"

    def file_patterns(self, ):
        return ["config/general-settings.json"]

    def transform(self, general_settings, filepath=None):
        """Rename the legacy settings in place and return ``general_settings``.

        * ``apiDeployerClientSettings`` is moved to ``deployerClientSettings``.
        * ``apiDeployerServerSettings.serverEnabledDespiteRemote`` is removed from its parent and stored
          at the top level as ``deployerServerEnabledDespiteRemote``.

        Settings that are absent are left alone.

        :param general_settings: parsed general settings (modified in place)
        :param filepath: unused
        """
        legacy_client_key = "apiDeployerClientSettings"
        if legacy_client_key in general_settings:
            client_settings = general_settings.pop(legacy_client_key)
            general_settings["deployerClientSettings"] = client_settings

        legacy_flag = "serverEnabledDespiteRemote"
        server_settings = general_settings.get("apiDeployerServerSettings")
        # A missing or empty server settings object carries no flag to move.
        if server_settings and legacy_flag in server_settings:
            flag_value = server_settings.pop(legacy_flag)
            general_settings["deployerServerEnabledDespiteRemote"] = flag_value

        return general_settings


class V9000MigrateTreeBasedModelsMaxDepth(migration_base.ProjectLocalMigrationOperation):
    """Replace the former "no maximum depth" values of tree-based models by an explicit large depth.

    * In grid parameters, every ``0`` found in ``max_tree_depth["values"]`` becomes ``2 ** 31 - 1``.
    * In resolved actual parameters, a ``max_tree_depth`` of ``None`` becomes ``2 ** 31 - 1``.

    Every matched file is written back, whether or not its content changed.
    """

    # Depth written in place of the former 0 (grids) / None (resolved actual params) values.
    UNLIMITED_DEPTH = (2 ** 31) - 1

    def __repr__(self):
        return "Migrate max depth for tree based models"

    def migrate_tree_based_algo_params(self, algo_params):
        """Replace every ``0`` in ``algo_params["max_tree_depth"]["values"]``, in place.

        Does nothing when ``algo_params`` is not a dict or has no ``max_tree_depth`` entry.
        When ``values`` is missing, an empty list is stored.
        """
        if not isinstance(algo_params, dict) or "max_tree_depth" not in algo_params:
            return

        max_tree_depth = algo_params["max_tree_depth"]
        max_tree_depth["values"] = [
            value if value != 0 else self.UNLIMITED_DEPTH
            for value in max_tree_depth.get("values", [])
        ]

    def migrate_algo_params_in_mltask(self, mltask_data):
        """Migrate the tree-based algorithms found under the ``modeling`` entry of an ML task."""
        modeling_params = mltask_data.get("modeling", {})
        for algo_key in ("random_forest_regression", "random_forest_classification", "extra_trees"):
            self.migrate_tree_based_algo_params(modeling_params.get(algo_key, {}))

    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Migrate the tree-based grids of resolved modeling params, recursing into ensemble members."""
        for grid_key in ("rf_regressor_grid", "rf_classifier_grid", "extra_trees_grid"):
            self.migrate_tree_based_algo_params(rmodeling_data.get(grid_key, {}))

        # Ensemble ALGOS: each member carries its own modeling params
        for origin_model_mp in rmodeling_data.get("ensemble_params", {}).get("modeling_params", []):
            self.migrate_algo_params_in_modeling(origin_model_mp)

    def migrate_tree_based_resolved_actual_params(self, resolved_actual_params):
        """Replace a ``None`` ``max_tree_depth`` in resolved actual params, in place."""
        if not isinstance(resolved_actual_params, dict) or "max_tree_depth" not in resolved_actual_params:
            return

        # in the actual params, you have the scikit learn param, a.k.a. None for no max depth
        if resolved_actual_params["max_tree_depth"] is None:
            resolved_actual_params["max_tree_depth"] = self.UNLIMITED_DEPTH

    def migrate_actual_params(self, actual_params):
        """Migrate the ``rf`` / ``extra_trees`` resolved params of a tree-based model.

        Does nothing when ``actual_params`` is not a dict or when its resolved algorithm is not
        one of the random forest / extra trees algorithms.
        """
        if (
            not isinstance(actual_params, dict)
            or actual_params.get("resolved", {}).get("algorithm") not in ["RANDOM_FOREST_CLASSIFICATION", "RANDOM_FOREST_REGRESSION", "EXTRA_TREES"]
        ):
            return

        resolved = actual_params.get("resolved", {})
        self.migrate_tree_based_resolved_actual_params(resolved.get("rf", {}))
        self.migrate_tree_based_resolved_actual_params(resolved.get("extra_trees", {}))

    def _migrate_json_files(self, files, start_message, failure_message, migrate):
        """Load each JSON file, apply ``migrate`` to its content in place and write it back.

        Best effort: an exception raised while handling one file is reported on stdout with
        ``failure_message`` and the remaining files are still processed.

        :param files: list of file paths
        :param start_message: ``%``-format string receiving the file path
        :param failure_message: ``%``-format string receiving the exception
        :param migrate: callable taking the parsed JSON content
        """
        for json_file in files:
            print(start_message % json_file)
            try:
                data = base.json_loadf(json_file)
                migrate(data)
                base.json_dumpf(json_file, data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Run the migration over every ML task, training recipe and trained model file of a project.

        :param project_paths: object exposing the ``config``, ``analysis_data`` and ``saved_models``
            directories of the project
        """
        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        self._migrate_json_files(
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating algorithms params in MLTask: %s",
            "Algo params in mltask migration FAILED: %s",
            self.migrate_algo_params_in_mltask)

        # config/projects/PROJECT_KEY/recipes/*.prediction_training
        self._migrate_json_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating algorithms params in training recipe: %s ",
            "Algo params in train recipe migration FAILED: %s",
            lambda data: self.migrate_algo_params_in_modeling(data.get("modeling", {})))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/rmodeling_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in rmodeling file: %s ",
            "Algo params in trained model rmodeling migration FAILED: %s",
            self.migrate_algo_params_in_modeling)

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        self._migrate_json_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating algorithms params in saved model miniTask: %s ",
            "Algo params in saved model miniTask migration FAILED: %s",
            lambda data: self.migrate_algo_params_in_mltask(data.get("miniTask", {})))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/actual_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/actual_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/actual_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/actual_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in actual params file: %s ",
            "Algo params in trained model actual params migration FAILED: %s",
            self.migrate_actual_params)


class V9020MigrateCVSeed(migration_base.ProjectLocalMigrationOperation):
    """Store an explicit cross-validation seed (``cvSeed``) in prediction ML tasks, recipes and models.

    Each file is handled on a best-effort basis: a failure is printed and the next file is processed.
    """

    def __repr__(self):
        return "Migrating seed used for cross-validation"

    def _get_seed(self, backend, prediction_type, ssd_seed):
        """Return the seed for a backend, or ``None`` for a backend other than PY_MEMORY / MLLIB.

        * ``MLLIB``: 42
        * ``PY_MEMORY``: ``ssd_seed`` for a regression that has one, 1337 otherwise
        """
        if backend == "MLLIB":
            return 42
        if backend != "PY_MEMORY":
            return None
        if prediction_type == "REGRESSION" and ssd_seed is not None:
            return ssd_seed
        return 1337

    def _get_seed_from_mltask(self, mltask_data):
        """Compute the seed from the backend, prediction type and split seed of an ML task."""
        return self._get_seed(
            mltask_data["backendType"],
            mltask_data.get("predictionType", None),
            mltask_data.get("splitParams", {}).get("ssdSeed", None)
        )

    def migrate_mltask(self, mltask_data):
        """Set ``modeling.gridSearchParams.cvSeed`` in place for PY_MEMORY / MLLIB ML tasks."""
        if mltask_data["backendType"] not in {"PY_MEMORY", "MLLIB"}:
            return
        seed = self._get_seed_from_mltask(mltask_data)
        mltask_data["modeling"]["gridSearchParams"]["cvSeed"] = seed

    def migrate_saved_ensemble(self, rmodeling_data, seed):
        """Propagate ``seed`` to every ensemble member, recursively."""
        members = rmodeling_data.get("ensemble_params", {}).get("modeling_params", [])
        for member in members:
            member["grid_search_params"]["cvSeed"] = seed
            self.migrate_saved_ensemble(member, seed)

    def execute(self, project_paths):
        """Migrate ML tasks, training recipes, saved model miniTasks and rmodeling files of a project."""
        seeded_backends = {"PY_MEMORY", "MLLIB"}

        # ML tasks: analysis definitions and session snapshots
        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Migrating cross-validation seed in MLTask: %s" % mltask_file)
            try:
                mltask_data = base.json_loadf(mltask_file)
                if mltask_data["taskType"] != "PREDICTION":
                    continue
                self.migrate_mltask(mltask_data)
                base.json_dumpf(mltask_file, mltask_data)
            except Exception as e:
                print("Migration of cross-validation seed in MLTask FAILED: %s" % e)

        # Prediction training recipes: no backend filter is applied here
        for recipe_file in glob("%s/recipes/*.prediction_training" % project_paths.config):
            print("Migrating cross-validation seed in train recipe: %s" % recipe_file)
            try:
                recipe_data = base.json_loadf(recipe_file)
                seed = self._get_seed(
                    recipe_data["backendType"],
                    recipe_data["core"]["prediction_type"],
                    recipe_data.get("splitParams", {}).get("ssdSeed", None)
                )
                recipe_data["modeling"]["grid_search_params"]["cvSeed"] = seed
                base.json_dumpf(recipe_file, recipe_data)
            except Exception as e:
                print("Migration of cross-validation seed in train recipe FAILED: %s" % e)

        # Saved model definitions: the ML task is embedded as "miniTask"
        for saved_model_file in glob("%s/saved_models/*.json" % project_paths.config):
            print("Migrating cross-validation seed in saved model miniTask: %s "% saved_model_file)
            try:
                sm_data = base.json_loadf(saved_model_file)
                mini_task = sm_data.get("miniTask", None)
                if mini_task is None or mini_task["taskType"] != "PREDICTION":
                    continue
                self.migrate_mltask(mini_task)
                base.json_dumpf(saved_model_file, sm_data)
            except Exception as e:
                print("Migration of cross-validation seed in saved model miniTask FAILED: %s" % e)

        # Sessions rmodeling: the owning mltask.json sits three directory levels above the file
        for rm_file in glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data):
            print("Migrating cross-validation seed in rmodeling file: %s " % rm_file)
            try:
                rmodeling_data = base.json_loadf(rm_file)
                session_dir = rm_file
                for _ in range(3):
                    session_dir = os.path.dirname(session_dir)
                mltask_data = base.json_loadf(os.path.join(session_dir, "mltask.json"))
                backend = mltask_data["backendType"]
                task_type = mltask_data["taskType"]
                if task_type != "PREDICTION" or backend not in seeded_backends:
                    continue
                seed = self._get_seed_from_mltask(mltask_data)
                rmodeling_data["grid_search_params"]["cvSeed"] = seed
                self.migrate_saved_ensemble(rmodeling_data, seed)
                base.json_dumpf(rm_file, rmodeling_data)
            except Exception as e:
                print("Migration of cross-validation seed in rmodeling FAILED: %s" % e)

        # Saved models rmodeling: core params and split params are read next to the file
        saved_rm_files = (glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models))
        for rm_file in saved_rm_files:
            print("Migrating cross-validation seed in rmodeling file: %s " % rm_file)
            try:
                rmodeling_data = base.json_loadf(rm_file)
                version_dir = os.path.dirname(rm_file)
                core_params = base.json_loadf(os.path.join(version_dir, "core_params.json"))
                backend = core_params["backendType"]
                task_type = core_params["taskType"]
                if task_type != "PREDICTION" or backend not in seeded_backends:
                    continue
                prediction_type = core_params["prediction_type"]
                split_data = base.json_loadf(os.path.join(os.path.dirname(rm_file), "split", "split.json"))
                ssd_seed = split_data["params"]["ssdSeed"]
                seed = self._get_seed(backend, prediction_type, ssd_seed)
                rmodeling_data["grid_search_params"]["cvSeed"] = seed
                self.migrate_saved_ensemble(rmodeling_data, seed)
                base.json_dumpf(rm_file, rmodeling_data)
            except Exception as e:
                print("Migration of cross-validation seed in rmodeling FAILED: %s" % e)


class V9020MigrateNumericalFeatureRescaling(migration_base.ProjectLocalMigrationOperation):
    """Switch numeric features configured with ``MINMAX`` rescaling to ``AVGSTD``.

    Each file is handled on a best-effort basis: a failure is printed and the next file is processed.
    """

    def __repr__(self):
        return "Migrate feature rescaling to reflect past behavior (always avg/std)"

    @staticmethod
    def process_preprocessing(data):
        """Rewrite, in place, the rescaling of every NUMERIC feature of ``data["per_feature"]``."""
        for feature_params in data.get("per_feature", {}).values():
            if feature_params.get("type") != "NUMERIC":
                continue
            if feature_params.get("rescaling") == "MINMAX":
                feature_params["rescaling"] = "AVGSTD"

    @staticmethod
    def process_ensemble_preprocessings(rmodeling_data):
        """Apply :meth:`process_preprocessing` to the preprocessing params of each ensemble member."""
        ensemble_params = rmodeling_data.get("ensemble_params", {})
        for member_preprocessing in ensemble_params.get("preprocessing_params", []):
            V9020MigrateNumericalFeatureRescaling.process_preprocessing(member_preprocessing)

    @staticmethod
    def process_file(the_file, preprocessing_access_func, transformation_func):
        """Load ``the_file``, transform the part selected by the accessor, and write the file back.

        :param the_file: path of the JSON file
        :param preprocessing_access_func: callable extracting the part to transform from the content
        :param transformation_func: callable mutating that part in place
        """
        content = base.json_loadf(the_file)
        transformation_func(preprocessing_access_func(content))
        base.json_dumpf(the_file, content)

    def execute(self, project_paths):
        """Migrate ML tasks, rpreprocessing files, recipes and ensemble rmodeling files of a project."""
        operation = V9020MigrateNumericalFeatureRescaling

        def preprocessing_section(data):
            return data.get("preprocessing", {})

        def modeling_section(data):
            return data.get("modeling", {})

        def whole_content(data):
            return data

        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Migrating numeric feature rescaling in MLTask: %s" % mltask_file)
            try:
                operation.process_file(mltask_file, preprocessing_section, operation.process_preprocessing)
            except Exception as e:
                print("Migrating numeric feature rescaling in MLTask FAILED: %s" % e)

        for rp_file in glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data):
            print("Migrating numeric feature rescaling in analysis rpreprocessing file: %s " % rp_file)
            try:
                operation.process_file(rp_file, whole_content, operation.process_preprocessing)
            except Exception as e:
                print("Migrating numeric feature rescaling in analysis rpreprocessing file FAILED: %s" % e)

        recipe_files = (glob("%s/recipes/*.prediction_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_cluster" % project_paths.config))
        for recipe_file in recipe_files:
            print("Migrating numeric feature rescaling in recipe: %s " % recipe_file)
            try:
                # Two passes over the same file: the recipe's own preprocessing, then its ensemble members
                operation.process_file(recipe_file, preprocessing_section, operation.process_preprocessing)
                operation.process_file(recipe_file, modeling_section, operation.process_ensemble_preprocessings)
            except Exception as e:
                print("Migrating numeric feature rescaling in train recipe FAILED: %s" % e)

        saved_rp_files = (glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models))
        for rp_file in saved_rp_files:
            print("Migrating numeric feature rescaling in saved model rpreprocessing file: %s " % rp_file)
            try:
                operation.process_file(rp_file, whole_content, operation.process_preprocessing)
            except Exception as e:
                print("Migrating numeric feature rescaling in saved model rpreprocessing file FAILED: %s" % e)

        rm_files = (glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
                    + glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models))
        for rm_file in rm_files:
            print("Migrating numeric feature rescaling in saved ensemble model rmodeling file: %s " % rm_file)
            try:
                operation.process_file(rm_file, whole_content, operation.process_ensemble_preprocessings)
            except Exception as e:
                print("Migrating numeric feature rescaling in saved ensemble model rmodeling file FAILED: %s" % e)


class V9050RenameFmInstanceImagesFile(migration_base.MigrationOperation):
    """Move ``resources/<cloud>-instance-images.json`` aside by appending ``.bak`` to its name."""

    def __repr__(self,):
        return "Rename instance images file in data dir to avoid shadowing newer version"

    def appliesTo(self):
        return ["fm"]

    def execute(self, diphome, simulate=False):
        """Rename the instance images file of the configured cloud, when it exists.

        Nothing happens when ``config/settings.json`` or the instance images file is missing.

        :param diphome: object whose ``path`` attribute is the data directory
        :param simulate: NOTE(review): not consulted by this method - confirm this is intended
        """
        # The cloud provider is read from 'config/settings.json' ('aws' when not set)
        settings_path = osp.join(diphome.path, 'config', 'settings.json')
        if not osp.isfile(settings_path):
            return
        cloud = base.json_loadf(settings_path).get('cloud', 'aws').lower()

        images_path = osp.join(diphome.path, 'resources', cloud + '-instance-images.json')
        if not osp.isfile(images_path):
            return

        backup = images_path + '.bak'
        os.rename(images_path, backup)
        print('%s was renamed to %s as it may shadow newer releases of DSS images, if it was intended you can safely undo the renaming' % (images_path, backup))


class V9060RenameCredentialsToAuthtype(migration_json.JsonMigrationOperation):
    """Expose the credentials mode of BigQuery and GCS connections under the ``authType`` param.

    The value of ``credentialsMode`` (``"KEYPAIR"`` when absent) is copied to ``authType``.
    The legacy ``credentialsMode`` entry itself is left in place.
    """

    # Connection types whose params are migrated
    MIGRATED_CONNECTION_TYPES = ("BigQuery", "GCS")

    def __repr__(self, ):
        return "Refactor credentials mode attribute for bigquery and GCS from 'credentialsMode' to 'authType'"

    def transform(self, obj, filepath=None):
        """Set ``authType`` on every BigQuery / GCS connection of ``obj`` and return ``obj``.

        :param obj: parsed connections file; connections are read from its ``connections`` dict
        :param filepath: unused; optional, like in the sibling operations of this file
        """
        for conn in obj.get("connections", {}).values():
            if conn.get("type", "????") not in self.MIGRATED_CONNECTION_TYPES:
                continue
            # NOTE(review): when a connection has no "params" entry, the value is written to a
            # temporary dict and therefore not persisted - same as before, confirm it is acceptable.
            cp = conn.get("params", {})
            cp["authType"] = cp.get("credentialsMode", "KEYPAIR")
        return obj

    def file_patterns(self,):
        return ["config/connections.json"]

    def jsonpath(self,):
        return ""


###############################################################################
# V10000 / DSS 10.0.0
###############################################################################

class V10000ImpactCoding(migration_base.ProjectLocalMigrationOperation):
    """Make the impact-coding settings of categorical features explicit in ML params files.

    Each file is handled on a best-effort basis: a failure is printed and the next file is processed.
    """

    def __repr__(self):
        return "Migrate impact coding for categorical features on models trained before DSS10"

    @staticmethod
    def process_preprocessing(data):
        """Fill in, in place, the missing impact-coding settings of CATEGORY features using IMPACT.

        Settings already present are kept as they are.
        """
        # The k-fold and rescaling defaults are written even though k-fold is disabled and no
        # rescaling is applied on migrated models.
        impact_defaults = (
            ("impact_m", 10),  # Explicitly set m value
            ("impact_method", "M_ESTIMATOR"),
            ("impact_kfold", False),
            ("impact_kfold_k", 5),
            ("impact_kfold_seed", 1337),
            ("categorical_rescaling", "NONE"),
        )
        for feature_params in data.get("per_feature", {}).values():
            if feature_params.get("type") != "CATEGORY":
                continue
            if feature_params.get("category_handling") != "IMPACT":
                continue
            for key, default in impact_defaults:
                feature_params.setdefault(key, default)

    @staticmethod
    def process_file(the_file, preprocessing_access_func):
        """Migrate the preprocessing selected by the accessor, plus ensemble members when present.

        :param the_file: path of the JSON file, always written back
        :param preprocessing_access_func: callable extracting the preprocessing params from the content
        """
        content = base.json_loadf(the_file)
        V10000ImpactCoding.process_preprocessing(preprocessing_access_func(content))
        # For ensembles in training recipes
        if "modeling" in content:
            ensemble_params = content["modeling"].get("ensemble_params", {})
            for member_preprocessing in ensemble_params.get("preprocessing_params", []):
                V10000ImpactCoding.process_preprocessing(member_preprocessing)

        base.json_dumpf(the_file, content)

    @staticmethod
    def process_rmodeling_file(the_file):
        """Migrate the preprocessing params of each ensemble member of an rmodeling file."""
        content = base.json_loadf(the_file)
        ensemble_params = content.get("ensemble_params", {})
        for member_preprocessing in ensemble_params.get("preprocessing_params", []):
            V10000ImpactCoding.process_preprocessing(member_preprocessing)
        base.json_dumpf(the_file, content)

    def execute(self, project_paths):
        """Migrate ML tasks, training recipes, rpreprocessing and rmodeling files of a project."""
        operation = V10000ImpactCoding

        def preprocessing_section(data):
            return data.get("preprocessing", {})

        def whole_content(data):
            return data

        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Migrating impact coding for categorical features in MLTask: %s" % mltask_file)
            try:
                operation.process_file(mltask_file, preprocessing_section)
            except Exception as e:
                print("Impact coding for categorical features in MLTask migration FAILED: %s" % e)

        for recipe_file in glob("%s/recipes/*.prediction_training" % project_paths.config):
            print("Migrating impact coding for categorical features in recipe: %s " % recipe_file)
            try:
                operation.process_file(recipe_file, preprocessing_section)
            except Exception as e:
                print("Impact coding for categorical features in train recipe migration FAILED: %s" % e)

        rp_files = (glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
                    + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models)
                    + glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data))
        for rp_file in rp_files:
            print("Migrating impact coding for categorical features in rpreprocessing file: %s " % rp_file)
            try:
                operation.process_file(rp_file, whole_content)
            except Exception as e:
                print("Impact coding for categorical features in trained model rpreprocessing migration FAILED: %s" % e)

        rm_files = (glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
                    + glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
                    + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models))
        for rm_file in rm_files:
            print("Migrating impact coding for categorical features in rmodeling file (only ensemble models): %s " % rm_file)
            try:
                operation.process_rmodeling_file(rm_file)
            except Exception as e:
                print("Impact coding for categorical features in trained model rmodeling migration FAILED: %s" % e)


class V10000PreprocessingReport(migration_base.ProjectLocalMigrationOperation):
    """Convert the ``impact`` section of preprocessing reports into ``categoricalEncodings``."""

    def __repr__(self):
        return "Update preprocessing_report.json structure (now compatible with multiclass classification)"

    @staticmethod
    def process_file(the_file):
        """Rewrite one preprocessing report; the file is untouched when it has no ``impact`` entry.

        For each column, ``values`` and ``counts`` are carried over and every entry of ``impacts``
        is wrapped in a one-element list under ``encodings``.
        """
        report = base.json_loadf(the_file)

        impact_data = report.pop("impact", None)
        if impact_data is None:
            return

        categorical_encodings = {}
        for column, column_report in impact_data.items():
            categorical_encodings[column] = {
                "values": column_report.get("values", []),
                # Preprocessing reports before V10 were only for binary classification or regression
                "targetValues": ["encoded_value"],
                "counts": column_report.get("counts", []),
                "encodings": [[encoding] for encoding in column_report.get("impacts", [])],
            }
        report["categoricalEncodings"] = categorical_encodings
        base.json_dumpf(the_file, report)

    def execute(self, project_paths):
        """Migrate the preprocessing reports of saved models and analysis sessions, best effort."""
        report_files = (glob("%s/*/versions/*/preprocessing_report.json" % project_paths.saved_models)
                        + glob("%s/*/pversions/*/*/preprocessing_report.json" % project_paths.saved_models)
                        + glob("%s/*/*/sessions/*/*/preprocessing_report.json" % project_paths.analysis_data))
        for report_file in report_files:
            print("Migrating preprocessing report: %s " % report_file)
            try:
                V10000PreprocessingReport.process_file(report_file)
            except Exception as e:
                print("Preprocessing report migration FAILED: %s" % e)


class V10000KeepRegularForBinarizeAndQuantize(migration_base.ProjectLocalMigrationOperation):
    """Enable ``keep_regular`` on numeric features handled with ``QUANTILE_BIN`` or ``BINARIZE``.

    Each file is handled on a best-effort basis: a failure is printed and the next file is processed.
    """

    def __repr__(self):
        return "Migrate keep regular feature option for binarized/quantized numerical features on models trained before DSS10"

    @staticmethod
    def process_preprocessing(data):
        """Set ``keep_regular`` to ``True``, in place, on the matching features of ``per_feature``."""
        for feature_params in data.get("per_feature", {}).values():
            if feature_params.get("type") != "NUMERIC":
                continue
            if feature_params.get("numerical_handling") in ["QUANTILE_BIN", "BINARIZE"]:
                feature_params["keep_regular"] = True

    @staticmethod
    def process_file(the_file, preprocessing_access_func):
        """Migrate the preprocessing selected by the accessor, plus ensemble members when present.

        :param the_file: path of the JSON file, always written back
        :param preprocessing_access_func: callable extracting the preprocessing params from the content
        """
        operation = V10000KeepRegularForBinarizeAndQuantize
        content = base.json_loadf(the_file)
        operation.process_preprocessing(preprocessing_access_func(content))
        # For ensembles in training recipes
        if "modeling" in content:
            ensemble_params = content["modeling"].get("ensemble_params", {})
            for member_preprocessing in ensemble_params.get("preprocessing_params", []):
                operation.process_preprocessing(member_preprocessing)

        base.json_dumpf(the_file, content)

    @staticmethod
    def process_rmodeling_file(the_file):
        """Migrate the preprocessing params of each ensemble member of an rmodeling file."""
        content = base.json_loadf(the_file)
        ensemble_params = content.get("ensemble_params", {})
        for member_preprocessing in ensemble_params.get("preprocessing_params", []):
            V10000KeepRegularForBinarizeAndQuantize.process_preprocessing(member_preprocessing)
        base.json_dumpf(the_file, content)

    def execute(self, project_paths):
        """Migrate ML tasks, training recipes, rpreprocessing and rmodeling files of a project."""
        operation = V10000KeepRegularForBinarizeAndQuantize

        def preprocessing_section(data):
            return data.get("preprocessing", {})

        def whole_content(data):
            return data

        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Migrating keep regular option for binarized/quantized features in MLTask: %s" % mltask_file)
            try:
                operation.process_file(mltask_file, preprocessing_section)
            except Exception as e:
                print("Keep regular option for binarized/quantized features in MLTask migration FAILED: %s" % e)

        recipe_files = (glob("%s/recipes/*.prediction_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_training" % project_paths.config)
                        + glob("%s/recipes/*.clustering_cluster" % project_paths.config))
        for recipe_file in recipe_files:
            print("Migrating keep regular option for binarized/quantized features in recipe: %s " % recipe_file)
            try:
                operation.process_file(recipe_file, preprocessing_section)
            except Exception as e:
                print("Keep regular option for binarized/quantized features in train recipe migration FAILED: %s" % e)

        rp_files = (glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
                    + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models)
                    + glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data))
        for rp_file in rp_files:
            print("Migrating keep regular option for binarized/quantized features in rpreprocessing file: %s " % rp_file)
            try:
                operation.process_file(rp_file, whole_content)
            except Exception as e:
                print("Keep regular option for binarized/quantized features in trained model rpreprocessing migration FAILED: %s" % e)

        rm_files = (glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
                    + glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
                    + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models))
        for rm_file in rm_files:
            print("Migrating keep regular option for binarized/quantized features in rmodeling file (only ensemble models): %s " % rm_file)
            try:
                operation.process_rmodeling_file(rm_file)
            except Exception as e:
                print("Keep regular option for binarized/quantized features in trained model rmodeling migration FAILED: %s" % e)


class V10000InteractiveStatisticsWorksheetColorBy(migration_base.ProjectLocalMigrationOperation):
    """
    Project-local migration renaming the ``groupBy`` parameter of some
    statistics cards into ``colorBy``, both in statistics worksheets and in
    insights of type "eda".
    """

    def __repr__(self):
        return "Update statistics_worksheets json files (rename groupBy into colorBy)"

    @staticmethod
    def migrate_card(card):
        """
        Move the value stored under groupBy to colorBy, for the card types
        that are concerned (pca, cdf_plot, parallel_coordinates_plot).

        :param card: the card definition, modified in place
        :return: True when the card has been modified, False otherwise
        :rtype: boolean
        """
        is_concerned = card["type"] in {"pca", "cdf_plot", "parallel_coordinates_plot"}
        if is_concerned and "groupBy" in card:
            card["colorBy"] = card.pop("groupBy")
            return True
        return False

    @staticmethod
    def migrate_worksheet_file(worksheet_file):
        """
        Migrate the top level cards of a worksheet file; the file is written
        back only when at least one card has been modified.

        :param worksheet_file: path of the worksheet JSON file
        """
        worksheet = base.json_loadf(worksheet_file)

        # The cards we want to migrate can only be top level cards.
        # A list (not a generator) is built so that every card gets visited.
        card_updates = [
            V10000InteractiveStatisticsWorksheetColorBy.migrate_card(top_level_card)
            for top_level_card in worksheet["rootCard"]["cards"]
        ]

        if any(card_updates):
            base.json_dumpf(worksheet_file, worksheet)

    @staticmethod
    def migrate_insight_file(insight_file):
        """
        Migrate the card of an insight file; insights whose type is not "eda"
        are left alone and the file is written back only when the card changed.

        :param insight_file: path of the insight JSON file
        """
        insight = base.json_loadf(insight_file)
        if insight["type"] == "eda":
            if V10000InteractiveStatisticsWorksheetColorBy.migrate_card(insight["params"]["card"]):
                base.json_dumpf(insight_file, insight)

    def execute(self, project_paths):
        """
        Run the migration on every worksheet and insight file found under
        project_paths.config. A failure on one file is printed and does not
        prevent the other files from being processed.
        """
        migration = V10000InteractiveStatisticsWorksheetColorBy

        # config/projects/PROJECT_KEY/statistics_worksheets/7FUOGwlNaM.json
        worksheet_paths = glob("%s/statistics_worksheets/*.json" % project_paths.config)
        for worksheet_path in worksheet_paths:
            print("Migrating statistics worksheet file {}".format(worksheet_path))
            try:
                migration.migrate_worksheet_file(worksheet_path)
            except Exception as err:
                print("Statistics worksheet file migration FAILED: {}".format(err))

        # config/projects/PROJECT_KEY/insights/pCobKVR.json
        insight_paths = glob("%s/insights/*.json" % project_paths.config)
        for insight_path in insight_paths:
            print("Migrating insight file {}".format(insight_path))
            try:
                migration.migrate_insight_file(insight_path)
            except Exception as err:
                print("Insight file migration FAILED: {}".format(err))


class V10000ReorganizeReadyReportsCache(migration_base.MigrationOperation):
    """
    Split caches/reports.json: each entry of its "ready" section is written to
    its own caches/reports/<report id>.json file, and reports.json keeps the
    entries with their 'reportData' field set to None.
    """

    def __repr__(self,):
        return "Reorganize cache of reports to send"

    def execute(self, diphome, simulate=False):
        """
        :param diphome: object whose ``path`` attribute is the data directory
        :param simulate: accepted but not used by this step
        """
        caches_dir = osp.join(diphome.path, "caches")
        index_path = osp.join(caches_dir, "reports.json")

        # Nothing to reorganize when there is no reports cache
        if not osp.isfile(index_path):
            return

        previous_index = base.json_loadf(index_path)

        per_report_dir = osp.join(caches_dir, "reports")
        if not os.path.exists(per_report_dir):
            os.mkdir(per_report_dir) # ensure existence

        stripped_ready = {}
        for report_id, report in previous_index.get("ready", {}).items():
            # Dump the full report first, then drop its payload from the index entry
            base.json_dumpf(osp.join(per_report_dir, report_id + ".json"), report)
            report['reportData'] = None
            stripped_ready[report_id] = report

        base.json_dumpf(index_path, {
            "ready": stripped_ready,
            "lastSends": previous_index.get("lastSends", {})
        })


class V10000RenameTrainDiagnosticFile(migration_base.ProjectLocalMigrationOperation):
    """
    Project-local migration that renames every train_diagnostics.json file
    into ml_diagnostics.json and, in parts.json files, moves the
    "trainDiagnostics" entry of each summary snippet to "mlDiagnostics".
    Both the saved models and the analysis data folders are walked.
    """

    def __repr__(self):
        return "Rename train_diagnostics into more accurate ml_diagnostics."

    def process_parts_json(self, filepath):
        """Apply this operation's transform to the parts.json file at filepath."""
        migration_json.migrate_json_file(self, filepath)

    def jsonpath(self):
        # The whole document is transformed. The empty path must actually be
        # returned: a bare "" expression statement makes the method return None.
        return ""

    def transform(self, cobj, filepath):
        """
        Rename snippet.trainDiagnostics into snippet.mlDiagnostics in every
        entry of the "summaries" section.

        :param cobj: the content of a parts.json file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed object
        """
        for partition, values in cobj.get("summaries", {}).items():
            diagnostics = values.get("snippet", {}).get("trainDiagnostics")
            if diagnostics is not None:
                del values["snippet"]["trainDiagnostics"]
                values["snippet"]["mlDiagnostics"] = diagnostics
        return cobj

    @staticmethod
    def process_diagnostic_file(filepath):
        """Rename the file at filepath to ml_diagnostics.json, in the same folder."""
        os.rename(
            filepath,
            os.path.join(os.path.dirname(filepath), "ml_diagnostics.json")
        )

    def execute(self, project_paths):
        """
        Walk project_paths.saved_models and project_paths.analysis_data and
        migrate every parts.json / train_diagnostics.json file found.
        """
        diagnostic_file = "train_diagnostics.json"
        parts_file = "parts.json"
        for root in [project_paths.saved_models, project_paths.analysis_data]:
            for dirname, _, files in os.walk(root):
                if parts_file in files:
                    parts_path = os.path.join(dirname, parts_file)
                    # Log the actual location, not just the constant file name
                    print("Migrating diagnostics in parts.json file: %s " % parts_path)
                    self.process_parts_json(parts_path)
                if diagnostic_file in files:
                    diagnostic_path = os.path.join(dirname, diagnostic_file)
                    print("Migrating diagnostic file: %s " % diagnostic_path)
                    V10000RenameTrainDiagnosticFile.process_diagnostic_file(diagnostic_path)

class V10000InteractiveStatisticsScatterPlotsColorBy(migration_base.ProjectLocalMigrationOperation):
    """
    Project-local migration renaming the ``splitBy`` parameter of 2d/3d
    scatter plot cards into ``colorBy``, in statistics worksheets and in
    insights of type "eda". The stored fingerprint of the affected insights is
    emptied so that their result is invalidated.
    """

    def __repr__(self):
        return "Update statistics_worksheets and insights json files (rename splitBy into colorBy for scatter plots 2d and 3d)"

    @staticmethod
    def migrate_card(card):
        """
        Rename the splitBy parameter into colorBy - if applicable.

        :param card: the input card, modified in place
        :return: whether the card has been updated
        :rtype: boolean
        """
        if card["type"] not in {"scatter_plot", "scatter_plot_3d"}:
            return False

        if "splitBy" not in card:
            return False

        color_by = card.pop("splitBy")
        # It makes little sense to allow the "group with all" settings in a
        # "color by" configuration. A null splitBy has no settings to reset:
        # it is carried over as is instead of failing the whole file.
        if isinstance(color_by, dict):
            color_by["groupWithAll"] = False
        card["colorBy"] = color_by
        return True

    @staticmethod
    def migrate_worksheet_file(worksheet_file):
        """
        Migrate the top level cards of a worksheet file; the file is written
        back only when at least one card has been modified.

        :param worksheet_file: path of the worksheet JSON file
        """
        worksheet = base.json_loadf(worksheet_file)
        top_level_cards = worksheet["rootCard"]["cards"]

        save_worksheet = False
        for card in top_level_cards:
            # The cards we want to migrate can only be top level cards.
            is_card_updated = V10000InteractiveStatisticsScatterPlotsColorBy.migrate_card(card)
            save_worksheet = save_worksheet or is_card_updated

        if save_worksheet:
            base.json_dumpf(worksheet_file, worksheet)

    @staticmethod
    def invalidate_insight(insights_data_path, insight_id):
        """
        Truncate the eda-card-result.fingerprint file of an insight, if it exists.

        :param insights_data_path: folder holding the insights data
        :param insight_id: identifier of the insight to invalidate
        """
        fingerprint_file = osp.join(insights_data_path, "eda", insight_id, "eda-card-result.fingerprint")

        if osp.isfile(fingerprint_file):
            # we just need to write an empty fingerprint to invalidate the insight.
            with open(fingerprint_file, "wb"):
                pass

    @staticmethod
    def migrate_insight_file(insight_file, insights_data_path):
        """
        Migrate the card of an "eda" insight file and invalidate the insight
        result when the card displays a scatter plot.

        :param insight_file: path of the insight JSON file
        :param insights_data_path: folder holding the insights data
        """
        insight = base.json_loadf(insight_file)
        if insight["type"] != "eda":
            return

        card = insight["params"]["card"]
        is_card_updated = V10000InteractiveStatisticsScatterPlotsColorBy.migrate_card(card)

        if is_card_updated:
            base.json_dumpf(insight_file, insight)

        is_scatter_2d_or_3d = card["type"] in {"scatter_plot", "scatter_plot_3d"}
        # .get(): a bivariate header without the flag simply shows no scatter plot
        is_bivariate_with_scatter_2d = card["type"] == "bivariate_header" and card.get("showScatterPlot") is True

        if is_scatter_2d_or_3d or is_bivariate_with_scatter_2d:
            # We need to invalidate the fingerprint in all cases because the
            # result JSON has changed even though the card params have not.
            # The insight id is the file name without its extension.
            insight_id = osp.splitext(osp.basename(insight_file))[0]
            V10000InteractiveStatisticsScatterPlotsColorBy.invalidate_insight(insights_data_path, insight_id)

    def execute(self, project_paths):
        """
        Run the migration on every worksheet and insight file found under
        project_paths.config. A failure on one file is printed and does not
        prevent the other files from being processed.
        """
        # config/projects/PROJECT_KEY/statistics_worksheets/7FUOGwlNaM.json
        for worksheet_file in (glob("%s/statistics_worksheets/*.json" % project_paths.config)):
            print("Migrating statistics worksheet file {}".format(worksheet_file))
            try:
                V10000InteractiveStatisticsScatterPlotsColorBy.migrate_worksheet_file(worksheet_file)
            except Exception as e:
                print("Statistics worksheet file migration FAILED: {}".format(e))

        # config/projects/PROJECT_KEY/insights/pCobKVR.json
        for insight_file in (glob("%s/insights/*.json" % project_paths.config)):
            print("Migrating insight file {}".format(insight_file))
            try:
                V10000InteractiveStatisticsScatterPlotsColorBy.migrate_insight_file(insight_file, project_paths.insights_data)
            except Exception as e:
                print("Insight file migration FAILED: {}".format(e))

class V10000DisableChartsLogScale(migration_app.ChartsMigrationOperation):
    """
    Charts migration that sets the axis1LogScale / axis2LogScale options to
    False on the chart definitions where they hold a value.
    """

    def __init__(self, chart_types):
        super(V10000DisableChartsLogScale, self).__init__(chart_types)

    def migrate_def(self, chart_def):
        """
        :param chart_def: the chart definition, modified in place
        :return: the same chart definition
        """
        for axis_option in ("axis1LogScale", "axis2LogScale"):
            self.handle_axis_log_scale(chart_def, axis_option)
        return chart_def

    def handle_axis_log_scale(self, chart_def, name):
        """Force the option called name to False, unless it is missing or None."""
        if chart_def.get(name) is None:
            return
        chart_def[name] = False

class V10000EnableProjectShareToWorkspace(migration_json.ProjectConfigJsonMigrationOperation):
    """
    On projects using LEGACY permissions, grant "shareToWorkspaces" to the
    permission items that can read the project, read its dashboards and
    manage the authorized objects.
    """

    def __repr__(self):
        return "Enable share to workspace on project if user has already read project/dashboard and manage dashboard authorizations"

    def has_read_project(self, item):
        """Truthy when the permission item gives read access to the project content."""
        return (item.get("admin", False)
                or item.get("writeProjectContent", False)
                or item.get("readProjectContent", False))

    def has_read_dashboard(self, item):
        """Truthy when the permission item gives read access to the dashboards."""
        # Every flag that gives project read access also counts here
        return (self.has_read_project(item)
                or item.get("moderateDashboards", False)
                or item.get("writeDashboards", False)
                or item.get("readDashboards", False))

    def has_managed_authorized_objects(self, item):
        """Truthy when the permission item allows managing the dashboard authorizations."""
        return (item.get("admin", False)
                or item.get("manageDashboardAuthorizations", False))

    def transform(self, obj, filepath=None):
        """
        :param obj: content of the project params.json file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed object
        """
        if not obj or obj.get("permissionsVersion", "LEGACY") != "LEGACY":
            return obj

        for permission in obj.get("permissions", []):
            can_share = (self.has_read_project(permission)
                         and self.has_read_dashboard(permission)
                         and self.has_managed_authorized_objects(permission))
            if can_share:
                permission["shareToWorkspaces"] = True
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["params.json"]

###############################################################################
# V10030 / DSS 10.0.3
###############################################################################


class V10030EDAExtraBivariateSummaryMetrics(migration_base.ProjectLocalMigrationOperation):
    """
    Project-local migration adding the extra metric flags (all set to False)
    to the bivariate summary statistics cards, in statistics worksheets and
    in insights of type "eda".
    """

    def __repr__(self):
        return "Interactive statistics: add the extra metrics to bivariate summary statistics cards"

    @staticmethod
    def migrate_card(card):
        """
        Disable the extra metrics on a bivariate summary card, or look for such
        cards among the children of any other card.

        :param card: the input card, modified in place
        :return: whether the card (or one of its descendants) has been updated
        :rtype: boolean
        """
        if card["type"] == "bivariate_summary":
            for metric in ("chi2", "oneWayAnova", "nSampleMood", "kolmogorovSmirnov"):
                card["compute"][metric] = False
            return True

        if "cards" not in card:
            return False

        # Migrate child cards; a list is built so that every child is visited
        child_updates = [
            V10030EDAExtraBivariateSummaryMetrics.migrate_card(child)
            for child in card["cards"]
        ]
        return any(child_updates)

    @staticmethod
    def migrate_worksheet_file(worksheet_file):
        """
        Migrate all the cards of a worksheet file; the file is written back
        only when at least one card has been modified.

        :param worksheet_file: path of the worksheet JSON file
        """
        worksheet = base.json_loadf(worksheet_file)

        card_updates = [
            V10030EDAExtraBivariateSummaryMetrics.migrate_card(top_level_card)
            for top_level_card in worksheet["rootCard"]["cards"]
        ]

        if any(card_updates):
            base.json_dumpf(worksheet_file, worksheet)

    @staticmethod
    def migrate_insight_file(insight_file):
        """
        Migrate the card of an insight file; insights whose type is not "eda"
        are left alone and the file is written back only when the card changed.

        :param insight_file: path of the insight JSON file
        """
        insight = base.json_loadf(insight_file)
        if insight["type"] == "eda":
            if V10030EDAExtraBivariateSummaryMetrics.migrate_card(insight["params"]["card"]):
                base.json_dumpf(insight_file, insight)

    def execute(self, project_paths):
        """
        Run the migration on every worksheet and insight file found under
        project_paths.config. A failure on one file is printed and does not
        prevent the other files from being processed.
        """
        migration = V10030EDAExtraBivariateSummaryMetrics

        # config/projects/PROJECT_KEY/statistics_worksheets/WORKSHEET_KEY.json
        worksheet_paths = glob("%s/statistics_worksheets/*.json" % project_paths.config)
        for worksheet_path in worksheet_paths:
            print("Migrating statistics worksheet file {}".format(worksheet_path))
            try:
                migration.migrate_worksheet_file(worksheet_path)
            except Exception as err:
                print("Statistics worksheet file migration FAILED: {}".format(err))

        # config/projects/PROJECT_KEY/insights/INSIGHT_KEY.json
        insight_paths = glob("%s/insights/*.json" % project_paths.config)
        for insight_path in insight_paths:
            print("Migrating insight file {}".format(insight_path))
            try:
                migration.migrate_insight_file(insight_path)
            except Exception as err:
                print("Insight file migration FAILED: {}".format(err))


###############################################################################
# V10040 / DSS 10.0.4
###############################################################################

class V10040MigrateUDRSettings(migration_json.JsonMigrationOperation):
    """
    Derive the "udrMode" general setting from the "udr" flag:
    "DEFAULT" when the flag is present and truthy, "NO" otherwise.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Update usage data settings"

    def transform(self, general_settings, filepath=None):
        """
        :param general_settings: content of general-settings.json, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed settings
        """
        udr_enabled = "udr" in general_settings and general_settings["udr"]
        general_settings["udrMode"] = "DEFAULT" if udr_enabled else "NO"
        return general_settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]


###############################################################################
# V10060 / DSS 10.0.6
###############################################################################

class V10060RemoveDataikuJarFromGeoadminPlugin(migration_base.MigrationOperation):
    """
    Delete lib/dataiku-dip.jar from the geoadmin plugin, looking at both the
    "installed" and the "dev" plugin folders.
    """

    def __repr__(self,):
        return "Remove the dataiku-dip.jar from the geoadmin plugin if it exists."

    def execute(self, diphome, simulate=False):
        """
        :param diphome: object whose ``path`` attribute is the data directory
        :param simulate: accepted but not used by this step
        """
        for plugins_kind in ("installed", "dev"):
            jar_path = osp.join(diphome.path, "plugins", plugins_kind, "geoadmin", "lib", "dataiku-dip.jar")
            if not osp.isfile(jar_path):
                continue
            print("Deleting file {} from the geoadmin plugin".format(jar_path))
            os.remove(jar_path)

class V10060MigrateAuthorizationConfig(migration_json.JsonMigrationOperation):
    """
    Set the "authMethod" of each service generation config from its
    "publicAccess" flag: "PUBLIC" when truthy, "API_KEYS" otherwise.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Migrate current service authorization"

    def appliesTo(self):
        return ["design", "api", "automation"]

    def transform(self, service_params, filepath=None):
        """
        :param service_params: content of a service config.json, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed parameters
        """
        is_public = service_params.get("publicAccess", False)
        service_params["authMethod"] = "PUBLIC" if is_public else "API_KEYS"
        return service_params

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["services/*/gens/*/config.json"]

class V10060MigrateAuthorizationConfigForServiceInProject(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Set the "authMethod" of each lambda service defined in a project from its
    "publicAccess" flag: "PUBLIC" when truthy, "API_KEYS" otherwise.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Migrate current service authorization for imported projects"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, service_params, filepath=None):
        """
        :param service_params: content of a lambda service file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed parameters
        """
        is_public = service_params.get("publicAccess", False)
        service_params["authMethod"] = "PUBLIC" if is_public else "API_KEYS"
        return service_params

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["lambda_services/*.json"]

###############################################################################
# V11000 / DSS 11.0.0
###############################################################################

class V11000UpgradeGeoAdmin(migration_base.MigrationOperation):
    """
    Replace the jar(s) and the plugin.json of the geoadmin plugin with the
    files shipped in resources/migration/geoadmin_v11_migration of the
    installation directory, for both "installed" and "dev" plugin folders.
    """

    def __repr__(self,):
        return "Upgrade Reverse geocoding (geoadmin) plugin to v2.3.0 if it is installed"

    def execute(self, diphome, simulate=False):
        """
        Any error is caught and printed, it does not propagate to the caller.

        :param diphome: object whose ``path`` attribute is the data directory
        :param simulate: accepted but not used by this step
        """
        try:
            resources_dir = osp.join(os.environ["DKUINSTALLDIR"], 'resources', 'migration', 'geoadmin_v11_migration')
            if not osp.isdir(resources_dir):
                return

            for plugins_kind in ("installed", "dev"):
                plugin_dir = osp.join(diphome.path, "plugins", plugins_kind, "geoadmin")
                if not osp.isdir(plugin_dir):
                    continue

                lib_dir = osp.join(plugin_dir, "lib")
                if osp.isdir(lib_dir):
                    # Drop every jar of the lib folder before copying the new one
                    old_jars = [name for name in os.listdir(lib_dir) if osp.splitext(name)[1] == ".jar"]
                    for jar_name in old_jars:
                        os.remove(osp.join(lib_dir, jar_name))
                    shutil.copyfile(osp.join(resources_dir, 'dss-plugin-geoadmin.jar'), osp.join(lib_dir, "dss-plugin-geoadmin.jar"))

                # The plugin descriptor is replaced even when there is no lib folder
                shutil.copyfile(osp.join(resources_dir, 'plugin.json'), osp.join(plugin_dir, "plugin.json"))
        except Exception as err:
            print("migrating reverse geocoding (geoadmin) plugin to v2.3.0 FAILED: %s" % err)

class V11000FixUpGeoMapDefinition(migration_app.ChartsMigrationOperation):
    """
    Charts migration for "geom_map" charts: the geometry, color options and
    uaColor of the chart are moved into a "geoLayers" list, and the top level
    "geometry" is reset to an empty list.
    """

    def __init__(self):
        super(V11000FixUpGeoMapDefinition, self).__init__(["geom_map"])

    def migrate_def(self, chart_def):
        """
        :param chart_def: the chart definition, modified in place
        :return: the same chart definition
        """
        geometry = chart_def.get("geometry", [])
        first_layer = {
            "geometry": [],
            "colorOptions": chart_def.get("colorOptions", self.default_color_options()),
            "uaColor": chart_def.get("uaColor", [])
        }
        layers = [first_layer]

        if len(geometry) > 0:
            first_layer["geometry"] = geometry
            # For the empty placeholder, pick the second color of the default color palette (#AEC7E8)
            layers.append({
                "geometry": [],
                "uaColor": [],
                "colorOptions": self.default_color_options(single_color="#AEC7E8")
            })

        chart_def["geoLayers"] = layers
        chart_def["geometry"] = []
        return chart_def

    def default_color_options(self, single_color="#2678B1"):
        """
        Build a fresh color options dict.

        :param single_color: value stored under "singleColor"
        :return: a new dict on each call
        """
        custom_palette = {
            "id": "__dku_custom__",
            "name": "Custom Palette",
            "colors": [],
            "values": [],
            "fixedValues": False
        }
        return {
            "singleColor": single_color,
            "transparency": 0.75,
            "colorPalette": "default",
            "ccScaleMode": "NORMAL",
            "customPalette": custom_palette,
            "paletteType": "CONTINUOUS",
            "quantizationMode": "NONE",
            "numQuantizeSteps": 5,
            "paletteMiddleValue": 0.0,
            "customColors": {},
            "heatDensityMapIntensity": 0.5,
            "heatDensityMapRadius": 0.5
        }


class V11000UpdateEditSQLTileModes(migration_json.JsonMigrationOperation):
    """
    In app manifests, set the behavior of the edit SQL dataset tiles to
    'MODAL_BROWSE' unless it is already 'GO_TO_DATASET'.
    """

    def __init__(self):
        pass

    def __repr__(self):
        return "Update mode of edit SQL dataset tiles in Dataiku apps"

    def transform(self, manifest, filepath=None):
        """
        :param manifest: content of an app-manifest.json file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed manifest
        """
        edit_sql_tile_type = 'CONNECTION_EXPLORER_TO_REPLACE_THE_SETTINGS_OF_A_DATASET_WITH_A_NEW_TABLE_REFERENCE'

        for section in manifest.get("homepageSections", []):
            for tile in section.get("tiles", []):
                is_edit_sql_tile = tile.get('type', '') == edit_sql_tile_type
                if is_edit_sql_tile and tile.get('behavior', '') != 'GO_TO_DATASET':
                    tile['behavior'] = 'MODAL_BROWSE'

        return manifest

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["config/projects/*/app-manifest.json"]


###############################################################################
# V11010 / DSS 11.0.1
###############################################################################

class V11010CreateIfThenElseWithLegacyPositioningParam(migration_app.ShakerStepMigrationOperation):
    """
    Shaker step migration for the "VisualIfRule" processor: flags every
    existing step with legacyPositioning=True in its parameters.
    """

    def __init__(self):
        super(V11010CreateIfThenElseWithLegacyPositioningParam, self).__init__("VisualIfRule")

    def __repr__(self,):
        return "Add a legacyPositioning field to the parameters of create If then Else prepare recipe processors, and set it to true"

    def transform_step(self, step):
        """
        :param step: the step definition, modified in place
        :return: the same step
        """
        # setdefault (not get): when the step has no "params" yet, the flag must
        # land in a dict that is attached to the step, not in a throwaway one.
        step.setdefault("params", {})["legacyPositioning"] = True
        return step


###############################################################################
# V11100 / DSS 11.1.0
###############################################################################

# V10040MigrateUDRSettings did not include govern nodes in its execution but govern did not run migrations before v10.0.6.
# So let's rerun the step carefully only for Govern. See [sc-90043] for more info.
class V11100MigrateGovernUDRSettings(migration_json.JsonMigrationOperation):
    """
    Govern-only step deriving the "udrMode" general setting from the "udr"
    flag; settings that already hold a "udrMode" are left untouched.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Update Govern usage data settings"

    def appliesTo(self):
        return ["govern"]

    def transform(self, general_settings, filepath=None):
        """
        :param general_settings: content of general-settings.json, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed settings
        """
        # Only act when the migration has not already been run
        if "udrMode" not in general_settings:
            udr_enabled = "udr" in general_settings and general_settings["udr"]
            general_settings["udrMode"] = "DEFAULT" if udr_enabled else "NO"
        return general_settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]


class V11100CheckTaggableObjectReadUsePrivileges(migration_base.MigrationOperation):
    """
    We want to set dku.job.checkTaggableObjectReadUsePrivileges to false on
    existing instances for backwards compatibility.
    """

    PROPERTY_NAME = "dku.job.checkTaggableObjectReadUsePrivileges"
    PROPERTY_VALUE = "false"

    def __repr__(self):
        return "Set {}={}".format(self.PROPERTY_NAME, self.PROPERTY_VALUE)

    @staticmethod
    def _ends_with_line_break(path):
        """
        Tell whether text can be appended to the file as a new line.

        :param path: path of the file to inspect
        :return: True when the file is missing, empty, or ends with a line break
        :rtype: boolean
        """
        if not osp.isfile(path) or osp.getsize(path) == 0:
            return True
        with open(path, 'rb') as f:
            f.seek(-1, os.SEEK_END)
            return f.read(1) in (b"\n", b"\r")

    def execute(self, diphome, simulate=False):
        """
        Append the property to config/dip.properties, unless it is already set.

        :param diphome: object whose ``path`` attribute is the data directory
        :param simulate: accepted but not used by this step
        """
        path = osp.join(diphome.path, "config", "dip.properties")

        # Read old properties
        properties_file = JavaPropertiesFile(path)

        # Migration has already been run, do nothing
        if self.PROPERTY_NAME in properties_file.props:
            return

        # When the last line is not terminated, start a new one first: otherwise
        # the new entry would be glued to the value of the last property.
        separator = "" if self._ends_with_line_break(path) else "\n"

        # Append to the properties file
        with open(path, 'a') as f:
            f.write("{}{}={}\n".format(separator, self.PROPERTY_NAME, self.PROPERTY_VALUE))


class V11100AddWorkspaceObjectIds(migration_json.JsonMigrationOperation):
    """
    Give a random 8-character identifier (lowercase letters and digits) to
    every workspace object that has no "id" yet.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Add identifiers to workspace objects"

    def appliesTo(self):
        return ["design", "api", "automation"]

    def transform(self, obj, filepath):
        """
        :param obj: content of a workspace file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed object
        """
        if "workspaceObjects" not in obj:
            return obj

        for workspace_object in obj["workspaceObjects"]:
            if "id" in workspace_object:
                continue
            id_chars = [random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]
            workspace_object["id"] = ''.join(id_chars)

        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["config/workspaces/*.json"]


class V11100TimeseriesHpSearch(migration_base.ProjectLocalMigrationOperation):
    """
    Project-local migration of the hyperparameter search params: "foldOffset"
    is set on every params block found, and timeseries models additionally
    get the TIME_SERIES_KFOLD mode with 3 folds.
    """

    # Algorithms used to recognize a timeseries model in rmodeling_params.json files
    TS_ALGORITHMS = [
        'TRIVIAL_IDENTITY_TIMESERIES',
        'SEASONAL_NAIVE',
        'AUTO_ARIMA',
        'SEASONAL_LOESS',
        'GLUONTS_NPTS_FORECASTER',
        'GLUONTS_SIMPLE_FEEDFORWARD',
        'GLUONTS_DEEPAR',
        'GLUONTS_TRANSFORMER',
        'GLUONTS_MQCNN'
    ]

    def __repr__(self):
        return "Migrate HP search params: update search mode & nb folds for timeseries models and add foldOffset field for all prediction models"

    @staticmethod
    def migrate_hp_search_params(gs_data, the_file, file_data, is_ts_model):
        """
        Update a search params block and write its owning document back.

        :param gs_data: the search params dict (part of file_data), or None;
                        nothing is done nor written when it is None
        :param the_file: path the document is written to
        :param file_data: the whole document holding gs_data
        :param is_ts_model: whether the params belong to a timeseries model
        """
        if gs_data is not None:
            gs_data["foldOffset"] = True
            if is_ts_model:
                gs_data["mode"] = "TIME_SERIES_KFOLD"
                gs_data["nFolds"] = 3

            base.json_dumpf(the_file, file_data)

    def execute(self, project_paths):
        """
        Migrate the MLTask files, the prediction training recipes, the saved
        model miniTasks and the rmodeling params files of the project. A
        failure on one file is printed and does not stop the migration.
        """
        migrate = V11100TimeseriesHpSearch.migrate_hp_search_params

        mltask_paths = glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
        mltask_paths = mltask_paths + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        for mltask_path in mltask_paths:
            print("Migrating HP search params in MLTask: %s" % mltask_path)
            try:
                mltask = base.json_loadf(mltask_path)

                is_timeseries = mltask.get("predictionType") == "TIMESERIES_FORECAST"
                search_params = mltask.get("modeling", {}).get("gridSearchParams")
                migrate(search_params, mltask_path, mltask, is_timeseries)
            except Exception as err:
                print("Migrating HP search params in MLTask migration FAILED: %s" % err)

        recipe_paths = glob("%s/recipes/*.prediction_training" % project_paths.config)
        for recipe_path in recipe_paths:
            print("Migrating HP search params in train recipe: %s " % recipe_path)
            try:
                recipe = base.json_loadf(recipe_path)

                is_timeseries = recipe.get("core", {}).get("prediction_type") == "TIMESERIES_FORECAST"
                search_params = recipe.get("modeling", {}).get("grid_search_params")
                migrate(search_params, recipe_path, recipe, is_timeseries)
            except Exception as err:
                print("Migrating HP search params in train recipe migration FAILED: %s" % err)

        saved_model_paths = glob("%s/saved_models/*.json" % project_paths.config)
        for saved_model_path in saved_model_paths:
            print("Migrating HP search params in saved model miniTask: %s " % saved_model_path)
            try:
                saved_model = base.json_loadf(saved_model_path)
                mini_task = saved_model.get("miniTask")
                if mini_task is None:
                    continue
                is_timeseries = mini_task.get("predictionType") == "TIMESERIES_FORECAST"
                search_params = mini_task.get("modeling", {}).get("gridSearchParams")
                migrate(search_params, saved_model_path, saved_model, is_timeseries)
            except Exception as err:
                print("Migrating HP search params in saved model miniTask FAILED: %s" % err)

        rmodeling_paths = glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
        rmodeling_paths = rmodeling_paths + glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
        rmodeling_paths = rmodeling_paths + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
        for rmodeling_path in rmodeling_paths:
            print("Migrating HP search params in rmodeling file: %s " % rmodeling_path)
            try:
                rmodeling = base.json_loadf(rmodeling_path)

                # These files carry no prediction type: rely on the algorithm
                is_timeseries = rmodeling.get("algorithm") in V11100TimeseriesHpSearch.TS_ALGORITHMS
                search_params = rmodeling.get("grid_search_params")
                migrate(search_params, rmodeling_path, rmodeling, is_timeseries)
            except Exception as err:
                print("Migrating HP search params in trained model rmodeling migration FAILED: %s" % err)


class V11100DatasetInsights(migration_json.ProjectConfigJsonMigrationOperation):
    """
    On dashboards, make sure every "dataset_table" tile has a "viewKind" in
    its tileParams: a full default tileParams is created when the tile has
    none, otherwise only the missing "viewKind" is added.
    """

    def __repr__(self, ):
        return "Add viewKind param to dashboard dataset tiles"

    def transform(self, obj, filepath = None):
        """
        :param obj: content of a dashboard file, modified in place
        :param filepath: path of the file being transformed (not used)
        :return: the transformed object
        """
        if "pages" not in obj:
            return obj

        for page in obj["pages"]:
            if "grid" not in page:
                continue
            grid = page["grid"]
            if 'tiles' not in grid:
                continue

            for tile in grid["tiles"]:
                if "insightType" not in tile or tile["insightType"] != "dataset_table":
                    continue

                if "tileParams" in tile:
                    tile_params = tile["tileParams"]
                    if "viewKind" not in tile_params:
                        tile_params["viewKind"] = "EXPLORE"
                else:
                    tile["tileParams"] = {
                        "viewKind": "EXPLORE",
                        "showName": True,
                        "showDescription": True,
                        "showCustomFields": True,
                        "showMeaning": False,
                        "showProgressBar": False
                    }
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["dashboards/*.json"]


class V11100ApiNodeConfigKeyCreation(migration_base.MigrationOperation):
    """
    API node only: run "dku generate-crypto-key" after the migration when
    config/configkey.json does not exist yet.
    """

    def __repr__(self, ):
        return "Generate encryption keys for the API node"

    def appliesTo(self):
        return ["api"]

    def execute(self, diphome):
        # All the work is done in post_execute
        pass

    def post_execute(self, diphome):
        """
        :param diphome: object whose ``path`` attribute is the data directory
        :raises subprocess.CalledProcessError: when the command exits with a non-zero status
        """
        import subprocess

        # DKUBIN takes precedence over the dku launcher of the data directory
        dku_bin = os.getenv("DKUBIN", diphome.path + "/bin/dku")
        config_key_path = osp.join(diphome.path, "config/configkey.json")

        if osp.isfile(config_key_path):
            print("Info: API node already contains a configkey.json. Skipping migration step.")
            return

        subprocess.check_call('"%s" generate-crypto-key' % dku_bin, shell=True)


class V11100RefactorTrainableLayersParamDeephub(migration_base.ProjectLocalMigrationOperation):
    """
    Replace the ``trainableLayers`` object of deephub modeling params by a plain
    ``nbFinetunedLayers`` number, in every file that may hold such params.
    """

    def __repr__(self):
        return "replace trainable_layers from deephub modeling params and default to 0 additional finetuned layer when auto mode param was activated"

    def migrate_trainable_layers_in_modeling_params(self, modeling_params):
        """
        Convert ``trainableLayers`` into ``nbFinetunedLayers`` in the given modeling params.

        ``nbFinetunedLayers`` is 0 when the former mode was ``AUTO`` (also the assumed
        mode when none is set), otherwise the former ``value`` (0 when missing).

        :param modeling_params: modeling params, modified in place; anything that is
                                not a non-empty dict is ignored
        :return: whether ``nbFinetunedLayers`` has been set
        :rtype: bool
        """
        if not modeling_params or not isinstance(modeling_params, dict):
            return False

        deephub_types = ("DEEP_HUB_IMAGE_CLASSIFICATION", "DEEP_HUB_IMAGE_OBJECT_DETECTION")
        if modeling_params.get("type") not in deephub_types:
            return False

        # The former key is dropped from the dict even if it turns out not to be usable
        former_setting = modeling_params.pop("trainableLayers", None)
        if not former_setting or not isinstance(former_setting, dict):
            return False

        if former_setting.get("mode", "AUTO") == "AUTO":
            nb_finetuned_layers = 0
        else:
            nb_finetuned_layers = former_setting.get("value", 0)
        modeling_params["nbFinetunedLayers"] = nb_finetuned_layers
        return True

    def _migrate_json_file(self, json_file, locate_modeling_params, failure_message):
        """
        Load ``json_file``, migrate the modeling params found by ``locate_modeling_params``
        and write the file back only if something changed. Any error is reported
        through ``failure_message`` (a %-format with one placeholder) and swallowed.
        """
        try:
            content = base.json_loadf(json_file)
            if self.migrate_trainable_layers_in_modeling_params(locate_modeling_params(content)):
                base.json_dumpf(json_file, content)
        except Exception as e:
            print(failure_message % e)

    def execute(self, project_paths):
        # Resolved modeling params of trained models. Examples paths:
        #   saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models only)
        #   analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
        rmodeling_files = glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
        rmodeling_files += glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
        for rmodeling_file in rmodeling_files:
            print("Migrating trainableLayer params in rmodeling file: %s " % rmodeling_file)
            self._migrate_json_file(
                rmodeling_file,
                lambda content: content,
                "Migration of trainableLayer param in trained model rmodeling FAILED: %s"
            )

        # ML tasks. Examples paths:
        #   config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        #   analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        mltask_files = glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
        mltask_files += glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        for mltask_file in mltask_files:
            print("Migrating trainableLayer param in MLTask: %s" % mltask_file)
            self._migrate_json_file(
                mltask_file,
                lambda content: content.get("modeling"),
                "Migration of trainableLayer param in mltask FAILED: %s"
            )

        # Training recipes. Examples path: config/projects/PROJECT_KEY/recipes/*.prediction_training
        for train_recipe_params_file in glob("%s/recipes/*.prediction_training" % project_paths.config):
            print("Migrating trainableLayer param in training recipe: %s " % train_recipe_params_file)
            self._migrate_json_file(
                train_recipe_params_file,
                lambda content: content.get("modeling"),
                "Migration of trainableLayer param in train recipe FAILED: %s"
            )

        # Saved models miniTask. Examples path: config/PROJECT_KEY/saved_models/58ipAuN7*.json
        for saved_model_file in glob("%s/saved_models/*.json" % project_paths.config):
            print("Migrating trainableLayer params in saved model miniTask: %s " % saved_model_file)
            self._migrate_json_file(
                saved_model_file,
                lambda content: content.get("miniTask", {}).get("modeling"),
                "Migration of trainableLayer param in saved model miniTask FAILED: %s"
            )


class V11100OAuth2SQLConnectionRefactoring(migration_json.JsonMigrationOperation):
    """
    Move the Azure OAuth login settings of SQLServer and Synapse connections to the
    ``grantType`` / ``azureOAuth2TokenEndpoint`` / ``azureOAuth2AuthorizationEndpoint`` params.
    """

    def __repr__(self):
        return "Migrate the OAuth2 settings of the SQL connection"

    def transform(self, obj, filepath=None):
        """
        Migrate every SQLServer or Synapse connection that has ``azureOAuthLoginEnabled`` set.

        :param obj: content of connections.json, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``obj``
        """
        for connection in obj.get("connections", {}).values():
            if connection.get("type", "????") not in ("Synapse", "SQLServer"):
                continue

            params = connection.get("params", {})
            if not params.get("azureOAuthLoginEnabled", False):
                continue

            print("Connection of type SQLServer or Synapse with OAuth2 enabled. Migrating it.")
            params["grantType"] = "DEVICE_CODE"

            if "azureOAuthLoginSTS" in params and params["azureOAuthLoginSTS"] != "":
                # The former STS url becomes the token endpoint, from which the
                # authorization endpoint is derived
                token_endpoint = params.pop("azureOAuthLoginSTS")
                params["azureOAuth2TokenEndpoint"] = token_endpoint
                params["azureOAuth2AuthorizationEndpoint"] = token_endpoint.replace("/token", "/authorize")
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/connections.json"]


###############################################################################
# V11200 / DSS 11.2.0
###############################################################################


class V11200AddLearningRateToTimeSeriesDeepLearningModels(migration_base.ProjectLocalMigrationOperation):
    """
    Add a ``learning_rate`` entry to the params of the GluonTS-based time series
    forecasting algorithms, in ML tasks, training recipes, saved models and trained models.
    """

    # Value written as ``learning_rate`` in pre-train settings
    LEARNING_RATE_DICT = {
        "gridMode": "EXPLICIT",
        "randomMode": "RANGE",
        "limit": {"min": 1e-23},
        "values": [.001],
        "range": {
            "min": .0001,
            "max": .1,
            "nbValues": 3,
            "scaling": "LOGARITHMIC"
        }
    }

    # Keys of the per-algorithm settings in the "modeling" object of an ML task
    TS_DL_ALGO_MODELING_FIELDS = [
        "gluonts_simple_feed_forward_timeseries",
        "gluonts_deepar_timeseries",
        "gluonts_transformer_timeseries",
        "gluonts_mqcnn_timeseries"
    ]

    # For each algorithm, the key holding its settings before ("pre_train") and
    # after ("post_train") training
    TS_DL_ALGO_TO_FIELDS = {
        'GLUONTS_SIMPLE_FEEDFORWARD': {
            "pre_train": "gluonts_simple_feed_forward_timeseries_grid",
            "post_train": "gluonts_simple_feedforward_timeseries_params"
        },
        'GLUONTS_DEEPAR': {
            "pre_train": "gluonts_deepar_timeseries_grid",
            "post_train": "gluonts_deepar_timeseries_params",
        },
        'GLUONTS_TRANSFORMER': {
            "pre_train": "gluonts_transformer_timeseries_grid",
            "post_train": "gluonts_transformer_timeseries_params",
        },
        'GLUONTS_MQCNN': {
            "pre_train": "gluonts_mqcnn_timeseries_grid",
            "post_train": "gluonts_mqcnn_timeseries_params",
        }
    }

    def __repr__(self):
        return "Migrate time series deep learning model params: add learning rate"

    @staticmethod
    def migrate_pre_post_train_modeling_params(modeling_params, the_file, file_data, param_key, new_learning_rate):
        """
        Add ``learning_rate`` to the settings of the algorithm selected in ``modeling_params``.

        Nothing happens when ``modeling_params`` is empty or when its ``algorithm`` is
        not one of ``TS_DL_ALGO_TO_FIELDS``. Otherwise ``file_data`` is written to
        ``the_file``, even if the algorithm settings entry itself is missing (in which
        case no learning rate is added).

        :param modeling_params: dict with an ``algorithm`` key, modified in place
        :param the_file: path of the file to write back
        :param file_data: full content of the file (contains ``modeling_params``)
        :param param_key: "pre_train" or "post_train"
        :param new_learning_rate: value stored under ``learning_rate``
        """
        if not modeling_params:
            return

        algorithm = modeling_params.get("algorithm")
        modeling_params_field = V11200AddLearningRateToTimeSeriesDeepLearningModels.TS_DL_ALGO_TO_FIELDS.get(algorithm, {}).get(param_key)
        if modeling_params_field:
            modeling_params.get(modeling_params_field, {}).update({
                "learning_rate": new_learning_rate
            })
            base.json_dumpf(the_file, file_data)

    @staticmethod
    def migrate_pretrain_modeling_params(modeling_params, the_file, file_data):
        """Add the ``LEARNING_RATE_DICT`` learning rate to the pre-train settings of the selected algorithm."""
        V11200AddLearningRateToTimeSeriesDeepLearningModels.migrate_pre_post_train_modeling_params(
            modeling_params, the_file, file_data, "pre_train",
            V11200AddLearningRateToTimeSeriesDeepLearningModels.LEARNING_RATE_DICT
        )

    @staticmethod
    def migrate_posttrain_modeling_params(modeling_params, the_file, file_data):
        """Add a learning rate of 0.001 to the post-train settings of the selected algorithm."""
        V11200AddLearningRateToTimeSeriesDeepLearningModels.migrate_pre_post_train_modeling_params(
            modeling_params, the_file, file_data, "post_train", .001
        )

    @staticmethod
    def migrate_modeling_params(the_file, file_data, modeling_holder=None):
        """
        Add the ``LEARNING_RATE_DICT`` learning rate to every algorithm of
        ``TS_DL_ALGO_MODELING_FIELDS`` present in a ``modeling`` object, then write
        ``file_data`` to ``the_file``. Nothing is written when ``modeling`` is missing or empty.

        :param the_file: path of the file to write back
        :param file_data: full content of the file
        :param modeling_holder: dict whose ``modeling`` key is migrated; it must be
                                ``file_data`` itself or nested in it. Defaults to
                                ``file_data`` (case of ML tasks); saved models pass
                                their ``miniTask`` object.
        """
        holder = file_data if modeling_holder is None else modeling_holder
        modeling_params = holder.get("modeling", {})
        if not modeling_params:
            return

        for modeling_params_field in V11200AddLearningRateToTimeSeriesDeepLearningModels.TS_DL_ALGO_MODELING_FIELDS:
            modeling_params.get(modeling_params_field, {}).update({
                "learning_rate": V11200AddLearningRateToTimeSeriesDeepLearningModels.LEARNING_RATE_DICT
            })
        base.json_dumpf(the_file, file_data)

    @staticmethod
    def migrate_search_desc(the_file, file_data):
        """
        Insert a "Learning rate" entry at the head of the ``importantParams`` lists of
        ``preSearchDescription`` and ``postSearchDescription`` (when present), then
        write ``file_data`` to ``the_file``.
        """
        learning_rate_desc = {
            "name": "Learning rate",
            "vals": ["0.001"]
        }

        pre_search_important_params = file_data.get("preSearchDescription", {}).get("importantParams")
        post_search_important_params = file_data.get("postSearchDescription", {}).get("importantParams")

        if pre_search_important_params is not None:
            pre_search_important_params.insert(0, learning_rate_desc)

        if post_search_important_params is not None:
            post_search_important_params.insert(0, learning_rate_desc)

        base.json_dumpf(the_file, file_data)

    def execute(self, project_paths):
        # ML tasks
        for mltask_file in (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)):
            print("Migrating learning rate in MLTask: %s" % mltask_file)
            try:
                task_data = base.json_loadf(mltask_file)
                if task_data.get("predictionType") == "TIMESERIES_FORECAST":
                    self.migrate_modeling_params(mltask_file, task_data)
            except Exception as e:
                print("Migrating learning rate in MLTask migration FAILED: %s" % e)

        # Training recipes
        for train_recipe_params_file in glob("%s/recipes/*.prediction_training" % project_paths.config):
            print("Migrating learning rate in train recipe: %s " % train_recipe_params_file)
            try:
                recipe_data = base.json_loadf(train_recipe_params_file)

                modeling = recipe_data.get("modeling", {})
                self.migrate_pretrain_modeling_params(modeling, train_recipe_params_file, recipe_data)
            except Exception as e:
                print("Migrating learning rate in train recipe migration FAILED: %s" % e)

        # Saved models: the modeling params live inside the miniTask object
        for saved_model_file in (glob("%s/saved_models/*.json" % project_paths.config)):
            print("Migrating learning rate in saved model miniTask: %s " % saved_model_file)
            try:
                sm_data = base.json_loadf(saved_model_file)
                mini_task_data = sm_data.get("miniTask")
                if mini_task_data is not None and mini_task_data.get("predictionType") == "TIMESERIES_FORECAST":
                    # Migrate miniTask["modeling"] but write the whole saved model back
                    self.migrate_modeling_params(saved_model_file, sm_data, mini_task_data)
            except Exception as e:
                print("Migrating learning rate in saved model miniTask FAILED: %s" % e)

        # Resolved modeling params of trained models
        for rm_file in (glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
                        + glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
                        + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)):
            print("Migrating learning rate in rmodeling file: %s " % rm_file)
            try:
                modeling_data = base.json_loadf(rm_file)

                self.migrate_pretrain_modeling_params(modeling_data, rm_file, modeling_data)
            except Exception as e:
                print("Migrating learning rate in trained model rmodeling migration FAILED: %s" % e)

        # Actual params and train info of trained models
        # (the globs end with a slash, so file names can be appended directly)
        for model_folder in (glob("%s/*/*/sessions/*/*/*/" % project_paths.analysis_data)
                        + glob("%s/*/versions/*/" % project_paths.saved_models)
                        + glob("%s/*/pversions/*/*/" % project_paths.saved_models)):
            print("Migrating learning rate in actual params and train info files: %s " % model_folder)
            try:
                ap_file = model_folder + "actual_params.json"
                ap_data = base.json_loadf(ap_file)
                ti_file = model_folder + "train_info.json"
                ti_data = base.json_loadf(ti_file)

                modeling = ap_data.get("resolved", {})
                algorithm = modeling.get("algorithm")
                if algorithm in V11200AddLearningRateToTimeSeriesDeepLearningModels.TS_DL_ALGO_TO_FIELDS:
                    self.migrate_posttrain_modeling_params(modeling, ap_file, ap_data)
                    self.migrate_search_desc(ti_file, ti_data)
            except Exception as e:
                print("Migrating learning rate in actual params and train info migration FAILED: %s" % e)


class V11200EDATimeSeriesCardLongFormat(migration_base.ProjectLocalMigrationOperation):
    """
    Turn the single ``value`` of the series identifiers of time series cards into a
    ``values`` list, in statistics worksheets and in EDA insights.
    """

    # Types of the cards holding series identifiers
    _TIME_SERIES_CARD_TYPES = frozenset([
        "unit_root_test_adf",
        "unit_root_test_kpss",
        "unit_root_test_za",
        "mann_kendall_test",
        "durbin_watson",
        "acf_plot",
    ])

    def __repr__(self):
        return "Update long format configurations for time series analyses in statistics_worksheets and insights"

    @staticmethod
    def _migrate_card(card):
        """
        Declare time series identifier values as a collection - if applicable.

        Identifiers that already carry ``values`` (and no ``value``) are left untouched,
        so that running the migration a second time does not erase them.

        :param card: the input card, modified in place
        :return: whether the card has been updated
        :rtype: boolean
        :raises KeyError: if the card has no ``type``
        """
        if card["type"] not in V11200EDATimeSeriesCardLongFormat._TIME_SERIES_CARD_TYPES:
            return False

        if "seriesIdentifiers" not in card:
            card["seriesIdentifiers"] = []
            return True

        is_card_updated = False
        for identifier in card["seriesIdentifiers"]:
            if "value" in identifier:
                identifier["values"] = [identifier.pop("value")]
                is_card_updated = True
            elif "values" not in identifier:
                identifier["values"] = []
                is_card_updated = True

        return is_card_updated

    @staticmethod
    def _migrate_worksheet_file(worksheet_file):
        """Migrate the cards directly under the root card of a worksheet; rewrite the file only if needed."""
        worksheet = base.json_loadf(worksheet_file)
        save_worksheet = False

        for card in worksheet["rootCard"]["cards"]:
            # Every card must be visited, even once a change has been detected
            if V11200EDATimeSeriesCardLongFormat._migrate_card(card):
                save_worksheet = True

        if save_worksheet:
            base.json_dumpf(worksheet_file, worksheet)

    @staticmethod
    def _migrate_insight_file(insight_file):
        """Migrate the card of an insight of type ``eda``; rewrite the file only if needed."""
        insight = base.json_loadf(insight_file)
        if insight["type"] != "eda":
            return

        card = insight["params"]["card"]
        if V11200EDATimeSeriesCardLongFormat._migrate_card(card):
            base.json_dumpf(insight_file, insight)

    def execute(self, project_paths):
        # config/projects/PROJECT_KEY/statistics_worksheets/7FUOGwlNaM.json
        for worksheet_file in glob("%s/statistics_worksheets/*.json" % project_paths.config):
            print("Migrating statistics worksheet file {}".format(worksheet_file))
            try:
                V11200EDATimeSeriesCardLongFormat._migrate_worksheet_file(worksheet_file)
            except Exception as e:
                print("Statistics worksheet file migration FAILED: {}".format(e))

        # config/projects/PROJECT_KEY/insights/pCobKVR.json
        for insight_file in glob("%s/insights/*.json" % project_paths.config):
            print("Migrating insight file {}".format(insight_file))
            try:
                V11200EDATimeSeriesCardLongFormat._migrate_insight_file(insight_file)
            except Exception as e:
                print("Insight file migration FAILED: {}".format(e))


# The owner of a Code Studio object used to be read from its creationTag field.
# It is now stored in a dedicated "owner" field.
class V11200CodeStudioObjectOwner(migration_json.JsonMigrationOperation):
    """Give an explicit ``owner`` to the Code Studio objects that do not have one."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Update Code Studio Objects owner"

    def appliesTo(self):
        return ["design"]

    def transform(self, code_studio_object, filepath=None):
        """
        Set ``owner`` to the login found in ``creationTag.lastModifiedBy``, or to
        "admin" when there is none. An existing ``owner`` is never overwritten.

        :param code_studio_object: the Code Studio object, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``code_studio_object``
        """
        if "owner" in code_studio_object:
            return code_studio_object

        creation_tag = code_studio_object.get("creationTag", {})
        last_modifier = creation_tag.get("lastModifiedBy", {})
        code_studio_object["owner"] = last_modifier.get("login", "admin")
        return code_studio_object

    def file_patterns(self):
        return ["config/projects/*/code_studios/*.json"]


class V11200CodeStudioResourcesRenaming(migration_json.JsonMigrationOperation):
    """
    The "non-versioned" folders are called "resources" everywhere in the user-facing
    parts; the synced zones of Code Studio templates are renamed accordingly.
    """
    def __init__(self):
        pass

    def __repr__(self):
        return "Update Code Studio Resources name"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, code_studio_template, filepath=None):
        """
        Replace "non_versioned" by "resources" in the ``zone`` of every synced zone of
        the ``dss_base_image`` blocks.

        :param code_studio_template: the template, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``code_studio_template``
        """
        blocks = code_studio_template.get("params", {}).get("blocks", {})
        for block in blocks:
            if block.get("type", None) != "dss_base_image":
                continue
            synced_zones = block.get("params", {}).get("syncedZones", {})
            for synced_zone in synced_zones:
                former_name = synced_zone["zone"]
                synced_zone["zone"] = former_name.replace("non_versioned", "resources")
        return code_studio_template

    def file_patterns(self):
        return ["config/code_studio_templates/*.json"]


class V11200ScenarioStepsWarningHandling(migration_json.ProjectConfigJsonMigrationOperation):
    """
    The outcome of a step ending with warnings used to be driven by a boolean
    (warning or failed). It is now driven by an enum, so that success can also be
    a possible outcome.
    """
    def __repr__(self):
        return "Update scenario steps warnings handling"

    def transform(self, step, filepath):
        """
        Derive ``handleWarningsAs`` from the former ``warningsAsErrors`` boolean, for
        ``check_dataset`` and ``check_consistency`` steps.

        :param step: the scenario step, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``step``
        """
        if step.get('type', None) not in ('check_dataset', 'check_consistency'):
            return step

        # NOTE(review): when the step has no 'params', the value is set on a temporary dict and lost
        step_params = step.get('params', {})
        if step_params.get('warningsAsErrors', False):
            step_params['handleWarningsAs'] = 'FAILED'
        else:
            step_params['handleWarningsAs'] = 'WARNING'
        return step

    def jsonpath(self):
        return "params.steps"

    def file_patterns(self):
        return ["scenarios/*.json"]

class V11200GovernActivateUDR(migration_json.JsonMigrationOperation):
    """Set ``udrMode`` to "DEFAULT" in the general settings of Govern nodes."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Update Govern usage data settings"

    def appliesTo(self):
        return ["govern"]

    def transform(self, general_settings, filepath=None):
        """
        Force ``udrMode`` to "DEFAULT", whatever its former value.

        :param general_settings: the general settings, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``general_settings``
        """
        general_settings.update({"udrMode": "DEFAULT"})
        return general_settings

    def file_patterns(self):
        return ["config/general-settings.json"]


###############################################################################
# V11300 / DSS 11.3.0
###############################################################################

class V11300ChartFiltersIncludeExcludeDefaults(migration_app.ChartsMigrationOperation):
    """Initialize ``excludeOtherValues`` and ``allValuesInSample`` on chart filters."""

    def __init__(self):
        super(V11300ChartFiltersIncludeExcludeDefaults, self).__init__()

    def migrate_def(self, chart_def):
        """
        Give both flags a ``False`` default on the filters that lack them. A filter
        that had no ``excludeOtherValues`` also loses its ``selectedValues``.

        :param chart_def: the chart definition, modified in place
        :return: the same ``chart_def``
        """
        for chart_filter in chart_def.get("filters", []):
            if "excludeOtherValues" not in chart_filter:
                chart_filter["excludeOtherValues"] = False
                chart_filter.pop("selectedValues", None)
            chart_filter.setdefault("allValuesInSample", False)
        return chart_def

class V11300DashboardFiltersIncludeExcludeDefaults(migration_json.ProjectConfigJsonMigrationOperation):
    """Initialize ``excludeOtherValues`` and ``allValuesInSample`` on the alphanumerical filters of dashboards."""

    def __repr__(self):
        return "Initialize dashboards alphanumerical filter facets excludeOtherValues and allValuesInSample"

    @staticmethod
    def _is_alphanumerical(facet):
        """Tell whether a filter is an alphanumerical facet, or a date filter that is not a range."""
        if facet.get("filterType", "") == "ALPHANUM_FACET":
            return True
        column_type = facet.get("columnType", "")
        if column_type == "ALPHANUM":
            return True
        return column_type == "DATE" and facet.get("dateFilterType", "") != 'RANGE'

    def transform(self, obj, filepath):
        """
        Update the alphanumerical filters of every ``filters`` insight tile.

        A missing (or null) ``excludeOtherValues`` becomes ``True`` and the filter then
        loses its ``excludedValues``; a missing (or null) ``allValuesInSample`` becomes ``False``.

        :param obj: the dashboard definition, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``obj``
        """
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                if tile.get("tileType", None) != "INSIGHT":
                    continue
                if tile.get("insightType", None) != "filters":
                    continue

                for facet in tile.get("tileParams", {}).get("filters", []):
                    if not self._is_alphanumerical(facet):
                        continue

                    if facet.get("excludeOtherValues", None) is None:
                        facet["excludeOtherValues"] = True
                        facet.pop("excludedValues", None)
                    if facet.get("allValuesInSample", None) is None:
                        facet["allValuesInSample"] = False
        return obj

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

class V11300DatasetFilterValuesMigration(migration_json.ProjectConfigJsonMigrationOperation):
    """Rename ``values`` to ``selectedValues`` in the exploration filters of the explore scripts."""

    def __repr__(self):
        return "Rename dataset insights filter values list from 'values' to 'selectedValues'"

    def transform(self, obj, filepath):
        """
        Rename the ``values`` key of the ``facet`` and ``alphanumFilter`` objects of
        every exploration filter.

        :param obj: the object found at ``jsonpath``, modified in place
        :param filepath: path of the migrated file (not used here)
        :return: the same ``obj``
        """
        for exploration_filter in obj.get("explorationFilters", []):
            for section_name in ("facet", "alphanumFilter"):
                section = exploration_filter.get(section_name, {})
                if "values" in section:
                    section["selectedValues"] = section.pop("values")
        return obj

    def file_patterns(self):
        return ["explore/*.json"]

    def jsonpath(self):
        return "script"

class V11300PartitionedModelsMoveJobIdUpdateToSmOrigin(migration_base.ProjectLocalMigrationOperation):
    """Move ``jobIdUpdate`` from parts.json to sm_origin.json, for partitioned models trained from a recipe."""

    def __repr__(self):
        return "Migrate partitioned models: move jobIdUpdate to sm_origin"

    @staticmethod
    def _move_job_id_update(version_folder, parts_file):
        """
        Copy ``jobIdUpdate`` to the sm_origin.json of ``version_folder``, then remove
        the job id related keys from ``parts_file``.
        """
        origin_file = os.path.join(version_folder, "sm_origin.json")
        origin = base.json_loadf(origin_file)
        if origin.get("origin") != "TRAINED_FROM_RECIPE":
            return

        # The (big) parts file is only loaded when the version may need to be migrated
        parts = base.json_loadf(parts_file)
        job_id_update = parts.get("jobIdUpdate", None)
        if job_id_update is None:
            # Might happen if running migration a second time
            return

        origin["jobIdUpdate"] = job_id_update
        base.json_dumpf(origin_file, origin)

        for moved_key in ("jobId", "jobIdUpdate", "jobIdVersion"):
            parts.pop(moved_key, None)
        base.json_dumpf(parts_file, parts)

    def execute(self, project_paths):
        # Only the versions of partitioned base models have a `parts.json` file,
        # which is why the lookup is done on that file
        for parts_file in glob("%s/*/versions/*/parts.json" % project_paths.saved_models):
            version_folder = os.path.dirname(parts_file)
            print("Migrating partitioned saved model version %s" % version_folder)
            try:
                self._move_job_id_update(version_folder, parts_file)
            except Exception as e:
                print("Migrating partitioned saved model version FAILED: %s" % e)


class V11300CreatePredictionFileForModelsAndModelEvaluations(migration_base.ProjectLocalMigrationOperation):
    def __repr__(self):
        # Human-readable description of this migration step.
        return "Use the values in perf.json to create the prediction_statistics.json file"

    def execute(self, project_paths):

        # Reading in saved models directories - e.g.
        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/perf.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/perf.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/perf.json
        for directory in (glob("%s/*/versions/*/" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/" % project_paths.saved_models)
                          + glob("%s/*/*/sessions/*/*/*/" % project_paths.analysis_data)):
            try:
                # Required to make this step idempotent
                # If this migration step has already been run on a project, the info is not present in perf.json anymore, and prediction_statistics.json
                # would be overwritten with an empty content.
                if not self.has_predictions_json_file(directory):
                    print("Reading analysis model or saved model perf.json file in directory: %s" % directory)
                    perf_extract = self.read_perf_and_predicted_from_model(directory)
                    print("Copying prediction infos from perf.json to prediction_statistics.json")
                    preds, perf_without_preds = self.move_from_perf_to_preds(perf_extract.perf,
                                                                             predicted=perf_extract.predicted_data_df,
                                                                             prediction_distribution=perf_extract.prediction_distribution)
                    self.write_preds(preds=preds, directory=directory)
                    if perf_extract.perf is not None:
                        # We overwrite perf.json with the new perf dict we created without info that are present in prediction_statistics.json
                        self.write_perf(perf=perf_without_preds, directory=directory)
                else:
                    print("Already a prediction_statistics.json file at %s" % directory)
            except Exception as e:
                print("Migration of performance files from model directory %s has failed: %s" % (directory, e))

        # Reading in model evaluation directories - e.g.
        # model_evaluation_stores/PROJECT_KEY/a7QE8ig7/ecsqyuFW/perf.json
        for mes_directory in (glob("%s/*/*/" % project_paths.model_evaluation_stores)):
            try:
                # Required to make this step idempotent
                # If this migration step has already been run on a project, the info is not present in perf.json anymore, and prediction_statistics.json
                # would be overwritten with an empty content.
                if not self.has_predictions_json_file(mes_directory):
                    print("Reading model evaluation perf.json file in directory: %s" % mes_directory)
                    perf_extract = self.read_perf_from_model_evaluation(mes_directory)
                    print("Copying prediction infos from perf.json to prediction_statistics.json")
                    preds, perf_without_preds = self.move_from_perf_to_preds(perf_extract.perf,
                                                                             prediction_distribution=perf_extract.prediction_distribution,
                                                                             predicted=perf_extract.predicted_data_df)
                    self.write_preds(preds=preds, directory=mes_directory)
                    if perf_extract.perf is not None:
                        # We overwrite perf.json with the new perf dict we created without info that are present in prediction_statistics.json
                        self.write_perf(perf=perf_without_preds, directory=mes_directory)
                else:
                    print("Already a prediction_statistics.json file at %s" % mes_directory)
            except Exception as e:
                print("Migration of performance files from model evaluation directory %s has failed: %s" % (mes_directory, e))


        # Reading in saved models (and model evaluations) posttrain directories for subpopulation perf files - e.g.
        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/posttrain/all_dataset_perf.json
        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/posttrain/subpop-38661b4e51855b92263e141ce9135426/modality_perf_0.json
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/1573723995773/partition_name/posttrain/all_dataset_perf.json
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/1573723995773/partition_name/posttrain/subpop-38661b4e51855b92263e141ce9135426/modality_perf_0.json
        # analysis-data/PROJECT_KEY/c9NgcJYB/DCMlt9AR/sessions/s1/pp1/m1/posttrain/all_dataset_perf.json
        # analysis-data/PROJECT_KEY/c9NgcJYB/DCMlt9AR/sessions/s1/pp1/m1/posttrain/subpop-38661b4e51855b92263e141ce9135426/modality_perf_0.json
        # model_evaluation_stores/PROJECT_KEY/NPTPOqwi/NTZhM6TXnYSN/postcomputation/all_dataset_perf.json
        # model_evaluation_stores/PROJECT_KEY/NPTPOqwi/NTZhM6TXnYSN/postcomputation/subpop-38661b4e51855b92263e141ce9135426/modality_perf_0.json
        # The goal is to replace density data "correct" and "incorrect" (and medians) with the new names
        # i.e. "actualIsThisClass" and "actualIsNotThisClass" (and their medians).
        for directory in (glob("%s/*/versions/*/posttrain/" % project_paths.saved_models)
                          + glob("%s/*/versions/*/posttrain/*/" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/posttrain/" % project_paths.saved_models)
                          + glob("%s/*/pversions/*/*/posttrain/*/" % project_paths.saved_models)
                          + glob("%s/*/*/sessions/*/*/*/posttrain/" % project_paths.analysis_data)
                          + glob("%s/*/*/sessions/*/*/*/posttrain/*/" % project_paths.analysis_data)
                          + glob("%s/*/*/postcomputation/" % project_paths.model_evaluation_stores)
                          + glob("%s/*/*/postcomputation/*/" % project_paths.model_evaluation_stores)):
            for filename in os.listdir(directory):
                f = os.path.join(directory, filename)
                try:
                    print("Reading keras post-train subpopulation perf file in file: %s" % f)
                    perf_extract = self.read_and_update_posttrain_subpopulation_perf_file(f)
                    if perf_extract is not None:
                        print("Writing it back to %s" % f)
                        self.write_posttrain_subpopulation_perf(perf_extract, f)
                    else:
                        print("The file %s does not need migrating." % f)
                except Exception as e:
                    print("Migration of post-train subpopulation performance files from file %s has failed: %s" % (f, e))

    # Guard for the case where this migration has already been run on a project and is run again:
    # it lets us avoid overwriting prediction_statistics.json with empty data from the already migrated perf.json
    def has_predictions_json_file(self, directory):
        """Tell whether `directory` already holds a `prediction_statistics.json` regular file."""
        return osp.isfile(osp.join(directory, "prediction_statistics.json"))

    class PerfExtract:
        # Plain holder for what was read from one model / model evaluation directory.
        def __init__(self, perf, predicted_data_df, prediction_distribution):
            # perf: content of perf.json, or None when the file is absent
            # predicted_data_df: pandas data read next to perf.json, or None
            # prediction_distribution: dict of predicted class counts, or None
            self.prediction_distribution = prediction_distribution
            self.predicted_data_df = predicted_data_df
            self.perf = perf

    def read_perf_from_model_evaluation(self, directory):
        perf = self.read_perf(directory)
        predicted = self.read_sample_scored_file(directory)
        evaluation = self.read_evaluation_file(directory)
        prediction_distribution = None
        if perf is not None:
            if 'confusion' in perf and 'perActual' in perf['confusion']:
                # Multiclass classifications
                prediction_distribution = self.compute_prediction_distribution_from_confusion(perf['confusion']['perActual'])
            elif 'densityData' in perf and 'perCutData' in perf and evaluation is not None and 'activeClassifierThreshold' in evaluation:
                # Binary classifications with evaluation information (including the threshold override)
                prediction_distribution = self.compute_prediction_distribution_from_pcd(perf['perCutData'],
                                                                                        evaluation['activeClassifierThreshold'],
                                                                                        list(perf['densityData'].keys()))
            elif 'densityData' in perf and 'perCutData' in perf and 'usedThreshold' in perf:
                # Binary classifications
                prediction_distribution = self.compute_prediction_distribution_from_pcd(perf['perCutData'],
                                                                                        perf['usedThreshold'],
                                                                                        list(perf['densityData'].keys()))
        return V11300CreatePredictionFileForModelsAndModelEvaluations.PerfExtract(perf, predicted, prediction_distribution)

    def read_perf_and_predicted_from_model(self, directory):
        perf = self.read_perf(directory)
        prediction_distribution = None
        predicted = None
        if perf is not None and 'predictionPDF' in perf:  # Regression
            predicted = self.read_predicted_file(directory)
        elif perf is not None and 'densityData' in perf:  # Classification
            predicted = self.read_predicted_file(directory)
            if predicted is not None and 'usedThreshold' in perf:  # Binary classification
                classes = self.get_classes_from_proba_columns(predicted)
                threshold = perf['usedThreshold']
                prediction_distribution = self.compute_prediction_distribution_binary(predicted["proba_" + classes[1]], threshold, classes[0], classes[1])
            elif predicted is not None:  # Multiclass
                prediction_distribution = self.compute_prediction_distribution_multiclass(predicted['prediction'])
            elif 'confusion' in perf and 'perActual' in perf['confusion']:
                # Multiclass classifications without predicted.csv (e.g. mlflow models)
                prediction_distribution = self.compute_prediction_distribution_from_confusion(perf['confusion']['perActual'])
            elif 'densityData' in perf and 'perCutData' in perf and 'usedThreshold' in perf:
                # Binary classifications without predicted.csv (e.g. mlflow models)
                prediction_distribution = self.compute_prediction_distribution_from_pcd(perf['perCutData'],
                                                                                        perf['usedThreshold'],
                                                                                        list(perf['densityData'].keys()))
        return V11300CreatePredictionFileForModelsAndModelEvaluations.PerfExtract(perf, predicted, prediction_distribution)

    def read_perf(self, directory):
        """Load `<directory>/perf.json`; return None when that file does not exist."""
        perf_path = osp.join(directory, "perf.json")
        if not osp.isfile(perf_path):
            return None
        return base.json_loadf(perf_path)

    def read_predicted_file(self, directory):
        """Read the tab-separated `<directory>/predicted.csv` into a pandas DataFrame.

        :return: the DataFrame, or None when the file does not exist
        """
        import pandas as pd
        csv_path = osp.join(directory, "predicted.csv")
        if not osp.isfile(csv_path):
            return None

        print("Reading %s" % csv_path)
        frame = pd.read_csv(csv_path, sep='\t')
        print("Finished reading %s" % csv_path)
        return frame

    def read_evaluation_file(self, directory):
        """Load `<directory>/evaluation.json`; return None when that file does not exist."""
        evaluation_path = osp.join(directory, "evaluation.json")
        if not osp.isfile(evaluation_path):
            return None

        print("Reading evaluation file %s" % evaluation_path)
        content = base.json_loadf(evaluation_path)
        print("Finished reading %s" % evaluation_path)
        return content

    def read_sample_scored_file(self, directory):
        """Read the 'prediction' column of the scored sample stored in a model evaluation directory.

        The column names come from `sample_scored_schema.json`; the data from the gzipped, tab-separated
        `sample_scored.csv.gz`.

        :return: the 'prediction' column, or None when either file is missing or the reading fails
        """
        import pandas as pd
        data_path = osp.join(directory, "sample_scored.csv.gz")
        schema_path = osp.join(directory, "sample_scored_schema.json")
        if not (osp.isfile(data_path) and osp.isfile(schema_path)):
            return None

        print("Reading sample schema file %s" % schema_path)
        try:
            schema = base.json_loadf(schema_path)
            column_names = [column["name"] for column in schema["columns"]]
            print("Reading sample file %s" % data_path)
            frame = pd.read_csv(data_path, sep='\t', compression='gzip', names=column_names)
            print("Finished reading sample file %s" % data_path)
            # Scored sample always has a prediction column (created by Model Evaluations)
            return frame['prediction']
        except Exception as e:
            # Best effort: a sample that cannot be read is reported and treated as absent
            print("Failed reading sample file %s" % data_path)
            print("Error:  %s" % e)
            return None

    def read_and_update_posttrain_subpopulation_perf_file(self, perf_file):
        if osp.isfile(perf_file):
            perf = base.json_loadf(perf_file)
            if 'densityData' in perf:
                for class_name in perf['densityData']:
                    # renaming of existing density data
                    self.change_key_in_density_data_and_remove(perf['densityData'][class_name], "correct", "actualIsThisClass")
                    self.change_key_in_density_data_and_remove(perf['densityData'][class_name], "correctMedian", "actualIsThisClassMedian")
                    self.change_key_in_density_data_and_remove(perf['densityData'][class_name], "incorrect", "actualIsNotThisClass")
                    self.change_key_in_density_data_and_remove(perf['densityData'][class_name], "incorrectMedian", "actualIsNotThisClassMedian")
                return perf
            else:
                return None
        else:
            return None

    def write_posttrain_subpopulation_perf(self, perf, f):
        # `f` is the path of the perf file to (over)write with the `perf` dict.
        base.json_dumpf(f, perf)

    def write_preds(self, preds, directory):
        """Dump `preds` into the `prediction_statistics.json` file of `directory`."""
        print("Write %s/prediction_statistics.json" % directory)
        target_path = os.path.join(directory, "prediction_statistics.json")
        base.json_dumpf(target_path, preds)

    def write_perf(self, perf, directory):
        """Dump `perf` into the `perf.json` file of `directory`."""
        print("Write %s/perf.json" % directory)
        target_path = os.path.join(directory, "perf.json")
        base.json_dumpf(target_path, perf)

    def compute_prediction_distribution_multiclass(self, predictions):
        """Count the occurrences of each predicted value, using the string form of the values as keys."""
        counts_per_class = predictions.astype(str).value_counts()
        return counts_per_class.to_dict()

    def compute_prediction_distribution_binary(self, proba_1, threshold, class_0, class_1):
        """Count the predictions of each class of a binary classification.

        A row is predicted as `class_1` when its probability is strictly above `threshold`,
        as `class_0` otherwise.
        """
        import pandas as pd
        predicted_classes = []
        for proba in proba_1:
            if proba > threshold:
                predicted_classes.append(class_1)
            else:
                predicted_classes.append(class_0)
        return pd.Series(predicted_classes).value_counts().to_dict()

    def get_classes_from_proba_columns(self, proba_columns):
        """Return the names yielded by iterating `proba_columns.head()`, with every "proba_" occurrence removed."""
        class_names = []
        for col_name in proba_columns.head():
            class_names.append(col_name.replace("proba_", ""))
        return class_names

    def move_from_perf_to_preds(self, perf, prediction_distribution, predicted=None):
        pred = dict()
        if perf is None:
            return pred, None
        else:
            if 'densityData' in perf:  # classification
                pred['probabilityDensities'] = dict()
                classes = list(perf['densityData'].keys())
                if 'perCutData' in perf and 'usedThreshold' in perf:
                    actual_distribution = self.compute_actual_distribution_from_pcd(perf['perCutData'], perf['usedThreshold'], classes)
                elif 'confusion' in perf and 'perActual' in perf['confusion']:
                    actual_distribution = self.compute_actual_distribution_from_confusion(perf['confusion']['perActual'])
                else:
                    actual_distribution = None
                for k in perf['densityData']:
                    # renaming of existing density data
                    self.change_key_in_density_data_and_remove(perf['densityData'][k], "correct", "actualIsThisClass")
                    self.change_key_in_density_data_and_remove(perf['densityData'][k], "correctMedian", "actualIsThisClassMedian")
                    self.change_key_in_density_data_and_remove(perf['densityData'][k], "incorrect", "actualIsNotThisClass")
                    self.change_key_in_density_data_and_remove(perf['densityData'][k], "incorrectMedian", "actualIsNotThisClassMedian")

                    if actual_distribution is not None and k in actual_distribution:
                        total = sum(actual_distribution.values())
                        class_ratio = actual_distribution[k] / total
                        not_this_class_ratio = 1 - class_ratio
                        pred['probabilityDensities'][k] = dict()
                        pred['probabilityDensities'][k]['density'] = self.make_combined_density(perf['densityData'][k], class_ratio, not_this_class_ratio)
                        pred['probabilityDensities'][k]['median'] = self.make_combined_median(perf['densityData'][k], class_ratio, not_this_class_ratio)

                if prediction_distribution is not None:
                    pred['predictedClassCount'] = prediction_distribution
            elif 'predictionPDF' in perf:  # regression with perf
                if 'x' in perf['predictionPDF']:
                    pred['x'] = perf['predictionPDF']['x']
                if 'pdf' in perf['predictionPDF']:
                    pred['pdf'] = perf['predictionPDF']['pdf']
                if predicted is not None and 'prediction' in predicted:
                    pred['predictions'] = predicted['prediction'].dropna().tolist()
                elif 'scatterPlotData' in perf and 'y' in perf['scatterPlotData']:
                    pred['predictions'] = perf['scatterPlotData']['y']
                else:
                    pred['predictions'] = []
                del perf['predictionPDF']
            return pred, perf

    def change_key_in_density_data_and_remove(self, density_data, before_key, after_key):
        """Rename `before_key` to `after_key` in `density_data`, in place; no-op when `before_key` is absent."""
        if before_key not in density_data:
            return
        moved_value = density_data[before_key]
        density_data[after_key] = moved_value
        del density_data[before_key]

    def compute_prediction_distribution_from_confusion(self, confusion_per_actual):
        """Sum, per predicted class, the 'perPredicted' counts found under every actual class.

        Every actual class is present in the result, with a count of 0 when it is never predicted.
        """
        distribution = dict.fromkeys(confusion_per_actual, 0)
        for actual_class in confusion_per_actual:
            per_actual = confusion_per_actual[actual_class]
            if 'perPredicted' not in per_actual:
                continue
            per_predicted = per_actual['perPredicted']
            for predicted_class in per_predicted:
                distribution[predicted_class] = distribution.get(predicted_class, 0) + per_predicted[predicted_class]
        return distribution

    def compute_actual_distribution_from_confusion(self, confusion_per_actual):
        """Map every actual class to its 'actualClassCount', or to 0 when that entry is absent."""
        distribution = {}
        for class_name in confusion_per_actual:
            per_actual = confusion_per_actual[class_name]
            if 'actualClassCount' in per_actual:
                distribution[class_name] = per_actual['actualClassCount']
            else:
                distribution[class_name] = 0
        return distribution

    def compute_prediction_distribution_from_pcd(self, pcd, threshold, classes):
        prediction_distribution = dict()
        cut_index = pcd.get('cut', [threshold]).index(threshold)
        if self.is_not_corrupted(pcd, cut_index):
            prediction_distribution[classes[0]] = pcd['fn'][cut_index] + pcd['tn'][cut_index]
            prediction_distribution[classes[1]] = pcd['fp'][cut_index] + pcd['tp'][cut_index]
        return prediction_distribution

    def compute_actual_distribution_from_pcd(self, pcd, threshold, classes):
        actual_distribution = dict()
        cut_index = pcd.get('cut', [threshold]).index(threshold)
        if self.is_not_corrupted(pcd, cut_index):
            actual_distribution[classes[0]] = pcd['fp'][cut_index] + pcd['tn'][cut_index]
            actual_distribution[classes[1]] = pcd['fn'][cut_index] + pcd['tp'][cut_index]
        return actual_distribution

    def make_combined_density(self, density_data_for_class, class_ratio, not_this_class_ratio):
        """Blend, point by point, the two densities of a class, weighted by the given ratios.

        :return: the list of blended values, or None when one of the two densities is missing
        """
        if 'actualIsThisClass' not in density_data_for_class or 'actualIsNotThisClass' not in density_data_for_class:
            return None

        blended = []
        pairs = zip(density_data_for_class['actualIsThisClass'], density_data_for_class['actualIsNotThisClass'])
        for this_class_value, not_this_class_value in pairs:
            blended.append(this_class_value * class_ratio + not_this_class_value * not_this_class_ratio)
        return blended

    def make_combined_median(self, density_data_for_class, class_ratio, not_this_class_ratio):
        """Blend the two medians of a class, weighted by the given ratios; None when one of them is missing."""
        if 'actualIsThisClassMedian' not in density_data_for_class or 'actualIsNotThisClassMedian' not in density_data_for_class:
            return None

        this_class_part = density_data_for_class['actualIsThisClassMedian'] * class_ratio
        not_this_class_part = density_data_for_class['actualIsNotThisClassMedian'] * not_this_class_ratio
        return this_class_part + not_this_class_part

    def is_not_corrupted(self, pcd, index):
        """Tell whether `pcd` has all of 'cut', 'fn', 'tn', 'fp', 'tp' and its four count lists reach `index`."""
        count_keys = ('fn', 'tn', 'fp', 'tp')
        for required_key in ('cut',) + count_keys:
            if required_key not in pcd:
                return False
        return all(len(pcd[count_key]) > index for count_key in count_keys)


###############################################################################
# V11400 / DSS 11.4.0
###############################################################################

class V11400DeleteWrongCIForDataDrift(migration_base.ProjectLocalMigrationOperation):
    """Remove the "lower" and "upper" entries of `driftModelAccuracy` in every `data_metrics.json`
    found two levels below the model evaluation stores folder.

    The step can be run several times: a file that no longer holds any of the two entries (or that
    has no `driftModelAccuracy` object at all) is left untouched instead of being reported as failed.
    """

    def __repr__(self,):
        return "Delete the lower and upper bound of the data drift CI: they were wrong."

    def execute(self, project_paths):
        for data_metrics in (glob("%s/*/*/data_metrics.json" % project_paths.model_evaluation_stores)):
            print("Migrating data_metrics: %s " % data_metrics)
            try:
                data_metrics_dict = base.json_loadf(data_metrics)
                drift_model_accuracy = data_metrics_dict.get("driftModelAccuracy")
                if not isinstance(drift_model_accuracy, dict):
                    # No drift model accuracy stored in this file: nothing to delete
                    continue

                has_bounds = "lower" in drift_model_accuracy or "upper" in drift_model_accuracy
                if not has_bounds:
                    # Already migrated
                    continue

                # Pop with a default so that a file holding only one of the two bounds is still cleaned
                drift_model_accuracy.pop("lower", None)
                drift_model_accuracy.pop("upper", None)
                base.json_dumpf(data_metrics, data_metrics_dict)
            except Exception as e:
                print("Data metrics migration FAILED: %s" % e)


class V11400MoveWebappsTempFolder(migration_base.MigrationOperation):
    """Move what lives in `tmp/<project>/web_apps/` to `web_apps/<project>/`.

    Two kinds of entries are moved: the webapp working folders, and the `instance-info-*.json`
    marker files of webapp deployments on K8S. An already existing target is cleared first.
    """

    def __repr__(self,):
        return "Migrate webapp working dirs from tmp/ to dedicated web_apps/ folder"

    def execute(self, diphome, simulate=False):
        """Perform the move.

        :param diphome: object whose `path` attribute is the root folder holding `tmp/` and `web_apps/`
        :param simulate: not read by this step
        """
        target_folder = osp.join(diphome.path, "web_apps")

        # create the target folder if needed
        # nota: in UIF this'll have to be chmod'd to 711 by a run of install-impersonation
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        # move webapps
        for webapp_folder in glob(osp.join(diphome.path, 'tmp/*/web_apps/*')):
            # only take folders
            if not os.path.isdir(webapp_folder):
                logging.info("Found a path that is not a folder, skipping: " + webapp_folder)
                continue
            # trim trailing /, just in case (there shouldn't be)
            while webapp_folder.endswith('/'):
                webapp_folder = webapp_folder[:-1]
            # .../tmp/<project_key>/web_apps/<webapp_id>
            chunks = webapp_folder.split(os.sep)
            webapp_id = chunks[-1]
            project_key = chunks[-3]

            logging.info("Moving temp folder of %s.%s" % (project_key, webapp_id))
            new_webapp_folder = osp.join(target_folder, project_key, webapp_id)
            new_webapp_folder_parent = osp.dirname(new_webapp_folder)
            # make the folder for the project level
            if not osp.exists(new_webapp_folder_parent):
                os.makedirs(new_webapp_folder_parent)
            # make sure there is not already a folder for the webapp
            if osp.exists(new_webapp_folder):
                logging.info("Target folder already exists, clearing")
                shutil.rmtree(new_webapp_folder)
            # move it; unlike os.rename, shutil.move also works when tmp/ is on another filesystem
            shutil.move(webapp_folder, new_webapp_folder)

        # move the marker file for webapp deployments on K8S
        for webapp_info in glob(osp.join(diphome.path, 'tmp/*/web_apps/instance-info-*.json')):
            # only take files
            if not os.path.isfile(webapp_info):
                logging.info("Found a path that is not a file, skipping: " + webapp_info)
                continue
            # .../tmp/<project_key>/web_apps/<info_name>
            chunks = webapp_info.split(os.sep)
            info_name = chunks[-1]
            project_key = chunks[-3]

            logging.info("Moving info file %s of %s" % (info_name, project_key))
            new_webapp_info = osp.join(target_folder, project_key, info_name)
            new_webapp_info_parent = osp.dirname(new_webapp_info)
            # make the folder for the project level
            if not osp.exists(new_webapp_info_parent):
                os.makedirs(new_webapp_info_parent)
            # make sure there is not already a file for the webapp
            if osp.exists(new_webapp_info):
                logging.info("Target info file already exists, clearing")
                os.remove(new_webapp_info)
            # move it; unlike os.rename, shutil.move also works when tmp/ is on another filesystem
            shutil.move(webapp_info, new_webapp_info)


###############################################################################
# V12000 / DSS 12.0.0
###############################################################################
class V12000FixupTopNParams(migration_json.ProjectConfigJsonMigrationOperation):
    """Set `retrievedColumnsSelectionMode` in topN recipes from the legacy `retrievedColumns` list."""

    def __repr__(self,):
        return "Update all-selected in topN recipe"

    def file_patterns(self,):
        return ["recipes/*.topn"]

    def jsonpath(self,):
        return ""

    def transform(self, obj, filepath):
        # An empty (or absent) list used to mean "retrieve all the columns"
        nothing_listed = len(obj.get("retrievedColumns", [])) == 0
        obj["retrievedColumnsSelectionMode"] = "ALL" if nothing_listed else "EXPLICIT"
        return obj


class V12000FixupJoinParams(migration_json.ProjectConfigJsonMigrationOperation):
    """Set `outputColumnsSelectionMode` on every virtual input of join-like recipes,
    based on the legacy `autoSelectColumns` flag."""

    def __repr__(self,):
        return "Update output column selection mode in join recipes"

    def file_patterns(self,):
        return ["recipes/*.join", "recipes/*.geojoin", "recipes/*.fuzzyjoin"]

    def jsonpath(self,):
        return ""

    def transform(self, obj, filepath):
        for virtual_input in obj.get("virtualInputs", []):
            # A missing flag is handled like a false one
            auto_select = virtual_input.get("autoSelectColumns", False)
            virtual_input["outputColumnsSelectionMode"] = "ALL" if auto_select else "MANUAL"
        return obj

class V12100FixupWindowParams(migration_json.ProjectConfigJsonMigrationOperation):
    """Set `retrievedColumnsSelectionMode` in window recipes.

    The mode is "EXPLICIT" as soon as one entry of `values` has a "column" and a falsy "value",
    "ALL" otherwise (including when there are no entries).
    """

    def __repr__(self):
        return "Update output column selection mode in windows recipe"

    def file_patterns(self):
        return ["recipes/*.window"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        entries = obj.get("values", [])
        some_column_unselected = any("column" in entry and not entry["value"] for entry in entries)
        obj["retrievedColumnsSelectionMode"] = "EXPLICIT" if some_column_unselected else "ALL"
        return obj

class V12000OVariableImportanceDashboardMigration(migration_base.ProjectLocalMigrationOperation):
    """Switch the saved model report tiles showing variable importance / interpretation
    to the feature importance display mode, in every dashboard of the project."""

    def __repr__(self):
        return "Migrate default variable importance dashboard tiles to feature importance tile"

    def execute(self, project_paths):
        dashboards_pattern = "%s/dashboards/*.json" % project_paths.config
        for dashboard_file in glob(dashboards_pattern):
            self.migrate_dashboard(dashboard_file)

    @staticmethod
    def migrate_dashboard(dashboard_file):
        """Migrate the tiles of one dashboard file; the file is written back in every case."""
        legacy_display_modes = ["variables_importance", "variables_interpretation"]
        dashboard = base.json_loadf(dashboard_file)

        for page in dashboard.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                # Only saved model report insights are concerned
                if tile.get("tileType") != "INSIGHT" or tile.get("insightType") != "saved-model_report":
                    continue
                tile_params = tile.get("tileParams")
                if tile_params is None or tile_params.get("displayMode") not in legacy_display_modes:
                    continue

                tile_params["displayMode"] = "feature_importance"
                # Existing advanced options are kept; only their featureImportance entry is (re)set
                advanced_options = tile_params.setdefault("advancedOptions", {})
                advanced_options["featureImportance"] = {
                    "importanceDisplayMode": "variableImportance",
                    "graphType": "absoluteFeatureImportance"
                }

        base.json_dumpf(dashboard_file, dashboard)

class V12000EnableOpals(migration_json.JsonMigrationOperation):
    """Turn `opalsEnabled` on in the general settings, unless white labeling settings
    customize the product name or the help-related entries."""

    def __repr__(self):
        return "Enable Help Center"

    def file_patterns(self, ):
        return ["config/general-settings.json"]

    def _any_unsupported_settings(self, general_settings):
        """Tell whether the white labeling settings hold a customization that prevents the enabling."""
        if "whiteLabeling" not in general_settings:
            return False
        white_labeling = general_settings["whiteLabeling"]

        # A product name other than the default one
        if white_labeling.get("productLongName", "Dataiku DSS") != "Dataiku DSS":
            return True

        # Text settings: unsupported as soon as one of them is filled
        text_settings = (
            "referenceDocRootUrl",
            "aboutModalTitle",
            "aboutModalLogoUrl",
            "aboutModalText",
            "getHelpModalTitle",
            "getHelpModalText",
        )
        if any(name in white_labeling and white_labeling[name] for name in text_settings):
            return True

        # Flags: unsupported as soon as one of them is present and switched off
        flag_settings = (
            "contextualHelpSearchEnabled",
            "giveFeedbackModalEnabled",
            "defaultHelpMenuItemsEnabled",
        )
        return any(name in white_labeling and not white_labeling[name] for name in flag_settings)

    def transform(self, general_settings, filepath=None):
        already_enabled = "opalsEnabled" in general_settings and general_settings["opalsEnabled"]
        if not already_enabled and not self._any_unsupported_settings(general_settings):
            general_settings["opalsEnabled"] = True
        return general_settings

class V12000LabelingTaskLabelColumn(migration_json.ProjectConfigJsonMigrationOperation):
    """Give `labelColumnName` the value "label" in labeling tasks where it is absent or null."""

    def __repr__(self,):
        return "Add value to new parameter (labelColumnName) for existing labeling tasks"

    def file_patterns(self,):
        return ["labeling_tasks/*.json"]

    def jsonpath(self,):
        return ""

    def transform(self, lt, filepath):
        current_name = lt.get("labelColumnName")
        if current_name is None:
            lt["labelColumnName"] = "label"
        return lt

class V12000FixupScenarioStartingFrom(migration_json.ProjectConfigJsonMigrationOperation):
    """Shorten the ``startingFrom`` parameter of scenario triggers to 10 characters."""

    def __repr__(self):
        return "Turn Scenario starting from date/time to a date"

    def file_patterns(self):
        return ["scenarios/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Truncate every trigger's ``params.startingFrom`` longer than 10 characters.

        Presumably the first 10 characters are the date part of a date/time
        string; the code itself only slices.

        :param obj: parsed scenario dict, updated in place
        :param filepath: unused
        :return: the same ``obj`` dict
        """
        for trigger in obj.get("triggers", []):
            trigger_params = trigger.get("params", {})
            starting_from = trigger_params.get("startingFrom")
            # Skip unset/empty values and values that are already short enough
            if not starting_from or len(starting_from) <= 10:
                continue
            trigger_params["startingFrom"] = starting_from[:10]
        return obj


class V12000TimeseriesTestSize(migration_base.ProjectLocalMigrationOperation):
    """Raise ``evaluationParams.testSize`` to at least ``predictionLength``.

    The rule is applied to every JSON file of a project that may embed ML task
    parameters (ML tasks, training recipes, core params, saved models, parts).
    """

    def __repr__(self):
        return "Migrate test size in time series ML tasks evaluation params"

    @staticmethod
    def migrate_test_size(mltask_data):
        """Bump the test size of one ML task-like dict, in place.

        :param mltask_data: candidate dict; anything that is not a dict, or that
            lacks ``evaluationParams`` or ``predictionLength``, is ignored
        :return: True when ``testSize`` was changed, False otherwise
        """
        if not isinstance(mltask_data, dict):
            return False
        if not ("evaluationParams" in mltask_data and "predictionLength" in mltask_data):
            return False

        eval_params = mltask_data["evaluationParams"]
        horizon = mltask_data["predictionLength"]
        # A missing testSize counts as 0
        if eval_params.get("testSize", 0) < horizon:
            eval_params["testSize"] = horizon
            return True
        return False

    def migrate_parts(self, parts_data):
        """Apply :meth:`migrate_test_size` to the ``snippet`` of every entry of ``summaries``.

        :param parts_data: parsed parts.json content; ignored when not a dict
        :return: True when at least one snippet was changed
        """
        if not isinstance(parts_data, dict):
            return False

        changed = False
        for summary in parts_data.get("summaries", {}).values():
            # Call first, then combine: every part must be visited
            changed = self.migrate_test_size(summary.get("snippet", {})) or changed
        return changed

    def execute(self, project_paths):
        """Migrate all candidate files of the project; failures are printed, not raised.

        :param project_paths: object exposing the ``config``, ``analysis_data``
            and ``saved_models`` directories of the project
        """
        def migrate_files(files, start_message, failure_message, migrate):
            # Load each file, run `migrate` on its content, write back only if modified
            for json_file in files:
                print(start_message % json_file)
                try:
                    content = base.json_loadf(json_file)
                    if migrate(content):
                        base.json_dumpf(json_file, content)
                except Exception as e:
                    print(failure_message % e)

        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        migrate_files(
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating time series test size in MLTask params: %s",
            "Time series test size in MLTask migration FAILED: %s",
            self.migrate_test_size,
        )

        # config/projects/PROJECT_KEY/recipes/*.prediction_training
        migrate_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating time series test size in training recipe params: %s ",
            "Time series test size in MLTask in training recipe  migration FAILED: %s",
            lambda recipe: self.migrate_test_size(recipe.get("core", {})),
        )

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/core_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/core_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/core_params.json
        migrate_files(
            glob("%s/*/versions/*/core_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/core_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/core_params.json" % project_paths.analysis_data),
            "Migrating time series test size in core params: %s ",
            "Time series test size in core params migration FAILED: %s",
            self.migrate_test_size,
        )

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        migrate_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating time series test size in saved model miniTask: %s ",
            "Time series test size in saved model miniTask migration FAILED: %s",
            lambda saved_model: self.migrate_test_size(saved_model.get("miniTask", {})),
        )

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/parts.json
        # analysis-data/PROJECT_KEY/BzraLG5b/ElVOgftk/sessions/s1/pp1-base/m1/parts.json
        migrate_files(
            glob("%s/*/versions/*/parts.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/parts.json" % project_paths.analysis_data),
            "Migrating time series test size in parts.json file: %s ",
            "Time series test size in parts.json migration FAILED: %s",
            self.migrate_parts,
        )


class V12000PrepareRecipeTypeInference(migration_json.JsonMigrationOperation):
    """Replace the ``ALWAYS_INFER`` prepare-recipe column type mode in the general settings."""

    def __init__(self):
        # Deliberately empty: the parent initializer is not invoked
        pass

    def __repr__(self):
        return "Change default type inference mode for prepare recipe"

    def file_patterns(self):
        return ["config/general-settings.json"]

    def transform(self, general_settings, filepath=None):
        """Turn ``ALWAYS_INFER`` into ``KEEP_STRONGLY_TYPED_INPUTS``.

        :param general_settings: parsed general settings dict, updated in place
        :param filepath: unused
        :return: the same ``general_settings`` dict
        """
        creation_settings = general_settings.get("defaultRecipeCreationSettings", {})
        # Any other mode (or a missing section) is left untouched
        if creation_settings.get("prepareRecipeColumnTypeMode") == "ALWAYS_INFER":
            creation_settings["prepareRecipeColumnTypeMode"] = "KEEP_STRONGLY_TYPED_INPUTS"
        return general_settings

###############################################################################
# V12100 / DSS 12.1.0
###############################################################################

def migrateFilterDateTypeAndPart(filter):
    """Split a date-part filter type into ``dateFilterType`` + ``dateFilterPart``.

    For a filter on a ``DATE`` column whose ``dateFilterType`` is anything but
    ``"RANGE"``, the current type value is moved to ``dateFilterPart`` and
    ``dateFilterType`` becomes ``"PART"``. The dict is modified in place.

    A filter without any ``dateFilterType`` key is left untouched instead of
    raising ``KeyError``.

    :param filter: filter definition dict (the name shadows the builtin but is
        kept so that existing callers are unaffected)
    :return: None
    """
    if filter.get("columnType", "") != "DATE":
        return
    if "dateFilterType" not in filter:
        # Nothing to move: there is no date part recorded on this filter
        return

    date_filter_type = filter["dateFilterType"]
    if date_filter_type == "RANGE":
        return

    # If we are still here then we are in the case of a date part filter
    filter["dateFilterPart"] = date_filter_type
    filter["dateFilterType"] = "PART"

class V12100ChartFiltersDateTypeAndPart(migration_app.ChartsMigrationOperation):
    """Run ``migrateFilterDateTypeAndPart`` on every filter of a chart definition."""

    def __init__(self):
        super(V12100ChartFiltersDateTypeAndPart, self).__init__()

    def migrate_def(self, chart_def):
        """Migrate the entries of ``chart_def["filters"]`` in place.

        :param chart_def: chart definition dict
        :return: the same ``chart_def`` dict
        """
        chart_filters = chart_def.get("filters", [])
        for chart_filter in chart_filters:
            migrateFilterDateTypeAndPart(chart_filter)
        return chart_def

class V12100DashboardFiltersDateTypeAndPart(migration_json.ProjectConfigJsonMigrationOperation):
    """Run ``migrateFilterDateTypeAndPart`` on the filters of dashboard filter tiles."""

    def __repr__(self):
        return "Migrate date part filters to use both dateFilterType and dateFilterPart"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Migrate the filters of every ``INSIGHT`` tile whose insight type is ``filters``.

        :param obj: parsed dashboard dict, updated in place
        :param filepath: unused
        :return: the same ``obj`` dict
        """
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                # Only filter insights carry filters to migrate
                if tile.get("tileType", None) != "INSIGHT":
                    continue
                if tile.get("insightType", None) != "filters":
                    continue
                for tile_filter in tile.get("tileParams", {}).get("filters", []):
                    migrateFilterDateTypeAndPart(tile_filter)
        return obj


class V12100IAMSettingsUpdate(migration_json.JsonMigrationOperation):
    """Initialise SSO, auth-sync and LDAP flags in the general settings."""

    def __repr__(self):
        return "Migrate the IAM Settings"

    def appliesTo(self):
        return ["design", "automation", "govern"]

    def file_patterns(self):
        return ["config/general-settings.json"]

    def transform(self, settings, filepath=None):
        """Set the SSO / auth-sync / LDAP flags on ``settings``.

        NOTE: ``ssoSettings``, ``openIDParams`` and ``ldapSettings`` are read
        with ``.get(..., {})``; when a section is absent the flag is written to
        a temporary dict and therefore not stored. ``authSyncSettings`` is
        always replaced.

        :param settings: parsed general settings dict, updated in place
        :param filepath: unused
        :return: the same ``settings`` dict
        """
        sso_settings = settings.get('ssoSettings', {})
        sso_settings.get('openIDParams', {})['usePKCE'] = False
        for sso_flag in ('autoProvisionUsersAtLoginTime', 'autoSyncUsersAtLoginTime'):
            sso_settings[sso_flag] = False

        auth_sync = settings['authSyncSettings'] = {}
        for key, value in (
            ('syncUserGroups', True),
            ('syncUserDisplayName', False),
            ('syncUserEmail', False),
            ('syncUserProfile', False),
            ('missingUserAction', 'WARN'),
            ('notInAuthorizedGroupsAction', 'WARN'),
        ):
            auth_sync[key] = value

        # The LDAP sync-at-login flag mirrors autoImportUsers (True when unset)
        ldap_settings = settings.get('ldapSettings', {})
        ldap_settings['autoSyncUsersAtLoginTime'] = ldap_settings.get('autoImportUsers', True)

        return settings


class V12100IAMSettingsUpdateForFM(migration_json.JsonMigrationOperation):
    """Initialise SSO and auth-sync flags in the settings file used for ``fm``."""

    def __repr__(self):
        return "Migrate the IAM Settings for FM"

    def appliesTo(self):
        return ["fm"]

    def file_patterns(self):
        return ["config/settings.json"]

    def transform(self, settings, filepath=None):
        """Set the SSO / auth-sync flags on ``settings``.

        NOTE: ``ssoSettings`` and ``openIDParams`` are read with
        ``.get(..., {})``; when a section is absent the flag is written to a
        temporary dict and therefore not stored. ``authSyncSettings`` is always
        replaced.

        :param settings: parsed settings dict, updated in place
        :param filepath: unused
        :return: the same ``settings`` dict
        """
        sso_settings = settings.get('ssoSettings', {})
        sso_settings.get('openIDParams', {})['usePKCE'] = False
        for sso_flag in ('autoProvisionUsersAtLoginTime', 'autoSyncUsersAtLoginTime'):
            sso_settings[sso_flag] = False

        auth_sync = settings['authSyncSettings'] = {}
        auth_sync['syncUserDisplayName'] = False

        return settings

###############################################################################
# V12200 / DSS 12.2.0
###############################################################################

class V12200FixupPivotTableOptions(migration_app.ChartsMigrationOperation):
    """Copy the top-level pivot table options into ``pivotTableOptions``."""

    def __init__(self):
        # Presumably restricts the operation to "pivot_table" charts -- see the parent class
        super(V12200FixupPivotTableOptions, self).__init__(["pivot_table"])

    def migrate_def(self, chart_def):
        """Ensure ``pivotTableOptions`` exists and fill it from the legacy keys.

        The legacy top-level keys are copied, not removed.

        :param chart_def: chart definition dict, updated in place
        :return: the same ``chart_def`` dict
        """
        pivot_options = chart_def.setdefault("pivotTableOptions", {})
        for legacy_key, option_key in (
            ("pivotMeasureDisplayMode", "measureDisplayMode"),
            ("pivotDisplayTotals", "displayTotals"),
        ):
            if legacy_key in chart_def:
                pivot_options[option_key] = chart_def[legacy_key]
        return chart_def


class V12200OpenIDPromptParameter(migration_json.JsonMigrationOperation):
    """Set the OpenID ``prompt`` parameter to ``login`` when OpenID SSO is enabled."""

    def __repr__(self):
        return "Set prompt=login for OpenID if enabled"

    def appliesTo(self):
        return ["design", "automation", "govern"]

    def file_patterns(self):
        return ["config/general-settings.json"]

    def transform(self, settings, filepath=None):
        """Write ``openIDParams.prompt = 'login'`` if SSO is enabled with the OPENID protocol.

        When ``openIDParams`` is absent the value is written to a temporary
        dict and therefore not stored.

        :param settings: parsed general settings dict, updated in place
        :param filepath: unused
        :return: the same ``settings`` dict
        """
        sso_settings = settings.get('ssoSettings', {})
        if not sso_settings.get('enabled', False):
            return settings
        if sso_settings.get('protocol', '') != 'OPENID':
            return settings

        sso_settings.get('openIDParams', {})['prompt'] = 'login'
        return settings


class V12200OpenIDPromptParameterForFM(migration_json.JsonMigrationOperation):
    """Set the OpenID ``prompt`` parameter to ``login`` when OpenID SSO is enabled (``fm``)."""

    def __repr__(self):
        return "Set prompt=login for OpenID if enabled for FM"

    def appliesTo(self):
        return ["fm"]

    def file_patterns(self):
        return ["config/settings.json"]

    def transform(self, settings, filepath=None):
        """Write ``openIDParams.prompt = 'login'`` if SSO is enabled with the OPENID protocol.

        When ``openIDParams`` is absent the value is written to a temporary
        dict and therefore not stored.

        :param settings: parsed settings dict, updated in place
        :param filepath: unused
        :return: the same ``settings`` dict
        """
        sso_settings = settings.get('ssoSettings', {})
        if not sso_settings.get('enabled', False):
            return settings
        if sso_settings.get('protocol', '') != 'OPENID':
            return settings

        sso_settings.get('openIDParams', {})['prompt'] = 'login'
        return settings

class V12200AddIdToChartFilters(migration_app.ChartsMigrationOperation):
    """Assign a generated ``id`` to every filter of a chart definition."""

    def __init__(self):
        super(V12200AddIdToChartFilters, self).__init__()

    def migrate_def(self, chart_def):
        """Set ``id`` on each entry of ``chart_def["filters"]``, replacing any existing value.

        :param chart_def: chart definition dict, updated in place
        :return: the same ``chart_def`` dict
        """
        chart_filters = chart_def.get("filters", [])
        for chart_filter in chart_filters:
            # One fresh id per filter (generate_random_id is called with 16)
            chart_filter["id"] = base.generate_random_id(16)
        return chart_def

class V12200AddIdToDashboardFilters(migration_json.ProjectConfigJsonMigrationOperation):
    """Assign a generated ``id`` to every filter of dashboard filter tiles."""

    def __repr__(self):
        # The previous description was copied from the date-part migration and
        # did not describe what this step does
        return "Add id to dashboard filters"

    def transform(self, obj, filepath):
        """Set ``id`` on each filter of every ``INSIGHT`` tile whose insight type is ``filters``.

        Any existing ``id`` is replaced.

        :param obj: parsed dashboard dict, updated in place
        :param filepath: unused
        :return: the same ``obj`` dict
        """
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                is_insight = tile.get("tileType", None) == "INSIGHT"
                is_filter_insight = is_insight and tile.get("insightType", None) == "filters"

                if not is_filter_insight:
                    continue

                for tile_filter in tile.get("tileParams", {}).get("filters", []):
                    tile_filter["id"] = base.generate_random_id(16)

        return obj

    def file_patterns(self,):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

class V12200AddFilterSelectionTypeToDashboardFilters(migration_json.ProjectConfigJsonMigrationOperation):
    """Give dashboard filters a ``filterSelectionType`` when they have none."""

    def __repr__(self):
        return "Initialise filter selection type for dashboard filters"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Initialise ``filterSelectionType`` on the filters of filter insight tiles.

        Numerical facets and date facets of type ``RANGE`` / ``RELATIVE`` get
        ``RANGE_OF_VALUES``; everything else gets ``MULTI_SELECT``. Filters that
        already have a non-null selection type are left alone.

        :param obj: parsed dashboard dict, updated in place
        :param filepath: unused
        :return: the same ``obj`` dict
        """
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                if tile.get("tileType", None) != "INSIGHT":
                    continue
                if tile.get("insightType", None) != "filters":
                    continue

                for tile_filter in tile.get("tileParams", {}).get("filters", []):
                    if tile_filter.get("filterSelectionType", None) is not None:
                        continue

                    filter_type = tile_filter.get("filterType")
                    if filter_type == "NUMERICAL_FACET":
                        selection_type = "RANGE_OF_VALUES"
                    elif filter_type == "DATE_FACET" and tile_filter.get("dateFilterType") in ("RANGE", "RELATIVE"):
                        selection_type = "RANGE_OF_VALUES"
                    else:
                        selection_type = "MULTI_SELECT"
                    tile_filter["filterSelectionType"] = selection_type
        return obj

class V12200AddFilterSelectionTypeToChartFilters(migration_app.ChartsMigrationOperation):
    """Give chart filters a ``filterSelectionType`` when they have none."""

    def __init__(self):
        super(V12200AddFilterSelectionTypeToChartFilters, self).__init__()

    def migrate_def(self, chart_def):
        """Initialise ``filterSelectionType`` on the entries of ``chart_def["filters"]``.

        Numerical facets and date facets of type ``RANGE`` / ``RELATIVE`` get
        ``RANGE_OF_VALUES``; everything else gets ``MULTI_SELECT``. Filters that
        already have a non-null selection type are left alone.

        :param chart_def: chart definition dict, updated in place
        :return: the same ``chart_def`` dict
        """
        for chart_filter in chart_def.get("filters", []):
            if chart_filter.get("filterSelectionType", None) is not None:
                continue

            filter_type = chart_filter.get("filterType")
            if filter_type == "NUMERICAL_FACET":
                selection_type = "RANGE_OF_VALUES"
            elif filter_type == "DATE_FACET" and chart_filter.get("dateFilterType") in ("RANGE", "RELATIVE"):
                selection_type = "RANGE_OF_VALUES"
            else:
                selection_type = "MULTI_SELECT"
            chart_filter["filterSelectionType"] = selection_type
        return chart_def

###############################################################################
# V12220 / DSS 12.2.2
###############################################################################

class V12220TeradataDefaultConnectionTimezone(migration_json.JsonMigrationOperation):
    """Default the assumed-timezone params of Teradata connections to ``GMT``."""

    def __repr__(self):
        return "Set teradata connections default assumed timezone settings to GMT, as per legacy dataset-level behavior"

    def file_patterns(self):
        return ["config/connections.json"]

    def jsonpath(self):
        return "connections"

    def transform(self, connections, filepath=None):
        """Fill in the two assumed-timezone params where they are not set yet.

        Only connections of type ``Teradata`` that have a non-null ``params``
        are touched; keys that are already present keep their value.

        :param connections: dict of connection name -> connection dict, updated in place
        :param filepath: unused
        :return: the same ``connections`` dict
        """
        for _name, connection in iteritems(connections):
            if connection.get("type") != 'Teradata':
                continue
            params = connection.get("params", None)
            if params is None:
                continue
            for tz_key in ("defaultAssumedTzForUnknownTz", "defaultAssumedDbTzForUnknownTz"):
                params.setdefault(tz_key, "GMT")
        return connections

###############################################################################
# V12320 / DSS 12.3.2
###############################################################################

class V12320FixupFilterAndFlagParamsExclude(migration_app.ShakerStepMigrationOperation):
    """Add an explicit ``exclude: False`` to step params that lack the key."""

    def __init__(self):
        # "FlagOnValue" is presumably the step type this operation targets -- see the parent class
        super(V12320FixupFilterAndFlagParamsExclude, self).__init__("FlagOnValue")

    def transform_step(self, step):
        """Set ``params.exclude`` to ``False`` when the step has params without that key.

        :param step: step dict, updated in place
        :return: the same ``step`` dict
        """
        if "params" in step:
            step_params = step["params"]
            if "exclude" not in step_params:
                step_params["exclude"] = False
        return step

###############################################################################
# V12400 / DSS 12.4.0
###############################################################################

class V12400SecuritySettingsUpdate(migration_json.JsonMigrationOperation):
    """Turn on ``security.enableEmailAndDisplayNameModification`` in the general settings."""

    def __repr__(self,):
        return "Set enableEmailAndDisplayNameModification to true for existing instances"

    def appliesTo(self):
        return [ "design", "automation", "govern" ]

    def transform(self, settings, filepath=None):
        """Set the flag to ``True``, creating the ``security`` section if needed.

        :param settings: parsed general settings dict, updated in place
        :param filepath: unused
        :return: the same ``settings`` dict
        """
        security = settings.get('security')
        if security is None:
            # A settings file without a (non-null) security section must not
            # abort the migration: create the section so the flag is stored
            security = settings['security'] = {}
        security['enableEmailAndDisplayNameModification'] = True

        return settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]

class V12400EnableCrossFiltersInDashboardPages(migration_json.ProjectConfigJsonMigrationOperation):
    """Set ``enableCrossFilters`` to ``True`` on every page of a dashboard."""

    def __repr__(self):
        return "Enable cross-filters by default for existing dashboards"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Flag all entries of ``obj["pages"]``, overwriting any existing value.

        :param obj: parsed dashboard dict, updated in place
        :param filepath: unused
        :return: the same ``obj`` dict
        """
        dashboard_pages = obj.get("pages", [])
        for dashboard_page in dashboard_pages:
            dashboard_page["enableCrossFilters"] = True
        return obj

class V12400FixupReferenceLines(migration_app.ChartsMigrationOperation):
    """Mark existing chart reference lines as constant-valued."""

    def __init__(self):
        super(V12400FixupReferenceLines, self).__init__()

    def migrate_def(self, chart_def):
        """Set ``sourceType`` to ``Constant`` and copy ``value`` to ``constantValue``.

        ``constantValue`` is only written when ``value`` is present and not null;
        the legacy ``value`` key is kept.

        :param chart_def: chart definition dict, updated in place
        :return: the same ``chart_def`` dict
        """
        for reference_line in chart_def.get("referenceLines", []):
            reference_line["sourceType"] = "Constant"
            legacy_value = reference_line.get("value")
            if legacy_value is not None:
                reference_line["constantValue"] = legacy_value

        return chart_def

class V12400FixupAxes(migration_app.ChartsMigrationOperation):
    """Copy the flat per-axis chart options into ``xAxisFormatting`` / ``yAxesFormatting[0]``."""

    def __init__(self):
        super(V12400FixupAxes, self).__init__()

    def migrate_def(self, chart_def):
        """Fill the axis formatting structures from the legacy top-level keys.

        The legacy keys are copied, not removed. The order of the steps
        matters: for the Y axis, an existing ``yAxisFormatting`` object replaces
        ``yAxesFormatting[0]`` after the display/title/number-format options
        were copied, and the ticks/extent/log-scale options are then written
        onto that object.

        :param chart_def: chart definition dict, updated in place
        :return: the same ``chart_def`` dict
        """
        # ---- X axis ----
        x_axis = chart_def.setdefault("xAxisFormatting", {})

        if "displayXAxis" in chart_def:
            x_axis["displayAxis"] = chart_def["displayXAxis"]
        # For stacked bars, showXAxis takes precedence over displayXAxis
        if "showXAxis" in chart_def and chart_def.get("type", None) == "stacked_bars":
            x_axis["displayAxis"] = chart_def["showXAxis"]
        for legacy_key, new_key in (
            ("showXAxisLabel", "showAxisTitle"),
            ("xAxisLabel", "axisTitle"),
        ):
            if legacy_key in chart_def:
                x_axis[new_key] = chart_def[legacy_key]
        if "xAxisNumberFormattingOptions" in chart_def:
            x_values_formatting = x_axis.setdefault("axisValuesFormatting", {})
            x_values_formatting["numberFormatting"] = chart_def["xAxisNumberFormattingOptions"]
        for legacy_key, new_key in (
            ("xTicks", "ticksConfig"),
            ("xCustomExtent", "customExtent"),
            ("axis1LogScale", "isLogScale"),
        ):
            if legacy_key in chart_def:
                x_axis[new_key] = chart_def[legacy_key]

        # ---- Y axis (first entry of yAxesFormatting) ----
        if len(chart_def.get("yAxesFormatting", [])) < 1:
            chart_def["yAxesFormatting"] = [{}]
        y_axes = chart_def["yAxesFormatting"]
        if type(y_axes[0]) is not dict:
            y_axes[0] = {}
        y_axis = y_axes[0]

        for legacy_key, new_key in (
            ("displayYAxis", "displayAxis"),
            ("showYAxisLabel", "showAxisTitle"),
            ("yAxisLabel", "axisTitle"),
        ):
            if legacy_key in chart_def:
                y_axis[new_key] = chart_def[legacy_key]
        if "yAxisNumberFormattingOptions" in chart_def:
            y_values_formatting = y_axis.setdefault("axisValuesFormatting", {})
            y_values_formatting["numberFormatting"] = chart_def["yAxisNumberFormattingOptions"]
        if "yAxisFormatting" in chart_def:
            # Replaces the whole first entry, including what was copied just above
            y_axis = y_axes[0] = chart_def["yAxisFormatting"]
        for legacy_key, new_key in (
            ("yTicks", "ticksConfig"),
            ("yCustomExtent", "customExtent"),
            ("axis2LogScale", "isLogScale"),
        ):
            if legacy_key in chart_def:
                y_axis[new_key] = chart_def[legacy_key]

        return chart_def


class V12400CustomScoringMetricMigration(migration_base.ProjectLocalMigrationOperation):
    def __repr__(self,):
        return "Migrate custom scoring to use the custom metric system"

    def execute(self, project_paths):
        """Run every sub-migration of the custom scoring -> custom metric change.

        Files are discovered with glob patterns under the project's directories
        and handed, in a fixed order, to the dedicated ``migrate_*`` method.
        Some later steps rely on files rewritten by earlier ones, so the order
        of the steps below must be kept.

        :param project_paths: object exposing the ``config``, ``analysis_data``,
            ``saved_models`` and ``model_evaluation_stores`` directories
        """
        analysis_data = project_paths.analysis_data
        saved_models = project_paths.saved_models
        evaluation_stores = project_paths.model_evaluation_stores

        # Migrate all rmodeling params files (analysis, saved models and model evaluations)
        rmodeling_params_files = (
            glob("%s/*/*/sessions/s*/pp*/m*/rmodeling_params.json" % analysis_data)
            + glob("%s/*/versions/*/rmodeling_params.json" % saved_models)
            + glob("%s/*/pversions/*/*/rmodeling_params.json" % saved_models)
            + glob("%s/*/*/rmodeling_params.json" % evaluation_stores)
        )
        for rmodeling_params_file in rmodeling_params_files:
            self.migrate_rmodeling_params_file(rmodeling_params_file)

        config_dir = project_paths.config

        # Migrate ml tasks
        mltask_files = (
            glob("%s/analysis/*/ml/*/params.json" % config_dir)
            + glob("%s/*/*/sessions/*/mltask.json" % analysis_data)
        )
        for mltask_file in mltask_files:
            self.migrate_mltask(mltask_file)

        # Migrate partitioned models parts
        parts_files = (
            glob("%s/*/*/sessions/s*/pp*/m*/parts.json" % analysis_data)
            + glob("%s/*/versions/*/parts.json" % saved_models)
        )
        for parts_file in parts_files:
            self.migrate_parts(parts_file)

        # Migrate performance metrics
        perf_files = (
            self.get_json_perf_files("%s/*/*/sessions/s*/pp*/m*" % analysis_data)
            + self.get_json_perf_files("%s/*/versions/*" % saved_models)
            + self.get_json_perf_files("%s/*/pversions/*/*" % saved_models)
            + self.get_json_perf_files("%s/*/*" % evaluation_stores)
        )
        for perf_file in perf_files:
            self.migrate_perf(perf_file)

        # Migration of actual params. Only present for ensemble and most probably useless remains of the past
        actual_params_files = (
            glob("%s/*/versions/*/actual_params.json" % saved_models)
            + glob("%s/*/pversions/*/*/actual_params.json" % saved_models)
            + glob("%s/*/*/sessions/*/*/*/actual_params.json" % analysis_data)
        )
        for actual_params_file in actual_params_files:
            self.migrate_actual_params(actual_params_file)

        # Migration of saved models
        for saved_model_file in glob("%s/saved_models/*.json" % config_dir):
            self.migrate_saved_model(saved_model_file)

        # Migration of model comparisons
        for model_comparison_file in glob("%s/model_comparisons/*.json" % config_dir):
            self.migrate_model_comparisons(model_comparison_file, analysis_data, saved_models, evaluation_stores)

        # Migration of model evaluation models
        for evaluation_store_file in glob("%s/model_evaluation_stores/*.json" % config_dir):
            self.migrate_model_evaluation(evaluation_store_file, evaluation_stores)

        # Migration of model evaluations metrics
        for evaluation_file in glob("%s/*/*/evaluation.json" % evaluation_stores):
            self.migrate_mes_metrics(evaluation_file)

        # Migration of evaluation recipes
        for evaluation_recipe_file in glob("%s/recipes/*.evaluation" % config_dir):
            self.migrate_evaluation_recipe(evaluation_recipe_file, config_dir, saved_models)

        # Migration of prediction recipes
        for training_recipe_file in glob("%s/recipes/*.prediction_training" % config_dir):
            self.migrate_training_recipe(training_recipe_file)

    @staticmethod
    def get_custom_evaluation_metric_name(custom_metrics):
        """
        Returns an evaluation metric name. We iterate to find a custom evaluation metric name that is
        not already a custom metric name.
        :param custom_metrics: list of custom metrics
        :return: name of the custom evaluation metric
        """
        custom_metrics_names = {cm["name"] for cm in custom_metrics}
        custom_evaluation_metric_name = "Migrated Custom Score"
        if custom_evaluation_metric_name not in custom_metrics_names:
            return custom_evaluation_metric_name
        # Custom evaluation name 'Migrated Custom Score' is already used. We use instead 'Migrated Custom Score #i' where we increment i until metric name is available.
        i = 1
        while "{} #{}".format(custom_evaluation_metric_name, i) in custom_metrics_names:
            i += 1
        return "{} #{}".format(custom_evaluation_metric_name, i)

    @staticmethod
    def get_custom_evaluation_metric_from_rmodeling_params(directory):
        """
        Load rmodeling_params.json from the given directory and return the custom
        metric whose name equals metrics.customEvaluationMetricName.
        :param directory: directory of a rmodeling_params file
        :return: the matching custom metric dict
        :raises ValueError: when no custom metric has that name (expected to be
            caught by the caller's try/except block)
        """
        params_path = os.path.join(directory, "rmodeling_params.json")
        metrics = base.json_loadf(params_path)["metrics"]
        for candidate in metrics["customMetrics"]:
            # The target name is looked up per candidate, so an empty list ends in ValueError
            if candidate["name"] == metrics["customEvaluationMetricName"]:
                return candidate
        raise ValueError("Migrated custom evaluation metric not found in {}".format(params_path))

    @staticmethod
    def update_metrics_in_file(filename, modeling_params_access_fn, is_ensemble_model=False):
        """
        Load a JSON file, run update_metrics_dict on its metrics dict(s) and write
        the file back. Any failure is logged and swallowed.
        :param filename: path of the JSON file to rewrite
        :param modeling_params_access_fn: callable returning, from the loaded data,
            the modeling params (or the list of them for an ensemble)
        :param is_ensemble_model: when True, the accessor result is iterated and
            each entry is migrated
        """
        try:
            content = base.json_loadf(filename)
            located = modeling_params_access_fn(content)
            # Ensemble -> several modeling params; nominal case -> a single one
            all_modeling_params = located if is_ensemble_model else [located]
            for modeling_params in all_modeling_params:
                V12400CustomScoringMetricMigration.update_metrics_dict(modeling_params.get("metrics", {}))
            # Written back unconditionally, whether or not something changed
            base.json_dumpf(filename, content)
        except Exception as e:
            logging.exception("Failed to migrate file {}: {}".format(filename, e))

    @staticmethod
    def update_metrics_dict(metrics_dict):
        # Update a `metrics dict` creating a custom metric when the evaluation metric is custom
        try:
            evaluation_metric = metrics_dict.get("evaluationMetric", None)
            if evaluation_metric == "CUSTOM" and "customEvaluationMetricName" not in metrics_dict:
                custom_evaluation_metric_code = metrics_dict.get("customEvaluationMetricCode", None)
                custom_evaluation_metric_gib = metrics_dict.get("customEvaluationMetricGIB", True)
                custom_evaluation_metric_needs_proba = metrics_dict.get("customEvaluationMetricNeedsProba", None)
                custom_evaluation_metric_name = V12400CustomScoringMetricMigration.get_custom_evaluation_metric_name(metrics_dict["customMetrics"])

                custom_evaluation_metric_description = """
                    Previously the model's 'custom evaluation' function, migrated into a custom metric.
                    If {} is set as the Optimization Method, this code will be used to optimise the model when training.
                    """.format(custom_evaluation_metric_name)

                new_custom_metric = {
                    "name": custom_evaluation_metric_name,
                    "metricCode": custom_evaluation_metric_code,
                    "greaterIsBetter": custom_evaluation_metric_gib,
                    "needsProbability": custom_evaluation_metric_needs_proba,
                    "description": custom_evaluation_metric_description
                }

                custom_metrics = metrics_dict.get("customMetrics", [])
                custom_metrics.append(new_custom_metric)

                metrics_dict["customMetrics"] = custom_metrics
                metrics_dict["customEvaluationMetricName"] = custom_evaluation_metric_name

            metrics_dict.pop("customEvaluationMetricCode", None)
            metrics_dict.pop("customEvaluationMetricGIB", None)
            metrics_dict.pop("customEvaluationMetricNeedsProba", None)
        except Exception as e:
            logging.exception("Failed to update metrics dictionary {}: {}".format(metrics_dict, e))

    @staticmethod
    def get_evaluation_recipe_smv_id(project_path, eval_recipe, saved_model_id):
        # We pick the set model version id or the active one
        if eval_recipe.get("modelVersionId", "") != "":
            # eval_recipe["modelVersionId"] can be "" and means to select the active version.
            return eval_recipe["modelVersionId"]
        saved_model_desc = base.json_loadf("{}/saved_models/{}.json".format(project_path, saved_model_id))
        return saved_model_desc.get("activeVersion")

    @staticmethod
    def migrate_evaluation_recipe(er_filename, project_path, saved_model_path):
        """
        Store, in an evaluation recipe payload, the names of the custom metrics of
        the evaluated saved model version (key possibleCustomMetrics).

        This step relies on a previously migrated saved model. If rmodeling_params
        has not been correctly migrated nothing will happen here. Any failure is
        logged and swallowed.
        :param er_filename: path of the recipe payload file (*.evaluation)
        :param project_path: project config directory
        :param saved_model_path: directory holding the project's saved models
        """
        payload_suffix = ".evaluation"
        try:
            print("Migrating custom score to custom metric in evaluation recipe file {}".format(er_filename))
            evaluation_recipe = base.json_loadf(er_filename)
            if er_filename.endswith(payload_suffix):
                # Swap only the trailing extension: str.replace would also rewrite
                # any other ".evaluation" occurring earlier in the path
                evaluate_recipe_desc_filename = er_filename[:-len(payload_suffix)] + ".json"
            else:
                evaluate_recipe_desc_filename = er_filename.replace(payload_suffix, ".json")

            evaluation_recipe_desc = base.json_loadf(evaluate_recipe_desc_filename)

            models = evaluation_recipe_desc.get("inputs", {}).get("model", {}).get("items", [])
            if not models:
                return
            model_id = models[0].get("ref")
            if model_id is None:
                return
            version_id = V12400CustomScoringMetricMigration.get_evaluation_recipe_smv_id(project_path, evaluation_recipe, model_id)

            rmodeling_params_path = "{}/{}/versions/{}/rmodeling_params.json".format(saved_model_path, model_id, version_id)
            saved_model_modeling_params = base.json_loadf(rmodeling_params_path)

            saved_model_custom_metrics = [custom_metric['name'] for custom_metric in saved_model_modeling_params.get("metrics", {}).get("customMetrics", [])]
            evaluation_recipe['possibleCustomMetrics'] = saved_model_custom_metrics
            base.json_dumpf(er_filename, evaluation_recipe)
        except Exception as e:
            logging.exception("Failed to migrate evaluation recipe {}: {}".format(er_filename, e))

    @staticmethod
    def update_perf_custom_metric_results(rmodeling_params_directory, perf_data, is_per_cut_perf):
        """
        Add to `perf_data["customMetricsResults"]` an entry built from the legacy `customScore` fields.

        The entry is only added when no existing result already carries the same metric name.

        :param rmodeling_params_directory: folder passed to `get_custom_evaluation_metric_from_rmodeling_params`
                                           to obtain the metric description
        :param perf_data: dict holding `customScore` (and optionally `customScorestd`); updated in place
        :param is_per_cut_perf: when true the score is stored under "values"/"valuesstd" instead of
                                "value"/"valuestd"
        """
        metric_desc = V12400CustomScoringMetricMigration.get_custom_evaluation_metric_from_rmodeling_params(rmodeling_params_directory)
        if is_per_cut_perf:
            value_key, std_key = "values", "valuesstd"
        else:
            value_key, std_key = "value", "valuestd"

        migrated_result = {"metric": metric_desc, "didSucceed": True}
        migrated_result[value_key] = perf_data["customScore"]
        if "customScorestd" in perf_data:
            migrated_result[std_key] = perf_data["customScorestd"]

        existing_results = perf_data.get("customMetricsResults", [])
        metric_name = migrated_result["metric"]["name"]
        already_present = set()
        for existing in existing_results:
            already_present.add(existing["metric"]["name"])
        if metric_name in already_present:
            return
        existing_results.append(migrated_result)
        perf_data["customMetricsResults"] = existing_results

    @staticmethod
    def migrate_perf(filename):
        """
        Convert the legacy custom score of a perf file into custom metric results.

        The `perCutData`, `metrics` and `tiMetrics` sections are handled when they hold a `customScore`
        (`perCutData` only when that score is not empty, `tiMetrics` only when the custom metric has
        `needsProbability` set). The file is rewritten in every case where it could be loaded.

        This step relies on a previously migrated saved model. If rmodeling_params has not been correctly
        migrated nothing will happen here. Failures are logged and not propagated.
        """
        migration = V12400CustomScoringMetricMigration
        try:
            print("Migrating custom score to custom metric in perf file {}".format(filename))
            containing_dir = os.path.dirname(filename)
            containing_dir_name = os.path.basename(containing_dir)
            if containing_dir_name.startswith("subpop-"):
                levels_up = 2  # rmodeling_params in grand parent folder
            elif containing_dir_name.startswith("fold_") or containing_dir_name in ("posttrain", "postcomputation"):
                levels_up = 1  # rmodeling_params in parent folder
            else:
                levels_up = 0  # rmodeling_params next to the perf file
            rmodeling_params_directory = containing_dir
            for _ in range(levels_up):
                rmodeling_params_directory = os.path.dirname(rmodeling_params_directory)

            perf = base.json_loadf(filename)

            if "perCutData" in perf:
                per_cut_data = perf["perCutData"]
                if "customScore" in per_cut_data and per_cut_data["customScore"]:
                    migration.update_perf_custom_metric_results(rmodeling_params_directory, per_cut_data, True)

            if "metrics" in perf and "customScore" in perf["metrics"]:
                migration.update_perf_custom_metric_results(rmodeling_params_directory, perf["metrics"], False)

            if "tiMetrics" in perf and "customScore" in perf["tiMetrics"]:
                custom_metric = migration.get_custom_evaluation_metric_from_rmodeling_params(rmodeling_params_directory)
                if custom_metric.get("needsProbability", False):
                    migration.update_perf_custom_metric_results(rmodeling_params_directory, perf["tiMetrics"], False)

            base.json_dumpf(filename, perf)
        except Exception as err:
            logging.exception("Failed to migrate perf {}: {}".format(filename, err))

    @staticmethod
    def migrate_parts(parts_filename):
        """
        Add a custom metric result to every partition snippet of a parts file that holds a `customScore`.

        The metric description is obtained from the folder containing the parts file.

        This step relies on a previously migrated saved model. If rmodeling_params has not been correctly
        migrated nothing will happen here. Failures are logged and not propagated.
        """
        try:
            print("Migrating custom score to custom metric in parts file {}".format(parts_filename))
            parts = base.json_loadf(parts_filename)
            summaries = parts.get("summaries", {})
            if base.is_os_windows():
                current_directory = os.path.split(parts_filename)[0]
            else:
                current_directory = "/".join(parts_filename.split("/")[:-1])

            for info in summaries.values():
                snippet = info.get("snippet", {})
                if "customScore" not in snippet:
                    continue
                metric_results = snippet.get("customMetricsResults", [])
                migrated_result = {
                    "value": snippet["customScore"],
                    "metric": V12400CustomScoringMetricMigration.get_custom_evaluation_metric_from_rmodeling_params(current_directory),
                    "didSucceed": True
                }
                if "customScorestd" in snippet:
                    migrated_result["customScorestd"] = snippet["customScorestd"]
                metric_results.append(migrated_result)
                snippet["customMetricsResults"] = metric_results

            base.json_dumpf(parts_filename, parts)
        except Exception as err:
            logging.exception("Failed to migrate kfold base parts {}: {}".format(parts_filename, err))

    @staticmethod
    def update_me_mc_display_params(desc, custom_evaluation_metrics):
        """
        Replace the legacy "CUSTOM" entry of the displayed and pinned metrics by the custom evaluation metrics.

        For each of `displayParams.displayedMetrics` and `displayParams.pinnedMetrics` that contains "CUSTOM",
        every given custom evaluation metric that is not yet listed is appended, then "CUSTOM" is removed.
        Lists that do not contain "CUSTOM" are left untouched.

        :param desc: parsed model evaluation store / model comparison descriptor; its lists are updated in place
        :param custom_evaluation_metrics: iterable of metric identifiers to display instead of "CUSTOM"
        :return: None
        """
        # A null "displayParams" (or null list) is treated like a missing one instead of raising
        display_params = desc.get("displayParams") or {}
        # Sorted so that the written file does not depend on the iteration order of the given set
        ordered_metrics = sorted(custom_evaluation_metrics)

        for list_key in ("displayedMetrics", "pinnedMetrics"):
            metrics = display_params.get(list_key) or []
            if "CUSTOM" not in metrics:
                continue
            # The Custom Score is displayed / pinned, so all custom evaluation metrics must be as well
            for custom_evaluation_metric in ordered_metrics:
                if custom_evaluation_metric not in metrics:
                    metrics.append(custom_evaluation_metric)
            # Updated in place (the list belongs to `desc`); every occurrence of the legacy entry is dropped
            metrics[:] = [metric for metric in metrics if metric != "CUSTOM"]

    @staticmethod
    def migrate_model_evaluation(me_filename, mes_path):
        """
        Migrate the display parameters of one model evaluation store descriptor.

        Every entry listed under `<mes_path>/<store id>` (the store id being the descriptor file name without
        ".json") has its `rmodeling_params.json` read; the custom evaluation metric names of those using the
        "CUSTOM" evaluation metric are collected (prefixed with "!!") and passed to
        `update_me_mc_display_params`, after which the descriptor is written back.

        This step relies on a previously migrated saved model. If rmodeling_params has not been correctly
        migrated nothing will happen here. Failures are logged and not propagated.

        :param me_filename: path of the model evaluation store descriptor (`<store id>.json`)
        :param mes_path: folder containing one sub-folder per model evaluation store
        """
        try:
            print("Migrating custom score to custom metric in model evaluation file {}".format(me_filename))
            model_evaluation = base.json_loadf(me_filename)
            mes_id = os.path.split(me_filename)[1].split(".json")[0] if base.is_os_windows() else me_filename.split("/")[-1].split(".json")[0]
            model_evaluations_ids = os.listdir("{}/{}".format(mes_path, mes_id))
            custom_evaluation_metrics = set()
            for model_id in model_evaluations_ids:
                rmodeling_params_path = "{}/{}/{}/rmodeling_params.json".format(mes_path, mes_id, model_id)
                rmodeling_params = base.json_loadf(rmodeling_params_path)
                if rmodeling_params.get("metrics", {}).get("evaluationMetric") == "CUSTOM":
                    if rmodeling_params.get("metrics").get("customEvaluationMetricName") is None:
                        raise ValueError("Custom evaluation metric name not found in {}".format(rmodeling_params_path))
                    custom_evaluation_metrics.add("!!{}".format(rmodeling_params["metrics"]["customEvaluationMetricName"]))
            V12400CustomScoringMetricMigration.update_me_mc_display_params(model_evaluation, custom_evaluation_metrics)
            base.json_dumpf(me_filename, model_evaluation)
        except Exception as e:
            # Report the file name: `model_evaluation` is not bound when the descriptor itself failed to load,
            # and referencing it here would raise out of this best-effort handler.
            logging.exception("Failed to migrate model evaluation {}: {}".format(me_filename, e))

    @staticmethod
    def migrate_model_comparisons(mc_filename, analysis_path, saved_model_path, mes_path):
        """
        Migrate the display parameters of one model comparison descriptor.

        The `rmodeling_params.json` of every compared model (analysis "A-", saved model "S-" or model
        evaluation "ME-" smart ids) is located; models whose file no longer exists are ignored. The custom
        evaluation metric names of the models using the "CUSTOM" evaluation metric are collected (prefixed
        with "!!") and passed to `update_me_mc_display_params`, then the descriptor is written back.

        This step relies on a previously migrated saved model. If rmodeling_params has not been correctly
        migrated nothing will happen here. Failures are logged and not propagated.
        """
        def locate_rmodeling_params(smart_id):
            # Map a smart id to the rmodeling_params.json of the model it designates
            id_parts = smart_id.split("-")
            if smart_id.startswith("A-"):
                # Analysis
                _, _, analysis, modeling_task, session, part, model = id_parts
                return "{}/{}/{}/sessions/{}/{}/{}/rmodeling_params.json".format(
                    analysis_path, analysis, modeling_task, session, part, model
                )
            if smart_id.startswith("S-"):
                # Saved model
                _, _, saved_model_id, saved_model_version_id = id_parts
                return "{}/{}/versions/{}/rmodeling_params.json".format(saved_model_path, saved_model_id, saved_model_version_id)
            if smart_id.startswith("ME-"):
                # Model evaluation store
                _, _, mes_id, mes_model_id = id_parts
                return "{}/{}/{}/rmodeling_params.json".format(mes_path, mes_id, mes_model_id)
            raise ValueError("Unknown saved model type for model {}".format(smart_id))

        print("Migrating custom score to custom metric in model comparison file {}".format(mc_filename))
        try:
            comparison = base.json_loadf(mc_filename)
            metric_ids = set()
            # We loop over all compared models and we gather their custom evaluation metrics
            for smart_id in [compared["smartId"] for compared in comparison.get("comparedModels", [])]:
                params_file = locate_rmodeling_params(smart_id)
                if not osp.exists(params_file):
                    # The model doesn't exist anymore, move on to the next one
                    continue

                metrics_params = base.json_loadf(params_file).get("metrics", {})
                if metrics_params.get("evaluationMetric") != "CUSTOM":
                    continue
                metric_name = metrics_params.get("customEvaluationMetricName")
                if metric_name is None:
                    raise ValueError("Custom evaluation metric name not found in {}".format(params_file))
                metric_ids.add("!!{}".format(metric_name))

            V12400CustomScoringMetricMigration.update_me_mc_display_params(comparison, metric_ids)
            base.json_dumpf(mc_filename, comparison)
        except Exception as err:
            logging.exception("Failed to migrate model comparison {}: {}".format(mc_filename, err))

    @staticmethod
    def get_json_perf_files(perf_basedir_path):
        """
        List the perf JSON files existing under a model folder.

        :param perf_basedir_path: folder to search
        :return: list of matching paths, grouped in the order of the patterns below
        """
        # We just try all possible locations. The code is less complex that way and at worst a pattern
        # matches nothing.
        candidate_patterns = (
            "%s/perf.json",
            "%s/perf_without_overrides.json",
            "%s/fold_*/perf.json",
            "%s/fold_*/perf_without_overrides.json",
            "%s/posttrain/all_dataset_perf.json",
            "%s/posttrain/subpop-*/modality_perf_*.json",
            "%s/postcomputation/all_dataset_perf.json",
            "%s/postcomputation/subpop-*/modality_perf_*.json",
        )
        matches = []
        for pattern in candidate_patterns:
            matches.extend(glob(pattern % perf_basedir_path))
        return matches

    @staticmethod
    def migrate_rmodeling_params_file(filename):
        """
        Run `update_metrics_in_file` twice on an rmodeling_params file: once targeting the document itself,
        once targeting the `ensemble_params.modeling_params` entries (flagged as ensemble model).
        """
        def whole_document(rmodeling_params):
            return rmodeling_params

        def ensemble_members(rmodeling_params):
            return rmodeling_params.get('ensemble_params', {}).get('modeling_params', [])

        print("Migrating custom score to custom metric in rmodeling_params file {}".format(filename))
        migration = V12400CustomScoringMetricMigration
        migration.update_metrics_in_file(filename, whole_document)
        migration.update_metrics_in_file(filename, ensemble_members, is_ensemble_model=True)

    @staticmethod
    def migrate_mltask(filename):
        """Run `update_metrics_in_file` on an mltask file, targeting its `modeling` section."""
        def modeling_section(mltask):
            return mltask.get("modeling", {})

        print("Migrating custom score to custom metric in mltask file {}".format(filename))
        V12400CustomScoringMetricMigration.update_metrics_in_file(filename, modeling_section)

    @staticmethod
    def migrate_actual_params(filename):
        """
        Run `update_metrics_in_file` on an actual_params file, targeting the
        `resolved.ensemble_params.modeling_params` entries (flagged as ensemble model).
        """
        def ensemble_members(actual_params):
            ensemble_params = actual_params.get("resolved", {}).get('ensemble_params', {})
            return ensemble_params.get('modeling_params', [])

        print("Migrating custom score to custom metric in actual_params file {}".format(filename))
        V12400CustomScoringMetricMigration.update_metrics_in_file(filename, ensemble_members, is_ensemble_model=True)

    @staticmethod
    def migrate_saved_model(filename):
        """Run `update_metrics_in_file` on a saved model file, targeting its `miniTask.modeling` section."""
        def mini_task_modeling(saved_model):
            return saved_model.get("miniTask", {}).get("modeling", {})

        print("Migrating custom score to custom metric in saved model file {}".format(filename))
        V12400CustomScoringMetricMigration.update_metrics_in_file(filename, mini_task_modeling)

    @staticmethod
    def migrate_training_recipe(filename):
        """Run `update_metrics_in_file` on a training recipe file, targeting its `modeling` section."""
        def modeling_section(training_recipe):
            return training_recipe.get("modeling", {})

        print("Migrating custom score to custom metric in training recipe file {}".format(filename))
        V12400CustomScoringMetricMigration.update_metrics_in_file(filename, modeling_section)

    @staticmethod
    def migrate_mes_metrics(filename):
        """
        Apply `update_metrics_dict` to the `metricParams` of a model evaluation file and write it back.

        Failures are logged and not propagated.
        """
        print("Migrating custom score to custom metric in model evaluation metrics {}".format(filename))
        # `update_metrics_in_file` is not usable here since the data lives in `metricParams`
        try:
            evaluation = base.json_loadf(filename)
            metric_params = evaluation.get("metricParams", {})
            V12400CustomScoringMetricMigration.update_metrics_dict(metric_params)
            base.json_dumpf(filename, evaluation)
        except Exception as err:
            logging.exception("Failed to migrate file {}: {}".format(filename, err))


class V12400AddMonitoringToInfrastructure(migration_json.JsonMigrationOperation):
    """
    Sets `activityMonitoringSettings` to `{"mode": "DISABLED"}` in every API deployer infrastructure file,
    replacing any value already stored under that key.
    """

    def __repr__(self):
        return "Set activityMonitoringSettings.mode=DISABLED for all existing infrastructures"

    def appliesTo(self):
        return ['design', 'automation']

    def transform(self, infra, filepath=None):
        # The whole settings object is replaced, not merged
        infra['activityMonitoringSettings'] = {'mode': 'DISABLED'}
        return infra

    def file_patterns(self):
        return ["config/api-deployer/infras/*.json"]


class V12400InvalidateEDAInsightsWithConfidenceIntervals(migration_base.ProjectLocalMigrationOperation):
    """
    Empties the fingerprint file of the EDA insights whose card result is of one of the listed types.
    """

    # Card result types whose fingerprint gets emptied
    _CARD_TYPES_TO_INVALIDATE = (
        "sign_test_1samp",
        "ttest_ztest_1samp",
        "ttest_2samp",
        "oneway_anova",
        "pairwise_ttest",
        "pairwise_mood",
    )

    def __repr__(self):
        return "Invalidate fingerprint for EDA insights supporting confidence intervals and/or one-sided testing"

    @staticmethod
    def _invalidate_insight(insight_data_dir):
        """Empty `eda-card-result.fingerprint` in the given folder when its card result type requires it."""
        results_path = osp.join(insight_data_dir, "eda-card-result.json")
        if not osp.isfile(results_path):
            logging.info("Card results file '{}' does not exist, skipping migration".format(results_path))
            return

        card_type = base.json_loadf(results_path)["type"]
        if card_type in V12400InvalidateEDAInsightsWithConfidenceIntervals._CARD_TYPES_TO_INVALIDATE:
            fingerprint_path = osp.join(insight_data_dir, "eda-card-result.fingerprint")
            if osp.isfile(fingerprint_path):
                # Opening for writing truncates the file: an empty fingerprint invalidates the insight
                open(fingerprint_path, "wb").close()
            else:
                logging.info("Fingerprint file '{}' does not exist, skipping migration".format(fingerprint_path))
        # any other card type does not need to be invalidated

    def execute(self, project_paths):
        """Process every entry of `<insights data>/eda`; a failure on one insight is logged and skipped."""
        insight_dirs = glob("%s/eda/*" % project_paths.insights_data)
        for insight_dir in insight_dirs:
            try:
                V12400InvalidateEDAInsightsWithConfidenceIntervals._invalidate_insight(insight_dir)
            except Exception as err:
                logging.exception("Could not migrate EDA insight {}: {}".format(insight_dir, err))


class V12420FixupObjectRefForLocalAuthorizedObjectsInDashboardAuthorizations(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Cleans up the `dashboardAuthorizations` of a project's `params.json`:

    * authorizations whose object ref carries the key of the project itself are rewritten as local refs
      (no `projectKey`), or dropped when a local authorization already exists for the same object
    * authorizations without object ref are dropped
    * authorizations referring to the same object are merged into one
    """

    def __repr__(self):
        return "Set objects as local for local objects in dashboard authorizations that are considered foreign and remove duplicates"

    def transform(self, obj, filepath):
        # The name of the folder containing the file is used as the project key
        pkey = os.path.split(os.path.split(filepath)[0])[1] if base.is_os_windows() else filepath.split('/')[-2]
        object_authorizations = obj.get('authorizations', [])
        object_authorizations = self.fix_local_refs(pkey, object_authorizations)
        object_authorizations = self.merge_duplicates(object_authorizations)
        obj['authorizations'] = object_authorizations
        return obj

    def local_ref_from(self, ref):
        """Return a copy of `ref` reduced to its object type and id (i.e. without project key)."""
        return {'objectType': ref.get('objectType'), 'objectId': ref.get('objectId')}

    def find_auth_matching(self, authorizations, ref):
        """Return the first authorization whose `objectRef` equals `ref`, or None."""
        return next((item for item in authorizations if item.get('objectRef') == ref), None)

    def fix_local_refs(self, project_key, authorizations):
        """
        Remove authorizations with invalid local ref if a valid authorization exists for the same object
        """
        result = []

        for current in authorizations:
            ref = current.get('objectRef')
            if ref is None:
                # kinda broken. probably never happens, drop it (makes NPE in java if it happens anyway)
                continue

            if ref.get('projectKey') == project_key:  # this is a broken local ref
                # keep & fix it if there is no local ref matching. Otherwise, drop it.
                local_ref = self.local_ref_from(ref)
                any_local_matching = self.find_auth_matching(authorizations, local_ref)
                if any_local_matching is None:
                    result.append({'objectRef': local_ref, 'modes': current.get('modes', [])})
            else:  # normal auth, keep it
                result.append(current)

        return result

    def merge_duplicates(self, authorizations):
        """
        When multiple auth refer to the same object, merge them

        The first authorization met for an object is kept and receives the union of the modes. Modes keep
        their order of first appearance, so that the written file is the same on every run (a set-based
        union would order them arbitrarily).
        """
        result = []

        for current in authorizations:
            previous = self.find_auth_matching(result, current.get('objectRef'))
            if previous is None:
                result.append(current)
                continue

            merged_modes = []
            # `or []` tolerates a null "modes" on either side
            for mode in list(previous.get('modes') or []) + list(current.get('modes') or []):
                if mode not in merged_modes:
                    merged_modes.append(mode)
            previous['modes'] = merged_modes

        return result

    def jsonpath(self):
        return "dashboardAuthorizations"

    def file_patterns(self):
        return ["params.json"]


class V12400AddVersionTagsFileToGitIgnore(migration_base.ProjectLocalMigrationOperation):
    """
    Appends `version-tags.json` to the `.gitignore` of the project config folder, when that file exists and
    does not mention it yet.
    """

    def __repr__(self):
        return "Add file for version tags to gitignore"

    def execute(self, project_paths):
        """Update `<config>/.gitignore` in place; failures are logged and not propagated."""
        gitignore_path = osp.join(project_paths.config, '.gitignore')
        if osp.exists(gitignore_path):
            try:
                with open(gitignore_path, 'r') as fd:
                    data = fd.read()
                if 'version-tags.json' not in data:
                    # Leading newline: the existing content may not end with one
                    data = data + '\nversion-tags.json\n'
                    with open(gitignore_path, 'w') as fd:
                        fd.write(data)
            except Exception as e:
                # The error is formatted into the message: passing it as an extra positional argument makes
                # logging try to %-format a message without placeholder, which fails at emit time.
                logging.exception("Failed to migrate gitignore at {}: {}".format(gitignore_path, e))

class V12400UpdateGPUSettings(migration_base.ProjectLocalMigrationOperation):

    def __repr__(self,):
        return "Migrate old style gpu configs to new gpu config"

    def execute(self, project_paths):
        """
        Run the GPU settings migration on every relevant file of a project.

        Files are processed group by group (mltask, rmodeling_params, core_params, actual_params, saved model
        descriptors, prediction training recipes, evaluation / prediction scoring recipes); the files of a
        group are all listed before the first one is migrated.
        """
        def collect(*located_patterns):
            # Expand every (pattern, root folder) pair, keeping the order of the pairs
            found = []
            for pattern, root in located_patterns:
                found.extend(glob(pattern % root))
            return found

        # we have a lot of info in the ml_task file, so it is easy to migrate
        # everywhere else, our stance is 'migrate as little as possible, just set defaults'
        for mltask_file in collect(
                ("%s/analysis/*/ml/*/params.json", project_paths.config),
                ("%s/*/*/sessions/*/mltask.json", project_paths.analysis_data)):
            self.migrate_mltask_file(mltask_file)

        for rmodeling_params_file in collect(
                ("%s/*/*/sessions/s*/pp*/m*/rmodeling_params.json", project_paths.analysis_data),
                ("%s/*/versions/*/rmodeling_params.json", project_paths.saved_models),
                ("%s/*/pversions/*/*/rmodeling_params.json", project_paths.saved_models),
                ("%s/*/*/rmodeling_params.json", project_paths.model_evaluation_stores)):
            self.migrate_rmodeling_params_file(rmodeling_params_file)

        for core_params_file in collect(
                ("%s/*/versions/*/core_params.json", project_paths.saved_models),
                ("%s/*/pversions/*/*/core_params.json", project_paths.saved_models),
                ("%s/*/*/sessions/*/core_params.json", project_paths.analysis_data)):
            self.migrate_core_params_file(core_params_file)

        for actual_params_file in collect(
                ("%s/*/versions/*/actual_params.json", project_paths.saved_models),
                ("%s/*/pversions/*/*/actual_params.json", project_paths.saved_models),
                ("%s/*/*/sessions/*/*/*/actual_params.json", project_paths.analysis_data)):
            self.migrate_actual_params_file(actual_params_file)

        for saved_model_file in collect(("%s/saved_models/*.json", project_paths.config)):
            self.migrate_saved_model_file(saved_model_file)

        for prediction_training_file in collect(("%s/recipes/*.prediction_training", project_paths.config)):
            self.migrate_prediction_training_file(prediction_training_file)

        for recipe_file in collect(
                ("%s/recipes/*.evaluation", project_paths.config),
                ("%s/recipes/*.prediction_scoring", project_paths.config)):
            self.migrate_evaluation_or_prediction_scoring_file(recipe_file)


    @staticmethod
    def get_default_gpu_config():
        """
        Build a new default GPU configuration.

        :return: a fresh dict (nothing is shared between calls) with GPU usage enabled on GPU 0, a per GPU
                 memory fraction of 0.5, no memory growth and no disabled capability
        """
        params = {}
        params["useGpu"] = True
        params["gpuList"] = [0]
        params["perGPUMemoryFraction"] = 0.5
        params["gpuAllowGrowth"] = False
        return {"params": params, "disabledCapabilities": []}

    @staticmethod
    def migrate_mltask_file(filepath):
        """
        Add a new gpuConfig dict to each mltask & configure it based upon the following:
        * if a capacity is enabled, and usegpu is true, set useGpu=true
        * if it is disabled, ignore
        * if it is enabled and usegpu false, do nothing UNLESS something else is enabled with usegpu true, in which case add the capacity to disabled list
        * text embedding - set useGpu=true if present
        We also create the new `tree_method` param for xgboost

        Files that already hold a `gpuConfig` are left untouched; files without `modeling` section simply
        receive the default config. Failures are logged and not propagated.
        """
        print("Migrating GPU config in mltask file {}".format(filepath))
        try:
            mltask = base.json_loadf(filepath)
            if "gpuConfig" in mltask:
                # already migrated
                return

            gpu_config = V12400UpdateGPUSettings.get_default_gpu_config()

            if "modeling" in mltask:
                modeling = mltask["modeling"]
                # Must be evaluated on the legacy params, i.e. before they get migrated
                xgboost_requires_cpu = V12400UpdateGPUSettings.does_xgboost_require_cpu(modeling)
                V12400UpdateGPUSettings.migrate_modeling_params(modeling, gpu_config)

                if not (gpu_config["params"]["useGpu"] or xgboost_requires_cpu):
                    # look at the features preprocessings only if the gpu usage is not already activated.
                    per_feature = mltask.get("preprocessing", {}).get("per_feature", {})
                    for info in per_feature.values():
                        if "text_handling" in info and info["text_handling"] == "SENTENCE_EMBEDDING":
                            gpu_config["params"]["useGpu"] = True
                            break

            mltask["gpuConfig"] = gpu_config
            base.json_dumpf(filepath, mltask)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of mltask file {}: {}".format(filepath, err))


    @staticmethod
    def migrate_rmodeling_params_file(filepath):
        """
        Apply `migrate_modeling_params` (grid-suffixed keys) to an rmodeling_params file and write it back.

        Failures are logged and not propagated.
        """
        print("Migrating GPU config in rmodeling_params file {}".format(filepath))
        try:
            params = base.json_loadf(filepath)
            # The GPU config filled by the migration is thrown away: only the changes made to the params
            # themselves are kept
            V12400UpdateGPUSettings.migrate_modeling_params(
                params,
                V12400UpdateGPUSettings.get_default_gpu_config(),
                use_grid_suffix=True
            )
            base.json_dumpf(filepath, params)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of rmodeling params file {}: {}".format(filepath, err))


    @staticmethod
    def migrate_core_params_file(filepath):
        """
        Add the default GPU config to the `executionParams` of a core_params file.

        Nothing is written when the file has no `executionParams` or already holds a `gpuConfig` there.
        Failures are logged and not propagated.
        """
        print("Migrating GPU config in core_params file {}".format(filepath))
        try:
            core_params = base.json_loadf(filepath)
            if "executionParams" not in core_params:
                return

            execution_params = core_params["executionParams"]
            if "gpuConfig" in execution_params:
                # already migrated
                return

            execution_params["gpuConfig"] = V12400UpdateGPUSettings.get_default_gpu_config()
            base.json_dumpf(filepath, core_params)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of core params file {}: {}".format(filepath, err))


    @staticmethod
    def migrate_saved_model_file(filepath):
        """
        Apply `migrate_modeling_params` to the `miniTask.modeling` section of a saved model descriptor.

        The file is only rewritten when that section exists. Failures are logged and not propagated.
        """
        print("Migrating GPU config in saved model file {}".format(filepath))
        try:
            saved_model = base.json_loadf(filepath)
            if "modeling" not in saved_model.get("miniTask", {}):
                return

            # The GPU config filled by the migration is thrown away: only the changes made to the
            # modeling params themselves are kept
            discarded_gpu_config = V12400UpdateGPUSettings.get_default_gpu_config()
            V12400UpdateGPUSettings.migrate_modeling_params(saved_model["miniTask"]["modeling"], discarded_gpu_config)
            base.json_dumpf(filepath, saved_model)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of saved model file {}: {}".format(filepath, err))


    @staticmethod
    def migrate_actual_params_file(filepath):
        """
        Remove the legacy GPU fields from the resolved params of an actual_params file.

        * deep neural network algorithms: `deep_neural_network.device` is removed
        * KERAS_CODE: the legacy GPU fields of `keras` are removed
        * any other algorithm: the file is not rewritten

        Failures are logged and not propagated.
        """
        print("Migrating GPU config in actual_params file {}".format(filepath))
        try:
            actual_params = base.json_loadf(filepath)
            resolved = actual_params.get("resolved", {})
            algorithm = resolved.get("algorithm", "")

            if algorithm == "KERAS_CODE":
                keras_settings = resolved.get("keras", {})
                # remove old gpu fields if existing
                for legacy_field in ("perGPUMemoryFraction", "gpuAllowGrowth", "useGPU", "gpuList"):
                    keras_settings.pop(legacy_field, "")
            elif algorithm in ("DEEP_NEURAL_NETWORK_REGRESSION", "DEEP_NEURAL_NETWORK_CLASSIFICATION"):
                dnn_settings = resolved.get("deep_neural_network", {})
                if "device" in dnn_settings:
                    del dnn_settings["device"]
            else:
                # no modif
                return

            base.json_dumpf(filepath, actual_params)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of actual params file {}: {}".format(filepath, err))

    @staticmethod
    def migrate_prediction_training_file(filepath):
        """
        Migrate the GPU settings of a prediction training recipe payload.

        The `modeling` section (grid-suffixed keys), when present, goes through `migrate_modeling_params`; the
        resulting GPU config is stored in `core.executionParams.gpuConfig`. The file is only rewritten when
        `core.executionParams` exists, and is skipped when it already holds a `gpuConfig`.
        Failures are logged and not propagated.
        """
        print("Migrating GPU config in .prediction_training file {}".format(filepath))
        try:
            prediction_training = base.json_loadf(filepath)
            core = prediction_training.get("core", {})

            if "gpuConfig" in core.get("executionParams", {}):
                # already migrated
                return

            gpu_config = V12400UpdateGPUSettings.get_default_gpu_config()
            if "modeling" in prediction_training:
                V12400UpdateGPUSettings.migrate_modeling_params(prediction_training["modeling"], gpu_config, use_grid_suffix=True)

            if "executionParams" not in core:
                return
            core["executionParams"]["gpuConfig"] = gpu_config
            base.json_dumpf(filepath, prediction_training)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of prediction training file {}: {}".format(filepath, err))


    @staticmethod
    def migrate_evaluation_or_prediction_scoring_file(filepath):
        """
        Move the legacy top-level GPU fields of an evaluation / prediction scoring recipe into `gpuConfig`.

        The legacy values are only carried over for the KERAS and DEEP_HUB backends (other backends get the
        default config); the legacy fields are removed in all cases. Files that already hold a `gpuConfig`
        are left untouched. Failures are logged and not propagated.
        """
        # legacy recipe field -> field of the new gpu config params
        legacy_to_new_fields = (
            ("perGPUMemoryFraction", "perGPUMemoryFraction"),
            ("gpuAllowGrowth", "gpuAllowGrowth"),
            ("useGPU", "useGpu"),
            ("gpuList", "gpuList"),
        )
        print("Migrating GPU config in eval or prediction scoring file {}".format(filepath))
        try:
            recipe_desc = base.json_loadf(filepath)
            if "gpuConfig" in recipe_desc:
                # already migrated
                return

            gpu_config = V12400UpdateGPUSettings.get_default_gpu_config()
            gpu_params = gpu_config["params"]

            if recipe_desc.get("backendType", "") in ("KERAS", "DEEP_HUB"):
                for legacy_field, new_field in legacy_to_new_fields:
                    if legacy_field in recipe_desc:
                        gpu_params[new_field] = recipe_desc[legacy_field]
                # A single GPU may have been stored as a bare int: the new config expects a list
                if "gpuList" in recipe_desc and isinstance(gpu_params["gpuList"], int):
                    gpu_params["gpuList"] = [gpu_params["gpuList"]]

            for legacy_field, _ in legacy_to_new_fields:
                recipe_desc.pop(legacy_field, "")

            recipe_desc["gpuConfig"] = gpu_config
            base.json_dumpf(filepath, recipe_desc)
        except Exception as err:
            logging.exception("Failed to migrate GPU config of eval/scoring file {}: {}".format(filepath, err))

    @staticmethod
    def migrate_modeling_params(modeling_params, new_gpu_config, use_grid_suffix=False):
        """
        Move the legacy per-algorithm GPU settings found in ``modeling_params`` into ``new_gpu_config``.

        Both dicts are modified in place: the legacy keys are removed from ``modeling_params`` and
        ``new_gpu_config["params"]`` / ``new_gpu_config["disabledCapabilities"]`` are filled in.

        :param modeling_params: dict of modeling params
        :param new_gpu_config: dict holding a "params" dict and a "disabledCapabilities" list
        :param use_grid_suffix: when True, the xgboost and deep neural network entries are looked up
                                under their "<name>_grid" key instead of "<name>"
        """
        key_suffix = "_grid" if use_grid_suffix else ""

        # Time series: the settings are only carried over when a gluonts algorithm is enabled,
        # but the legacy entry is dropped in all cases
        if "gluonts_gpu_params" in modeling_params:
            gluonts_settings = modeling_params["gluonts_gpu_params"]
            if V12400UpdateGPUSettings.is_any_timeseries_gpu_algo_enabled(modeling_params):
                for legacy_name, new_name in (("useGPU", "useGpu"), ("gpuList", "gpuList")):
                    if legacy_name in gluonts_settings:
                        new_gpu_config["params"][new_name] = gluonts_settings.pop(legacy_name)

            del modeling_params["gluonts_gpu_params"]

        if "keras" in modeling_params:
            keras_settings = modeling_params["keras"]
            keras_enabled = keras_settings.get("enabled", False)

            # The legacy keys are stripped even when keras is disabled
            memory_fraction = keras_settings.pop("perGPUMemoryFraction", 0.5)
            allow_growth = keras_settings.pop("gpuAllowGrowth", False)
            use_gpu = keras_settings.pop("useGPU", True)
            gpu_list = keras_settings.pop("gpuList", [0])

            if keras_enabled:
                if isinstance(gpu_list, int):
                    # a single GPU index is turned into a one-element list
                    gpu_list = [gpu_list]

                new_gpu_config["params"]["perGPUMemoryFraction"] = memory_fraction
                new_gpu_config["params"]["gpuAllowGrowth"] = allow_growth
                new_gpu_config["params"]["useGpu"] = use_gpu
                new_gpu_config["params"]["gpuList"] = gpu_list

        if "gpuParams" in modeling_params:  # deephub
            deephub_settings = modeling_params["gpuParams"]
            for legacy_name, new_name in (("useGPU", "useGpu"), ("gpuList", "gpuList")):
                if legacy_name in deephub_settings:
                    new_gpu_config["params"][new_name] = deephub_settings.pop(legacy_name)

            del modeling_params["gpuParams"]

        # Must be computed before the xgboost / deep neural network legacy keys are popped below
        something_uses_gpu = V12400UpdateGPUSettings.does_something_use_gpu(modeling_params, use_grid_suffix)
        if something_uses_gpu:
            new_gpu_config["params"]["useGpu"] = True

        xgboost_key = "xgboost" + key_suffix
        if xgboost_key in modeling_params:
            xgboost_settings = modeling_params[xgboost_key]
            xgboost_enabled = xgboost_settings.get("enabled", False)
            xgboost_on_gpu = xgboost_settings.get("enable_cuda", False)

            # An enabled algorithm that stayed on cpu while another one used the gpu
            # gets its capability disabled
            if something_uses_gpu and xgboost_enabled and not xgboost_on_gpu:
                new_gpu_config["disabledCapabilities"].append("XGBOOST")

            V12400UpdateGPUSettings.update_xgboost_modeling_params(xgboost_settings, new_gpu_config)

        # Regression first, then classification: both report under the same "DEEP_NN" capability
        for dnn_name in ("deep_neural_network_regression", "deep_neural_network_classification"):
            dnn_key = dnn_name + key_suffix
            if dnn_key not in modeling_params:
                continue

            dnn_settings = modeling_params[dnn_key]
            dnn_enabled = dnn_settings.get("enabled", False)
            dnn_on_cpu = dnn_settings.pop("device", "cpu") == "cpu"

            if something_uses_gpu and dnn_enabled and dnn_on_cpu:
                new_gpu_config["disabledCapabilities"].append("DEEP_NN")

    @staticmethod
    def does_xgboost_require_cpu(modeling_params):
        """
        Tell whether the "xgboost" entry of ``modeling_params`` is enabled, does not use CUDA
        and is configured with a cpu-only tree method.

        :param modeling_params: dict of modeling params; only its "xgboost" entry is read
        :return: True or False
        """
        if "xgboost" not in modeling_params:
            return False

        xgb = modeling_params["xgboost"]
        if xgb.get("enabled", False) and not xgb.get("enable_cuda", False):
            # "auto" and "approx" are the two cpu-only tree methods
            return xgb.get("cpu_tree_method", "auto") in ("auto", "approx")

        return False

    @staticmethod
    def update_xgboost_modeling_params(xgboost_params, new_gpu_config):
        """
        Replace the legacy "enable_cuda" / "cpu_tree_method" / "gpu_tree_method" keys of
        ``xgboost_params`` with a single "tree_method" key (in place).

        When CUDA was not enabled and the cpu tree method is "auto" or "approx",
        ``new_gpu_config["params"]["useGpu"]`` is also set to False.

        :param xgboost_params: dict of xgboost modeling params, modified in place
        :param new_gpu_config: dict holding a "params" dict, possibly modified in place
        """
        cuda_enabled = xgboost_params.pop("enable_cuda", False)
        legacy_cpu_method = xgboost_params.pop("cpu_tree_method", "auto")
        legacy_gpu_method = xgboost_params.pop("gpu_tree_method", "gpu_exact")  # taking 'gpu_exact' as default

        if not cuda_enabled:
            xgboost_params["tree_method"] = legacy_cpu_method
            if legacy_cpu_method in ("auto", "approx"):  # these two are cpu only, so gpu is turned off
                new_gpu_config["params"]["useGpu"] = False
            return

        # can only be 'gpu_exact' or 'gpu_hist'
        if legacy_gpu_method == "gpu_exact":
            xgboost_params["tree_method"] = "exact"
        else:
            xgboost_params["tree_method"] = "hist"

    @staticmethod
    def does_something_use_gpu(modeling_params, use_grid_suffix):
        """
        Tell whether an enabled xgboost or deep neural network entry of ``modeling_params``
        is configured to run on GPU.

        :param modeling_params: dict of modeling params
        :param use_grid_suffix: when True, entries are looked up under "<name>_grid" instead of "<name>"
        :return: True or False
        """
        key_suffix = "_grid" if use_grid_suffix else ""

        # xgboost flags GPU usage with "enable_cuda"
        xgboost_settings = modeling_params.get("xgboost" + key_suffix, {})
        if xgboost_settings.get("enabled", False) and xgboost_settings.get("enable_cuda", False):
            return True

        # deep neural networks flag GPU usage with any "device" other than "cpu"
        for dnn_name in ("deep_neural_network_regression", "deep_neural_network_classification"):
            dnn_settings = modeling_params.get(dnn_name + key_suffix, {})
            if dnn_settings.get("enabled", False) and dnn_settings.get("device", "cpu") != "cpu":
                return True

        return False

    @staticmethod
    def is_any_timeseries_gpu_algo_enabled(modeling):
        """
        Look at the "enabled" flag of the gluonts time series entries of ``modeling``
        (each one under its plain key and under its "_grid" key).

        :param modeling: dict of modeling params
        :return: the first truthy "enabled" value found, else the value of the last entry checked
                 (False when that entry is absent)
        """
        algorithm_keys = (
            "gluonts_simple_feed_forward_timeseries",
            "gluonts_simple_feed_forward_timeseries_grid",
            "gluonts_deepar_timeseries",
            "gluonts_deepar_timeseries_grid",
            "gluonts_transformer_timeseries",
            "gluonts_transformer_timeseries_grid",
            "gluonts_mqcnn_timeseries",
            "gluonts_mqcnn_timeseries_grid",
        )

        enabled = False
        for algorithm_key in algorithm_keys:
            enabled = modeling.get(algorithm_key, {}).get("enabled", False)
            if enabled:
                break
        return enabled


class V12400UpdatePromptRecipePayloadParams(migration_base.ProjectLocalMigrationOperation):
    """
    Set "promptTemplateQueriesSource" to "DATASET" in the "prompt" object of prompt recipe
    payload files that do not have this field yet.
    """

    def __repr__(self,):
        return "Migrate prompt recipe payload params (add missing field)"

    def execute(self, project_paths):
        """Rewrite every "recipes/*.prompt" file of the project config that lacks the field."""
        for recipe_file in glob("%s/recipes/*.prompt" % project_paths.config):
            print("Migrating prompt recipe payload params in file {}".format(recipe_file))
            try:
                recipe_params = base.json_loadf(recipe_file)
                prompt_def = recipe_params.get("prompt")
                # Nothing to do without a prompt definition, or when the field is already there
                if prompt_def is None or "promptTemplateQueriesSource" in prompt_def:
                    continue

                prompt_def["promptTemplateQueriesSource"] = "DATASET"
                base.json_dumpf(recipe_file, recipe_params)

            except Exception as e:
                # Best effort: a broken file is logged and the migration moves on
                logging.exception("Failed to migrate prompt recipe payload params in file {}: {}".format(recipe_file, e))


class V12420AddRAGModelId(migration_base.ProjectLocalMigrationOperation):
    """
    Give every entry of the "llmsExposedWith" list of knowledge bank files a "ragModelId",
    set to the position of the entry in the list (as a string).
    """

    def __repr__(self,):
        return "Add an ID to RAG models in knowledge banks"

    @staticmethod
    def _number_rag_models(kb_file, start_message, failure_message):
        """Number the RAG models of one knowledge bank file; failures are logged, not raised."""
        print(start_message.format(kb_file))
        try:
            kb = base.json_loadf(kb_file)
            for position, llm in enumerate(kb.get("llmsExposedWith", [])):
                llm["ragModelId"] = str(position)
            # The file is written back even when it exposes no LLM
            base.json_dumpf(kb_file, kb)

        except Exception as e:
            logging.exception(failure_message.format(kb_file, e))

    def execute(self, project_paths):
        """Process the knowledge bank config files, then the "kb.json" files of the knowledge banks folder."""
        for kb_file in glob("%s/knowledge-banks/*.json" % project_paths.config):
            self._number_rag_models(
                kb_file,
                "Adding an ID to RAG models in knowledge bank config file {}",
                "Failed to add an ID to RAG models in knowledge bank config file {}: {}")

        for kb_file in glob("%s/*/kb.json" % project_paths.knowledge_banks):
            self._number_rag_models(
                kb_file,
                "Adding an ID to RAG models in knowledge bank recipe file {}",
                "Failed to add an ID to RAG models in knowledge bank recipe file {}: {}")


class V12420UpdatePromptStudioPromptStructure(migration_base.ProjectLocalMigrationOperation):
    """
    Nest the prompt-related fields of prompt studio prompts under a new "prompt" key,
    in prompt studio config files and in prompt history files.
    """

    def __repr__(self,):
        return "Migrate prompt studio prompt structure"

    @staticmethod
    def migrate(studio_prompt):
        """
        Move the prompt fields of ``studio_prompt`` into ``studio_prompt["prompt"]`` (in place).

        "promptMode" is copied (it stays on the studio prompt); every other field is removed
        from the studio prompt. A studio prompt that already has a "prompt" key is left untouched.
        """
        if "prompt" in studio_prompt:
            return  # already migrated

        # (field, default) pairs, rebuilt on every call so the mutable defaults are never shared
        moved_fields = [
            ("promptTemplateQueriesSource", None),
            # Structured prompt params
            ("promptTemplateInputs", []),
            ("structuredPromptPrefix", ""),
            ("structuredPromptSuffix", ""),
            ("structuredPromptOutputName", ""),
            ("structuredPromptExamples", []),
            # Text prompt params
            ("textPromptTemplateInputs", []),
            ("textPromptTemplate", ""),
            # Raw prompt param
            ("rawPromptType", None),
            ("rawPromptText", ""),
            # Single input
            ("singleInputColumn", ""),
            # Result validation
            ("resultValidation", {}),
        ]

        nested_prompt = dict()
        nested_prompt["promptMode"] = studio_prompt.get("promptMode", None)
        for field, default in moved_fields:
            nested_prompt[field] = studio_prompt.pop(field, default)

        studio_prompt["prompt"] = nested_prompt

    def execute(self, project_paths):
        """Migrate the prompts of every prompt studio config file, then of every history file."""
        for studio_file in glob("%s/prompt-studios/*.json" % project_paths.config):
            print("Migrating prompt studio prompt structure in prompt studio config file {}".format(studio_file))
            try:
                prompt_studio = base.json_loadf(studio_file)
                for studio_prompt in prompt_studio.get("prompts", []):
                    V12420UpdatePromptStudioPromptStructure.migrate(studio_prompt)
                base.json_dumpf(studio_file, prompt_studio)

            except Exception as e:
                logging.exception("Failed to migrate prompt studio prompt structure in file {}: {}".format(studio_file, e))

        for history_file in glob("%s/*/prompts/*/history.json" % project_paths.prompt_studios):
            print("Migrating prompt studio prompt structure in prompt studio history file {}".format(history_file))
            try:
                history = base.json_loadf(history_file)
                for entry in history.get("entries", []):
                    # In history entries the studio prompt moves from "prompt" to "promptStudioPrompt"
                    studio_prompt = entry.pop("prompt", None)
                    if studio_prompt is None:
                        continue
                    V12420UpdatePromptStudioPromptStructure.migrate(studio_prompt)
                    entry["promptStudioPrompt"] = studio_prompt
                base.json_dumpf(history_file, history)

            except Exception as e:
                logging.exception("Failed to migrate prompt studio prompt structure in file {}: {}".format(history_file, e))

class V12420UpdateCustomLLMConnections(migration_json.JsonMigrationOperation):
    """
    Turn the single model of "CustomLLM" connections ("type" + "customConfig" params)
    into a one-element "models" list, and record the id of the plugin providing it.
    """

    def __repr__(self,):
        return "Migrate custom LLM connections to support multiple models"

    @staticmethod
    def _list_subdirectories(folder):
        """Return the names of the directories directly under ``folder`` ([] if it is not a directory)."""
        if not os.path.isdir(folder):
            return []
        return [item for item in os.listdir(folder) if os.path.isdir(os.path.join(folder, item))]

    def transform(self, connections, filepath=None):
        # The plugins folder is resolved relative to the grand-parent directory of the migrated file
        dip_home = os.path.dirname(os.path.dirname(filepath))
        plugins_folder = os.path.join(dip_home, "plugins")
        installed_plugin_ids = (self._list_subdirectories(os.path.join(plugins_folder, "dev"))
                                + self._list_subdirectories(os.path.join(plugins_folder, "installed")))

        for name, connection in iteritems(connections):
            if connection.get("type", None) != "CustomLLM":
                continue

            params = connection.get("params", {})
            if "models" in params:
                continue  # already migrated

            print("Migrating custom LLM connection: {}".format(name))
            connection["params"] = params
            model_type = params.pop("type", "")
            model_custom_config = params.pop("customConfig", {})

            # model type format is: "jllm_" + plugin_id + "_" + model_type_id_from_plugin
            # The first plugin whose id fits that format is retained
            for candidate_id in installed_plugin_ids:
                if model_type.startswith("jllm_" + candidate_id + "_"):
                    plugin_id = candidate_id
                    break
            else:
                plugin_id = ""
                print('WARNING: Plugin ID not found for model type: "{}"'.format(model_type))

            params["pluginID"] = plugin_id
            params["models"] = [{
                "type": model_type,
                "capability": "TEXT_COMPLETION",
                "id": "default",
                "customConfig": model_custom_config,
            }]
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

###############################################################################
# V12500 / DSS 12.5.0
###############################################################################
class V12500FixupReferenceLines(migration_app.ChartsMigrationOperation):
    """
    Turn the "axis" of chart reference lines from a plain string into an object
    of the form {"type": <former string>}.
    """

    def __init__(self):
        super(V12500FixupReferenceLines, self).__init__()

    def migrate_def(self, chart_def):
        """
        Migrate the "referenceLines" of one chart definition, in place.

        :param chart_def: dict holding a chart definition
        :return: the same ``chart_def`` object
        """
        for ref_line in chart_def.get("referenceLines", []):
            # Test against the file-level string type rather than `str`: under Python 2,
            # strings coming out of JSON are `unicode` and would never match `str`
            if "axis" in ref_line and isinstance(ref_line["axis"], dku_basestring_type):
                ref_line["axis"] = {"type": ref_line["axis"]}
        return chart_def


class V12500FixupChartValues(migration_app.ChartsMigrationOperation):
    """
    Fill the "valuesInChartDisplayOptions" object of chart definitions from the legacy
    "showInChartValues" / "showInChartTotalValues" / "showInChartLabels" flags.
    """

    def __init__(self):
        super(V12500FixupChartValues, self).__init__()

    def migrate_def(self, chart_def):
        """
        Migrate one chart definition, in place.

        :param chart_def: dict holding a chart definition
        :return: the same ``chart_def`` object
        """
        display_options = chart_def.setdefault("valuesInChartDisplayOptions", {})
        chart_type = chart_def.get("type", None)

        if chart_type == "stacked_columns":
            show_values = chart_def.get("showInChartValues", False)
            show_totals = chart_def.get("showInChartTotalValues", False)
            if show_values or show_totals:
                display_options["displayValues"] = True
            # With totals only, no display mode is written
            if show_values:
                display_options["displayMode"] = "VALUES_AND_TOTALS" if show_totals else "VALUES"

        elif chart_type == "pie":
            show_values = chart_def.get("showInChartValues", False)
            show_labels = chart_def.get("showInChartLabels", False)
            if show_values or show_labels:
                display_options["displayPieLabelsOrValues"] = True
            if show_values:
                display_options["displayMode"] = "VALUES_AND_LABELS" if show_labels else "VALUES"
            elif show_labels:
                display_options["displayMode"] = "LABELS"

        elif "showInChartValues" in chart_def:
            # Other chart types: the legacy flag is copied as is
            display_options["displayValues"] = chart_def["showInChartValues"]
            display_options["displayMode"] = "VALUES"

        return chart_def


class V12520EnableAdaEmbeddingModelInOpenAIConnections(migration_json.JsonMigrationOperation):
    """
    Set "allowEmbeddingAda2" to True in the params of "OpenAI" connections
    that do not define this flag yet.
    """

    def __repr__(self,):
        return "Migrate OpenAI LLM connections to enable the Ada embedding model"

    def transform(self, connections, filepath=None):
        """
        :param connections: dict of connection name -> connection definition, modified in place
        :param filepath: unused
        :return: the same ``connections`` object
        """
        for name, connection in iteritems(connections):
            if connection.get("type", None) != "OpenAI":
                continue

            params = connection.get("params", {})
            if "allowEmbeddingAda2" in params:
                continue  # already migrated

            print("Migrating OpenAI LLM connection: {}".format(name))
            params["allowEmbeddingAda2"] = True
            # When the connection had no "params" at all, `params` is a fresh dict:
            # attach it, otherwise the flag would be silently dropped
            connection["params"] = params
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]


###############################################################################
# V12600 / DSS 12.6.0
###############################################################################

class V12600AddSupportForScikitLearn13(migration_base.ProjectLocalMigrationOperation):
    """
    Replace the "auto" selection mode of tree-based algorithms (random forest, gradient boosted
    trees, extra trees) in ML task params, saved model mini tasks, training recipes,
    rmodeling params and actual params files.

    "auto" becomes "sqrt" for classification, and "prop" with a proportion of 1.0 for regression.
    Every ``migrate_*`` method works in place and returns True when it changed something.
    """

    def __repr__(self,):
        return "Migrate selection_mode to 'sqrt' (classification) or 'prop' (regression) since 'auto' is removed in scikit-learn 1.3"

    def migrate_tree_based_regression_algo_params(self, params):
        """Regression, grid form: "max_feature_prop" becomes an explicit grid holding only 1.0."""
        if not (isinstance(params, dict) and params.get("selection_mode") == "auto"):
            return False
        # "auto" was removed in sklearn 1.3, but it was the same as 1.0 (use all features) (see https://github.com/scikit-learn/scikit-learn/pull/20803)
        params["selection_mode"] = "prop"
        proportion_grid = params["max_feature_prop"]
        proportion_grid['gridMode'] = "EXPLICIT"
        proportion_grid['values'] = [1.0]
        return True

    def migrate_tree_based_regression_actual_params(self, params):
        """Regression, resolved form: "max_feature_prop" becomes the plain value 1.0."""
        if not (isinstance(params, dict) and params.get("selection_mode") == "auto"):
            return False
        # "auto" was removed in sklearn 1.3, but it was the same as 1.0 (use all features) (see https://github.com/scikit-learn/scikit-learn/pull/20803)
        params["selection_mode"] = "prop"
        params["max_feature_prop"] = 1.0
        return True

    def migrate_tree_based_classification_algo_params(self, params):
        """Classification: only the selection mode itself is rewritten."""
        if not (isinstance(params, dict) and params.get("selection_mode") == "auto"):
            return False
        # "auto" was removed in sklearn 1.3, but it was the same as "sqrt" (see https://github.com/scikit-learn/scikit-learn/pull/20803)
        params["selection_mode"] = "sqrt"
        return True

    def migrate_tree_based_classification_actual_params(self, params):
        """Classification, resolved form: same treatment as the grid form."""
        return self.migrate_tree_based_classification_algo_params(params)

    def migrate_algo_params_in_mltask(self, mltask_data):
        """Migrate the tree-based entries of the "modeling" object of an ML task."""
        if not isinstance(mltask_data, dict):
            return False
        prediction_type = mltask_data.get("predictionType", "").lower()
        modeling_params = mltask_data.get("modeling", {})

        regression = self.migrate_tree_based_regression_algo_params
        classification = self.migrate_tree_based_classification_algo_params
        steps = [
            (regression, "random_forest_regression"),
            (classification, "random_forest_classification"),
            (regression, "gbt_regression"),
            (classification, "gbt_classification"),
        ]
        # "extra_trees" is shared by both kinds of tasks: the prediction type decides
        if prediction_type == "regression":
            steps.append((regression, "extra_trees"))
        elif "classification" in prediction_type:
            steps.append((classification, "extra_trees"))

        was_data_modified = False
        for migrate, algo_key in steps:
            if migrate(modeling_params.get(algo_key, {})):
                was_data_modified = True
        return was_data_modified

    def migrate_algo_params_in_modeling(self, rmodeling_data):
        """Migrate the grid params of the single algorithm described by an rmodeling params object."""
        if not isinstance(rmodeling_data, dict):
            return False
        algorithm = rmodeling_data.get("algorithm", "")

        regression = self.migrate_tree_based_regression_algo_params
        classification = self.migrate_tree_based_classification_algo_params
        for algorithm_name, migrate, grid_key in (
                ("RANDOM_FOREST_REGRESSION", regression, "rf_regressor_grid"),
                ("RANDOM_FOREST_CLASSIFICATION", classification, "rf_classifier_grid"),
                ("GBT_REGRESSION", regression, "gbt_regressor_grid"),
                ("GBT_CLASSIFICATION", classification, "gbt_classifier_grid")):
            if algorithm == algorithm_name:
                return migrate(rmodeling_data.get(grid_key, {}))

        if algorithm == "EXTRA_TREES":
            # infer prediction type: classification if the threshold metric field is present
            if "thresholdOptimizationMetric" in rmodeling_data.get("metrics", {}):
                return classification(rmodeling_data.get("extra_trees_grid", {}))
            return regression(rmodeling_data.get("extra_trees_grid", {}))

        was_data_modified = False
        if algorithm == "PYTHON_ENSEMBLE":
            # every origin model of the ensemble carries its own modeling params
            for origin_model_mp in rmodeling_data.get("ensemble_params", {}).get("modeling_params", []):
                if self.migrate_algo_params_in_modeling(origin_model_mp):
                    was_data_modified = True
        return was_data_modified

    def migrate_actual_params(self, actual_params, rmodeling_params):
        """
        Migrate the "resolved" object of an actual params object.

        ``rmodeling_params`` is only read, to tell classification from regression for extra trees.
        """
        if not isinstance(actual_params, dict):
            return False
        resolved = actual_params.get("resolved", {})
        algorithm = resolved.get("algorithm")

        regression = self.migrate_tree_based_regression_actual_params
        classification = self.migrate_tree_based_classification_actual_params
        for algorithm_name, migrate, params_key in (
                ("RANDOM_FOREST_REGRESSION", regression, "rf"),
                ("RANDOM_FOREST_CLASSIFICATION", classification, "rf"),
                ("GBT_REGRESSION", regression, "gbt"),
                ("GBT_CLASSIFICATION", classification, "gbt")):
            if algorithm == algorithm_name:
                return migrate(resolved.get(params_key, {}))

        if algorithm == "EXTRA_TREES":
            # infer prediction type: classification if the threshold metric field is present
            if "thresholdOptimizationMetric" in rmodeling_params.get("metrics", {}):
                return classification(resolved.get("extra_trees", {}))
            return regression(resolved.get("extra_trees", {}))

        was_data_modified = False
        if algorithm == "PYTHON_ENSEMBLE":
            for origin_model_mp in resolved.get("ensemble_params", {}).get("modeling_params", []):
                if self.migrate_algo_params_in_modeling(origin_model_mp):
                    was_data_modified = True
        return was_data_modified

    @staticmethod
    def _migrate_json_files(json_files, start_message, failure_message, migrate):
        """
        Load each file, call ``migrate(data, path)`` and write the file back when it returns True.

        Failures are printed with ``failure_message`` and do not stop the loop.
        """
        for json_file in json_files:
            print(start_message % json_file)
            try:
                data = base.json_loadf(json_file)
                if migrate(data, json_file):
                    base.json_dumpf(json_file, data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        self._migrate_json_files(
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data),
            "Migrating algorithms params in MLTask: %s",
            "Algo params in mltask migration FAILED: %s",
            lambda mltask_data, _path: self.migrate_algo_params_in_mltask(mltask_data))

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        self._migrate_json_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating algorithms params in saved model miniTask: %s ",
            "Algo params in saved model miniTask migration FAILED: %s",
            lambda sm_data, _path: self.migrate_algo_params_in_mltask(sm_data.get("miniTask", {})))

        # config/projects/PROJECT_KEY/recipes/*.prediction_training
        self._migrate_json_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating algorithms params in training recipe: %s ",
            "Algo params in train recipe migration FAILED: %s",
            lambda recipe_data, _path: self.migrate_algo_params_in_modeling(recipe_data.get("modeling", {})))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/rmodeling_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/rmodeling_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/rmodeling_params.json
        # model_evaluation_stores/PROJECT_KEY/TUBsmsxH/ETuJjbqDpg5H/rmodeling_params.json
        self._migrate_json_files(
            glob("%s/*/versions/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rmodeling_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/rmodeling_params.json" % project_paths.analysis_data)
            + glob("%s/*/*/rmodeling_params.json" % project_paths.model_evaluation_stores),
            "Migrating algorithms params in rmodeling file: %s ",
            "Algo params in trained model rmodeling migration FAILED: %s",
            lambda rmodeling_data, _path: self.migrate_algo_params_in_modeling(rmodeling_data))

        # saved_models/PROJECT_KEY/58ipAuN7/versions/1573723995773/actual_params.json (regular models, partitioned base models)
        # saved_models/PROJECT_KEY/58ipAuN7/pversions/female/v1/actual_params.json (model partitions)
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/pp1/m1/actual_params.json
        # The rmodeling params are read from the file sitting next to each actual params file
        self._migrate_json_files(
            glob("%s/*/versions/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/actual_params.json" % project_paths.saved_models)
            + glob("%s/*/*/sessions/*/*/*/actual_params.json" % project_paths.analysis_data),
            "Migrating algorithms params in actual params file: %s ",
            "Algo params in trained model actual params migration FAILED: %s",
            lambda actual_params, ap_file: self.migrate_actual_params(
                actual_params,
                base.json_loadf(ap_file.replace("actual_params.json", "rmodeling_params.json"))))


def migrate_custom_measure_inferred_type(customMeasure):
    """
    Make sure ``customMeasure`` has an "inferredType", defaulting it to "NUMERICAL".

    An existing "inferredType" is never overwritten.

    :param customMeasure: dict describing a custom measure, modified in place
    :return: the same ``customMeasure`` object
    """
    customMeasure.setdefault("inferredType", "NUMERICAL")
    return customMeasure

class V12600CustomMeasuresInExplore(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Operation declared on the "customMeasures" json path of "explore/*.json" files;
    its transform delegates to migrate_custom_measure_inferred_type.
    """

    def __repr__(self):
        return "Migrate custom measures in explore"

    def file_patterns(self):
        return ["explore/*.json"]

    def jsonpath(self):
        return "customMeasures"

    def transform(self, obj, filepath):
        # filepath is not needed: the same default applies whatever the file
        migrated = migrate_custom_measure_inferred_type(obj)
        return migrated

class V12600CustomMeasuresInAnalysis(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Operation declared on the "customMeasures" json path of "analysis/*/core_params.json" files;
    its transform delegates to migrate_custom_measure_inferred_type.
    """

    def __repr__(self):
        return "Migrate custom measures in analysis"

    def file_patterns(self):
        return ["analysis/*/core_params.json"]

    def jsonpath(self):
        return "customMeasures"

    def transform(self, obj, filepath):
        # filepath is not needed: the same default applies whatever the file
        migrated = migrate_custom_measure_inferred_type(obj)
        return migrated

class V12600CustomMeasuresInAnalysisModels(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Operation declared on the "customMeasures" json path of "analysis/*/ml/*/params.json" files;
    its transform delegates to migrate_custom_measure_inferred_type.
    """

    def __repr__(self):
        return "Migrate custom measures in analysis models"

    def file_patterns(self):
        return ["analysis/*/ml/*/params.json"]

    def jsonpath(self):
        return "customMeasures"

    def transform(self, obj, filepath):
        # filepath is not needed: the same default applies whatever the file
        migrated = migrate_custom_measure_inferred_type(obj)
        return migrated

class V12600CustomMeasuresInInsights(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Operation declared on the "params.customMeasures" json path of "insights/*.json" files;
    its transform delegates to migrate_custom_measure_inferred_type.
    """

    def __repr__(self):
        return "Migrate custom measures in insights"

    def file_patterns(self):
        return ["insights/*.json"]

    def jsonpath(self):
        return "params.customMeasures"

    def transform(self, obj, filepath):
        # filepath is not needed: the same default applies whatever the file
        migrated = migrate_custom_measure_inferred_type(obj)
        return migrated

class V12600CheckToDataQualityRulesUpdate(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Rewrite the "metricsChecks" object of dataset files: every check gets an "autoRun" flag,
    the check set gets a "monitor" flag and loses its "runOnBuild" flag.

    A dataset is treated as an input when no recipe of the project lists it in its outputs.
    """

    def __repr__(self):
        return "Migrate existing Checks to Data Quality Rules"

    def collect_recipe_outputs(self, project_paths):
        """
        Collect the names of the datasets referenced as output of a recipe of the project.

        Unreadable recipe files are reported and skipped.

        :return: set of dataset names (refs stripped of anything before the last '.')
        """
        not_input_datasets = set()

        for recipe_file in glob("%s/recipes/*.json" % project_paths.config):
            try:
                recipe = base.json_loadf(recipe_file)
                for output_for_role in recipe.get('outputs', {}).values():
                    for item in output_for_role.get('items', []):
                        # an output dataset should never be foreign, but just for safety
                        dataset_name = item.get('ref', '').split('.')[-1]
                        not_input_datasets.add(dataset_name)
            except Exception as e:
                print("Unable to read recipe %s - ignoring it: %s" % (recipe_file, e))

        return not_input_datasets

    def migrate_one_dataset(self, dataset_name, dataset, is_input):
        """
        Migrate the "metricsChecks" of one dataset, in place.

        :param dataset_name: unused
        :param dataset: dict loaded from the dataset file
        :param is_input: True when the dataset is not the output of any recipe
        :return: True when the dataset has a "metricsChecks" object (it is then always rewritten)
        """
        check_set = dataset.get('metricsChecks', None)
        if check_set is None:
            return False

        run_on_build = check_set.get('runOnBuild', False)
        checks = check_set.get('checks', [])
        for check in checks:
            # checks of input datasets are never run automatically
            check['autoRun'] = not is_input and run_on_build

        check_set['monitor'] = len(checks) > 0
        check_set.pop("runOnBuild", None)

        return True

    def execute(self, project_paths):
        """Migrate every "datasets/*.json" file of the project config; failures are reported and skipped."""
        not_input_datasets = self.collect_recipe_outputs(project_paths)

        for dataset_file in glob("%s/datasets/*.json" % project_paths.config):
            dataset_name = osp.basename(dataset_file)[:-len('.json')]
            is_input = dataset_name not in not_input_datasets

            try:
                dataset = base.json_loadf(dataset_file)
                if self.migrate_one_dataset(dataset_name, dataset, is_input):
                    base.json_dumpf(dataset_file, dataset)
            except Exception as e:
                # `Exception` rather than a bare except, so that KeyboardInterrupt / SystemExit
                # still stop the migration; the cause is reported like for recipes
                print("Unable to read dataset %s - ignoring it: %s" % (dataset_name, e))

class V12600CheckDatasetScenarioStepUpdate(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Add the ignorePartitionSelectionMode and computeAutomaticRules parameters on existing
    'run checks' scenario steps to keep the pre-DataQualityRule legacy behavior
    """
    def __repr__(self, ):
        return "Update 'run checks' scenario steps"

    def transform(self, step, filepath):
        """
        Flag a 'check_dataset' step; any other step is returned untouched.

        :param step: one scenario step (element of params.steps), modified in place
        :param filepath: path of the scenario file (unused)
        :return: the step
        """
        if step.get('type', None) == 'check_dataset':
            step_params = step.get('params', None)
            if step_params is None:
                # Attach the new dict to the step, otherwise the flags set
                # below would be written to a throwaway object and lost
                step_params = {}
                step['params'] = step_params
            step_params['ignorePartitionSelectionMode'] = True
            step_params['computeAutomaticRules'] = True
        return step

    def jsonpath(self,):
        return "params.steps"

    def file_patterns(self,):
        return ["scenarios/*.json"]


class V12600MoveDashboardFiltersInsideDashboardPage(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Move the filters held by 'filters' insight tiles to the dashboard page.

    Every page gets a 'filtersParams' dict (panel position "TOP" by default).
    When a page holds a filters insight tile whose insight file exists and
    names a source dataset, the tile filters are moved to page["filters"] and
    the panel position becomes "TILE".
    """
    def __repr__(self):
        return "Move dashboard filters information stored in tiles and insights at the page level"

    def transform(self, obj, filepath):
        for page in obj.get("pages", []):
            filters_params = {"panelPosition": "TOP"}
            page["filtersParams"] = filters_params

            for tile in page.get("grid", {}).get("tiles", []):
                # Only the tiles displaying a 'filters' insight are of interest
                if tile.get("tileType", None) != "INSIGHT" or tile.get("insightType", None) != "filters":
                    continue

                tile_params = tile.get("tileParams", {})
                tile_filters = tile_params.get("filters", [])

                # The insight lives in the 'insights' folder next to the
                # 'dashboards' folder; it is needed for the source dataset name
                project_directory = osp.dirname(osp.dirname(filepath))
                insight_file = osp.join(project_directory, "insights", "{}.json".format(tile.get("insightId", None)))
                if not osp.isfile(insight_file):
                    break
                # TODO check if the filter insight file can be removed
                dataset_smart_name = base.json_loadf(insight_file).get('params', {}).get('datasetSmartName', None)
                if dataset_smart_name is None:
                    break

                # Store the filters and their parameters at the page level
                page["filters"] = tile_filters
                filters_params["panelPosition"] = "TILE"
                if len(tile_filters) > 0:
                    filters_params["datasetSmartName"] = dataset_smart_name
                    filters_params["engineType"] = tile_params.get("engineType", "LINO")
                    filters_params["refreshableSelection"] = tile_params.get("refreshableSelection", {})
                tile_params.pop("filters", None)
        return obj

    def file_patterns(self,):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

def safe_int(value):
    """
    Convert `value` to an int, falling back to 0 when int() rejects it
    with a ValueError or a TypeError.
    """
    try:
        converted = int(value)
    except (ValueError, TypeError):
        converted = 0
    return converted

def convert_relative_date_option(current_option, last, next):
    """
    Build the dict form of a relative date option from its legacy keyword.

    :param current_option: legacy keyword; "THIS", "LAST", "NEXT" and "TO" are
                           recognised, anything else yields the neutral dict
    :param last: legacy count, only used (through safe_int) for "LAST"
    :param next: legacy count, only used (through safe_int) for "NEXT"
    :return: dict with the keys containsCurrentDatePart, isUntilNow, last and next
    """
    until_now = current_option == "TO"
    return {
        "containsCurrentDatePart": current_option == "THIS" or until_now,
        "isUntilNow": until_now,
        "last": safe_int(last) if current_option == "LAST" else 0,
        "next": safe_int(next) if current_option == "NEXT" else 0,
    }

class V12600FilterAndFlagOnDateRangeProcessorRelativeDateMultiSelect(migration_app.ShakerStepMigrationOperation):
    """
    Replace the 'option' / 'relativeMin' / 'relativeMax' params of a shaker step
    by the single dict built by convert_relative_date_option().
    """
    def __init__(self, original_name):
        super(V12600FilterAndFlagOnDateRangeProcessorRelativeDateMultiSelect, self).__init__(original_name)

    def transform_step(self, step):
        params = step.get('params', None)
        if params is None:
            # Step without params: nothing to convert
            return step

        legacy_option = params.get("option", "")
        legacy_last = params.get("relativeMin", 0)
        legacy_next = params.get("relativeMax", 0)
        params["option"] = convert_relative_date_option(legacy_option, legacy_last, legacy_next)

        # The counts are now carried by the option dict
        for legacy_key in ('relativeMin', 'relativeMax'):
            params.pop(legacy_key, None)
        return step

class V12600ChartFiltersRelativeDateMultiSelect(migration_app.ChartsMigrationOperation):
    """
    Convert the 'dateFilterOption' of chart filters: RELATIVE date filters get
    the dict built by convert_relative_date_option(), the other filters lose
    the key.
    """
    def __init__(self):
        super(V12600ChartFiltersRelativeDateMultiSelect, self).__init__()

    def migrate_def(self, chart_def):
        for chart_filter in chart_def.get("filters", []):
            if "dateFilterOption" not in chart_filter:
                continue

            if chart_filter.get("dateFilterType", None) != "RELATIVE":
                # The option is meaningless outside of relative date filters
                chart_filter.pop('dateFilterOption', None)
                continue

            chart_filter["dateFilterOption"] = convert_relative_date_option(
                chart_filter["dateFilterOption"],
                chart_filter.get("minValue", 0),
                chart_filter.get("maxValue", 0))
            chart_filter.pop('minValue', None)
            chart_filter.pop('maxValue', None)

        return chart_def

class V12600DashboardFiltersRelativeDateMultiSelect(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Convert the 'dateFilterOption' of the filters stored in the 'filters'
    insight tiles of dashboards: RELATIVE date filters get the dict built by
    convert_relative_date_option(), the other filters lose the key.
    """
    def __repr__(self):
        return "Relative date filters become multiselect"

    def transform(self, obj, filepath):
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                # Skip everything that is not a 'filters' insight tile
                if tile.get("tileType", None) != "INSIGHT":
                    continue
                if tile.get("insightType", None) != "filters":
                    continue

                for tile_filter in tile.get("tileParams", {}).get("filters", []):
                    if "dateFilterOption" not in tile_filter:
                        continue
                    if tile_filter.get("dateFilterType", None) != "RELATIVE":
                        tile_filter.pop('dateFilterOption', None)
                        continue
                    tile_filter["dateFilterOption"] = convert_relative_date_option(
                        tile_filter["dateFilterOption"],
                        tile_filter.get("minValue", 0),
                        tile_filter.get("maxValue", 0))
                    tile_filter.pop('minValue', None)
                    tile_filter.pop('maxValue', None)
        return obj

    def file_patterns(self,):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

class V12600DatasetInsightsRelativeDateMultiSelect(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Convert the relative date option of the exploration filters stored in
    'dataset_table' insights to the dict built by convert_relative_date_option().
    """
    def __repr__(self):
        return "Relative date filters become multiselect"

    def transform(self, obj, filepath=None):
        if obj.get("type", "") != "dataset_table":
            # Other insight types are left untouched
            return obj

        shaker_script = obj.get("params", {}).get("shakerScript", {})
        for exploration_filter in shaker_script.get("explorationFilters", []):
            facet = exploration_filter.get("facet", {})
            if "dateFilterRelativeOption" not in facet:
                continue
            facet["dateFilterRelativeOption"] = convert_relative_date_option(
                facet["dateFilterRelativeOption"],
                facet.get("dateFilterRelativeLast", 0),
                facet.get("dateFilterRelativeNext", 0))
            facet.pop('dateFilterRelativeLast', None)
            facet.pop('dateFilterRelativeNext', None)
        return obj

    def file_patterns(self,):
        return ["insights/*.json"]

    def jsonpath(self):
        return ""

class V12600DatasetFilterRelativeDateMultiSelect(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Convert the relative date option of the exploration filters found under
    the 'script' object of explore files to the dict built by
    convert_relative_date_option().
    """
    def __repr__(self):
        return "Relative date filters become multiselect"

    def transform(self, obj, filepath):
        for exploration_filter in obj.get("explorationFilters", []):
            facet = exploration_filter.get("facet", {})
            if "dateFilterRelativeOption" not in facet:
                continue
            facet["dateFilterRelativeOption"] = convert_relative_date_option(
                facet["dateFilterRelativeOption"],
                facet.get("dateFilterRelativeLast", 0),
                facet.get("dateFilterRelativeNext", 0))
            # The counts now live inside the option dict
            facet.pop('dateFilterRelativeLast', None)
            facet.pop('dateFilterRelativeNext', None)

        return obj

    def file_patterns(self,):
        return ["explore/*.json"]

    def jsonpath(self):
        return "script"

class V12600ShakerFilterRelativeDateMultiSelect(migration_app.ShakerScriptMigrationOperation):
    """
    Convert the relative date option of the exploration filters of shaker
    scripts to the dict built by convert_relative_date_option().
    """
    def __init__(self):
        migration_app.ShakerScriptMigrationOperation.__init__(self)

    def transform_script(self, obj):
        for exploration_filter in obj.get("explorationFilters", []):
            facet = exploration_filter.get("facet", {})
            if "dateFilterRelativeOption" not in facet:
                continue
            facet["dateFilterRelativeOption"] = convert_relative_date_option(
                facet["dateFilterRelativeOption"],
                facet.get("dateFilterRelativeLast", 0),
                facet.get("dateFilterRelativeNext", 0))
            for legacy_key in ('dateFilterRelativeLast', 'dateFilterRelativeNext'):
                facet.pop(legacy_key, None)
        return obj

class V12600InvalidateEdaSampleCaches(migration_base.MigrationOperation):
    """Delete the <diphome>/caches/eda folder, when it exists."""
    def __repr__(self):
        return "Drop EDA sample caches"

    def execute(self, diphome, simulate=False):
        # NOTE(review): `simulate` is not consulted, the folder is removed either way
        cache_dir = osp.join(diphome.path, "caches", "eda")
        if not osp.isdir(cache_dir):
            return
        shutil.rmtree(cache_dir)



class V12600FixupZoomOptions(migration_app.ChartsMigrationOperation):
    """Copy the 'zoomOptions' of a chart to 'scatterZoomOptions' (the original key is kept)."""
    def __init__(self):
        super(V12600FixupZoomOptions, self).__init__()

    def migrate_def(self, chart_def):
        legacy_key = "zoomOptions"
        if legacy_key not in chart_def:
            return chart_def

        chart_def["scatterZoomOptions"] = chart_def[legacy_key]
        return chart_def

class V13000FixupLineFormatting(migration_app.ChartsMigrationOperation):
    """
    Copy the flat line settings of reference lines (lineSize, lineColor,
    lineType) and of the scatter regression line (lineSize, lineColor) into a
    nested 'lineFormatting' dict (size, color, type). The flat keys are kept.
    """
    def __init__(self):
        super(V13000FixupLineFormatting, self).__init__()

    @staticmethod
    def _copy_to_line_formatting(holder, key_pairs):
        """Ensure holder['lineFormatting'] exists, then copy each present legacy key under its new name."""
        if "lineFormatting" not in holder:
            holder["lineFormatting"] = {}
        for legacy_key, formatting_key in key_pairs:
            if legacy_key in holder:
                holder["lineFormatting"][formatting_key] = holder[legacy_key]

    def migrate_def(self, chart_def):
        for ref_line in chart_def.get("referenceLines", []):
            self._copy_to_line_formatting(
                ref_line,
                (("lineSize", "size"), ("lineColor", "color"), ("lineType", "type")))

        if "regression" in chart_def.get("scatterOptions", []):
            self._copy_to_line_formatting(
                chart_def["scatterOptions"]["regression"],
                (("lineSize", "size"), ("lineColor", "color")))

        return chart_def


###############################################################################
# V13000 / DSS 13.0.0
###############################################################################

class V13000AddNeedsInputDataFolderParamDeephub(migration_base.ProjectLocalMigrationOperation):
    """
    Set needsInputDataFolder=True on the deephub training / scoring recipes and
    on the deephub saved models of a project.

    NOTE(review): a class with the same name is defined again further down in
    this file; the later definition replaces this one at import time.
    """

    def __repr__(self, ):
        return "add a new needsInputDataFolder param to deephub models & deephub training / scoring recipes"

    def add_needsInputDataFolder_param(self, params):
        """
        Flag `params` in place when it describes a deephub model.

        :param params: parsed recipe params or saved model definition
        :return: True when the flag was set, False otherwise
        """
        if not (params and isinstance(params, dict)):
            return False
        #  backendType is at root for recipes params and in the miniTask params for SM params:
        if params.get("backendType") != "DEEP_HUB" and params.get("miniTask", {}).get("backendType") != "DEEP_HUB":
            return False
        # any deephub model/recipes prior to v13 needs an input data folder to retrieve images.
        params["needsInputDataFolder"] = True
        return True

    def _migrate_file(self, json_file, success_message, failure_message):
        """Load one file, flag it when relevant and write it back; failures are only reported."""
        try:
            data = base.json_loadf(json_file)
            if self.add_needsInputDataFolder_param(data):
                print(success_message % json_file)
                base.json_dumpf(json_file, data)
        except Exception as e:
            print(failure_message % e)

    def execute(self, project_paths):
        # Examples path: config/projects/PROJECT_KEY/recipes/*.prediction_training
        #                config/projects/PROJECT_KEY/recipes/*.prediction_scoring
        # Note: eval recipe not supported by deephub prior to v13.
        training_files = glob("%s/recipes/*.prediction_training" % project_paths.config)
        scoring_files = glob("%s/recipes/*.prediction_scoring" % project_paths.config)
        for recipe_params_file in training_files + scoring_files:
            self._migrate_file(
                recipe_params_file,
                "Adding needsInputDataFolder param in recipe deephub: %s ",
                "Addition of needsInputDataFolder param in deephub train or scoring recipe FAILED: %s")

        # Examples path: config/PROJECT_KEY/saved_models/58ipAuN7*.json
        for saved_model_file in glob("%s/saved_models/*.json" % project_paths.config):
            self._migrate_file(
                saved_model_file,
                "Adding needsInputDataFolder param in deephub saved model : %s ",
                "Addition of needsInputDataFolder param in deephub saved model FAILED: %s")
        # Note: No need to migrate trained-but-not-deployed deephub models: we set the needsInputDataFolder=true for each
        # new deephub training recipe

class V13000RenameFinetunedSavedModelFields(migration_base.ProjectLocalMigrationOperation):
    """
    Rename the keys listed in FIELDS_MAPPING inside the llm_info.json file of
    every saved model version of the project.
    """

    # legacy key -> new key
    FIELDS_MAPPING = {
        "openAIConnection": "connection",
        "openAIModelId": "remoteModelId",
    }

    def __repr__(self, ):
        return "make OpenAI-specific fields generic on fine-tuned LLM models"

    def execute(self, project_paths):
        # Examples path: saved_models/PROJECT_KEY/YhHuDsXk/versions/1711102329218/llm_info.json
        llm_info_pattern = "%s/*/versions/*/llm_info.json" % project_paths.saved_models
        for llm_info_file in glob(llm_info_pattern):
            try:
                llm_info = base.json_loadf(llm_info_file)
                renamed_any = False
                for old_key, new_key in self.FIELDS_MAPPING.items():
                    if old_key not in llm_info:
                        continue
                    llm_info[new_key] = llm_info.pop(old_key)
                    renamed_any = True
                if not renamed_any:
                    # Leave untouched files as they are on disk
                    continue
                print("Renamed fields in: %s" % llm_info_file)
                base.json_dumpf(llm_info_file, llm_info)
            except Exception as e:
                print("Renaming of fields in LLM info file %s FAILED: %s" % (llm_info_file, e))


class V13000FixupTileBorderColor(migration_json.ProjectConfigJsonMigrationOperation):
    """Give the '#D9D9D9' border color to every dashboard tile that has none."""
    def __repr__(self):
        return "Add border color to tiles"

    def transform(self, obj, filepath):
        for page in obj.get("pages", []):
            tiles = page.get("grid", {}).get("tiles", [])
            for tile in tiles:
                if tile.get("borderColor") is not None:
                    # Keep the color already chosen for this tile
                    continue
                tile["borderColor"] = '#D9D9D9'

        return obj

    def file_patterns(self,):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""


class V13000SplitGeneralSettingsFM(migration_base.MigrationOperation):
    """
    Move a fixed list of keys from config/settings.json to a new
    config/user-data.json file. Keys missing from settings.json are written
    to user-data.json with an empty default.
    """
    def __repr__(self,):
        return "Split user-data from the general settings to a dedicated file"

    def appliesTo(self):
        return ["fm"]

    def execute(self, diphome, simulate=False):
        # NOTE(review): `simulate` is not consulted, both files are written either way
        settings_file = osp.join(diphome.path, 'config', 'settings.json')
        if not osp.isfile(settings_file):
            return
        settings = base.json_loadf(settings_file)

        # (key, default used when settings.json does not hold the key)
        moved_keys = (
            ('databaseSettings', {}),
            ('cloud', ""),
            ('awsSettings', {}),
            ('azureSettings', {}),
            ('gcpSettings', {}),
            ('instanceVisibleURL', ""),
        )
        # pop() both fetches the value and removes the key from the original settings
        user_data_settings = {}
        for key, default in moved_keys:
            user_data_settings[key] = settings.pop(key, default)

        user_data_file = osp.join(diphome.path, 'config', 'user-data.json')
        print("Write user-data")
        with open(user_data_file, 'w') as user_data_stream:
            json.dump(user_data_settings, user_data_stream, indent=2)

        # Write the remaining settings back to the original settings.json file
        with open(settings_file, 'w') as settings_stream:
            json.dump(settings, settings_stream, indent=2)


class V12620FixupZoomScale(migration_app.ChartsMigrationOperation):
    """Turn a non-list scatterZoomOptions.scale value into a two-element list holding that value twice."""
    def __init__(self):
        super(V12620FixupZoomScale, self).__init__()

    def migrate_def(self, chart_def):
        if "scatterZoomOptions" not in chart_def:
            return chart_def

        zoom_options = chart_def["scatterZoomOptions"]
        if "scale" not in zoom_options:
            return chart_def

        scale = zoom_options["scale"]
        if type(scale) is not list:
            zoom_options["scale"] = [scale, scale]

        return chart_def

class V12620FixDashboardDateFiltersWithRelativeOption(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Convert the page-level dashboard date filters whose 'dateFilterOption' is
    still a plain string: RELATIVE date filters get the dict built by
    convert_relative_date_option(), the other filters lose the key.
    Filters whose option is already converted (not a string) are left alone.
    """
    def __repr__(self):
        return "Date filters with a relative option not migrated yet"

    def transform(self, obj, filepath):
        """
        :param obj: parsed dashboard, modified in place
        :param filepath: path of the dashboard file (unused)
        :return: the dashboard
        """
        for page in obj.get("pages", []):
            for page_filter in page.get("filters", []):
                # Test against dku_basestring_type rather than str: on Python 2
                # the strings coming out of the JSON parser are unicode objects,
                # which a plain `str` check would silently skip
                if not isinstance(page_filter.get("dateFilterOption"), dku_basestring_type):
                    continue

                if page_filter.get("dateFilterType", None) != "RELATIVE":
                    page_filter.pop('dateFilterOption', None)
                    continue

                page_filter["dateFilterOption"] = convert_relative_date_option(
                    page_filter.get("dateFilterOption", ""),
                    page_filter.get("minValue", 0),
                    page_filter.get("maxValue", 0))
                page_filter.pop('minValue', None)
                page_filter.pop('maxValue', None)
        return obj

    def file_patterns(self,):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

class V12620FixChartDateFiltersWithRelativeOption(migration_app.ChartsMigrationOperation):
    """
    Convert the chart date filters whose 'dateFilterOption' is still a plain
    string: RELATIVE date filters get the dict built by
    convert_relative_date_option(), the other filters lose the key.
    Filters whose option is already converted (not a string) are left alone.
    """
    def __init__(self):
        super(V12620FixChartDateFiltersWithRelativeOption, self).__init__()

    def migrate_def(self, chart_def):
        """
        :param chart_def: parsed chart definition, modified in place
        :return: the chart definition
        """
        for chart_filter in chart_def.get("filters", []):
            # Test against dku_basestring_type rather than str: on Python 2
            # the strings coming out of the JSON parser are unicode objects,
            # which a plain `str` check would silently skip
            if not isinstance(chart_filter.get("dateFilterOption"), dku_basestring_type):
                continue

            if chart_filter.get("dateFilterType", None) != "RELATIVE":
                chart_filter.pop('dateFilterOption', None)
                continue

            chart_filter["dateFilterOption"] = convert_relative_date_option(
                chart_filter.get("dateFilterOption", ""),
                chart_filter.get("minValue", 0),
                chart_filter.get("maxValue", 0))
            chart_filter.pop('minValue', None)
            chart_filter.pop('maxValue', None)

        return chart_def

class V12650FixChartDateFilters(migration_app.ChartsMigrationOperation):
    """Remove the non-null 'dateFilterOption' of the chart filters that are not RELATIVE date filters."""
    def __init__(self):
        super(V12650FixChartDateFilters, self).__init__()

    def migrate_def(self, chart_def):
        for chart_filter in chart_def.get("filters", []):
            if chart_filter.get("dateFilterType", None) == "RELATIVE":
                continue
            if chart_filter.get("dateFilterOption", None) is None:
                continue
            chart_filter.pop('dateFilterOption', None)

        return chart_def


class V12650FixDashboardDateFilters(migration_json.ProjectConfigJsonMigrationOperation):
    """Remove the non-null 'dateFilterOption' of the page-level dashboard filters that are not RELATIVE date filters."""
    def __repr__(self):
        return "Remove relative option from non relative date filters"

    def transform(self, obj, filepath):
        for page in obj.get("pages", []):
            for page_filter in page.get("filters", []):
                if page_filter.get("dateFilterType", None) == "RELATIVE":
                    continue
                if page_filter.get("dateFilterOption", None) is None:
                    continue
                page_filter.pop('dateFilterOption', None)
        return obj

    def file_patterns(self, ):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""


def replace_empty_key_in_filter(filter):
    """
    Rename, in place, the empty-string key of the 'excludedValues' and
    'selectedValues' entries of a filter to '___dku_no_value___'.
    Entries that are absent, or that hold no empty key, are left untouched.
    """
    for values_key in ("excludedValues", "selectedValues"):
        if values_key not in filter:
            continue
        values = filter[values_key]
        if "" in values:
            values["___dku_no_value___"] = values.pop("")


class V12650FixupChartFiltersWithNullValue(migration_app.ChartsMigrationOperation):
    """Apply replace_empty_key_in_filter() to every filter of a chart."""
    def __init__(self):
        super(V12650FixupChartFiltersWithNullValue, self).__init__()

    def migrate_def(self, chart_def):
        chart_filters = chart_def.get("filters", [])
        for chart_filter in chart_filters:
            replace_empty_key_in_filter(chart_filter)

        return chart_def


class V12650FixupDashboardFiltersWithNullValue(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply replace_empty_key_in_filter() to every page-level filter of a dashboard."""
    def __repr__(self):
        return "Replace null key by '___dku_no_value___' in filters"

    def transform(self, obj, filepath):
        for page in obj.get("pages", []):
            page_filters = page.get("filters", [])
            for page_filter in page_filters:
                replace_empty_key_in_filter(page_filter)

        return obj

    def file_patterns(self, ):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

class V12600UseImplicitCluster(migration_json.JsonMigrationOperation):
    """
    Existing instances get settings.useImplicitK8sCluster forced to true,
    for backwards compatibility.
    """
    def __repr__(self):
        return "Mark instance as possibly using the implicit K8S cluster"

    def transform(self, settings, filepath=None):
        # Unconditional: any previous value of the flag is overwritten
        settings.update({'useImplicitK8sCluster': True})
        return settings

    def file_patterns(self):
        return ["config/general-settings.json"]


class V12600IAMGroupsRestriction(migration_json.JsonMigrationOperation):
    """
    Turn the comma-separated 'authorizedGroups' string of the LDAP, SSO,
    Azure AD and custom auth settings into a list (an empty string becomes an
    empty list; values that are already lists are kept).
    """
    def __repr__(self,):
        return "Migrate the IAM groups restriction from comma-separated list to actual list"

    def appliesTo(self):
        return [ "design", "automation", "govern" ]

    def transform(self, settings, filepath=None):
        for section_key in ('ldapSettings', 'ssoSettings', 'azureADSettings', 'customAuthSettings'):
            section = settings.get(section_key, {})
            authorized_groups = section.get('authorizedGroups', '')
            if isinstance(authorized_groups, list):
                # Already migrated
                continue
            if authorized_groups == '':
                section['authorizedGroups'] = []
            else:
                section['authorizedGroups'] = authorized_groups.split(',')

        return settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]


class V12600IAMGroupsRestrictionInFM(migration_json.JsonMigrationOperation):
    """
    Turn the comma-separated 'authorizedGroups' string of the LDAP, SSO and
    Azure AD settings into a list (an empty string becomes an empty list;
    values that are already lists are kept).
    """
    def __repr__(self,):
        return "Migrate the IAM groups restriction from comma-separated list to actual list"

    def appliesTo(self):
        return [ "fm" ]

    def transform(self, settings, filepath=None):
        for section_key in ('ldapSettings', 'ssoSettings', 'azureADSettings'):
            section = settings.get(section_key, {})
            authorized_groups = section.get('authorizedGroups', '')
            if isinstance(authorized_groups, list):
                # Already migrated
                continue
            if authorized_groups == '':
                section['authorizedGroups'] = []
            else:
                section['authorizedGroups'] = authorized_groups.split(',')

        return settings

    def file_patterns(self, ):
        return ["config/settings.json"]


class V12600IAMGroupMappings(migration_json.JsonMigrationOperation):
    """
    Turn the comma-separated LDAP, Azure AD, SSO and custom group names of a
    group into lists (an empty or missing value becomes an empty list; values
    that are already lists are kept).
    """
    def __repr__(self,):
        return "Migrate the IAM group mappings from comma-separated list to actual list"

    def appliesTo(self):
        return [ "design", "automation", "govern" ]

    def transform(self, group, filepath=None):
        for names_key in ('ldapGroupNames', 'azureADGroupNames', 'ssoGroupNames', 'customGroupNames'):
            group_names = group.get(names_key, '')
            if isinstance(group_names, list):
                # Already migrated
                continue
            if group_names == '':
                group[names_key] = []
            else:
                group[names_key] = group_names.split(',')

        return group

    def jsonpath(self,):
        return "groups"

    def file_patterns(self,):
        return ["config/users.json"]

class V12620LLMConnectionsAndCodeEnvs(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Versions before 12.6 add neither the LLM connection types to the `requiredConnections` field nor the code envs
    used by knowledge banks to the `usedCodeEnvRefs` field of the manifest.
    From version 12.6 onwards, exported models allow remapping of LLM connections and code envs,
    so models exported before that version may have an incomplete manifest, making it impossible to import them if
    items like prompt studios, knowledge banks or NLP recipes are using LLM connections and code envs.

    execute() collects the connections and code envs used by the project, get_manifest_additions() then adds the
    missing ones to the manifest additions.
    """
    def __init__(self):
        # connection name -> connection type, filled by execute()
        self.llm_connections = {}
        # names of the explicitly selected knowledge bank code envs, filled by execute()
        self.code_envs = set()

    def __repr__(self,):
        return "Add llm connections to manifest"

    #  Don't update these connection types nor llm recipe types since they are generated from the snapshot at the
    #  time of creating this migration step
    connection_types = {
        "openai": "OpenAI",
        "azureopenai": "AzureOpenAI",
        "cohere": "Cohere",
        "mosaicml": "MosaicML",
        "anthropic": "Anthropic",
        "vertex": "VertexAILLM",
        "bedrock": "Bedrock",
        "huggingfaceapi": "HuggingFaceInferenceAPI",
        "huggingfacelocal": "HuggingFaceLocal",
        "databricksllm": "DatabricksLLM",
        "sagemaker-generic": "SageMaker-GenericLLM",
        "custom": "CustomLLM"
    }

    llm_recipe_types = [
        "nlp_llm_model_provided_classification",
        "nlp_llm_user_provided_classification",
        "prompt",
        "nlp_llm_summarization"
    ]

    class Connection(object):
        """Name and type of a connection referenced by an LLM id."""
        def __init__(self, name, type_):
            """
            :type name: str
            :type type_: str
            """
            self.name = name
            self.type = type_

    @staticmethod
    def parse_llm_for_connection(llm_id):
        """
        Extract the connection from an LLM id shaped like `<prefix>:<connection name>[:...]`.

        :type llm_id: str
        :rtype: V12620LLMConnectionsAndCodeEnvs.Connection or None
        :return: None when the id is empty, has fewer than two ':'-separated parts or uses an unknown prefix
        """
        if not llm_id:
            return None
        connection_items = llm_id.split(":")
        if len(connection_items) < 2:
            return None
        if connection_items[0] not in V12620LLMConnectionsAndCodeEnvs.connection_types:
            return None

        name = connection_items[1]
        type_ = V12620LLMConnectionsAndCodeEnvs.connection_types[connection_items[0]]
        return V12620LLMConnectionsAndCodeEnvs.Connection(name, type_)

    @staticmethod
    def get_connections_from_ps(prompt_studio):
        """
        Connections referenced by the `llmId` of the prompts of a prompt studio.

        :type prompt_studio: dict
        :rtype: dict[str, str]
        :return: connection name -> connection type
        """
        connections = {}
        prompts = prompt_studio.get("prompts", [])
        for prompt in prompts:
            valid_connection = V12620LLMConnectionsAndCodeEnvs.parse_llm_for_connection(prompt.get('llmId'))
            if valid_connection is None:
                continue
            connections[valid_connection.name] = valid_connection.type
        return connections

    @staticmethod
    def get_connections_from_kb(knowledge_bank):
        """
        Connections referenced by the `embeddingLLMId` and by the `llmsExposedWith` entries of a knowledge bank.

        :type knowledge_bank: dict
        :rtype: dict[str, str]
        :return: connection name -> connection type
        """
        connections = {}
        valid_connection = V12620LLMConnectionsAndCodeEnvs.parse_llm_for_connection(knowledge_bank.get("embeddingLLMId"))
        if valid_connection is not None:
            connections[valid_connection.name] = valid_connection.type
        for exposed_llm in knowledge_bank.get("llmsExposedWith", []):
            valid_connection = V12620LLMConnectionsAndCodeEnvs.parse_llm_for_connection(exposed_llm.get('llmId'))
            if valid_connection is None:
                continue
            connections[valid_connection.name] = valid_connection.type
        return connections

    @staticmethod
    def get_code_env_from_kb(knowledge_bank):
        """
        Name of the code env explicitly selected by a knowledge bank.

        :type knowledge_bank: dict
        :rtype: str or None
        :return: the `envName` of the env selection when its mode is EXPLICIT_ENV, None otherwise
        """
        env_selection = knowledge_bank.get("envSelection")
        if env_selection is None:
            return None
        if env_selection.get("envMode") != "EXPLICIT_ENV":
            return None
        return env_selection.get("envName")

    @staticmethod
    def get_connections_from_nlp_recipe(recipe):
        """
        Connection referenced by the `llmId` of an NLP recipe payload.

        :type recipe: dict
        :rtype: dict[str, str]
        :return: connection name -> connection type (at most one entry)
        """
        connections = {}
        valid_connection = V12620LLMConnectionsAndCodeEnvs.parse_llm_for_connection(recipe.get('llmId'))
        if valid_connection is not None:
            connections[valid_connection.name] = valid_connection.type
        return connections

    @staticmethod
    def get_connections_from_lambda(lambda_service):
        """
        Connections referenced by the `llmId` of the LLM_PROMPT endpoints of a lambda service.

        :type lambda_service: dict
        :rtype: dict[str, str]
        :return: connection name -> connection type
        """
        connections = {}
        endpoints = lambda_service.get("endpoints", [])
        for endpoint in endpoints:
            # A missing type (None) is rejected by this comparison as well
            if endpoint.get('type') != 'LLM_PROMPT':
                continue
            valid_connection = V12620LLMConnectionsAndCodeEnvs.parse_llm_for_connection(endpoint.get('llmId'))
            if valid_connection is None:
                continue
            connections[valid_connection.name] = valid_connection.type
        return connections

    def execute(self, project_paths):
        """
        Collect into self.llm_connections and self.code_envs what the prompt studios, knowledge banks,
        LLM recipes and lambda services of the project use. Files that cannot be processed are reported
        and skipped.

        :type project_paths: migration_base.ProjectPaths
        :rtype: None
        """
        for ps_file in glob("%s/prompt-studios/*.json" % project_paths.config):
            try:
                prompt_studio = base.json_loadf(ps_file)
                self.llm_connections.update(self.get_connections_from_ps(prompt_studio))
            except Exception as e:
                print("error loading connections from ps {}, e:{}".format(ps_file, e))

        for kb_file in glob("%s/knowledge-banks/*.json" % project_paths.config):
            try:
                knowledge_bank = base.json_loadf(kb_file)
                self.llm_connections.update(self.get_connections_from_kb(knowledge_bank))
                code_env = self.get_code_env_from_kb(knowledge_bank)
                if code_env is not None:
                    self.code_envs.add(code_env)
            except Exception as e:
                print("error loading connections or code envs from kb {}, e:{}".format(kb_file, e))

        for llm_recipe_type in self.llm_recipe_types:
            for recipe_payload_file in glob("%s/recipes/*.%s" % (project_paths.config, llm_recipe_type)):
                try:
                    payload = base.json_loadf(recipe_payload_file)
                    self.llm_connections.update(self.get_connections_from_nlp_recipe(payload))
                except Exception as e:
                    print("error loading connections from recipe {}, e:{}".format(recipe_payload_file, e))

        for lambda_file in glob("%s/lambda_services/*.json" % project_paths.config):
            try:
                lambda_service = base.json_loadf(lambda_file)
                self.llm_connections.update(self.get_connections_from_lambda(lambda_service))
            except Exception as e:
                print("error loading connections from lambda service {}, e:{}".format(lambda_file, e))

    def update_connections(self, required_connections):
        """
        Add the collected connections that `required_connections` does not hold yet; existing entries are kept.

        :type required_connections: dict[str, dict[str, str]]
        :rtype: dict[str, dict[str, str or boolean]]
        :return: the same dict object, updated in place
        """
        if not self.llm_connections:
            return required_connections
        for connection_name, connection_type in iteritems(self.llm_connections):
            if connection_name not in required_connections:
                required_connections[connection_name] = {'name': connection_name, 'type': connection_type, 'isMissing': False}
        return required_connections

    def update_code_envs(self, used_code_env_refs):
        """
        Append the collected code envs that `used_code_env_refs` does not reference yet.

        :type used_code_env_refs: list[dict[str, str]]
        :rtype: list[dict[str, str]]
        :return: the same list object, updated in place
        """
        if not self.code_envs:
            return used_code_env_refs

        #  We don't want to override the existing ones, so we remove them from our code_envs set
        for use_code_env in used_code_env_refs:
            use_code_env_name = use_code_env.get("envName", "")
            if use_code_env_name in self.code_envs:
                self.code_envs.remove(use_code_env_name)

        for new_code_env in self.code_envs:
            used_code_env_refs.append({"envName": new_code_env, "envLang": "PYTHON"})
        return used_code_env_refs

    def get_manifest_additions(self, additions, project_paths):
        """
        Complete, in place, the `requiredConnections` and `usedCodeEnvRefs` entries of `additions`
        with what execute() collected. Nothing is returned.
        """
        updated_connections = self.update_connections(additions.get('requiredConnections', {}))
        if updated_connections:
            additions['requiredConnections'] = updated_connections

        updated_code_envs = self.update_code_envs(additions.get("usedCodeEnvRefs", []))
        if updated_code_envs:
            additions["usedCodeEnvRefs"] = updated_code_envs

###############################################################################
# V13000 / DSS 13.0.0
###############################################################################

class V13000AddNeedsInputDataFolderParamDeephub(migration_base.ProjectLocalMigrationOperation):
    """
    Sets ``needsInputDataFolder`` to True in the deephub prediction training / scoring
    recipes and in the deephub saved models of a project.
    """

    def __repr__(self):
        return "add a new needsInputDataFolder param to deephub models & deephub training / scoring recipes"

    def add_needsInputDataFolder_param(self, params):
        """
        Flag ``params`` (updated in place) when it describes a deephub model.

        :param params: content of a recipe params file or of a saved model file
        :return: True when ``params`` was modified, False otherwise
        """
        if not (params and isinstance(params, dict)):
            return False
        # backendType is at root for recipes params and in the miniTask params for SM params
        if params.get("backendType") != "DEEP_HUB":
            if params.get("miniTask", {}).get("backendType") != "DEEP_HUB":
                return False
        # any deephub model/recipe prior to v13 needs an input data folder to retrieve images
        params["needsInputDataFolder"] = True
        return True

    def _add_param_in_files(self, json_files, modified_message, failure_message):
        """Apply add_needsInputDataFolder_param to each file, rewriting only the modified ones."""
        for json_file in json_files:
            try:
                content = base.json_loadf(json_file)
                if self.add_needsInputDataFolder_param(content):
                    print(modified_message % json_file)
                    base.json_dumpf(json_file, content)
            except Exception as e:
                # Best effort: report the failure and carry on with the next file
                print(failure_message % e)

    def execute(self, project_paths):
        # Examples path: config/projects/PROJECT_KEY/recipes/*.prediction_training
        #                config/projects/PROJECT_KEY/recipes/*.prediction_scoring
        # Note: eval recipe not supported by deephub prior to v13.
        recipe_files = (glob("%s/recipes/*.prediction_training" % project_paths.config)
                        + glob("%s/recipes/*.prediction_scoring" % project_paths.config))
        self._add_param_in_files(
            recipe_files,
            "Adding needsInputDataFolder param in recipe deephub: %s ",
            "Addition of needsInputDataFolder param in deephub train or scoring recipe FAILED: %s")

        # Examples path: config/PROJECT_KEY/saved_models/58ipAuN7*.json
        saved_model_files = glob("%s/saved_models/*.json" % project_paths.config)
        self._add_param_in_files(
            saved_model_files,
            "Adding needsInputDataFolder param in deephub saved model : %s ",
            "Addition of needsInputDataFolder param in deephub saved model FAILED: %s")
        # Note: No need to migrate trained-but-not-deployed deephub models: we set the needsInputDataFolder=true for each
        # new deephub training recipe


class V13000RenameFinetunedSavedModelFields(migration_base.ProjectLocalMigrationOperation):
    """Renames the keys listed in FIELDS_MAPPING in the llm_info.json files of saved model versions."""

    # old key -> new key
    FIELDS_MAPPING = {
        "openAIConnection": "connection",
        "openAIModelId": "remoteModelId",
    }

    def __repr__(self):
        return "make OpenAI-specific fields generic on fine-tuned LLM models"

    def execute(self, project_paths):
        # Examples path: saved_models/PROJECT_KEY/YhHuDsXk/versions/1711102329218/llm_info.json
        llm_info_files = glob("%s/*/versions/*/llm_info.json" % project_paths.saved_models)
        for llm_info_file in llm_info_files:
            try:
                llm_info = base.json_loadf(llm_info_file)
                renamed_any = False
                for legacy_key, generic_key in self.FIELDS_MAPPING.items():
                    if legacy_key not in llm_info:
                        continue
                    llm_info[generic_key] = llm_info.pop(legacy_key)
                    renamed_any = True
                # Only rewrite the files that actually changed
                if renamed_any:
                    print("Renamed fields in: %s" % llm_info_file)
                    base.json_dumpf(llm_info_file, llm_info)
            except Exception as e:
                print("Renaming of fields in LLM info file %s FAILED: %s" % (llm_info_file, e))


class V13000SplitGeneralSettingsFM(migration_base.MigrationOperation):
    """
    Moves a fixed list of keys from ``config/settings.json`` to a new ``config/user-data.json``.
    """

    def __repr__(self):
        return "Split user-data from the general settings to a dedicated file"

    def appliesTo(self):
        return ["fm"]

    def execute(self, diphome, simulate=False):
        """
        Write ``config/user-data.json`` with the moved keys and rewrite ``config/settings.json``
        without them. Does nothing when ``settings.json`` does not exist.
        """
        config_dir = osp.join(diphome.path, 'config')
        settings_file = osp.join(config_dir, 'settings.json')
        if not osp.isfile(settings_file):
            return
        settings = base.json_loadf(settings_file)

        # Keys to move, with the value stored in user-data.json when settings.json lacks the key
        moved_keys = (
            ('databaseSettings', {}),
            ('cloud', ""),
            ('awsSettings', {}),
            ('azureSettings', {}),
            ('gcpSettings', {}),
            ('instanceVisibleURL', ""),
        )
        user_data_settings = {}
        for key, fallback in moved_keys:
            # pop() both reads the value and removes the migrated key from the original settings
            user_data_settings[key] = settings.pop(key, fallback)

        user_data_file = osp.join(config_dir, 'user-data.json')
        print("Write user-data")
        with open(user_data_file, 'w') as user_data_stream:
            json.dump(user_data_settings, user_data_stream, indent=2)

        # Write the modified settings back to the original settings.json file
        with open(settings_file, 'w') as settings_file_stream:
            json.dump(settings, settings_file_stream, indent=2)


class V13000DefaultSAMLHashAlgorithmInFM(migration_json.JsonMigrationOperation):
    """
    Sets the SAML hashing algorithm to SHA1 in ``config/general-settings.json`` when request
    signing is enabled. Declared for design, automation and govern nodes.
    """

    def __repr__(self):
        return "Default SAML hash algorithm to SHA-1 if already signing client requests"

    def appliesTo(self):
        return ["design", "automation", "govern"]

    def file_patterns(self):
        return ["config/general-settings.json"]

    def jsonpath(self):
        return "ssoSettings.samlSPParams"

    def transform(self, samlSPParams, filepath=None):
        """Set ``hashingAlgorithm`` to SHA1 when ``signRequests`` is truthy; returns the same object."""
        signs_requests = samlSPParams.get('signRequests', False)
        if signs_requests:
            samlSPParams['hashingAlgorithm'] = 'SHA1'
        return samlSPParams


class V13000DefaultSAMLHashAlgorithmInFMInFM(migration_json.JsonMigrationOperation):
    """
    Sets the SAML hashing algorithm to SHA1 in ``config/settings.json`` when request signing
    is enabled. Declared for fm nodes.
    """

    def __repr__(self):
        return "Default SAML hash algorithm to SHA-1 if already signing client requests"

    def appliesTo(self):
        return ["fm"]

    def file_patterns(self):
        return ["config/settings.json"]

    def jsonpath(self):
        return "ssoSettings.samlSPParams"

    def transform(self, samlSPParams, filepath=None):
        """Set ``hashingAlgorithm`` to SHA1 when ``signRequests`` is truthy; returns the same object."""
        signs_requests = samlSPParams.get('signRequests', False)
        if signs_requests:
            samlSPParams['hashingAlgorithm'] = 'SHA1'
        return samlSPParams


class V13000AddRunIdToPromptStudioResponses(migration_base.ProjectLocalMigrationOperation):
    """Stores in each prompt studio history response the run id found in its file name."""

    def __repr__(self):
        return "Add runId to history response object"

    def execute(self, project_paths):
        response_pattern = "%s/*/prompts/*/history-responses/response-*.json" % project_paths.prompt_studios
        for response_file in glob(response_pattern):
            print("Extracting runId from history response filename {} and adding it to response object".format(response_file))
            try:
                # File name without extension, with the "response-" marker stripped
                file_stem = os.path.splitext(os.path.basename(response_file))[0]
                run_id = file_stem.replace("response-", "")

                history_response = base.json_loadf(response_file)
                history_response["runId"] = run_id
                base.json_dumpf(response_file, history_response)
            except Exception as e:
                logging.exception("Failed to add runId to file {}: {}".format(response_file, e))


class V13000FixGlobalExplanationFactsJson(migration_base.ProjectLocalMigrationOperation):
    """Replaces NaN float values by None in the ``perClassFacts`` of global explanations facts files."""

    def __repr__(self):
        return "Fix global explanations facts jsons"

    def execute(self, project_paths):
        import math

        # dss-home/analysis-data/VISUALML/rb0ukg2u/1JKOJGhx/sessions/s19/pp1/m1/global_explanations_facts.json
        facts_pattern = "%s/*/*/sessions/s*/pp*/m*/global_explanations_facts.json" % project_paths.analysis_data
        for global_explanation_file in glob(facts_pattern):
            try:
                facts = base.json_loadf(global_explanation_file)
                found_nan = False
                per_class_facts = facts["perClassFacts"]
                for klass in per_class_facts.keys():
                    per_column_facts = per_class_facts[klass]
                    for column in per_column_facts.keys():
                        column_facts = per_column_facts[column]
                        for attr in column_facts.keys():
                            value = column_facts[attr]
                            if isinstance(value, float) and math.isnan(value):
                                found_nan = True
                                column_facts[attr] = None
                # Only rewrite the files that contained at least one NaN
                if found_nan:
                    print("Fixed NaNs in: %s" % global_explanation_file)
                    base.json_dumpf(global_explanation_file, facts)
            except Exception as e:
                print("Fixing NaNs in global explanations file %s FAILED: %s" % (global_explanation_file, e))

class V13000FixupDashboardFiltersEngineType(migration_json.ProjectConfigJsonMigrationOperation):
    """Gives an engine type to the dashboard pages that have filters but no engine type."""

    def __repr__(self):
        return "Initialise engine type for page with filters if it is not set"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """
        For each page with at least one filter, create ``filtersParams`` when it is missing
        and set its ``engineType`` to LINO when it is unset. Returns the same object.
        """
        for page in obj.get("pages", []):
            if len(page.get("filters", [])) == 0:
                # Pages without filters are left as they are
                continue

            if page.get("filtersParams", None) is None:
                page["filtersParams"] = {
                    "panelPosition": "TOP"
                }

            filters_params = page["filtersParams"]
            if filters_params.get("engineType", None) is None:
                filters_params["engineType"] = "LINO"
        return obj


###############################################################################
# V13010 / DSS 13.0.1
###############################################################################

class V13010FixupChartsMinMaxAggregation(migration_app.ChartsMigrationOperation):
    """Renames the STRING_MIN / STRING_MAX measure functions to OBJECT_MIN / OBJECT_MAX in chart definitions."""

    def __init__(self):
        super(V13010FixupChartsMinMaxAggregation, self).__init__()

    def migrate_def(self, chart_def):
        """Rewrite the renamed functions in the generic and tooltip measures; returns ``chart_def``."""
        renamed_functions = {
            "STRING_MIN": "OBJECT_MIN",
            "STRING_MAX": "OBJECT_MAX"
        }
        # min & max on alphanums are only applicable in these attributes from the UI
        for measures_key in ("genericMeasures", "tooltipMeasures"):
            for measure in chart_def.get(measures_key, []):
                current_function = measure.get("function", "COUNT")
                if current_function not in renamed_functions:
                    continue
                measure["function"] = renamed_functions[current_function]
        return chart_def

###############################################################################
# V13100 / DSS 13.1.0
###############################################################################

class V13100UpdateProjectBundleContentConfiguration(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Adds an ``Inline`` entry to the bundle exporter ``includedDatasetsData`` list for each
    ``datasets/*.data`` file found next to the project ``params.json``.
    """

    def __repr__(self):
        return "update project bundle content configuration to add editable datasets in additional datasets list"

    def appliesTo(self):
        return ["design", "automation"]

    def file_patterns(self):
        return ["params.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath=None):
        """Complete ``includedDatasetsData`` in place when that list exists; returns ``obj``."""
        export_options = obj.get("bundleExporterSettings", {}).get("exportOptions", {})
        included_datasets_data = export_options.get("includedDatasetsData", None)
        if included_datasets_data is None:
            return obj

        # The .data files are looked up in the datasets folder that sits beside the migrated file
        project_dir = os.path.split(filepath)[0]
        for editable_data in glob("%s/datasets/*.data" % project_dir):
            dataset_name = osp.splitext(osp.basename(editable_data))[0]
            dataset_obj = {'name': dataset_name, 'type': 'Inline'}
            if dataset_obj in included_datasets_data:
                continue
            print("Adding Editable dataset %s in bundleExporterSettings.exportOptions.includedDatasetsData" % dataset_name)
            included_datasets_data.append(dataset_obj)
        return obj


def V13100_migrate_single_rule(rule, filepath):
    """
    Migrate one Data Quality rule config to the multi-column format.

    - For the single-column rule types listed below, the ``column`` key is replaced by a
      ``columns`` list (empty when there was no column).
    - For ``ColumnMeaningValidityRule``, the ``column`` and ``meaning`` keys are moved into a
      single entry of a new ``columnSpecs`` list.
    - Any other rule type is left untouched.

    :param rule: the rule config; updated in place when it is a dict
    :param filepath: path of the file being migrated, only used in the warning message (may be None)
    :return: ``rule`` itself
    """
    if not isinstance(rule, dict):
        # what? should never happen and will cause issues in the instance, but let's not fail the full migration for it
        # (format() rather than "+" so that a None filepath cannot raise here)
        print("WARNING: rule config is invalid. File: {}".format(filepath))
        return rule

    single_column_rule_types = (
        'ColumnUniqueValuesRule',
        'ColumnMinInRangeRule',
        'ColumnAvgInRangeRule',
        'ColumnMaxInRangeRule',
        'ColumnSumInRangeRule',
        'ColumnMedianInRangeRule',
        'ColumnStdDevInRangeRule',
        'TopValuesInSetRule',
        'ModeValueInSetRule',
    )
    rule_type = rule.get('type', None)

    if rule_type in single_column_rule_types:
        column = rule.pop('column', None)
        rule['columns'] = [] if column is None else [column]
    elif rule_type == 'ColumnMeaningValidityRule':
        column_spec = {}
        for key in ('column', 'meaning'):
            if key in rule:
                column_spec[key] = rule.pop(key)
        rule['columnSpecs'] = [column_spec]
    return rule

class V13100UpdateDQRulesForMultiColumnsInDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Applies V13100_migrate_single_rule at ``metricsChecks.checks`` of the dataset files."""

    def __repr__(self):
        return "Upgrades Data Quality rule configs for multiple column selection in datasets"

    def file_patterns(self):
        return ["datasets/*.json"]

    def jsonpath(self):
        return "metricsChecks.checks"

    def transform(self, rule, filepath=None):
        """Delegate to the shared rule migration; returns what it returns."""
        migrated_rule = V13100_migrate_single_rule(rule, filepath)
        return migrated_rule

class V13100UpdateDQRulesForMultiColumnsInTemplates(migration_json.JsonMigrationOperation):
    """Applies V13100_migrate_single_rule at ``rules`` of the Data Quality template files."""

    def __repr__(self):
        return "Upgrades Data Quality rule configs for multiple column selection in DQ templates"

    def file_patterns(self):
        return ["config/data-quality-templates/*.json"]

    def jsonpath(self):
        return "rules"

    def transform(self, rule, filepath=None):
        """Delegate to the shared rule migration; returns what it returns."""
        migrated_rule = V13100_migrate_single_rule(rule, filepath)
        return migrated_rule

class V13000MergeRequestInGitIgnore(migration_base.ProjectLocalMigrationOperation):
    """Appends ``merge-requests.json`` to the project ``.gitignore`` when the file exists and lacks it."""

    def __repr__(self):
        return "Add file for merge requests to gitignore"

    def execute(self, project_paths):
        """
        Add the ``merge-requests.json`` entry to ``<project config>/.gitignore``.

        Nothing is created when the project has no ``.gitignore``. Failures are logged with
        their traceback and do not interrupt the migration.
        """
        gitignore_path = osp.join(project_paths.config, '.gitignore')
        if not osp.exists(gitignore_path):
            return
        try:
            with open(gitignore_path, 'r') as fd:
                data = fd.read()
            if 'merge-requests.json' not in data:
                with open(gitignore_path, 'w') as fd:
                    fd.write(data + '\nmerge-requests.json\n')
        except Exception:
            # logging.exception already attaches the active exception; the path is passed as a
            # lazy %-argument (passing the exception as an extra positional argument made the
            # logging module fail to format the record).
            logging.exception("Failed to migrate gitignore at %s", gitignore_path)

class V13100UpdateChartDimensionOneTickPerBin(migration_app.ChartsMigrationOperation):
    """Converts the boolean ``oneTickPerBin`` of chart dimensions to the "YES" / "NO" strings."""

    def __init__(self):
        super(V13100UpdateChartDimensionOneTickPerBin, self).__init__()

    def migrate_def(self, chart_def):
        """Rewrite ``oneTickPerBin`` on every listed dimension (absent counts as False); returns ``chart_def``."""
        dimension_keys = (
            "genericDimension0",
            "genericDimension1",
            "facetDimension",
            "animationDimension",
            "xDimension",
            "yDimension",
            "groupDimension",
            "boxplotBreakdownDim",
        )
        for dimension_key in dimension_keys:
            for dimension in chart_def.get(dimension_key, []):
                current_value = dimension.get("oneTickPerBin", False)
                # Values that are not booleans are left untouched
                if not isinstance(current_value, bool):
                    continue
                dimension["oneTickPerBin"] = "YES" if current_value else "NO"
        return chart_def

class V13100FixupTilesOpts(migration_json.ProjectConfigJsonMigrationOperation):
    """Fills the ``backgroundOpacity``, ``borderOptions`` and ``titleOptions`` of dashboard tiles from their older fields."""

    def __repr__(self):
        return "Migrate tiles' title and border and transparency options to match the new class structure"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """Update every tile of every page in place; returns ``obj``."""
        for page in obj.get("pages", []):
            for tile in page.get("grid", {}).get("tiles", []):
                self._migrate_tile(tile)
        return obj

    def _migrate_tile(self, tile):
        """Apply the transparency, border and title migration to a single tile."""
        tile_params = tile.get("tileParams", {})
        if tile_params.get("isTransparent", False):
            tile["backgroundOpacity"] = 0

        # Border: create the options when missing, then carry over the old border color
        border_options = tile.get("borderOptions")
        if border_options is None:
            border_options = {
                "color": '#D9D9D9',
                "radius": 0,
                "size": 1
            }
            tile["borderOptions"] = border_options
        if "borderColor" in tile:
            border_options["color"] = tile["borderColor"]

        # Title: create the options when missing, then carry over the old title fields
        title_options = tile.get("titleOptions")
        if title_options is None:
            title_options = {
                "fontColor": '#000',
                "fontSize": 13
            }
            tile["titleOptions"] = title_options
        for legacy_key in ("showTitle", "title"):
            if legacy_key in tile:
                title_options[legacy_key] = tile[legacy_key]



class V13100MarkPineconeConnectionsAsLegacy(migration_json.JsonMigrationOperation):
    """Sets ``params.version`` to PRE_APRIL_2024_LEGACY on the Pinecone connections that have no version."""

    def __repr__(self,):
        return "Migrate Pinecone connections to add version field, mark as PRE_APRIL_2024_LEGACY"

    def transform(self, connections, filepath=None):
        """
        Mark every Pinecone connection without a ``version`` param as legacy.

        :param connections: dict of connection name -> connection definition, updated in place
        :param filepath: unused
        :return: ``connections`` itself
        """
        for name, connection in iteritems(connections):
            if connection.get("type", None) != "Pinecone":
                continue
            params = connection.get("params")
            if params is None:
                # Attach the dict to the connection: a detached default would silently drop the version
                params = {}
                connection["params"] = params
            if "version" not in params:
                print("Migrating pinecone connection: {}".format(name))
                params["version"] = "PRE_APRIL_2024_LEGACY"
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

class V13100DisableHashedApiKeys(migration_json.JsonMigrationOperation):
    """
    Sets ``hashApiKeys`` to False at the ``security`` path of ``config/general-settings.json``.
    Declared for design, automation and govern nodes.
    """

    def __repr__(self):
        return "Disable hashed API keys for upgraded instances"

    def appliesTo(self):
        return ["design", "automation", "govern"]

    def file_patterns(self):
        return ["config/general-settings.json"]

    def jsonpath(self):
        return "security"

    def transform(self, security_settings, filepath=None):
        """Force ``hashApiKeys`` to False; returns the same object."""
        security_settings.update({"hashApiKeys": False})
        return security_settings

class V13100DisableHashedApiKeysApiNode(migration_json.JsonMigrationOperation):
    """
    Sets ``hashApiKeys`` to False in ``config/server.json``. Declared for api nodes.
    No ``jsonpath`` is overridden here.
    """

    def __repr__(self):
        return "Disable hashed API keys for upgraded instances"

    def appliesTo(self):
        return ["api"]

    def file_patterns(self):
        return ["config/server.json"]

    def transform(self, server_config, filepath=None):
        """Force ``hashApiKeys`` to False; returns the same object."""
        server_config.update({"hashApiKeys": False})
        return server_config

###############################################################################
# V13112 / DSS 13.1.2
###############################################################################

class V13112UpdateMainInputsOnPromptStudioResponses(migration_base.ProjectLocalMigrationOperation):
    """Replaces ``mainInputNames`` by ``mainPromptTemplateInputs`` in the prompt studio response files."""

    def __repr__(self):
        return "Update mainInputNames to include input type and rename to mainPromptTemplateInputs"

    def execute(self, project_paths):
        # Last responses first, then the history responses
        response_patterns = (
            "%s/*/prompts/*/last-response.json",
            "%s/*/prompts/*/history-responses/response-*.json",
        )
        for response_pattern in response_patterns:
            for response_file in glob(response_pattern % project_paths.prompt_studios):
                self.update_main_input_list(response_file)

    def update_main_input_list(self, file):
        """
        Rewrite one response file: each plain entry of ``mainInputNames`` becomes a
        ``{"name": ..., "type": "TEXT"}`` dict (dict entries are kept as is), the result is
        stored as ``mainPromptTemplateInputs`` unless that key already exists, and
        ``mainInputNames`` is removed. Failures are logged and not re-raised.
        """
        print("Renaming mainInputNames to mainPromptTemplateInputs in response filename {} and adding type".format(file))
        try:
            response = base.json_loadf(file)
            typed_inputs = [
                entry if isinstance(entry, dict) else {"name": entry, "type": "TEXT"}
                for entry in response.get("mainInputNames", [])
            ]
            # An existing mainPromptTemplateInputs value is never overwritten
            response.setdefault("mainPromptTemplateInputs", typed_inputs)
            response.pop("mainInputNames", None)
            base.json_dumpf(file, response)
        except Exception as e:
            logging.exception("Failed to update mainInputNames in file {}: {}".format(file, e))



###############################################################################
# V13150 / DSS 13.1.5
###############################################################################

class V13150FixScikitLearn13ExtraTreesMulticlassSelectionMode(migration_base.ProjectLocalMigrationOperation):
    """Replaces the 'auto' selection mode by 'sqrt' in the extra trees params of multiclass ML tasks and saved models."""

    def __repr__(self):
        return "Migrate selection_mode to 'sqrt' in multiclass extra trees since 'auto' is removed in scikit-learn 1.3"

    def migrate_tree_based_classification_algo_params(self, params):
        """
        Switch ``selection_mode`` from "auto" to "sqrt" in ``params`` (updated in place).

        :return: True when ``params`` was modified, False otherwise
        """
        if isinstance(params, dict) and params.get("selection_mode") == "auto":
            # "auto" was removed in sklearn 1.3, but it was the same as "sqrt" (see https://github.com/scikit-learn/scikit-learn/pull/20803)
            params["selection_mode"] = "sqrt"
            return True
        return False

    def migrate_algo_params_in_mltask(self, mltask_data):
        """
        Migrate the ``modeling.extra_trees`` params of a multiclass ML task.

        :return: True when ``mltask_data`` was modified, False otherwise
        """
        if not isinstance(mltask_data, dict):
            return False
        if mltask_data.get("predictionType", "").lower() != "multiclass":
            return False
        extra_trees_params = mltask_data.get("modeling", {}).get("extra_trees", {})
        return self.migrate_tree_based_classification_algo_params(extra_trees_params)

    def execute(self, project_paths):
        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        mltask_files = (glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
                        + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data))
        for mltask_file in mltask_files:
            print("Migrating algorithms params in MLTask: %s" % mltask_file)
            try:
                mltask_data = base.json_loadf(mltask_file)
                was_modified = self.migrate_algo_params_in_mltask(mltask_data)
                if was_modified:
                    base.json_dumpf(mltask_file, mltask_data)
            except Exception as e:
                print("Algo params in mltask migration FAILED: %s" % e)

        # config/PROJECT_KEY/saved_models/58ipAuN7*.json
        saved_model_files = glob("%s/saved_models/*.json" % project_paths.config)
        for saved_model_file in saved_model_files:
            print("Migrating algorithms params in saved model miniTask: %s " % saved_model_file)
            try:
                sm_data = base.json_loadf(saved_model_file)
                was_modified = self.migrate_algo_params_in_mltask(sm_data.get("miniTask", {}))
                if was_modified:
                    base.json_dumpf(saved_model_file, sm_data)
            except Exception as e:
                print("Algo params in saved model miniTask migration FAILED: %s" % e)

###############################################################################
# V13200 / DSS 13.2.0
###############################################################################

class V13200SecuritySettingsUpdate(migration_json.JsonMigrationOperation):
    """
    Sets ``security.enableEmailAndDisplayNameModification`` to False in
    ``config/general-settings.json``. Declared for design, automation and govern nodes.
    """

    def __repr__(self):
        return "Set enableEmailAndDisplayNameModification to false for all instances"

    def appliesTo(self):
        return ["design", "automation", "govern"]

    def file_patterns(self):
        return ["config/general-settings.json"]

    def transform(self, settings, filepath=None):
        """Update the ``security`` section when the settings have one; returns the same object."""
        if "security" not in settings:
            return settings
        security_settings = settings['security']
        security_settings['enableEmailAndDisplayNameModification'] = False
        return settings

class V13200EnrichFrenchDepartementDefaultToInseeSourceDataset20092011(migration_app.ShakerStepMigrationOperation):
    """Sets ``sourceDatasetVersion`` to INSEE_2009_2011 on the EnrichFrenchDepartement steps that have none."""

    def __init__(self):
        super(V13200EnrichFrenchDepartementDefaultToInseeSourceDataset20092011, self).__init__("EnrichFrenchDepartement")

    def __repr__(self,):
        return "Default sourceDatasetVersion to INSEE_2009_2011 for EnrichFrenchDepartement processor step"

    def transform_step(self, step):
        """
        Add the default ``sourceDatasetVersion`` to the step params when it is not set.

        :param step: the step definition, updated in place
        :return: ``step`` itself
        """
        params = step.get("params")
        if params is None:
            # Attach the dict to the step: a detached default would silently drop the value
            params = {}
            step["params"] = params
        if "sourceDatasetVersion" not in params:
            params["sourceDatasetVersion"] = "INSEE_2009_2011"
        return step

class V13200EnrichFrenchPostcodeDefaultToInseeSourceDataset20092011(migration_app.ShakerStepMigrationOperation):
    """Sets ``sourceDatasetVersion`` to INSEE_2009_2011 on the EnrichFrenchPostcode steps that have none."""

    def __init__(self):
        super(V13200EnrichFrenchPostcodeDefaultToInseeSourceDataset20092011, self).__init__("EnrichFrenchPostcode")

    def __repr__(self,):
        return "Default sourceDatasetVersion to INSEE_2009_2011 for EnrichFrenchPostcode processor step"

    def transform_step(self, step):
        """
        Add the default ``sourceDatasetVersion`` to the step params when it is not set.

        :param step: the step definition, updated in place
        :return: ``step`` itself
        """
        params = step.get("params")
        if params is None:
            # Attach the dict to the step: a detached default would silently drop the value
            params = {}
            step["params"] = params
        if "sourceDatasetVersion" not in params:
            params["sourceDatasetVersion"] = "INSEE_2009_2011"
        return step

class V13200ClearImpalaDriverClassInManagedMode(migration_json.JsonMigrationOperation):
    """
    Normalizes ``impalaSettings.driver`` in ``config/general-settings.json``: kept (or
    defaulted) when ``useURL`` is true, cleared otherwise. Declared for design and
    automation nodes.
    """

    def __repr__(self,):
        return "Clear driver class for Impala when Impala URL is managed"

    def appliesTo(self):
        return [ "design", "automation" ]

    def transform(self, settings, filepath=None):
        """
        Update the Impala driver class in place.

        :param settings: the general settings
        :param filepath: unused
        :return: ``settings`` itself
        """
        impala_settings = settings.get('impalaSettings')
        if isinstance(impala_settings, dict):
            if impala_settings.get("useURL") == True:
                # .get(): the driver key may be absent, in which case the default class is used
                impala_settings['driver'] = impala_settings.get('driver') or 'com.cloudera.impala.jdbc41.Driver'
            else:
                impala_settings['driver'] = None
        return settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]


###############################################################################
# V13300 / DSS 13.3.0
###############################################################################

class V13300AdaptTilesPositionAndSizeToNewColumnsNumber(migration_json.ProjectConfigJsonMigrationOperation):
    """Scales the tile boxes of dashboards that have no ``columnNumber`` yet and sets it to 36."""

    def __repr__(self):
        return "Multiply tile coordinates by 3 as the grid went from 12 columns to 36 columns"

    def file_patterns(self):
        return ["dashboards/*.json"]

    def jsonpath(self):
        return ""

    def transform(self, obj, filepath):
        """
        When ``columnNumber`` is unset, set it to 36 and multiply the box of every tile by 3
        (the width being capped at 36). Returns ``obj``.
        """
        if obj.get("columnNumber", None) is not None:
            # A column number is already set: leave the dashboard untouched
            return obj

        obj["columnNumber"] = 36
        for page in obj.get("pages", []):
            tiles = page.get("grid", {}).get("tiles", [])
            for tile in tiles:
                box = tile.get("box", {})
                scaled_width = safe_int(box.get("width", 1)) * 3
                box["width"] = min(scaled_width, 36)
                for key in ("height", "left", "top"):
                    box[key] = safe_int(box.get(key, 1)) * 3
        return obj

class V13300RemoveDashboardsFromHomepage(migration_json.JsonMigrationOperation):
    """Hides the dashboard rows at ``userSettings.*.home`` of ``config/user-settings.json``."""

    def __repr__(self):
        return "Remove dashboard from home-page for all users"

    def file_patterns(self):
        return ["config/user-settings.json"]

    def jsonpath(self):
        return "userSettings.*.home"

    def transform(self, obj, filepath=None):
        """Set ``visible`` to False on every row whose ``tileType`` is 'dashboard'; returns ``obj``."""
        rows = obj.get("rows", [])
        if not isinstance(rows, list):  # should never happen, just to be safe
            return obj
        for row in rows:
            if row.get("tileType", '') != 'dashboard':
                continue
            row["visible"] = False
        return obj


class V13300KpiColorRulesToChartDef(migration_app.ChartsMigrationOperation):
    """Adds to the chart ``colorGroups`` one RULES color group per generic measure that has color rules."""

    def __init__(self):
        super(V13300KpiColorRulesToChartDef, self).__init__()

    def migrate_def(self, chart_def):
        """Append a color group for each generic measure with ``colorRules``; returns ``chart_def``."""
        for measure in chart_def.get("genericMeasures", []):
            measure_rules = measure.get("colorRules", [])
            if len(measure_rules) == 0:
                continue
            new_group = self.create_color_group_from_rules(measure_rules, measure)
            existing_groups = chart_def.get("colorGroups", [])
            chart_def["colorGroups"] = existing_groups + [new_group]

        return chart_def

    def create_color_group_from_rules(self, color_rules, measure):
        """
        Build a RULES color group applied to ``measure`` from its color rules.

        :param color_rules: the color rules of the measure
        :param measure: the measure the group applies to
        :return: the new color group dict
        """
        applied_column = {
            "column": measure.get("column"),
            "type": measure.get("type", "NUMERICAL"),
            "function": measure.get("function", "AVG"),
            "percentile": measure.get("percentile", 50)
        }
        if measure.get("type") == 'CUSTOM':
            applied_column["inferredType"] = "NUMERICAL"

        converted_rules = []
        for source_rule in color_rules:
            source_color_options = source_rule.get("colorOptions", { "customColors": {} })
            converted_rules.append({
                "mode": source_rule.get("mode"),
                "styleClass": source_rule.get("styleClass"),
                "colorOptions": {
                    "customColors": source_color_options.get("customColors")
                },
                # The former single rule value becomes the first condition value
                "conditionValue1": source_rule.get("value"),
                "conditionValue2": None
            })

        return {
            "colorGroupMode": "RULES",
            "rules": converted_rules,
            "colorOptions": None,
            "appliedColumns": [applied_column]
        }


class V13300HFModelSettingsMigration(migration_json.JsonMigrationOperation):
    """Rewrite HuggingFaceLocal connections to carry a single ``models`` list.

    The legacy per-model boolean ``offer*`` flags and the
    ``additionalHuggingFaceModels`` list are replaced by ``params["models"]``.
    The legacy per-family quantization settings are copied onto the matching
    models and then removed from the connection params.
    """

    def __repr__(self,):
        return "Migrate HF connection to new  data model"

    # Legacy "offer*" flag -> model descriptors enabled by that flag.
    # Treat as read-only: entries are copied before being customized.
    HF_ALLOWED_MODELS_MAPPING = {
        "offerLlama27BChat": [{
            "presetId": "LLAMA_2_7B_CHAT",
            "huggingFaceId": "meta-llama/Llama-2-7b-chat-hf",
            "displayName": "Llama 2 7B Chat",
            "handlingMode": "TEXT_GENERATION_LLAMA_2",
            "canBeFineTuned": True}],
        "offerLlama213BChat": [{
            "presetId": "LLAMA_2_13B_CHAT",
            "huggingFaceId": "meta-llama/Llama-2-13b-chat-hf",
            "displayName": "Llama 2 13B Chat",
            "handlingMode": "TEXT_GENERATION_LLAMA_2",
            "canBeFineTuned": True}],
        "offerLlama38BInstruct": [{
            "presetId": "LLAMA_3_8B_INSTRUCT",
            "huggingFaceId": "meta-llama/Meta-Llama-3-8B-Instruct",
            "displayName": "Llama 3 8B Instruct",
            "handlingMode": "TEXT_GENERATION_LLAMA_2",
            "canBeFineTuned": True}],
        "offerLlama318BInstruct": [{
            "presetId": "LLAMA_3_1_8B_INSTRUCT",
            "huggingFaceId": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "displayName": "Llama 3.1 8B Instruct",
            "handlingMode": "TEXT_GENERATION_LLAMA_2",
            "canBeFineTuned": True}],
        "offerLlama3170BInstruct": [{
            "presetId": "LLAMA_3_1_70B_INSTRUCT",
            "huggingFaceId": "meta-llama/Meta-Llama-3.1-70B-Instruct",
            "displayName": "Llama 3.1 70B Instruct",
            "handlingMode": "TEXT_GENERATION_LLAMA_2",
            "canBeFineTuned": True}],
        "offerLlamaGuard2": [{
            "presetId": "LLAMA_GUARD2",
            "huggingFaceId": "meta-llama/Meta-Llama-Guard-2-8B",
            "displayName": "Llama Guard 2",
            "handlingMode": "TEXT_GENERATION_LLAMA_GUARD",
            "canBeFineTuned": True}],
        "offerLlamaGuard31B": [{
            "presetId": "LLAMA_GUARD3_1B",
            "huggingFaceId": "meta-llama/Llama-Guard-3-1B",
            "displayName": "Llama Guard 3 1B",
            "handlingMode": "TEXT_GENERATION_LLAMA_GUARD",
            "canBeFineTuned": True}],
        "offerLlamaGuard38B": [{
            "presetId": "LLAMA_GUARD3_8B",
            "huggingFaceId": "meta-llama/Llama-Guard-3-8B",
            "displayName": "Llama Guard 3 8B",
            "handlingMode": "TEXT_GENERATION_LLAMA_GUARD",
            "canBeFineTuned": True}],
        "offerPromptGuard": [{
            "presetId": "PROMPT_GUARD",
            "huggingFaceId": "meta-llama/Prompt-Guard-86M",
            "displayName": "Prompt Guard",
            "handlingMode": "TEXT_CLASSIFICATION_PROMPT_INJECTION"}],
        "offerMistral7BInstruct": [{
            "presetId": "MISTRAL_7B_INSTRUCT",
            "huggingFaceId": "mistralai/Mistral-7B-Instruct-v0.1",
            "displayName": "Mistral 7B Instruct",
            "handlingMode": "TEXT_GENERATION_MISTRAL",
            "canBeFineTuned": True}],
        "offerMistral7BInstructV2": [{
            "presetId": "MISTRAL_7B_INSTRUCT_V2",
            "huggingFaceId": "mistralai/Mistral-7B-Instruct-v0.2",
            "displayName": "Mistral 7B Instruct v0.2",
            "handlingMode": "TEXT_GENERATION_MISTRAL",
            "canBeFineTuned": True}],
        "offerMistralNemo12B": [{
            "presetId": "MISTRAL_NEMO_12B_INSTRUCT",
            "huggingFaceId": "mistralai/Mistral-Nemo-Instruct-2407",
            "displayName": "Mistral NeMo 12B Instruct",
            "handlingMode": "TEXT_GENERATION_MISTRAL",
            "canBeFineTuned": True}],
        "offerMixtral8x7BInstruct": [{
            "presetId": "MIXTRAL_8X7B_INSTRUCT",
            "huggingFaceId": "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "displayName": "Mixtral 8x7B Instruct",
            "handlingMode": "TEXT_GENERATION_MISTRAL",
            "canBeFineTuned": True}],
        "offerZephyr7B": [{
            "presetId": "ZEPHYR_7B",
            "huggingFaceId": "HuggingFaceH4/zephyr-7b-beta",
            "displayName": "Zephyr 7B",
            "handlingMode": "TEXT_GENERATION_ZEPHYR",
            "canBeFineTuned": True}],
        "offerFalcon7BInstruct": [{
            "presetId": "FALCON_7B_INSTRUCT",
            "huggingFaceId": "tiiuae/falcon-7b-instruct",
            "displayName": "Falcon 7B Instruct",
            "handlingMode": "TEXT_GENERATION_FALCON",
            "canBeFineTuned": True}],
        "offerFalcon40BInstruct": [{
            "presetId": "FALCON_40B_INSTRUCT",
            "huggingFaceId": "tiiuae/falcon-40b-instruct",
            "displayName": "Falcon 40B Instruct",
            "handlingMode": "TEXT_GENERATION_FALCON",
            "canBeFineTuned": True}],
        "offerMPT7B": [
            {"presetId": "MPT_7B_INSTRUCT",
             "huggingFaceId": "mosaicml/mpt-7b-instruct",
             "displayName": "MosaicML MPT 7B Instruct",
             "handlingMode": "TEXT_GENERATION_MPT",
             "canBeFineTuned": True},
            {"presetId": "MPT_7B_CHAT",
             "huggingFaceId": "mosaicml/mpt-7b-chat",
             "displayName": "MosaicML MPT 7B Chat (NON COMMERCIAL USAGE ONLY)",
             "handlingMode": "TEXT_GENERATION_MPT",
             "canBeFineTuned": True}],
        "offerDolly7B": [{
            "presetId": "DOLLY_7B",
            "huggingFaceId": "databricks/dolly-v2-7b",
            "displayName": "Dolly 7B",
            "handlingMode": "TEXT_GENERATION_DOLLY",
            "canBeFineTuned": True}],
        "offerPhi3Mini4k": [{
            "presetId": "MICROSOFT_PHI_3_MINI_4K_INSTRUCT",
            "huggingFaceId": "microsoft/Phi-3-mini-4k-instruct",
            "displayName": "Phi-3 Mini 4K instruct",
            "handlingMode": "TEXT_GENERATION_PHI_3",
            "canBeFineTuned": True}],
        "offerPhi3Mini128k": [{
            "presetId": "MICROSOFT_PHI_3_MINI_128K_INSTRUCT",
            "huggingFaceId": "microsoft/Phi-3-mini-128k-instruct",
            "displayName": "Phi-3 Mini 128K instruct",
            "handlingMode": "TEXT_GENERATION_PHI_3",
            "canBeFineTuned": True}],
        "offerGemma2BInstruct": [{
            "presetId": "GEMMA_2B_INSTRUCT",
            "huggingFaceId": "google/gemma-2b-it",
            "displayName": "Gemma 2B Instruct",
            "handlingMode": "TEXT_GENERATION_GEMMA",
            "canBeFineTuned": True}],
        "offerGemma7BInstruct": [{
            "presetId": "GEMMA_7B_INSTRUCT",
            "huggingFaceId": "google/gemma-7b-it",
            "displayName": "Gemma 7B Instruct",
            "handlingMode": "TEXT_GENERATION_GEMMA",
            "canBeFineTuned": True}],
        "offerGemma22BInstruct": [{
            "presetId": "GEMMA_2_2B_INSTRUCT",
            "huggingFaceId": "google/gemma-2-2b-it",
            "displayName": "Gemma 2 2B Instruct",
            "handlingMode": "TEXT_GENERATION_GEMMA",
            "canBeFineTuned": True}],
        "offerGemma29BInstruct": [{
            "presetId": "GEMMA_2_9B_INSTRUCT",
            "huggingFaceId": "google/gemma-2-9b-it",
            "displayName": "Gemma 2 9B Instruct",
            "handlingMode": "TEXT_GENERATION_GEMMA",
            "canBeFineTuned": True}],
        "offerLlava16Mistral7b": [{
            "presetId": "LLAVA_1_6_MISTRAL_7B",
            "huggingFaceId": "llava-hf/llava-v1.6-mistral-7b-hf",
            "displayName": "Llava v1.6 Mistral 7B",
            "handlingMode": "TEXT_GENERATION_MISTRAL",
            "supportsImageInputs": True,
            "canBeFineTuned": True}],
        "offerPhi35Vision": [{
            "presetId": "PHI_3_5_VISION",
            "huggingFaceId": "microsoft/Phi-3.5-vision-instruct",
            "displayName": "Phi 3.5 vision instruct",
            "handlingMode": "TEXT_GENERATION_PHI_3",
            "supportsImageInputs": True,
            "canBeFineTuned": True}],
        "offerBartLargeMNLI": [{
            "presetId": "BART_LARGE_MNLI",
            "huggingFaceId": "facebook/bart-large-mnli",
            "displayName": "BART Large (MNLI)",
            "handlingMode": "ZSC_GENERIC"}],
        "offerBertMultilingualUncasedSentiment": [{
            "presetId": "BERT",
            "huggingFaceId": "nlptown/bert-base-multilingual-uncased-sentiment",
            "displayName": "BERT Multilingual uncased sentiment",
            "handlingMode": "TEXT_CLASSIFICATION_SENTIMENT"}],
        "offerDistilbertFineTunedSST2English": [{
            "presetId": "DISTILBERT",
            "huggingFaceId": "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
            "displayName": "Distilbert SST 2 En",
            "handlingMode": "TEXT_CLASSIFICATION_SENTIMENT"}],
        "offerRobertaBaseGoEmotions": [{
            "presetId": "ROBERTA_BASE_GO_EMOTIONS",
            "huggingFaceId": "SamLowe/roberta-base-go_emotions",
            "displayName": "RoBERTa Base GoEmotions",
            "handlingMode": "TEXT_CLASSIFICATION_EMOTIONS"}],
        "offerToxicBert": [{
            "presetId": "TOXIC_BERT",
            "huggingFaceId": "unitary/toxic-bert",
            "displayName": "Toxic Bert",
            "handlingMode": "TEXT_CLASSIFICATION_TOXICITY"}],
        "offerDistilbertToxicity": [{
            "presetId": "DISTILBERT_TOXICITY",
            "huggingFaceId": "citizenlab/distilbert-base-multilingual-cased-toxicity",
            "displayName": "Distilbert Multilingual Toxicity Detection",
            "handlingMode": "TEXT_CLASSIFICATION_TOXICITY"}],
        "offerCamembertToxicity": [{
            "presetId": "FRENCH_TOXICITY",
            "huggingFaceId": "EIStakovskii/french_toxicity_classifier_plus_v2",
            "displayName": "Camembert Toxicity",
            "handlingMode": "TEXT_CLASSIFICATION_TOXICITY"}],
        "offerChatGPTDetectorRoberta": [{
            "presetId": "CHAT_GPT_DETECTOR",
            "huggingFaceId": "Hello-SimpleAI/chatgpt-detector-roberta",
            "displayName": "ChatGPT Detector (HEllo Simple AI)",
            "handlingMode": "TEXT_CLASSIFICATION_OTHER"}],
        "offerBartLargeCNN": [{
            "presetId": "BART_LARGE_CNN",
            "huggingFaceId": "facebook/bart-large-cnn",
            "displayName": "BART Large (CNN)",
            "handlingMode": "SUMMARIZATION_GENERIC"}],
        "offerCamembertFinetunedFrenchSummarization": [{
            "presetId": "CAMEMBERT",
            "huggingFaceId": "mrm8488/camembert2camembert_shared-finetuned-french-summarization",
            "displayName": "Camembert summarization",
            "handlingMode": "SUMMARIZATION_ROBERTA"}],
        "offerGooglePegasumXsum": [{
            "presetId": "PEGASUS",
            "huggingFaceId": "google/pegasus-xsum",
            "displayName": "Google Pegasus",
            "handlingMode": "SUMMARIZATION_GENERIC"}],
        "offerAllMiniLML6V2": [{
            "presetId": "ALL_MINI_LM_L6_V2",
            "huggingFaceId": "sentence-transformers/all-MiniLM-L6-v2",
            "displayName": "all-MiniLM-L6-v2",
            "handlingMode": "TEXT_EMBEDDING",
            "embeddingSize": 384,
            "maxTokensLimit": 256}],
        "offerDistiluseBaseMultilingualCasedV2": [{
            "presetId": "DISTILUSE_BASE_MULTILINGUAL_CASED_V2",
            "huggingFaceId": "sentence-transformers/distiluse-base-multilingual-cased-v2",
            "displayName": "Distiluse Base Multilingual Cased V2",
            "handlingMode": "TEXT_EMBEDDING",
            "embeddingSize": 512,
            "maxTokensLimit": 128}],
        "offerVitDinov2": [{
            "presetId": "VIT_SMALL_PATCH14_DINOV2_LVD_142M",
            "huggingFaceId": "timm/vit_small_patch14_dinov2.lvd142m",
            "displayName": "Vision Transformer DINOv2 LVD-142M",
            "handlingMode": "IMAGE_EMBEDDING",
            "embeddingSize": 384}],
        "offerEfficientNetB4": [{
            "presetId": "EFFICIENTNET_B4_RA2_IN1K",
            "huggingFaceId": "timm/efficientnet_b4.ra2_in1k",
            "displayName": "EfficientNet B4 ImageNet-1k",
            "handlingMode": "IMAGE_EMBEDDING",
            "embeddingSize": 1792}],
        "offerSD2_1": [{
            "presetId": "STABLE_DIFFUSION_2_1",
            "huggingFaceId": "stabilityai/stable-diffusion-2-1",
            "displayName": "Stable Diffusion v2-1",
            "handlingMode": "IMAGE_GENERATION_DIFFUSION"}],
        "offerSDXL1_0": [{
            "presetId": "STABLE_DIFFUSION_XL_1_0",
            "huggingFaceId": "stabilityai/stable-diffusion-xl-base-1.0",
            "displayName": "Stable Diffusion XL",
            "handlingMode": "IMAGE_GENERATION_DIFFUSION",
            "refinerId": "stabilityai/stable-diffusion-xl-refiner-1.0"}],
        "offerFlux1Schnell": [{
            "presetId": "FLUX_1_SCHNELL",
            "huggingFaceId": "black-forest-labs/FLUX.1-schnell",
            "displayName": "Flux 1 Schnell",
            "handlingMode": "IMAGE_GENERATION_DIFFUSION"}],
    }

    # handlingMode -> legacy connection-level param holding its quantization mode.
    _QUANTIZATION_PARAM_BY_HANDLING_MODE = {
        "TEXT_GENERATION_FALCON": "falconQuantizationMode",
        "TEXT_GENERATION_MISTRAL": "mistralQuantizationMode",
        "TEXT_GENERATION_PHI_3": "phi3QuantizationMode",
    }

    def _migrate_custom_models(self, params, model_ids):
        """Move ``additionalHuggingFaceModels`` entries out of ``params``.

        Each entry not already listed in ``model_ids`` gets an ``id`` equal to
        its ``huggingFaceId`` and is returned; ``model_ids`` is updated in
        place. The legacy key is removed from ``params``.
        """
        print("Migrating custom models...")
        migrated = []
        for additional_model in params.get("additionalHuggingFaceModels", []):
            hf_id = additional_model["huggingFaceId"]
            if hf_id in model_ids:
                continue  # should not happen unless migration is applied twice
            print(" - Migrating model: {}".format(hf_id))
            model_ids.add(hf_id)
            additional_model["id"] = hf_id
            migrated.append(additional_model)

        params.pop('additionalHuggingFaceModels', None)
        return migrated

    def _migrate_hardcoded_models(self, params, model_ids):
        """Expand the enabled ``offer*`` flags of ``params`` into model dicts.

        Returns the new model dicts and removes every ``offer*`` flag from
        ``params``; ``model_ids`` is updated in place. Each returned model is
        an independent copy of the preset in HF_ALLOWED_MODELS_MAPPING.
        """
        print("Migrating hardcoded models...")
        migrated = []
        for allow_key, allowed_models in self.HF_ALLOWED_MODELS_MAPPING.items():
            if allow_key in params and params[allow_key] == True:
                print("Migrating {} models...".format(allow_key))
                for preset in allowed_models:
                    hf_id = preset["huggingFaceId"]
                    if hf_id in model_ids:
                        # should not happen unless a custom model overrides a
                        # hardcoded model or unless migration is applied twice
                        continue
                    print(" - Migrating model: {}".format(hf_id))
                    # Copy before customizing: the preset is class-level state
                    # shared by every connection. Mutating it in place would
                    # make connections share one dict (the last connection's
                    # quantizationMode would win) and pollute the constant.
                    model = copy.deepcopy(preset)
                    quantization_param = self._QUANTIZATION_PARAM_BY_HANDLING_MODE.get(model["handlingMode"])
                    if quantization_param is not None:
                        model["quantizationMode"] = params.get(quantization_param, "NONE")
                    model["id"] = hf_id
                    model_ids.add(hf_id)
                    migrated.append(model)
            # remove deprecated allow fields
            params.pop(allow_key, None)
        return migrated

    def transform(self, connections, filepath=None):
        """Migrate every connection of type "HuggingFaceLocal" in ``connections``.

        ``connections`` maps connection names to connection dicts; it is
        updated in place and returned. Other connection types are untouched.
        """
        for connection_name, connection in iteritems(connections):
            if connection.get("type", None) != "HuggingFaceLocal":
                continue

            print("Migrating HF connection data model: {}".format(connection_name))
            if "params" not in connection:
                connection["params"] = {}
            params = connection["params"]
            models = params.get("models", [])  # should be empty unless migration is applied twice
            model_ids = set([m.get("huggingFaceId", None) for m in models])

            # Custom models go first so that they take precedence over a
            # hardcoded model with the same huggingFaceId.
            new_custom_models = self._migrate_custom_models(params, model_ids)
            new_hardcoded_models = self._migrate_hardcoded_models(params, model_ids)

            # remove deprecated quantizationMode fields (read above, so pop last)
            params.pop('falconQuantizationMode', None)
            params.pop('mistralQuantizationMode', None)
            params.pop('phi3QuantizationMode', None)

            # add new models fields
            params["models"] = models + new_hardcoded_models + new_custom_models

        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

class V13300JsonModeToResponseFormatInPromptStudiosAndRecipes(migration_json.ProjectConfigJsonMigrationOperation):
    """Replace the boolean ``jsonMode`` LLM setting with ``responseFormat``."""

    def __repr__(self):
        return "Migrate 'jsonMode' to 'responseFormat' in prompt recipes & prompt studios"

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["recipes/*.prompt", "recipes/*.nlp_llm_evaluation", "prompt-studios/*.json"]

    def _patch_settings(self, llm_settings):
        """Convert ``jsonMode`` in place on one settings dict.

        Nothing happens unless ``llm_settings`` is a dict that has ``jsonMode``
        and does not already have ``responseFormat``. A truthy ``jsonMode``
        becomes ``responseFormat = {"type": "json"}``; ``jsonMode`` itself is
        dropped either way.
        """
        if not isinstance(llm_settings, dict):
            return
        if "jsonMode" not in llm_settings or "responseFormat" in llm_settings:
            return

        json_mode_enabled = llm_settings.pop("jsonMode")
        if json_mode_enabled:
            llm_settings["responseFormat"] = {"type": "json"}

    def transform(self, obj, filepath=None):
        """Patch the settings found in ``obj`` and return ``obj``."""
        if not isinstance(obj, dict):
            return obj

        # Prompt recipe & LLM evaluation recipe: settings sit at the top level
        self._patch_settings(obj.get("completionSettings"))

        # Prompt studio: one settings dict per prompt
        prompts = obj.get("prompts")
        if isinstance(prompts, list):
            for prompt in prompts:
                if not isinstance(prompt, dict):
                    continue
                self._patch_settings(prompt.get("llmSettings"))

        return obj


class V13300RemoveUserPasswordFromMongoConnectionAdvancedURI(migration_json.JsonMigrationOperation):
    """Drop ``user``/``password`` params from MongoDB connections that use a URI."""

    def __repr__(self):
        return "Remove the text from user and password fields for mongodb connections with enabled advanced URI syntax"

    def transform(self, connections, filepath=None):
        """Strip credentials from matching connections, in place, and return ``connections``.

        A connection matches when its type is "MongoDB", ``params.useURI`` is
        exactly ``True`` and ``user`` or ``password`` is not the empty string.
        """
        for name, connection in iteritems(connections):
            params = connection.get("params", None)
            if params is None:
                continue

            is_mongo = connection.get("type", None) == "MongoDB"
            uses_uri = params.get("useURI", False) is True
            has_user = params.get("user", "") != ""
            has_password = params.get("password", "") != ""

            if is_mongo and uses_uri and (has_user or has_password):
                print("Migrating the connection {}".format(name))
                for credential_key in ("user", "password"):
                    params.pop(credential_key, None)
        return connections

    def jsonpath(self):
        return "connections"

    def file_patterns(self):
        return ["config/connections.json"]


class V13300MigrateUnifiedMonitoringBatchFrequency(migration_json.JsonMigrationOperation):
    """Copy ``batchFrequencyInMinutes`` to the projects and endpoints frequency keys."""

    def __repr__(self):
        return "Migrate batchFrequencyInMinutes in the Unified Monitoring settings"

    def transform(self, settings, filepath=None):
        """Duplicate the legacy frequency, when set, into both new keys.

        The legacy ``batchFrequencyInMinutes`` key itself is left in place.
        """
        legacy_frequency = settings.get("batchFrequencyInMinutes")
        if legacy_frequency is None:
            return settings

        for new_key in ("projectsBatchFrequencyInMinutes", "endpointsBatchFrequencyInMinutes"):
            settings[new_key] = legacy_frequency
        return settings

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["config/unified-monitoring.json"]

###############################################################################
# V13310 / DSS 13.3.1
###############################################################################

# We wrongly added an `inferredType` property to all columns, but it should only be present on CUSTOM ones
class V13310KPIColorGroupInferredTypeMigration(migration_app.ChartsMigrationOperation):
    """Reset ``inferredType`` to None on color-group columns that are not CUSTOM."""

    def __init__(self):
        super(V13310KPIColorGroupInferredTypeMigration, self).__init__()

    def migrate_def(self, chart_def):
        """Null out ``inferredType`` on non-CUSTOM applied columns and return ``chart_def``.

        The key is kept (set to ``None``), not removed. Columns without the
        key are left untouched.
        """
        for color_group in chart_def.get("colorGroups", []):
            for applied_column in color_group.get("appliedColumns", []):
                is_custom = applied_column.get("type") == "CUSTOM"
                if is_custom or "inferredType" not in applied_column:
                    continue
                applied_column["inferredType"] = None
        return chart_def

###############################################################################
# V13400 / DSS 13.4.0
###############################################################################

def migrate_sql_read_as_dates_flags(params, ds_type):
    """Replace the legacy boolean date-reading flags of ``params`` with read modes.

    * ``readColsWithUnknownTzAsDates`` becomes ``datetimenotzReadMode``
      ('AS_DATE' when truthy, else 'AS_STRING').
    * ``readSQLDateColsAsDSSDates`` becomes ``dateonlyReadMode``. For the
      'Oracle' dataset type it takes the current ``datetimenotzReadMode``
      value (``None`` when absent), otherwise 'AS_DATE'/'AS_STRING' from the
      flag.

    ``params`` is modified in place; nothing is returned.
    """
    if 'readColsWithUnknownTzAsDates' in params:
        unknown_tz_as_dates = params.pop('readColsWithUnknownTzAsDates')
        params['datetimenotzReadMode'] = 'AS_DATE' if unknown_tz_as_dates else 'AS_STRING'

    if 'readSQLDateColsAsDSSDates' not in params:
        return

    sql_dates_as_dss_dates = params.pop('readSQLDateColsAsDSSDates')
    if ds_type == 'Oracle':
        # special: DSS wasn't able to use Oracle's DATE as a 'SQL Date' so it was
        # covered by readColsWithUnknownTzAsDates instead
        date_only_mode = params.get('datetimenotzReadMode')
    elif sql_dates_as_dss_dates:
        date_only_mode = 'AS_DATE'
    else:
        date_only_mode = 'AS_STRING'
    params['dateonlyReadMode'] = date_only_mode

def migrate_nonfs_read_as_dates_flags(dataset_type, params):
    """Set ``readTemporalMode`` to "AS_DATE" in ``params`` for ElasticSearch datasets.

    Any other dataset type leaves ``params`` untouched.
    """
    if dataset_type != 'ElasticSearch':
        return
    params["readTemporalMode"] = "AS_DATE"

def migrate_fs_read_as_dates_flags(format_type, format_params):
    """Set ``readTemporalMode`` in ``format_params`` according to ``format_type``.

    'parquet' gets 'AS_RAW', 'orcfile' gets 'AS_STRING' and 'delta' gets
    'AS_DATE'. Any other format type leaves ``format_params`` untouched.
    """
    mode_by_format = (
        ('parquet', 'AS_RAW'),
        ('orcfile', 'AS_STRING'),
        ('delta', 'AS_DATE'),
    )
    for known_format, read_mode in mode_by_format:
        if format_type == known_format:
            format_params['readTemporalMode'] = read_mode
            return

class V13400SwitchDatesReadModeFlagsInDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Apply the SQL, non-filesystem and file-format date read-mode migrations to a dataset."""

    def __repr__(self):
        return "Update read modes for SQL date types without timezone in datasets"

    def transform(self, obj, filepath=None):
        """Migrate the date read-mode settings of one dataset definition.

        ``obj`` is the dataset dict; it is modified in place and returned.
        When ``params`` or ``formatParams`` is missing (or null) and a
        migration has something to write, the dict is created on ``obj`` so
        that the value is persisted rather than written to a temporary.
        """
        dataset_type = obj.get("type", "")

        # SQL flags, then the other non-filesystem dataset types
        params = obj.get("params")
        if params is None:
            params = {}
        migrate_sql_read_as_dates_flags(params, dataset_type)
        migrate_nonfs_read_as_dates_flags(dataset_type, params)
        if params:
            # No-op when params already belonged to obj; attaches the freshly
            # created dict only if a helper actually wrote into it.
            obj["params"] = params

        # FS-like datasets: the mode lives in the format params
        format_params = obj.get("formatParams")
        if format_params is None:
            format_params = {}
        migrate_fs_read_as_dates_flags(obj.get("formatType", ""), format_params)
        if format_params:
            obj["formatParams"] = format_params

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["datasets/*.json"]

class V13400SwitchDatesReadModeFlagsInDatasetLookups(migration_json.JsonMigrationOperation):
    """Apply the SQL date read-mode migration to the datasets of lookup endpoints."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Update read modes for SQL date types without timezone in dataset lookup endpoints"

    def appliesTo(self):
        return ["api"]

    def transform(self, service_params, filepath=None):
        """Migrate ``dssDataset.params`` of every entry in ``datasetResources``.

        ``service_params`` is modified in place and returned.
        """
        for resource in service_params.get("datasetResources", []):
            dss_dataset = resource.get("dssDataset", {})
            migrate_sql_read_as_dates_flags(
                dss_dataset.get("params", {}),
                dss_dataset.get("type"),
            )
        return service_params

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["services/*/gens/*/config.json"]


class V13400AddNewAlignmentFieldsInTimeseriesMLTasks(migration_base.ProjectLocalMigrationOperation):
    """Add ``unitAlignment`` and ``monthlyAlignment`` to stored ``timestepParams``."""

    def __repr__(self):
        return "Add new unit and monthly alignment fields in timeseries models"

    def migrate_timestep_params_in_mltask(self, mltask_data):
        """Fill in the alignment fields of ``mltask_data["timestepParams"]``.

        Returns True when the data was modified, False otherwise (including
        when ``mltask_data`` is not a dict or has no non-empty dict under
        ``timestepParams``).
        """
        if not isinstance(mltask_data, dict):
            return False
        timestep_params = mltask_data.get("timestepParams")
        if not timestep_params or not isinstance(timestep_params, dict):
            return False

        modified = False

        # add new fields
        for new_field in ("unitAlignment", "monthlyAlignment"):
            if new_field not in timestep_params:
                timestep_params[new_field] = 0
                modified = True

        # set default values
        if timestep_params["unitAlignment"] == 0:
            timeunit = timestep_params.get("timeunit")
            for unit_name, alignment in (("QUARTER", 3), ("HALF_YEAR", 6), ("YEAR", 12)):
                if timeunit == unit_name:
                    timestep_params["unitAlignment"] = alignment
                    modified = True
                    break

        if timestep_params["monthlyAlignment"] == 0:
            timestep_params["monthlyAlignment"] = 31
            modified = True

        return modified

    def _migrate_timestep_params_in_files(self, json_files, start_message, failure_message, select_mltask):
        """Load each file, migrate the part picked by ``select_mltask``, save it if changed.

        A failure on one file is printed and does not stop the others.
        """
        for json_file in json_files:
            print(start_message % json_file)
            try:
                file_data = base.json_loadf(json_file)
                if self.migrate_timestep_params_in_mltask(select_mltask(file_data)):
                    base.json_dumpf(json_file, file_data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Migrate every MLTask, saved model and training recipe file of the project."""
        # config/projects/PROJECT_KEY/analysis/a7QE8ig7/ml/ecsqyuFW/params.json
        # analysis_data/PROJECT_KEY/a7QE8ig7/ecsqyuFW/sessions/s1/mltask.json
        # analysis-data/PROJECT_KEY/0DasLGaJ/YziCvMan/sessions/s2/core_params.json
        # saved_models/PROJECT_KEY/1JWNYxBy/versions/initial/core_params.json
        # saved_models/PROJECT_KEY/rWGuLwNu/pversions/Paris/1732132300771/core_params.json
        mltask_files = (
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
            + glob("%s/*/*/sessions/*/core_params.json" % project_paths.analysis_data)
            + glob("%s/*/versions/*/core_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/core_params.json" % project_paths.saved_models)
        )
        self._migrate_timestep_params_in_files(
            mltask_files,
            "Migrating timestep params in MLTask: %s",
            "Timestep params in mltask migration FAILED: %s",
            lambda data: data,
        )

        # config/projects/PROJECT_KEY/saved_models/58ipAuN7*.json
        self._migrate_timestep_params_in_files(
            glob("%s/saved_models/*.json" % project_paths.config),
            "Migrating timestep params in saved model miniTask: %s ",
            "Timestep params in saved model miniTask migration FAILED: %s",
            lambda data: data.get("miniTask", {}),
        )

        # config/projects/PROJECT_KEY/recipes/train_Filter_deployed_NPTS.prediction_training
        self._migrate_timestep_params_in_files(
            glob("%s/recipes/*.prediction_training" % project_paths.config),
            "Migrating timestep params in training recipe: %s ",
            "Timestep params in training recipe migration FAILED: %s",
            lambda data: data.get("core", {}),
        )


class V13400FlagTypeSystemVersionInDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Stamp every dataset definition with ``typeSystemVersion`` 'V1'."""

    def __repr__(self):
        return "Add typing system version in datasets"

    def transform(self, obj, filepath=None):
        """Set ``typeSystemVersion`` to 'V1' on ``obj`` (overwriting any value) and return it."""
        version_tag = 'V1'
        obj['typeSystemVersion'] = version_tag  # obj can't be None
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["datasets/*.json"]


class V13400MigrateSnowflakeAndDatabricksUtilsConfigTimeoutProps(
    migration_base.MigrationOperation
):
    """Rename two timeout keys of ``config/dip.properties`` and convert ms to s.

    Each legacy key is replaced by the same key suffixed with "S". An integer
    value is divided by 1000, rounded, and clamped to at least 1; any other
    value is carried over unchanged.
    """

    def __repr__(self):
        return "Rename Snowflake and Databricks utils config timeout keys"

    def appliesTo(self):
        return ['design', 'automation']

    @staticmethod
    def _milliseconds_to_seconds(old_value):
        """Return ``old_value`` (milliseconds) as a whole-seconds string, minimum "1".

        Values that cannot be read as an integer are returned unchanged.
        """
        try:
            value_in_ms = int(old_value)
            # Divide by a float and wrap in int() so the result is an integer
            # on both Python 2 and Python 3 (Python 2's round() returns a
            # float, which would serialize as e.g. "30.0").
            value_in_s = int(round(value_in_ms / 1000.0))
        except (TypeError, ValueError, OverflowError):
            # The old value can be an empty string or any other non-int
            return old_value
        return str(max(1, value_in_s))

    def execute(self, diphome, simulate=False):
        """Rewrite the legacy keys found in dip.properties; save only if one was found.

        NOTE(review): ``simulate`` is accepted but not consulted here; confirm
        whether a simulated run is expected to skip the save.
        """
        keys_to_rename = [
            "dku.deployer.deployment.snowflake.utils.deploymentTimeout",
            "dku.deployer.deployment.databricks.utils.modelRegistrationTimeout",
        ]
        props_file = JavaPropertiesFile(osp.join(diphome.path, "config", "dip.properties"))
        props = props_file.props

        is_migration_required = False

        for key in keys_to_rename:
            if key not in props:
                continue
            is_migration_required = True
            props[key + "S"] = self._milliseconds_to_seconds(props[key])
            props.pop(key)

        if is_migration_required:
            props_file.save()



class V13400AddNewResamplingFieldsInTimeseriesMLTasks(migration_base.ProjectLocalMigrationOperation):
    """Add ``startDateMode`` and ``endDateMode`` to stored ``timeseriesSampling`` params."""

    def __repr__(self):
        return "Add new resampling dates fields in timeseries models"

    def migrate_resampling_params(self, mltask_data):
        """Default the missing date-mode fields of ``timeseriesSampling`` to "AUTO".

        Returns True when a field was added, False otherwise (including when
        ``mltask_data`` is not a dict or has no non-empty dict under
        ``timeseriesSampling``).
        """
        if not isinstance(mltask_data, dict):
            return False
        sampling_params = mltask_data.get("timeseriesSampling")
        if not sampling_params or not isinstance(sampling_params, dict):
            return False

        modified = False
        for mode_field in ("startDateMode", "endDateMode"):
            if mode_field not in sampling_params:
                sampling_params[mode_field] = "AUTO"
                modified = True
        return modified

    def _migrate_resampling_params_in_files(self, json_files, start_message, failure_message, select_preprocessing):
        """Load each file, migrate the part picked by ``select_preprocessing``, save it if changed.

        A failure on one file is printed and does not stop the others.
        """
        for json_file in json_files:
            print(start_message % json_file)
            try:
                file_data = base.json_loadf(json_file)
                if self.migrate_resampling_params(select_preprocessing(file_data)):
                    base.json_dumpf(json_file, file_data)
            except Exception as e:
                print(failure_message % e)

    def execute(self, project_paths):
        """Migrate every ML task, training recipe and preprocessing file of the project."""
        # config/projects/PROJECT_KEY/analysis/OMp0JQnC/ml/sUIbSR0c/params.json
        # config/projects/PROJECT_KEY/recipes/train_transformer.prediction_training
        # analysis-data/PROJECT_KEY/OMp0JQnC/sUIbSR0c/sessions/s5/mltask.json
        mltask_files = (
            glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
            + glob("%s/recipes/*.prediction_training" % project_paths.config)
            + glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        )
        # In these files the sampling params sit under the "preprocessing" key.
        self._migrate_resampling_params_in_files(
            mltask_files,
            "Migrating resampling params in ML Task file: %s",
            "Resampling params in ML Task file migration FAILED: %s",
            lambda data: data.get("preprocessing", {}),
        )

        # analysis-data/PROJECT_KEY/OMp0JQnC/sUIbSR0c/sessions/s5/pp1/rpreprocessing_params.json
        # saved_models/PROJECT_KEY/T8QeUySv/versions/initial/rpreprocessing_params.json
        # saved_models/PROJECT_KEY/SAkIukMY/pversions/Lille/1732132406314/rpreprocessing_params.json
        preprocessing_files = (
            glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data)
            + glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
            + glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models)
        )
        # Here the file content itself is the preprocessing dict.
        self._migrate_resampling_params_in_files(
            preprocessing_files,
            "Migrating resampling params in preprocessing file: %s",
            "Resampling params in preprocessing file migration FAILED: %s",
            lambda data: data,
        )


class V13400RenameOneLakeConnectionsToFabricWarehouse(migration_json.JsonMigrationOperation):
    """Retype the "OneLake" connections of config/connections.json as "FabricWarehouse"."""

    def __repr__(self,):
        return "Migrate OneLake connections which have been renamed to Fabric Warehouse"

    def transform(self, connections, filepath=None):
        """
        :param connections: mapping of connection name -> connection definition (edited in place)
        :param filepath: unused
        :return: the same mapping
        """
        # The connection name is irrelevant here, only the definition is inspected
        for _, definition in iteritems(connections):
            is_one_lake = definition.get("type", None) == "OneLake"
            if is_one_lake:
                definition["type"] = "FabricWarehouse"
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

class V13400RenameOneLakeDatasetsToFabricWarehouse(migration_json.ProjectConfigJsonMigrationOperation):
    """Retype the "OneLake" datasets of a project as "FabricWarehouse"."""

    def __repr__(self):
        return "Migrate OneLake datasets which have been renamed to Fabric Warehouse"

    def transform(self, obj, filepath=None):
        """
        :param obj: content of one datasets/*.json file (edited in place)
        :param filepath: unused
        :return: the same object
        """
        # Anything that is not a OneLake dataset goes through untouched
        if obj.get("type", "") != "OneLake":
            return obj
        obj["type"] = "FabricWarehouse"
        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["datasets/*.json"]

class V13400MigrateToGuardrailsPipeline(migration_json.JsonMigrationOperation):
    """
    Convert the legacy per-kind detector lists found in connection params into a single
    ``guardrailsPipelineSettings.guardrails`` list.

    Only connections whose params contain a ``queryForbiddenTermsDetectors`` key are migrated.
    The legacy lists themselves are left in place in the params.
    """

    # One row per legacy list, in the order the guardrails must appear in the pipeline:
    # (legacy params key, guardrail type, filterQueries, filterResponses).
    # A None flag means the corresponding key is not written at all on the guardrail params.
    _LEGACY_DETECTORS = (
        ("queryForbiddenTermsDetectors", "ForbiddenTermsDetector", True, False),
        ("responseForbiddenTermsDetectors", "ForbiddenTermsDetector", False, True),
        ("queryPIIDetectors", "PIIDetector", True, False),
        ("queryToxicityDetectors", "ToxicityDetector", True, False),
        ("responseToxicityDetectors", "ToxicityDetector", False, True),
        ("queryPromptInjectionDetectors", "PromptInjectionDetector", None, None),
    )

    def __repr__(self,):
        return "Migrate LLM connection usage control to Guardrails pipelines"

    @staticmethod
    def _legacy_detector_to_guardrail(legacy, guardrail_type, filter_queries, filter_responses):
        """
        Build one guardrail from one legacy detector.

        The legacy dict is reused (not copied) as the guardrail params: its "enabled" entry is
        moved up to the guardrail level and the filter flags are added to it when they are set.

        :param legacy: legacy detector settings (edited in place)
        :param guardrail_type: value of the guardrail "type" field
        :param filter_queries: value of params.filterQueries, or None to leave it unset
        :param filter_responses: value of params.filterResponses, or None to leave it unset
        :return: the guardrail dict
        """
        guardrail = {
            "type": guardrail_type,
            "enabled": legacy.pop("enabled", True),
            "params": legacy
        }
        if filter_queries is not None:
            legacy["filterQueries"] = filter_queries
        if filter_responses is not None:
            legacy["filterResponses"] = filter_responses
        return guardrail

    def transform_one_connection_params(self, params):
        """
        Write ``params["guardrailsPipelineSettings"]`` from the legacy detector lists.

        Missing or null lists and null entries inside a list are ignored.

        :param params: connection params (edited in place)
        """
        guardrails = []
        for legacy_key, guardrail_type, filter_queries, filter_responses in self._LEGACY_DETECTORS:
            # "or []" so that a key explicitly set to null is handled like a missing key
            for legacy in params.get(legacy_key) or []:
                if legacy is None:
                    continue
                guardrails.append(self._legacy_detector_to_guardrail(
                    legacy, guardrail_type, filter_queries, filter_responses))

        params["guardrailsPipelineSettings"] = {
            "guardrails": guardrails
        }

    def transform(self, connections, filepath=None):
        """
        :param connections: mapping of connection name -> connection definition (edited in place)
        :param filepath: unused
        :return: the same mapping
        """
        for name, connection in iteritems(connections):
            # Tolerate connections whose params are missing or null
            params = connection.get("params") or {}
            if "queryForbiddenTermsDetectors" in params:
                self.transform_one_connection_params(params)
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

###############################################################################
# V13430 / DSS 13.4.3
###############################################################################

class V13430AddGuardrailsPipelineToPromptRecipes(migration_json.JsonMigrationOperation):
    """Give the "prompt" object of prompt recipe payloads an empty guardrails pipeline when it has none."""

    def __repr__(self,):
        return "Add guardrails pipeline settings to prompt recipe payload params"

    def transform(self, prompt, filepath=None):
        """
        :param prompt: the "prompt" object of a *.prompt payload (edited in place)
        :param filepath: unused
        :return: the same object
        """
        # Existing settings are kept as they are, only a missing key is filled in
        prompt.setdefault("guardrailsPipelineSettings", {"guardrails": []})
        return prompt

    def jsonpath(self):
        return "prompt"

    def file_patterns(self):
        return ["config/projects/*/recipes/*.prompt"]

###############################################################################
# V13440 / DSS 13.4.4
###############################################################################

class DipPropertyToServerJsonMigrationOperation(migration_json.JsonMigrationOperation):
    """
    Base operation moving one boolean entry of config/dip.properties to config/server.json.

    Subclasses set DIP_PROPERTY_KEY (the entry removed from dip.properties) and
    SERVER_JSON_KEY (the key written in server.json).
    """
    DIP_PROPERTY_KEY = None
    SERVER_JSON_KEY = None

    def __repr__(self):
        return f"Move {self.DIP_PROPERTY_KEY} to server.json"

    def appliesTo(self):
        return ["api"]

    def pop_dip_property(self):
        """
        Remove DIP_PROPERTY_KEY from dip.properties and save the file.

        :return: the value of the removed entry read as a boolean, False when the entry is absent
        """
        key = self.DIP_PROPERTY_KEY
        props_file = JavaPropertiesFile(osp.join(self.diphome.path, "config", "dip.properties"))
        props = props_file.props

        # Nothing to move: the properties file is not rewritten
        if key not in props:
            return False

        value = props_file.get_as_bool(key)
        props.pop(key)
        props_file.save()
        return value

    def transform(self, server_config, filepath=None):
        """
        :param server_config: content of server.json (edited in place)
        :param filepath: unused
        :return: the same object, with SERVER_JSON_KEY set
        """
        try:
            migrated_value = self.pop_dip_property()
        except Exception as e:
            # Best effort: a failure on the properties file falls back to False
            print(f"Could not read the value of {self.DIP_PROPERTY_KEY}:\n\t- {e}")
            print(f"Setting the value of {self.SERVER_JSON_KEY} in server.json to False")
            migrated_value = False

        server_config[self.SERVER_JSON_KEY] = migrated_value
        return server_config

    def file_patterns(self):
        return ["config/server.json"]


class V13440MoveStackTracesAndLogTailsDipProperty(DipPropertyToServerJsonMigrationOperation):
    """Move dku.hide_stacks_and_logtails_in_error_replies from dip.properties to hideStackTracesAndLogTails in server.json."""
    # Entry removed from config/dip.properties
    DIP_PROPERTY_KEY = "dku.hide_stacks_and_logtails_in_error_replies"
    # Key written in config/server.json
    SERVER_JSON_KEY = "hideStackTracesAndLogTails"

class V13440MoveHttpRequestMetadataDipProperty(DipPropertyToServerJsonMigrationOperation):
    """Move dku.enable_dku_http_request_metadata_python_variable from dip.properties to isRequestMetadataEnabled in server.json."""
    # Entry removed from config/dip.properties
    DIP_PROPERTY_KEY = "dku.enable_dku_http_request_metadata_python_variable"
    # Key written in config/server.json
    SERVER_JSON_KEY = "isRequestMetadataEnabled"

###############################################################################
# V13500 / DSS 13.5.0
###############################################################################

class V13500SkipPageToursForExistingUsers(migration_base.MigrationOperation):
    """Flag the flow, explore and prepare page tours as completed for every user of config/users.json."""

    def __repr__(self):
        return "Add Page Tour properties to existing users, in state completed"

    def execute(self, diphome, simulate=False):
        """
        :param diphome: data directory handle, only its ``path`` is used
        :param simulate: not used by this step
        """
        users_file = osp.join(diphome.path, "config/users.json")
        users_data = base.json_loadf(users_file)

        tour_flags = ("flowTourCompleted", "exploreTourCompleted", "prepareTourCompleted")
        for user in users_data["users"]:
            # A fresh dict per user, replacing any previous value
            user["pageSpecificTourSettings"] = {flag: True for flag in tour_flags}

        base.json_dumpf(users_file, users_data)

class V13500RemoveStorySettings(migration_base.MigrationOperation):
    """Delete config/story-settings.json when it is present."""

    def __repr__(self):
        return "Remove story-settings.json from config if it exists"

    def appliesTo(self):
        return [ "design" ]

    def execute(self, diphome, simulate=False):
        """
        :param diphome: data directory handle, only its ``path`` is used
        :param simulate: when true nothing is removed (and nothing is printed)
        """
        story_settings = osp.join(diphome.path, "config", "story-settings.json")

        # Nothing to do without the file, and nothing must be done in simulation
        if not osp.isfile(story_settings) or simulate:
            return

        print(f"Found file {story_settings}, removing it.")
        try:
            os.remove(story_settings)
        except Exception as e:
            # A failed removal is reported, it does not abort the migration
            print(f"Removing story-settings.json from config migration FAILED: {e}")

class V13500RemoveLLMPromptEndpointsInProjects(migration_json.ProjectConfigJsonMigrationOperation):
    """Drop the endpoints of type LLM_PROMPT from the lambda_services/*.json files of a project."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Remove deprecated LLM_PROMPT API endpoints"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, service_params, filepath=None):
        """
        :param service_params: content of one service definition file (edited in place)
        :param filepath: unused
        :return: the same object, whose "endpoints" list no longer holds LLM_PROMPT endpoints
        """
        # "or []" so that an "endpoints" key explicitly set to null is handled like a missing one
        endpoints = service_params.get("endpoints") or []
        new_endpoints = []
        for endpoint in endpoints:
            print("Endpoint check")
            if endpoint.get("type") == "LLM_PROMPT":
                # .get(): a missing id must not make the migration fail on a mere log message
                print(f"Found deprecated LLM_PROMPT endpoint ({endpoint.get('id')}), removing it")
            else:
                new_endpoints.append(endpoint)

        service_params["endpoints"] = new_endpoints
        return service_params

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["lambda_services/*.json"]

class V13500RemoveLLMPromptEndpointsInApiNodeServices(migration_json.JsonMigrationOperation):
    """Drop the endpoints of type LLM_PROMPT from the services/*/gens/*/config.json files."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Remove deprecated LLM_PROMPT API endpoints"

    def appliesTo(self):
        return ["api"]

    def transform(self, service_params, filepath=None):
        """
        :param service_params: content of one service generation config file (edited in place)
        :param filepath: unused
        :return: the same object, whose "endpoints" list no longer holds LLM_PROMPT endpoints
        """
        # "or []" so that an "endpoints" key explicitly set to null is handled like a missing one
        endpoints = service_params.get("endpoints") or []
        new_endpoints = []
        for endpoint in endpoints:
            print("Endpoint check")
            if endpoint.get("type") == "LLM_PROMPT":
                # .get(): a missing id must not make the migration fail on a mere log message
                print(f"Found deprecated LLM_PROMPT endpoint ({endpoint.get('id')}), removing it")
            else:
                new_endpoints.append(endpoint)

        service_params["endpoints"] = new_endpoints
        return service_params

    def jsonpath(self):
        return ""

    def file_patterns(self, ):
        return ["services/*/gens/*/config.json"]

class V13500UpgradeActivityRrdFiles(migration_base.MigrationOperation):
    """Run the ``dku __upgrade_activity_rrd_files`` command in the post-execution phase."""

    # in 13.5, activity rrds go from RRD_METRIC_VERSION v2 to v3
    def __repr__(self,):
        return "Upgrade Activity (rrd) files, adding TOTAL-related field."

    def execute(self, diphome, simulate=False):
        # All the work happens in post_execute
        pass

    def post_execute(self, diphome):
        """
        :param diphome: data directory handle, only its ``path`` is used
        :raises subprocess.CalledProcessError: when the command exits with a non-zero status
        """
        # The DKUBIN environment variable wins over the dku script of the data directory
        default_dku = diphome.path + "/bin/dku"
        dkupath = os.getenv("DKUBIN", default_dku)
        command = dkupath + " __upgrade_activity_rrd_files"
        subprocess.check_call(command, shell=True)


# WARNING: added on 14.1.1 but under 13.5.0, must only apply to projects generated before 13.5.0.
#   - Intended as import-time migration (read-only when upgrading a full data directory from <13.5.0)
#   - To apply on 14.1.1+ import of projects exported before 13.5.0
# Not a generally recommended pattern, as it leads to differences between upgrade paths.
class V13500LLMConnectionsForAgentsToolsAndRaLlms(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Versions before 13.5.0 don't add the llm connection types to the `requiredConnections` field for the
    agents and agent tools.
    From version 13.5.0 included, exports allow remapping of llm connections and code envs, so exports
    made before that version may have an incomplete manifest, making them impossible to import
    if they contain items like agents, agent tools or ra-llms.

    This step collects the llm connections referenced by the agents and agent tools of the project
    and adds the missing ones to the `requiredConnections` of the manifest additions.
    """

    LLM_ID_PATTERN = re.compile(r'^retrievalaugmented:([^:]+):([^:]+)$')

    def __init__(self):
        # connection name -> connection type, filled by execute()
        self.llm_connections = {}
        self.code_envs = set()

    def __repr__(self,):
        return "Add llm connections to manifest"

    #  Don't update these connection types: they are a snapshot taken at the time this
    #  migration step was created, for 13.5.0-rc2.
    #  Extracted from the LLMStructuredRef.java:decodeID method (llm id prefix -> connection type).
    connection_types = {
        "openai": "OpenAI",
        "azureopenai-model": "AzureOpenAI",
        "azureopenai": "AzureOpenAI",
        "cohere": "Cohere",
        "mistralai": "MistralAI",
        "anthropic": "Anthropic",
        "vertex": "VertexAILLM",
        "bedrock": "Bedrock",
        "mosaicml": "MosaicML",
        "stabilityai": "StabilityAI",
        "huggingfaceapi": "HuggingFaceInferenceAPI",
        "huggingfacelocal": "HuggingFaceLocal",
        "databricksllm": "DatabricksLLM",
        "snowflakecortex": "SnowflakeCortex",
        "custom": "CustomLLM",
        "sagemaker-generic": "SageMaker-GenericLLM",
        "azure-llm": "AzureLLM"
    }

    class Connection(object):
        """Name and type of one connection."""

        def __init__(self, name, type_):
            """
            :type name: str
            :type type_: str
            """
            self.name = name
            self.type = type_

    @staticmethod
    def parse_llm_for_connection(llm_id):
        """
        Read the connection out of an llm id shaped like ``<prefix>:<connection name>[:...]``.

        Anything that is not a non-empty string starting with a known prefix gives None.
        :type llm_id: str
        :rtype: V13500LLMConnectionsForAgentsToolsAndRaLlms.Connection or None
        """
        if not llm_id or not isinstance(llm_id, str):
            return None

        known_types = V13500LLMConnectionsForAgentsToolsAndRaLlms.connection_types
        chunks = llm_id.split(":")
        if len(chunks) < 2 or chunks[0] not in known_types:
            return None

        prefix, connection_name = chunks[0], chunks[1]
        return V13500LLMConnectionsForAgentsToolsAndRaLlms.Connection(connection_name, known_types[prefix])

    @staticmethod
    def get_connections_from_agent(agent):
        """
        Collect the llm connections used by the inline versions of an agent saved model.

        :type agent: dict
        :rtype: dict[str, str]
        """
        parse = V13500LLMConnectionsForAgentsToolsAndRaLlms.parse_llm_for_connection
        found = {}
        model_type = agent.get("savedModelType", "")

        if model_type == "TOOLS_USING_AGENT":
            # Visual agents reference their llm in a dedicated field
            for version in agent.get("inlineVersions", []):
                agent_settings = version.get("toolsUsingAgentSettings", {})
                candidate = parse(agent_settings.get("llmId"))
                if candidate is not None:
                    found[candidate.name] = candidate.type
        elif model_type == "PLUGIN_AGENT":
            # Plugin agents may hold an llm id in any of their config values
            for version in agent.get("inlineVersions", []):
                plugin_config = version.get("pluginAgentConfig", {})
                for value in plugin_config.values():
                    candidate = parse(value)
                    if candidate is not None:
                        found[candidate.name] = candidate.type

        return found

    @staticmethod
    def get_connections_from_agent_tool(agent_tool):
        """
        Collect the llm connections used by an agent tool: its ``params.llmId`` and every
        value of its ``params.config``.

        :type agent_tool: dict
        :rtype: dict[str, str]
        """
        parse = V13500LLMConnectionsForAgentsToolsAndRaLlms.parse_llm_for_connection
        tool_params = agent_tool.get("params", {})

        # The dedicated field first, then the free-form plugin config values
        candidates = [parse(tool_params.get("llmId"))]
        for value in tool_params.get("config", {}).values():
            candidates.append(parse(value))

        found = {}
        for candidate in candidates:
            if candidate is not None:
                found[candidate.name] = candidate.type
        return found

    def execute(self, project_paths):
        """
        Scan the saved models and agent tools of the project and record their llm connections
        in ``self.llm_connections``. A file that cannot be processed is reported and skipped.

        :type project_paths: migration_base.ProjectPaths
        :rtype: None
        """
        sources = (
            ("%s/saved_models/*.json", self.get_connections_from_agent,
             "error loading connections from agent {}, e:{}"),
            ("%s/agent-tools/*.json", self.get_connections_from_agent_tool,
             "error loading connections from agent tool {}, e:{}"),
        )

        for pattern, extract_connections, error_message in sources:
            for json_file in glob(pattern % project_paths.config):
                try:
                    content = base.json_loadf(json_file)
                    self.llm_connections.update(extract_connections(content))
                except Exception as e:
                    print(error_message.format(json_file, e))

    def update_connections(self, required_connections):
        """
        Add the collected llm connections that are not yet listed; listed ones are left as they are.

        :type required_connections: dict[str, dict[str, str]]
        :rtype: dict[str, dict[str, str or boolean]]
        """
        if not self.llm_connections:
            return required_connections

        for connection_name, connection_type in iteritems(self.llm_connections):
            required_connections.setdefault(
                connection_name,
                {'name': connection_name, 'type': connection_type, 'isMissing': False})
        return required_connections

    def get_manifest_additions(self, additions, project_paths):
        """
        Complete ``additions['requiredConnections']`` with the collected llm connections.
        The key is only written when the resulting mapping is not empty.
        """
        required = self.update_connections(additions.get('requiredConnections', {}))
        if required:
            additions['requiredConnections'] = required

###############################################################################
# V13520 / DSS 13.5.2
###############################################################################


class V13530GenerateGlobalProjectGitIgnore(migration_base.MigrationOperation):
    """Write config/.dku-projects-gitignore, a gitignore file excluding .dss-meta."""

    def __repr__(self,):
        return "Create global projects gitignore"

    def execute(self, diphome, simulate=False):
        """
        :param diphome: data directory handle, only its ``path`` is used
        :param simulate: not used by this step
        """
        gitignore_path = osp.join(diphome.path, "config/.dku-projects-gitignore")
        gitignore_lines = (
            '# Global Dataiku projects gitignore\n',
            '.dss-meta\n',
        )

        # Any previous content of the file is replaced
        with open(gitignore_path, 'w') as gitignore:
            gitignore.writelines(gitignore_lines)


class V13530MoveProtectedFilesToHiddenFolder(migration_base.ProjectLocalMigrationOperation):
    """
    Put merge-requests.json, version-tags.json and git-info.json of a project in its
    .dss-meta folder: files tracked by git are copied, untracked ones are moved.
    """

    def __repr__(self):
        return "Move DSS project protected files to .dss-meta"

    @staticmethod
    def is_valid_json(file_path):
        """
        Check if the file contains valid JSON and has no duplicate keys.

        Problems are printed, never raised.

        :param file_path: path of the file to check
        :return: True when the file parses and no object repeats a key, False otherwise
        """
        def reject_duplicate_keys(pairs):
            # Called by the json parser with the (key, value) pairs of each object
            d = {}
            for k, v in pairs:
                if k in d:
                    raise Exception(f"Duplicate key found in {file_path}: {k}")
                d[k] = v
            return d

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                json.load(f, object_pairs_hook=reject_duplicate_keys)
                return True
        except json.JSONDecodeError as e:
            print(f"WARNING: Invalid JSON format -> {file_path}: {e}")
        except Exception as e:
            print(f"ERROR: Could not read file {file_path}: {e}")
        return False

    def execute(self, project_paths):
        """
        :param project_paths: paths of the project, only ``config`` is used
        """
        project_key = os.path.basename(project_paths.config)
        print(f"Move protected files to hidden folder .dss-meta for project {project_key}")

        # Create .dss-meta folder
        meta_folder = os.path.join(project_paths.config, '.dss-meta')
        if not os.path.exists(meta_folder):
            os.makedirs(meta_folder)

        # Define files to move or copy
        files_to_check = ['merge-requests.json', 'version-tags.json', 'git-info.json']
        for file_name in files_to_check:
            file_path = os.path.join(project_paths.config, file_name)
            target_path = os.path.join(meta_folder, file_name)

            if not os.path.exists(file_path):
                continue

            if not self.is_valid_json(file_path):
                print(f"Skipping {file_name}")
                continue

            # Check if the file is version-controlled, on Windows we just assume it is
            if base.is_os_windows() and not base.is_git_installed():
                print(f"Copying {file_name} to {meta_folder}")
                shutil.copy(file_path, meta_folder)
                # Go on with the next file: returning here would leave the remaining
                # protected files out of .dss-meta
                continue

            result = subprocess.run(['git', 'ls-files', '--error-unmatch', file_name],
                                    cwd=project_paths.config, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if result.returncode != 0:
                # File is not version-controlled, move it
                print(f"Moving {file_name} to {meta_folder}")
                shutil.move(file_path, target_path)
            else:
                # File is version-controlled, copy it, overwriting if necessary
                if os.path.exists(target_path):
                    print(f"{target_path} exists and will be overwritten by {file_path}")
                print(f"Copying {file_name} to {meta_folder}")
                shutil.copy(file_path, target_path)


class V13530AddDssMetaToGitignoreAndAddGlobalProjectGitIgnore(migration_base.ProjectLocalMigrationOperation):
    """Stage the project's .gitignore and point its git config at the global projects gitignore."""

    def __repr__(self):
        return "Commit .gitignore and add global project gitignore"

    def execute(self, project_paths):
        """
        :param project_paths: paths of the project, only ``config`` is used
        """
        # On Windows the step is skipped when git is not installed
        if base.is_os_windows() and not base.is_git_installed():
            return

        project_dir = project_paths.config

        # Force-add the project's own .gitignore when there is one
        gitignore_path = osp.join(project_dir, '.gitignore')
        if osp.exists(gitignore_path):
            add_command = ['git', 'add', "-f", gitignore_path]
            subprocess.run(add_command, cwd=project_dir)

        # Set git config to use the global gitignore file, located two levels above the project
        global_project_gitignore_path = osp.abspath(
            osp.join(project_dir, '..', '..', '.dku-projects-gitignore'))
        config_command = ['git', 'config', 'core.excludesFile', global_project_gitignore_path]
        subprocess.run(config_command, cwd=project_dir)



###############################################################################
# V14000 / DSS 14.0.0
###############################################################################

class V14000SkipOnboardingQuestionnaireForExistingUsers(migration_base.MigrationOperation):
    """Flag the onboarding questionnaire as skipped and finished for every user of config/users.json."""

    def __repr__(self):
        return "Add Onboarding Questionnaire properties to existing users, in state skipped"

    def execute(self, diphome, simulate=False):
        """
        :param diphome: data directory handle, only its ``path`` is used
        :param simulate: not used by this step
        """
        users_file = osp.join(diphome.path, "config", "users.json")
        users_data = base.json_loadf(users_file)

        questionnaire_flags = ("skippedQuestionnaire", "finishedOnboardingChoice", "finishedQuestionnaire")
        for user in users_data["users"]:
            # A fresh dict per user, replacing any previous value
            user["questionnaire"] = {flag: True for flag in questionnaire_flags}

        base.json_dumpf(users_file, users_data)

class V14000SetWriteSQLCommentParameterInConnections(migration_json.JsonMigrationOperation):
    """Turn on params.namingRule.writeDescriptionsAsSQLComment on Snowflake, BigQuery, Databricks and MySQL connections."""

    def __repr__(self):
        return "Sets the writeDescriptionsAsSQLComment parameter to True for connections that support writing SQL comments from a CREATE SQL statement"

    def transform(self, connections, filepath=None):
        """
        :param connections: mapping of connection name -> connection definition (edited in place)
        :param filepath: unused
        :return: the same mapping
        """
        sql_types_supporting_comments_in_create = ("Snowflake", "BigQuery", "Databricks", "MySQL")
        for name, connection in iteritems(connections):
            if connection.get("type", None) not in sql_types_supporting_comments_in_create:
                continue

            # Missing "params" / "namingRule" objects are created on the way
            naming_rule = connection.setdefault("params", {}).setdefault("namingRule", {})
            naming_rule["writeDescriptionsAsSQLComment"] = True
            print(f"Updating connection {name} with writeDescriptionsAsSQLComment to true")

        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]

class V14000SetWriteSQLCommentParameterInDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Turn off params.writeDescriptionsAsSQLComment on managed PostgreSQL, Redshift and Oracle datasets."""

    def __repr__(self):
        return "Sets the writeDescriptionsAsSQLComment parameter to false for managed SQL datasets that do not support writing SQL comments from a CREATE SQL statement"

    def transform(self, obj, filepath=None):
        """
        :param obj: content of one datasets/*.json file (edited in place)
        :param filepath: path of the file, only used in the log message; optional like in the
                         other transform() implementations of this file
        :return: the same object
        """
        sql_types_to_disable = ('PostgreSQL', 'Redshift', 'Oracle')
        if obj.get("type", '') not in sql_types_to_disable:
            return obj

        # Only managed datasets that currently have the flag on are touched
        is_managed = obj.get('managed', False)
        write_descriptions_as_sql_comment = obj.get('params', {}).get('writeDescriptionsAsSQLComment', False)
        if write_descriptions_as_sql_comment and is_managed:
            print(f"Updating dataset {filepath} with writeDescriptionsAsSQLComment to False")
            obj['params']['writeDescriptionsAsSQLComment'] = False

        return obj

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["datasets/*.json"]

class V14000HfIdsUtils:
    """Helpers shortening local HuggingFace LLM ids to their first three ':'-separated chunks."""

    @staticmethod
    def migrate_hf_id_if_needed(llm_id):
        """
        Shorten ``huggingfacelocal:CONNECTION_NAME:MODEL_NAME:HANDLING_MODE:promptDriven=true/false``
        to ``huggingfacelocal:CONNECTION_NAME:MODEL_NAME``.

        :param llm_id: any value
        :return: the shortened id, or ``llm_id`` itself when it is not a string, not a
                 huggingfacelocal id, or has fewer than 4 chunks
        """
        if isinstance(llm_id, str) and llm_id.startswith("huggingfacelocal:"):
            parts = llm_id.split(":")
            if len(parts) >= 4:
                return ":".join(parts[:3])
        # otherwise return original id
        return llm_id

    @staticmethod
    def migrate_hf_id_in_dict_if_needed(dict_object, llm_id_field):
        """
        Migrate in place the llm id stored under ``llm_id_field``.

        :param dict_object: dict holding the field (edited in place)
        :param llm_id_field: key to migrate
        :return: True when the stored value changed, False otherwise (including a missing or None value)
        """
        previous = dict_object.get(llm_id_field, None)
        if previous is None:
            return False
        updated = V14000HfIdsUtils.migrate_hf_id_if_needed(previous)
        dict_object[llm_id_field] = updated
        return updated != previous

    @staticmethod
    def migrate_guardrails_pipeline_settings_if_needed(dict_object):
        """
        Migrate the llm ids found in ``guardrailsPipelineSettings.guardrails``: the
        ``genericTextCompletionLlmId`` param of each guardrail and every value of its ``config`` param.

        :param dict_object: dict that may hold "guardrailsPipelineSettings" (edited in place)
        :return: True when at least one id changed
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed
        changed = False
        pipeline_settings = dict_object.get("guardrailsPipelineSettings", {})
        for guardrail in pipeline_settings.get("guardrails", []):
            guardrail_params = guardrail.get("params", {})
            if migrate_field(guardrail_params, "genericTextCompletionLlmId"):
                changed = True
            # Custom guardrails may hold an llm id in any of their config values
            custom_config = guardrail_params.get("config", {})
            for config_key in custom_config:
                if migrate_field(custom_config, config_key):
                    changed = True
        return changed

class V14000MigrateHuggingFaceIdsInProjects(migration_base.ProjectLocalMigrationOperation):

    def __repr__(self, ):
        # Human-readable label describing this migration operation
        return "Migrate HuggingFace LLM ids references in projects"

    def execute(self, project_paths):
        """
        Run every HuggingFace id sub-migration of this class, one after the other, on a project.

        :param project_paths: object exposing the project directories read by the sub-migrations
        :return: None
        """
        steps = (
            self.migrate_hf_fine_tuned_saved_models,
            self.migrate_agents,
            self.migrate_tools,
            self.migrate_prompt_studios,
            self.migrate_recipes,
            self.migrate_knowledge_banks,
            self.migrate_evaluation_stores,
            self.migrate_mltasks_preprocessing,
            self.migrate_embed_doc_recipes,
        )
        for step in steps:
            step(project_paths)

    @staticmethod
    def migrate_hf_fine_tuned_saved_models(project_paths):
        """
        Migrate the HuggingFace ids stored with the saved model versions.

        Visits ``*/versions/*/llm_info.json`` then ``*/versions/*/desc.json`` under
        ``project_paths.saved_models``. A file is only written back when something changed;
        a failure on one file is printed and the next file is processed.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed

        for info_path in glob("%s/*/versions/*/llm_info.json" % project_paths.saved_models):
            try:
                info = base.json_loadf(info_path)
                if info.get("llmType", None) != "SAVED_MODEL_FINETUNED_HUGGINGFACE_TRANSFORMER":
                    continue
                changed = False
                # The handling mode is the 4th chunk of the old originalLLMId: copy it into its
                # own field before the id gets shortened below
                original_id = info.get("originalLLMId", None)
                if original_id is not None:
                    id_parts = original_id.split(":")
                    if len(id_parts) > 3:
                        info["huggingFaceHandlingMode"] = id_parts[3]
                        changed = True
                for id_field in ("originalLLMId", "inputLLMId"):
                    changed = migrate_field(info, id_field) or changed
                if changed:
                    base.json_dumpf(info_path, info)
                    print("Migrated fine-tuned model: %s" % info_path)
            except Exception as e:
                print("Migrating fine-tuned model %s FAILED: %s" % (info_path, e))

        for desc_path in glob("%s/*/versions/*/desc.json" % project_paths.saved_models):
            try:
                descriptor = base.json_loadf(desc_path)
                if migrate_field(descriptor, "llmId"):
                    base.json_dumpf(desc_path, descriptor)
                    print("Migrated fine-tuned model desc: %s" % desc_path)
            except Exception as e:
                print("Migrating fine-tuned model desc %s FAILED: %s" % (desc_path, e))

    @staticmethod
    def migrate_agents(project_paths):
        """
        Migrate the HuggingFace ids referenced by the saved models of ``project_paths.config``.

        For ``TOOLS_USING_AGENT`` saved models the ``llmId`` of each inline version's
        ``toolsUsingAgentSettings`` is migrated; for ``PLUGIN_AGENT`` ones, every value of each
        inline version's ``pluginAgentConfig``. Guardrails settings of every inline version are
        migrated whatever the saved model type. Files are only rewritten when changed, and a
        failure on one file is printed without stopping the loop.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed
        for sm_path in glob("%s/saved_models/*.json" % project_paths.config):
            try:
                saved_model = base.json_loadf(sm_path)
                kind = saved_model.get("savedModelType", "")
                changed = False
                if kind == "TOOLS_USING_AGENT":
                    for version in saved_model.get("inlineVersions", []):
                        agent_settings = version.get("toolsUsingAgentSettings", {})
                        changed |= migrate_field(agent_settings, "llmId")
                elif kind == "PLUGIN_AGENT":
                    for version in saved_model.get("inlineVersions", []):
                        plugin_config = version.get("pluginAgentConfig", {})
                        for key in plugin_config.keys():
                            changed |= migrate_field(plugin_config, key)
                # Guardrails settings
                for version in saved_model.get("inlineVersions", []):
                    changed |= V14000HfIdsUtils.migrate_guardrails_pipeline_settings_if_needed(version)
                if changed:
                    base.json_dumpf(sm_path, saved_model)
                    print("Migrated saved model: %s" % sm_path)
            except Exception as e:
                print("Migrating saved-model %s FAILED: %s" % (sm_path, e))

    @staticmethod
    def migrate_tools(project_paths):
        """
        Migrate the HuggingFace ids referenced by the agent tools of ``project_paths.config``.

        Both ``params.llmId`` and every value of ``params.config`` are migrated. Files are only
        rewritten when changed, and a failure on one file is printed without stopping the loop.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed
        for tool_path in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_path)
                tool_params = tool.get("params", {})
                outcomes = [migrate_field(tool_params, "llmId")]
                tool_config = tool_params.get("config", {})
                for key in tool_config.keys():
                    outcomes.append(migrate_field(tool_config, key))
                if any(outcomes):
                    base.json_dumpf(tool_path, tool)
                    print("Migrated tool: %s" % tool_path)
            except Exception as e:
                print("Migrating tool %s FAILED: %s" % (tool_path, e))

    @staticmethod
    def migrate_prompt_studios(project_paths):
        """
        Migrate the HuggingFace ids referenced by prompt studios.

        Three families of files are handled: the prompt studio config files (``llmId`` and
        guardrails settings of each prompt), the ``history.json`` files (``llmId`` of each entry's
        ``promptStudioPrompt``) and the ``last-response.json`` files (``id`` of each chat message's
        ``llmStructuredRef``). Files are only rewritten when changed, and a failure on one file is
        printed without stopping the loop.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed
        migrate_guardrails = V14000HfIdsUtils.migrate_guardrails_pipeline_settings_if_needed

        # config files
        for studio_path in glob("%s/prompt-studios/*.json" % project_paths.config):
            try:
                studio = base.json_loadf(studio_path)
                changed = False
                for prompt in studio.get("prompts", []):
                    changed |= migrate_field(prompt, "llmId")
                    changed |= migrate_guardrails(prompt.get("prompt", {}))
                if changed:
                    base.json_dumpf(studio_path, studio)
                    print("Migrated prompt studio: %s" % studio_path)
            except Exception as e:
                print("Migrating prompt studio %s FAILED: %s" % (studio_path, e))

        # history
        for history_path in glob("%s/*/prompts/*/history.json" % project_paths.prompt_studios):
            try:
                history = base.json_loadf(history_path)
                changed = False
                for entry in history.get("entries", []):
                    changed |= migrate_field(entry.get("promptStudioPrompt", {}), "llmId")
                if changed:
                    base.json_dumpf(history_path, history)
                    print("Migrated prompt studio history: %s" % history_path)
            except Exception as e:
                print("Migrating prompt studio history %s FAILED: %s" % (history_path, e))

        # last-response
        for last_response_path in glob("%s/*/prompts/*/last-response.json" % project_paths.prompt_studios):
            try:
                last_response = base.json_loadf(last_response_path)
                changed = False
                for response in last_response.get("responses", []):
                    for _, chat_message in response.get("chatMessages", {}).items():
                        changed |= migrate_field(chat_message.get("llmStructuredRef", {}), "id")
                if changed:
                    base.json_dumpf(last_response_path, last_response)
                    print("Migrated prompt studio lastresp: %s" % last_response_path)
            except Exception as e:
                print("Migrating prompt studio lastresp %s FAILED: %s" % (last_response_path, e))


    @staticmethod
    def migrate_recipes(project_paths):
        """
        Migrate the HuggingFace ids stored in the payload files of the LLM recipes.

        Only the recipe types listed in the mapping below are handled. The payload file sits next
        to the recipe JSON file and is named ``<recipe name>.<recipe type>``. The root-level id
        fields of the payload are migrated, as well as the guardrails settings found under its
        ``prompt`` entry. Payloads are only rewritten when changed, and a failure on one recipe is
        printed without stopping the loop.
        """
        # recipe type -> root-level payload fields holding an LLM id
        llm_fields_by_type = {
            "nlp_llm_evaluation": ["embeddingLLMId", "completionLLMId"],
            "nlp_llm_summarization": ["llmId"],
            "nlp_llm_user_provided_classification": ["llmId"],
            "nlp_llm_model_provided_classification": ["llmId"],
            "prompt": ["llmId"],
            "nlp_llm_finetuning": ["llmId"]
        }

        for recipe_path in glob("%s/recipes/*.json" % project_paths.config):
            try:
                recipe_type = base.json_loadf(recipe_path).get("type", "")
                if recipe_type not in llm_fields_by_type:
                    continue
                recipe_name, _ = os.path.splitext(os.path.basename(recipe_path))
                payload_path = os.path.join(project_paths.config, "recipes", recipe_name + "." + recipe_type)
                payload = base.json_loadf(payload_path)
                # Root-level id fields first, then the guardrails settings
                outcomes = [
                    V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed(payload, field)
                    for field in llm_fields_by_type[recipe_type]
                ]
                outcomes.append(
                    V14000HfIdsUtils.migrate_guardrails_pipeline_settings_if_needed(payload.get("prompt", {})))
                if any(outcomes):
                    base.json_dumpf(payload_path, payload)
                    print("Migrated recipe payload: %s" % payload_path)
            except Exception as e:
                print("Migrating recipe %s FAILED: %s" % (recipe_path, e))

    @staticmethod
    def migrate_knowledge_banks(project_paths):
        """
        Migrate the HuggingFace ids referenced by knowledge banks.

        Handles the knowledge bank files of ``project_paths.config`` as well as the ``kb.json``
        files found under ``project_paths.knowledge_banks`` (including the versioned ones). The
        ``embeddingLLMId`` is migrated, then for each entry of ``llmsExposedWith`` its ``llmId``
        and the ``llmId`` / ``embeddingModelId`` of its ``guardrailsSettings``. Files are only
        rewritten when changed, and a failure on one file is printed without stopping the loop.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed

        # KB configuration
        kb_paths = glob("%s/knowledge-banks/*.json" % project_paths.config)
        kb_paths += glob("%s/*/kb.json" % project_paths.knowledge_banks)
        kb_paths += glob("%s/*/versions/*/kb.json" % project_paths.knowledge_banks)

        for kb_path in kb_paths:
            try:
                kb = base.json_loadf(kb_path)
                changed = migrate_field(kb, "embeddingLLMId")
                # augmented LLMs
                for exposed_llm in kb.get("llmsExposedWith", []):
                    changed |= migrate_field(exposed_llm, "llmId")
                    guardrails_settings = exposed_llm.get("guardrailsSettings", {})
                    for id_field in ("llmId", "embeddingModelId"):
                        changed |= migrate_field(guardrails_settings, id_field)
                if changed:
                    base.json_dumpf(kb_path, kb)
                    print("Migrated knowledge bank: %s" % kb_path)
            except Exception as e:
                print("Migrating knowledge bank %s FAILED: %s" % (kb_path, e))

    @staticmethod
    def migrate_evaluation_stores(project_paths):
        """
        Migrate the ``embeddingLLMId`` and ``completionLLMId`` of the ``evaluation.json`` files
        found under ``project_paths.model_evaluation_stores``.

        Files are only rewritten when changed, and a failure on one file is printed without
        stopping the loop.
        """
        for evaluation_path in glob("%s/*/*/evaluation.json" % project_paths.model_evaluation_stores):
            try:
                evaluation = base.json_loadf(evaluation_path)
                outcomes = [
                    V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed(evaluation, id_field)
                    for id_field in ("embeddingLLMId", "completionLLMId")
                ]
                if any(outcomes):
                    base.json_dumpf(evaluation_path, evaluation)
                    print("Migrated mes: %s" % evaluation_path)
            except Exception as e:
                print("Migrating mes %s FAILED: %s" % (evaluation_path, e))

    @staticmethod
    def migrate_mltasks_preprocessing(project_paths):
        """
        Migrate the HuggingFace ids held by the per-feature preprocessing settings of ML tasks.

        Two families of files are handled: the ML task files (per-feature settings under
        ``preprocessing.per_feature``) and the ``rpreprocessing_params.json`` files (per-feature
        settings under ``per_feature``). For each feature, ``sentenceEmbeddingModel`` and
        ``pretrainedModelsParams.structureRefId`` are migrated. Files are only rewritten when
        changed, and a failure on one file is printed without stopping the loop.
        """
        def migrate_per_feature(per_feature):
            # Migrates both id fields of every feature, returns True when anything changed
            changed = False
            for _, feature_params in per_feature.items():
                changed |= V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed(
                    feature_params, "sentenceEmbeddingModel")
                changed |= V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed(
                    feature_params.get("pretrainedModelsParams", {}), "structureRefId")
            return changed

        # mltask files
        mltask_paths = glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
        mltask_paths += glob("%s/recipes/*.prediction_training" % project_paths.config)
        mltask_paths += glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        for mltask_path in mltask_paths:
            try:
                mltask = base.json_loadf(mltask_path)
                if migrate_per_feature(mltask.get("preprocessing", {}).get("per_feature", {})):
                    base.json_dumpf(mltask_path, mltask)
                    print("Migrated ml task: %s" % mltask_path)
            except Exception as e:
                print("Migrating ml task %s FAILED: %s" % (mltask_path, e))

        # preprocessing files
        preprocessing_paths = glob("%s/*/*/sessions/*/*/rpreprocessing_params.json" % project_paths.analysis_data)
        preprocessing_paths += glob("%s/*/versions/*/rpreprocessing_params.json" % project_paths.saved_models)
        preprocessing_paths += glob("%s/*/pversions/*/*/rpreprocessing_params.json" % project_paths.saved_models)
        for preprocessing_path in preprocessing_paths:
            try:
                preprocessing = base.json_loadf(preprocessing_path)
                if migrate_per_feature(preprocessing.get("per_feature", {})):
                    base.json_dumpf(preprocessing_path, preprocessing)
                    print("Migrated model preprocessing: %s" % preprocessing_path)
            except Exception as e:
                print("Migrating model preprocessing %s FAILED: %s" % (preprocessing_path, e))

    @staticmethod
    def migrate_embed_doc_recipes(project_paths):
        """
        Migrate the HuggingFace ids stored in the params of the ``embed_documents`` recipes.

        The ``defaultVlmId`` param is migrated, as well as the ``vlmSettings.llmId`` of every rule
        and of ``allOtherRule`` when present. Recipe files are only rewritten when changed, and a
        failure on one file is printed without stopping the loop.
        """
        migrate_field = V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed
        for recipe_path in glob("%s/recipes/*.json" % project_paths.config):
            try:
                recipe = base.json_loadf(recipe_path)
                if recipe.get("type", "") != "embed_documents":
                    continue
                params = recipe.get("params", {})
                changed = migrate_field(params, "defaultVlmId")
                for rule in params.get("rules", []):
                    changed |= migrate_field(rule.get("vlmSettings", {}), "llmId")
                if "allOtherRule" in params:
                    fallback_rule = params["allOtherRule"]
                    changed |= migrate_field(fallback_rule.get("vlmSettings", {}), "llmId")
                if changed:
                    base.json_dumpf(recipe_path, recipe)
                    print("Migrated embed doc recipe payload: %s" % recipe_path)
            except Exception as e:
                print("Migrating embed doc recipe %s FAILED: %s" % (recipe_path, e))


class V14000MigrateHuggingFaceIdsInGeneralSettings(migration_json.JsonMigrationOperation):
    """Shortens the legacy local HuggingFace LLM ids referenced by the general settings file."""

    def __repr__(self,):
        return "Migrate HuggingFace LLM ids references in general settings"

    def appliesTo(self):
        return [ "design", "automation" ]

    def transform(self, settings, filepath=None):
        """
        Migrate, in place, the default model ids of the generative AI settings and the default
        LLM id of the code assistant settings.

        :param dict settings: the general settings
        :param filepath: unused
        :return: the settings object that was given
        """
        genai_settings = settings.get("generativeAISettings", {})
        # (dict holding the id, key of the id)
        targets = (
            (genai_settings, "defaultEvalTextEmbeddingModelId"),
            (genai_settings, "defaultEvalLLMCompletionModelId"),
            (genai_settings.get("embedDocumentsRecipeSettings", {}), "defaultVLMId"),
            (settings.get("codeAssistantSettings", {}), "codeAssistantDefaultLlmId"),
        )
        for holder, id_field in targets:
            V14000HfIdsUtils.migrate_hf_id_in_dict_if_needed(holder, id_field)
        return settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]

class V14000MigrateHuggingFaceIdsInConnections(migration_json.JsonMigrationOperation):
    """Shortens the legacy local HuggingFace LLM ids referenced by the guardrails of connections."""

    def __repr__(self,):
        return "Migrate HuggingFace LLM ids references in connections"

    def transform(self, connections, filepath=None):
        """
        Migrate, in place, the guardrails pipeline settings found in the params of each connection.

        :param dict connections: mapping of connection name to connection definition
        :param filepath: unused
        :return: the connections object that was given
        """
        for _name, connection in iteritems(connections):
            connection_params = connection.get("params", {})
            if "guardrailsPipelineSettings" not in connection_params:
                continue
            V14000HfIdsUtils.migrate_guardrails_pipeline_settings_if_needed(connection_params)
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self,):
        return ["config/connections.json"]


class V14000ExtractRetrievalAugmentedLLMFromKBIntoSM(migration_base.ProjectLocalMigrationOperation):
    OLD_LLM_ID_PATTERN = re.compile(r'^retrievalaugmented:([^:]+):([^:]+)$')

    def __repr__(self):
        # Human-readable label describing this migration operation
        return "Extract retrieval augmented LLM from KB into SavedModel"

    def execute(self, project_paths):
        """
        Turn the retrieval augmented LLMs declared in the knowledge banks of a project into saved
        models, then repoint the project objects referencing them.

        :param project_paths: object whose ``config`` attribute is the project config directory
        :return: None
        """
        config_dir = project_paths.config
        project_params_file = osp.join(config_dir, "params.json")

        # Read-only phase: work out what has to be created
        generated = self.generate_new_retrieval_augmented_llms(config_dir, project_params_file)
        saved_models, shared_llms, llms_by_kb = generated
        if not saved_models:
            print("Nothing to do")
            return

        # Write phase: create the saved models, share them and add them to the zones
        self.save_new_llm_saved_models(config_dir, saved_models)
        self.extend_shared_params(project_params_file, shared_llms)
        self.update_zones(config_dir, llms_by_kb)

        # Write phase: repoint the llm ids still using the old format
        reference_migrations = (
            self.migrate_agents,
            self.migrate_tools,
            self.migrate_prompt_studios,
            self.migrate_recipes,
        )
        for migrate_references in reference_migrations:
            migrate_references(config_dir)

    def get_shared_kb_configs(self, params_file):
        """
        Collect the exposed objects of type ``RETRIEVABLE_KNOWLEDGE`` declared in the project params.

        :param str params_file: The project params file path
        :return: the mapping of each shared knowledge bank ``localName`` to its exposed object;
                 exposed objects without a ``localName`` are left out
        :rtype: dict
        """
        project_params = base.json_loadf(params_file)
        exposed_objects = project_params.get("exposedObjects", {}).get("objects", [])

        configs_by_kb = {}
        for exposed in exposed_objects:
            print("Reading exposed object {}".format(exposed))
            if exposed.get("type") != "RETRIEVABLE_KNOWLEDGE":
                continue
            local_name = exposed.get("localName")
            if local_name is not None:
                configs_by_kb[local_name] = exposed
        return configs_by_kb

    @staticmethod
    def transform_kb_shared_config_to_retrieval_augmented_llm(sm_ref, kb_shared_conf):
        """
        Generates the shared llm params for the new saved model
        :param str sm_ref: The reference of the saved model
        :param dict kb_shared_conf: Original shared conf from the kb
        :return: The new shared llm params for the saved model
        :rtype: dict
        """
        sm_conf = copy.deepcopy(kb_shared_conf)
        sm_conf["type"] = "SAVED_MODEL"
        sm_conf["localName"] = sm_ref
        return sm_conf

    @staticmethod
    def extend_shared_params(params_file, new_params):
        """
        Extends the shared params of the project with the new params.

        A new param is skipped when an exposed object with the same ``localName`` is already
        present, whether it comes from the file or from an earlier entry of ``new_params``.
        The params file is written back in every case.

        :param str params_file: The project params file path
        :param list[dict] new_params: The new params to add to the project params; each one must
            have a ``localName``
        :return: None
        """

        project_params = base.json_loadf(params_file)
        exposed_objects = project_params.setdefault("exposedObjects", {}).setdefault("objects", [])
        # .get(): an already exposed object without localName must not abort the migration
        already_exposed_keys = {exposed.get("localName") for exposed in exposed_objects}
        for new_exposed in new_params:
            local_name = new_exposed["localName"]
            if local_name in already_exposed_keys:
                # Don't add if already exposed
                continue
            exposed_objects.append(new_exposed)
            # Remember it so that a duplicate later in new_params is not exposed twice
            already_exposed_keys.add(local_name)
        base.json_dumpf(params_file, project_params)

    # Knowledge bank processing
    def generate_new_retrieval_augmented_llms(self, config, params_file):
        """
        This method is read only: it iterates over the knowledge banks and their llms and generates
        a corresponding retrieval augmented llm saved model, as well as its shared configuration if
        the knowledge bank is being shared.

        A failure on one knowledge bank file is logged and the next file is processed.

        :param str config: The config path
        :param str params_file: The project params file path containing the shared configuration
        :return: A tuple with the new saved model llms (as ``(sm_ref, saved model)`` pairs), their
                 shared configurations and a mapping from knowledge bank to the refs of the created llms
        :rtype: tuple[list[tuple], list[dict], dict]
        """

        created_llms = []
        shared_llms = []
        llm_refs_by_kb = {}

        shared_conf_by_kb = self.get_shared_kb_configs(params_file)

        for kb_file in glob("%s/knowledge-banks/*.json" % config):
            print("Extracting retrieval augmented LLMs from knowledge bank config file {}".format(kb_file))
            try:
                kb = base.json_loadf(kb_file)
                kb_ref, _ = os.path.splitext(os.path.basename(kb_file))
                llm_refs_by_kb[kb_ref] = []
                for index, llm_settings in enumerate(kb.get("llmsExposedWith", [])):
                    # The first llm of a knowledge bank gets the bare name, the next ones a numeric suffix
                    name = "Retrieval of " + kb.get("name", "knowledge bank")
                    if index:
                        name += " " + str(index)
                    sm_ref = self.generate_sm_ref(kb_ref, llm_settings.get("ragModelId", ""))
                    llm_refs_by_kb[kb_ref].append(sm_ref)
                    saved_model = self.new_sm_retrieval_augmented_llm(name, llm_settings, kb_ref)
                    if kb_ref in shared_conf_by_kb:
                        shared_conf = self.transform_kb_shared_config_to_retrieval_augmented_llm(
                            sm_ref, shared_conf_by_kb[kb_ref])
                        shared_llms.append(shared_conf)
                    created_llms.append((sm_ref, saved_model))

            except Exception as e:
                logging.exception(
                    "Failed to extract retrieval augmented llm from knowledge bank config file {}: {}".format(kb_file, e))
        print(
            "Found {} new retrieval-augmented LLMs to create as saved models".format(len(created_llms)))
        return created_llms, shared_llms, llm_refs_by_kb

    @staticmethod
    def save_new_llm_saved_models(config, new_retrieval_augmented_llms):
        """
        Creates the new llm saved model files in the config/saved_models directory and records
        their creation tag in the version tags file.

        Nothing is created when the version tags file does not exist. A saved model whose file
        already exists is skipped.

        :param str config: The config path
        :param list[tuple] new_retrieval_augmented_llms: The ``(sm_ref, saved model)`` pairs to create
        :return: None
        """
        tags_file = osp.join(config, ".dss-meta", "version-tags.json")
        if not osp.isfile(tags_file):
            print("No tags file, we should not update the llms then")
            return

        tags_content = base.json_loadf(tags_file)
        version_tags = tags_content.setdefault("tags", {})

        sm_dir = osp.join(config, "saved_models")
        base.create_dir_if_needed(sm_dir)

        for sm_ref, sm in new_retrieval_augmented_llms:
            sm_file = osp.join(sm_dir, f"{sm_ref}.json")
            if not osp.exists(sm_file):
                print(f"Creating saved model {sm_file}")
                base.json_dumpf(sm_file, sm)
                # Following the backend logic, the version tags live in a different file; the
                # creation tag of the saved model is reused there to keep both aligned
                version_tags["SAVED_MODEL." + sm_ref] = sm["creationTag"]
            else:
                print(f"Saved model {sm_file} already exists, skipping it")

        base.json_dumpf(tags_file, tags_content)

    @staticmethod
    def update_zones(config, kb_ref_to_llms):
        """
        Adds the new retrieval augmented llm saved models to the zones holding their knowledge bank.

        For every zone file, each ``RETRIEVABLE_KNOWLEDGE`` item whose id is a key of
        ``kb_ref_to_llms`` gets one ``SAVED_MODEL`` item per generated llm, unless an item with
        that id is already in the zone. Every zone file is written back.

        :param str config: The config path
        :param dict kb_ref_to_llms: The mapping from knowledge bank ref to the refs of its new llms
        :return: None
        """
        OBJECT_ID_KEY = "objectId"
        OBJECT_TYPE_KEY = "objectType"
        for zone_file in glob("%s/zones/*.json" % config):
            zone = base.json_loadf(zone_file)
            to_add = []
            items = zone.get("items", [])
            # .get(): an item without object id must not abort the migration
            existing_keys = {item.get(OBJECT_ID_KEY) for item in items}
            for item in items:
                if item.get(OBJECT_TYPE_KEY) != "RETRIEVABLE_KNOWLEDGE":
                    continue
                object_id = item.get(OBJECT_ID_KEY, "")
                for retrieval_augmented_llm in kb_ref_to_llms.get(object_id, []):
                    if retrieval_augmented_llm in existing_keys:
                        # Already in the zone: do not add the same id again, e.g. when the
                        # migration is rerun or when the knowledge bank is listed twice
                        continue
                    to_add.append({
                        OBJECT_ID_KEY: retrieval_augmented_llm,
                        OBJECT_TYPE_KEY: "SAVED_MODEL"
                    })
                    existing_keys.add(retrieval_augmented_llm)
            items.extend(to_add)
            base.json_dumpf(zone_file, zone)

    # LLM ID Migration methods for the different elements
    def migrate_agents(self, config):
        """
        Migrates the llm ids referenced by the agent saved models.

        For ``TOOLS_USING_AGENT`` saved models the ``llmId`` of each inline version's
        ``toolsUsingAgentSettings`` is migrated; for ``PLUGIN_AGENT`` ones, every value of each
        inline version's ``pluginAgentConfig``. The ``params.config`` values of the guardrails of
        every inline version are migrated whatever the saved model type. Every saved model file
        is written back.

        :param str config: The config path
        :return: None
        """

        def migrate_every_value(mapping):
            # Replaces, in place, each value that is an llm id in the old format
            for key in mapping.keys():
                needed, new_llm_id = self.migrate_llm_id_if_needed(mapping[key])
                if needed:
                    mapping[key] = new_llm_id

        for sm_file in glob("%s/saved_models/*.json" % config):
            saved_model = base.json_loadf(sm_file)
            kind = saved_model.get("savedModelType", "")
            if kind == "TOOLS_USING_AGENT":
                for version in saved_model.get("inlineVersions", []):
                    agent_settings = version.get("toolsUsingAgentSettings", {})
                    current_llm_id = agent_settings.get("llmId")
                    if current_llm_id is None:
                        continue
                    needed, new_llm_id = self.migrate_llm_id_if_needed(current_llm_id)
                    if needed:
                        agent_settings["llmId"] = new_llm_id
            elif kind == "PLUGIN_AGENT":
                for version in saved_model.get("inlineVersions", []):
                    migrate_every_value(version.get("pluginAgentConfig", {}))
            # Guardrails settings
            for version in saved_model.get("inlineVersions", []):
                pipeline_settings = version.get("guardrailsPipelineSettings", {})
                for guardrail in pipeline_settings.get("guardrails", []):
                    migrate_every_value(guardrail.get("params", {}).get("config", {}))
            base.json_dumpf(sm_file, saved_model)

    def migrate_tools(self, config):
        """
        Migrates the llm ids referenced by the agent tools: ``params.llmId`` and every value of
        ``params.config``. Every tool file is written back.

        :param str config: The config path
        :return: None
        """

        for tool_file in glob("%s/agent-tools/*.json" % config):
            tool = base.json_loadf(tool_file)
            tool_params = tool.get("params", {})
            current_llm_id = tool_params.get("llmId")
            if current_llm_id is not None:
                _, tool_params["llmId"] = self.migrate_llm_id_if_needed(current_llm_id)
            tool_config = tool_params.get("config", {})
            for key in tool_config.keys():
                # Values that are not llm ids in the old format come back unchanged
                _, tool_config[key] = self.migrate_llm_id_if_needed(tool_config[key])
            base.json_dumpf(tool_file, tool)

    def migrate_prompt_studios(self, config):
        """
        Migrates the llmId of each prompt of the prompt studios. Every prompt studio file is
        written back.

        A prompt without ``llmId`` is left as is: no empty ``llmId`` is added to it.

        :param str config: The config path
        :return: None
        """

        for ps_file in glob("%s/prompt-studios/*.json" % config):
            ps = base.json_loadf(ps_file)
            for prompt in ps.get("prompts", []):
                old_llm_id = prompt.get("llmId")
                if old_llm_id is None:
                    # Nothing to migrate, and the key must not be created as a side effect
                    continue
                migrated, new_llm_id = self.migrate_llm_id_if_needed(old_llm_id)
                if migrated:
                    prompt["llmId"] = new_llm_id
            base.json_dumpf(ps_file, ps)

    def migrate_recipe_payload(self, payload_file, llm_id_key="llmId"):
        """
        Migrates the llm id in the recipe payload file. The file is only written back when the
        llm id was in the old format.

        :param str payload_file: The recipe payload file path
        :param str llm_id_key: The key of the llm id in the recipe payload
        :return: A tuple with the new llm id and the old kb ref, or ``(None, None)`` when no
                 migration was needed
        :rtype: tuple[str, str]
        """

        payload = base.json_loadf(payload_file)
        previous_llm_id = payload.get(llm_id_key, "")
        needed, new_llm_id = self.migrate_llm_id_if_needed(previous_llm_id)
        if needed:
            payload[llm_id_key] = new_llm_id
            base.json_dumpf(payload_file, payload)
            # A migrated id always contains a ":", the kb ref being its second chunk
            return new_llm_id, previous_llm_id.split(":")[1]
        return None, None

    def migrate_recipes(self, config):
        """
        Migrates the llm id in the payload of the LLM recipes and, when a payload was migrated,
        repoints the model inputs of the recipe from the knowledge bank to the new saved model.

        Only the recipe types listed in the mapping below are handled. The payload file sits next
        to the recipe JSON file and is named ``<recipe name>.<recipe type>``.

        :param str config: The config path
        :return: None
        """

        # recipe type -> payload key holding the llm id
        llm_key_by_type = {
            "nlp_llm_evaluation": "completionLLMId",
            "nlp_llm_summarization": "llmId",
            "nlp_llm_user_provided_classification": "llmId",
            "nlp_llm_model_provided_classification": "llmId",
            "prompt": "llmId"
        }

        for recipe_file in glob("%s/recipes/*.json" % config):
            recipe = base.json_loadf(recipe_file)
            recipe_name, _ = os.path.splitext(os.path.basename(recipe_file))
            recipe_type = recipe.get("type", "")
            llm_key = llm_key_by_type.get(recipe_type)
            if llm_key is None:
                continue

            payload_file = os.path.join(config, "recipes", recipe_name + "." + recipe_type)
            new_llm_id, old_kb_ref = self.migrate_recipe_payload(payload_file, llm_key)
            if new_llm_id is None:
                continue

            model_inputs = recipe.get("inputs", {}).get("model", {}).get("items", [])
            for model_input in model_inputs:
                # Only the refs pointing directly to the kb are changed, other model inputs are ignored
                if model_input.get("ref", "") != old_kb_ref:
                    continue
                model_input["ref"] = new_llm_id.split(":")[1]
            base.json_dumpf(recipe_file, recipe)

    def migrate_llm_id_if_needed(self, llm_id):
        """
        Migrates the llm id if it is in the old format, otherwise it simply returns the given llm_id
        :param str llm_id: The llm id to migrate
        :return: A tuple with a boolean indicating if the migration was needed and the new llm id
        :rtype: tuple[bool, str]
        """
        if not isinstance(llm_id, str):
            return False, llm_id
        match = self.OLD_LLM_ID_PATTERN.match(llm_id)
        if not match:
            return False, llm_id
        kb_ref, rag_llm_id = match.group(1), match.group(2)
        sm_ref = self.generate_sm_ref(kb_ref, rag_llm_id)
        if sm_ref is None:
            return False, llm_id
        return True, "retrieval-augmented-llm:" + sm_ref

    # Other helper methods

    @staticmethod
    def get_version_id(rag_model_id):
        """
        # The supported pattern for rag_model_id is `/^[A-Za-z0-9_-]+$/` while the one for the versionId is`/^\w+$/`
        # We need to replace the `-` by `_` to make it a valid versionId
        :param rag_model_id: original rag model ID
        :return: The safe version ID
        :rtype: str
        """
        return rag_model_id.replace("-", "_")

    def generate_sm_ref(self, kb_ref, rag_model_id):
        """
        Builds the smart ref of the saved model replacing a retrieval augmented llm.

        The saved model id is the hash (see hash_method) of the knowledge bank id and the rag model id.
        When kb_ref is of the form `PROJECT.kbId`, the `PROJECT.` prefix is kept in front of the hash.

        :param kb_ref: The smart ref of a knowledge bank, either `kbId` or `PROJECT.kbId`
        :param rag_model_id: The rag model id defined by the user
        :return: The new sm ref, or None (after logging an error) when kb_ref contains more than one dot
        :rtype: str or None
        """
        segments = kb_ref.split(".")
        if len(segments) > 2:
            logging.error("Invalid kb_ref format: {}".format(kb_ref))
            return None

        # The last segment is the knowledge bank id, the optional first one is the project key
        hashed_id = self.hash_method(segments[-1], rag_model_id)
        if len(segments) == 1:
            return hashed_id
        return segments[0] + "." + hashed_id

    @staticmethod
    def hash_method(part1, part2):
        """
        :type part1: str
        :type part2: str
        :rtype: str
        """
        return hex(zlib.crc32((part1 + ":" + part2).encode('ascii')) & 0xffffffff).split("x")[1].zfill(8)

    def new_sm_retrieval_augmented_llm(self, name, original_llm_settings, kb_ref):
        """
        Creates the configuration for a new retrieval augmented llm saved model from the original llm settings
        :param str name: The name of the new saved model
        :param dict original_llm_settings: The original config
        :param str kb_ref: The kb ref it is based on
        :return: A dictionary containing the configuration
        :rtype: dict
        """
        new_llm_settings = copy.deepcopy(original_llm_settings)
        new_llm_settings["kbRef"] = kb_ref
        version_id = self.get_version_id(original_llm_settings.get("ragModelId", ""))
        new_llm_settings.pop("ragModelId", None)
        new_llm_settings.pop("customizeContextMessage", None)
        creation_tag = {
            "versionNumber": 0,
            "lastModifiedBy": {
                "login": "admin"
            },
            "lastModifiedOn": int(time.time() * 1000)
        }

        # [sc-247869] handle renaming in migration, while we're still in 14.0.0 (not released yet)
        if "guardrailsSettings" in new_llm_settings:
            new_llm_settings["ragSpecificGuardrails"] = new_llm_settings["guardrailsSettings"]
            del new_llm_settings["guardrailsSettings"]

        if (not original_llm_settings.get("customizeContextMessage", "")) or (not original_llm_settings.get("customizeContextMessage", False)):
            new_llm_settings["contextMessage"] = """Use the following pieces of context to answer the question at the end.
If you do not know the answer, just say that you do not know, do not try to make up an answer.
"""
        return {
            "needsInputDataFolder": False,
            "savedModelType": "RETRIEVAL_AUGMENTED_LLM",
            "publishPolicy": "UNCONDITIONAL",
            "rebuildBehavior": "EXPLICIT",
            "cleanTemporaryVersionsPostJob": False,
            "name": name,
            "activeVersion": version_id,
            "inlineVersions": [
                {
                    "versionId": version_id,
                    "versionTag": creation_tag,
                    "creationTag": creation_tag,
                    "pythonAgentSettings": {
                        "codeEnvSelection": {
                            "envMode": "INHERIT"
                        },
                        "containerExecSelection": {
                            "containerMode": "INHERIT"
                        },
                        "dependencies": [],
                        "supportsImageInputs": False,
                        "maxParallelRequestsPerProcess": 4
                    },
                    "pluginAgentSettings": {
                        "containerExecSelection": {
                            "containerMode": "INHERIT"
                        },
                        "maxParallelRequestsPerProcess": 4
                    },
                    "toolsUsingAgentSettings": {
                        "tools": [],
                        "containerExecSelection": {
                            "containerMode": "INHERIT"
                        },
                        "maxParallelRequestsPerProcess": 4
                    },
                    "ragllmSettings": new_llm_settings,
                    "guardrailsPipelineSettings": {
                        "guardrails": []
                    }
                }
            ],
            "conditionalOutputs": [],
            "partitioning": {
                "ignoreNonMatchingFile": False,
                "considerMissingRequestedPartitionsAsEmpty": False,
                "dimensions": []
            },
            "flowOptions": {
                "virtualizable": False,
                "rebuildBehavior": "NORMAL",
                "crossProjectBuildBehavior": "DEFAULT",
                "ignoreErrorStatusOnBuild": False
            },
            "metrics": {
                "probes": [],
                "engineConfig": {
                    "padRunsWithMetrics": False,
                    "hive": {
                        "active": True,
                        "extraConf": []
                    },
                    "basic": {},
                    "dss": {
                        "active": True,
                        "selection": {
                            "useMemTable": False,
                            "filter": {
                                "distinct": False,
                                "enabled": False
                            },
                            "partitionSelectionMethod": "ALL",
                            "latestPartitionsN": 1,
                            "ordering": {
                                "enabled": False,
                                "rules": []
                            },
                            "samplingMethod": "FULL",
                            "maxRecords": -1,
                            "targetRatio": 0.02,
                            "ascending": True,
                            "withinFirstN": -1,
                            "maxReadUncompressedBytes": -1
                        }
                    },
                    "sql": {
                        "active": True
                    },
                    "impala": {
                        "active": True
                    },
                    "spark": {
                        "active": True,
                        "extraConf": []
                    },
                    "python": {}
                },
                "displayedState": {
                    "partition": "NP",
                    "columns": [],
                    "metrics": ["reporting:BUILD_DURATION"]
                }
            },
            "metricsChecks": {
                "runOnBuild": False,
                "checks": [],
                "displayedState": {
                    "partition": "NP",
                    "checks": []
                }
            },
            "lastTrainIndex": 1,
            "lastTrainJobId": "",
            "creationTag": creation_tag,
            "tags": [],
            "customFields": {},
            "checklists": {
                "checklists": []
            }
        }
class V14000RenameStoriesGeneralSettings(migration_json.JsonMigrationOperation):
    """Renames the story-related keys of general-settings.json to their 'stories' counterparts."""

    def __repr__(self):
        return "Rename fields from [Ss]tory to [Ss]tories in general-settings.json"

    def appliesTo(self):
        return [ "design" ]

    def transform(self, settings, filepath=None):
        """
        Renames `dataikuStorySettings` at the root of the settings and the three `storyAI*` flags
        found under `aiDrivenAnalyticsSettings` (when that section exists). Keys that are absent
        are left alone; the settings dict is modified in place and returned.
        """
        root_renames = (
            ("dataikuStorySettings", "dataikuStoriesSettings"),
        )
        ai_renames = (
            ("storyAIEnabled", "storiesAIEnabled"),
            ("storyAISamplesEnabled", "storiesAISamplesEnabled"),
            ("storyAITelemetryEnabled", "storiesAITelemetryEnabled"),
        )

        def _rename_keys(target, renames):
            # Move the value of each existing old key under its new name
            for previous_key, renamed_key in renames:
                if previous_key in target:
                    target[renamed_key] = target.pop(previous_key)

        _rename_keys(settings, root_renames)

        if "aiDrivenAnalyticsSettings" in settings:
            _rename_keys(settings["aiDrivenAnalyticsSettings"], ai_renames)

        return settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]

class V14000RenameStoriesCategoryInInstallConfig(migration_base.MigrationOperation):
    """Renames the `[story]` section of install.ini to `[stories]`."""

    def __repr__(self):
        return "Rename category 'story' to 'stories' in install.ini"

    def appliesTo(self):
        return [ "design" ]

    def execute(self, diphome, simulate=False):
        """
        Rewrites `<diphome>/install.ini` with the `[story]` section header renamed to `[stories]`.

        When a `[story]` section is found, the content of any pre-existing `[stories]` section is dropped
        so that the resulting file does not contain the section twice. Nothing is written when the file
        does not exist, cannot be read, has no `[story]` section, or when `simulate` is set.

        :param diphome: object whose `path` attribute is the data directory
        :param bool simulate: when True, only detect the change, do not write the file
        """
        install_ini = osp.join(diphome.path, "install.ini")
        if not osp.isfile(install_ini):
            return

        lines = []
        inside_stories_section = False
        has_story = False
        try:
            with open(install_ini) as f:
                for line in f:
                    if line.startswith('[stories]'):
                        inside_stories_section = True
                    elif line.startswith('['):
                        inside_stories_section = False

                    if line.startswith('[story]'):
                        print("'story' settings found in install.ini")
                        has_story = True
                        lines.append(line.replace('[story]', '[stories]', 1))
                    elif inside_stories_section:
                        continue # avoids duplicate [stories]
                    else:
                        lines.append(line)
        except Exception as e:
            print(f"Reading install.ini in rename stories settings migration FAILED: {e}")
            # `lines` may be incomplete at this point: writing it back would truncate install.ini
            return

        if not has_story:
            print("'story' settings not found in install.ini, noop")
            return

        if simulate:
            return

        try:
            print("Renaming 'story' settings to 'stories' in install.ini")
            with open(install_ini, 'w') as f:
                f.writelines(lines)
        except Exception as e:
            print(f"Writing install.ini in rename stories settings migration FAILED: {e}")

class V14000RenameStoriesPort(migration_json.JsonMigrationOperation):
    """Renames the exported `DKU_STORY_PORT` variable of bin/env-site.sh to `DKU_STORIES_PORT`."""

    def __repr__(self):
        return "Rename DKU_STORY_PORT to DKU_STORIES_PORT in env-site.sh"

    def appliesTo(self):
        return [ "design" ]

    def execute(self, diphome, simulate=False):
        """
        Rewrites `<diphome>/bin/env-site.sh`, replacing the `export DKU_STORY_PORT=` prefix of the lines
        starting with it by `export DKU_STORIES_PORT=`.

        Nothing is written when the file does not exist, cannot be read, does not contain such a line,
        or when `simulate` is set.

        :param diphome: object whose `path` attribute is the data directory
        :param bool simulate: when True, only detect the change, do not write the file
        """
        env_site = osp.join(diphome.path, "bin", "env-site.sh")
        if not osp.isfile(env_site):
            return

        has_change = False
        lines = []
        try:
            with open(env_site) as f:
                for line in f:
                    if line.startswith('export DKU_STORY_PORT='):
                        print("DKU_STORY_PORT found in env-site.sh")
                        has_change = True
                        lines.append(line.replace('export DKU_STORY_PORT=', 'export DKU_STORIES_PORT=', 1))
                    else:
                        lines.append(line)
        except Exception as e:
            print(f"Reading env-site.sh in rename stories port migration FAILED: {e}")
            # `lines` may be incomplete at this point: writing it back would truncate env-site.sh
            return

        if not has_change:
            print("DKU_STORY_PORT not found in env-site.sh, noop")
            return

        if simulate:
            return

        try:
            print("Renaming DKU_STORY_PORT to DKU_STORIES_PORT in env-site.sh")
            with open(env_site, 'w') as f:
                f.writelines(lines)
        except Exception as e:
            print(f"Writing env-site.sh in rename stories port migration FAILED: {e}")

class V14000RenameStoriesDirectories(migration_json.JsonMigrationOperation):
    """Renames the `story` paths of the data directory to `stories`."""

    def __repr__(self):
        return "Rename directory story to stories in [datadir] and [datadir]/run/"

    def appliesTo(self):
        return [ "design" ]

    def execute(self, diphome, simulate=False):
        """
        Renames, in this order: `story` -> `stories`, `run/story` -> `run/stories` and
        `run/story.log` -> `run/stories.log`, all relative to `diphome.path`.

        A rename is skipped when the target already exists, when the source does not exist,
        or when `simulate` is set. A failing rename is reported and does not stop the others.
        """
        def rename_path(old_path, new_path):
            # Never overwrite an existing target
            if osp.exists(new_path):
                print(f"Warning: '{new_path}' already exists, noop")
                return
            if not osp.exists(old_path):
                print(f"'{old_path}' does not exist, noop")
                return
            if simulate:
                return

            print(f"Renaming '{old_path}' to '{new_path}'")
            try:
                os.rename(old_path, new_path)
            except Exception as e:
                print(f"Renaming '{old_path}' to '{new_path}' in stories directories renaming migration FAILED: {e}")

        datadir = diphome.path
        renames = (
            # data
            (osp.join(datadir, "story"), osp.join(datadir, "stories")),
            # logs directory
            (osp.join(datadir, "run", "story"), osp.join(datadir, "run", "stories")),
            # log file
            (osp.join(datadir, "run", "story.log"), osp.join(datadir, "run", "stories.log")),
        )
        for source, target in renames:
            rename_path(source, target)

class V14000ChangeReferenceModeForProxyAndDatabricksCodeEnvs(migration_base.MigrationOperation):
    """Sets `useReferenceSpec` to true in the desc.json of two internal Python code environments."""

    def __repr__(self):
        return "Use reference definition mode for Databricks utils and External/Proxy models code environments"

    def execute(self, diphome, simulate=False):
        """
        For each of `INTERNAL_databricks_utils_v1` and `INTERNAL_proxy_models_v1`, loads
        `<diphome>/code-envs/desc/python/<env>/desc.json` when it exists, sets `useReferenceSpec`
        to True and writes the file back.

        :param diphome: object whose `path` attribute is the data directory
        :param bool simulate: when True, no file is modified (same convention as the other
                              operations of this file that write outside of the JSON framework)
        """
        for code_env in ["INTERNAL_databricks_utils_v1", "INTERNAL_proxy_models_v1"]:
            settings_file = osp.join(diphome.path, "code-envs/desc/python", code_env, "desc.json")
            if not osp.isfile(settings_file):
                continue
            if simulate:
                # Dry run: do not touch the file
                continue
            settings = base.json_loadf(settings_file)

            # Add the useReferenceSpec key with a true value to the settings
            settings["useReferenceSpec"] = True

            # Write the modified settings to the desc.json file
            base.json_dumpf(settings_file, settings)

class V14000AddDeprecatedPythonVersionsToCodeEnvs(migration_json.JsonMigrationOperation):
    """
    Adds the `PYTHON3x_SUPPORT` predefined container hook to the code environments using
    Python 3.6, 3.7 or 3.8, only when the `/etc/dataiku-cloudstacks` marker file exists.
    """

    def __repr__(self):
        return "Add deprecated Python versions to code environments"

    def appliesTo(self):
        return ["design", "automation"]

    def file_patterns(self):
        return [
            "code-envs/desc/python/*/desc.json",
            "acode-envs/python/*/desc/desc.json",
            "acode-envs/python/*/versions/v.*/desc/desc.json",
        ]

    def should_code_env_migrate(self, code_env_desc):
        """
        Tells whether the code env desc has both a deployment mode and a Python interpreter
        that are concerned by this migration.

        :param dict code_env_desc: content of a code env desc.json
        :rtype: bool
        """
        eligible_modes = (
            "DSS_INTERNAL",
            "DESIGN_MANAGED",
            "AUTOMATION_VERSIONED",
            "AUTOMATION_SINGLE",
            "PLUGIN_MANAGED",
        )
        eligible_interpreters = ("PYTHON36", "PYTHON37", "PYTHON38")

        # A missing key yields None, which belongs to neither tuple
        if code_env_desc.get("deploymentMode") not in eligible_modes:
            return False
        return code_env_desc.get("pythonInterpreter") in eligible_interpreters

    def transform(self, code_env_desc, filepath=None):
        """
        Appends the hook matching the interpreter of the code env to `predefinedContainerHooks`,
        unless a hook of the same type is already listed. The desc is returned in every case.
        """
        # Check if Cloud Stacks marker (/etc/dataiku-cloudstacks) exists
        if not os.path.exists("/etc/dataiku-cloudstacks"):
            print("Not a Cloud Stacks installation, skipping migration")
            return code_env_desc

        if not self.should_code_env_migrate(code_env_desc):
            print("Code env '%s' will not be migrated, skipping" % filepath)
            return code_env_desc

        interpreter = code_env_desc["pythonInterpreter"]
        hooks_by_interpreter = {
            "PYTHON36": { "type": "PYTHON36_SUPPORT" },
            "PYTHON37": { "type": "PYTHON37_SUPPORT" },
            "PYTHON38": { "type": "PYTHON38_SUPPORT" },
        }

        hook = hooks_by_interpreter.get(interpreter)
        if hook is None:
            print("Python interpreter version '%s' for code env '%s' desc has no matching container runtime addition, skipping" % (interpreter, filepath))
            return code_env_desc

        existing_hooks = code_env_desc.get("predefinedContainerHooks")
        if existing_hooks:
            already_there = any(hook["type"] == existing.get("type", "") for existing in existing_hooks)
            if already_there:
                print("Container runtime addition '%s' already exists in code env '%s' desc, skipping" % (hook["type"], filepath))
                return code_env_desc
            existing_hooks.append(hook)
        else:
            # Missing, null or empty list: start a new list
            code_env_desc["predefinedContainerHooks"] = [hook]

        print("Added deprecated Python version container runtime addition '%s' to code env '%s' desc" % (hook["type"], filepath))
        return code_env_desc

class V14010EnsureRaLLMsHaveActiveVersion(migration_base.ProjectLocalMigrationOperation):
    """Gives an active version to the retrieval-augmented LLM saved models that have none."""

    def __repr__(self):
        return "Ensure retrieval-augmented LLMs have an active version"

    def execute(self, project_paths):
        """
        For each `saved_models/*.json` of the project config whose `savedModelType` is
        `RETRIEVAL_AUGMENTED_LLM` and whose `activeVersion` is missing or empty, sets `activeVersion`
        to the `versionId` of the first inline version.

        Only the files that are actually modified are written back: other saved models are left untouched.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for sm_file in glob("%s/saved_models/*.json" % project_paths.config):
            sm = base.json_loadf(sm_file)
            if sm.get("savedModelType", "") != "RETRIEVAL_AUGMENTED_LLM":
                continue
            if sm.get("activeVersion", ""):
                # Already has an active version
                continue

            inline_versions = sm.get("inlineVersions", [])
            if not inline_versions:
                print(f"Warning: Weird, but no inline versions found for {sm_file}, skipping")
                continue
            version_id = inline_versions[0].get("versionId", "")
            if not version_id:
                print(f"Warning: Weird, but no versionId found for {sm_file}, skipping")
                continue

            sm["activeVersion"] = version_id
            base.json_dumpf(sm_file, sm)


class V14010RemoveEmptyLLMIds(migration_base.ProjectLocalMigrationOperation):
    """Drops the llm id keys holding an empty string from LLM recipe payloads and prompt studios."""

    def __repr__(self):
        return "Remove empty llm ids from prompt studios and llm recipes"

    @staticmethod
    def remove_llm_key_from_recipe_payload(payload_file, llm_id_key="llmId"):
        """
        Removes the llm id key if it is an empty string
        The payload file is only rewritten when the key was removed
        :param str payload_file: The recipe payload file path
        :param str llm_id_key: The key of the llm id in the recipe payload
        """

        mpc_recipe = base.json_loadf(payload_file)
        if mpc_recipe.get(llm_id_key) == "":
            del mpc_recipe[llm_id_key]
            base.json_dumpf(payload_file, mpc_recipe)

    def execute(self, project_paths):
        """
        Cleans the payload (`recipes/<name>.<type>`) of every recipe whose type is listed below, then
        the `llmId` of every prompt of `prompt-studios/*.json`.

        Errors are handled per file: a file that cannot be processed is reported and does not prevent
        the remaining recipes and prompt studios from being migrated.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        recipe_type_to_llm_key = {
            "nlp_llm_evaluation": "completionLLMId",
            "nlp_llm_summarization": "llmId",
            "nlp_llm_user_provided_classification": "llmId",
            "nlp_llm_model_provided_classification": "llmId",
            "prompt": "llmId"
        }

        for recipe_file in glob("%s/recipes/*.json" % project_paths.config):
            try:
                recipe = base.json_loadf(recipe_file)
                recipe_type = recipe.get("type", "")
                if recipe_type not in recipe_type_to_llm_key:
                    continue

                # The payload lives next to the recipe json, with the recipe type as extension
                recipe_name = os.path.splitext(os.path.basename(recipe_file))[0]
                recipe_payload_file = os.path.join(project_paths.config, "recipes", recipe_name + "." + recipe_type)
                self.remove_llm_key_from_recipe_payload(recipe_payload_file, recipe_type_to_llm_key[recipe_type])
            except Exception as e:
                print(f"Error while removing empty llm ids in {recipe_file}: {e}")

        for ps_file in glob("%s/prompt-studios/*.json" % project_paths.config):
            try:
                updated = False
                ps = base.json_loadf(ps_file)
                for prompt in ps.get("prompts", []):
                    if prompt.get("llmId") == "":
                        del prompt["llmId"]
                        updated = True
                if updated:
                    base.json_dumpf(ps_file, ps)
            except Exception as e:
                print(f"Error while removing empty llm ids in {ps_file}: {e}")


###############################################################################
# V14020 / DSS 14.0.2
###############################################################################

class V14020ResultSetsMemorySizeLimit(migration_json.JsonMigrationOperation):
    """Adds the default result set size limits of SQL notebooks and SQL scenario steps to the general settings."""

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Add SQL Result Sets memory size limitation for SQL notebooks and SQL scenario steps"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, general_settings, filepath=None):
        """
        Sets `limits.sqlNotebookResultSetBytes` and `limits.sqlScenarioResultSetBytes` to
        `{"soft": -1, "hard": 524288000}`, unless `sqlNotebookResultSetBytes` is already defined.

        The `limits` section is created when it is missing or null, so that the new limits are
        always part of the returned settings.

        :param dict general_settings: content of general-settings.json, modified in place
        :return: the general settings
        :rtype: dict
        """
        limits = general_settings.get("limits")
        if limits is None:
            # Attach the new section to the settings: a detached default dict would be lost
            limits = {}
            general_settings["limits"] = limits

        if "sqlNotebookResultSetBytes" in limits:
            return general_settings

        for limit_name in ("sqlNotebookResultSetBytes", "sqlScenarioResultSetBytes"):
            # 524288000 bytes = 500 * 1024 * 1024
            limits[limit_name] = {
                "soft": -1,
                "hard": 524288000
            }
        return general_settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]

class V14020MigrateFullDocumentSettingforVLMRuleInEmbedDocumentsRecipe(migration_base.ProjectLocalMigrationOperation):
    """Replaces the `DOCUMENT` split unit of the VLM settings of `embed_documents` recipes by a custom 20-page window."""

    def __repr__(self, ):
        return "Migrate 'Full document' split setting to 'Sliding window of pages' for VLM rules in 'Embed document' recipes"

    def execute(self, project_paths):
        self.migrate_embed_doc_recipes(project_paths)

    @staticmethod
    def migrate_embed_doc_recipes(project_paths):
        """
        Migrates the `rules` and the `allOtherRule` of the params of every `embed_documents` recipe
        found in `recipes/*.json`. A recipe file is only written back when one of its rules changed;
        a failure on one recipe is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        migrate_rule = V14020MigrateFullDocumentSettingforVLMRuleInEmbedDocumentsRecipe.migrate_rule

        for recipe_file in glob("%s/recipes/*.json" % project_paths.config):
            try:
                recipe = base.json_loadf(recipe_file)
                if recipe.get("type", "") != "embed_documents":
                    continue

                params = recipe.get("params", {})
                candidate_rules = list(params.get("rules", []))
                candidate_rules.append(params.get("allOtherRule", {}))

                # Build the full list first: every rule must be visited, no short-circuit
                outcomes = [migrate_rule(candidate) for candidate in candidate_rules]
                if any(outcomes):
                    base.json_dumpf(recipe_file, recipe)
                    print("Migrated embed doc recipe payload: %s" % recipe_file)
            except Exception as e:
                print("Migrating embed doc recipe %s FAILED: %s" % (recipe_file, e))

    @staticmethod
    def migrate_rule(rule):
        """
        Switches the `vlmSettings` of the rule from `splitUnit` `DOCUMENT` to `CUSTOM`,
        with `customNbPages` 20 and `customPagesOverlap` 0.

        :param dict rule: a rule of the recipe params, modified in place
        :return: True when the rule was modified
        :rtype: bool
        """
        vlm_settings = rule.get("vlmSettings", {})
        if vlm_settings.get("splitUnit", "") != "DOCUMENT":
            return False

        vlm_settings["splitUnit"] = "CUSTOM"
        vlm_settings["customNbPages"] = 20
        vlm_settings["customPagesOverlap"] = 0
        return True


class V14020MigrateSendMessageTools(migration_base.ProjectLocalMigrationOperation):
    """Adds the `message` tool input variable to the `DataikuReporter` agent tools that have no variables."""

    def __repr__(self,):
        return "Migrate Send Message tools"

    def execute(self, project_paths):
        """
        For every `agent-tools/*.json` of type `DataikuReporter` whose params have no `variables` key:
        declares a single `message` variable fed by the tool input, points the message of
        `slack-direct` / `msft-teams-direct` integrations to `${message}` (and sets the message mode of
        `slack-direct` to `TEXT_MESSAGE`), then writes the file back.

        A failure on one tool is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                if tool.get("type", "") != "DataikuReporter":
                    continue

                params = tool.get("params", {})
                if "variables" in params:
                    # Already has variables, leave the tool as is
                    continue

                params["variables"] = [
                    {
                        "sourceType": "TOOL_INPUT",
                        "id": "message",
                        "toolInputDescription": "The message to send"
                    }
                ]
                # params may be a brand new dict when the tool had none
                tool["params"] = params

                integration_type = params.get("integration", {}).get("type", "")
                if integration_type in ("slack-direct", "msft-teams-direct"):
                    configuration = params["integration"]["configuration"]
                    configuration["message"] = "${message}"
                    if integration_type == "slack-direct":
                        configuration["messageMode"] = "TEXT_MESSAGE"

                base.json_dumpf(tool_file, tool)
                print("Migrated tool: %s" % tool_file)
            except Exception as e:
                print("Migrating tool %s FAILED: %s" % (tool_file, e))


###############################################################################
# V14100 / DSS 14.1.0
###############################################################################
class V14100ExtractPythonAgentCodeToPythonFile(migration_base.ProjectLocalMigrationOperation):
    """Moves the inline code of the versions of `PYTHON_AGENT` saved models to `saved_models/<smId>_<versionId>.py`."""

    def __repr__(self):
        return "Extract the code of python agent from config files to dedicated python files."

    def execute(self, project_paths):
        """
        For every inline version (with a `versionId`) of every `PYTHON_AGENT` saved model:
        - writes its `code` (empty when absent) to the dedicated python file, unless that file already exists
        - removes the `code` key from the version

        The saved model json is only written back when a `code` key was actually removed, so other
        saved models and already-migrated agents are left untouched. A version without a `code` key
        no longer aborts the migration of the following versions.

        A failure on one saved model is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for sm_file in glob("%s/saved_models/*.json" % project_paths.config):
            try:
                sm = base.json_loadf(sm_file)
                sm_id = os.path.splitext(os.path.basename(sm_file))[0]
                if not sm_id or sm.get("savedModelType", "") != "PYTHON_AGENT":
                    continue

                changed = False
                for version in sm.get("inlineVersions", []):
                    version_id = version.get("versionId")
                    if version_id is None:
                        continue
                    python_file = os.path.join(project_paths.config, "saved_models", sm_id + "_" + version_id + ".py")
                    if not os.path.exists(python_file):
                        with open(python_file, 'w') as file:
                            file.write(version.get("code", ""))
                    if "code" in version:
                        del version["code"]
                        changed = True

                if changed:
                    base.json_dumpf(sm_file, sm)
            except Exception as e:
                print(f"Extracting the code of python agent from config files to dedicated python files failed: {e}")

class V14100MigrateRagSearchType(migration_base.ProjectLocalMigrationOperation):
    """Derives the `searchType` of RA LLMs and KB search tools from their `useMMR` / `useHybridSearch` flags."""

    def __repr__(self, ):
        return "Migrate RAG search types for RA LLMs and KB search tools in projects"

    def execute(self, project_paths):
        self.migrate_ra_llms(project_paths)
        self.migrate_kb_search_tools(project_paths)

    @staticmethod
    def _resolve_search_type(settings):
        """
        Maps the legacy flags to a search type: `useMMR` wins over `useHybridSearch`,
        and `SIMILARITY` is used when neither flag is set.

        :param dict settings: the settings holding the legacy flags
        :rtype: str
        """
        if settings.get("useMMR", False):
            return "MMR"
        if settings.get("useHybridSearch", False):
            return "HYBRID"
        return "SIMILARITY"

    @staticmethod
    def migrate_ra_llms(project_paths):
        """
        Sets `ragllmSettings.searchType` on every inline version of the `RETRIEVAL_AUGMENTED_LLM`
        saved models that does not have one yet. Files are only written back when modified;
        a failure on one file is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for saved_model_config_file in glob("%s/saved_models/*.json" % project_paths.config):
            try:
                saved_model = base.json_loadf(saved_model_config_file)
                if saved_model.get("savedModelType") != "RETRIEVAL_AUGMENTED_LLM":
                    continue  # not a RA LLM

                migrated = False

                for inline_version in saved_model.get("inlineVersions", []):
                    # setdefault keeps the settings attached to the version, so that the
                    # search type is actually persisted even when the section was missing
                    rag_llm_settings = inline_version.setdefault("ragllmSettings", {})
                    if rag_llm_settings.get("searchType"):
                        continue  # already migrated

                    rag_llm_settings["searchType"] = V14100MigrateRagSearchType._resolve_search_type(rag_llm_settings)
                    migrated = True

                if migrated:
                    base.json_dumpf(saved_model_config_file, saved_model)
                    ra_llm_name = saved_model.get("name", "")
                    print(f"Updated Search Type for RA LLM: {ra_llm_name}")
            except Exception as e:
                print(f"FAILED to migrate search type for saved model from file: {saved_model_config_file}: {e}")

    @staticmethod
    def migrate_kb_search_tools(project_paths):
        """
        Sets `params.searchType` on every `VectorStoreSearch` agent tool that does not have one yet.
        A failure on one file is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for agent_tool_config_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                agent_tool = base.json_loadf(agent_tool_config_file)
                if agent_tool.get("type") != "VectorStoreSearch":
                    continue  # not a KB search tool

                # setdefault keeps the params attached to the tool, so that the
                # search type is actually persisted even when the params were missing
                kb_tool_params = agent_tool.setdefault("params", {})
                if kb_tool_params.get("searchType"):
                    continue  # already migrated

                kb_tool_params["searchType"] = V14100MigrateRagSearchType._resolve_search_type(kb_tool_params)

                base.json_dumpf(agent_tool_config_file, agent_tool)
                tool_name = agent_tool.get("name", "")
                print(f"Updated Search Type for Knowledge Bank Search Tool: {tool_name}")
            except Exception as e:
                print(f"FAILED to migrate search type for agent tool from file: {agent_tool_config_file}: {e}")

class V14100ExtractInlinePythonToolCodeToPythonFile(migration_base.ProjectLocalMigrationOperation):
    """Moves the inline code of `InlinePython` agent tools to `agent-tools/<toolId>.py`."""

    def __repr__(self):
        return "Extract the code of inline python tool from config file to dedicated python file."

    def execute(self, project_paths):
        """
        For every `InlinePython` tool of `agent-tools/*.json` that has params and no dedicated python
        file yet: writes `params.code` (empty when absent) to that file, then removes the `code` key
        and writes the tool json back. Tools whose python file already exists are left untouched.

        A tool without a `code` key no longer fails after its (empty) python file has been created.
        A failure on one tool is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                tool_id = os.path.splitext(os.path.basename(tool_file))[0]
                if not tool_id or tool.get("type", "") != "InlinePython":
                    continue
                params = tool.get("params")
                if params is None:
                    continue

                python_file = os.path.join(project_paths.config, "agent-tools", tool_id + ".py")
                if os.path.exists(python_file):
                    continue

                with open(python_file, 'w') as file:
                    file.write(params.get("code", ""))
                if "code" in params:
                    # Only rewrite the config when there is something to remove from it
                    del params["code"]
                    base.json_dumpf(tool_file, tool)
            except Exception as e:
                print("Extracting the code of inline python tool from config file {} to dedicated python file failed: {}".format(tool_file, e))

class V14100GroupSourcesSettingsForKBSearchTool(migration_base.ProjectLocalMigrationOperation):
    """Copies the sources-related params of `VectorStoreSearch` agent tools into `params.sourcesSettings`."""

    def __repr__(self):
        return "Group sources settings for KB Search Tool from tool['params'][xx] to tool['params']['sourcesSettings'][xx]"


    # Keys of tool['params'] that are copied into tool['params']['sourcesSettings']
    sources_settings_params = ["titleMetadata", "urlMetadata", "thumbnailURLMetadata", "snippetMetadata", "snippetFormat", "metadataInSources"]


    def execute(self, project_paths):
        """
        For every `VectorStoreSearch` tool of `agent-tools/*.json` without `params.sourcesSettings`,
        creates it from the defaults (`snippetFormat` TEXT, empty `metadataInSources`) overridden by
        the values of the `sources_settings_params` keys present in the params. The original keys
        are left in place.

        The params are created on the tool when missing, so that the new section is always persisted.
        A failure on one tool is reported and does not stop the others.

        :param project_paths: object whose `config` attribute is the project config directory
        """
        # Migrate Knowledge Bank Search Tool
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                tool_type = tool.get("type", "")
                if tool_type != "VectorStoreSearch":
                    continue
                # setdefault keeps the params attached to the tool (a detached default dict would be lost)
                params = tool.setdefault("params", {})
                if 'sourcesSettings' in params:
                    print("Skipping: Updating the parameters of KB Search Tool {} already done.".format(tool_file))
                    continue

                # Ensuring the source_settings is created with the default values
                sources_settings = {
                    "snippetFormat": "TEXT",
                    "metadataInSources": []
                }
                sources_settings.update({key: params[key] for key in self.sources_settings_params if key in params})
                params['sourcesSettings'] = sources_settings
                base.json_dumpf(tool_file, tool)
                print("Saving: Updated the parameters of KB Search Tool {}".format(tool_file))
            except Exception as e:
                print("Harmonizing the parameters of KB Search Tool {} failed: {}".format(tool_file, e))



class V14100HomeMessageMustReadsToPromotedContent(migration_json.JsonMigrationOperation):
    """Turn the legacy home message and must-reads links into promoted content cards.

    Reads 'recoBannerHomeMessage' and 'recoBannerMustReads' from the general settings and
    prepends one ADMIN_MESSAGE card per non-empty setting to
    general_settings['personalHomePages']['promotedContent'].
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Migrate the Home Message and MustReads links settings to promoted content cards"

    def appliesTo(self):
        return ["design", "automation"]

    @staticmethod
    def _admin_message_card(title, description):
        # Build one promoted content card of type ADMIN_MESSAGE, without thumbnail
        return {
            "type": "ADMIN_MESSAGE",
            "customTitle": title,
            "customDescription": description,
            "thumbnailMode": "NONE"
        }

    def transform(self, general_settings, filepath=None):
        welcome_text = general_settings.get("recoBannerHomeMessage")
        links = general_settings.get("recoBannerMustReads")
        if not (welcome_text or links):
            # Nothing to migrate
            return general_settings

        home_pages_settings = general_settings.setdefault("personalHomePages", {})
        cards = home_pages_settings.get("promotedContent", [])

        if links:
            # One markdown bullet per link
            bullets = ["- [{0}]({1})".format(link.get("label", ""), link.get("url", "")) for link in links]
            cards.insert(0, self._admin_message_card("Must-reads", "\n".join(bullets)))

        if welcome_text:
            # Inserted last so that the welcome card ends up before the must-reads card
            cards.insert(0, self._admin_message_card("Welcome", welcome_text))

        home_pages_settings["promotedContent"] = cards
        return general_settings

    def file_patterns(self, ):
        return ["config/general-settings.json"]


class V14100ChangeReferenceModeForDeephubCodeEnvs(migration_base.MigrationOperation):
    """Set 'useReferenceSpec' to true in the desc.json of the internal deephub code environments."""

    def __repr__(self):
        return "Activate reference definition mode for image classification and object detection code environments for code environment update."

    def execute(self, diphome, simulate=False):
        """Update the desc.json of each internal code env that exists under <diphome>/code-envs/desc/python.

        Missing code envs are skipped. A failure on one file is logged and does not prevent
        the migration of the other one.
        NOTE: the 'simulate' flag is not consulted here, files are rewritten in all cases.
        """
        for code_env in ["INTERNAL_image_classification_v1", "INTERNAL_object_detection_v1"]:
            settings_file = osp.join(diphome.path, "code-envs/desc/python", code_env, "desc.json")
            if not osp.isfile(settings_file):
                continue

            try:
                settings = base.json_loadf(settings_file)

                # Add the useReferenceSpec key with a true value to the settings
                settings["useReferenceSpec"] = True

                # Write the modified settings to the desc.json file
                base.json_dumpf(settings_file, settings)
                print('File {} successfully migrated'.format(settings_file))
            except Exception:
                # 'Exception' rather than a bare except, so that KeyboardInterrupt / SystemExit still propagate
                logging.exception('Error when migrating file {}'.format(settings_file))


class V14120RemoveAgentToolCustomDescriptionField(migration_base.ProjectLocalMigrationOperation):
    """Move the type-specific description of agent tools to 'additionalDescriptionForLLM'.

    Handled fields:
      - params['knowledgeBankDescription'] for tools of type VectorStoreSearch
      - params['purposeString'] for tools of type LLMMeshLLMQuery
    The field is removed from the params, and its value (when not empty) is prepended to
    the existing 'additionalDescriptionForLLM' of the tool.
    """

    def __repr__(self):
        return "Move the custom description of agent tools to the common description field."

    def execute(self, project_paths):
        """Rewrite the concerned tool files of <project config>/agent-tools, reporting failures per file."""
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                params = tool.get("params")
                if params is None:
                    continue

                tool_type = tool.get("type", "")
                if tool_type == "VectorStoreSearch" and "knowledgeBankDescription" in params:
                    custom_description = params.pop("knowledgeBankDescription")
                elif tool_type == "LLMMeshLLMQuery" and "purposeString" in params:
                    custom_description = params.pop("purposeString")
                else:
                    # Other tool type, or nothing to move
                    continue

                if custom_description:
                    # prepend the custom description, as was done in the back-end before
                    common_description = tool.get("additionalDescriptionForLLM")
                    if common_description:
                        common_description = custom_description + "\n\n" + common_description
                    else:
                        common_description = custom_description
                    tool["additionalDescriptionForLLM"] = common_description

                # Saved even when the custom description was empty, since the legacy field was removed
                base.json_dumpf(tool_file, tool)
            except Exception as e:
                print("Moving the agent tool custom description from config file {} failed: {}: {}".format(tool_file, type(e).__name__, e))


class V14120RenameProjectStandardsConfigFields(migration_json.JsonMigrationOperation):
    """Copy the legacy 'bundle*' fields of the Project Standards config to their new names.

      bundleChecks       -> checks
      bundleScopes       -> nonDefaultScopes
      defaultBundleScope -> defaultScope (with selectionMethod forced to "ALL")

    A new field that already holds a non-empty value is never overwritten, and the legacy
    fields are left in place.
    """

    def __repr__(self,):
        return "Rename Project Standards config fields and set selectionMethod=ALL to the default scope"

    def appliesTo(self):
        return [ "design", "automation" ]

    def transform(self, config, filepath=None):
        for legacy_key, new_key in (("bundleChecks", "checks"), ("bundleScopes", "nonDefaultScopes")):
            legacy_value = config.get(legacy_key)
            # Only copy when there is something to copy: this avoids writing an explicit null
            # (possibly over an existing empty list) when the legacy field is absent
            if not config.get(new_key) and legacy_value is not None:
                config[new_key] = legacy_value

        if not config.get("defaultScope"):
            default_scope = config.get("defaultBundleScope")
            if default_scope:
                default_scope["selectionMethod"] = "ALL"
                config["defaultScope"] = default_scope

        return config

    def file_patterns(self, ):
        return ["config/project-standards.json"]



class V14200SynchronizeProjectTypeBadges(migration_base.MigrationOperation):
    """Run the '__synchronize_type_badges' command of the dku executable after the migration."""

    def __repr__(self):
        return "Synchronize project Type Badges for the first time"

    def execute(self, diphome, simulate=False):
        # All the work is done in post_execute
        pass

    def post_execute(self, diphome):
        import subprocess

        # The DKUBIN environment variable, when set, replaces the default <diphome>/bin/dku
        default_dku = diphome.path + "/bin/dku"
        command = os.getenv("DKUBIN", default_dku) + " __synchronize_type_badges"
        # NOTE(review): the command is interpreted by a shell, so a path containing
        # spaces or shell metacharacters would not be handled as a single executable
        subprocess.check_call(command, shell=True)


class V14200DeleteOrphanEntityFolders(migration_base.MigrationOperation):
    """Remove the <diphome>/agent-tools and <diphome>/prompt-studios folders of deleted entities."""

    def __repr__(self):
        return "Delete internal folders of deleted agent tools and prompt studios"

    def execute(self, diphome, simulate=False):
        self.delete_folders_of_deleted_objects(diphome, simulate, "agent-tools")
        self.delete_folders_of_deleted_objects(diphome, simulate, "prompt-studios")

    @staticmethod
    def _remove_folder(folder, simulate, success_message, failure_message):
        # Delete 'folder' recursively (unless simulating) and report the outcome.
        # An OSError is reported with its cause and is not propagated.
        try:
            if not simulate:
                shutil.rmtree(folder)
            print(success_message)
        except OSError as e:
            print(f"{failure_message}: {e}")

    @staticmethod
    def delete_folders_of_deleted_objects(diphome, simulate, object_type):
        """ Delete remnant folders of entities that were deleted, because of a bug in DSS < 14.0.2
        See https://app.shortcut.com/dataiku/story/238393/bug-deleting-a-project-does-not-properly-delete-its-prompt-studios

        The folders are expected as <diphome>/<object_type>/<project_id>/<object_id>. A project
        folder is orphan when <diphome>/config/projects/<project_id> is not a directory, an object
        folder is orphan when <diphome>/config/projects/<project_id>/<object_type>/<object_id>.json
        is not a file.

        :param diphome: object whose 'path' attribute is the data directory
        :param simulate: when true, nothing is deleted but the messages are still printed
        :param object_type: name of the folder to clean up ("agent-tools" or "prompt-studios")
        """
        remove_folder = V14200DeleteOrphanEntityFolders._remove_folder
        projects_dir = osp.join(diphome.path, "config", "projects")
        orphan_folders_dir = osp.join(diphome.path, object_type)
        if not osp.isdir(orphan_folders_dir):
            return

        for project_id in os.listdir(orphan_folders_dir):
            project_folder = osp.join(orphan_folders_dir, project_id)
            if not osp.isdir(project_folder):
                continue

            # delete the whole folder if the project was deleted
            if not osp.isdir(osp.join(projects_dir, project_id)):
                remove_folder(project_folder, simulate,
                              f"Deleted {object_type} folder of deleted project: {project_id}",
                              f"Failed to delete {object_type} folder of deleted project {project_id}")
                continue

            for object_id in os.listdir(project_folder):
                object_folder = osp.join(project_folder, object_id)
                if not osp.isdir(object_folder):
                    continue

                # delete the object folder if the object was deleted but the project wasn't
                if not osp.isfile(osp.join(projects_dir, project_id, object_type, object_id + ".json")):
                    remove_folder(object_folder, simulate,
                                  f"Deleted {object_type} folder of deleted object: {project_id}.{object_id}",
                                  f"Failed to delete {object_type} folder of deleted object {project_id}.{object_id}")

            # delete the parent folder if it's now empty
            if os.path.isdir(project_folder) and not os.listdir(project_folder):
                remove_folder(project_folder, simulate,
                              f"Deleted empty {object_type} folder for project: {project_id}",
                              f"Failed to delete empty {object_type} folder for project {project_id}")

class V14210MergeStoriesImageDirectories(migration_base.MigrationOperation):
    """Move the content of every sub-directory of stories/powerslide/{images,screenshot} into a 'global' one."""

    def __repr__(self):
        return "Merge image directories within [datadir]/stories/powerslide into one global repository"

    def appliesTo(self):
        return [ "design" ]

    def execute(self, diphome, simulate=False):
        def move_children(source_dir, global_dir, name):
            # Move each entry of source_dir into global_dir. Returns True if at least one move failed.
            failed = False
            for sub_file in os.listdir(source_dir):
                origin = osp.join(source_dir, sub_file)
                destination = osp.join(global_dir, sub_file)

                print(f"Moving '{origin}' to '{destination}'")
                if not simulate:
                    try:
                        os.rename(origin, destination)
                    except Exception as e:
                        failed = True
                        print(f"Moving '{origin}' to '{destination}' in merge stories {name} directory migration FAILED: {e}")
            return failed

        def merge_to_global(name):
            root_dir = osp.join(diphome.path, "stories", "powerslide", name)
            if not osp.isdir(root_dir):
                print(f"Directory '{root_dir}' does not exist, skipping.")
                return

            global_dir = osp.join(root_dir, "global")
            if osp.exists(global_dir):
                if not osp.isdir(global_dir):
                    print(f"Error: '{global_dir}' exists but is not a directory. Cannot merge.")
                    return
            else:
                print(f"Creating global directory '{global_dir}'")
                if not simulate:
                    os.makedirs(global_dir)

            for item_name in os.listdir(root_dir):
                if item_name == "global":
                    continue
                item_path = osp.join(root_dir, item_name)
                if not osp.isdir(item_path):
                    # Only directories are merged, plain files are left where they are
                    continue

                print(f"Processing directory '{item_path}'")
                if move_children(item_path, global_dir, name):
                    print(f"Skipping removal of '{item_path}' due to move failures.")
                    continue

                print(f"Removing '{item_path}'")
                if simulate:
                    continue
                try:
                    # rmdir only succeeds on an empty directory
                    os.rmdir(item_path)
                except Exception as e:
                    print(f"Removing '{item_path}' in merge stories {name} directory migration FAILED: {e}")

        for directory_name in ('images', 'screenshot'):
            merge_to_global(directory_name)

###############################################################################
# V14220 / DSS 14.2.2
###############################################################################
class V14220MultipleRetrievalColumnsForKBSearchToolsAndRALLMs(migration_base.ProjectLocalMigrationOperation):
    """Migrate KB search tools and retrieval-augmented LLMs to the 'retrievalColumns' list setting.

      - Agent tools of type VectorStoreSearch: params['allMetadataInContext'] and
        params['metadataInContext'] are replaced by params['retrievalColumns'].
      - Saved models of type RETRIEVAL_AUGMENTED_LLM: in each inline version,
        ragllmSettings['retrievalColumn'] is replaced by ragllmSettings['retrievalColumns'].

    Objects that already have a 'retrievalColumns' entry are left untouched.
    """

    def __repr__(self):
        return "Align KB search tool metadata in sources with retrieval columns setting used in ra-llm"

    @staticmethod
    def migrate_ra_llms(project_paths):
        """Migrate the inline versions of the RA LLM saved models of <project config>/saved_models.

        A file is only rewritten when at least one of its inline versions was modified.
        Failures are reported per file.
        """
        for saved_model_config_file in glob("%s/saved_models/*.json" % project_paths.config):
            try:
                saved_model = base.json_loadf(saved_model_config_file)
                if saved_model.get("savedModelType") != "RETRIEVAL_AUGMENTED_LLM":
                    continue  # not a RA LLM
                migrated = False
                name = saved_model.get('name','')
                for inline_version in saved_model.get("inlineVersions", []):
                    rag_llm_settings = inline_version.get("ragllmSettings", {})
                    if "retrievalColumns" in rag_llm_settings:
                        print(f"Skipping RA LLM {name}: already migrated.")
                        continue  # already migrated

                    source = rag_llm_settings.get("retrievalSource")
                    if not source or source == "MULTIMODAL":
                        print(f"Skipping RA LLM {name}: RA LLM without retrievalSource or using MULTIMODAL as retrievalSource won't be migrated "
                              f"since it doesn't currently support multiple retrieval columns.")
                        continue

                    if source == "CUSTOM":
                        column_used = rag_llm_settings.get("retrievalColumn")
                        if not column_used:
                            # we should never be here
                            print(f"Warning: Weird, but no retrieval column found for RA LLM {name}, skipping")
                            continue
                        rag_llm_settings["retrievalColumns"] = [column_used]
                        rag_llm_settings["retrievalSource"] = "EMBEDDING"
                        rag_llm_settings.pop("retrievalColumn", None)
                    elif source == "EMBEDDING":
                        rag_llm_settings["retrievalColumns"] = ["DKU_TEXT_EMBEDDING_COLUMN"]
                        # Removing the used column, now if its the embedding one, instead of the specific column name we will use an internal value
                        rag_llm_settings.pop("retrievalColumn", None)
                    else:
                        # Unhandled retrieval source: nothing was changed, so it must not trigger a save
                        continue
                    migrated = True

                if migrated:
                    base.json_dumpf(saved_model_config_file, saved_model)
                    print(f"Updated Retrieval Columns for RA LLM: {name}")
            except Exception as e:
                print(f"FAILED to migrate retrieval columns for saved model from file: {saved_model_config_file}: {e}")

    @staticmethod
    def try_get_kb_file(project_paths, kb_ref: str):
        """Return the path of the kb.json of the current built version of a knowledge bank.

        The current version is read from <knowledge_banks>/<kb_ref>/versions.json ('currentVersion').
        Returns None when there is no versions file, no current version, no kb.json for that
        version, or when anything fails while looking for it.
        """
        try:
            versions_file = os.path.join(project_paths.knowledge_banks, kb_ref, "versions.json")
            if not os.path.exists(versions_file):
                print(f"No versions file available for {kb_ref}")
                return None

            versions = base.json_loadf(versions_file)
            current_version = versions.get("currentVersion")
            if not current_version:
                print(f"No version found for {kb_ref}")
                return None

            kb_file = os.path.join(project_paths.knowledge_banks, kb_ref, "versions", current_version, "kb.json")
            if not os.path.exists(kb_file):
                print(f"No kb file for {kb_ref}")
                return None
            return kb_file
        except Exception as e:
            print(f"Failed to get kb data for {kb_ref}", e)
        return None

    @staticmethod
    def get_metadata_columns_from_kb(project_paths, kb_ref: str):
        """Return the names of the metadata columns ('metadataColumnsSchema') of a knowledge bank.

        Returns an empty list when no definition of the knowledge bank is found or on failure.
        """
        # First try to see if there is an active version in the knowledge_bank folder and use that config for the columns.
        # Fallback to the kb config otherwise.
        try:
            kb_file = V14220MultipleRetrievalColumnsForKBSearchToolsAndRALLMs.try_get_kb_file(project_paths, kb_ref)
            if not kb_file:
                print(f"No valid built kb version for {kb_ref}. Falling back to config file")
                kb_file = os.path.join(project_paths.config, "knowledge_banks", f"{kb_ref}.json")
                if not os.path.exists(kb_file):
                    print(f"No kb configfile found for {kb_ref}. Skipping loading metadata columns")
                    return []
            kb = base.json_loadf(kb_file)
            meta_cols_schema = kb.get("metadataColumnsSchema", [])
            return [col["name"] for col in meta_cols_schema if "name" in col]
        except Exception as e:
            print(f"Failed to get metadata columns from {kb_ref}, returning empty", e)
        return []


    @staticmethod
    def migrate_tools(project_paths):
        """Migrate the VectorStoreSearch tools of <project config>/agent-tools.

        The value of params['retrievalColumns'] depends on the legacy settings:
          - allMetadataInContext is true: the embedding column, then all the metadata columns of the KB
          - no metadataInContext: the embedding column only
          - otherwise: the columns of metadataInContext, then the embedding column
        Failures are reported per file.
        """
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                tool_type = tool.get("type", "")
                if tool_type != "VectorStoreSearch":
                    continue

                params = tool.get("params")
                if params is None:
                    # Attach the dict to the tool, otherwise the migrated value would never be saved
                    params = {}
                    tool["params"] = params

                if "retrievalColumns" in params:
                    print("Skipping: KB Search Tool {} already migrated.".format(tool_file))
                    continue
                if params.get("allMetadataInContext", False):
                    cols = V14220MultipleRetrievalColumnsForKBSearchToolsAndRALLMs.get_metadata_columns_from_kb(project_paths, params.get("knowledgeBankRef"))
                    params["retrievalColumns"] = ["DKU_TEXT_EMBEDDING_COLUMN"] + cols
                elif "metadataInContext" not in params:
                    print("Setting default value for retrieval columns for tool {}.".format(tool_file))
                    params["retrievalColumns"] = ["DKU_TEXT_EMBEDDING_COLUMN"]
                else:
                    # A null metadataInContext is handled like an empty list
                    params["retrievalColumns"] = (params["metadataInContext"] or []) + ["DKU_TEXT_EMBEDDING_COLUMN"]
                params.pop("allMetadataInContext", None)
                params.pop("metadataInContext", None)

                base.json_dumpf(tool_file, tool)
                print("Saving: Updated the parameters of KB Search Tool {}".format(tool_file))
            except Exception as e:
                print("Migrating metadata in context of KB Search Tool {} failed: {}".format(tool_file, e))


    def execute(self, project_paths):
        self.migrate_tools(project_paths)
        self.migrate_ra_llms(project_paths)


class V14220MigrateCustomTrainTestIntervalsEndDates(migration_base.ProjectLocalMigrationOperation):
    """Shift the end dates of the custom train/test intervals of ML tasks by one step.

    A '<file>.migrated_to_exclusive' marker file is created next to each migrated file,
    and files having this marker are skipped.
    """

    def __repr__(self):
        return "Make defined custom train test intervals [inclusive, exclusive) adding one timestep to end dates"

    def execute(self, project_paths):
        # Migrate ml tasks
        design_files = glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
        session_files = glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        for filename in design_files + session_files:
            self.migrate_mltask(filename)

    @staticmethod
    def migrate_mltask(mltask_filename):
        """Migrate one ML task file in place. Any failure is logged and the file is left as is."""
        marker_file = mltask_filename + ".migrated_to_exclusive"
        if osp.exists(marker_file):
            return
        try:
            data = base.json_loadf(mltask_filename)
            if data.get("customTrainTestIntervalsEndDatesMigrated", False):
                return

            shift_end_date = V14220MigrateCustomTrainTestIntervalsEndDates.add_one_timestep
            for interval in data.get("customTrainTestIntervals", []):
                # Each of "train" and "test" is indexable, the end date being at index 1
                for part in ("train", "test"):
                    bounds = interval[part]
                    bounds[1] = shift_end_date(bounds[1], data.get("timestepParams"))

            base.json_dumpf(mltask_filename, data)
            # Create the (empty) marker file
            with open(marker_file, 'a'):
                pass
        except Exception as e:
            logging.exception("Failed to migrate file {}: {}".format(mltask_filename, e))

    @staticmethod
    def add_one_timestep(date_str, timestep_params):
        """Return date_str shifted by the offset associated to timestep_params["timeunit"].

        The result is formatted as "%Y-%m-%d %H:%M:%S" followed by the milliseconds.
        """
        import pandas as pd
        parsed_date = pd.to_datetime(date_str)
        one_day = pd.DateOffset(days=1)
        one_month = pd.DateOffset(months=1)
        # Note that WEEK is shifted by one day, and QUARTER / HALF_YEAR by one month
        timeunit_to_offset = {
            "MILLISECOND": pd.DateOffset(milliseconds=1),
            "SECOND": pd.DateOffset(seconds=1),
            "MINUTE": pd.DateOffset(minutes=1),
            "HOUR": pd.DateOffset(hours=1),
            "BUSINESS_DAY": one_day,
            "DAY": one_day,
            "WEEK": one_day,
            "MONTH": one_month,
            "QUARTER": one_month,
            "HALF_YEAR": one_month,
            "YEAR": pd.DateOffset(years=1)
        }
        shifted_date = parsed_date + timeunit_to_offset[timestep_params["timeunit"]]
        # %f gives microseconds, only the first 3 digits (milliseconds) are kept
        with_microseconds = shifted_date.strftime("%Y-%m-%d %H:%M:%S.%f")
        return with_microseconds[:-3]
class V14220MigrateTimeseriesInteractiveScoringScenarios(migration_base.ProjectLocalMigrationOperation):
    """Move each 'scenario-<identifier hash>.csv' file into a folder dedicated to its timeseries identifier.

    For each identifier hash of identifiers_mapping.json, a '<identifier hash>' folder is created
    in the interactive scoring folder, containing a scenarios_metadata.json file and, when a
    legacy scenario file exists, its content (without the 'scenario_id' column) in a
    'scenario-<random id>.csv' file.
    """

    def __repr__(self):
        return "Move scenarios to a specific timeseries identifier folders"

    def execute(self, project_paths):
        # Migrate ml tasks
        for interactive_scoring_folder in (glob("%s/*/*/sessions/s*/pp*/m*/interactive-scoring-analysis" % project_paths.analysis_data) + # Analysis (part models are pp1-*) so we have both
                                           glob("%s/*/versions/*/interactive-scoring-analysis" % project_paths.saved_models) + # Saved models
                                           glob("%s/*/pversions/*/*/interactive-scoring-analysis" % project_paths.saved_models)): # partitioned models
            V14220MigrateTimeseriesInteractiveScoringScenarios.migrate_one(interactive_scoring_folder)

    @staticmethod
    def migrate_one(interactive_scoring_folder):
        """Migrate one interactive scoring folder. Any failure is logged and stops the migration of this folder."""
        import pandas as pd
        import csv
        try:
            mapping_file = osp.join(interactive_scoring_folder, "identifiers_mapping.json")
            if not osp.isfile(mapping_file):
                # Without the mapping there is nothing that can be migrated in this folder
                # (presumably a model that is not a timeseries forecasting one)
                return

            identifiers_mapping = base.json_loadf(mapping_file)
            for identifier_hash in identifiers_mapping.values():
                identifier_folder = osp.join(interactive_scoring_folder, identifier_hash)
                if osp.exists(identifier_folder):
                    continue # Migration OK.

                scenarios_metadata = { "names" : {} }
                legacy_scenario_file = osp.join(interactive_scoring_folder, "scenario-{}.csv".format(identifier_hash))

                # Load the legacy scenario before creating the folder: the existence of the folder
                # marks the identifier as migrated, so it must not be created if the load fails
                scenario_df = None
                if osp.exists(legacy_scenario_file):
                    scenario_df = pd.read_csv(legacy_scenario_file, sep="\t", dtype=str)
                    scenario_df = scenario_df.drop(columns=["scenario_id"])

                os.mkdir(identifier_folder)
                if scenario_df is not None:
                    random_id = V14220MigrateTimeseriesInteractiveScoringScenarios.get_random_id()
                    scenarios_metadata["names"][random_id] = "0"
                    # The encoding has to be set on the file itself, since the 'encoding' argument
                    # of to_csv is not taken into account for an already opened text file
                    with open(osp.join(identifier_folder, "scenario-{}.csv".format(random_id)), 'wt', encoding="utf8") as fp:
                        # dataframe_to_csv
                        scenario_df.to_csv(
                            fp,
                            encoding="utf8",
                            index=False,
                            header=True,
                            sep='\t',
                            quoting=csv.QUOTE_ALL,
                            date_format='%Y-%m-%dT%H:%M:%S.%fZ')
                    # finally remove the initial csv file.
                    os.remove(legacy_scenario_file)
                base.json_dumpf(osp.join(identifier_folder, "scenarios_metadata.json"), scenarios_metadata)
        except Exception as e:
            logging.exception("Failed to migrate folder {}: {}".format(interactive_scoring_folder, e))

    @staticmethod
    def get_random_id():
        """Return a random identifier made of 8 uppercase letters and digits."""
        return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(8))

class V14220UpdateClassicalModelsMinSizeForScoring(migration_base.ProjectLocalMigrationOperation):
    """Recompute 'minTimeseriesSizeForScoring' in the iperf.json of timeseries forecasting models
    trained with RIDGE_REGRESSION, RANDOM_FOREST_REGRESSION or XGBOOST_REGRESSION.
    """

    def __repr__(self):
        return "Update timeseries classical models min size for scoring in iperf.json"

    @staticmethod
    def _sibling(iperf_path, filename):
        # Path obtained by replacing "iperf.json" by the given file name in iperf_path
        return iperf_path.replace("iperf.json", filename)

    def execute(self, project_paths):
        # Migrate iperf files
        saved_model_files = glob("%s/*/versions/*/iperf.json" % project_paths.saved_models)
        partitioned_model_files = glob("%s/*/pversions/*/*/iperf.json" % project_paths.saved_models)
        analysis_files = glob("%s/*/*/sessions/*/*/*/iperf.json" % project_paths.analysis_data)

        for iperf_path in saved_model_files + partitioned_model_files + analysis_files:
            try:
                modeling_params = base.json_loadf(self._sibling(iperf_path, "rmodeling_params.json"))
                if modeling_params.get("algorithm") not in ["RIDGE_REGRESSION", "RANDOM_FOREST_REGRESSION", "XGBOOST_REGRESSION"]:
                    continue

                local_preprocessing_file = self._sibling(iperf_path, "rpreprocessing_params.json")
                if osp.isfile(local_preprocessing_file):
                    # Saved models rpreprocessing params lives with iperf.json
                    rpreprocessing_params = base.json_loadf(local_preprocessing_file)
                    core_params = base.json_loadf(self._sibling(iperf_path, "core_params.json"))
                    is_ts_model = core_params.get("prediction_type") == "TIMESERIES_FORECAST"
                else:
                    # Analysis data rpreprocessing params lives within pp* folder
                    preprocessing_folder = osp.dirname(osp.dirname(iperf_path))
                    rpreprocessing_params = base.json_loadf(osp.join(preprocessing_folder, "rpreprocessing_params.json"))
                    # and the ML task is described one level above
                    task_data = base.json_loadf(osp.join(osp.dirname(preprocessing_folder), "mltask.json"))
                    is_ts_model = task_data.get("predictionType") == "TIMESERIES_FORECAST"
                if not is_ts_model:
                    continue

                feature_generation = rpreprocessing_params.get("feature_generation", {})
                shifts = feature_generation.get("shifts", [])
                windows = feature_generation.get("windows", [])
                iperf = base.json_loadf(iperf_path)
                iperf["minTimeseriesSizeForScoring"] = self.get_minimum_size_for_x_number_of_rows(shifts, windows)
                base.json_dumpf(iperf_path, iperf)
            except Exception as e:
                print("Migration of iperf.json min size for scoring failed: %s" % e)

    @staticmethod
    def get_minimum_size_for_x_number_of_rows(shifts, windows, prediction_length=None):
        """
        <-- Past ------------|------------ Future -->
        ... -k, ..., -2, -1, 0, 1, 2, 3, 4, 5 ..., prediction_length
                             |  |__________________________________| Forecast Horizon
                    Forecast origin (t=0)

        Returns the absolute value of the lowest position reached by the shifts and windows,
        after subtracting prediction_length from it when one is given. Without any shift nor
        window, returns prediction_length, or 1 if it is not given.
        """
        has_shifts = len(shifts) > 0
        has_windows = len(windows) > 0
        if not has_shifts and not has_windows:
            return prediction_length if prediction_length else 1

        # Lowest position reached by each kind of generated feature
        candidates = []
        if has_shifts:
            # Shifts without any value count as 0
            from_horizon = min(min(shift["from_horizon"]) if len(shift["from_horizon"]) > 0 else 0 for shift in shifts.values())
            from_forecast = min(min(shift["from_forecast"]) if len(shift["from_forecast"]) > 0 else 0 for shift in shifts.values())
            # Shifts from forecast are moved one step further in the past
            candidates.append(from_forecast - 1)
            candidates.append(from_horizon)

        if has_windows:
            candidates.append(min(
                -window["length"] + window["shift"] - (1 if window["is_from_forecast"] else 0)
                for window in windows
            ))

        furthest = min(candidates)
        if prediction_length:
            furthest = furthest - prediction_length

        return abs(furthest)


class V14230SetLocalMCPToolDefaultTimeouts(migration_base.ProjectLocalMigrationOperation):
    """Set params['initTimeout'] and params['requestTimeout'] on the GenericStdioMCPClient agent tools.

    The values come from the 'dku.mcp.client.init.timeout' and 'dku.mcp.client.request.timeout'
    entries of the 'dkuProperties' of the tool when they hold an integer, and default to
    300 and 3600 otherwise. Parameters that are already set are not modified.
    """

    def __repr__(self):
        return "Move init and request timeout settings to local MCP tools parameters"

    @staticmethod
    def pop_property(tool, name):
        """Remove the first entry named 'name' from tool['dkuProperties'] and return its value as an int.

        Returns None if there is no such entry, or if its value cannot be converted to an int
        (the entry is removed in that case too).
        """
        if "dkuProperties" not in tool:
            return None
        for index, dku_property in enumerate(tool["dkuProperties"]):
            if dku_property.get('name') == name:
                try:
                    return int(tool["dkuProperties"].pop(index)["value"])
                except Exception:
                    # Missing or non-integer value: the caller falls back to the default.
                    # 'Exception' rather than a bare except, so that KeyboardInterrupt / SystemExit still propagate
                    return None
        return None

    def execute(self, project_paths):
        """Rewrite the local MCP tool files of <project config>/agent-tools that lack a timeout parameter."""
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                tool_type = tool.get("type")
                if tool_type != "GenericStdioMCPClient":
                    continue
                params = tool.get("params", {})
                modified = False
                if "initTimeout" not in params:
                    timeout = self.pop_property(tool, "dku.mcp.client.init.timeout")
                    if timeout is None:
                        timeout = 300
                    params["initTimeout"] = timeout
                    modified = True
                if "requestTimeout" not in params:
                    timeout = self.pop_property(tool, "dku.mcp.client.request.timeout")
                    if timeout is None:
                        timeout = 3600
                    params["requestTimeout"] = timeout
                    modified = True
                if modified:
                    # Needed when the tool had no 'params' entry at all
                    tool["params"] = params
                    base.json_dumpf(tool_file, tool)
                    print("Saving updated local MCP tool {}".format(tool_file))
            except Exception as e:
                print("Failed to add timeout settings for {}: {}".format(tool_file, e))


###############################################################################
# V14300 / DSS 14.3.0
###############################################################################

class V14300RequiresPartitionFilterSettingInBigQueryDatasets(migration_json.ProjectConfigJsonMigrationOperation):
    """Convert params['bigQueryRequirePartitionFilter'] of the BigQuery datasets to a string.

    A true value becomes "ENABLED", a false one becomes "INHERIT". Datasets without
    this parameter (or with a null value) are not modified.
    """

    def __repr__(self):
        return "Update the 'Requires partition filter' setting in BigQuery datasets to a tri-state enabled/disabled/inherited parameter"

    def transform(self, obj, filepath=None):
        if obj.get("type") == "BigQuery":
            params = obj.get("params", {})
            legacy_value = params.get("bigQueryRequirePartitionFilter", None)
            # A string value means that the setting was already converted: it must be kept,
            # since any non-empty string is truthy and would otherwise be turned into "ENABLED"
            if legacy_value is not None and not isinstance(legacy_value, str):
                params["bigQueryRequirePartitionFilter"] = "ENABLED" if legacy_value else "INHERIT"
        return obj

    def jsonpath(self):
        return ""

    def file_patterns(self):
        return ["datasets/*.json"]

class V14300DatabricksVolumeExpectedLocation(migration_json.JsonMigrationOperation):
    """Set params['useManagedDatasetCatalogAndSchema'] to false on the Databricks connections that don't define it."""

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Databricks Volume: align default catalog/schema for managed folders and managed datasets"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, connections, filepath=None):
        for name, connection in connections.items():
            if connection.get("type", None) != "Databricks":
                continue

            params = connection.get("params", {})
            if "useManagedDatasetCatalogAndSchema" in params:
                # Explicit value, kept as is
                continue

            params["useManagedDatasetCatalogAndSchema"] = False
            # Needed when the connection had no 'params' entry
            connection["params"] = params
            print(f"Updating connection {name} with useManagedDatasetCatalogAndSchema to false")
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self, ):
        return ["config/connections.json"]

class V14300ManagedSQLServerDriver(migration_json.JsonMigrationOperation):
    """
    Sets 'driverMode' to "CUSTOM" on FabricWarehouse, SQLServer and Synapse connections
    that do not define a driver mode yet. Connections that already have one are left alone.
    """

    def __init__(self):
        pass

    def __repr__(self, ):
        return "Provide managed SQL Server driver for SQL Server, Synapse and Fabric Warehouse"

    def appliesTo(self):
        return ["design", "automation"]

    def transform(self, connections, filepath=None):
        # Connection types concerned by this migration
        sql_server_driver_databases = ("FabricWarehouse", "SQLServer", "Synapse")

        for name, connection in iteritems(connections):
            if connection.get("type", None) not in sql_server_driver_databases:
                continue

            params = connection.get("params", {})
            if "driverMode" in params:
                # An explicit driver mode is never overwritten
                continue

            params["driverMode"] = "CUSTOM"
            # Re-attach the dict: it is a fresh one when the connection had no "params"
            connection["params"] = params
            print(f"Updating connection {name} with driverMode = 'CUSTOM'")
        return connections

    def jsonpath(self,):
        return "connections"

    def file_patterns(self, ):
        return ["config/connections.json"]

class V14300EscapeVariablesInDashboards(migration_json.ProjectConfigJsonMigrationOperation):
    """
    Escapes every ${var} occurrence into $${var} in dashboard page titles, tile titles
    and the text of TEXT tiles.

    Null or non-string values (e.g. a JSON null title) are left untouched instead of
    making the whole dashboard migration fail.
    """

    # Non-greedy match of a ${...} reference; group 1 is the content between the braces
    VARIABLE_PATTERN = re.compile(r'\${(.*?)}')

    def __repr__(self, ):
        return "Escape variables existing in dashboards to replace ${var} syntax by $${var}"

    def transform(self, obj, filepath=None):
        """Escape variables in all pages of the dashboard `obj` and return it."""
        for page in obj.get("pages") or []:
            if "title" in page:
                page["title"] = self.search_and_replace_var(page["title"])
            # "grid" may be absent or null
            grid = page.get("grid") or {}
            if "tiles" in grid:
                self.replace_variables_in_tiles(grid["tiles"])

        return obj

    def replace_variables_in_tiles(self, tiles):
        """Escape variables in the title of each tile, and in the text of TEXT tiles (in place)."""
        for tile in tiles or []:
            title_options = tile.get("titleOptions") or {}
            if "title" in title_options:
                title_options["title"] = self.search_and_replace_var(title_options["title"])

            tile_params = tile.get("tileParams") or {}
            if tile.get("tileType", "") == "TEXT" and "text" in tile_params:
                tile_params["text"] = self.search_and_replace_var(tile_params["text"])

    def search_and_replace_var(self, text):
        """Return `text` with each ${...} turned into $${...}; non-string values are returned as is."""
        if not isinstance(text, str):
            return text
        # sub() already returns the input unchanged when nothing matches
        return self.VARIABLE_PATTERN.sub(r'$${\1}', text)

    def jsonpath(self,):
        return ""

    def file_patterns(self,):
        return ["dashboards/*.json"]

class V14300AddTimeseriesFeatureGenerationAutoShiftParams(migration_base.ProjectLocalMigrationOperation):
    """
    For every timeseries forecasting ML task that has a feature generation section:
    adds default 'auto_shifts_params' when missing, and sets 'from_horizon_mode' to
    'FIXED' on every entry of 'shifts'.

    Each file is migrated independently: a file that cannot be read, parsed or written
    is logged and skipped, it does not abort the migration of the other files.
    """

    def __repr__(self):
        return "Add default values for timeseries feature generation auto shifts params"

    def execute(self, project_paths):
        # Only update mltask.json to support retraining, rpreprocessing_params.json are deliberately ignored.
        # Existing models and sessions do not have features with auto shifts, and thus do not have to be updated.
        for filename in (glob("%s/analysis/*/ml/*/params.json" % project_paths.config) +
                         glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)):
            try:
                # Loading is part of the guarded section so that a corrupt file is only logged
                data = base.json_loadf(filename)

                if data.get('taskType', None) != 'PREDICTION' or data.get('predictionType', None) != 'TIMESERIES_FORECAST':
                    # Skip
                    continue

                preprocessing_data = data.get("preprocessing", None)
                if preprocessing_data is None:
                    continue
                feature_generation = preprocessing_data.get("feature_generation", None)
                if feature_generation is None:
                    continue

                if feature_generation.get("auto_shifts_params", None) is None:
                    # Set default auto_shift_params; the "past only" window is offset by the prediction length
                    prediction_length = data.get("predictionLength", 1)
                    feature_generation["auto_shifts_params"] = {
                        "max_selected_horizon_shifts": 10,
                        "min_horizon_shift_past_only": -35 - prediction_length,
                        "max_horizon_shift_past_only": -prediction_length,
                        "min_horizon_shift_known_in_advance": -35,
                        "max_horizon_shift_known_in_advance": 0
                    }

                shifts = feature_generation.get("shifts", None)
                if shifts is not None:
                    # Set default from_horizon_mode to FIXED for legacy analysis
                    for feature in shifts:
                        shifts[feature]["from_horizon_mode"] = 'FIXED'

                base.json_dumpf(filename, data)

            except Exception as e:
                logging.exception("Failed to migrate file {}: {}".format(filename, e))

class V14300KBSTAgentInferredFiltering(migration_base.ProjectLocalMigrationOperation):
    """
    Adds an empty 'columnsDescriptions' list to the params of every agent tool of type
    "VectorStoreSearch" that does not define it yet.

    Failures are reported per tool file and do not stop the migration of the others.
    """

    def __repr__(self):
        return "Add a default `[]` value for column descriptions to avoid an initial dirty state in the KBST"

    def execute(self, project_paths):
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                if tool.get("type", "") != "VectorStoreSearch":
                    continue

                # The params dict must belong to `tool`, otherwise the default added below
                # would be written to a throwaway dict and never saved
                params = tool.get("params", None)
                if params is None:
                    params = {}
                    tool["params"] = params

                if "columnsDescriptions" in params:
                    continue

                params["columnsDescriptions"] = []
                base.json_dumpf(tool_file, tool)
                print("Default `columnsDescriptions` added to {}".format(tool_file))
            except Exception as e:
                print("Adding default `columnsDescriptions` to {} failed: {}".format(tool_file, e))


class V14300TimeseriesInteractiveScoringAddColorsAndMigrateForecasts(migration_base.ProjectLocalMigrationOperation):
    """
    For every interactive scoring folder (analysis sessions, saved model versions and
    partitioned saved model versions): sets a color of 0 for each named scenario in
    scenarios_metadata.json, and splits scenario-forecasts.json.gz into one
    scenario-forecasts-<scenario id>.json.gz file per scenario before deleting it.
    """

    def __repr__(self, ):
        return "Add colors in the metadata and move scenarios forecasts into their distinct files"

    def execute(self, project_paths):
        # Analysis (part models are pp1-*) so we have both
        analysis_folders = glob("%s/*/*/sessions/s*/pp*/m*/interactive-scoring-analysis" % project_paths.analysis_data)
        # Saved models
        saved_model_folders = glob("%s/*/versions/*/interactive-scoring-analysis" % project_paths.saved_models)
        # Partitioned models
        partitioned_model_folders = glob("%s/*/pversions/*/*/interactive-scoring-analysis" % project_paths.saved_models)

        for folder in analysis_folders + saved_model_folders + partitioned_model_folders:
            self.migrate_one(folder)

    @staticmethod
    def migrate_one(interactive_scoring_folder):
        """Migrate a single interactive scoring folder; any failure is logged, not raised."""
        try:
            identifiers_mapping = base.json_loadf(osp.join(interactive_scoring_folder, "identifiers_mapping.json"))
            forecast_json_path = osp.join(interactive_scoring_folder, "scenario-forecasts.json.gz")
            # Second argument of json_loadf/json_dumpf is presumably a "gzipped" flag - TODO confirm in base
            forecasts_json = None
            if osp.exists(forecast_json_path):
                forecasts_json = base.json_loadf(forecast_json_path, True)

            for timeseries_identifier, identifier_hash in identifiers_mapping.items():
                # Each timeseries has its own sub-folder, named after the mapped value
                timeseries_folder = osp.join(interactive_scoring_folder, identifier_hash)
                metadata_filepath = osp.join(timeseries_folder, "scenarios_metadata.json")
                scenarios_metadata = base.json_loadf(metadata_filepath)
                if "colors" not in scenarios_metadata:
                    scenarios_metadata["colors"] = {}

                for scenario_id, scenario_name in scenarios_metadata.get("names", {}).items():
                    # 1. Every scenario gets the color 0
                    scenarios_metadata["colors"][scenario_id] = 0

                    # 2. Forecasts are keyed by scenario name in the legacy file; when one exists,
                    #    write it to a dedicated file keyed by scenario id
                    if forecasts_json is None:
                        continue
                    per_scenarios = forecasts_json.get(timeseries_identifier, {}).get("perScenarios", {})
                    scenario_forecasts = per_scenarios.get(scenario_name)
                    if scenario_forecasts is None:
                        continue
                    base.json_dumpf(
                        osp.join(timeseries_folder, "scenario-forecasts-{}.json.gz".format(scenario_id)),
                        scenario_forecasts,
                        True
                    )

                base.json_dumpf(metadata_filepath, scenarios_metadata)

            if forecasts_json is not None:
                # The legacy all-in-one forecasts file is no longer needed
                os.remove(forecast_json_path)

        except Exception as e:
            logging.exception("Failed to migrate folder {}: {}".format(interactive_scoring_folder, e))


class V14300LocalMCPToolRemoveUsePythonAsCommand(migration_base.ProjectLocalMigrationOperation):
    """
    Drops 'useCodeEnvPythonAsCommand' from the params of "GenericStdioMCPClient" agent
    tools. When the removed value was exactly True, 'command' is set to "python".
    Tools without the field (or with a null value) are not rewritten.
    """

    def __repr__(self):
        return "Remove the useCodeEnvPythonAsCommand field on local MCP tools"

    def execute(self, project_paths):
        for tool_file in glob("%s/agent-tools/*.json" % project_paths.config):
            try:
                tool = base.json_loadf(tool_file)
                if tool.get("type") != "GenericStdioMCPClient":
                    continue

                params = tool.get("params", {})
                legacy_flag = params.pop("useCodeEnvPythonAsCommand", None)
                if legacy_flag is None:
                    # Nothing to migrate for this tool
                    continue

                if legacy_flag is True:
                    params["command"] = "python"

                tool["params"] = params
                base.json_dumpf(tool_file, tool)
                print("Saving updated local MCP tool {}".format(tool_file))
            except Exception as e:
                print("Failed to remove useCodeEnvPythonAsCommand from {}: {}".format(tool_file, e))


class V14300ConvertPodRunAsFFToProperties(migration_base.MigrationOperation):
    """
    Read the feature flags 'podRunAsNonRoot' and 'podRunAsUid' from dip.properties
    and convert them to custom properties in all Kubernetes container exec configs,
    Spark runtime configs and API deployer Kubernetes infra settings.
    If the properties already exist in the execution configs, they are not overwritten.
    If the feature flags are disabled, they are simply removed from dip.properties.

    A config whose property list is missing or null gets a new list attached, so the
    converted flags are always written back to the settings.
    """

    class GeneralSettingsJsonMigration(migration_json.JsonMigrationOperation):
        """Adds the enabled flags to the container and Spark execution configs of the general settings."""

        def __init__(self, enabled_feature_flags):
            super().__init__()
            self.enabled_feature_flags = enabled_feature_flags

        def file_patterns(self):
            return ["config/general-settings.json"]

        def transform(self, settings, filepath=None):
            if not self.enabled_feature_flags:
                return settings

            # Container settings
            for container_execution_config in settings.get("containerSettings", {}).get("executionConfigs", []):
                if container_execution_config is None or container_execution_config.get("type", None) != "KUBERNETES":
                    # Only set property for Kubernetes execution configs
                    continue

                properties = container_execution_config.get("properties", None)
                if properties is None:
                    properties = []

                for feature_flag in self.enabled_feature_flags:
                    # An existing property with the same key is never overwritten
                    if not any(prop.get("key", None) == feature_flag for prop in properties):
                        properties.append({ "key": feature_flag, "value": "true" })

                # Re-attach: the list is a new one when the config had no (or a null) "properties"
                container_execution_config["properties"] = properties

            # Spark settings
            for spark_execution_config in settings.get("sparkSettings", {}).get("executionConfigs", []):
                if spark_execution_config is None:
                    continue

                configs = spark_execution_config.get("conf", None)
                if configs is None:
                    configs = []

                for feature_flag in self.enabled_feature_flags:
                    if not any(config.get("key", None) == feature_flag for config in configs):
                        configs.append({
                            "key": feature_flag,
                            "value": "true",
                            "isFinal": False,
                            "secret": False,
                        })

                # Re-attach: the list is a new one when the config had no (or a null) "conf"
                spark_execution_config["conf"] = configs

            return settings

    class InfraSettingsJsonMigration(migration_json.JsonMigrationOperation):
        """Adds the enabled flags to the k8sProperties of API deployer infras of type K8S."""

        def __init__(self, enabled_feature_flags):
            super().__init__()
            self.enabled_feature_flags = enabled_feature_flags

        def file_patterns(self,):
            return ["config/api-deployer/infras/*.json"]

        def transform(self, infra_settings, filepath=None):
            if not self.enabled_feature_flags or infra_settings.get("type", None) != "K8S":
                return infra_settings

            k8s_properties = infra_settings.get("k8sProperties", None)
            if k8s_properties is None:
                k8s_properties = []

            for feature_flag in self.enabled_feature_flags:
                if not any(k8s_property.get("key", None) == feature_flag for k8s_property in k8s_properties):
                    k8s_properties.append({ "key": feature_flag, "value": "true" })

            # Re-attach: the list is a new one when the infra had no (or a null) "k8sProperties"
            infra_settings["k8sProperties"] = k8s_properties

            return infra_settings

    FEATURE_FLAGS = ["podRunAsNonRoot", "podRunAsUid"]

    def __repr__(self):
        return "Convert the podRunAs- feature flags to custom properties in container runtime settings"

    def _read_and_update_dip_properties(self, diphome):
        """
        Load config/dip.properties, collect the flags of FEATURE_FLAGS that are enabled and
        remove all of them from the in-memory properties (nothing is saved here).

        Returns a tuple (enabled_feature_flags, dip_properties_save_needed, props_file).
        """
        props_file = JavaPropertiesFile(osp.join(diphome.path, "config", "dip.properties"))
        enabled_feature_flags = []
        dip_properties_save_needed = False

        for feature_flag in self.FEATURE_FLAGS:
            # Read feature flag value from dip properties
            feature_flag_key = f"dku.feature.{feature_flag}.enabled"
            if props_file.get_as_bool(feature_flag_key, False):
                enabled_feature_flags.append(feature_flag)

            # Remove feature flag from dip properties
            dip_properties_save_needed |= props_file.props.pop(feature_flag_key, None) is not None

        return enabled_feature_flags, dip_properties_save_needed, props_file

    def execute(self, diphome):
        # Retrieve feature flag state and remove them from dip.properties (no saving yet)
        enabled_feature_flags, dip_properties_save_needed, props_file = self._read_and_update_dip_properties(diphome)

        if len(enabled_feature_flags) > 0:
            # Update and save general settings (container exec and Spark configs)
            general_settings_migration = self.GeneralSettingsJsonMigration(enabled_feature_flags)
            general_settings_migration.execute(diphome)

            # Update and save API deployer infra settings
            infra_settings_migration = self.InfraSettingsJsonMigration(enabled_feature_flags)
            infra_settings_migration.execute(diphome)

        # Save dip properties at the end to prevent partial updates if something fails
        if dip_properties_save_needed:
            props_file.save()


class V14300AddMesFlavorToEvaluationStores(migration_base.ProjectLocalMigrationOperation):
    """
    Sets the 'mesFlavor' property ("TABULAR", "LLM" or "AGENT") on every evaluation
    store of the project that does not have one yet.

    The flavor is first derived from the type of the evaluation recipe that outputs to
    the store. Stores not covered that way get the flavor matching the type of their
    first evaluation found on disk, defaulting to "TABULAR".
    """

    # Recipe type (extension of the files matched by the recipes/*evaluation glob, without
    # its leading dot) -> flavor of the evaluation store the recipe outputs to
    MES_FLAVOR_BY_RECIPE_TYPE = {
        "standalone_evaluation": "TABULAR",
        "evaluation": "TABULAR",
        "nlp_agent_evaluation": "AGENT",
        "nlp_llm_evaluation": "LLM",
    }

    def __repr__(self):
        return 'Add the new required "mesFlavor" property to Model/Genai Evaluation Stores'

    def execute(self, project_paths):
        # Loop over every evaluation recipe to correctly set the mesFlavor
        for recipe_file in iglob("%s/recipes/*evaluation" % project_paths.config):
            try:
                recipe_json_file, recipe_extension = os.path.splitext(recipe_file)
                # splitext() keeps the leading dot in the extension (".evaluation"): strip it,
                # otherwise no recipe type would ever match
                recipe_type = recipe_extension[1:]
                mes_flavor = self.MES_FLAVOR_BY_RECIPE_TYPE.get(recipe_type)
                if mes_flavor is None:
                    continue

                recipe = base.json_loadf(recipe_json_file + ".json")
                items = recipe.get("outputs", {}).get("evaluationStore", {}).get("items", [])
                if not items:
                    continue
                evaluation_store_ref = items[0].get("ref", {})
                if not evaluation_store_ref:
                    continue
                if "." in evaluation_store_ref:
                    # Skip external evaluation stores. They will be handled by the loop over all MES. This is extremely
                    # unlikely to happen anyway since running a GenAI evaluation recipe with an external MES does not work.
                    continue
                mes_file = "%s/model_evaluation_stores/%s.json" % (project_paths.config, evaluation_store_ref)
                mes = base.json_loadf(mes_file)
                if mes.get("mesFlavor", None):
                    # Never overwrite an existing flavor
                    continue
                mes["mesFlavor"] = mes_flavor
                base.json_dumpf(mes_file, mes)
            except Exception as e:
                logging.exception("Migrating evaluation store config for recipe %s FAILED: %s" % (recipe_file, e))

        # Loop over all evaluation stores, to update those which were not updated in the loop over recipes, because they
        # were not outputs of any recipe.
        for mes_file in iglob("%s/model_evaluation_stores/*.json" % project_paths.config):
            try:
                mes = base.json_loadf(mes_file)
                if mes.get("mesFlavor", None):
                    continue
                mes_id = os.path.splitext(os.path.basename(mes_file))[0]

                # Look at the first eval
                first_evaluation = None
                for eval_file in iglob("%s/%s/*/evaluation.json" % (project_paths.model_evaluation_stores, mes_id)):
                    first_evaluation = base.json_loadf(eval_file)
                    break

                # default to TABULAR if no eval
                mes_flavor = "TABULAR"
                if first_evaluation:
                    eval_type = first_evaluation.get("type", "tabular")
                    if eval_type == "llm":
                        mes_flavor = "LLM"
                    elif eval_type == "agent":
                        mes_flavor = "AGENT"

                mes["mesFlavor"] = mes_flavor
                base.json_dumpf(mes_file, mes)

            except Exception as e:
                logging.exception("Migrating evaluation store config for MES %s FAILED: %s" % (mes_file, e))

class V14310MigrateCustomTrainTestIntervalsEndDates(migration_base.ProjectLocalMigrationOperation):
    """
    Consumes the "<mltask file>.migrated_to_exclusive" marker files: for each ML task
    file that has one, timeseries forecasting tasks get
    'customTrainTestIntervalsEndDatesMigrated' set to True, then the marker is deleted.
    """

    def __repr__(self):
        return "Migrate custom train test intervals end dates (cleanup V14220 marker)"

    def execute(self, project_paths):
        # Migrate ml tasks
        analysis_params_files = glob("%s/analysis/*/ml/*/params.json" % project_paths.config)
        session_mltask_files = glob("%s/*/*/sessions/*/mltask.json" % project_paths.analysis_data)
        for filename in analysis_params_files + session_mltask_files:
            self.migrate_mltask(filename)

    @staticmethod
    def migrate_mltask(mltask_filename):
        """Migrate one ML task file if its marker file exists; failures are logged, not raised."""
        marker_file = mltask_filename + ".migrated_to_exclusive"
        try:
            if not osp.exists(marker_file):
                return

            data = base.json_loadf(mltask_filename)
            # We only set the new field in `ResolvedTimeseriesForecastingCoreParams`
            if data.get("predictionType") == "TIMESERIES_FORECAST":
                data["customTrainTestIntervalsEndDatesMigrated"] = True
                base.json_dumpf(mltask_filename, data)

            # The marker is removed for every task type once the file has been handled
            os.remove(marker_file)
        except Exception as e:
            logging.exception("Failed to migrate file {}: {}".format(mltask_filename, e))

###############################################################################
# Generic stuff
###############################################################################

class GenericGovernUpdate(migration_base.MigrationOperation):
    """
    No-op operation that applies to "govern" only. It carries an optional description
    that is appended to its textual representation.
    """

    description = ""

    def __init__(self, description = ""):
        self.description = description

    def __repr__(self,):
        label = "Update govern to latest version"
        if len(self.description) > 0:
            label = label + ": " + self.description
        return label

    def appliesTo(self):
        return ["govern"]

    def execute(self, diphome, simulate=False):
        # nothing on purpose, this task is here to tag when govern needs a migration
        # and also to not have an empty version migration block if DSS doesn't need a migration
        return None

class GenericDropCaches(migration_base.MigrationOperation):
    """Deletes the whole "caches" directory of the DSS home, when it exists."""

    def __repr__(self,):
        return "Drop caches"

    def execute(self, diphome, simulate=False):
        # NOTE(review): `simulate` is not read here, the folder is removed either way
        caches_folder = osp.join(diphome.path, "caches")
        if not osp.isdir(caches_folder):
            return
        shutil.rmtree(caches_folder)

class GenericDropPnotifications(migration_base.MigrationOperation):
    """Deletes config/pnotifications.db from the DSS home, when that file exists."""

    def __repr__(self,):
        return "Drop persistent notifications database"

    def execute(self, diphome, simulate=False):
        # NOTE(review): `simulate` is not read here, the file is removed either way
        pnotif_db = osp.join(diphome.path, "config", "pnotifications.db")
        if not osp.isfile(pnotif_db):
            return
        os.remove(pnotif_db)

def clean_h2_timestamps(diphome):
    """
    H2 v1.3 was storing some weird values for timestamp close to midnight, and when bumping
    to 1.4, H2 stopped accepting them, instead crashing.

    Runs the com.dataiku.CleanH2Timestamps Java class on each known database that has a
    '<name>.h2.db' file under '<diphome.path>/databases'. The Java binary comes from
    DKUJAVABIN (default "java"), the jars from '$DKUINSTALLDIR/scripts'.

    Raises KeyError when DKUINSTALLDIR is not set, and subprocess.CalledProcessError
    when a cleanup command exits with a non-zero status.
    """
    import subprocess

    scripts_dir = osp.join(os.environ["DKUINSTALLDIR"], 'scripts')
    old_h2_jar = osp.join(scripts_dir, 'h2-1.3.176_dss0.jar')
    h2_cleanup_jar = osp.join(scripts_dir, 'h2-cleanup.jar')
    java_bin = os.getenv("DKUJAVABIN", "java")

    known_databases = ('jobs', 'user_offline_queues', 'user_interests',
                       'persistent_notifications', 'flow_state', 'dss_usage')
    for db in known_databases:
        db_file = osp.join(diphome.path, 'databases', db + '.h2.db')
        if osp.isfile(db_file):
            # NOTE(review): the command goes through the shell; paths are only wrapped in double quotes
            cmd = '"%s" -cp "%s:%s" com.dataiku.CleanH2Timestamps databases/%s "%s"' % (java_bin, old_h2_jar, h2_cleanup_jar, db, diphome.path)
            print('Cleaning timestamps in %s' % db)
            subprocess.check_call(cmd, shell=True)
        # otherwise: that database doesn't exist (yet?)

###############################################################################
# Main table
###############################################################################

# Now removed:
    # Config V2 (from 0.6.12+): Large cleanup of types
    # Config V3 (0.8): Refactoring of shaker files to prepare for new selections and charts
    # Config V4 (DSS 1.1) : Projects
    # Config V5 (DSS 1.2): minor changes
    # Config V6 (DSS 1.3): minor changes
    # Config V7 (DSS 1.4) : new general settings system, LDAP groups
    # Config V8 (DSS 2.0) : Huge refactoring


# Config V9 (DSS 2.1)
# Each declare_version_migration(a, b, [...]) call below attaches a list of operation
# instances to one config version step. Presumably a/b are the source/target config
# versions and the list is run in order - TODO confirm in migration_base.
migration_base.declare_version_migration(8, 9, [
    V9ElasticSearchDatasetParams(),
    V9RecipeRoles(),
    V9FilterRecipeSelection(),
    V9AnalysisCharts(),
    V9DatasetCharts(),
    V9ShakerRecipeEngine(),
    V9APIKeysForWebapps(),

    V9RenameArraysCombine(),
    V9ColumnRenamerMultiColumns(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V10 (DSS 2.2)
migration_base.declare_version_migration(9, 10, [
    V10UpDownFiller(),
    V10TimestamNoTzInSqlDatasets(),
    V10TrueInPluginRecipesConfig()
])

# Config V11 (DSS 2.3)
migration_base.declare_version_migration(10, 11, [
    V11InstallIni(),
    V11SQLNotebooks(),
    V11FillEmptyWithValue(),
    V11RemoveRowsOnEmpty(),
    V11RoundProcessor(),
    V11FindReplace(),
    V11StringTransformer(),
    V11CellClearer(),
    V11RowsSelector(),
    V11ClearCellsOnBadType(),
    V11RemoveRowsOnBadType(),
    V11NumericalRangeSelector(),
    V11SplitFoldTrimFalse(),
    V11JSONFlattenNull(),
    V11DateParser(),
    V11RemoveShakerFilters(),
    V11RemoveStepsFromInsightCharts(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V12 (DSS 3.0)
migration_base.declare_version_migration(11, 12, [
    V12SchedulerToScenario(),
    V12CustomPythonModelsInAnalysisConfig(),
    V12CustomPythonModelsInAnalysisData(),
    V12CustomPythonModelsInSavedModels(),
    V12AnalysisCharts(),
    V12DatasetCharts(),
    V12GroupPermissions(),
    V12AddGitMode(),
    V12ConnectionParams(),
    V12ColumnsSelector(),
    V12NestProcessor(),
    V12NumericalCombinator(),
    V12DkuSparkHome(),
    V12SetupDefaultMetrics()
])


# Config V13 (DSS 3.0.2)
migration_base.declare_version_migration(12, 13, [
     V13EnableMetrics()
])

# Config V14 (DSS 3.1)
# Each declare_version_migration(a, b, [...]) call below attaches a list of operation
# instances to one config version step. Presumably a/b are the source/target config
# versions and the list is run in order - TODO confirm in migration_base.
migration_base.declare_version_migration(13, 14, [
    V14JoinRecipesInputs(),
    V14JoinRecipesJoinType(),
    V14StackRecipesInputs(),
    V14HideHiveDkuUdf(),
    V14ClusteringScatterplot(),
    V14NormalizeDoubles(),
    V14DefaultProjectStatus(),
    V14RenameProjectPayloadFiles(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V15 (DSS 4.0)
migration_base.declare_version_migration(14, 15, [
    V15JsonFlattenerWithCustomSeparator(),
    V15RoundProcessor(),
    V15ClusteringHeatmap(),
    V15JupyterExportsDir(),
    V15RefreshNotebookInsightScenarioStep(),
    V15ProjectSettingsExposed(),
    V15HProxyRemovalInRecipes(),
    V15HProxyRemovalInScenarios(),
    V15HProxyRemovalInNotebooks(),
    V15DenormalizeMessagingChannels(),
    V15RetypeChannels(),
    V15RetypeMessagings(),
    V15RetypeMessagingsInScenarioRuns(),
    V15FixupAuthCtxInScenarioRuns(),
    V15MoveKerberosSettings(),

    V15AddGridSearchRFGBTETInAnalysisData(),
    V15AddGridSearchRFGBTETInAnalysisConfig(),
    V15AddGridSearchRFGBTETInSavedModels(),
    V15AddGridSearchRFGBTETInRootSavedData(),

    V15ChartsInExplore(),
    V15ChartsInAnalysis(),
    V15ChartsInAnalysisModels(),
    V15PrepareRecipeEngine(),
    V15SelectDSSSyncRecipeEngine(),
    V15SelectDSSRecipeEngine(),

    # MUST be after V15ChartsInExplore
    V15Insights(),

    V15ProjectAPIKeys(),
    V15ProjectAccessLevels(),
    V15GlobalAPIKeys(),
    V15SplitRecipesOutput(),
    V15AddInstallId(),
    V15HiveOverrideDatabase(),
    V15HiveJobCompressionCommands(),
    V15HiveExecutionConfig(),
    V15HiveDefaultDatabase(),
    V15RenameJupyterNotebooks(),
    V15MoveDatabases(),
    V15DKUCommand(),
    V15FixScoringRecipes(),

    # Only the persistent notifications database is dropped in this step (no GenericDropCaches)
    GenericDropPnotifications()
])


# Config V16 (DSS 4.0.5)
migration_base.declare_version_migration(15, 16, [
     V16DKUCommand(),
     V16UpdateWeeklyTriggers()
])

# Config V17 (DSS 4.1)
# Each declare_version_migration(a, b, [...]) call below attaches a list of operation
# instances to one config version step. Presumably a/b are the source/target config
# versions and the list is run in order - TODO confirm in migration_base.
migration_base.declare_version_migration(16, 17, [
    V17DKUCommand(),
    V17UpdatePluginSettings(),
    V17ChartsInExplore(),
    V17ChartsInAnalysis(),
    V17ChartsInAnalysisModels(),
    V17ChartsInInsights(),
    V17AddManagedFoldersConnection(),
    V17FoldersOnProviders(),
    V17WebApps(),
    V17WebAppsSnippets(),
    V17WebAppsInsights(),
    V17UnfoldProcessor(),
    V17SplitUnfoldProcessor(),
    V17ChangeRemoteFilesDataset(),
    V17ChangeRemoteFilesDatasetInProject(), # after the instance-level migration, so that it's only effective in project imports
    V17MLLibResolvedGrids(),
    V17MLLibUnresolvedGridsInAnalysis(),
    V17MLLibUnresolvedGridsInSM(),
    V17ComputedColumnsGroupingRecipe(),
    V17ComputedColumnsJoinRecipe(),
    V17GlobalAPIKeys(),
    V17Meanings(),
    V17ConvertVariablesToComputedColumnsSplitRecipe(),
    V17ConvertFilesInFolderSelectionPattern(),
    V17EngineCreationSettings(),
    V17MoveJupyterExports(),
    V17InitGraceDelays(),
    V17UpdateMailAttachment(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V18 (DSS 4.2)
migration_base.declare_version_migration(17, 18, [
    V18UpdateSQLDatasets(),
    V18MigrateDashboardImageResizeSetting(),
    V18CleanupMLResolvedParams(),
    V18FeatureGenerationParams(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V19 (DSS 4.3)
# Declared with an empty operation list: the step exists but performs no operation
migration_base.declare_version_migration(18, 19, [
    # Nothing to do
])

# Config V20 (DSS 5.0)
# Each declare_version_migration(a, b, [...]) call below attaches a list of operation
# instances to one config version step. Presumably a/b are the source/target config
# versions and the list is run in order - TODO confirm in migration_base.
migration_base.declare_version_migration(19, 20, [
    V20AddParamsToMLRecipes(),
    V20TransformCommentsInsightsToDiscussionsInsights(),
    V20TransformCommentsInsightsToDiscussionsInsightsInDashboards(),
    #V20DKUCommand(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V21 (DSS 5.0.2)
migration_base.declare_version_migration(20, 21, [
    V21RegoupMLSparkParamsInSavedModelsMLTasks(),
    V21RegoupMLSparkParamsInAnalysisDataMLTasks(),
    V21RegoupMLSparkParamsInAnalysesMLTasks(),
    V21RegoupMLSparkParamsInRecipes(),
    #V21DKUCommand(),

    # Generic cleanup: removes the caches folder and the persistent notifications database
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V22 (DSS 5.0.3)
migration_base.declare_version_migration(21, 22, [
    V22GiveNPSSurveySettingsToUsers()
])

# Config V23 (DSS 5.1)
migration_base.declare_version_migration(22, 23, [
    V23TransferKernelSpecEnvName(),
    V23MigrateH2Databases(),
    V23MakeClassWeightTheDefaultForClassifications(),
    V23DefaultGitURLWhitelist(),
    V23UseSmartnameInRefreshChartsStep(),
    V23SkipExpensiveReportsInMLTasks(),
    GenericDropCaches(),
    GenericDropPnotifications()
])

# Config V24 (DSS 5.1.1)
migration_base.declare_version_migration(23, 24, [
    V24UseSmartnameInArticleAttachments()
])

# Config V6000 (DSS 6.0.0)
# From this step on, the version numbers follow the DSS release number (6.0.0 -> 6000)
# instead of incrementing by one
migration_base.declare_version_migration(24, 6000, [
    V6000MigrateProjectPathToProjectFolder(),
    V6000MigrateHomeSettings(),
    V6000UseNumericIdsForArticle(),
    V6000MigrateHomepagesArticles(),
    V6000MigrateDashboardArticles(),
    V6000UpgradeWikiTimelineNumericIds(),
    V6000UpgradeEC2Connections(),
    V6000PrePushHookGeneralSettings(),
    V6000PrePushHookInAPIDeployerInfras(),
    V6000MigrateDoctorExecutionParams(),
    V6000MigrateKerasModelListedInCodeEnv(),
    V6000MigrateEvaluationRecipeMetricsOutputs()
])

# Config V6020 (DSS 6.0.2)
migration_base.declare_version_migration(6000, 6020, [
    V6020FixArticleIdMigration()
])

# Config V6030 (DSS 6.0.3)
migration_base.declare_version_migration(6020, 6030, [
    V6030FixMicrosoftTeamsIntegrationMigration()
])

# Config V7000 (DSS 7.0.0)
# Each declare_version_migration(a, b, [...]) call below attaches a list of operation
# instances to one config version step. Presumably a/b are the source/target config
# versions and the list is run in order - TODO confirm in migration_base.
migration_base.declare_version_migration(6030, 7000, [
    V7000UserCredentialsRenaming(),
    V7000ExpositionkInAPIDeployerInfras(),
    V7000ExpositionkInAPIDeployerDeployments(),
    V7000RemoveHipchatChannels(),
    V7000RemoveHipchatReporters(),
    V7000RemoveHipchatIntegrations(),
    V7000MigrateAlgorithmsParamsStructure(),
    V7000MigratePosttrainComputationParams(),
    V7000MigrateSamlSPConfig()
])

# Config V7020 (DSS 7.0.2)
migration_base.declare_version_migration(7000, 7020, [
    V7020MigrateExplanationsScoringRecipeParams(),
])

# Config V8000 (DSS 8.0.0)
migration_base.declare_version_migration(7020, 8000, [
    V8000MigrateAuditConfig(),
    V8000MigrateAuditConfigAPINode(),
    V8000MigrateAlgorithmsSVMParamsStructure(),  # MUST be executed before V8000MigratePredictionAlgorithmsRanges
    V8000MigrateMaxFeaturePropStructure(),  # MUST be executed before V8000MigratePredictionAlgorithmsRanges
    V8000MigratePredictionAlgorithmsRanges(),
    V8000MigrateGridLengthForNonSearchableAlgos(),
    V8000MigrateCodeEnvSelection()
])

# Config V8020 (DSS 8.0.2)
migration_base.declare_version_migration(8000, 8020, [
    V8020MigrateTreeBasedMLResults(),
    V8020RenameHashSizeField(),
    V8020UpdateGlobalTagsStructure()
])

# Config V9000 (DSS 9.0.0)
# NOTE(review): this step starts at 8999 while the previous one ends at 8020 - confirm
# how migration_base handles non-contiguous version numbers
migration_base.declare_version_migration(8999, 9000, [
    V9000MigrateCategoricalHashingMethod(),
    V9000FilterAndFlagOnDateRangeProcessor("FilterOnDateRange", "FilterOnDate"),
    V9000FilterAndFlagOnDateRangeProcessor("FlagOnDateRange", "FlagOnDate"),
    V9000MigrateNotebook(),
    V9000AddEvaluationRecipeParameters(),
    V9000MigrateAPIServiceParamsWithType(),
    V9000MigrateDeployerSettings(),
    V9000MigrateTimeTriggers(),
    V9000MigrateTreeBasedModelsMaxDepth()
])

# Config V9020 (DSS 9.0.2)
migration_base.declare_version_migration(9000, 9020, [
    V9020MigrateCVSeed(),
    V9020MigrateNumericalFeatureRescaling()
])

# Config V9050 (DSS & FM 9.0.5)
migration_base.declare_version_migration(9020, 9050, [
    V9050RenameFmInstanceImagesFile()
])

# Config V9060 (DSS 9.0.6)
migration_base.declare_version_migration(9050, 9060, [
    V9060RenameCredentialsToAuthtype()
])

# Config V10000 (DSS 10.0.0)
# Starts at 9999 (target minus one) rather than at the previous target 9060.
migration_base.declare_version_migration(9999, 10000, [
    V10000ImpactCoding(),
    V10000PreprocessingReport(),
    V10000KeepRegularForBinarizeAndQuantize(),
    V10000InteractiveStatisticsWorksheetColorBy(),
    V10000ReorganizeReadyReportsCache(),
    V10000RenameTrainDiagnosticFile(),
    V10000InteractiveStatisticsScatterPlotsColorBy(),
    # Parameterised with a list of two names -- presumably the chart types the
    # operation applies to (confirm at the class definition).
    V10000DisableChartsLogScale(["scatter", "binned_xy"]),
    V10000EnableProjectShareToWorkspace()
])

# Config V10030 (DSS 10.0.3)
migration_base.declare_version_migration(10000, 10030, [
    V10030EDAExtraBivariateSummaryMetrics()
])

# Config V10040 (DSS 10.0.4)
migration_base.declare_version_migration(10030, 10040, [
    V10040MigrateUDRSettings()
])

# Config V10060 (DSS 10.0.6)
# NOTE(review): the start version 10050 is neither the previous declared
# target (10040) nor target minus one (10059) -- confirm this is intended.
migration_base.declare_version_migration(10050, 10060, [
    # GenericGovernUpdate is constructed with a free-text description string.
    GenericGovernUpdate("item search in SQL, improved pagination"),
    V10060RemoveDataikuJarFromGeoadminPlugin(),
    V10060MigrateAuthorizationConfig(),
    V10060MigrateAuthorizationConfigForServiceInProject()
])

# Config V11000 (DSS 11.0.0)
# Throughout the 11.x registrations below, every step except 11010 starts at
# "target minus one" (10999, 11099, 11199, ...) instead of the previous
# declared target.
migration_base.declare_version_migration(10999, 11000, [
    GenericGovernUpdate("item history and timeline initialization, setup bundle management"),
    V11000FixUpGeoMapDefinition(),
    V11000UpgradeGeoAdmin(),
    V11000UpdateEditSQLTileModes()
])

# Config V11010 (DSS 11.0.1)
# The one 11.x step that starts directly at the previous target (11000).
migration_base.declare_version_migration(11000, 11010, [
    V11010CreateIfThenElseWithLegacyPositioningParam()
])

# Config V11100 (DSS 11.1.0)
migration_base.declare_version_migration(11099, 11100, [
    V11100MigrateGovernUDRSettings(),
    V11100CheckTaggableObjectReadUsePrivileges(),
    V11100AddWorkspaceObjectIds(),
    V11100DatasetInsights(),
    V11100ApiNodeConfigKeyCreation(),
    V11100TimeseriesHpSearch(),
    V11100RefactorTrainableLayersParamDeephub(),
    V11100OAuth2SQLConnectionRefactoring()
])

# Config V11200 (DSS 11.2.0)
# The GenericGovernUpdate entry sits between version-prefixed operations here;
# its position in the list is kept as written.
migration_base.declare_version_migration(11199, 11200, [
    V11200AddLearningRateToTimeSeriesDeepLearningModels(),
    V11200CodeStudioObjectOwner(),
    V11200EDATimeSeriesCardLongFormat(),
    V11200CodeStudioResourcesRenaming(),
    V11200ScenarioStepsWarningHandling(),
    GenericGovernUpdate("Update provided blueprint versions of Govern"),
    V11200GovernActivateUDR()
])

# Config V11300 (DSS 11.3.0)
migration_base.declare_version_migration(11299, 11300, [
    V11300ChartFiltersIncludeExcludeDefaults(),
    V11300DashboardFiltersIncludeExcludeDefaults(),
    V11300DatasetFilterValuesMigration(),
    V11300PartitionedModelsMoveJobIdUpdateToSmOrigin(),
    V11300CreatePredictionFileForModelsAndModelEvaluations()
])

# Config V11400 (DSS 11.4.0)
migration_base.declare_version_migration(11399, 11400, [
    V11400DeleteWrongCIForDataDrift(),
    V11400MoveWebappsTempFolder()
])

# Config V12000 (DSS 12.0.0)
# All 12.0-12.3 registrations below start at "target minus one".
migration_base.declare_version_migration(11999, 12000, [
    V12000EnableOpals(),
    V12000FixupTopNParams(),
    V12000FixupJoinParams(),
    V12000OVariableImportanceDashboardMigration(),
    V12000FixupScenarioStartingFrom(),
    V12000LabelingTaskLabelColumn(),
    V12000TimeseriesTestSize(),
    V12000PrepareRecipeTypeInference(),
    GenericGovernUpdate("added Dataiku Deployer item types")
])

# Config V12100 (DSS 12.1.0)
# Chart/Dashboard and plain/ForFM operations are registered as separate
# entries rather than one parameterised operation.
migration_base.declare_version_migration(12099, 12100, [
    V12100FixupWindowParams(),
    V12100ChartFiltersDateTypeAndPart(),
    V12100DashboardFiltersDateTypeAndPart(),
    V12100IAMSettingsUpdate(),
    V12100IAMSettingsUpdateForFM(),
    GenericGovernUpdate("custom metrics sync")
])

# Config V12200 (DSS 12.2.0)
migration_base.declare_version_migration(12199, 12200, [
    V12200FixupPivotTableOptions(),
    V12200OpenIDPromptParameter(),
    V12200OpenIDPromptParameterForFM(),
    V12200AddIdToChartFilters(),
    V12200AddIdToDashboardFilters(),
    V12200AddFilterSelectionTypeToDashboardFilters(),
    V12200AddFilterSelectionTypeToChartFilters(),
    GenericGovernUpdate("Business initiative default template update, Update Signoff model")
])

# Config V12220 (DSS 12.2.2)
migration_base.declare_version_migration(12219, 12220, [
    V12220TeradataDefaultConnectionTimezone(),
])

# Config V12320 (DSS 12.3.2)
migration_base.declare_version_migration(12319, 12320, [
    V12320FixupFilterAndFlagParamsExclude(),
])

# Config V12400 (DSS 12.4.0)
# All 12.4-12.5 registrations below start at "target minus one".
# In this step the GenericGovernUpdate entry is listed first, ahead of the
# version-prefixed operations.
migration_base.declare_version_migration(12399, 12400, [
    GenericGovernUpdate("Blueprint version migration path, Updated infrastructure type in API Deployer Infrastructure blueprint, Added standard custom pages, Business initiative and Govern project default template update, fix associated artifacts"),
    V12400SecuritySettingsUpdate(),
    V12400EnableCrossFiltersInDashboardPages(),
    V12400FixupReferenceLines(),
    V12400CustomScoringMetricMigration(),
    V12400FixupAxes(),
    V12400AddMonitoringToInfrastructure(),
    V12400InvalidateEDAInsightsWithConfidenceIntervals(),
    V12400UpdateGPUSettings(),
    V12400AddVersionTagsFileToGitIgnore(),
    V12400UpdatePromptRecipePayloadParams()
])

# Config V12420 (DSS 12.4.2)
migration_base.declare_version_migration(12419, 12420, [
    V12420FixupObjectRefForLocalAuthorizedObjectsInDashboardAuthorizations(),
    V12420UpdatePromptStudioPromptStructure(),
    V12420UpdateCustomLLMConnections(),
    V12420AddRAGModelId(),
])

# Config V12500 (DSS 12.5.0)
# V12500FixupReferenceLines is a different class from the
# V12400FixupReferenceLines registered for the 12400 step.
migration_base.declare_version_migration(12499, 12500, [
    GenericGovernUpdate("improved item timeline, added LLM/GenAI and Dataiku applications metadata on synced projects"),
    V12500FixupReferenceLines(),
    V12500FixupChartValues()
])

# Config V12520 (DSS 12.5.2)
migration_base.declare_version_migration(12519, 12520, [
    V12520EnableAdaEmbeddingModelInOpenAIConnections()
])

# Config V12600 (DSS 12.6.0)
# All 12.6.x registrations below start at "target minus one".
migration_base.declare_version_migration(12599, 12600, [
    V12600DatasetFilterRelativeDateMultiSelect(),
    V12600DatasetInsightsRelativeDateMultiSelect(),
    V12600ShakerFilterRelativeDateMultiSelect(),
    V12600ChartFiltersRelativeDateMultiSelect(),
    V12600DashboardFiltersRelativeDateMultiSelect(),
    # Same operation class registered twice, once per processor name.
    V12600FilterAndFlagOnDateRangeProcessorRelativeDateMultiSelect("FilterOnDate"),
    V12600FilterAndFlagOnDateRangeProcessorRelativeDateMultiSelect("FlagOnDate"),
    V12600AddSupportForScikitLearn13(),
    V12600CheckToDataQualityRulesUpdate(),
    V12600CheckDatasetScenarioStepUpdate(),
    V12600CustomMeasuresInExplore(),
    V12600CustomMeasuresInAnalysis(),
    V12600CustomMeasuresInAnalysisModels(),
    V12600CustomMeasuresInInsights(),
    V12600MoveDashboardFiltersInsideDashboardPage(),
    GenericGovernUpdate("add relation between application template and application instance"),
    V12600InvalidateEdaSampleCaches(),
    V12600FixupZoomOptions(),
    V12600UseImplicitCluster(),
    V12600IAMGroupsRestriction(),
    V12600IAMGroupsRestrictionInFM(),
    V12600IAMGroupMappings(),
])

# Config V12620 (DSS 12.6.2)
migration_base.declare_version_migration(12619, 12620, [
    V12620FixupZoomScale(),
    V12620LLMConnectionsAndCodeEnvs(),
    V12620FixChartDateFiltersWithRelativeOption(),
    V12620FixDashboardDateFiltersWithRelativeOption()
])

# Config V12650 (DSS 12.6.5)
migration_base.declare_version_migration(12649, 12650, [
    V12650FixChartDateFilters(),
    V12650FixDashboardDateFilters(),
    V12650FixupChartFiltersWithNullValue(),
    V12650FixupDashboardFiltersWithNullValue(),
])

# Config V13000 (DSS 13.0.0)
# All 13.0-13.1 registrations below start at "target minus one".
migration_base.declare_version_migration(12999, 13000, [
    V13000AddNeedsInputDataFolderParamDeephub(),
    V13000FixupLineFormatting(),
    GenericGovernUpdate("deletion of the displayAddNewButton setting in view components, added item/artifact role assignments, added cost rating in main view of govern projects"),
    V13000RenameFinetunedSavedModelFields(),
    V13000FixupTileBorderColor(),
    V13000SplitGeneralSettingsFM(),
    V13000AddRunIdToPromptStudioResponses(),
    # NOTE(review): these are two distinct class names ("...InFM" and
    # "...InFMInFM"). Other steps pair a plain operation with an FM variant,
    # so one of these names was presumably meant to be the non-FM one --
    # confirm at the class definitions before renaming anything.
    V13000DefaultSAMLHashAlgorithmInFM(),
    V13000DefaultSAMLHashAlgorithmInFMInFM(),
    V13000FixupDashboardFiltersEngineType(),
    V13000FixGlobalExplanationFactsJson(),
    V13000MergeRequestInGitIgnore()
])

# Config V13010 (DSS 13.0.1)
migration_base.declare_version_migration(13009, 13010, [
    V13010FixupChartsMinMaxAggregation()
])

# Config V13100 (DSS 13.1.0)
migration_base.declare_version_migration(13099, 13100, [
    GenericGovernUpdate("split create and read permissions, adjusted timeline for sign-offs, added global timeline, added descriptions on some standard fields, removed unused fields in standard Govern project template"),
    V13100UpdateProjectBundleContentConfiguration(),
    V13100UpdateDQRulesForMultiColumnsInDatasets(),
    V13100UpdateDQRulesForMultiColumnsInTemplates(),
    V13100FixupTilesOpts(),
    V13100MarkPineconeConnectionsAsLegacy(),
    V13100UpdateChartDimensionOneTickPerBin(),
    V13100DisableHashedApiKeys(),
    V13100DisableHashedApiKeysApiNode(),
])

# Config V13120 (DSS 13.1.2)
# The registered target version is 13120; the operation class nevertheless
# carries a "V13112" prefix, which is the identifier it is referenced by here.
migration_base.declare_version_migration(13119, 13120, [
    V13112UpdateMainInputsOnPromptStudioResponses(),
])

# Config V13150 (DSS 13.1.5)
migration_base.declare_version_migration(13149, 13150, [
    V13150FixScikitLearn13ExtraTreesMulticlassSelectionMode(),
])

# Config V13200 (DSS 13.2.0)
# All 13.2-13.3 registrations below start at "target minus one".
migration_base.declare_version_migration(13199, 13200, [
    V13200EnrichFrenchDepartementDefaultToInseeSourceDataset20092011(),
    V13200EnrichFrenchPostcodeDefaultToInseeSourceDataset20092011(),
    V13200SecuritySettingsUpdate(),
    V13200ClearImpalaDriverClassInManagedMode(),
    GenericGovernUpdate("fix API Deployer deployment template")
])

# Config V13300 (DSS 13.3.0)
# The GenericGovernUpdate entry is second in this list, after the tile
# position/size operation; the written order is kept as-is.
migration_base.declare_version_migration(13299, 13300, [
    V13300AdaptTilesPositionAndSizeToNewColumnsNumber(),
    GenericGovernUpdate("Merged card views and row views, adjusted artifact table settings, sync embedding LLM usage, create auto-govern settings table, add sensitive_data field in the default Govern Project template, add a visibility field in dataiku bundle and saved model version"),
    V13300RemoveDashboardsFromHomepage(),
    V13300KpiColorRulesToChartDef(),
    V13300HFModelSettingsMigration(),
    V13300JsonModeToResponseFormatInPromptStudiosAndRecipes(),
    V13300MigrateUnifiedMonitoringBatchFrequency(),
    V13300RemoveUserPasswordFromMongoConnectionAdvancedURI()
])

# Config V13310 (DSS 13.3.1)
migration_base.declare_version_migration(13309, 13310, [
    V13310KPIColorGroupInferredTypeMigration()
])

# Config V13400 (DSS 13.4.0)
# Starts at "target minus one", like 13430, 13500 and 13530 below; only the
# 13440 step starts at the previous declared target.
migration_base.declare_version_migration(13399, 13400, [
    GenericGovernUpdate("added sync of clustering models, fine-tuned LLMs, agents, knowledge banks, and augmented LLMs; added requesters on sign-offs"),
    V13400SwitchDatesReadModeFlagsInDatasets(),
    V13400SwitchDatesReadModeFlagsInDatasetLookups(),
    V13400FlagTypeSystemVersionInDatasets(),
    V13400AddNewAlignmentFieldsInTimeseriesMLTasks(),
    V13400MigrateSnowflakeAndDatabricksUtilsConfigTimeoutProps(),
    V13400AddNewResamplingFieldsInTimeseriesMLTasks(),
    V13400RenameOneLakeConnectionsToFabricWarehouse(),
    V13400RenameOneLakeDatasetsToFabricWarehouse(),
    V13400MigrateToGuardrailsPipeline()
])

# Config V13430 (DSS 13.4.3)
migration_base.declare_version_migration(13429, 13430, [
    V13430AddGuardrailsPipelineToPromptRecipes()
])

# Config V13440 (DSS 13.4.4)
# Starts at 13430, the target of the registration just above.
migration_base.declare_version_migration(13430, 13440, [
    V13440MoveStackTracesAndLogTailsDipProperty(),
    V13440MoveHttpRequestMetadataDipProperty()
])

# Config V13500 (DSS 13.5.0)
migration_base.declare_version_migration(13499, 13500, [
    GenericGovernUpdate("created user configuration table, added custom fields to Dataiku items, updated workflow system, added settings for auto-governance of projects, added uploaded files and timeseries' owner"),
    V13500SkipPageToursForExistingUsers(),
    V13500RemoveStorySettings(),
    V13500RemoveLLMPromptEndpointsInProjects(),
    V13500RemoveLLMPromptEndpointsInApiNodeServices(),
    V13500UpgradeActivityRrdFiles(),
    V13500LLMConnectionsForAgentsToolsAndRaLlms(),
])

# Config V13530 (DSS 13.5.3)
migration_base.declare_version_migration(13529, 13530, [
    V13530GenerateGlobalProjectGitIgnore(),
    V13530AddDssMetaToGitignoreAndAddGlobalProjectGitIgnore(),
    V13530MoveProtectedFilesToHiddenFolder(),
])


# Config V14000 (DSS 14.0.0)
# NOTE(review): the start version is 13599, whereas the other major-version
# steps in this file start at target minus one (8999, 9999, ..., 12999).
# Presumably deliberate -- confirm before "normalising" it to 13999.
migration_base.declare_version_migration(13599, 14000, [
    GenericGovernUpdate("updated augmented LLMs to their own items in the flow"),
    V14000SkipOnboardingQuestionnaireForExistingUsers(),
    V14000SetWriteSQLCommentParameterInConnections(),
    V14000SetWriteSQLCommentParameterInDatasets(),
    V14000MigrateHuggingFaceIdsInProjects(),
    V14000MigrateHuggingFaceIdsInGeneralSettings(),
    V14000MigrateHuggingFaceIdsInConnections(),
    V14000ExtractRetrievalAugmentedLLMFromKBIntoSM(),
    V14000RenameStoriesGeneralSettings(),
    V14000RenameStoriesCategoryInInstallConfig(),
    V14000RenameStoriesPort(),
    V14000RenameStoriesDirectories(),
    V14000ChangeReferenceModeForProxyAndDatabricksCodeEnvs(),
    V14000AddDeprecatedPythonVersionsToCodeEnvs(),
])

# Config V14010 (DSS 14.0.1)
# This step and 14020 below start at "target minus one".
migration_base.declare_version_migration(14009, 14010, [
    V14010EnsureRaLLMsHaveActiveVersion(),
    V14010RemoveEmptyLLMIds()
])

# Config V14020 (DSS 14.0.2)
migration_base.declare_version_migration(14019, 14020, [
    V14020ResultSetsMemorySizeLimit(),
    V14020MigrateFullDocumentSettingforVLMRuleInEmbedDocumentsRecipe(),
    V14020MigrateSendMessageTools()
])

# Config V14100 (DSS 14.1.0)
# Both 14.1.x registrations start at "target minus one".
migration_base.declare_version_migration(14099, 14100, [
    GenericGovernUpdate("added index on sign-offs table status, added new conditional views, added Project Standards fields definition in Dataiku Bundle"),
    V14100ExtractPythonAgentCodeToPythonFile(),
    V14100ExtractInlinePythonToolCodeToPythonFile(),
    V14100HomeMessageMustReadsToPromotedContent(),
    V14100MigrateRagSearchType(),
    V14100ChangeReferenceModeForDeephubCodeEnvs(),
    V14100GroupSourcesSettingsForKBSearchTool()
])

# Config V14120 (DSS 14.1.2)
migration_base.declare_version_migration(14119, 14120, [
    V14120RemoveAgentToolCustomDescriptionField(),
    V14120RenameProjectStandardsConfigFields(),
])

# Config V14200 (DSS 14.2.0)
# Starts at "target minus one", like 14220 and 14230 below; only the 14210
# step starts at the previous declared target.
migration_base.declare_version_migration(14199, 14200, [
    GenericGovernUpdate("remove hidden status on governed artifacts, added default script for autogovernance and renamed autogovernance status to action, added AI Types attribute on Dataiku Projects and Dataiku Bundles"),
    V14200SynchronizeProjectTypeBadges(),
    V14200DeleteOrphanEntityFolders(),
])

# Config V14210 (DSS 14.2.1)
# Starts at 14200, the target of the registration just above.
migration_base.declare_version_migration(14200, 14210, [
    V14210MergeStoriesImageDirectories(),
])

# Config V14220 (DSS 14.2.2)
migration_base.declare_version_migration(14219, 14220, [
    V14220MultipleRetrievalColumnsForKBSearchToolsAndRALLMs(),
    V14220MigrateCustomTrainTestIntervalsEndDates(),
    V14220MigrateTimeseriesInteractiveScoringScenarios(),
    V14220UpdateClassicalModelsMinSizeForScoring(),
])

# Config V14230 (DSS 14.2.3)
migration_base.declare_version_migration(14229, 14230, [
    V14230SetLocalMCPToolDefaultTimeouts(),
])

# Config V14300 (DSS 14.3.0)
# Starts at "target minus one"; the GenericGovernUpdate entry is listed first.
migration_base.declare_version_migration(14299, 14300, [
    GenericGovernUpdate("updated group and global API key templates regarding new group-based global API keys, updated a description on standard govern project template, renamed view component action to plugin-action, added actions table"),
    V14300RequiresPartitionFilterSettingInBigQueryDatasets(),
    V14300DatabricksVolumeExpectedLocation(),
    V14300ManagedSQLServerDriver(),
    V14300EscapeVariablesInDashboards(),
    V14300AddTimeseriesFeatureGenerationAutoShiftParams(),
    V14300KBSTAgentInferredFiltering(),
    V14300TimeseriesInteractiveScoringAddColorsAndMigrateForecasts(),
    V14300LocalMCPToolRemoveUsePythonAsCommand(),
    V14300ConvertPodRunAsFFToProperties(),
    V14300AddMesFlavorToEvaluationStores(),
])

# Config V14310 (DSS 14.3.1)
# Starts at 14300, the target of the registration just above. The operation
# shares its base name with V14220MigrateCustomTrainTestIntervalsEndDates
# (registered for the 14220 step) but is a separate, V14310-prefixed class.
migration_base.declare_version_migration(14300, 14310, [
    V14310MigrateCustomTrainTestIntervalsEndDates(),
])
