import glob
import hashlib
import io
import json
import logging
import os
import os.path as osp
import re
import requests
import shutil
import stat
import subprocess
import sys
import sysconfig
import tarfile
import tempfile
from datetime import datetime
from enum import Enum

from six.moves import configparser

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')

# NOTE: the raw environment string is used for truthiness, so any non-empty
# DEV_MODE value (including "0" or "false") switches dev mode on.
DEV_MODE = os.environ.get("DEV_MODE", False)
# These three variables are mandatory: a missing one raises KeyError at import time.
DKUPYTHONBIN = os.environ["DKUPYTHONBIN"]
DKUINSTALLDIR = os.environ["DKUINSTALLDIR"]
DIP_HOME = os.environ["DIP_HOME"]
MYDIR = osp.dirname(osp.realpath(__file__))
# The dkuinstall scripts live in a different sub-folder in dev mode
if DEV_MODE:
    DKUINSTALL_PATH = "%s/packagers/dkuinstall" % DKUINSTALLDIR
else:
    DKUINSTALL_PATH = "%s/scripts/dkuinstall" % DKUINSTALLDIR

dss_version_bin = "%s/dss_version.py" % DKUINSTALL_PATH
if not osp.isfile(dss_version_bin):
    print("unable to find %s" % dss_version_bin)
    print("check that DKUINSTALLDIR is correct")
    # sys.exit() instead of the bare exit() helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# Ask the DSS python for the version string, then normalise it so that it can
# be embedded in an image tag (lowercase, "/" replaced by "_", no whitespace).
RAW_DSS_VERSION = subprocess.check_output([DKUPYTHONBIN, dss_version_bin])
if sys.version_info > (3,0):
    # check_output returns bytes on Python 3
    RAW_DSS_VERSION = RAW_DSS_VERSION.decode("utf8")
DSS_VERSION = RAW_DSS_VERSION.lower().replace("/", "_").strip()

# "major.minor" of the interpreter running this script
PYTHONVERSION = sysconfig.get_python_version()
# Keep values in sync with BaseImageBuilder.java and _base-image-cuda-support.txt in doc
DEFAULT_CUDA_VERSION_ALMA8 = "10.0"
SUPPORTED_CUDA_VERSIONS_ALMA8 = ["9.0", DEFAULT_CUDA_VERSION_ALMA8, "10.1", "10.2", "11.0", "11.2", "11.8"]
DEFAULT_CUDA_VERSION_ALMA9 = "11.8"
SUPPORTED_CUDA_VERSIONS_ALMA9 = [DEFAULT_CUDA_VERSION_ALMA9]
# Union of both lists, AlmaLinux 8 order first, without duplicates
SUPPORTED_CUDA_VERSIONS = SUPPORTED_CUDA_VERSIONS_ALMA8 + [x for x in SUPPORTED_CUDA_VERSIONS_ALMA9 if x not in SUPPORTED_CUDA_VERSIONS_ALMA8]

# Name fragments used when composing the code-studio ("cde") image names
CDE_BASE = "cde-base"
CDE_PLUGINS = "cde-plugins"

# Keep values in sync with BaseImageBuilder.java
class PythonInstallOption(Enum):
    """Possible values of a Python installation option.

    Each member's value is its own name as a string, and ``str(member)``
    renders that bare string.
    """
    DEFAULT = "DEFAULT"
    INSTALL = "INSTALL"
    SKIP = "SKIP"

    def __str__(self):
        # Render "INSTALL" rather than "PythonInstallOption.INSTALL"
        text = self.value
        return text


def compute_source_image(image_type, distrib, r, r_major_version, cuda, cuda_version):
    """
    Name (repository:tag) of the prebuilt base image to download.

    The tag embeds the DSS version, the distribution, an optional "-r<major>"
    part when `r` is truthy, an optional "-cuda<version>" part when `cuda` is
    truthy, and the Python version running this script.

    Raises Exception when `image_type` is not one of "container-exec",
    "spark", "api-deployer" or "cde".
    """
    r_flavor = ""
    if r:
        r_flavor = "-r%s" % r_major_version
    cuda_flavor = ""
    if cuda:
        cuda_flavor = "-cuda%s" % cuda_version

    # Repository name fragment that follows "dataiku-dss-" for each image type
    repositories = {
        "container-exec": "container-exec-base",
        "spark": "spark-exec-base",
        "api-deployer": "apideployer-base",
        "cde": CDE_BASE,
    }
    if image_type not in repositories:
        raise Exception("invalid image type %s"% image_type)

    return "dataiku-dss-%s:dss-%s-%s%s%s-py%s" % (
        repositories[image_type], DSS_VERSION, distrib, r_flavor, cuda_flavor, PYTHONVERSION)

def get_install_id():
    """Return the output of dss_install_id.py, lowercased and stripped of surrounding whitespace."""
    script = "%s/dss_install_id.py" % DKUINSTALL_PATH
    raw_output = subprocess.check_output([DKUPYTHONBIN, script])
    # check_output returns bytes on Python 3: turn it into text first
    text = raw_output.decode("utf8") if sys.version_info > (3,0) else raw_output
    return text.lower().strip()

def get_node_type():
    """
    Return the `nodetype` entry of the [general] section of DIP_HOME/install.ini.

    Best effort: when the file cannot be opened or parsed, or the entry is
    missing, a warning is logged and 'design' is returned.
    """
    try:
        config = configparser.RawConfigParser()
        with open(osp.join(DIP_HOME, "install.ini")) as f:
            # readfp() was removed in Python 3.12 (it would silently send us to
            # the fallback below); read_file() does not exist on Python 2.
            read = getattr(config, "read_file", None) or config.readfp
            read(f)
        return config.get('general', 'nodetype')
    except Exception:
        # Deliberately broad (any I/O or parsing problem), but no longer
        # swallows KeyboardInterrupt / SystemExit like a bare except did.
        logging.warning("Unable to find node type, assuming design node")
        return 'design'

# keep in sync with the method in dataiku/code_studio/__init__.py
def get_dataiku_user_uid():
    """
    Return the value of `dku.container.dataiku.uid` from
    DIP_HOME/config/dip.properties, as a string.

    Falls back to "500" when the file does not exist, when no line defines the
    property, or when reading fails (the error is logged).
    """
    fallback_uid = "500"
    try:
        properties_path = osp.join(DIP_HOME, "config/dip.properties")
        if os.path.exists(properties_path):
            with open(properties_path, "r") as f:
                candidates = (
                    re.match(r"^dku.container.dataiku.uid\s*=\s*(\w+)$", entry)
                    for entry in f.readlines()
                )
                # First line defining the property wins
                hit = next((m for m in candidates if m), None)
                if hit is not None:
                    return hit.group(1)
        else:
            return "500"
    except Exception as e:
        logging.error("Could not read dip properties json file", exc_info=e)
    return fallback_uid


def compute_default_tag(image_type):
    """
    Default name:tag given to the image produced for `image_type`.
    This logic must match the one in DSS.

    Raises Exception for an unknown image type.
    """
    # Always resolved first, even for the types that do not embed it
    install_id = get_install_id()

    if image_type == "container-exec":
        return "dku-exec-base-%s:dss-%s" % (install_id, DSS_VERSION)

    if image_type == "spark":
        return "dku-spark-base-%s:dss-%s" % (install_id, DSS_VERSION)

    if image_type == "api-deployer":
        return "dku-apideployer-apinode-base:dss-%s" % DSS_VERSION

    if image_type == "cde":
        # Having the installId is as much to avoid collisions than for tracing the image to the instance it comes from
        return "dku-%s-%s:dss-%s" % (CDE_BASE, install_id, DSS_VERSION)

    if image_type == "cde-plugins":
        # The issue at hand is updating the images in the nodes of the pods.
        # The default settings in cluster make it so that the image name and tag are used.
        # If these 2 don't change, the kubelet won't bother re-pulling the image (unless explicitly told to do so).
        # In the case of the image for CDEs, we're expecting somewhat frequent rebuilds, like code envs, because the image will need to be rebuilt every time you change some plugin in the DSS install.
        # Hence the tag with the timestamp (microseconds truncated to milliseconds).
        timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
        return "dku-%s-%s-dss-%s:r-%s" % (CDE_PLUGINS, install_id, DSS_VERSION, timestamp)

    raise Exception("invalid image type %s"% image_type)

class BaseImageBuilder(object):
    """
    Common ancestor of all image builders. It only takes care of two things:
    a temporary build environment directory and the Dockerfile text
    accumulated so far.
    """
    def __init__(self, opts):
        self.opts = opts
        self.dockerfile = ""
        # Fresh scratch directory under DIP_HOME/tmp for this build
        self.build_env_path = tempfile.mkdtemp(prefix="exec-docker-base-image.",
                                               dir="%s/tmp" % DIP_HOME)

    def copy_to_buildenv(self, from_path, to=None):
        """
        Copy a file or a directory tree into the build environment.

        `to` is relative to the build environment; when omitted, the build
        environment directory itself is the destination.
        """
        destination = self.build_env_path if to is None else osp.join(self.build_env_path, to)
        logging.info("COPY %s to %s" % (from_path, destination))
        # Directories are copied recursively, anything else as a single file
        copier = shutil.copytree if osp.isdir(from_path) else shutil.copy2
        copier(from_path, destination)

    def append_to_dockerfile(self, path):
        """Append the whole content of the file at `path` to the Dockerfile text."""
        with open(path) as f:
            fragment = f.read()
        self.dockerfile += fragment


class FinalizeOnlyImageBuilder(BaseImageBuilder):
    """
    Builder used by the 'download' and 'use' modes: a new image is still
    created on top of the source image, possibly to append customer options.
    """
    def __init__(self, opts, source_image, type_):
        super(FinalizeOnlyImageBuilder, self).__init__(opts)
        self.source_image, self.type_ = source_image, type_

    def build(self):
        """Start from the source image, then add the optional user-supplied Dockerfile fragment."""
        self.dockerfile += "FROM %s\n" % self.source_image
        appended_fragment = self.opts.dockerfile_append
        if appended_fragment:
            self.append_to_dockerfile(appended_fragment)

class CompleteImageBuilder(BaseImageBuilder):
    """
    When using 'build' mode, we use this family of image builders
    """

    def initialize_dockerfile(self):
        if self.opts.build_from_image:
            from_image = self.opts.build_from_image
        elif self.opts.distrib == 'almalinux8':
            from_image = 'almalinux:8'
        elif self.opts.distrib == 'almalinux9':
            from_image = 'almalinux:9'
        else:
            assert(False)
        self.dockerfile += "FROM %s\n" % from_image
        self.dockerfile += "WORKDIR /opt/dataiku\n"

        if self.opts.http_proxy:
            self.dockerfile += 'ENV http_proxy "%s"\n' % self.opts.http_proxy
        if self.opts.https_proxy:
            self.dockerfile += 'ENV https_proxy "%s"\n' % self.opts.https_proxy
        if self.opts.no_proxy:
            self.dockerfile += 'ENV no_proxy "%s"\n' % self.opts.no_proxy

        if self.opts.dockerfile_prepend:
            self.append_to_dockerfile(self.opts.dockerfile_prepend)


    # Common to AlmaLinux 8 and 9
    # (class-level lists: they are shared by every builder instance and subclass)
    DEFAULT_ALMA_SYSPACKAGES = [
        "procps-ng", "glibc-langpack-en",
        "curl", "util-linux", "bzip2",
        "nginx", "expat", "zip", "unzip",
        "freetype", "libgfortran", "libgomp",
        "libicu-devel", "libcurl-devel", "openssl-devel", "libxml2-devel",
        "git", "libglvnd-glx"
    ]

    # AlmaLinux 8 only
    DEFAULT_ALMA8_SYSPACKAGES = [
    ]

    # AlmaLinux 9 only
    # "curl" is also in the common list; this list is installed by add_syspackages
    # with `--allowerasing`, presumably so that it can replace a conflicting
    # preinstalled variant -- TODO confirm
    DEFAULT_ALMA9_SYSPACKAGES = [
        "curl"
    ]

    def set_default_java(self):
        """
        Force default java for the image
        """
        self.dockerfile += """
RUN alternatives --set java java-17-openjdk.x86_64
# From java-17-openjdk.x86_64
ENV JAVA_HOME=/usr/lib/jvm/jre-17-openjdk
"""

    def add_syspackages(self, additional=None):
        """
        Installs the core set of system packages + builder-specific ones + user-specified ones
        """

        if self.opts.py27:
            # Not available on AlmaLinux 9
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/virtualenv-20.21.pyz"))
            self.dockerfile += """
COPY virtualenv-20.21.pyz build/
"""
            self.__class__.DEFAULT_ALMA8_SYSPACKAGES += [ "python2-devel" ]
        if additional is None:
            additional = []
        if self.opts.system_packages is None:
            self.opts.system_packages =""
        user_packages = self.opts.system_packages.split(",")

        # Ensure recent nginx version
        self.dockerfile += "RUN dnf -qy module enable nginx:1.24"

        self.dockerfile += """
RUN dnf -y update \\
    && dnf -y install epel-release \\
    && . /etc/os-release && case "$VERSION_ID" in \\
        8*) %s;; \\
        9*) %s;; \\
        *) echo 2>&1 'OS version not supported'; exit 1;; \\
       esac \\
    && dnf -y install %s %s %s \\
    && dnf -y --setopt=group_package_types="mandatory" install @development \\
    && dnf -y autoremove \\
    && dnf clean all
""" % (
    "dnf -y install %s" % (" ".join(self.__class__.DEFAULT_ALMA8_SYSPACKAGES)) if self.__class__.DEFAULT_ALMA8_SYSPACKAGES else "",
    "dnf -y install --allowerasing %s" % (" ".join(self.__class__.DEFAULT_ALMA9_SYSPACKAGES)) if self.__class__.DEFAULT_ALMA9_SYSPACKAGES else "",
    " ".join(self.__class__.DEFAULT_ALMA_SYSPACKAGES),
    " ".join(additional),
    " ".join(user_packages)
    )

    def add_additional_pythons(self):
        """Installs missing versions of Python 3.x in /usr/local"""
        # Python 3.6.x
        if opts.py36 == PythonInstallOption.INSTALL and opts.distrib == "almalinux9":
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/build-python36.sh"))
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/virtualenv-20.21.pyz"))
            self.dockerfile += """
COPY build-python36.sh virtualenv-20.21.pyz build/
RUN build/build-python36.sh >/tmp/build-python.log && rm -f /tmp/build-python.log
"""
        elif opts.py36 == PythonInstallOption.INSTALL or (opts.py36 == PythonInstallOption.DEFAULT and opts.distrib == "almalinux8"):
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/virtualenv-20.21.pyz"))
            self.dockerfile += """
COPY virtualenv-20.21.pyz build/
RUN dnf -y install python36-devel && dnf clean all
"""
        if opts.py37:
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/build-python37.sh"))
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/virtualenv-20.26.pyz"))
            self.dockerfile += """
COPY build-python37.sh virtualenv-20.26.pyz build/
RUN build/build-python37.sh >/tmp/build-python.log && rm -f /tmp/build-python.log
"""
        if opts.py38:
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/build-python38.sh"))
            self.dockerfile += """
COPY build-python38.sh build/
RUN . /etc/os-release && case "$VERSION_ID" in \\
        8*) dnf -y install python38-devel && dnf clean all;; \\
        9*) build/build-python38.sh >/tmp/build-python.log && rm -f /tmp/build-python.log;; \\
        *) echo 2>&1 'OS version not supported'; exit 1;; \\
    esac
"""
        if opts.py39:
            self.dockerfile += """
RUN . /etc/os-release && case "$VERSION_ID" in \\
        8*) dnf -y install python39-devel && dnf clean all;; \\
        9*) dnf -y install python3-devel && dnf clean all;; \\
        *) echo 2>&1 'OS version not supported'; exit 1;; \\
    esac
"""
        if opts.py310:
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/build-python310.sh"))
            self.dockerfile += """
COPY build-python310.sh build/
RUN build/build-python310.sh >/tmp/build-python.log && rm -f /tmp/build-python.log
"""
        if opts.py311:
            self.dockerfile += """
RUN dnf -y install python3.11-devel && dnf clean all
"""
        if opts.py312:
            self.dockerfile += """
RUN dnf -y install python3.12-devel && dnf clean all
"""
        if opts.py313:
            if self.opts.distrib == 'almalinux8':
                raise Exception("Python 3.13 is not supported on AlmaLinux 8, please remove the --with-py313 option")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/build-python313.sh"))
            self.dockerfile += """
COPY build-python313.sh build/
RUN build/build-python313.sh >/tmp/build-python.log && rm -f /tmp/build-python.log
"""

    def add_r_base(self):
        """Adds R system packages and installs (as root) the needed R packages"""

        # Mirror the version of R that DSS is installed with. If DSS is not installed with R support ...
        # well it's a bad situation.
        dss_r_version = os.environ.get("DKURVERSION", "???")

        if dss_r_version.startswith("4."):
            r_version_to_install = "r4"
            r_libs_to_use = "4.x"
        else:
            logging.warning("R not installed in DSS, but trying to install it in container")
            r_version_to_install = "r4"
            r_libs_to_use = "4.x"

        logging.info("Adding R %s on distrib %s" % (r_version_to_install, self.opts.distrib))

        build_fragment = """
RUN . /etc/os-release && case "$VERSION_ID" in \\
        8*) dnf -y install dnf-plugins-core && dnf -y config-manager --set-enabled powertools &&  dnf -y install R-core-devel && \\
         dnf -y install freetype-devel libpng-devel libtiff-devel libjpeg-turbo-devel harfbuzz-devel fribidi-devel fontconfig-devel && \\
         dnf clean all;; \\
        9*) dnf -y install dnf-plugins-core && dnf -y config-manager --set-enabled crb &&  dnf -y install R-core-devel && \\
         dnf -y install freetype-devel libpng-devel libtiff-devel libjpeg-turbo-devel harfbuzz-devel fribidi-devel fontconfig-devel && \\
         dnf clean all;; \\
        *) echo 2>&1 'OS version not supported'; exit 1;; \\
    esac
"""

        self.dockerfile += build_fragment
        self.dockerfile += """
# Copy R build capabilities
COPY install-packages-builtin.sh build/
COPY minimal-packages-base.txt build/
COPY minimal-packages-noconda.txt build/

# Install the equivalent of the base R packages of DSS globally
ENV DKU_R_LIB_SUBPATH=%s
RUN mkdir -p R/bin R/R.lib/${DKU_R_LIB_SUBPATH} \
    && rBin=$(command -v R) \
    && rscriptBin=$(command -v Rscript) \
    && ln -s "$rBin" "$rscriptBin" R/bin/
ENV DKU_R_DATAIKU_PACKAGES_PATH=/opt/dataiku/R/R.lib/${DKU_R_LIB_SUBPATH}
ENV R_LIBS_USER=${DKU_R_DATAIKU_PACKAGES_PATH}
RUN build/install-packages-builtin.sh R ${DKU_R_LIB_SUBPATH} build/minimal-packages-base.txt %s
RUN build/install-packages-builtin.sh R ${DKU_R_LIB_SUBPATH} build/minimal-packages-noconda.txt %s
""" % (r_libs_to_use, self.opts.cran_mirror, self.opts.cran_mirror)

        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/code-envs/r/install-packages-builtin.sh"))
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/code-envs/r/minimal-packages-base.txt"))
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/code-envs/r/minimal-packages-noconda.txt"))

    def add_cuda(self):
        """Adds CUDA to the image and displays the CUDA license banner"""
        if not self.opts.cuda:
            return
        cuda_version = self.opts.cuda_version
        if self.opts.distrib == "almalinux8":
            if cuda_version is None:
                cuda_version = DEFAULT_CUDA_VERSION_ALMA8
            if cuda_version not in SUPPORTED_CUDA_VERSIONS_ALMA8:
                raise Exception("Invalid CUDA version %s for AlmaLinux 8, supported versions are %s" % (cuda_version, ','.join(SUPPORTED_CUDA_VERSIONS_ALMA8)))
        elif self.opts.distrib == "almalinux9":
            if cuda_version is None:
                cuda_version = DEFAULT_CUDA_VERSION_ALMA9
            if cuda_version not in SUPPORTED_CUDA_VERSIONS_ALMA9:
                raise Exception("Invalid CUDA version %s for AlmaLinux 9, supported versions are %s" % (cuda_version, ','.join(SUPPORTED_CUDA_VERSIONS_ALMA9)))
        else:
            raise Exception("Unsupported distribution for CUDA: %s" % self.opts.distrib)

        cudnn_version_from_cuda_version = {"9.0": "7.6", "10.0": "7.6", "10.1": "7.6", "10.2": "8.0", "11.0": "8.0", "11.2": "8.1", "11.8": "8.7"}
        cudnn_version = cudnn_version_from_cuda_version[cuda_version]
        cuda_folder = osp.join(MYDIR, "../cuda")
        distrib_folder = "%s/distributions/centos/%s" % (cuda_folder, cuda_version)

        with open(osp.join(cuda_folder, "terms-and-conditions-banner.txt")) as f:
            logging.info("\n%s" % f.read())

        self.dockerfile += """
RUN . /etc/os-release && case "$VERSION_ID" in \\
        8*) dnf install -y dnf-plugins-core && \\
            dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \\
            dnf clean all;; \\
        9*) dnf install -y dnf-plugins-core && \\
            dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo && \\
            dnf clean all;; \\
        *) echo 2>&1 'OS version not supported'; exit 1;; \\
    esac
"""
        self.append_to_dockerfile(osp.join(distrib_folder, "Dockerfile-fragment-cuda-%s"% (cuda_version)))
        self.append_to_dockerfile(osp.join(distrib_folder, "Dockerfile-fragment-cuDNN-%s-cuda-%s"% (cudnn_version, cuda_version)))


class NonAPIExecImageBuilder(CompleteImageBuilder):
    """Common builder between container-exec and spark-exec"""

    def add_python(self):
        """
        Installs base virtualenv and installs Dataiku Python modules
        This part is run at the end of the dockerfile because the copied files change frequently (and invalidate the docker build cache on jenkins)
        """
        pip_install_options = ""
        pip_install_before_script = ""
        pip_install_after_script = ""

        # XGBoost CPU only build
        if not self.opts.cuda or not self.opts.xgboost_gpu_support:
            pip_install_options = "--no-binary xgboost"
            pip_install_before_script = "dnf install -y cmake &&"
            pip_install_after_script = "&& dnf remove -y cmake && dnf autoremove -y && dnf clean all"

        self.dockerfile += """
COPY _create-virtualenv.sh virtualenv.pyz install-builtin-env-python-packages.sh resources/builtin-python-env/container-images/ build/
RUN build/_create-virtualenv.sh python%s pyenv && \\
    %s build/install-builtin-env-python-packages.sh pyenv/bin/pip "%s" %s && \\
    mkdir -p bin && \\
    echo -e '#!/bin/bash -e\\nexec /opt/dataiku/pyenv/bin/python "$@"' >bin/python && \\
    chmod a+x bin/python && \\
    rm -rf ~/.cache/pip
COPY dataiku python/dataiku
COPY dataikuapi python/dataikuapi
COPY dataikuscoring python/dataikuscoring
COPY ai_code_assistant python/ai_code_assistant
RUN bin/python  -m compileall -f python || echo "[-] Error precompiling Dataiku Python code (ignored)"
ENV PYTHONPATH=/opt/dataiku/python
""" % (PYTHONVERSION, pip_install_before_script, pip_install_options, pip_install_after_script)
        # Copy files common to dev and production images
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/virtualenv.pyz"))
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/container-exec/install-builtin-env-python-packages.sh"))
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/builtin-python-env/container-images"), "resources/builtin-python-env/container-images")

        if DEV_MODE:
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "src/main/python/dataiku"), "dataiku")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "src/main/python/dataikuscoring"), "dataikuscoring")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "src/main/python/dataikuapi"), "dataikuapi")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "src/main/python/ai_code_assistant"), "ai_code_assistant")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "packagers/common/scripts/_create-virtualenv.sh"))
        else:
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "python/dataiku"), "dataiku")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "python/dataikuscoring"), "dataikuscoring")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "python/dataikuapi"), "dataikuapi")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "python/ai_code_assistant"), "ai_code_assistant")
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "scripts/_create-virtualenv.sh"))

    def add_r(self):
        if not opts.r:
            return

        self.add_r_base()

        # And container-exec-specific R support
        # /!\ keep install-packages-if-needed.sh for code envs
        self.dockerfile += """
# Copy R build capabilities
COPY install-packages-if-needed.sh build/
COPY irkernel-packages-noconda.txt build/

# Install irkernel
RUN build/install-packages-builtin.sh R ${DKU_R_LIB_SUBPATH} build/irkernel-packages-noconda.txt %s
COPY R-exec-wrapper.R R/
COPY R R/R.lib/
""" % (self.opts.cran_mirror)

        # Add R support files to build env
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "dist/R" if DEV_MODE else "R"), "R")
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/code-envs/r/irkernel-packages-noconda.txt"))
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/R/R-exec-wrapper.R"))

        # /!\ keep install-packages-if-needed.sh for code envs
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "resources/code-envs/r/install-packages-if-needed.sh"))

class ContainerExecImageBuilder(NonAPIExecImageBuilder):
    """Builds container-exec kind of image"""

    type_ = "container_exec"

    def build(self):
        self.initialize_dockerfile()
        self.add_syspackages()
        self.add_additional_pythons()
        self.add_r()
        self.add_cuda()
        self.add_python()
        self.add_webapp_support()
        self.add_nlp_resources()

        if self.opts.r:
            self.dockerfile += """
ENV DKU_SOURCE_LIB_R_PATH=/home/dataiku/lib/instance
"""
        self.copy_to_buildenv(osp.join(DKUINSTALLDIR, "dss-version.json"))

        # Final setup
        self.dockerfile += """
WORKDIR /home/dataiku

COPY dss-version.json /opt/dataiku/

# Fake DIP_HOME with tmp folder for R recipes
ENV DIP_HOME=/home/dataiku/fake_dip_home

RUN groupadd -r dataiku \
    && useradd -r -g dataiku -u {uid} -d /home/dataiku dataiku \
    && mkdir fake_dip_home fake_dip_home/tmp lib lib/project lib/instance plugin \
    && chown -Rh dataiku:dataiku /home/dataiku

# OpenShift compatibility:
# OpenShift runs containers with an arbitrary uid as an additional security measure
# Thus, we are not "dataiku" and cannot write inside /home/dataiku
# However, we are always gid=0, so we give /home/dataiku to gid 0 and make sure group can
# write into it.
# This is the official recommendation:
# https://docs.openshift.com/container-platform/4.3/openshift_images/create-images.html#images-create-guide-openshift_create-images
#  "Support arbitrary user ids"
#
# More details:
# This is enforced through a Security Context Constraint - see
# https://docs.openshift.com/container-platform/4.3/authentication/managing-security-context-constraints.html
# One of the SCC says
#  Run As User Strategy: MustRunAsRange
#    UID:                    <none>
#    UID Range Min:              <none>
#    UID Range Max:              <none>
# with the range given by an annotation on the project: openshift.io/sa.scc.uid-range=1000540000/10000
#
RUN chgrp -R 0 /home/dataiku && chmod -R 775 /home/dataiku
ENV DKU_CONTAINER_EXEC=1

# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
ENTRYPOINT ["/opt/dataiku/bin/python", "-m", "dataiku.container.runner"]
""".format(uid=get_dataiku_user_uid())
        if self.opts.dockerfile_append:
            self.append_to_dockerfile(self.opts.dockerfile_append)


    def add_webapp_support(self):
        """Ships the webapp and code-studio HTML error pages in /opt/dataiku/web/"""
        os.makedirs(osp.join(self.build_env_path, "web"))
        # The pages live in a different folder in a dev tree
        pages_folder = "src/main/platypus" if DEV_MODE else "frontend"
        for page_family in ("webapp", "code-studio"):
            for status in ("401", "403", "502"):
                page = "%s-error-%s.html" % (page_family, status)
                self.copy_to_buildenv(osp.join(DKUINSTALLDIR, pages_folder, page), "web")
        self.dockerfile += "COPY web/ /opt/dataiku/web/\n"

    def add_nlp_resources(self):
        """Ships the resources/nlp folder of the DSS install in /opt/dataiku/resources/nlp/"""
        resources_dir = osp.join(self.build_env_path, "resources")
        # The folder may already be there, depending on what was copied before
        already_there = osp.isdir(resources_dir)
        if not already_there:
            os.makedirs(resources_dir)
        nlp_source = osp.join(DKUINSTALLDIR, "resources", "nlp")
        self.copy_to_buildenv(nlp_source, "resources/nlp")
        self.dockerfile += "COPY resources/nlp /opt/dataiku/resources/nlp/\n"


class SparkExecImageBuilder(NonAPIExecImageBuilder):
    """Builder for Spark image"""

    type_ = "spark"

    def build(self):
        if not "DKU_SPARK_HOME" in os.environ:
            raise ValueError("Spark integration does not seem to be setup in DSS yet")
        if self.opts.cuda:
            raise ValueError("CUDA support is not available for Spark images")

        self.initialize_dockerfile()
        self.add_syspackages()
        self.add_additional_pythons()
        self.add_r()
        # No CUDA here ... at least for the moment
        self.add_python()
        self.add_spark()
        self.set_default_java()

        # Final setup
        self.dockerfile += """
RUN groupadd -r dataiku \
    && useradd -r -g dataiku -u {uid} -d /home/dataiku dataiku \
    && mkdir -p /home/dataiku && chown dataiku:dataiku /home/dataiku

WORKDIR /home/dataiku

# OpenShift compatibility - See container-exec for details
# This is normally not strictly required for Spark since we're not supposed
# to write in /home/dataiku but better be safe
RUN chgrp -R 0 /home/dataiku && chmod -R 775 /home/dataiku

# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
ENTRYPOINT ["/opt/spark/entrypoint.sh"]
""".format(uid=get_dataiku_user_uid())
        if self.opts.dockerfile_append:
            self.append_to_dockerfile(self.opts.dockerfile_append)

    def add_spark(self):
        """Copies Spark from DKU_SPARK_HOME to the image"""
        SPARK_HOME = os.environ["DKU_SPARK_HOME"]

        # Copy Spark stuff to the build env
        os.makedirs(osp.join(self.build_env_path, "spark_home"))
        self.copy_to_buildenv(osp.join(SPARK_HOME, "jars"), "spark_home/jars/")
        self.copy_to_buildenv(osp.join(SPARK_HOME, "bin"), "spark_home/bin/")
        self.copy_to_buildenv(osp.join(SPARK_HOME, "sbin"), "spark_home/sbin/")
        self.copy_to_buildenv(osp.join(SPARK_HOME, "python"), "spark_home/python/")
        self.copy_to_buildenv(osp.join(SPARK_HOME, "R"), "spark_home/R/")
        # self.copy_to_buildenv(osp.join(SPARK_HOME, "kubernetes/dockerfiles/spark/entrypoint.sh"), "spark_home/entrypoint.sh.base")

        with open(osp.join(self.build_env_path, "env-site.sh"), "w") as f:
            f.write('export PYTHONPATH=${CODE_ENV_PYTHONPATH:-/opt/dataiku/python}:/opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-*.zip\n')

        # tweak the entrypoint.sh to :
        # - set the PYTHONPATH to override the override from CodeEnv.dockerfile (which doesn't put py4j.zip on the PYTHONPATH)
        # - read a env-site.sh to make it easier to override stuff in the entrypoint.sh (starting with PYTHONPATH)
        # - ditch the use of 'tini' (not available on centos unless you want to fight for it)
        with open(osp.join(SPARK_HOME, "kubernetes/dockerfiles/spark/entrypoint.sh")) as f:
            entrypoint = f.read()
        entrypoint = re.sub(".*\\/tini .*-- (.*)", "\\1", entrypoint)
        entrypoint = re.sub("(#!.*)", "\\1\nsource /opt/dataiku/env-site.sh", entrypoint)
        with open(osp.join(self.build_env_path, "spark_home/entrypoint.sh"), "w") as f:
            f.write(entrypoint)

        self.dockerfile += """
# spark specificity w.r.t. python
ENV PYTHONPATH=/opt/dataiku/python:/opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-*.zip

RUN dnf install -y \
       java-17-openjdk-headless \
    && echo "securerandom.source=file:/dev/urandom" >> /usr/lib/jvm/jre/lib/security/java.security \
    && dnf clean all

# Copy stuff from SPARK_HOME
COPY spark_home/ /opt/spark/
COPY env-site.sh /opt/dataiku/env-site.sh
RUN chmod 755 /opt/spark/entrypoint.sh

ENV SPARK_HOME /opt/spark
ENV LD_LIBRARY_PATH /lib64
"""


class KitImageBuilder(CompleteImageBuilder):
    """
    Builds images with a kit inside. Works pretty differently from the
    others as it installs a full DSS in the container
    """

    # Comes in addition to DEFAULT_ALMA_SYSPACKAGES
    KIT_ALMA_SYSPACKAGES = [
        "acl", "java-17-openjdk-headless", "dejavu-sans-fonts", "freetype", "fontconfig"
    ]

    def setup_dataiku_user(self):
        """
        Append the Dockerfile instructions that set a few environment variables and
        create the `dataiku` group, user (with the uid returned by
        get_dataiku_user_uid()) and home directory.
        """
        self.dockerfile += """
MAINTAINER Dataiku <dss@dataiku.com>
ENV SHELL "/bin/bash"
ENV TERM 1
ENV LANG en_us.utf8
RUN groupadd -r dataiku \
    && useradd -r -g dataiku -u {uid} -d /home/dataiku dataiku \
    && mkdir /home/dataiku \
    && chown -Rh dataiku:dataiku /home/dataiku
""".format(uid=get_dataiku_user_uid())

    def copy_kit(self):
        """
        Put the DSS kit in the build env and emit the COPY instructions that land it
        in /home/dataiku/installdir inside the image.

        Outside of DEV_MODE the kit items are copied as-is from DKUINSTALLDIR. In
        DEV_MODE there is no packaged kit, so a minimal one is assembled from the
        source tree plus a few archives downloaded from the CDN.

        Raises ValueError if the running Python version has no matching packages
        folder, and requests.HTTPError if one of the DEV_MODE downloads fails.
        """
        # see make-studio-package.sh
        py_suffix = {"3.9": "39", "3.10": "310", "3.11": "311"}.get(PYTHONVERSION)
        if py_suffix is None:
            raise ValueError("Python version not supported: %s" % PYTHONVERSION)
        PYTHON_PACKAGES = "python%s.packages" % py_suffix
        PYTHON_PACKAGES_FILE = "packages-py%s.tar.bz2" % py_suffix
        PYTHON_DSS_VERSION = "arch=cp%s-linux" % py_suffix

        # top-level entries of the kit, copied one by one in the image
        kit_items = ["dist", "dss-version.json", "dss-arch.txt", "installer.sh",
                     "lib", "python", PYTHON_PACKAGES, "R", "resources", "scripts", "conda.packages"]

        if DEV_MODE:
            # copy just enough to make the installer.sh happy, knowing that the image
            # doesn't need anything for the UI or the notebooks.
            # Check make-studio-packages.sh for how it's done for real.

            def get_version_from_file(file_name):
                # version pins live next to the studio packager scripts
                with open(osp.join(DKUINSTALLDIR, 'packagers', 'studio', file_name), "r") as f:
                    return f.read().strip()

            def fetch_and_uncompress(url, dest):
                # response is not seekable, so tarfile is not happy: buffer the whole
                # body in memory before handing it over
                with requests.get(url) as resp:
                    # fail right away on a 404/5xx instead of feeding an error page to tarfile
                    resp.raise_for_status()
                    with tarfile.open(fileobj=io.BytesIO(resp.content), mode='r:bz2') as tar:
                        tar.extractall(dest)

            # dist stuff
            os.mkdir(osp.join(self.build_env_path, 'dist'))
            for item in glob.glob(osp.join(DKUINSTALLDIR, 'dist', '*.jar')):
                self.copy_to_buildenv(item, osp.join('dist', osp.basename(item)))
            for item in glob.glob(osp.join(DKUINSTALLDIR, 'src', 'main', 'java', '*.properties')):
                self.copy_to_buildenv(item, osp.join('dist', osp.basename(item)))
            # dss-version.json is a placeholder
            with open(osp.join(self.build_env_path, 'dss-version.json'), "w") as f:
                json.dump({"product_commitid": "doesnotmatter", "product_version": "dev/doesnotmatter"}, f)
            # dss-arch.txt is set to linux
            with open(osp.join(self.build_env_path, 'dss-arch.txt'), "w") as f:
                f.write("linux")
            # installer.sh
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'packagers', 'studio', 'installer.sh'), 'installer.sh')
            # lib folder, without govern and spark stuff
            os.makedirs(osp.join(self.build_env_path, 'lib', 'ivy'))
            for ivy_conf in ["common-run", "backend-run"]:
                self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'lib', 'ivy', ivy_conf), osp.join("lib", "ivy", ivy_conf))
            for lib_conf in ["third", "shadelib", "shims"]:
                self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'lib', lib_conf), osp.join("lib", lib_conf))
            for ivy_conf in glob.glob(osp.join(DKUINSTALLDIR, 'lib', 'ivy', 'jdbc*')):
                self.copy_to_buildenv(ivy_conf, osp.join("lib", "ivy", osp.basename(ivy_conf)))
            # python, without govern again
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'src', 'main', 'python'), 'python')
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'lambda', 'src', 'main', 'python', 'dataiku', 'apinode'), osp.join('python', 'dataiku', 'apinode'))
            # python packages, for the right python version. Implies downloading them
            # first get the 'official' versions
            PYTHON_PACKAGES_VERSION = get_version_from_file('python_packages_version_linux.txt')
            fetch_and_uncompress('https://cdn.downloads.dataiku.com/studio.rsrc/%s/%s' % (PYTHON_PACKAGES_VERSION, PYTHON_PACKAGES_FILE), osp.join(self.build_env_path, PYTHON_PACKAGES))
            with open(osp.join(self.build_env_path, PYTHON_PACKAGES, 'dss-version.txt'), 'w') as f:
                f.write(PYTHON_DSS_VERSION)
            # conda packages too
            CONDA_PACKAGES_VERSION = get_version_from_file('conda_packages_version_linux.txt')
            fetch_and_uncompress('https://cdn.downloads.dataiku.com/studio.rsrc/%s/conda-packages.tar.bz2' % CONDA_PACKAGES_VERSION, osp.join(self.build_env_path, 'conda.packages'))
            # R lives in dist in dev env
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'dist', 'R'), 'R')
            # scripts, with the virtualenv too
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'packagers', 'studio', 'scripts'), 'scripts')
            for item in glob.glob(osp.join(DKUINSTALLDIR, 'packagers', 'common', 'scripts', '*sh')):
                self.copy_to_buildenv(item, osp.join('scripts', osp.basename(item)))
            for item in glob.glob(osp.join(DKUINSTALLDIR, 'packagers', 'common', 'scripts', '*py')):
                self.copy_to_buildenv(item, osp.join('scripts', osp.basename(item)))
            for item in glob.glob(osp.join(DKUINSTALLDIR, 'packagers', 'common', 'scripts', 'install', '*')):
                self.copy_to_buildenv(item, osp.join('scripts', 'install', osp.basename(item)))
            self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'packagers', 'dkuinstall'), osp.join('scripts', 'dkuinstall'))
            # first get the 'official' virtualenv version
            PYTHON_VIRTUALENV_VERSION = get_version_from_file('python_virtualenv_version_linux.txt')
            # then download + unzip
            fetch_and_uncompress('https://cdn.downloads.dataiku.com/studio.rsrc/%s/virtualenv.tar.bz2' % PYTHON_VIRTUALENV_VERSION, osp.join(self.build_env_path, 'scripts'))
            # and the last (big) one, resources. Only take the interesting parts
            os.mkdir(osp.join(self.build_env_path, 'resources'))
            for resource in ['publicdata', 'eurorates', 'nlp', 'holidays.db', 'weekends.db', 'logging',
                             'builtin-python-env', 'code-envs', 'apinode', 'security', 'api-deployer',
                             'container-exec', 'R', 'databricks', 'cuda']:
                self.copy_to_buildenv(osp.join(DKUINSTALLDIR, 'resources', resource), osp.join('resources', resource))
            # add default samples
            for item in glob.glob(osp.join(DKUINSTALLDIR, "plugins", "default-samples")):
                self.copy_to_buildenv(item, osp.join("resources", "plugins", osp.basename(item)))

            # don't forget geolite
            GEOIP_VERSION = get_version_from_file('geoip_version.txt')
            with requests.get('http://downloads.dataiku.com/studio-build/geoip/GeoLite2City-%s.fgdb.gz' % GEOIP_VERSION, stream=True) as resp:
                # without this check an HTTP error body would be saved as the database file
                resp.raise_for_status()
                with open(osp.join(self.build_env_path, 'resources', 'GeoLite2City.fgdb.gz'), 'wb') as f:
                    shutil.copyfileobj(resp.raw, f)

            # then we can copy all of that in the container
            for item in kit_items:
                self.dockerfile += 'COPY "%s" /home/dataiku/installdir/%s\n' % (item, item)

        else:
            for item in kit_items:
                self.dockerfile += 'COPY "%s" /home/dataiku/installdir/%s\n' % (item, item)
                self.copy_to_buildenv(osp.join(DKUINSTALLDIR, item), item)

            # Remove some files that are not actually useful and that trigger security scanners
            # DeepHub requirements: they are not actually installed, but scanners flag them anyway
            # govern-run: not used in API node
            # jnr-posix: used by cassandra driver, not used in API node
            # xmlsec/xmltooling/opensaml/esapi/not-yet-commons: used by SAML login, not used in API node
            # nbconvert: not needed in API node
            # NOTE(review): the last rm targets python36.packages, but only
            # python39/310/311.packages can be copied above, so it currently removes nothing.
            # Confirm whether it should target PYTHON_PACKAGES instead (this kit is also used
            # for the CDE image, so check nbconvert is not needed there).
            self.dockerfile += """
RUN rm -f /home/dataiku/installdir/resources/code-envs/python/image-classification/v1/spec/requirements.txt \
    && rm -f /home/dataiku/installdir/resources/code-envs/python/object-detection/v1/spec/requirements.txt \
    && rm -rf /home/dataiku/installdir/lib/ivy/govern-server-run \
    && rm -f /home/dataiku/installdir/lib/ivy/backend-run/jnr-posix-3.0.44.jar \
    && rm -rf /home/dataiku/installdir/python36.packages/nbconvert*
"""

class APIImageBuilder(KitImageBuilder):
    """
    Image builder for the API deployer: a kit image in which an API node
    gets installed.
    """

    type_ = "api_deployer"

    def build(self):
        """Assemble the full Dockerfile (and build env) for the API deployer image."""
        # base layers: OS packages, extra pythons, optional R, CUDA, java
        self.initialize_dockerfile()
        self.add_syspackages(additional=KitImageBuilder.KIT_ALMA_SYSPACKAGES)
        self.add_additional_pythons()
        if self.opts.r:
            self.add_r_base()
        self.add_cuda()
        self.set_default_java()
        self.setup_dataiku_user()

        # DSS itself
        self.copy_and_install_apinode()

        # user-provided tail, if any
        if self.opts.dockerfile_append:
            self.append_to_dockerfile(self.opts.dockerfile_append)

    def copy_and_install_apinode(self):
        """
        Copy the kit in the image, run the installer for an API node as the
        dataiku user, and install the R integration when R was requested.
        """
        self.dockerfile += "ENV BUILD_TIMESTAMP=$(date)\n"

        self.copy_kit()

        self.dockerfile += """
RUN /bin/su - dataiku -c "/home/dataiku/installdir/installer.sh -y -t api -d /home/dataiku/data -p 12000 -P python%s"
""" % PYTHONVERSION

        if not self.opts.r:
            return

        # only pass -repo when a CRAN mirror was configured
        if self.opts.cran_mirror:
            cran_repo_flag = "-repo %s" % self.opts.cran_mirror
        else:
            cran_repo_flag = ""
        self.dockerfile += """
# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
RUN /home/dataiku/data/bin/dssadmin install-R-integration -noDeps {repo_option} -noJupyterKernel
USER root
""".format(repo_option=cran_repo_flag, uid=get_dataiku_user_uid())


class CDEImageBuilder(KitImageBuilder):
    """
    Image builder for the containerized DSS engine (CDE): a kit image in
    which a design node gets installed, started through the `cde` entrypoint.
    """

    type_ = "cde"

    def build(self):
        """Assemble the full Dockerfile (and build env) for the CDE base image."""
        # base layers: OS packages, extra pythons, optional R, CUDA, java
        self.initialize_dockerfile()
        self.add_syspackages(additional=KitImageBuilder.KIT_ALMA_SYSPACKAGES)
        self.add_additional_pythons()
        if self.opts.r:
            self.add_r_base()
        self.add_cuda()
        self.set_default_java()

        self.setup_dataiku_user()

        # DSS itself
        self.copy_and_install_cde()

        # user-provided tail, if any
        if self.opts.dockerfile_append:
            self.append_to_dockerfile(self.opts.dockerfile_append)

    def copy_and_install_cde(self):
        """
        Copy the kit in the image, run the installer for a design node as the
        dataiku user, optionally add the R integration and the standalone
        hadoop libs, then declare the entrypoint.
        """
        self.dockerfile += "ENV BUILD_TIMESTAMP=$(date)\n"

        self.copy_kit()

        self.dockerfile += """
RUN /bin/su - dataiku -c "/home/dataiku/installdir/installer.sh -y -t design -d /home/dataiku/data -p 12000 -P python%s -j"
""" % PYTHONVERSION
        if self.opts.r:
            # only pass -repo when a CRAN mirror was configured
            if self.opts.cran_mirror:
                cran_repo_flag = "-repo %s" % self.opts.cran_mirror
            else:
                cran_repo_flag = ""
            self.dockerfile += 'RUN /bin/su - dataiku -c "/home/dataiku/data/bin/dssadmin install-R-integration -noDeps %s -noJupyterKernel"' % cran_repo_flag

        # if the hadoop standalone integration was done, cram it in the image too
        if DEV_MODE:
            # in dev, the flavor to use is given by an env variable
            standalone_flavor = os.environ.get("DKU_STANDALONE_FLAVOR")
            if standalone_flavor is None:
                hadoop_libs_source = None
            else:
                hadoop_libs_source = osp.join(DKUINSTALLDIR, 'packagers', 'resources-build', 'hadoop-standalone-libs', 'dist', standalone_flavor)
        else:
            # can't use the tar.gz, have to rely on whether the standalone libs folder is there
            if osp.exists(osp.join(DKUINSTALLDIR, 'hadoop-standalone-libs')):
                hadoop_libs_source = osp.join(DKUINSTALLDIR, 'hadoop-standalone-libs')
            else:
                hadoop_libs_source = None

        if hadoop_libs_source is not None:
            self.copy_to_buildenv(hadoop_libs_source, 'hadoop-standalone-libs')
            self.dockerfile += """
COPY hadoop-standalone-libs /home/dataiku/installdir/hadoop-standalone-libs/
RUN /home/dataiku/data/pyenv/bin/python /home/dataiku/installdir/scripts/_build_standalone_hadoop_env.py /home/dataiku/data/bin/env-hadoop.sh
"""

        self.dockerfile += """
WORKDIR /opt/dataiku
USER root
"""

        # finish with the entrypoint, running as the dataiku user
        self.dockerfile += """
WORKDIR /home/dataiku
# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
ENTRYPOINT ["/home/dataiku/data/bin/cde"]
""".format(uid=get_dataiku_user_uid())

class CDEPluginImageBuilder(BaseImageBuilder):
    """
    Builds image for containerized DSS engine, with the instance-specific parts:
    starts FROM a CDE base image and adds the JDBC/java libs of DIP_HOME, the
    driver jars directories referenced by the connections, and (unless disabled
    in the general settings) the plugins and their Python code envs.
    """

    type_ = "cde-plugins"

    def __init__(self, opts):
        super(CDEPluginImageBuilder, self).__init__(opts)
        # the base image can be overridden in the general settings
        requested_image_tag = self._get_requested_cde_base_image_tag()
        if requested_image_tag is not None:
            self.source_tag = requested_image_tag
        else:
            self.source_tag = compute_default_tag("cde")

    def _get_requested_cde_base_image_tag(self):
        """
        Return containerSettings.cdeBaseImage from general-settings.json, or None
        when it is not set or the file cannot be read (a warning is logged then).
        """
        try:
            with open(os.path.join(DIP_HOME, 'config', 'general-settings.json'), 'r') as f:
                settings = json.load(f)
                return settings.get('containerSettings', {}).get('cdeBaseImage')
        except Exception as e:
            logging.warning('Could not read general settings: ' + str(e))
        return None

    def build(self):
        """Assemble the Dockerfile and build env of the cde-plugins image."""
        self.dockerfile += "FROM %s\nUSER root\n" % self.source_tag

        # also copy the lib/jdbc and lib/java stuff
        if osp.exists(osp.join(DIP_HOME, 'lib', 'jdbc')):
            self.copy_to_buildenv(osp.join(DIP_HOME, 'lib', 'jdbc'), 'lib-jdbc')
            self.dockerfile += "COPY --chown=dataiku:dataiku lib-jdbc /home/dataiku/data/lib/jdbc/\n"
        if osp.exists(osp.join(DIP_HOME, 'lib', 'java')):
            self.copy_to_buildenv(osp.join(DIP_HOME, 'lib', 'java'), 'lib-java')
            self.dockerfile += "COPY --chown=dataiku:dataiku lib-java /home/dataiku/data/lib/java/\n"
        # since we know where they are, also copy the driver jars directories
        # and remap their locations
        with open(osp.join(DIP_HOME, 'config', 'connections.json'), 'r') as f:
            connections = json.load(f)
        jars_directories = set()
        for connection_name, connection in connections.get('connections', {}).items():
            jars_directory = connection.get('params', {}).get('jarsDirectory')
            if not jars_directory:
                continue
            # relative directories are resolved against DIP_HOME; skip the ones that don't exist
            if not osp.exists(os.path.normpath(os.path.join(DIP_HOME, jars_directory))):
                continue
            jars_directories.add(jars_directory)
        # sorted: keeps the Dockerfile identical from one run to the next (set order is
        # arbitrary), so that the docker layer cache can be reused
        for jars_directory in sorted(jars_directories):
            # the directory is identified in the image by the md5 of its configured path
            jars_directory_hash = hashlib.md5(jars_directory.encode('utf8')).hexdigest()
            self.copy_to_buildenv(os.path.join(DIP_HOME, jars_directory), 'jdbc-%s' % jars_directory_hash)
            self.dockerfile += "COPY --chown=dataiku:dataiku jdbc-%s /home/dataiku/drivers/jdbc-%s/\n" % (jars_directory_hash, jars_directory_hash)

        if self.is_cde_plugins_enabled():
            self._install_plugins()

        # add the entrypoint
        self.dockerfile += """
WORKDIR /home/dataiku
# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
ENTRYPOINT ["/home/dataiku/data/bin/cde"]
""".format(uid=get_dataiku_user_uid())

    def _install_plugins(self):
        """
        Copy in the image the plugins that may be needed by the CDE, then their
        Python code envs, and make the DSS inside the image build these code envs.
        """
        # plugin components that can be needed in the CDE
        useful_components = ['python-exporters', 'java-exporters', 'python-fs-providers', 'python-formats', 'python-connectors', 'jython-processors', 'java-fs-providers', 'java-formats', 'java-connectors', 'sample-datasets']
        # markers of java components, looked for in the raw plugin.json
        useful_markers = ['"entryPoint"', '"javaPreparationProcessors"', '"javaFormulaFunctions"']

        # copy plugins too
        plugin_ids = set()
        for plugin_folder in glob.glob(os.path.join(DIP_HOME, "plugins", '*', '*')):
            has_something_useful = any(
                glob.glob(os.path.join(plugin_folder, useful_component, '*'))
                for useful_component in useful_components
            )
            # sniff the java processor stuff
            try:
                with open(os.path.join(plugin_folder, 'plugin.json'), 'r') as f:
                    desc = f.read()
            except Exception as e:
                logging.warning("Unable to read plugin definition in " + plugin_folder + " : " + str(e))
                continue # broken plugin can't be useful
            # this is a very approximate test of whether the plugin could be needed, but it should
            # be good enough (no false negative, and probably very few false positives on dev plugins)
            if any(marker in desc for marker in useful_markers):
                has_something_useful = True
            if not has_something_useful:
                continue

            plugin_id = os.path.basename(plugin_folder)
            # check settings to see if plugin was excluded from cde
            try:
                with open(os.path.join(DIP_HOME, 'config', 'plugins', plugin_id, 'settings.json'), 'r') as f:
                    settings = json.load(f)
                    if settings.get("excludedFromCDE"):
                        logging.info("Plugin %s is excluded from cde" % plugin_id)
                        continue
            except Exception as e:
                # unreadable settings: the plugin is kept
                logging.warning("Unable to read plugin settings for " + plugin_id + " : " + str(e))
            plugin_ids.add(plugin_id)

        # sorted: deterministic order for everything derived from the plugin ids below
        sorted_plugin_ids = sorted(plugin_ids)

        for plugin_id in sorted_plugin_ids:
            if not plugin_id:
                continue
            for plugin_folder in glob.glob(os.path.join(DIP_HOME, 'plugins', '*', plugin_id)):
                # mirror the layout of DIP_HOME/plugins inside the build env
                target_path = plugin_folder.replace(DIP_HOME, self.build_env_path + os.sep)
                self.copy_to_buildenv(plugin_folder, target_path)
        if os.path.exists(os.path.join(self.build_env_path, 'plugins')):
            self.dockerfile += "COPY --chown=dataiku:dataiku plugins /home/dataiku/data/plugins/\n"
        # and now... the fun part! Make the code envs of these plugins
        # Start by finding the code envs (from the plugins settings)
        code_env_names = []
        for plugin_id in sorted_plugin_ids:
            try:
                with open(os.path.join(DIP_HOME, 'config', 'plugins', plugin_id, 'settings.json'), 'r') as f:
                    settings = json.load(f)
                code_env_name = settings.get('codeEnvName')
                # several plugins can share a code env: only handle it once
                if code_env_name and code_env_name not in code_env_names:
                    code_env_names.append(code_env_name)
            except Exception:
                logging.warning("Unable to find which code env the %s plugin uses, ignoring" % plugin_id)
        # then copy
        os.makedirs(os.path.join(self.build_env_path, 'code-envs', 'desc', 'python'))
        os.makedirs(os.path.join(self.build_env_path, 'code-envs', 'desc', 'R'))
        node_type = get_node_type()
        needs_conda = False
        filtered_code_env_names = []
        for code_env_name in code_env_names:
            if node_type == 'automation':
                code_env_folder = os.path.join(DIP_HOME, 'acode-envs', 'python', code_env_name, 'desc')
            else:
                code_env_folder = os.path.join(DIP_HOME, 'code-envs', 'desc', 'python', code_env_name)

            if not os.path.exists(code_env_folder):
                # code-env not found in the python subfolder, so probably a R code-env - ignoring it
                logging.warning("%s is not a Python code-env, ignoring" % code_env_name)
                continue

            if not needs_conda:
                try:
                    with open(os.path.join(code_env_folder, 'desc.json')) as f:
                        desc = json.load(f)
                    # 'conda' may be absent from the desc: treat that as "not conda"
                    needs_conda = bool(desc.get('conda'))
                except Exception as e:
                    logging.error('Could not read code-env %s actual desc', code_env_name, exc_info=e)
            code_env_build_folder = os.path.join('code-envs', 'desc', 'python', code_env_name)
            code_env_target = os.path.join('/home/dataiku/data', 'code-envs', 'desc', 'python', code_env_name)
            # move to image
            self.copy_to_buildenv(code_env_folder, code_env_build_folder)
            self.dockerfile += "COPY --chown=dataiku:dataiku %s %s/\n" % (code_env_build_folder, code_env_target)
            filtered_code_env_names.append(code_env_name)
        if filtered_code_env_names:
            if needs_conda:
                # Miniconda.Dockerfile have a similar chunk, have a look there if you update here
                self.dockerfile += """
RUN if ! test -d /opt/conda; then curl -sSL https://repo.anaconda.com/miniconda/Miniconda3-py37_23.1.0-1-Linux-x86_64.sh -o /tmp/miniconda.sh \
    && bash /tmp/miniconda.sh -bfp /opt/conda/ \
    && rm -rf /tmp/miniconda.sh; fi
ENV PATH="/opt/conda/bin:$PATH"
"""
            # then make the DSS inside build all the code envs
            thread_override = self.get_thread_count()
            if thread_override is not None:
                threads_flag = "--threads %s" % thread_override
            else:
                threads_flag = ""
            self.dockerfile += """
RUN chown -Rh dataiku:dataiku /home/dataiku/data/code-envs
WORKDIR /home/dataiku
# USER dataiku
USER {uid}
ENV HOME=/home/dataiku
ENV VIRTUALENV_OVERRIDE_APP_DATA=/opt/dataiku/virtualenv-appdata
RUN cd /home/dataiku/data && ./bin/dku __build_cde_plugin_code_env -names {csv_env_names} {threads_flag} && rm -rf ~/.cache/pip
WORKDIR /opt/dataiku
USER root
    """.format(csv_env_names=','.join(filtered_code_env_names), threads_flag=threads_flag, uid=get_dataiku_user_uid())

    @staticmethod
    def get_thread_count():
        """
        Return the value of dku.cde.plugin.codenv.rebuild.threads from
        config/dip.properties as a string, or None if the file or the property
        is missing or the file cannot be read.
        """
        try:
            path = osp.join(DIP_HOME, "config/dip.properties")
            if not os.path.exists(path):
                return None
            with open(path, "r") as f:
                for line in f:
                    # strip() so that CRLF line endings and surrounding blanks don't defeat the match
                    match = re.match(r"^dku\.cde\.plugin\.codenv\.rebuild\.threads\s*=\s*(\w+)$", line.strip())
                    if match:
                        return match.group(1)
        except Exception as e:
            logging.error("Could not read dip properties json file", exc_info=e)
        return None

    @staticmethod
    def is_cde_plugins_enabled():
        """
        Return containerSettings.cdePluginSupport from general-settings.json.
        Defaults to True when the file or the setting is missing, or on read error.
        """
        # default value is true
        path = osp.join(DIP_HOME, "config/general-settings.json")
        if not os.path.exists(path):
            return True
        try:
            with open(path, "r") as f:
                settings = json.load(f)
            return settings.get("containerSettings", {}).get("cdePluginSupport", True)
        except Exception as e:
            logging.error("Could not read general settings file", exc_info=e)
        return True

def run_wait_check(cmd, shell=False, stdout=None):
    """
    Runs a process and waits for it, don't capture output.

    :param cmd: command to run, as accepted by subprocess.Popen (sequence of args, or string)
    :param shell: passed as-is to subprocess.Popen
    :param stdout: passed as-is to subprocess.Popen (None means inherit)
    :raises Exception: if the process exits with a non-zero code
    """
    # pass cmd as a logging argument: "%s" % (cmd) breaks when cmd is a tuple
    logging.info("Running command: %s", cmd)
    retcode = subprocess.Popen(cmd, shell=shell, stdout=stdout).wait()
    if retcode != 0:
        raise Exception("Command failed: %s - code %s" % (cmd, retcode))

def build_image(builder, opts, tag):
    """
    Prepare the build env and Dockerfile with the given builder, then run `docker build`.

    :param builder: image builder; its build() fills builder.dockerfile and the
                    directory at builder.build_env_path
    :param opts: parsed command line options (copy_to_buildenv, mode, docker_build_opt are read)
    :param tag: tag given to the built image

    In "dry" mode, the process exits with status 0 once the Dockerfile is written.
    Raises an Exception if the docker build fails.
    """
    logging.info("Preparing build env and Dockerfile")
    builder.build()

    # extra files requested on the command line, as (source, target) pairs
    if opts.copy_to_buildenv is not None:
        for extra_copy in opts.copy_to_buildenv:
            logging.info("Copying to buildenv: %s -> %s" % (extra_copy[0], extra_copy[1]))
            builder.copy_to_buildenv(extra_copy[0], extra_copy[1])

    # apply copy_rx_from_owner to every directory and file of the build env
    for root, dirs, files in os.walk(builder.build_env_path, followlinks=False):
        for name in dirs + files:
            copy_rx_from_owner(os.path.join(root, name))

    logging.info("Docker build env and Dockerfile ready, building it")
    logging.info("Build env path:%s" % builder.build_env_path)
    logging.info("Dockerfile content:\n%s" % builder.dockerfile)

    dockerfilepath = osp.join(builder.build_env_path, "Dockerfile")
    with open(dockerfilepath, "w") as f:
        f.write(builder.dockerfile)

    print("Saved to %s" % dockerfilepath)
    if opts.mode == "dry":
        # sys.exit rather than the exit() helper, which is only there when the site module is loaded
        sys.exit(0)

    docker_cmd = ["docker", "build", "-t", tag]
    if opts.docker_build_opt is not None:
        docker_cmd.extend(opt.strip() for opt in opts.docker_build_opt)
    docker_cmd.append(builder.build_env_path)

    run_wait_check(docker_cmd)

def get_built_images_json_file():
    """Path of the JSON file listing the built base images, under DIP_HOME/run."""
    run_dir = os.path.join(DIP_HOME, "run")
    return os.path.join(run_dir, "built-base-images.json")

def get_cde_images_json_file():
    """Path of the JSON file listing the CDE images, under DIP_HOME/run."""
    run_dir = os.path.join(DIP_HOME, "run")
    return os.path.join(run_dir, "cde-images.json")

def _get_images(built_images_json_file):
    try:
        if os.path.exists(built_images_json_file):
            with open(built_images_json_file, 'r') as f:
                return json.load(f)
    except Exception as e:
        logging.error("Could not read base image json file", exc_info=e)
    return {}

def get_built_base_images():
    """Content of the built base images JSON file ({} if missing or unreadable)."""
    json_path = get_built_images_json_file()
    return _get_images(json_path)

def get_cde_images():
    """Return the content of the CDE images JSON file (empty dict if unavailable)."""
    json_path = get_cde_images_json_file()
    return _get_images(json_path)

if __name__ == '__main__':
    # Command-line entry point: declare the CLI of the image builder and parse it into `opts`.
    import argparse
    parser = argparse.ArgumentParser(prog='build-images.py', description="Build container images")
    image_types = ["container-exec", "api-deployer", "spark", "cde", "cde-plugins"]
    # cde-plugins is hidden from the CLI help since it is internal-only; it is kept here for convenience/legacy reasons.
    # That is why --type uses a `metavar` listing only the public types instead of `choices`:
    # the value is validated by hand against `image_types` once the arguments are parsed.
    filtered_image_types = [typ_ for typ_ in image_types if typ_ != "cde-plugins"]
    type_types = "{%s}" % ','.join(filtered_image_types)
    parser.add_argument('--type', required=True, metavar=type_types, help="Type of image to build")

    # The "dry" mode is only offered when DEV_MODE is set
    mode_options = ["build", "download", "use", "build-push"]
    if DEV_MODE:
        mode_options.append("dry")
    parser.add_argument('--mode', default="build", choices=mode_options, help="What to do")

    # Only if mode == "use" or mode == "download"
    parser.add_argument('--source-image', help="[mode=download or mode=use] Source image to use")

    # Only if mode == "download"
    parser.add_argument('--source-registry', help="[mode=download] Source registry base URL")

    # Only if mode == "build" or mode == "build-push"
    parser.add_argument('--distrib', type=str, choices=['almalinux8', 'almalinux9'], default='almalinux9', help="[mode=build only] Base distribution to use when building from scratch")
    parser.add_argument('--build-from-image', type=str, help="[mode=build only] Custom base image to use when building from scratch")
    parser.add_argument('--system-packages', type=str, help="[mode=build only] Comma-separated list of additional system packages")
    parser.add_argument('--http-proxy', type=str, help="[mode=build only] http_proxy string for building")
    parser.add_argument('--https-proxy', type=str, help="[mode=build only] https_proxy string for building")
    parser.add_argument('--no-proxy', type=str, help="[mode=build only] no_proxy string for building")
    parser.add_argument('--dockerfile-prepend', help="[mode=build only] File to prepend to the Dockerfile")
    parser.add_argument('--cran-mirror', default="https://cloud.r-project.org", help="[mode=build only] CRAN mirror to use to download packages")
    # action="append": the option can be repeated, each occurrence adds one entry to the list
    parser.add_argument('--docker-build-opt', help="Add a docker build option", action="append")

    # Only if mode == "build-push"
    parser.add_argument('--target-registry', help="[mode=build-push only] Target registry URL")

    # Whatever the mode
    parser.add_argument('--tag', help="Output image tag")

    # Keep options in sync with BaseImageBuildOptions.java
    #
    # Most features are exposed as a --with-X / --without-X pair sharing a single `dest`.
    # No explicit default is given, so argparse uses the implicit default of whichever flag
    # of the pair is registered FIRST (True for store_false, False for store_true).
    # The order inside each pair therefore encodes the default value: do not reorder a pair
    # without also updating the "(default: ...)" part of its help text.
    parser.add_argument('--without-r',action="store_false", dest="r", help="Disable R (default: enabled)")
    parser.add_argument('--with-r',action="store_true", dest="r", help="Enable R (default: enabled)")

    parser.add_argument('--with-py27',action="store_true", dest="py27", help="Enable Python 2.7 (unsupported)")
    parser.add_argument('--without-py27',action="store_false", dest="py27", help="Disable Python 2.7 (unsupported)")

    # py36 is tri-state (PythonInstallOption): DEFAULT when neither flag is passed, INSTALL or SKIP otherwise
    parser.add_argument('--with-py36', action="store_const", dest="py36", const=PythonInstallOption.INSTALL, help="Enable Python 3.6 (default: disabled)")
    parser.add_argument('--without-py36', action="store_const", dest="py36", const=PythonInstallOption.SKIP, help="Disable Python 3.6 (default: disabled)")
    parser.set_defaults(py36=PythonInstallOption.DEFAULT)

    parser.add_argument('--with-py37',action="store_true", dest="py37", help="Enable Python 3.7 (default: disabled)")
    parser.add_argument('--without-py37',action="store_false", dest="py37", help="Disable Python 3.7 (default: disabled)")

    parser.add_argument('--with-py38',action="store_true", dest="py38", help="Enable Python 3.8 (default: disabled)")
    parser.add_argument('--without-py38',action="store_false", dest="py38", help="Disable Python 3.8 (default: disabled)")

    parser.add_argument('--without-py39',action="store_false", dest="py39", help="Disable Python 3.9 (default: enabled)")
    parser.add_argument('--with-py39',action="store_true", dest="py39", help="Enable Python 3.9 (default: enabled)")

    # we could do that for all python versions and make it generic but so far the other base env py versions are enabled by default
    # Python 3.10 and 3.11 are enabled by default only when they match the version of the Python running this script
    if PYTHONVERSION == "3.10":
        parser.add_argument('--without-py310',action="store_false", dest="py310", help="Disable Python 3.10 (default: enabled)")
        parser.add_argument('--with-py310',action="store_true", dest="py310", help="Enable Python 3.10 (default: enabled)")
    else:
        parser.add_argument('--with-py310',action="store_true", dest="py310", help="Enable Python 3.10 (default: disabled)")
        parser.add_argument('--without-py310',action="store_false", dest="py310", help="Disable Python 3.10 (default: disabled)")

    if PYTHONVERSION == "3.11":
        parser.add_argument('--without-py311',action="store_false", dest="py311", help="Disable Python 3.11 (default: enabled)")
        parser.add_argument('--with-py311',action="store_true", dest="py311", help="Enable Python 3.11 (default: enabled)")
    else:
        parser.add_argument('--with-py311',action="store_true", dest="py311", help="Enable Python 3.11 (default: disabled)")
        parser.add_argument('--without-py311',action="store_false", dest="py311", help="Disable Python 3.11 (default: disabled)")

    parser.add_argument('--with-py312',action="store_true", dest="py312", help="Enable Python 3.12 (default: disabled)")
    parser.add_argument('--without-py312',action="store_false", dest="py312", help="Disable Python 3.12 (default: disabled)")

    parser.add_argument('--with-py313',action="store_true", dest="py313", help="Enable Python 3.13, supported on AlmaLinux 9 only (default: disabled)")
    parser.add_argument('--without-py313',action="store_false", dest="py313", help="Disable Python 3.13 (default: disabled)")

    parser.add_argument('--with-cuda', action="store_true", dest="cuda", help="Enable CUDA (default: disabled)")
    parser.add_argument('--without-cuda',action="store_false", dest="cuda", help="Disable CUDA (default: disabled)")
    # No default here: the effective default CUDA version depends on the distribution (see help text)
    parser.add_argument('--cuda-version', type=str, choices=SUPPORTED_CUDA_VERSIONS, help="CUDA version (default: " + DEFAULT_CUDA_VERSION_ALMA9 + " on AlmaLinux 9, " + DEFAULT_CUDA_VERSION_ALMA8 + " on AlmaLinux 8)")

    # Only a store_false flag exists for this dest, so opts.xgboost_gpu_support is True unless the flag is passed
    # NOTE(review): the "only if CUDA is enabled" part of the help is presumably enforced by the builders - confirm
    parser.add_argument('--without-xgboost-gpu-support', action="store_false", dest="xgboost_gpu_support", help="Don't install GPU support for XGBoost (default: installed only if CUDA is enabled)")

    parser.add_argument('--dockerfile-append', help="Appended to the Dockerfile")
    # nargs=2 + append: opts.copy_to_buildenv is None, or a list of [SOURCE, DEST] pairs (one per occurrence)
    parser.add_argument('--copy-to-buildenv', help="Copy to the buildenv. SOURCE DEST", nargs=2, action="append")

    opts = parser.parse_args(sys.argv[1:])

    # --- Validate the parsed options (parser.error() prints the usage and exits) ---
    if opts.type not in image_types:
        parser.error("type must be one of %s" % ','.join(filtered_image_types))

    if opts.type == "cde-plugins" and opts.mode not in ("build", "build-push", "dry"):
        parser.error("cde-plugins can only be used in build or build-push mode")

    # asking for a CUDA version implies that CUDA is wanted
    if opts.cuda_version and not opts.cuda:
        logging.warning("A specific CUDA version was requested while CUDA is not enabled, enabling CUDA automatically. Remove the --cuda-version argument to not install CUDA.")
        opts.cuda = True

    logging.info("Building image with options: %s" % (opts))

    # --- Output tag: explicit --tag, else the default tag of the image type ---
    base_tag = opts.tag if opts.tag is not None else compute_default_tag(opts.type)

    # --- R major version, derived from the DKURVERSION environment variable (only "4" is recognized) ---
    dss_r_version = os.environ.get("DKURVERSION", "???")
    r_major_version = "4" if dss_r_version.startswith("4.") else None

    # --- Pick the image builder matching the mode and the image type ---
    if opts.mode in ("download", "use"):
        # start from an existing image: the one given on the command line, else the computed one
        source_image = opts.source_image
        if source_image is None:
            source_image = compute_source_image(opts.type, opts.distrib, opts.r, r_major_version, opts.cuda, opts.cuda_version)
        if opts.source_registry is not None:
            source_image = "%s/%s" % (opts.source_registry, source_image)

        if opts.mode == "download":
            logging.info("Pulling %s" % source_image)
            run_wait_check(["docker", "pull", source_image])

        logging.info("Building final image from %s" % source_image)
        # TODO: If there is nothing to add, we should not do a build but a simple
        # retagging?
        builder = FinalizeOnlyImageBuilder(opts, source_image, opts.type.replace('-', '_'))

    elif opts.mode in ("build", "build-push", "dry"):
        # build from scratch: one builder class per image type
        builder_classes = {
            "container-exec": ContainerExecImageBuilder,
            "spark": SparkExecImageBuilder,
            "api-deployer": APIImageBuilder,
            "cde": CDEImageBuilder,
            "cde-plugins": CDEPluginImageBuilder,
        }
        builder_class = builder_classes.get(opts.type)
        if builder_class is not None:
            builder = builder_class(opts)

    def copy_rx_from_owner(p):
        """Grant group and others the read / execute permissions that the owner has on `p`.

        Makes sure the build dir contents bear the right permissions: otherwise, in the
        container, the files might end up owned by root and inaccessible to the user set
        as the run user (permissions are copied from the install dir).
        Best effort: a failure is logged as a warning and never raised.
        """
        try:
            current_mode = os.stat(p).st_mode
            extra_bits = 0
            # owner can read => group and others can read
            if current_mode & stat.S_IRUSR:
                extra_bits |= stat.S_IRGRP | stat.S_IROTH
            # owner can execute/traverse => group and others can too
            if current_mode & stat.S_IXUSR:
                extra_bits |= stat.S_IXGRP | stat.S_IXOTH
            wanted_mode = current_mode | extra_bits
            if wanted_mode == current_mode:
                # nothing to fix
                return
            logging.info("Fixup permission on %s : %s -> %s" % (p, oct(current_mode), oct(wanted_mode)))
            os.chmod(p, wanted_mode)
        except Exception as err:
            logging.warning("Failed to fixup build file permissions : %s" % str(err))

    # Fail fast: pushing requires a target registry, so check it before running the (long) build
    # instead of discovering the problem once the image is already built.
    if opts.mode == "build-push" and opts.target_registry is None:
        raise Exception("At least a target registry prefix must be given")

    build_image(builder, opts, base_tag)

    if opts.mode == "build-push":
        # tag the freshly built image with the registry prefix, then push it
        target_image_tag = "%s/%s" % (opts.target_registry, base_tag)
        run_wait_check(["docker", "tag", base_tag, target_image_tag])
        run_wait_check(["docker", "push", target_image_tag])

    # The recorded timestamp ends with a literal 'Z' (UTC designator), so it must be
    # rendered from the UTC time and not from the local time.
    dt = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.000Z')

    # record the build in a file (FYI-only)
    all_built_images = get_built_base_images()
    built_images = all_built_images.get('images', {})
    built_image = built_images.get(builder.type_, {})
    # see `LastBuiltImage` @ https://github.com/dataiku/dip/blob/master/src/main/java/com/dataiku/dip/containers/exec/ContainerExecImagesHelper.java#L151
    built_images[builder.type_] = built_image
    built_image['dssVersion'] = RAW_DSS_VERSION.strip()
    built_image['tag'] = base_tag
    built_image['datetime'] = dt
    # py36 holds a PythonInstallOption, which json cannot serialize: record its string value.
    # Work on a copy so that the parsed options themselves are left untouched.
    recorded_options = dict(vars(opts))
    recorded_options['py36'] = opts.py36.value
    built_image['options'] = recorded_options
    all_built_images['images'] = built_images
    # ideally we make that an atomic write. But well, let's say it won't happen very often
    with open(get_built_images_json_file(), 'w') as f:
        json.dump(all_built_images, f, indent=4)

    # specific cde stuff (for production use)
    if opts.type in ["cde", "cde-plugins"]:
        cde_images = get_cde_images()
        image = cde_images.get(builder.type_, {})
        cde_built_image = image.get('built', {})
        cde_built_image['dssVersion'] = RAW_DSS_VERSION.strip()
        cde_built_image['tag'] = base_tag
        cde_built_image['datetime'] = dt
        image["built"] = cde_built_image
        if opts.mode == "build-push" and opts.type == "cde-plugins":
            image["pushed"] = cde_built_image
        cde_images[builder.type_] = image

        # Write to a timestamped temporary file, then rename it over the final file so that
        # readers never observe a partially written cde-images.json.
        tmp_dir = osp.join(DIP_HOME, "tmp", "cde-images")
        if not osp.exists(tmp_dir):
            # makedirs: DIP_HOME/tmp itself may not exist yet
            os.makedirs(tmp_dir)
        tmp_file = osp.join(tmp_dir, "ts_%s" % datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f'))
        with open(tmp_file, mode="w") as f:
            json.dump(cde_images, f, indent=4)
        # rename only once the file is closed, i.e. once its whole content has been flushed
        os.rename(tmp_file, get_cde_images_json_file())

    logging.info("Done, cleaning up")
    # TODO
