import json
import logging
import os
import re
import subprocess
from collections import defaultdict

from dataiku.runnables import ResultTable, Runnable
from six.moves import configparser


class DeleteOldContainerImages(Runnable):
    def __init__(self, project_key, raw_config, plugin_config):
        """Initialise the runnable and normalise its configuration.

        The parent constructor runs first; whatever it stored on
        ``self.config`` is then replaced by the dictionary returned by
        ``_get_config``.
        """
        super().__init__(project_key, raw_config, plugin_config)
        raw_settings = self.config
        self.config = self._get_config(raw_settings)

    def get_progress_target(self):
        """Return ``None``; this runnable declares no progress target."""
        return None

    def _get_install_id(self, dip_home):
        config = configparser.RawConfigParser()
        config.read(os.path.join(dip_home, 'install.ini'))

        if config.has_option('general', 'installid'):
            return config.get('general', 'installid').lower()

        return 'notattributed'

    def _is_automation_node(self, dip_home):
        config = configparser.RawConfigParser()
        config.read(os.path.join(dip_home, 'install.ini'))

        if config.has_option('general', 'nodetype'):
            return config.get('general', 'nodetype').lower() == 'automation'

        return False

    def _get_dss_version(self, dip_home):
        with open(os.path.join(dip_home, 'dss-version.json'), 'r') as f:
            version = json.load(f)
            if 'product_version' in version:
                return version['product_version'].replace('/', '_').replace('.', '\\.').lower()

            return 'dev_doesnotmatter'

    def _get_config(self, raw_config):
        config = {
            'perform_deletion': bool(raw_config.get('perform_deletion', False)),
            'force_rm': bool(raw_config.get('force_rm', False))
        }

        for opt in ['rm_none_images', 'use_custom_host', 'dont_guess_image_name', 'container-exec', 'spark', 'api_deployer', 'cde', 'code_envs', 'code_studio']:
            config[opt] = bool(raw_config.get(opt, True))

        if config['dont_guess_image_name']:
            config['base_image_name'] = raw_config.get('custom_image_name', '')
            if not config['base_image_name']:
                raise ValueError('You should input a custom base image name that is not empty.')
        else:
            config['base_image_name'] = 'dku-exec-base-' + self._get_install_id(os.environ['DIP_HOME'])
        logging.info(f"Base image name: {config['base_image_name']}")
        config['custom_docker_host'] = raw_config.get('custom_docker_host', '')
        config['is_automation_node'] = self._is_automation_node(os.environ['DIP_HOME'])

        return config

    def _get_docker_cmd(self, *args):
        if self.config['use_custom_host']:
            return ['docker', '--host', self.config['custom_docker_host']] + list(args)
        else:
            return ['docker'] + list(args)

    def _list_images_with_docker(self):
        """Run ``docker images`` and return its output as JSON text lines.

        Returns:
            list[str]: one non-empty line per image, each expected to be a
            JSON object as rendered by Docker's ``json`` template function.

        Raises:
            Exception: if the docker command exits with a non-zero status;
            the message carries the exit code and the decoded stderr.
        """
        # '{{json .}}' is the explicit Go-template form of the 'json' shorthand.
        # The shorthand is only recognised by recent Docker CLIs, whereas the
        # template form is also accepted by older ones.
        cmd = self._get_docker_cmd('images', '--format', '{{json .}}')
        p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if p.returncode != 0:
            raise Exception("Failed to list container images. Error code:%s."
                            " Error message: %s" % (p.returncode, p.stderr.decode(errors='replace')))
        # Blank lines carry no image and would break JSON parsing downstream.
        return [line for line in p.stdout.decode(errors='replace').splitlines() if line.strip()]

    def _build_image_dict(self):
        r = {}
        for line in self._list_images_with_docker():
            image = json.loads(line)
            if "Repository" not in image and "Tag" not in image:
                continue
            r.setdefault(image["Repository"], []).append({"tag": image["Tag"], "id": image["ID"], 'createdAt': image["CreatedAt"]})
        return r

    def _automation_node_images_in_use(self):
        automation_node_used_images = {}
        code_env_root_dir = os.path.join(os.environ['DIP_HOME'], 'acode-envs')
        code_env_dirs = (os.path.join(code_env_root_dir, 'R'), os.path.join(code_env_root_dir, 'python'))
        link_targets = set()
        for dir in code_env_dirs:
            if not os.path.isdir(dir):
                continue

            with os.scandir(dir) as files:
                for file in files:
                    links_path = os.path.join(dir, file, 'links')
                    if not os.path.isdir(links_path):
                        continue

                    link_targets.update(
                        os.readlink(link.path)
                        for link in os.scandir(links_path)
                        if os.path.islink(link.path)
                    )

        for version in link_targets:
            container_exec_path = os.path.join(version, 'desc', 'container-exec.json')
            if not os.path.isfile(container_exec_path):
                continue

            with open(container_exec_path, 'r') as file:
                img_versions = json.load(file).get('versionPerBaseImage', {})

            for repo, tag in img_versions.items():
                normalized_repo = repo.replace('|', '' if repo.startswith('|') else '/')
                automation_node_used_images.setdefault(normalized_repo, set()).add(tag)

        return automation_node_used_images

    def _list_images(self):
        install_id = self._get_install_id(os.environ['DIP_HOME'])
        dss_version = self._get_dss_version(os.environ['DIP_HOME'])

        img_to_delete = []
        types = {
            "container-exec": [
                ('(^|.+/)dataiku-dss-container-exec-base$', True),
                ("(^|.+/){base_image_name}$", True)
            ],
            "spark": [
                ("(^|.+/)dataiku-dss-spark-exec-base$", True),
                ("(^|.+/)dku-spark-base-{install_id}$", True),
                ("(^|.+/)dku-spark-{install_id}-dss-(?!.*(?:pyenv|renv)){dss_version}$", False),
                ("(^|.+/)dku-spark-base-{install_id}-dss-(?!.*(?:pyenv|renv)){dss_version}$", False),
                ("(^|.+/)dku-spark-{install_id}-dss-{dss_version}-(?=.*(?:pyenv|renv))(?!.*pyenv.*renv|.*renv.*pyenv).*$", False),
                ("(^|.+/)dku-spark-base-{install_id}-dss-{dss_version}-(?=.*(?:pyenv|renv))(?!.*pyenv.*renv|.*renv.*pyenv).*$", False)
            ],
            "api_deployer": [
                ("(^|.+/)dataiku-dss-apideployer-base$", True),
                ("(^|.+/)dku-apideployer-apinode-base$", True),
                ("(^|.+/)dataiku-mad/apimodel-.+$", False),
                ("(^|.+/)dataiku-mad/apicodeenv.+{dss_version}$", False)
            ],
            "cde": [
                ("(^|.+/)dataiku-dss-cde-base$", True),
                ("(^|.+/)dku-cde-base-{install_id}$", True),
                ("(^|.+/)dku-cde-plugins-{install_id}-dss-{dss_version}$", False)
            ],
            "code_studio": [
                ("(^|.+/)dku-kub-.+$", False)
            ],
            'code_envs': [
                ("(^|.+/)dku-exec-{install_id}-dss-(?!.*(?:pyenv|renv)){dss_version}$", False),
                ("(^|.+/)dku-exec-base-{install_id}-dss-(?!.*(?:pyenv|renv)){dss_version}$", False),
                ("(^|.+/)dku-exec-{install_id}-dss-{dss_version}-(?=.*(?:pyenv|renv))(?!.*pyenv.*renv|.*renv.*pyenv).*$", False),
                ("(^|.+/)dku-exec-base-{install_id}-dss-{dss_version}-(?=.*(?:pyenv|renv))(?!.*pyenv.*renv|.*renv.*pyenv).*$", False)
            ]
        }

        precomputed_patterns = []
        for image_type in types:
            if self.config[image_type]:
                for img_name_reg, is_base in types[image_type]:
                    search_string = img_name_reg.format(
                        install_id=install_id,
                        dss_version=dss_version,
                        base_image_name=self.config['base_image_name']
                    )
                    search_string_without_version = img_name_reg.format(
                        install_id=install_id,
                        dss_version='.+',
                        base_image_name=self.config['base_image_name']
                    )

                    pattern_with_version = re.compile(search_string)
                    pattern_without_version = re.compile(search_string_without_version)
                    precomputed_patterns.append((pattern_with_version, pattern_without_version, is_base))
                    logging.debug(f"pattern_with_version: {pattern_with_version}, pattern_without_version: {pattern_without_version}, is_base: {is_base}")

        images_dict = self._build_image_dict()
        for repo , images in images_dict.items():
            logging.info( "Look for respository: '" + repo + "'")
            managedByDSS = False
            for pattern_with_version, pattern_without_version, is_base in precomputed_patterns:
                if pattern_without_version.match(repo):
                    managedByDSS = True
                    if not pattern_with_version.match(repo):
                        logging.info(f"Repository created by an older DSS version. Deleting all images.")
                        for img in images:
                            logging.info(f"Marking image for deletion: {img}")
                            img_to_delete.append({"repository": repo, **img})
                    else:
                        logging.info(f"Repository managed by current DSS instance")

                        grouped_images = defaultdict(list)
                        for img in images:
                            if re.search(r'^(renv|pyenv).+-r-[\d-]{23}', img['tag']):
                                base_name = img['tag'].split('-r-', 1)[0]
                            else:
                                base_name = 'old-tagging-format'
                            grouped_images[base_name].append(img)

                        for base_name, images in grouped_images.items():
                            images.sort(key=lambda x: x['createdAt'], reverse=True)
                            logging.info(f"Processing base name: {base_name}, found {len(images)} images")

                            start = 0 if self.config['is_automation_node']  and not is_base else 1 # Add the first image in in an automation node, will be filtered later
                            for img in images[start:]:
                                logging.info(f"Marking image for deletion: {img}")
                                img_to_delete.append({"repository": repo, **img})

            if not managedByDSS:
                logging.info(f"Repository not managed by current DSS instance: {repo}")

        # Dangling images, that could be wiped with `docker image prune` (but would need the docker daemon to be up-to-date)
        if self.config['rm_none_images']:
            for key, value in images_dict.items():
                for elt in value:
                    if key == "<none>" or elt["tag"] == "<none>":
                        img_to_delete.append({"repository": key, **elt})

        if self.config['is_automation_node']:
            automation_node_used_images = self._automation_node_images_in_use()
            img_to_delete = [x for x in img_to_delete if x['tag'] not in automation_node_used_images.get(x["repository"], set())]

        return img_to_delete

    def run(self, progress_callback):
        """List the removable images, optionally delete them, and report.

        When ``perform_deletion`` is enabled, ``docker rmi`` is invoked once
        per listed image and its outcome is recorded on the image record;
        otherwise this is a dry run. The returned ``ResultTable`` has one row
        per listed image, with status and message columns only when deletion
        was performed.
        """
        logging.info("List images to delete")
        img_to_delete = self._list_images()
        deleting = self.config['perform_deletion']

        if not deleting:
            logging.info("Dry run, do not perform image deletion")
        else:
            logging.info("Perform image deletion")
            for image in img_to_delete:
                forced = self.config['force_rm']
                # Removing by identifier drops every tag of the image (hence its
                # use for forced deletion); '<repo>:<tag>' removes only that tag.
                if forced or image['tag'] == '<none>':
                    target = image['id']
                else:
                    target = image["repository"] + ':' + image['tag']
                rmi_args = ["rmi", "--force", target] if forced else ["rmi", target]

                completed = subprocess.run(
                    self._get_docker_cmd(*rmi_args),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                if completed.returncode == 0:
                    image["status"] = "Success"
                    image["msg"] = completed.stdout.decode(errors='replace').strip()
                else:
                    image["status"] = "Failed"
                    image["msg"] = completed.stderr.decode(errors='replace').strip()

        rt = ResultTable()
        rt.set_name("Removed containers")

        columns = [
            ("repo", "Repository"),
            ("tag", "Tag"),
            ("id", "Identifier"),
            ("createdAt", "Created at"),
        ]
        if deleting:
            columns += [("status", "Status"), ("msg", "Message")]
        for column_id, column_label in columns:
            rt.add_column(column_id, column_label, "STRING")

        for image in img_to_delete:
            row = [image["repository"], image['tag'], image['id'], image['createdAt']]
            if deleting:
                row += [image.get('status', 'Missing status'), image.get('msg', '')]
            rt.add_record(row)

        return rt
