import csv
import logging
import random
import string
import io

from typing import Dict, List

import pandas as pd

from dataiku.eda.types import DataStreamIdModel

logger = logging.getLogger(__name__)


class DataStreamId:
    """
    Identifier of a dataframe registered in a DataFrameStore.

    Wraps the raw string id and converts it to and from its payload form
    (a mapping holding the id under the "value" key).
    """

    def __init__(self, value: str):
        """
        :param value: the raw identifier
        :type value: str
        """
        self.value = value

    def __repr__(self) -> str:
        return "{}({!r})".format(type(self).__name__, self.value)

    @classmethod
    def parse(cls, payload: DataStreamIdModel) -> 'DataStreamId':
        """
        Builds an id from its payload form.

        :param payload: mapping holding the raw id under the "value" key
        :type payload: DataStreamIdModel

        :return: the corresponding id
        :rtype: DataStreamId
        """
        # Instantiate through `cls` so that subclasses get an instance of
        # their own type rather than of the base class.
        return cls(payload["value"])

    def serialize(self) -> DataStreamIdModel:
        """
        Converts the id to its payload form.

        :return: mapping holding the raw id under the "value" key
        :rtype: DataStreamIdModel
        """
        return {
            "value": self.value,
        }


class DataFrameStore:
    """
    Provides means for registering and accessing dataframes during an EDA
    session.

    Dataframes are kept in an in-memory dictionary, keyed by a randomly
    generated string id, until they are removed with `clear`.
    """

    def __init__(self):
        self.store: Dict[str, pd.DataFrame] = {}

    def register_dataframe(self, df: pd.DataFrame) -> DataStreamId:
        """
        Registers a new dataframe by assigning a new id to it. This id can be
        used later to write the dataframe to an output stream.

        Note: Dataframes are assigned a 20-length ascii based id upon
        registration.

        :param df: the dataframe to register
        :type df: pd.DataFrame

        :return: the id corresponding to the dataframe
        :rtype: DataStreamId
        """
        next_id = self._generate_next_id()
        self.store[next_id] = df
        logger.info("Registered data frame with id: %s", next_id)
        return DataStreamId(next_id)

    def _generate_next_id(self) -> str:
        """
        Draws random 20-letter ids until one is not already present in the
        store.

        :return: an id that is not currently used as a key of the store
        :rtype: str
        """
        while True:
            next_store_id = ''.join(random.choice(string.ascii_letters) for _ in range(20))
            if next_store_id not in self.store:  # not already used
                return next_store_id

    def _log_available_ids(self) -> None:
        """
        Logs the ids currently held by the store at DEBUG level.
        """
        # Only materialize the key list when the message will be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Available dataframe ids: %s", list(self.store))

    def write(self, data_stream_id: DataStreamId, output_stream) -> None:
        """
        Writes the content of the data frame into the output stream.
        The data frame is written using the CSV format.

        :param data_stream_id: the id of the dataframe to stream
        :type data_stream_id: DataStreamId

        :param output_stream: the output to write to
        :type output_stream: file-like

        :raises Exception: if no dataframe is registered under the id
        """
        self._log_available_ids()
        store_id = data_stream_id.value
        df = self.store.get(store_id)

        if df is None:
            raise Exception("No dataframe exists for id: {}".format(store_id))

        logger.info("Started writing dataframe with id: %s", store_id)
        _write_dataframe_as_csv(df, output_stream)
        logger.info("Finished writing dataframe with id: %s", store_id)

    def clear(self, data_stream_ids: List[DataStreamId]) -> None:
        """
        Clears the dataframes corresponding to the provided ids.
        Ids that are not present in the store are ignored.

        :param data_stream_ids: the list of dataframe ids
        :type data_stream_ids: list[DataStreamId]
        """
        self._log_available_ids()
        store_ids = [ds_id.value for ds_id in data_stream_ids]
        logger.info("Started clearing dataframes with ids: %s", store_ids)

        for store_id in store_ids:
            # pop() with a default silently skips unknown ids
            self.store.pop(store_id, None)

        logger.info("Finished clearing dataframes with ids: %s", store_ids)


def _write_dataframe_as_csv(dataframe, output_stream):
    """
    Streams a dataframe to an output stream as UTF-8 encoded CSV.

    Rows are written without header and without index, comma separated,
    with every field quoted. The output stream is not closed by this
    function.

    :param dataframe: the dataframe to serialize
    :type dataframe: pd.DataFrame

    :param output_stream: the stream receiving the encoded bytes; it is
        wrapped in an io.TextIOWrapper, so it must be a binary stream
    :type output_stream: file-like
    """
    logger.info("Started streaming as CSV")
    # TODO: handle types properly?

    # pandas >= 1.1 in all builtin envs
    string_writer = io.TextIOWrapper(output_stream, encoding='utf-8', newline='')
    try:
        dataframe.to_csv(
            string_writer,
            index=False,
            header=False,
            sep=",",
            quoting=csv.QUOTE_ALL,
        )
        string_writer.flush()
    finally:
        # Always hand the stream back to the caller, even on failure:
        # a wrapper that is still attached closes the underlying stream
        # when it gets garbage-collected.
        string_writer.detach()

    logger.info("Finished streaming as CSV")
