import json
import logging

import pandas as pd
from dataiku.llm.evaluation.utils import failure_utils

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Names associated with Dataiku Answers data.
# DATAIKU_ANSWERS_SOURCES_NAME is the input DataFrame column that holds the JSON-encoded sources.
# NOTE(review): the question/answer names are not used in the code visible here; presumably they
# are the matching column names for the question and answer - confirm against callers.
DATAIKU_ANSWERS_QUESTION_NAME = 'question'
DATAIKU_ANSWERS_ANSWER_NAME = 'answer'
DATAIKU_ANSWERS_SOURCES_NAME = 'sources'


def _read_context_from_json_cell(json_string: str) -> list:
    """
    Extract the context snippets from one Dataiku Answers "sources" cell.

    Two payload layouts are supported:
    - updated format (Answers versions > 2.3.0): each source holds an 'items' list and each item
      carries its text under 'textSnippet', 'htmlSnippet' or 'markdownSnippet' (first non-empty wins);
    - legacy format (Answers versions <= 2.3.0): each source carries its text under 'excerpt'.

    :param json_string: JSON document, expected to be an object with a 'sources' list
    :return: list of the non-empty snippets found, or an empty list if the cell can't be parsed
    """
    try:
        raw_json = json.loads(json_string)
        sources = raw_json.get('sources', [])
        items_per_source = [s.get('items') for s in sources]
        if any(items_per_source):
            # Handle updated format from Answers versions > 2.3.0
            contexts = []
            for source_items in items_per_source:
                # A source without items is skipped, so that it does not discard the snippets
                # collected from the other sources of the same cell.
                for item in source_items or ():
                    # Keep the first snippet type that actually has content: an empty snippet
                    # must not hide a non-empty one stored under another key.
                    snippet = next(
                        (item.get(key) for key in ('textSnippet', 'htmlSnippet', 'markdownSnippet') if item.get(key)),
                        None,
                    )
                    contexts.append(snippet)
        else:
            # Handle legacy format from Answers versions <= 2.3.0
            contexts = [s.get('excerpt') for s in sources]
        # Drop missing / empty snippets
        return [c for c in contexts if c]
    except Exception:
        # Best-effort parsing: invalid JSON, a non-string cell (e.g. NaN) or an unexpected payload
        # shape all mean "no context". Unlike a bare except, KeyboardInterrupt and SystemExit
        # still propagate.
        return []


def try_get_parsed_dataiku_answer_context(input_df: pd.DataFrame, is_context_needed: bool) -> pd.Series:
    """
    Try to parse the Dataiku Answers sources column of input_df into lists of context snippets.

    input_df is not modified: the parsed contexts are only returned.

    :param input_df: input dataset, expected to hold the JSON sources in the DATAIKU_ANSWERS_SOURCES_NAME column
    :param is_context_needed: True if some non-custom metric need the context; forwarded as
        raise_diagnostic to failure_utils.warn
    :return: pd.Series with one list of parsed contexts per row (lists are empty for rows that could
        not be parsed), or an empty Series if the sources column is missing
    """
    # DataFrame.get returns None for a missing column, whereas input_df[...] would raise a KeyError
    # and the warning below would never be reached.
    raw_response = input_df.get(DATAIKU_ANSWERS_SOURCES_NAME)
    if raw_response is None:
        # Do not raise but produce a warning. Not having answers is strange, but some metrics don't need it.
        failure_utils.warn('Can\'t find column "%s". Check that your input dataset was produced by Dataiku Answers.' % DATAIKU_ANSWERS_SOURCES_NAME,
                           raise_diagnostic=is_context_needed)
        return pd.Series(dtype=object)

    logger.info('Context column "%s" might be from Dataiku Answers, trying to parse it', DATAIKU_ANSWERS_SOURCES_NAME)
    contexts_from_dataiku_answers = raw_response.apply(_read_context_from_json_cell)
    # Each parsed cell is a list: the column is considered detected as soon as one row yields a non-empty list
    if any(contexts_from_dataiku_answers):
        logger.info('Auto-detected column "%s" as a context column from Dataiku Answers. Parsing it.', DATAIKU_ANSWERS_SOURCES_NAME)
    else:
        failure_utils.warn('Column "%s" does not contain contexts/sources from dataiku answers, won\'t parse it. Metrics based on context may be wrong.' % DATAIKU_ANSWERS_SOURCES_NAME,
                           raise_diagnostic=is_context_needed)
    return contexts_from_dataiku_answers
