# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import json
import re

from dataiku.langchain.dku_llm import DKUChatLLM
from langchain.evaluation import load_evaluator
from langchain_core.agents import AgentAction

# Load the processed agent requests into a pandas DataFrame; the evaluation
# cells below read its "request", "draft_reply" and "intermediate_steps" columns.
requests_dataset = dataiku.Dataset("requests_processed_langgraph")
df = requests_dataset.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: MARKDOWN
# # Agent Evaluation Method #1: LLM-as-a-judge with `langchain`
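# Required inputs (columns of the `requests_processed_langgraph` dataset):
# - request (`request` column)
# - reply (`draft_reply` column)
# - intermediate steps (`intermediate_steps` column, one serialized step per line)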

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

# The judge LLM is selected through the "LLM_id" custom variable so the
# notebook can be pointed at a different model without editing code.
LLM_ID = dataiku.get_custom_variables()["LLM_id"]

# Temperature 0 keeps the judge's output as repeatable as possible.
llm = DKUChatLLM(
    llm_id=LLM_ID,
    temperature=0
)

# LangChain's built-in "trajectory" evaluator, driven by the judge LLM above.
evaluator = load_evaluator("trajectory", llm=llm)

# One serialized intermediate step per line:
#     <step number>. <tool name>(<JSON tool input>) --> <observation>
# Groups: 1 = tool name, 2 = JSON-encoded tool input, 3 = observation.
#
# - Raw string: "\(" / "\)" are invalid *string* escapes and trigger a
#   SyntaxWarning on recent Python versions when written in a normal string.
# - The tool-name group is non-greedy so it stops at the FIRST "(" on the
#   line. A greedy group runs to the last "(", which mis-splits any step whose
#   tool input itself contains a parenthesis (e.g. {"q": "f(x)"}).
# - NOTE(review): the "." after the step number is unescaped and therefore
#   matches any character; left as-is to keep accepting whatever separator the
#   serializer emits -- confirm and tighten to r"\." if it is always a dot.
pattern = re.compile(r"[0-9]*. (.*?)\((.*)\) --> (.*)")

def deserialize_intermediate_steps(trajectory):
    """
    Deserialize the string representing the intermediate steps of an agent trajectory.

    Each non-blank line of ``trajectory`` is matched against the module-level
    ``pattern``, whose three groups are read as the tool name, the
    JSON-encoded tool input and the observation returned by the tool.

    Args:
        trajectory: Newline-separated string with one serialized step per line.
            Blank lines (for instance a trailing newline) are ignored.

    Returns:
        A list of ``(AgentAction, observation)`` tuples in line order, where
        ``observation`` is the raw text captured after ``-->``. The list is
        empty when ``trajectory`` contains no non-blank line.

    Raises:
        ValueError: If a non-blank line does not match ``pattern``, or if its
            tool input is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    steps = []
    for line_number, line in enumerate(trajectory.split("\n"), start=1):
        # A blank line carries no step; skipping it avoids discarding the
        # whole trajectory because of a stray trailing newline.
        if not line.strip():
            continue

        m = pattern.match(line)
        if m is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(
                "Cannot parse intermediate step on line {}: {!r}".format(line_number, line)
            )

        tool, raw_tool_input, observation = m.groups()
        steps.append(
            (
                AgentAction(tool=tool, tool_input=json.loads(raw_tool_input), log=""),
                observation,
            )
        )
    return steps

# Score every row with the LLM judge. A failure on one row (unparseable
# trajectory, LLM error, missing key in the judge's answer) must not abort the
# whole run, so the row falls back to a score of 0 with a fixed justification.
for i in df.index:
    try:
        result = evaluator.evaluate_agent_trajectory(
            prediction=df.at[i, "draft_reply"],
            input=df.at[i, "request"],
            agent_trajectory=deserialize_intermediate_steps(df.at[i, "intermediate_steps"]),
        )
        score = result["score"]
        justification = result["reasoning"]
    except Exception as exc:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt /
        # SystemExit still stop the notebook. The real cause is printed because
        # the fallback text stored in the dataset does not reveal it.
        print("Row {}: trajectory evaluation failed: {!r}".format(i, exc))
        score = 0
        justification = "No score provided by the LLM judge"

    df.at[i, "langchain/score"] = score
    df.at[i, "langchain/justification"] = justification

# NOTE(review): "anwers" looks like a typo, but the string must match the name
# of the output dataset in the Flow -- confirm before renaming.
dataiku.Dataset("agent_anwers_evaluated").write_with_schema(df)