# -*- coding: utf-8 -*-
import logging

import dataiku
import pandas as pd

# Folder containing the files to classify together with the labels file.
folder = dataiku.Folder("Rvdjm86o")

# The labels file lives next to the inputs, so it is taken out of the list of
# paths to classify (list.remove raises ValueError if the path is absent).
LABELS_PATH = "/label.json"
files = folder.list_paths_in_partition()
files.remove(LABELS_PATH)

# Parsed content of the labels file, used as the ground truth.
ground_truth = folder.read_json(LABELS_PATH)

# The LLM to query is selected through the "LLM_ID" custom variable.
LLM_ID = dataiku.get_custom_variables()["LLM_ID"]
project = dataiku.api_client().get_default_project()
llm = project.get_llm(LLM_ID)

# Instruction sent with every file; it asks for a bare category name only.
PROMPT = """Classify this picture with only one of the 16 following categories: email, handwritten, advertisement, scientific report, scientific publication, letter, form, specification, file folder, news article, budget, invoice, presentation, questionnaire, resume, memo.
Answer only the category, nothing else."""

# One row per successfully classified file; the keys become the DataFrame
# columns when the results are tabulated.
data = {"File": [], "Response": []}
failed_files = []  # Paths whose completion did not succeed, reported after the loop

# Send every file to the LLM as an inline image alongside the prompt
for file in files:
    # A new completion is created for each file, so each request holds one message
    completion = llm.new_completion()
    completion.settings["maxOutputTokens"] = 30
    completion.settings["temperature"] = 0

    # Read the bytes up front so the download stream is closed before the
    # (potentially slow) LLM request is built and executed
    with folder.get_download_stream(file) as stream:
        image_bytes = stream.read()

    mp_message = completion.new_multipart_message()
    mp_message.with_text(PROMPT)
    mp_message.with_inline_image(image_bytes)
    mp_message.add()

    # Execute the request; unsuccessful responses are not stored, but they are
    # remembered so the omission is visible in the logs
    resp = completion.execute()
    if not resp.success:
        failed_files.append(file)
        continue

    answer = resp.text
    # Drop the first character of the path (its leading "/"); this value is the
    # join key against the label file names -- presumably stored without it
    data["File"].append(file[1:])
    # Strip surrounding whitespace so a stray newline or space does not make an
    # otherwise correct category differ from the label
    data["Response"].append(answer.strip() if answer else answer)

if failed_files:
    logging.warning(
        "%d of %d files could not be classified and are missing from the results: %s",
        len(failed_files),
        len(files),
        failed_files,
    )

# Tabulate the collected LLM answers; the columns come from the keys of `data`
df = pd.DataFrame(data)

# One (File, Label) row per entry of the ground-truth mapping
label_rows = list(ground_truth.items())
ground_truth_df = pd.DataFrame(label_rows, columns=["File", "Label"])

# Left join on "File": every answered file is kept, and a file with no
# matching ground-truth entry gets a missing Label
merged_df = df.merge(ground_truth_df, on="File", how="left")

# Persist the joined table to the output Dataiku dataset
output_dataset = dataiku.Dataset("results_classification_GPT")
output_dataset.write_with_schema(merged_df)
