# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import logging

import dataiku
import pandas as pd

# Input folder containing cropped image files for OCR (Optical Character Recognition)
folder = dataiku.Folder("Tz1CoYYW")

# ID of the multimodal LLM, read from the custom variables
LLM_ID = dataiku.get_custom_variables()["LLM_ID"]

llm = dataiku.api_client().get_default_project().get_llm(LLM_ID)

# Instruction sent with every image; defined once because it never changes
OCR_PROMPT = "Transcribe the text from the image. Do not add any comment or additional information."

logger = logging.getLogger(__name__)

# Column-oriented accumulator for the transcriptions: one entry per
# successfully processed file in each list
data = {"file": [], "text": []}

# Paths whose LLM query did not succeed; reported instead of being
# dropped silently so missing rows in the output can be explained
failed_files = []

# Loop through each file in the folder and send it to the multimodal LLM for OCR
for file in folder.list_paths_in_partition():

    # A fresh completion per image so that each query carries exactly one
    # message (text instruction + inline image)
    completion = llm.new_completion()
    completion.settings["maxOutputTokens"] = 1000
    completion.settings["temperature"] = 0

    mp_message = completion.new_multipart_message()
    mp_message.with_text(OCR_PROMPT)
    with folder.get_download_stream(file) as stream:
        mp_message.with_inline_image(stream.read())
    mp_message.add()

    # Execute the multimodal LLM query and retrieve the response
    resp = completion.execute()

    # Best effort: a failed query is logged and skipped, not fatal
    if not resp.success:
        failed_files.append(file)
        logger.warning("OCR query failed for file %s; skipping it", file)
        continue

    # Store the path without its leading "/" together with the transcription
    data["file"].append(file.lstrip("/"))
    data["text"].append(resp.text)

if failed_files:
    logger.warning(
        "OCR failed for %d file(s); they are absent from the output dataset",
        len(failed_files),
    )

# Convert the accumulated columns into a pandas DataFrame
df = pd.DataFrame(data)

# Write the results to a Dataiku dataset
dataiku.Dataset("results_GPT").write_with_schema(df)