# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import json

# Load the input reviews as a DataFrame.
reviews_dataset = dataiku.Dataset("product_reviews")
df = reviews_dataset.get_dataframe()

# The LLM to query is identified by the "LLM_id" custom variable.
custom_variables = dataiku.get_custom_variables()
LLM_ID = custom_variables["LLM_id"]

# Get a handle on that LLM through the default project.
client = dataiku.api_client()
project = client.get_default_project()
llm = project.get_llm(LLM_ID)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fields the LLM must return for each review, in the order they are listed
# in the schema. Every field is mandatory, so the "required" list below is
# derived directly from these keys.
_properties = {
    "product_type": {
        "type": "string",
        "description": "type of the product ('unknown' if not provided; if the model or brand is included, be as specific as possible)",
    },
    "summary": {
        "type": "string",
        "description": "summary of the product review in less than 10 words",
    },
    "predicted_sentiment": {
        "type": "string",
        "enum": ["positive", "neutral", "negative"],
        "description": "sentiment of the product review",
    },
    "advantages": {
        "type": "array",
        "items": {"type": "string"},
        "description": "main advantages mentioned in the product review",
    },
    "drawbacks": {
        "type": "array",
        "items": {"type": "string"},
        "description": "main drawbacks mentioned in the product review",
    },
}

# JSON schema describing the structured output expected from the LLM.
json_schema = {
    "title": "Extracted information",
    "description": "Key information extracted from product reviews",
    "type": "object",
    "additionalProperties": False,
    "properties": _properties,
    "required": list(_properties),
}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Query the LLM once per review and store each extracted field in a column
# of `df` named after the field.

# Loop-invariant values, defined once instead of on every iteration.
SYSTEM_PROMPT = "You are a helpful assistant that extracts key information from product reviews."
# Fields whose value is a list; they are serialized to a JSON string before
# being written into a single DataFrame cell.
LIST_FIELDS = ("advantages", "drawbacks")

for i in df.index:
    review_text = df.loc[i, "text"]

    completion = llm.new_completion()
    # Presumably set to 0 to keep the extraction as repeatable as possible.
    completion.settings["temperature"] = 0
    completion.with_message(SYSTEM_PROMPT, role="system")
    # The review is sent exactly once, embedded in the instruction. Adding it
    # again as a separate bare message would duplicate the text in the
    # request (twice the input tokens) without adding any information.
    completion.with_message(f"Extract key information from the following product review: {review_text}")
    completion.with_json_output(schema=json_schema)

    # NOTE(review): a failed LLM call is not handled here; confirm how
    # `response.json` behaves on failure before running on large datasets.
    response = completion.execute()
    extracted = response.json

    for field, value in extracted.items():
        df.loc[i, field] = json.dumps(value) if field in LIST_FIELDS else value

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write the enriched DataFrame to the output dataset, setting the dataset's
# schema from the DataFrame.
output_dataset = dataiku.Dataset("extracted_json_structured_output")
output_dataset.write_with_schema(df)