# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import os
import json
import dataiku
import cv2
import torch
import pandas as pd

# Rows to score; later cells read the `record_id` and `label` columns.
df = dataiku.Dataset("test").get_dataframe()
# Local paths of the managed folders: `input_path` is joined with each
# `record_id` to locate an image, `model_path` holds the trained weights.
input_path = dataiku.Folder("PRCGY0s7").get_path()
model_path = dataiku.Folder("vRa5vtTP").get_path()
# Class table with an `id` and a `value` column.
classes = dataiku.Dataset("classes").get_dataframe()
# Map class id -> class value. Iterating the two columns in lockstep avoids
# building a full row Series with `iloc` twice per class, and yields plain
# Python scalars, which keeps the values JSON-serialisable downstream.
id2cat = dict(zip(classes["id"], classes["value"]))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from detectron2.utils.logger import setup_logger

# Configure detectron2's logger with its default settings.
setup_logger()

# Common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg

# Base architecture: the model-zoo Faster R-CNN R50-FPN (3x schedule) config.
base_config_file = model_zoo.get_config_file(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
)

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    inference_device = "cuda"
else:
    inference_device = "cpu"

# Fine-tuned weights stored in the model folder.
weights_file = os.path.join(model_path, "model_final.pth")

cfg = get_cfg()
cfg.merge_from_file(base_config_file)

# Overrides on top of the base config.
cfg.MODEL.WEIGHTS = weights_file
cfg.MODEL.DEVICE = inference_device

# ROI-head settings: one output class per entry of `id2cat`, and a test-time
# score threshold of 0.7.
roi_heads = cfg.MODEL.ROI_HEADS
roi_heads.NUM_CLASSES = len(id2cat)
roi_heads.BATCH_SIZE_PER_IMAGE = 512
roi_heads.SCORE_THRESH_TEST = 0.7

predictor = DefaultPredictor(cfg)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Score every image listed in `df` and store its detections, serialised as a
# JSON list, next to the record id and the ground-truth label.
output = {"record_id": [], "prediction": [], "label": []}

# Walk the two needed columns in lockstep instead of materialising a row
# Series with `df.iloc[i]` several times per image.
for record_id, label in zip(df["record_id"], df["label"]):
    image_file = os.path.join(input_path, record_id)
    im = cv2.imread(image_file)
    if im is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an obscure error inside the predictor.
        raise OSError("Could not read image: {}".format(image_file))

    outputs = predictor(im)
    # Move all detections to the CPU once, then convert each field to plain
    # Python values in bulk rather than indexing tensors one element at a time.
    instances = outputs["instances"].to("cpu")
    boxes = instances.pred_boxes.tensor.tolist()
    class_ids = instances.pred_classes.tolist()
    scores = instances.scores.tolist()

    results = [
        {
            # NOTE(review): assumes every predicted class index is a key of
            # `id2cat` - confirm the ids of the "classes" dataset.
            "category": id2cat[class_id],
            "confidence": score,
            # Predicted boxes are (xmin, ymin, xmax, ymax); they are stored
            # as [xmin, ymin, width, height].
            "bbox": [xmin, ymin, xmax - xmin, ymax - ymin],
        }
        for (xmin, ymin, xmax, ymax), class_id, score in zip(
            boxes, class_ids, scores
        )
    ]

    output["record_id"].append(record_id)
    output["label"].append(label)
    output["prediction"].append(json.dumps(results))

output_df = pd.DataFrame.from_dict(output)
dataiku.Dataset("test_scored_custom").write_with_schema(output_df)
