import os
import shutil
import tempfile

import dataiku
from byaldi import RAGMultiModalModel

# Managed folders: `folder` holds the source documents, `output_folder`
# receives the files of the index once it has been built.
folder = dataiku.Folder("Zcjq6uFf")
output_folder = dataiku.Folder("8jv9ZTHM")

index_name = "docs"
MODEL = "vidore/colpali-v1.2"


def _local_name(remote_path, taken):
    """Return a file name for *remote_path* that is not already in *taken*.

    The base name is used whenever it is free. If another document already
    claimed it (same file name in a different sub-folder), the full remote
    path is flattened into the name instead so that neither file overwrites
    the other. The chosen name is added to *taken*.

    Raises:
        ValueError: if even the flattened name is already taken.
    """
    name = os.path.basename(remote_path)
    if name in taken:
        name = remote_path.strip("/").replace("/", "__")
    if name in taken:
        raise ValueError(
            "Cannot derive a unique local file name for %r" % remote_path
        )
    taken.add(name)
    return name


def _download_documents(source, dest_dir):
    """Copy every file listed by *source* into the local directory *dest_dir*.

    Files are streamed to disk in chunks rather than read into memory whole,
    so large documents do not inflate the memory footprint of the recipe.
    """
    taken = set()
    for remote_path in source.list_paths_in_partition():
        local_path = os.path.join(dest_dir, _local_name(remote_path, taken))
        with source.get_download_stream(remote_path) as stream:
            with open(local_path, "wb") as out:
                shutil.copyfileobj(stream, out)


def _upload_tree(local_root, target):
    """Upload every file under *local_root* to *target*, keeping relative paths.

    The tree is walked recursively, so sub-directories of any depth are
    mirrored. Target paths always use "/" as separator, independently of the
    separator used by the local operating system.
    """
    for dirpath, _dirnames, filenames in os.walk(local_root):
        for filename in filenames:
            local_path = os.path.join(dirpath, filename)
            relative = os.path.relpath(local_path, local_root)
            target.upload_file(relative.replace(os.sep, "/"), local_path)


# temp_dir is the index root handed to the model; temp_dir2 holds the local
# copies of the input documents. Both are removed when the block exits.
with tempfile.TemporaryDirectory() as temp_dir, tempfile.TemporaryDirectory() as temp_dir2:
    RAG = RAGMultiModalModel.from_pretrained(MODEL, index_root=temp_dir)

    _download_documents(folder, temp_dir2)

    # Index the documents of the input folder
    RAG.index(
        input_path=temp_dir2,
        index_name=index_name,
        store_collection_with_index=True,
    )

    # Store the index in the output folder; the index files are read from
    # <index root>/<index name>.
    _upload_tree(os.path.join(temp_dir, index_name), output_folder)
