# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import io
import logging

import dataiku
import fitz
import numpy as np
import pandas as pd
from dataiku import pandasutils as pdu

# Read recipe inputs: managed folder holding the documents to classify.
documents = dataiku.Folder("008vhkiq")
documents_info = documents.get_info()


# Write recipe outputs: one managed folder per classification outcome.
searchable = dataiku.Folder("1Jon6Y5u")
searchable_info = searchable.get_info()
non_searchable = dataiku.Folder("0oOwj5E4")
non_searchable_info = non_searchable.get_info()
error = dataiku.Folder("epzn88Ed")
# Query the error folder itself (previously this read non_searchable's info).
error_info = error.get_info()

def upload_data(folder, path, filename):
    """Copy a local file into a folder object.

    Opens ``path`` in binary mode and hands the open stream to
    ``folder.upload_stream`` under the name ``filename``. The file is
    closed once the upload call returns or raises.
    """
    with open(path, mode="rb") as stream:
        folder.upload_stream(filename, stream)

def _pdf_has_text(path):
    """Return True when at least one page of the document at ``path`` yields text.

    Every page is extracted (no early exit) so that a failure on any page
    still surfaces as an exception to the caller. The document is always
    closed, even when extraction raises.
    """
    doc = fitz.open(path)
    try:
        has_text = False
        for page in doc:
            # Newer PyMuPDF releases expose get_text(); older ones only
            # provide the legacy getText() name. Support both.
            extract = getattr(page, "get_text", None) or page.getText
            if extract() != "":
                has_text = True
        return has_text
    finally:
        doc.close()


def searchable_or_nonsearchable(documents, searchable, non_searchable, error):
    """Sort every file of ``documents`` into one of three output folders.

    For each path returned by ``documents.list_paths_in_partition()`` the
    local file (``documents.file_path``) is opened with PyMuPDF:

    * some extractable text  -> uploaded to ``searchable``
    * no extractable text    -> uploaded to ``non_searchable``
    * opening/extraction raised -> uploaded to ``error`` (the exception is
      logged rather than silently discarded)

    Parameters:
        documents: input folder object providing ``list_paths_in_partition``
            and ``file_path``.
        searchable, non_searchable, error: output folder objects passed to
            ``upload_data``.

    Returns:
        dict with keys ``'searchable'``, ``'non_searchable'`` and ``'error'``,
        each mapping to the list of local paths placed in that category.
    """
    destinations = {
        'searchable': searchable,
        'non_searchable': non_searchable,
        'error': error,
    }
    result_dict = {category: [] for category in destinations}

    for file in documents.list_paths_in_partition():
        # The local path is all that is needed; no download stream is opened
        # because its content was never read.
        path = documents.file_path(file)
        try:
            category = 'searchable' if _pdf_has_text(path) else 'non_searchable'
        except Exception:
            # Track failure as its own category instead of a magic "error"
            # text value, so a document whose text is literally "error" is
            # still classified as searchable.
            logging.getLogger(__name__).exception(
                "Could not extract text from %s", path
            )
            category = 'error'

        upload_data(destinations[category], path, file)
        result_dict[category].append(path)

    return result_dict

# Run the classification over the input folder; left as a bare expression so
# the returned summary dict is still shown when executed as a notebook cell.
searchable_or_nonsearchable(
    documents=documents,
    searchable=searchable,
    non_searchable=non_searchable,
    error=error,
)