from dataclasses import dataclass, field, InitVar
from typing import List, Optional, Dict, Any


def build_message_log_for_document(source_file_path, message):
    """
    Build a log line that prefixes ``message`` with the originating file path.

    :param source_file_path: path of the document the message relates to
    :param message: text of the log message
    :return: the string ``"[<source_file_path>] - <message>"``
    """
    return "[{}] - {}".format(source_file_path, message)


@dataclass(init=False)
class AbstractDocumentNode:
    """
    A tree node structure to map converted structure to Dataiku extraction API: com.dataiku.dip.docextraction.Content

    The class is declared with ``init=False``, so no ``__init__`` is generated
    for it. ``node_type`` has no default here and is excluded from any
    generated constructor (``init=False``); ``to_dict`` reads it, so it must be
    set on the instance before serializing.
    """
    node_id: str
    children: List["AbstractDocumentNode"]  # List of children DocumentNode
    label: str  # docling label see DocItemLabel, GroupLabel
    content: str = ""  # Content, text or base64 for images
    level: Optional[int] = None  # Level within the hierarchy
    page_provenance: List[int] = field(default_factory=list)  # page provenance from the original document
    node_type: str = field(init=False)  # value emitted under the "type" key by to_dict

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this node, and recursively its children, into a plain dict.

        :return: a dict with the keys
            - ``"type"``: the node's ``node_type``
            - ``"content"``: the list of serialized children
            - ``"pageRange"``: ``{"start": min, "end": max}`` of
              ``page_provenance``, present only when ``page_provenance`` is
              non-empty
        """
        res: Dict[str, Any] = {
            "type": self.node_type,
            "content": [child.to_dict() for child in self.children],
        }
        # Truthiness already rules out both None and an empty list, so a
        # separate length check is not needed before calling min()/max().
        if self.page_provenance:
            res["pageRange"] = {
                "start": min(self.page_provenance),
                "end": max(self.page_provenance),
            }
        return res


@dataclass
class ImageNode(AbstractDocumentNode):
    """
    Node serialized with type "image", carrying optional image metadata:
    dimensions, resolution, classification, caption and an inline base64 payload.
    """
    highest_confidence_class_name: Optional[str] = None
    highest_confidence_class_confidence: Optional[float] = None
    # The next four fields are not constructor arguments; __post_init__ fills
    # them from the init-only ``image`` argument when one is supplied.
    mime_type: Optional[str] = field(init=False, default=None)
    height: Optional[int] = field(init=False, default=None)
    width: Optional[int] = field(init=False, default=None)
    resolution: Optional[int] = field(init=False, default=None)
    image_base64: Optional[str] = None
    image: InitVar[Optional[Any]] = None
    caption: Optional[str] = None
    node_type: str = field(init=False, default="image")

    def __post_init__(self, image):
        """
        Copy ``mimetype``, ``size.height``, ``size.width`` and ``dpi`` from
        ``image`` onto this node; do nothing when ``image`` is None.
        """
        if image is None:
            return
        self.mime_type = image.mimetype
        dimensions = image.size
        self.height = dimensions.height
        self.width = dimensions.width
        self.resolution = image.dpi

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the image node on top of the base node serialization.

        ``"mimeType"`` and ``"imageID"`` are always emitted; every other
        image-specific key is emitted only when its value is truthy.
        """
        payload = super().to_dict()
        payload["mimeType"] = self.mime_type
        payload["imageID"] = self.node_id

        # Simple scalar entries, listed in the order they must appear.
        optional_entries = (
            ("height", self.height),
            ("width", self.width),
            ("resolution", self.resolution),
            ("description", self.content),
        )
        for key, value in optional_entries:
            if value:
                payload[key] = value

        if self.image_base64:
            payload["imageRef"] = {
                "type": "inline",
                "content": self.image_base64,
                "mimeType": self.mime_type,
            }
        if self.caption:
            payload["caption"] = self.caption

        if self.highest_confidence_class_name:
            classification: Dict[str, Any] = {"className": self.highest_confidence_class_name}
            # A confidence of 0.0 is still meaningful, hence the explicit None check.
            if self.highest_confidence_class_confidence is not None:
                classification["confidence"] = self.highest_confidence_class_confidence
            payload["classificationData"] = classification

        return payload


@dataclass
class TextNode(AbstractDocumentNode):
    """Node serialized with type "text"; its content is emitted under the "text" key."""
    node_type: str = field(init=False, default="text")

    def to_dict(self) -> Dict[str, Any]:
        """Return the base serialization extended with ``"text"`` set to the node content."""
        return {**super().to_dict(), "text": self.content}


@dataclass
class TableNode(AbstractDocumentNode):
    """Node serialized with type "table"; its content is emitted under the "text" key."""
    node_type: str = field(init=False, default="table")

    def to_dict(self) -> Dict[str, Any]:
        """Return the base serialization extended with ``"text"`` set to the node content."""
        serialized = super().to_dict()
        serialized.update(text=self.content)
        return serialized


@dataclass
class SectionNode(AbstractDocumentNode):
    """
    Node serialized with type "section"; its content is emitted as the
    "title" and its hierarchy level under "level".
    """
    node_type: str = field(init=False, default="section")

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the base serialization extended with ``"title"`` (the node
        content) and ``"level"`` (emitted as-is, even when it is None).
        """
        return {**super().to_dict(), "title": self.content, "level": self.level}


@dataclass
class SlideNode(SectionNode):
    """
    Section variant serialized with type "slide".

    It inherits ``SectionNode.to_dict`` unchanged, so the content is still
    emitted as "title" alongside "level".
    """
    node_type: str = field(default="slide", init=False)


@dataclass
class RootNode(AbstractDocumentNode):
    """
    Node serialized with type "document" (presumably the root of the tree, per its name).

    It uses ``AbstractDocumentNode.to_dict`` unchanged, so only the type, the
    serialized children and the optional page range are emitted.
    """
    node_type: str = field(default="document", init=False)
