from typing import Dict, List
from backend.utils.logging_utils import get_logger
import json
import re

logger = get_logger(__name__)


class GuardrailsMatcher:
    """Encapsulates rule parsing and matching logic to avoid duplication.

    A rule is a dict with a ``"pattern"`` string and an optional ``"type"``.
    ``"text"`` rules match the pattern literally, case-insensitively and as a
    whole word; every other type is evaluated as a (case-sensitive) regular
    expression with ``re.search``.
    """

    def __init__(self, raw_config: str):
        """Store the raw configuration and parse it into ``self.rules``.

        Args:
            raw_config: Either JSON (a rule object, a list of rules, or a
                JSON string) or a bare regular expression.
        """
        self.raw_config = raw_config
        self.rules = self._parse_rules(raw_config)

    def _parse_rules(self, raw_config: str) -> List[Dict[str, str]]:
        """Normalise ``raw_config`` into a list of rule dicts.

        Accepted shapes:
            * text that is not valid JSON -> one regex rule using the raw text
            * a JSON string               -> one regex rule using that string
            * a JSON object               -> that object as a single rule
            * a JSON list                 -> dict entries are kept, bare string
              entries become regex rules, anything else is logged and skipped
            * a JSON number               -> one regex rule using the raw text
              (a bare config such as ``1234`` is treated like any other bare
              pattern instead of silently producing no rules)
            * JSON ``null`` / booleans    -> no rules

        Returns:
            A list of dicts that are safe to call ``.get`` on.
        """
        try:
            parsed = json.loads(raw_config)
        except json.JSONDecodeError:
            return [{"pattern": raw_config, "type": "regex"}]

        if isinstance(parsed, str):
            return [{"pattern": parsed, "type": "regex"}]
        if isinstance(parsed, dict):
            return [parsed]
        if isinstance(parsed, list):
            rules = []
            for entry in parsed:
                if isinstance(entry, dict):
                    rules.append(entry)
                elif isinstance(entry, str):
                    rules.append({"pattern": entry, "type": "regex"})
                else:
                    logger.error(f"Ignoring malformed guardrails rule: {entry!r}")
            return rules
        # bool is a subclass of int, so exclude it explicitly.
        is_number = isinstance(parsed, (int, float)) and not isinstance(parsed, bool)
        if is_number and isinstance(raw_config, str):
            return [{"pattern": raw_config.strip(), "type": "regex"}]
        return []

    @staticmethod
    def _text_rule_regex(pat: str) -> str:
        """Build the whole-word regex for a non-empty literal ``text`` pattern.

        ``\\b`` only exists between a word and a non-word character, so wrapping
        a pattern such as ``c++`` in ``\\b...\\b`` can never match when it is
        followed by whitespace or the end of the text. A boundary is therefore
        only enforced on the edges of the pattern that are word characters,
        which is exactly what ``\\b`` means there.
        """
        prefix = r"(?<!\w)" if re.match(r"\w", pat[0]) else ""
        suffix = r"(?!\w)" if re.match(r"\w", pat[-1]) else ""
        return prefix + re.escape(pat) + suffix

    def is_violation(self, text: str) -> bool:
        """Return True if ``text`` matches at least one configured rule.

        Empty text never violates. Rules with an empty, non-string or invalid
        pattern are skipped (the latter two are logged) rather than raising.
        """
        if not text:
            return False
        for rule in self.rules:
            pat = rule.get("pattern", "")
            if not pat:
                continue
            if not isinstance(pat, str):
                logger.error(f"Ignoring non-string guardrails pattern: {pat!r}")
                continue

            if rule.get("type") == "text":
                # Whole-word matching avoids partial hits
                # (e.g., "secret" in "secretion").
                if re.search(self._text_rule_regex(pat), text, re.IGNORECASE):
                    return True
                continue

            try:
                if re.search(pat, text):
                    return True
            except re.error:
                logger.error(f"Invalid guardrails regex pattern: {pat}")
        return False

    def check_structure(self, doc_text: str) -> bool:
        """Parse the extracted JSON structure and check it for violations.

        If ``doc_text`` is a JSON list, each dict chunk has its ``"text"``
        value and the string items of its ``"outline"`` list checked, and bare
        string chunks are checked directly. If ``doc_text`` is not JSON, or is
        JSON but not a list, the raw text is checked instead.

        Returns:
            True as soon as any checked string violates a rule, else False.
        """
        try:
            chunks = json.loads(doc_text)
        except (json.JSONDecodeError, TypeError):
            # Not JSON (or not text at all): check the raw value.
            return self.is_violation(doc_text)

        if not isinstance(chunks, list):
            # Valid JSON but not a list of chunks: check the raw text.
            return self.is_violation(doc_text)

        needs_raw_check = False
        for chunk in chunks:
            if isinstance(chunk, str):
                if self.is_violation(chunk):
                    return True
                continue
            if not isinstance(chunk, dict):
                continue

            body = chunk.get("text", "")
            if isinstance(body, str):
                if self.is_violation(body):
                    return True
            elif body:
                # A truthy non-string "text" cannot be matched directly;
                # fall back to scanning the raw document once at the end.
                needs_raw_check = True

            outline = chunk.get("outline", [])
            if isinstance(outline, list) and any(
                isinstance(item, str) and self.is_violation(item)
                for item in outline
            ):
                return True

        return needs_raw_check and self.is_violation(doc_text)
