import json
import unittest
from unittest.mock import patch

from common.backend.utils.uploaded_files_utils import extract_uploaded_docs_from_history


class TestExtractUploadedDocsFromHistory(unittest.TestCase):
    """Tests for ``extract_uploaded_docs_from_history``.

    As asserted by the tests below, the function returns the uploaded documents
    that are not flagged ``is_deleted`` (in history order) and rewrites the
    history entries in place so that every stored document keeps only
    ``original_file_name``, ``file_path``, ``metadata_path``, ``chain_type`` and
    ``is_file_deleted``.

    ``json.loads`` is intentionally never mocked here: the malformed fixtures
    must go through the real parser, which raises ``json.JSONDecodeError`` (a
    ``ValueError`` subclass), so the tests exercise the actual failure path.
    """

    def setUp(self):
        # Set up test data. Fixtures are rebuilt for every test because the
        # function under test mutates the history it receives.
        self.history_empty = []
        self.history_single_no_docs = [{"input": "", "output": ""}]
        self.uploaded_docs_example = [
            {
                "original_file_name": "doc1.pdf",
                "file_path": "/path/to/doc1.pdf",
                "is_deleted": False,
                "summary": "summary1",
            },
            {
                "original_file_name": "doc2.pdf",
                "file_path": "/path/to/doc2.pdf",
                "is_deleted": False,
                "full_extracted_text": "text2",
            },
            {
                "original_file_name": "doc3.pdf",
                "file_path": "/path/to/doc3.pdf",
                "is_deleted": False,
                "questions": ["question1", "question2"],
            },
        ]
        # Documents stored in the "output" field of a history item.
        self.history_single_with_docs = [
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": self.uploaded_docs_example,
                        "other_field": "should be ignored",
                    }
                ),
            }
        ]
        # Documents stored in the "input" field, next to a user query.
        self.history_single_with_docs_query = [
            {
                "input": json.dumps(
                    {
                        "query": "example query",
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc1.pdf",
                                "file_path": "/path/to/doc1.pdf",
                                "is_deleted": False,
                                "extracted_text": "text1",
                            }
                        ],
                    }
                ),
                "output": "this is an llm response",
            }
        ]
        # Every document is flagged as deleted.
        self.history_with_deleted_docs = [
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {"original_file_name": "doc1.pdf", "file_path": "/path/to/doc1.pdf", "is_deleted": True},
                            {"original_file_name": "doc2.pdf", "file_path": "/path/to/doc2.pdf", "is_deleted": True},
                        ]
                    }
                ),
            }
        ]

        # Mix of input-side docs, output-side docs, a deleted doc and a plain
        # text exchange without any JSON payload.
        self.history_multiple_mixed = [
            {
                "input": json.dumps(
                    {
                        "query": "query example",
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc1.pdf",
                                "file_path": "/path/to/doc1.pdf",
                                "is_deleted": True,
                                "preview": "preview1",
                            },
                            {
                                "original_file_name": "doc4.pdf",
                                "file_path": "/path/to/doc4.pdf",
                                "is_deleted": False,
                                "preview": "preview4",
                                "metadata_path": "/path/to/metadata4",
                            },
                        ],
                    }
                ),
                "output": "this is an llm response",
            },
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc2.pdf",
                                "file_path": "/path/to/doc2.pdf",
                                "is_deleted": False,
                                "summary": "summary1",
                                "topics": ["topic1", "topic2"],
                                "questions": ["question1", "question2"],
                            }
                        ]
                    }
                ),
            },
            {"input": "hello", "output": "this is an llm response"},
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc3.pdf",
                                "file_path": "/path/to/doc3.pdf",
                                "is_deleted": False,
                                "full_extracted_text": "full text",
                                "chain_type": "chain type",
                                "token_estimate": 100,
                                "preview": "preview3",
                                "metadata_path": "/path/to/metadata3",
                            }
                        ]
                    }
                ),
            },
        ]
        # Malformed payloads: unparsable JSON strings and non-string values.
        self.history_invalid_input_json = [{"input": "{invalid_json", "output": ""}]
        self.history_invalid_output_json = [{"input": "", "output": "{invalid_json}"}]
        self.history_input_not_string = [{"input": 123, "output": ""}]
        self.history_output_not_string = [{"input": "", "output": 123}]
        # NOTE(review): the badly-typed fields below are siblings of
        # "uploaded_docs", not members of the document itself — confirm this is
        # the shape the test is meant to cover.
        self.history_invalid_values = [
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [{"original_file_name": "doc1.pdf", "file_path": "/path/to/doc1.pdf"}],
                        "questions": [1, 2, 3],
                        "topics": "invalid",
                        "token_estimate": "not a number",
                        "preview": 123,
                        "chain_type": ["invalid"],
                        "summary": 123,
                        "full_extracted_text": 123,
                    }
                ),
            }
        ]

    def test_single_item_with_docs(self):
        # Test with a history list containing a single item with uploaded_docs
        result = extract_uploaded_docs_from_history(self.history_single_with_docs)
        self.assertEqual(
            result,
            self.uploaded_docs_example,
        )
        # Verify that history is updated: each doc is reduced to the retained
        # fields and "other_field" is no longer part of the stored payload.
        uploaded_docs = [
            {
                "original_file_name": f"doc{i}.pdf",
                "file_path": f"/path/to/doc{i}.pdf",
                "metadata_path": "",
                "chain_type": "",
                "is_file_deleted": False,
            }
            for i in range(1, 4)
        ]

        expected_history = [
            {
                "input": "",
                "output": json.dumps({"uploaded_docs": uploaded_docs}),  # Use the pre-defined docs
            }
        ]
        self.assertEqual(self.history_single_with_docs, expected_history)

    def test_single_item_with_docs_query(self):
        # Test with a history list whose single item carries uploaded_docs and a query in its input
        result = extract_uploaded_docs_from_history(self.history_single_with_docs_query)
        self.assertEqual(
            result,
            [
                {
                    "original_file_name": "doc1.pdf",
                    "file_path": "/path/to/doc1.pdf",
                    "is_deleted": False,
                    "extracted_text": "text1",
                }
            ],
        )
        # Verify that history is updated. The comparison is made on the
        # serialized JSON string, so the key order ("uploaded_docs" before
        # "query") is part of the expectation.
        expected_history = [
            {
                "input": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc1.pdf",
                                "file_path": "/path/to/doc1.pdf",
                                "metadata_path": "",
                                "chain_type": "",
                                "is_file_deleted": False,
                            }
                        ],
                        "query": "example query",
                    }
                ),
                "output": "this is an llm response",
            }
        ]
        self.assertEqual(self.history_single_with_docs_query, expected_history)

    def test_with_deleted_docs(self):
        # Test with a history list containing a single item whose documents are all deleted
        result = extract_uploaded_docs_from_history(self.history_with_deleted_docs)
        self.assertEqual(result, [])
        # Verify that history retains only the required fields
        expected_history = [
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc1.pdf",
                                "file_path": "",
                                "metadata_path": "",
                                "chain_type": "",
                                "is_file_deleted": True,
                            },
                            {
                                "original_file_name": "doc2.pdf",
                                "file_path": "",
                                "metadata_path": "",
                                "chain_type": "",
                                "is_file_deleted": True,
                            },
                        ]
                    }
                ),
            }
        ]
        self.assertEqual(self.history_with_deleted_docs, expected_history)

    def test_multiple_items_mixed(self):
        # Test with a history list containing multiple items, some with uploaded_docs and some without.
        result = extract_uploaded_docs_from_history(self.history_multiple_mixed)
        # Only the non-deleted docs are returned, in the order they appear in the history.
        expected_docs = [
            {
                "original_file_name": "doc4.pdf",
                "file_path": "/path/to/doc4.pdf",
                "is_deleted": False,
                "preview": "preview4",
                "metadata_path": "/path/to/metadata4",
            },
            {
                "original_file_name": "doc2.pdf",
                "file_path": "/path/to/doc2.pdf",
                "is_deleted": False,
                "summary": "summary1",
                "topics": ["topic1", "topic2"],
                "questions": ["question1", "question2"],
            },
            {
                "original_file_name": "doc3.pdf",
                "file_path": "/path/to/doc3.pdf",
                "is_deleted": False,
                "full_extracted_text": "full text",
                "chain_type": "chain type",
                "token_estimate": 100,
                "preview": "preview3",
                "metadata_path": "/path/to/metadata3",
            },
        ]
        self.assertEqual(result, expected_docs)
        # Verify that history is updated
        expected_history = [
            {
                "input": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc1.pdf",
                                "file_path": "",
                                "metadata_path": "",
                                "chain_type": "",
                                "is_file_deleted": True,
                            },
                            {
                                "original_file_name": "doc4.pdf",
                                "file_path": "/path/to/doc4.pdf",
                                "metadata_path": "/path/to/metadata4",
                                "chain_type": "",
                                "is_file_deleted": False,
                            },
                        ],
                        "query": "query example",
                    }
                ),
                "output": "this is an llm response",
            },
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc2.pdf",
                                "file_path": "/path/to/doc2.pdf",
                                "metadata_path": "",
                                "chain_type": "",
                                "is_file_deleted": False,
                            }
                        ]
                    }
                ),
            },
            {"input": "hello", "output": "this is an llm response"},
            {
                "input": "",
                "output": json.dumps(
                    {
                        "uploaded_docs": [
                            {
                                "original_file_name": "doc3.pdf",
                                "file_path": "/path/to/doc3.pdf",
                                "metadata_path": "/path/to/metadata3",
                                "chain_type": "chain type",
                                "is_file_deleted": False,
                            }
                        ]
                    }
                ),
            },
        ]
        self.assertEqual(self.history_multiple_mixed, expected_history)

    def test_empty_history(self):
        # Test with an empty history list
        result = extract_uploaded_docs_from_history(self.history_empty)
        self.assertEqual(result, [])
        self.assertEqual(self.history_empty, [])

    def test_single_item_no_docs(self):
        # Test with a history list containing a single item with no uploaded_docs
        result = extract_uploaded_docs_from_history(self.history_single_no_docs)
        self.assertEqual(result, [])
        self.assertEqual(self.history_single_no_docs, [{"input": "", "output": ""}])

    def test_invalid_input_json(self):
        # Test with a history list containing an item with an invalid input JSON that cannot be parsed.
        # The real parser is used so the malformed fixture is what triggers the failure.
        result = extract_uploaded_docs_from_history(self.history_invalid_input_json)
        self.assertEqual(result, [])

    def test_invalid_output_json(self):
        # Test with a history list containing an item with an invalid output JSON that cannot be parsed.
        # The real parser is used so the malformed fixture is what triggers the failure.
        result = extract_uploaded_docs_from_history(self.history_invalid_output_json)
        self.assertEqual(result, [])

    def test_input_not_string(self):
        # Test with a history list containing an item with an input that is not a string
        result = extract_uploaded_docs_from_history(self.history_input_not_string)
        self.assertEqual(result, [])

    def test_output_not_string(self):
        # Test with a history list containing an item with an output that is not a string
        result = extract_uploaded_docs_from_history(self.history_output_not_string)
        self.assertEqual(result, [])

    def test_invalid_values(self):
        # Test with a history list containing an item with invalid values for each field
        # Snapshot the docs before the call: the function rewrites the history in place.
        uploaded_docs = json.loads(self.history_invalid_values[0]["output"])["uploaded_docs"]
        result = extract_uploaded_docs_from_history(self.history_invalid_values)
        self.assertEqual(result, uploaded_docs)
