from other_api_utils.google_docs.typing_commons import Credentials, GoogleSheetDataset, GoogleSpreadsheetDataCollection
from googleapiclient.discovery import build
import pandas as pd


def get_google_spreadsheet(google_credentials: Credentials, spreadsheet_id: str)->dict:
    """Fetches a google spreadsheet, grid data included, through the Sheets v4 API.

    :param google_credentials: Credentials: A google credentials object.
    :param spreadsheet_id: str: The ID of the spreadsheet to retrieve.

    :returns: spreadsheet: dict: The dictionary returned by the API for the requested spreadsheet.
    """
    print(f"Retrieving the google spreadsheet '{spreadsheet_id}' ...")
    spreadsheets_resource = build('sheets', 'v4', credentials=google_credentials).spreadsheets()
    # includeGridData=True asks the API to return the cell contents along with the sheet properties.
    spreadsheet_request = spreadsheets_resource.get(spreadsheetId=spreadsheet_id, includeGridData=True)
    spreadsheet = spreadsheet_request.execute()
    print(f"Google spreadsheet '{spreadsheet_id}' successfully retrieved!")
    return spreadsheet


def get_spreadsheet_data_colection(google_credentials: Credentials, spreadsheet_id: str,
                                   sheets_to_collect: list=["ALL_SHEETS"])->GoogleSpreadsheetDataCollection:
    """Retrieves the data from a google spreadsheet file.

    The first row of each sheet is used as its header: blank header cells are named 'unknown' and
    repeated header names are suffixed with '_<n>' (n = 1 for the second occurrence, 2 for the third, ...).
    Every other row is aligned on the header: missing cells are filled with '' and cells located beyond
    the last header column are ignored (a warning is printed when such cells hold a value).

    :param google_credentials: Credentials: A google credentials object.
    :param spreadsheet_id: str: The ID of the spreadsheet to retrieve.
    :param sheets_to_collect: list: The name of the sheets to collect within the spreadsheet file.
        - If it is equal to ['ALL_SHEETS'] (default value) the data from all the sheets will be collected.
        - Else, it must contain the exact labels of the sheet names.

    :returns: spreadsheet_data_colection: GoogleSpreadsheetDataCollection: A dictionary mapping each collected
        sheet name to its dataset, made of the keys:
            - 'sheet_name': the title of the sheet.
            - 'sheet_index': the index of the sheet, as found in the sheet properties.
            - 'header_columns_data': a dictionary {column name: list of the formatted values of the column}.
            - 'header_column_occurences': a dictionary {header name: number of additional occurrences
              of this name in the header (0 when the name appears only once)}.

    :raises ValueError: If 'sheets_to_collect' is not ['ALL_SHEETS'] and matches none of the existing sheets.
    """
    print(f"Collecting data from the spreadheet '{spreadsheet_id}' with the scope '{sheets_to_collect}...")
    spreadsheet_data_colection: GoogleSpreadsheetDataCollection = {}

    spreadsheet = get_google_spreadsheet(google_credentials,
                                         spreadsheet_id)
    # A payload without any "sheets" entry is handled as a spreadsheet without sheets.
    all_sheets_data = spreadsheet.get("sheets") or []
    if sheets_to_collect != ["ALL_SHEETS"]:
        # The existing titles are listed BEFORE filtering, otherwise the error message below
        # would always report an empty list of existing sheets.
        existing_sheet_titles = [
            sheet_data["properties"]["title"] for sheet_data in all_sheets_data
        ]
        all_sheets_data = [
            sheet_data for sheet_data in all_sheets_data if sheet_data["properties"]["title"] in sheets_to_collect
        ]
        if not all_sheets_data:
            raise ValueError(f"Not any spreadsheet could be collected. You asked to collect the sheets '{sheets_to_collect}' "
                             f"while the existing sheets are '{existing_sheet_titles}'.")

    for sheet_data in all_sheets_data:
        sheet_name = sheet_data["properties"]["title"]
        print(f"Collecting the data from the sheet '{sheet_name}' ...")
        sheet_index = sheet_data["properties"]["index"]
        header_columns_data = {}
        header_column_occurences = {}
        sheet_dataset: GoogleSheetDataset = {
            "sheet_name": sheet_name,
            "sheet_index": sheet_index,
            "header_columns_data": header_columns_data,
            "header_column_occurences": header_column_occurences
        }

        # A sheet without content may come without "data" / "rowData": it then yields an empty dataset.
        sheet_grids_data = sheet_data.get("data") or [{}]
        sheet_rows_data = sheet_grids_data[0].get("rowData", [])
        sheet_header = []
        for row_index, row_data in enumerate(sheet_rows_data):
            # A blank row may come without "values": it is read as a row without cells.
            # Building a new list also keeps the API payload untouched by the alignment below.
            row_values = [
                row_value_data.get("formattedValue", "") for row_value_data in row_data.get("values", [])
            ]

            # Sheet header columns handling:
            if row_index == 0:
                for row_value in row_values:
                    header_column_name = row_value or "unknown"
                    if header_column_name not in header_column_occurences:
                        header_column_occurences[header_column_name] = 0
                    else:
                        header_column_occurences[header_column_name] += 1
                        header_column_name = f"{header_column_name}_{header_column_occurences[header_column_name]}"
                    sheet_header.append(header_column_name)
                    header_columns_data[header_column_name] = []
                continue

            # Alignment of the number of values in each sheet row with the size of the header:
            header_length = len(sheet_header)
            if any(row_values[header_length:]):
                print(f"Warning: the row #{row_index + 1} of the sheet '{sheet_name}' holds values beyond "
                      f"the last header column, these values are ignored.")
            # A negative padding size yields an empty list, so wider rows are left as they are here.
            row_values += [""] * (header_length - len(row_values))

            # Regular sheet values handling (zip stops at the last header column):
            for value_header_column, row_value in zip(sheet_header, row_values):
                header_columns_data[value_header_column].append(row_value)

        spreadsheet_data_colection[sheet_name] = sheet_dataset
    print(f"Data successfully collected from the spreadheet '{spreadsheet_id}' !\n\n")
    return spreadsheet_data_colection


def from_google_sheet_dataset_to_dataframe(google_sheet_dataset: GoogleSheetDataset,
                                           duplicate_columns_handling: str="keep_all")->pd.core.frame.DataFrame:
    """Converts a google sheet dataset object into a pandas dataframe.

    Duplicate columns are identified from the 'header_column_occurences' entry of the dataset: a header
    name with n > 0 additional occurrences owns the columns '<name>', '<name>_1', ..., '<name>_<n>'.
    Only these exact column names are considered; a column whose name merely contains a duplicated
    name (e.g. 'valid' next to a duplicated 'id') is never removed.

    :param google_sheet_dataset: GoogleSheetDataset: A google sheet dataset object.
    :param duplicate_columns_handling: str: Specifies how duplicate columns should be handled.
        It can take the values:
            - 'keep_all' [default value]: to keep all the duplicate columns.
            - 'keep_first': to keep the first column among all the duplicates.
            - 'remove_all': to remove all the duplicate columns.
        Any other value behaves like 'keep_all'.

    :returns: google_sheet_dataframe: pd.core.frame.DataFrame: A pandas dataframe with the content of
        the google sheet tabular data.
    """
    print(f"Converting the GoogleSheetDataset '{google_sheet_dataset['sheet_name']}' into a pandas dataframe...")
    google_sheet_dataframe = pd.DataFrame(google_sheet_dataset["header_columns_data"])
    if duplicate_columns_handling in ("keep_first", "remove_all"):
        dataframe_header_column_occurences = google_sheet_dataset["header_column_occurences"]
        dataframe_columns_to_remove = set()
        for column_name_root, n_additional_occurences in dataframe_header_column_occurences.items():
            if n_additional_occurences <= 0:
                continue
            # The additional occurrences are the columns suffixed with '_1' ... '_<n>'.
            dataframe_columns_to_remove.update(
                f"{column_name_root}_{occurence_index}"
                for occurence_index in range(1, n_additional_occurences + 1)
            )
            if duplicate_columns_handling == "remove_all":
                dataframe_columns_to_remove.add(column_name_root)
        # Only the columns actually present in the dataframe are dropped, so that drop() cannot fail.
        dataframe_columns_to_drop = [
            column_name for column_name in google_sheet_dataframe.columns
            if column_name in dataframe_columns_to_remove
        ]
        google_sheet_dataframe.drop(dataframe_columns_to_drop, axis=1, inplace=True)
    print(f"GoogleSheetDataset '{google_sheet_dataset['sheet_name']}' has been successfully converted into pandas!\n")
    return google_sheet_dataframe


def from_google_spreadsheet_to_dataframe_collection(google_credentials: Credentials,
                                                    spreadsheet_id: str,
                                                    sheets_to_collect: list=["ALL_SHEETS"],
                                                    duplicate_columns_handling: str="keep_all"
                                                    )->dict:
    """Loads the sheets of a google spreadsheet file as a collection of pandas dataframes.

    The sheets are first collected with get_spreadsheet_data_colection, then each collected sheet
    dataset is converted with from_google_sheet_dataset_to_dataframe.

    :param google_credentials: Credentials: A google credentials object.
    :param spreadsheet_id: str: The ID of the spreadsheet to retrieve.
    :param sheets_to_collect: list: The name of the sheets to collect within the spreadsheet file.
        - If it is equal to ['ALL_SHEETS'] (default value) the data from all the sheets will be collected.
        - Else, it must contain the exact labels of the sheet names.
    :param duplicate_columns_handling: str: Specifies how duplicate columns should be handled.
        It can take the values:
            - 'keep_all' [default value]: to keep all the duplicate columns.
            - 'keep_first': to keep the first column among all the duplicates.
            - 'remove_all': to remove all the duplicate columns.

    :returns: dataframe_collection: dict: The dictionary mapping each collected sheet name to its pandas dataframe.
    """
    datasets_by_sheet_name = get_spreadsheet_data_colection(
        google_credentials, spreadsheet_id, sheets_to_collect
    )
    return {
        sheet_name: from_google_sheet_dataset_to_dataframe(sheet_dataset, duplicate_columns_handling)
        for sheet_name, sheet_dataset in datasets_by_sheet_name.items()
    }