import dash_bootstrap_components as dbc
import dash_daq as daq
import dataiku
from process_mining.process_mining import mine_process
import pandas as pd

def sink_container():
    """Build the "Show SINK" control: a text label next to a boolean switch.

    Returns:
        A ``dbc.Row`` with two auto-width columns: the label ``"Show SINK"``
        and a ``daq.BooleanSwitch`` whose id is ``'with-sink'``, created with
        ``on=False`` and ``persistence=False``.
    """
    label_col = dbc.Col("Show SINK", width="auto")
    sink_switch = daq.BooleanSwitch(
        id='with-sink',
        on=False,
        color='#abe2fb',
        persistence=False,
    )
    switch_col = dbc.Col(sink_switch, width="auto")

    # Flex layout keeps the label and the switch vertically centred on one line.
    row_style = {
        'display': 'flex',
        'alignItems': 'center',
        'padding-left': '6px',
        'margin-bottom': '2px',
    }
    return dbc.Row([label_col, switch_col], style=row_style)

def compute_conform_workflow(workflow_df, frequency_time, use_end_timestamp):
    """Mine the observed workflow and map it onto the reference process.

    The reference process is read from the Dataiku dataset
    ``'reference_process'``, expanded into one row per step of each variant
    (wrapped in ``START`` / ``END`` markers) and mined with ``mine_process``.
    ``workflow_df`` is mined as well. Every observed transition that has no
    counterpart in the reference is redirected to a synthetic ``'SINK'``
    target, and reference transitions that were never observed get weight 0.

    Args:
        workflow_df: Event log passed unchanged to ``mine_process``.
        frequency_time: ``'frequency'`` mines with ``transition='frequency'``
            and sums weights per (source, target); any other value mines with
            ``transition='performance'`` and averages them.
        use_end_timestamp: Only used in performance mode. When truthy,
            ``'end_timestamp'`` is passed to ``mine_process`` as its second
            positional argument.

    Returns:
        A tuple ``(workflow_final, reference_start_end, activities_final)``:
        the aggregated transitions (``source``, ``target``, ``weight``) whose
        source is a reference step; the reference start/end frame with an
        extra ``SINK`` row; and the reference steps (plus ``SINK``) joined to
        the observed activities, with missing ``frequency`` set to 0 and
        restricted to steps that appear in ``workflow_final``.
    """
    # --- Turn the reference variants into an event log ---------------------
    ref_df = dataiku.Dataset('reference_process').get_dataframe()
    ref_df['variant'] = ['START,' + steps + ',END' for steps in ref_df['variant']]
    ref_df['variant'] = ref_df['variant'].str.split(',')
    ref_events = ref_df.explode('variant')

    # The position of a step inside its variant serves as a pseudo timestamp.
    step_position = ref_events.groupby('variant_rank').cumcount() + 1
    ref_events['timestamp'] = step_position
    ref_events['sorting'] = ref_events['timestamp']

    # Positional rename: assumes the dataset has exactly three columns, in the
    # order variant / count / variant_rank -- TODO confirm against the dataset.
    ref_events.columns = ['activity', 'count', 'case', 'timestamp', 'sorting']

    # --- Mine the reference and add the synthetic SINK step ----------------
    ref_transitions, reference_start_end, ref_steps = mine_process(ref_events)
    sink_start_end = pd.DataFrame({'step': ['SINK'], 'frequency': [0], 'start_end': ['end']})
    reference_start_end = pd.concat([reference_start_end, sink_start_end])
    sink_step = pd.DataFrame({'step': ['SINK'], 'frequency': [0]})
    ref_steps = pd.concat([ref_steps, sink_step])

    # --- Mine the observed workflow in the requested mode ------------------
    by_frequency = frequency_time == 'frequency'
    if by_frequency:
        mining_args, transition_kind = (workflow_df,), 'frequency'
    elif use_end_timestamp:
        mining_args, transition_kind = (workflow_df, 'end_timestamp'), 'performance'
    else:
        mining_args, transition_kind = (workflow_df,), 'performance'
    observed_transitions, _, observed_steps = mine_process(*mining_args, transition=transition_kind)

    # --- Align observed transitions with the reference ---------------------
    ref_transitions.columns = ['source', 'target', 'reference']
    joined = pd.merge(
        observed_transitions,
        ref_transitions,
        how='outer',
        on=['source', 'target'],
    )
    # Transitions absent from the reference are redirected to SINK.
    not_in_reference = joined['reference'].isna()
    joined.loc[not_in_reference, 'target'] = 'SINK'
    # Reference transitions that were never observed carry no weight.
    never_observed = joined['weight'].isna()
    joined.loc[never_observed, 'weight'] = 0

    # Several deviating transitions can now share one (source, SINK) pair,
    # so collapse them: sum for frequencies, mean for performance.
    grouped_weight = joined.groupby(['source', 'target'])['weight']
    aggregated = grouped_weight.sum() if by_frequency else grouped_weight.mean()
    workflow_final = aggregated.reset_index()
    workflow_final = workflow_final[workflow_final['source'].isin(ref_steps['step'])]

    # --- Attach observed frequencies to the reference steps ----------------
    ref_steps = ref_steps.drop(columns=['frequency'])
    activities_final = pd.merge(ref_steps, observed_steps, how='left', on='step')
    unseen_step = activities_final['frequency'].isna()
    activities_final.loc[unseen_step, 'frequency'] = 0

    # Keep only the steps that are an endpoint of a remaining transition.
    linked_steps = pd.concat([workflow_final['source'], workflow_final['target']])
    activities_final = activities_final[activities_final['step'].isin(linked_steps)]

    return workflow_final, reference_start_end, activities_final
    