import pandas as pd
import numpy as np

def group_template(index, column, column_type):
    """Build the per-column aggregation settings dict for a grouping step.

    Every aggregation flag starts disabled except ``value`` (always on) and
    ``avg``, which is enabled for any column whose type is not ``'string'``.

    NOTE(review): the key names look like a Dataiku grouping-recipe payload;
    confirm against the consumer of this dict.

    Args:
        index: Value stored under ``'$idx'``.
        column: Column name stored under ``'column'``.
        column_type: Column type stored under ``'type'``; the literal
            ``'string'`` switches the ``avg`` flag off.

    Returns:
        A new dict holding the aggregation flags for ``column``.
    """
    # Averages are requested for everything except string columns.
    wants_average = not column_type == 'string'

    # Key order is kept stable in case the dict is serialized downstream.
    settings = {
        '$idx': index,
        '$selected': False,
        'avg': wants_average,
        'column': column,
        'concat': False,
        'concatDistinct': False,
        'concatSeparator': ',',
        'count': False,
        'countDistinct': False,
        'first': False,
        'firstLastNotNull': False,
        'last': False,
        'max': False,
        'min': False,
        'orderColumn': 'id',
        'stddev': False,
        'value': True,
        'sum': False,
        'sum2': False,
        'type': column_type,
    }
    return settings

def formula_template(column):
    """Build a ``CreateColumnWithGREL`` processor step for ``column``.

    The generated expression compares ``column`` with ``<column>_avg`` and
    evaluates to ``"high"`` when the column is greater, ``"low"`` otherwise.

    NOTE(review): ``params['column']`` is set to the same name as the input
    column, so the step presumably overwrites it; confirm this is intended.

    Args:
        column: Name of the column to compare against its ``_avg`` companion.

    Returns:
        A new dict describing the processor step.
    """
    grel = 'if({col}>{col}_avg, "high", "low")'.format(col=column)
    step_params = {
        'expression': grel,
        'column': '{col}'.format(col=column),
    }
    return {
        'preview': False,
        'metaType': 'PROCESSOR',
        'disabled': False,
        'type': 'CreateColumnWithGREL',
        'params': step_params,
        'alwaysShowComment': False,
    }

def information_value(X, Y):
    """Compute the Information Value (IV) of a discrete feature.

    Rows are grouped by the distinct values of ``X``. Within each group the
    sum of ``Y`` is taken as the number of "bads" and the remainder of the
    group count as the number of "goods", so ``Y`` is expected to be a 0/1
    indicator where 1 marks a bad. For every group::

        woe = ln(goods_distribution / bads_distribution)
        iv  = woe * (goods_distribution - bads_distribution)

    and the function returns the sum of ``iv`` over all groups.

    Args:
        X: Feature values; flattened with ``np.ravel`` so a column vector
            is accepted. Must have as many elements as ``Y``.
        Y: Target values aligned with ``X``.

    Returns:
        The Information Value as a float. Returns ``0.0`` when the input is
        empty or when ``Y`` contains only goods or only bads: the class
        distributions are undefined in that case, and dividing by the zero
        total would otherwise yield ``inf``.
    """
    df = pd.DataFrame({'X': np.ravel(X), 'Y': Y})
    if df.empty:
        return 0.0

    per_group = df.groupby('X')['Y'].agg(['count', 'sum'])
    bads = per_group['sum']
    goods = per_group['count'] - bads

    total_bads = bads.sum()
    total_goods = goods.sum()
    # A single-class target carries no information to separate; bail out
    # before the totals are used as divisors.
    if total_bads == 0 or total_goods == 0:
        return 0.0

    # Floor each cell at 0.5 so that a group with no bads (or no goods)
    # still produces a finite log ratio.
    bads_distribution = np.maximum(bads, 0.5) / total_bads
    goods_distribution = np.maximum(goods, 0.5) / total_goods

    woe = np.log(goods_distribution / bads_distribution)
    return (woe * (goods_distribution - bads_distribution)).sum()