# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import hashlib
# Read recipe inputs: load the "Reports_faers_renamed" dataset into a
# DataFrame; the cells below overwrite some of its columns.
Reports_faers_renamed = dataiku.Dataset("Reports_faers_renamed")
Reports_faers_renamed_df = Reports_faers_renamed.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Retrieve the project variables of the current project
project = dataiku.api_client().get_project(dataiku.default_project_key())
variables = project.get_variables()
# Both anonymization switches are read from the 'standard' variables;
# a missing key raises KeyError here.
anonymization_manufacturer_filter, anonymization_drug_filter = (
    variables['standard'][key]
    for key in ('anonymization_manufacturer_filter', 'anonymization_drug_filter')
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Cast the columns that may be anonymized to text, so each value offers
# .encode() when it is hashed later on.
# NOTE(review): depending on the pandas version, astype(str) may turn missing
# values into the literal text 'nan' - confirm this is acceptable downstream.
manufacturer_name = 'manufacturer'
drug_name = 'drug'
for _text_column in (manufacturer_name, drug_name):
    Reports_faers_renamed_df[_text_column] = Reports_faers_renamed_df[_text_column].astype(str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Hashing function
def anonymization(df, column_name, anonymization_filter):
    """Replace the values of one column with their SHA-256 hex digests.

    The column is overwritten in place on ``df`` when hashing is enabled.

    Args:
        df: pandas DataFrame holding the column to anonymize.
        column_name: Label of the column to process.
        anonymization_filter: Hashing is applied when this value is truthy.
            Note that any non-empty string (even "false") is truthy.

    Returns:
        The column ``df[column_name]`` after processing: hashed when the
        filter is truthy, untouched otherwise.
    """
    if not anonymization_filter:
        # Anonymization disabled: hand the column back unchanged.
        return df[column_name]

    def _hash_value(value):
        # Keep missing values missing instead of failing on them.
        if pd.isna(value):
            return value
        # str() lets non-text values (numbers, ...) be hashed as well;
        # it is a no-op for values that are already strings.
        return hashlib.sha256(str(value).encode("utf-8")).hexdigest()

    df[column_name] = df[column_name].apply(_hash_value)
    return df[column_name]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Hash the manufacturer column if the corresponding project variable is set
Reports_faers_renamed_df[manufacturer_name] = anonymization(
    df=Reports_faers_renamed_df,
    column_name=manufacturer_name,
    anonymization_filter=anonymization_manufacturer_filter,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Hash the drug column if the corresponding project variable is set
Reports_faers_renamed_df[drug_name] = anonymization(
    df=Reports_faers_renamed_df,
    column_name=drug_name,
    anonymization_filter=anonymization_drug_filter,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: store the processed DataFrame in the
# "Reports_faers_anonymization" dataset.
Reports_faers_anonymization = dataiku.Dataset("Reports_faers_anonymization")
Reports_faers_anonymization.write_with_schema(Reports_faers_renamed_df)