import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator

from transformers import pipeline

class FakeProcessor(BaseEstimator):
    """Pass-through preprocessing step that exposes the raw values of ``X``.

    Nothing is learned during ``fit``; ``transform`` simply returns
    ``X.values``.

    Parameters
    ----------
    review_col : str, default="review"
        Stored on the instance; not read by any method of this class.
    """

    def __init__(self, review_col="review"):
        self.review_col = review_col

    def fit(self, X, y=None):
        """Do nothing and return the estimator itself.

        Returning ``self`` (instead of ``None``) allows the usual
        ``estimator.fit(X).transform(X)`` chaining.
        """
        return self

    def transform(self, X):
        """Return ``X.values``.

        ``X`` must expose a ``.values`` attribute
        (presumably a pandas DataFrame or Series -- confirm against callers).
        """
        return X.values

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the transformed ``X``.

        The previous implementation called ``self.predict``, which this
        class does not define, so every call raised ``AttributeError``.
        """
        return self.fit(X, y).transform(X)

    
class SentimentAnalyzer(BaseEstimator):
    """Predict an integer rating for each review from its sentiment.

    Every review is scored by a text-classification pipeline. The
    ``positive`` / ``neutral`` / ``negative`` scores are collapsed into a
    single sentiment value, which is then rescaled linearly onto
    ``[min_rating, max_rating]`` and rounded to an integer.

    Parameters
    ----------
    max_rating : int, default=5
        Rating produced for the most positive sentiment.
    min_rating : int, default=1
        Rating produced for the most negative sentiment.
    review_col : str, default="review"
        Label of the column that holds the review text.
    column_labels : sequence of str or None, default=None
        Labels of the columns of the 2-D array given to ``predict``, in
        column order. Must be set (here or through ``set_column_labels``)
        before ``predict`` is called.
    """

    def __init__(self, max_rating=5, min_rating=1, review_col="review", column_labels=None):
        # NOTE(review): the pipeline is built eagerly, so every construction
        # of this class loads the model. Kept as-is so that ``self.analyzer``
        # is available right after ``__init__`` for existing callers.
        self.analyzer = pipeline(
            task="text-classification",
            model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
            tokenizer="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
        )
        self.max_rating = max_rating
        self.min_rating = min_rating
        self.column_labels = column_labels
        self.review_col = review_col

    def get_relevant_columns(self, X, columns_name):
        """Return the column of ``X`` whose label is ``columns_name``.

        Parameters
        ----------
        X : 2-D array supporting ``X[:, i]`` indexing
        columns_name : str
            Label to look up in ``self.column_labels``.

        Raises
        ------
        ValueError
            If ``column_labels`` has not been set, or if ``columns_name`` is
            not one of the labels.
        """
        if self.column_labels is None:
            raise ValueError(
                "column_labels is not set; pass it to __init__ or call "
                "set_column_labels() before selecting a column."
            )
        # Going through ``list`` accepts any sequence of labels (tuple,
        # pandas Index, numpy array), not only objects with an ``.index``
        # method.
        column_index = list(self.column_labels).index(columns_name)
        # Retrieve the corresponding data column
        return X[:, column_index]

    def set_column_labels(self, column_labels):
        """Record the labels of the columns of the array given to ``predict``."""
        # in order to preserve the attribute `column_labels` when cloning
        # the estimator, we have declared it as a keyword argument in the
        # `__init__` and set it there
        print(f"set_column_labels: {column_labels}")
        self.column_labels = column_labels

    def fit(self, X, y=None):
        """Do nothing and return the estimator itself.

        No training happens here; the pipeline is created in ``__init__``.
        Returning ``self`` (instead of ``None``) allows
        ``estimator.fit(X).predict(X)`` chaining.
        """
        return self

    def build_input_df(self, X):
        """Build a one-column string DataFrame (``"review"``) from ``X``."""
        return pd.DataFrame(
            {
                "review": self.get_relevant_columns(X, self.review_col),
            },
            dtype="string"
        )

    def _label_scores(self, review):
        """Run the pipeline on one review and map each label to its score."""
        return {
            d["label"]: d["score"]
            for d in self.analyzer(review, truncation=True, padding=True, top_k=10)
        }

    def _rating_from_scores(self, rating):
        """Convert a ``{label: score}`` mapping into an integer rating."""
        # (positive - negative) / total lies in [-1, 1] for non-negative
        # scores; shifting by 1 and halving brings it to [0, 1].
        polarity = (rating["positive"] - rating["negative"]) / (
            rating["positive"] + rating["neutral"] + rating["negative"]
        )
        score = (polarity + 1) / 2
        # Linear rescale of [0, 1] onto [min_rating, max_rating].
        return int(np.round(
            score * (self.max_rating - self.min_rating) + self.min_rating
        ))

    def predict(self, X):
        """Return one integer rating per row of ``X``.

        Parameters
        ----------
        X : 2-D array supporting ``X[:, i]`` indexing
            Its columns must be described by ``self.column_labels``.

        Returns
        -------
        numpy.ndarray of int64, one entry per row of ``X``.
        """
        input_series = self.build_input_df(X)["review"]
        ratings = [
            self._rating_from_scores(self._label_scores(review))
            for review in input_series
        ]
        # An explicit dtype keeps the result an integer array even when
        # there are no rows to score.
        return np.array(ratings, dtype=np.int64)

    def fit_predict(self, X, y=None):
        """Fit (a no-op) and return the predictions for ``X``."""
        return self.fit(X, y).predict(X)
