from collections import OrderedDict
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union, TYPE_CHECKING

from dataiku.doctor.timeseries.utils.pandas_compat import str_to_datetime_compat
from dataiku.eda.types import Literal
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
from numpy import vectorize, dtype
from itertools import zip_longest
from tabulate import tabulate

from dataiku.base.utils import package_is_at_least
from dataiku.eda.exceptions import InvalidParams, NumericalCastError, DateParseError


# Type aliases used by the annotations in this module.
# The precise generic forms are only evaluated under static type checking;
# at runtime every alias below is plain `Any`.
if TYPE_CHECKING:
    # pd.Index parametrized with pd.Int64Dtype
    IDFIndex = pd.Index[pd.Int64Dtype]
    # Anything accepted as an integer indexer: an IDFIndex, a list of ints or an integer ndarray
    _AnyIntIndex = Union[IDFIndex, List[int], np.ndarray[Tuple[Literal[1]], np.dtype[np.integer]]]
    # Anything accepted as a boolean mask: a list of bools or a boolean ndarray
    _AnyBoolIndex = Union[List[bool], np.ndarray[Tuple[Literal[1]], np.dtype[np.bool_]]]
    # float64 ndarray
    FloatVector = np.ndarray[Tuple[Literal[1]], np.dtype[np.float64]]
else:
    IDFIndex = Any
    _AnyIntIndex = Any
    _AnyBoolIndex = Any
    FloatVector = Any

# Storage type of a column: text (pd.Categorical), float (FloatVector) or date (pd.DatetimeIndex)
RawVector = Union[pd.Categorical, FloatVector, pd.DatetimeIndex]


class ImmutableDataFrame(ABC):
    """
    ImmutableDataFrame is an immutable view constructed from a Pandas's DataFrame providing EDA-specific features
    such as post typing.

    Post typing
    ===========

    With post typing, the same column can be "treated as" any type regardless of the underlying storage
    type (which is determined by the original DataFrame). ImmutableDataFrame provides facilities to specify type
    when reading a column. In order to avoid repeated conversions, casted series are always cached.

    Post-typed text column:
        - pd.Categorical with strings
        - No "np.nan": missing values are always represented by ""

    Post-typed float column:
        - Numpy array (float64)
        - NaN represents missing values
        - No 'Inf' or '-Inf'

    Post-typed date column:
        - pd.DatetimeIndex (contains pd.Timestamp items)
        - pd.NaT is allowed

    Casting rules:
        - Stored as float => read as text:
            - Always possible & safe
            - NaN are converted into ""
        - Stored as text => read as float:
            - "NaN" becomes NaN
            - "Inf"/"-Inf" becomes NaN
            - "" becomes NaN
            - Invalid numbers => exception
        - Stored as text => read as date:
            - "NaT" becomes pd.NaT (not a time)
            - dates are parsed according to the ISO8601 format
            - Invalid formats => exception

    Indexing
    ========

    As opposed to a regular Pandas's DataFrame, ImmutableDataFrame does not have any advanced indexing mechanism:
    - Columns are stored in np.array (float), pd.Categorical (text) or pd.DatetimeIndex (date)
    - Rows are indexed by their positions in the ImmutableDataFrame

    For convenience, ImmutableDataFrame supports boolean & integer indexing, with a few differences compared to Pandas/numpy:
    - Slicing does not copy the data
    - Slicing a ImmutableDataFrame produces another ImmutableDataFrame which can be sliced again
    - Slices can be intersected and unioned, as long as they are originating from the same ImmutableDataFrame
    """

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of rows."""
        raise NotImplementedError

    @abstractmethod
    def columns(self) -> List[str]:
        """Return the list of column names."""
        raise NotImplementedError

    def __str__(self) -> str:
        """Render (at most) the first 10 rows as a text table, using the text representation of every column."""
        sample = self[np.arange(min(10, len(self)))]
        # Keep at most 30 values per column
        data = [sample.text_col(col)[:30] for col in sample.columns()]
        # Transpose the per-column values into rows
        cells = map(list, zip(*data))
        return tabulate(cells, headers=self.columns())

    @abstractmethod
    def raw_col(self, column: str) -> RawVector:
        """Return the column in its storage type (no casting)."""
        raise NotImplementedError

    def _text_col_from_cache(self, column: str) -> Optional[pd.Categorical]:
        """Return the cached text version of the column, or None if it has not been computed yet."""
        key = (column, 'text')
        return self.cache.get(key)

    def text_col(self, column: str) -> pd.Categorical:
        """Return the column read as text. The result is cached."""
        key = (column, 'text')
        cached = self.cache.get(key)
        if cached is None:
            cached = self._text_col_uncached(column)
            self.cache[key] = cached
        return cached

    def _date_col_from_cache(self, column: str) -> Optional[pd.DatetimeIndex]:
        """Return the cached date version of the column, or None if it has not been computed yet."""
        key = (column, 'datetime')
        return self.cache.get(key)

    def date_col(self, column: str) -> pd.DatetimeIndex:
        """Return the column read as dates. The result is cached."""
        key = (column, 'datetime')
        cached = self.cache.get(key)
        if cached is None:
            cached = self._date_col_uncached(column)
            self.cache[key] = cached
        return cached

    def date_col_no_missing(self, column: str) -> pd.DatetimeIndex:
        """Return the column read as dates, without the null (pd.NaT) entries."""
        series = self.date_col(column)
        return series[~pd.isnull(series)]

    def _float_col_from_cache(self, column: str) -> Optional[FloatVector]:
        """Return the cached float version of the column, or None if it has not been computed yet."""
        key = (column, 'float')
        return self.cache.get(key)

    def float_col(self, column: str) -> FloatVector:
        """Return the column read as floats. The result is cached."""
        key = (column, 'float')
        cached = self.cache.get(key)
        if cached is None:
            cached = self._float_col_uncached(column)
            self.cache[key] = cached
        return cached

    def float_col_no_missing(self, column: str) -> FloatVector:
        """Return the column read as floats, keeping only the finite values."""
        series = self.float_col(column)
        return series[np.isfinite(series)]

    def __getitem__(self, indices: Union[_AnyIntIndex, _AnyBoolIndex]) -> 'ImmutableDataFrame':
        """
        Take a subset of the current ImmutableDataFrame. Data are not copied (ie. it returns a view).

        Warning: ImmutableDataFrame indexing is not the same as in indexing in np.ndarray/pd.NDFrame

        - Basic slicing is not supported (idf[:], etc)
        - Advanced indexing:
            - Boolean masking is supported
                - Mask must be of same size as self
                - Input must be one of:
                    - np.ndarray(bool)
                    - list of booleans
            - Purely integer indexing is supported:
                - Indices must be within [0, len(self)-1]
                - Input must be one of:
                    - ndarray(int)
                    - list(integer)
                    - pd.Index(dtype='int64')

        Raises InvalidParams when a boolean mask does not have the same size as self, and TypeError
        for any unsupported kind of index.
        """
        # Every supported input is normalized into a pd.Index(dtype='int64') and re-dispatched
        if isinstance(indices, list):
            return self[np.array(indices)]

        if isinstance(indices, np.ndarray):
            # An empty array is handled before looking at its dtype (np.array([]) is float64)
            if len(indices) == 0:
                return self[pd.Index([], dtype='int64')]

            if np.issubdtype(indices.dtype, np.bool_):
                if len(indices) != len(self):
                    raise InvalidParams("ImmutableDataFrame boolean mask must have the same size as the dataframe")
                return self[pd.Index(np.flatnonzero(indices), copy=False, dtype='int64')]

            if np.issubdtype(indices.dtype, np.integer):
                return self[pd.Index(indices, copy=False, dtype='int64')]

        if isinstance(indices, pd.Index) and indices.dtype == dtype('int64'):
            # Selecting every row in the original order is the identity: no need for a new view
            if len(indices) == len(self):
                original_indices = pd.Index(np.arange(len(self)), copy=False, dtype='int64')
                if indices.equals(original_indices):
                    return self
            return _ImmutableDataFrameSubset(self, indices)

        raise TypeError("ImmutableDataFrame only supports boolean and integer indexing")

    def __and__(self, other: 'ImmutableDataFrame') -> 'ImmutableDataFrame':
        """Intersect two ImmutableDataFrames sharing a common ancestor."""
        return self._combine(other, ImmutableDataFrame._combine_and)

    def __or__(self, other: 'ImmutableDataFrame') -> 'ImmutableDataFrame':
        """Union two ImmutableDataFrames sharing a common ancestor."""
        return self._combine(other, ImmutableDataFrame._combine_or)

    @staticmethod
    def from_csv(stream, dss_schema) -> 'ImmutableDataFrame':
        """
        Build an ImmutableDataFrame from a headerless, tab-separated CSV stream.

        Column names and types are taken from dss_schema["columns"] (each entry provides "name" and "type"):
        numerical types are read as float64, every other type is read as a category (text).
        """
        num_types = {"bigint", "double", "float", "int", "smallint", "tinyint"}
        eda_schema = OrderedDict(
            (col["name"], np.float64 if col["type"] in num_types else "category")
            for col in dss_schema["columns"]
        )

        df = pd.read_csv(
            stream,
            names=list(eda_schema.keys()),
            dtype=eda_schema,
            header=None,
            sep="\t",
            doublequote=True,
            encoding="utf8",
            quotechar='"',
            parse_dates=False,
            float_precision="round_trip",
        )

        return ImmutableDataFrame.from_df(df)

    @staticmethod
    def from_df(df: pd.DataFrame) -> 'ImmutableDataFrame':
        """Build an ImmutableDataFrame from the columns of a Pandas DataFrame."""
        return ImmutableDataFrame.from_dict({col: df[col] for col in df.columns})

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> 'ImmutableDataFrame':
        """
        Build an ImmutableDataFrame from a mapping {column name: values}.

        Values are converted by _convert_input_series(). Raises ValueError if the columns
        do not all have the same size.
        """
        converted_series = {col: ImmutableDataFrame._convert_input_series(series) for col, series in data.items()}

        for col_name in converted_series.keys():
            # Sanity of mind: ImmutableDataFrame column names are always 'str'
            assert isinstance(col_name, str)

        all_series = list(converted_series.values())
        size = 0 if len(all_series) == 0 else len(all_series[0])
        if not all(len(series) == size for series in all_series):
            raise ValueError("All columns must have the same size")

        return _RootImmutableDataFrame(converted_series, size)

    def extend(self, data: Dict[str, Any], prefix: str = "base_", align_on: Optional['ImmutableDataFrame'] = None) -> 'ImmutableDataFrame':
        """
        Creates a new full size ImmutableDataFrame from the current one, with additional columns.
        Data from the current IDF are not copied, the current IDF is referenced in the extended one.

        If the new data columns are smaller than the current IDF,
        a subset of the current IDF of the same size as the data can be passed
        to align the new data on.

        In the extended IDF, the columns of the current IDF are exposed under their name prefixed by `prefix`.

        Raises ValueError when:
        - align_on does not share the same base root as the current IDF
        - a new column name is equal to a prefixed column name of the current IDF
        - a new column does not have the same length as align_on
        - a new column is converted into an unsupported storage type
        """
        if align_on is None:
            align_on = self

        if not (align_on._base_root() is self._base_root()):
            raise ValueError("Cannot align on dataframe with a different root base")

        align_on_size = len(align_on)
        align_on_index = align_on._indices_relative_to_root()
        full_index = pd.Index(np.arange(len(self)), dtype='int64')

        # Prefixed name => base column, built once so that detecting a conflict is
        # a single lookup per new column instead of a scan of all the base columns
        prefixed_base_columns = {"{}{}".format(prefix, base_col): base_col for base_col in self.columns()}

        extra_col_data = {}
        for col, series in data.items():
            assert isinstance(col, str)

            if col in prefixed_base_columns:
                raise ValueError('New column "{}" conflicts with base column "{}" with prefix "{}"'.format(col, prefixed_base_columns[col], prefix))

            if len(series) != align_on_size:
                raise ValueError("Length of new data must be consistent with the data it is aligned on")

            converted_series = ImmutableDataFrame._convert_input_series(series)

            # augment series to root size and align data on correct index
            # (rows which are not part of align_on are left missing)
            if isinstance(converted_series, pd.DatetimeIndex):
                # the DateTimeIndex provided by _convert_input_series has UTC timezone
                aligned_series = pd.Series(index=full_index, dtype="datetime64[ns, UTC]")
                aligned_series.loc[align_on_index] = converted_series
                extra_col_data[col] = pd.DatetimeIndex(aligned_series)
            elif isinstance(converted_series, pd.Categorical):
                aligned_series = pd.Series(index=full_index, dtype="object")
                aligned_series.loc[align_on_index] = converted_series
                # Re-normalize: the rows left missing must become "" in the final categorical
                extra_col_data[col] = ImmutableDataFrame._parse_input_categorical(pd.Categorical(aligned_series))
            elif isinstance(converted_series, np.ndarray) and converted_series.dtype == np.float64:
                aligned_series = pd.Series(index=full_index, dtype="float")
                aligned_series.loc[align_on_index] = converted_series
                extra_col_data[col] = aligned_series.values
            else:
                raise ValueError("Unsupported data type")

        return _ExtendedRootImmutableDataFrame(self, extra_col_data, base_prefix=prefix)

    # End of public API

    def __init__(self):
        # Cache of casted columns, keyed by (column name, kind)
        self.cache = {}

    def _text_col_uncached(self, column: str) -> pd.Categorical:
        """Compute the text version of a column from its raw storage."""
        values = self.raw_col(column)

        if isinstance(values, pd.Categorical):
            return values
        elif isinstance(values, pd.DatetimeIndex):
            return ImmutableDataFrame._cast_date_to_text(values)
        elif isinstance(values, np.ndarray) and values.dtype == np.float64:
            return ImmutableDataFrame._cast_float_to_text(values)

        # Raw storage is none of the supported types
        raise NumericalCastError()

    def _float_col_uncached(self, column: str) -> FloatVector:
        """Compute the float version of a column: raw storage if it is an ndarray, otherwise parsed from text."""
        values = self.raw_col(column)
        if isinstance(values, np.ndarray):
            return values

        values = self.text_col(column)
        return ImmutableDataFrame._cast_text_to_float(values)

    def _date_col_uncached(self, column: str) -> pd.DatetimeIndex:
        """Compute the date version of a column: raw storage if it is a DatetimeIndex, otherwise parsed from text."""
        values = self.raw_col(column)
        if isinstance(values, pd.DatetimeIndex):
            return values

        values = self.text_col(column)
        return ImmutableDataFrame._parse_text_to_date(values)

    @staticmethod
    def _cast_text_to_float(text_data: pd.Categorical) -> FloatVector:
        """
        Parse text values into float64. Empty strings and non-finite values become NaN.

        Raises NumericalCastError if a value cannot be parsed as a number.
        """
        try:
            data = np.asarray(np.where(text_data == '', 'NaN', text_data), dtype=np.float64)
            return np.where(np.isfinite(data), data, np.nan)
        except ValueError as err:
            raise NumericalCastError() from err

    @staticmethod
    def _parse_text_to_date(data: pd.Categorical) -> pd.DatetimeIndex:
        """
        Parse text values into dates using str_to_datetime_compat().

        Raises DateParseError (carrying the message of the original error) if parsing fails with a ValueError.
        """
        try:
            return str_to_datetime_compat(data)
        except ValueError as e:
            raise DateParseError("{}".format(e)) from e

    @staticmethod
    def _cast_date_to_text(data: pd.DatetimeIndex) -> pd.Categorical:
        """Format dates with pd.Timestamp.isoformat into an ordered categorical. Null dates become ""."""
        if len(data) == 0:
            return pd.Categorical([])

        mask = pd.notna(data)
        str_array = np.full(len(data), "", dtype=np.object_)
        str_array[mask] = data[mask].map(pd.Timestamp.isoformat)
        return pd.Categorical(str_array, categories=sorted(pd.unique(str_array)), ordered=True)

    @staticmethod
    def _cast_float_to_text(data: FloatVector) -> pd.Categorical:
        """Format floats with "%g" into an ordered categorical. Non-finite values become ""."""
        if len(data) == 0:
            return pd.Categorical([])

        @vectorize
        def _float_formatter(float_value):
            return "%g" % float_value

        out = _float_formatter(data)
        out[~np.isfinite(data)] = ""
        return pd.Categorical(out, categories=sorted(pd.unique(out)), ordered=True)

    @staticmethod
    def _convert_input_series(series: Any) -> RawVector:
        """
        Convert an "external" series into ImmutableDataFrame storage format:
        - pd.Categorical(ordered=True) if the values are string-like
        - np.ndarray(dtype=np.float64) if the values are number-like
        - pd.DatetimeIndex if the values are date-like

        The input is never modified: numerical values are always converted into a new array.

        Raises ValueError if the type of the input is not recognized.
        """
        if isinstance(series, pd.Series):
            # Only keep values (np.ndarray or pd.Categorical)
            return ImmutableDataFrame._convert_input_series(series.values)
        elif isinstance(series, pd.Categorical):
            return ImmutableDataFrame._parse_input_categorical(series)

        elif isinstance(series, pd.DatetimeIndex):
            # always convert timestamps to UTC
            if series.tz is None:
                return series.tz_localize("UTC")
            else:
                return series.tz_convert("UTC")

        elif isinstance(series, np.ndarray):
            if np.issubdtype(series.dtype, np.floating) or np.issubdtype(series.dtype, np.integer):
                # Map everyone to float64 (even integers & smaller floats).
                # np.array() always copies: the cleanup below is done in place and must neither
                # alter the caller's data nor fail when the input array is read-only
                # (e.g. values exposed by Pandas when Copy-on-Write is enabled)
                converted = np.array(series, dtype=np.float64)
                # Inf/-Inf are allowed in Pandas, but in EDA world they are treated as empty represented by NaN
                converted[~np.isfinite(converted)] = np.nan
                # Replace -0 by 0
                converted[converted == 0] = 0
                return converted
            elif series.dtype == 'object':
                # Convert to pd.Categorical
                return ImmutableDataFrame._parse_input_categorical(pd.Categorical(series))
            elif series.dtype == 'datetime64[ns]':
                # Convert to pd.DatetimeIndex
                return pd.to_datetime(series, utc=True)
        elif isinstance(series, list):
            # Use Pandas's type inference
            return ImmutableDataFrame._convert_input_series(pd.Series(series))

        raise ValueError("Unrecognized input type")

    @staticmethod
    def _parse_input_categorical(categorical: pd.Categorical) -> pd.Categorical:
        """
        Transform a pd.Categorical into an ImmutableDataFrame-compliant pd.Categorical:
        - NaN are not allowed and are replaced by empty strings
        - Categories must be unicode strings & lexicographically sorted
        """

        # Replace np.nan by a category (empty string: "")
        if "" not in categorical.categories:
            categorical = categorical.add_categories("")
        categorical = categorical.fillna("")

        # Sort categories lexicographically
        categorical = categorical.reorder_categories(new_categories=sorted(categorical.categories), ordered=True)

        for category in categorical.categories:
            # Sanity of mind: ImmutableDataFrame text values are always 'str'
            #                 and NOTHING else (float, None, ...)
            assert isinstance(category, str)

        return categorical

    @staticmethod
    def _combine_or(base: 'ImmutableDataFrame', a: 'ImmutableDataFrame', b: 'ImmutableDataFrame', indices_a: Optional[IDFIndex], indices_b: Optional[IDFIndex]) -> 'ImmutableDataFrame':
        """Union of a and b, given their row indices relative to base (None meaning "all the rows of base")."""
        # if a or b is root
        if (indices_a is None) or (indices_b is None):
            return base
        # else
        return ImmutableDataFrame._combine_subsets(base, a, b, indices_a.union(indices_b), indices_a, indices_b)

    @staticmethod
    def _combine_and(base: 'ImmutableDataFrame', a: 'ImmutableDataFrame', b: 'ImmutableDataFrame', indices_a: Optional[IDFIndex], indices_b: Optional[IDFIndex]) -> 'ImmutableDataFrame':
        """Intersection of a and b, given their row indices relative to base (None meaning "all the rows of base")."""
        # if a or b is root
        if indices_b is None:
            return a
        if indices_a is None:
            # b can only be returned as-is when it is built on the same slice root as a,
            # otherwise its rows are re-selected from base
            if b._root() is a._root():
                return b
            return base[indices_b]
        # else
        return ImmutableDataFrame._combine_subsets(base, a, b, indices_a.intersection(indices_b), indices_a, indices_b)

    @staticmethod
    def _combine_subsets(base: 'ImmutableDataFrame', a: 'ImmutableDataFrame', b: 'ImmutableDataFrame', base_indices: IDFIndex, indices_a: IDFIndex, indices_b: IDFIndex) -> 'ImmutableDataFrame':
        """
        Return the subset of base designated by base_indices, reusing a or b (rather than creating
        a new view) when they hold exactly the same set of rows.
        """
        # Comparing sorted unique values is the same as comparing sets of indices,
        # without building Python sets of (possibly many) integers
        combined_values = np.unique(np.asarray(base_indices))

        if np.array_equal(combined_values, np.unique(np.asarray(indices_a))):
            return a
        if (b._root() is a._root()) and np.array_equal(combined_values, np.unique(np.asarray(indices_b))):
            return b

        return base[base_indices]

    def _combine(self, other: 'ImmutableDataFrame', combine_fn: Callable[['ImmutableDataFrame', 'ImmutableDataFrame', 'ImmutableDataFrame', Optional[IDFIndex], Optional[IDFIndex]], 'ImmutableDataFrame']) -> 'ImmutableDataFrame':
        """
        Combine self and other with combine_fn (_combine_and / _combine_or).

        The deepest common ancestor of both slice hierarchies is searched, then the row indices of self
        and other are expressed relatively to it before being handed over to combine_fn.

        Raises ValueError if self and other do not share any common ancestor.
        """
        # distinguish symmetric / antisymmetric case
        common_ancestor_a = None
        common_ancestor_b = None

        # if self / other come from different base roots
        # (hierarchies are browsed from the root downwards; the shorter one is padded with None)
        zipped_slice_hierarchy_iterator = zip_longest(self._browse_slice_hierarchy(), other._browse_slice_hierarchy())
        root_a, root_b = next(zipped_slice_hierarchy_iterator)

        base_root_a = root_a._base_root()
        base_root_b = root_b._base_root()

        if base_root_a is base_root_b:
            common_ancestor_a = root_a
            common_ancestor_b = root_b

        # if self / other come from the same base root
        for idf_a, idf_b in zipped_slice_hierarchy_iterator:
            if idf_a is not idf_b:
                break

            common_ancestor_a = idf_a
            common_ancestor_b = idf_b

        if common_ancestor_a is None:
            raise ValueError("Combined ImmutableDataFrames must share a common ancestor")

        indices_a = self._squash_indices_into_parent(common_ancestor_a)
        indices_b = other._squash_indices_into_parent(common_ancestor_b)

        return combine_fn(common_ancestor_a, self, other, indices_a, indices_b)

    @abstractmethod
    def _browse_slice_hierarchy(self) -> Iterator['ImmutableDataFrame']:
        """Iterate over the chain of slices leading to self, starting from the slice root."""
        raise NotImplementedError

    def _root(self) -> 'ImmutableDataFrame':
        """Return the first element of the slice hierarchy."""
        return next(self._browse_slice_hierarchy())

    @abstractmethod
    def _browse_extension_hierarchy(self) -> Iterator['ImmutableDataFrame']:
        """Iterate over the chain of extensions, starting from the base root."""
        raise NotImplementedError

    def _base_root(self) -> 'ImmutableDataFrame':
        """Return the first element of the extension hierarchy."""
        return next(self._browse_extension_hierarchy())

    @abstractmethod
    def _squash_indices_into_parent(self, until_parent: 'ImmutableDataFrame') -> Optional[IDFIndex]:
        """Return the row indices of self relative to until_parent, or None when no selection is applied."""
        raise NotImplementedError

    def _indices_relative_to_root(self) -> Union[IDFIndex, np.ndarray]:
        """
        Return the row indices of self relative to its slice root.

        When no selection is applied, all the positions are returned as a np.ndarray.
        """
        indices = self._squash_indices_into_parent(self._root())
        if indices is None:
            return np.arange(len(self))
        return indices


class _RootImmutableDataFrame(ImmutableDataFrame):
    """
    ImmutableDataFrame directly holding its column data.

    It is the single element of both its slice hierarchy and its extension hierarchy.
    """

    def __init__(self, col_data: Dict[str, RawVector], df_size: int):
        super().__init__()
        self.df_size = df_size
        self.col_data = col_data

    def __len__(self) -> int:
        """Number of rows, as provided at construction."""
        return self.df_size

    def columns(self) -> List[str]:
        """Names of the stored columns."""
        return list(self.col_data)

    def raw_col(self, column: str) -> RawVector:
        """Return the stored values of the column, or raise InvalidParams if the column is unknown."""
        if column not in self.col_data:
            raise InvalidParams('Column "{}" does not exist'.format(column))
        return self.col_data[column]

    def _browse_slice_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # A root is the start (and the end) of its own slice hierarchy
        yield from (self,)

    def _browse_extension_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # A root does not extend anything
        yield from (self,)

    def _squash_indices_into_parent(self, until_parent: 'ImmutableDataFrame') -> None:
        # No row selection is applied on a root
        return None


class _ImmutableDataFrameSubset(ImmutableDataFrame):
    """
    View on a selection of rows of a parent ImmutableDataFrame.

    The rows are designated by their positions in the parent. Columns are sliced lazily and cached,
    reusing the already casted columns of the parent when they are available.
    """

    def __init__(self, idf: 'ImmutableDataFrame', indices: IDFIndex):
        super().__init__()
        self.indices = indices
        self.idf = idf

    def __len__(self) -> int:
        """Number of selected rows."""
        return len(self.indices)

    def columns(self) -> List[str]:
        """Same columns as the parent."""
        return self.idf.columns()

    def raw_col(self, column: str) -> RawVector:
        """Return the selected rows of the parent's raw column. The result is cached."""
        cache_key = (column, 'raw')
        values = self.cache.get(cache_key)
        if values is not None:
            return values

        values = self._raw_col_uncached(column)
        self.cache[cache_key] = values
        return values

    def extend(self, data: Dict[str, RawVector], prefix: str = "base_", align_on: Optional[ImmutableDataFrame] = None):
        """
        Extend the slice root with the new columns (aligned on self unless align_on is given),
        then restrict the extended dataframe to the rows of self.
        """
        target = self if align_on is None else align_on
        extended_root = self._root().extend(data, prefix=prefix, align_on=target)
        return extended_root & self

    def _raw_col_uncached(self, column: str) -> RawVector:
        parent_values = self.idf.raw_col(column)
        return parent_values[self.indices]

    def _text_col_uncached(self, column: str) -> pd.Categorical:
        # Slice the parent's text column if it has already been computed, otherwise cast locally
        from_parent = self.idf._text_col_from_cache(column)
        if from_parent is None:
            return super()._text_col_uncached(column)
        return from_parent[self.indices]

    def _float_col_uncached(self, column: str) -> FloatVector:
        # Slice the parent's float column if it has already been computed, otherwise cast locally
        from_parent = self.idf._float_col_from_cache(column)
        if from_parent is None:
            return super()._float_col_uncached(column)
        return from_parent[self.indices]

    def _date_col_uncached(self, column: str) -> pd.DatetimeIndex:
        # Slice the parent's date column if it has already been computed, otherwise cast locally
        from_parent = self.idf._date_col_from_cache(column)
        if from_parent is None:
            return super()._date_col_uncached(column)
        return from_parent[self.indices]

    def _browse_slice_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # Ancestors first, self last
        yield from self.idf._browse_slice_hierarchy()
        yield self

    def _browse_extension_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # A subset has the extension hierarchy of its slice root
        yield from self._root()._browse_extension_hierarchy()

    def _squash_indices_into_parent(self, until_parent: 'ImmutableDataFrame') -> Optional[IDFIndex]:
        """Compose the selections of self and of its ancestors, stopping at until_parent."""
        if self is until_parent:
            return None

        upstream = self.idf._squash_indices_into_parent(until_parent)
        return self.indices if upstream is None else upstream[self.indices]


class _ExtendedRootImmutableDataFrame(ImmutableDataFrame):
    """
    Root ImmutableDataFrame made of the columns of a base ImmutableDataFrame, exposed under
    a prefixed name, plus extra columns stored locally.

    Reads of base columns are delegated to the base dataframe (so that its cache is used);
    the local cache only serves the extra columns.
    """

    def __init__(self, base_idf: 'ImmutableDataFrame', extra_col_data: Dict[str, RawVector], base_prefix: str = "base_"):
        super().__init__()
        self.base_idf = base_idf
        self.extra_col_data = extra_col_data

        # Prefixed (exposed) name => name of the column in the base dataframe
        aliases = {}
        for base_col in base_idf.columns():
            aliases["{}{}".format(base_prefix, base_col)] = base_col
        self._base_columns_mapping = aliases

    def __len__(self) -> int:
        """Same number of rows as the base dataframe."""
        return len(self.base_idf)

    def columns(self) -> List[str]:
        """Prefixed base columns first, then extra columns."""
        return [*self._base_columns_mapping.keys(), *self.extra_col_data.keys()]

    def raw_col(self, column: str) -> RawVector:
        """Return the raw values of a base or extra column, or raise InvalidParams if the column is unknown."""
        aliases = self._base_columns_mapping
        if column in aliases:
            return self.base_idf.raw_col(aliases[column])

        if column not in self.extra_col_data:
            raise InvalidParams('Column "{}" does not exist'.format(column))
        return self.extra_col_data[column]

    def _text_col_from_cache(self, column: str) -> Optional[pd.Categorical]:
        # Base columns are looked up in the cache of the base dataframe, extra columns in the local cache
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super()._text_col_from_cache(column)
        return self.base_idf._text_col_from_cache(aliases[column])

    def text_col(self, column: str) -> pd.Categorical:
        # Base columns are read (and cached) by the base dataframe, extra columns locally
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super().text_col(column)
        return self.base_idf.text_col(aliases[column])

    def _float_col_from_cache(self, column: str) -> Optional[FloatVector]:
        # Base columns are looked up in the cache of the base dataframe, extra columns in the local cache
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super()._float_col_from_cache(column)
        return self.base_idf._float_col_from_cache(aliases[column])

    def float_col(self, column: str) -> FloatVector:
        # Base columns are read (and cached) by the base dataframe, extra columns locally
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super().float_col(column)
        return self.base_idf.float_col(aliases[column])

    def _date_col_from_cache(self, column: str) -> Optional[pd.DatetimeIndex]:
        # Base columns are looked up in the cache of the base dataframe, extra columns in the local cache
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super()._date_col_from_cache(column)
        return self.base_idf._date_col_from_cache(aliases[column])

    def date_col(self, column: str) -> pd.DatetimeIndex:
        # Base columns are read (and cached) by the base dataframe, extra columns locally
        aliases = self._base_columns_mapping
        if column not in aliases:
            return super().date_col(column)
        return self.base_idf.date_col(aliases[column])

    def _browse_slice_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # An extended dataframe is the root of its own slice hierarchy
        yield from (self,)

    def _browse_extension_hierarchy(self) -> Iterator[ImmutableDataFrame]:
        # Extension hierarchy of the base dataframe first, self last
        yield from self.base_idf._browse_extension_hierarchy()
        yield self

    def _squash_indices_into_parent(self, until_parent: 'ImmutableDataFrame') -> None:
        # No row selection is applied on a root
        return None
