"""
ImputeMissingValues
Generated by Eden via recursive self-improvement
2025-10-28 17:42:20.342010
"""

class ImputeMissingValues:
    """
    Impute missing values in the columns of a pandas DataFrame.

    A fill value is learned per column by ``fit`` (or taken from ``fill_value``
    for the 'custom' strategy) and written into the missing cells of a copy of
    the data by ``transform``.

    Attributes:
        strategy (str): The strategy used for imputing missing values. Can be 'mean',
            'median', 'mode', or 'custom'. Default is 'mean'.
        fill_value (float or int): The custom value to use if strategy is 'custom'.
        cols_to_impute (list): Column labels to impute. If left as None, ``fit``
            sets it to all columns of the data it is given.
        data: The data most recently passed to ``fit`` (None before fitting).
        statistics (dict): Maps each fitted column label to its fill value.
    """

    # Strategies accepted by fit() and transform().
    _STRATEGIES = ('mean', 'median', 'mode', 'custom')

    def __init__(self, strategy='mean', fill_value=None):
        """
        Initialize the ImputeMissingValues object with a specified strategy.

        The strategy is not validated here; it is checked when ``fit`` or
        ``transform`` is called, so it may still be changed after construction.

        Args:
            strategy (str): The imputation strategy. Can be 'mean', 'median',
                'mode', or 'custom'. Default is 'mean'.
            fill_value (float or int): Custom value to use if strategy is 'custom'.
        """
        self.strategy = strategy
        self.fill_value = fill_value
        self.cols_to_impute = None
        # Defined up front so transform() can report "not fitted" cleanly
        # instead of failing on a missing attribute.
        self.data = None
        self.statistics = {}

    def _check_strategy(self):
        """Raise ValueError if the current strategy/fill_value pair is unusable."""
        if self.strategy not in self._STRATEGIES:
            raise ValueError(f"Invalid strategy: {self.strategy}")
        if self.strategy == 'custom' and self.fill_value is None:
            raise ValueError("fill_value must be provided for custom strategy")

    def _compute_fill(self, series):
        """Return the fill value for one column under the current strategy."""
        if self.strategy == 'mean':
            return series.mean()
        if self.strategy == 'median':
            return series.median()
        if self.strategy == 'mode':
            modes = series.mode()
            # mode() is empty when every value is missing; fall back to NaN so
            # the column is simply left as it is.
            return modes.iloc[0] if not modes.empty else float('nan')
        return self.fill_value

    def fit(self, data):
        """
        Fit the imputer to the data.

        Computes one fill value per column in ``cols_to_impute`` (all columns
        of ``data`` when ``cols_to_impute`` is None) and stores the results in
        ``statistics``.

        Args:
            data (pd.DataFrame): The input data containing missing values.

        Raises:
            ValueError: If the strategy is not recognised, or if the strategy is
                'custom' and no fill_value was provided.
        """
        self._check_strategy()

        cols = data.columns if self.cols_to_impute is None else self.cols_to_impute
        stats = {col: self._compute_fill(data[col]) for col in cols}

        # Commit state only after every statistic was computed successfully.
        self.data = data
        self.cols_to_impute = cols
        self.statistics = stats

    def transform(self, data):
        """
        Transform the data by imputing missing values.

        The input is not modified; a copy is filled and returned.

        Args:
            data (pd.DataFrame): The input data to be transformed.

        Returns:
            pd.DataFrame: The transformed data with missing values imputed.

        Raises:
            ValueError: If ``cols_to_impute`` is not set, the strategy is not
                recognised, the 'custom' strategy has no fill_value, or a
                column has no fitted statistic.
        """
        if self.cols_to_impute is None:
            raise ValueError("cols_to_impute not specified. Call fit() first.")
        self._check_strategy()

        data_imputed = data.copy()

        for col in self.cols_to_impute:
            if self.strategy == 'custom':
                # The custom value is read directly, so it needs no prior fit.
                fill_val = self.fill_value
            else:
                if col not in self.statistics:
                    raise ValueError(
                        f"No fitted statistic for column {col!r}. Call fit() first."
                    )
                fill_val = self.statistics[col]

            # Assign the result back: an in-place fillna on data_imputed[col]
            # operates on a temporary and is not guaranteed to reach the frame.
            data_imputed[col] = data_imputed[col].fillna(fill_val)

        return data_imputed

    def fit_transform(self, data):
        """
        Fit the imputer to the data and transform it in one step.

        Args:
            data (pd.DataFrame): The input data containing missing values.

        Returns:
            pd.DataFrame: The transformed data with missing values imputed.
        """
        self.fit(data)
        return self.transform(data)
    
# Example usage:
# import numpy as np
# import pandas as pd
# 
# # Sample dataset with missing values
# data = pd.DataFrame({
#     'Age': [25, 30, np.nan, 40],
#     'Income': [50000, np.nan, 60000, 70000],
#     'City': ['New York', 'Los Angeles', 'Chicago', 'London']
# })
# 
# # Initialize imputer with mean strategy
# imputer = ImputeMissingValues(strategy='mean')
# imputer.cols_to_impute = ['Age', 'Income']
# 
# # Fit and transform the data
# data_imputed = imputer.fit_transform(data)
# 
# print("Original data:")
# print(data)
# print("\nImputed data:")
# print(data_imputed)