"""
DataDrivenInsightEnhancement
Generated by Eden via recursive self-improvement
2025-11-01 00:06:01.973479
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Purpose: Analyze and extract meaningful insights from datasets using machine learning models.
def DataDrivenInsightEnhancement(data_path: str, target_column: str):
    """
    Fit a linear regression on a CSV dataset and score it on a held-out split.

    The file is read with ``pd.read_csv``. Every column except
    ``target_column`` is passed to the model as a feature, so those columns
    are expected to be numeric (no encoding or imputation is done here).
    20% of the rows are held out for testing, using a fixed random seed (42)
    so repeated runs on the same file give the same split.

    Parameters:
    - data_path (str): Path to the CSV dataset file.
    - target_column (str): The name of the column containing the target variable.

    Returns:
    - DataFrame: Columns 'Actual' and 'Predicted' for the held-out test rows,
      indexed by the rows' original positions in the dataset.
    - float: R-squared score of the model on the held-out test rows.

    Raises:
    - FileNotFoundError: If ``data_path`` does not exist.
    - KeyError: If ``target_column`` is not a column of the dataset.
    """
    data = pd.read_csv(data_path)

    # Fail early with a message that names the column and the file, instead of
    # pandas' generic "not found in axis" error from drop().
    if target_column not in data.columns:
        raise KeyError(
            f"Target column {target_column!r} not found in {data_path!r}"
        )

    X = data.drop(columns=[target_column])
    y = data[target_column]

    # Hold out 20% of the rows; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train a linear regression model on the training portion only.
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and score on the rows the model has not seen.
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    # y_test keeps its original row index, so each prediction can be traced
    # back to its source row in the dataset.
    results = pd.DataFrame({
        'Actual': y_test,
        'Predicted': y_pred,
    })

    return results, r2

# Example usage. The dataset path below is a placeholder, so the demo only
# runs when this file is executed as a script, not when it is imported.
data_path = "path/to/your/dataset.csv"
target_column = "TargetVariable"

if __name__ == "__main__":
    # Bind the score to `r2`, not `r2_score`: reusing that name would replace
    # the sklearn.metrics.r2_score function imported at the top of the file
    # and break every later call to DataDrivenInsightEnhancement.
    results, r2 = DataDrivenInsightEnhancement(data_path, target_column)
    print("R-squared score:", r2)
    print(results)