"""
NewsAggregator
Generated by Eden via recursive self-improvement
2025-10-27 17:16:05.701972
"""

from datetime import datetime, timedelta
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

class NewsAggregator:
    """
    Aggregate news articles scraped from a fixed set of news sites.

    Methods:
        __init__(self): Initializes the NewsAggregator with default parameters.
        get_top_news(self, category='All'): Retrieves top news articles by category.
        search_articles(self, query): Search for articles containing a specific keyword or phrase.
        _get_category_news(self, category): Helper method to scrape news from a specific category.
        _parse_block(self, block, source, ...): Helper to turn one listing block into an article dict.
        _process_article(self, url): Helper method to process and extract article information.
    """

    # Seconds to wait on any single HTTP request, so one unresponsive
    # source cannot hang the whole aggregation.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.sources = ['https://www.reuters.com', 'https://www.bbc.co.uk/news',
                       'https://www.cnn.com']
        self.categories = ['World', 'Business', 'Technology', 'Sports', 'Entertainment']
        # NOTE(review): placeholder only — no method in this class currently
        # uses the API key; scraping is done directly against the sites.
        self.api_key = 'YOUR_NEWS_API_KEY'  # Replace with actual API key

    def get_top_news(self, category='All'):
        """
        Retrieve top news articles by category.

        Args:
            category (str): One of the predefined categories or 'All'.

        Returns:
            list: List of dictionaries containing article details.

        Raises:
            ValueError: If the category is not recognized.
        """
        if category == 'All':
            articles = []
            for cat in self.categories:
                articles += self._get_category_news(cat)
            return articles
        if category in self.categories:
            return self._get_category_news(category)
        raise ValueError(f"Invalid category: {category}")

    def search_articles(self, query):
        """
        Search through the configured sources for a keyword or phrase.

        Only articles dated within the last 3 days are returned.

        Args:
            query (str): The keyword or phrase to search for.

        Returns:
            list: List of article dicts ('title', 'url', 'summary',
                'source', 'date') containing the search term.
        """
        all_articles = []
        for source in self.sources:
            # params= lets requests URL-encode the query, so multi-word
            # phrases like "climate change" form a valid URL.
            response = requests.get(f"{source}/search", params={'q': query},
                                    timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                continue  # best-effort: skip unreachable sources
            soup = BeautifulSoup(response.text, 'html.parser')
            for block in soup.find_all('div', class_='article-block'):
                article = self._parse_block(
                    block, source,
                    title_tag='h1', summary_class='summary',
                    date_class='date', date_format="%Y-%m-%d %H:%M",
                    max_age=timedelta(days=3))
                if article is not None:
                    all_articles.append(article)
        return all_articles

    def _get_category_news(self, category):
        """
        Helper method to fetch news from a specific category.

        Only articles dated within the last 7 days are returned.

        Args:
            category (str): The category to fetch news for.

        Returns:
            list: List of article dicts ('title', 'url', 'summary',
                'category', 'date', 'source').
        """
        articles = []
        endpoint = f"/category/{category.lower()}"
        for source in self.sources:
            response = requests.get(f"{source}{endpoint}",
                                    timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                continue  # best-effort: skip unreachable sources
            soup = BeautifulSoup(response.text, 'html.parser')
            for block in soup.find_all('div', class_='article-card'):
                article = self._parse_block(
                    block, source,
                    title_tag='h2', summary_class='card-summary',
                    date_class='time', date_format="%Y-%m-%dT%H:%M:%S",
                    max_age=timedelta(days=7))
                if article is not None:
                    article['category'] = category
                    articles.append(article)
        return articles

    def _parse_block(self, block, source, *, title_tag, summary_class,
                     date_class, date_format, max_age):
        """
        Extract one article dict from a listing block.

        Args:
            block: A BeautifulSoup element for one article listing.
            source (str): Base URL of the source site (used to resolve
                relative article links and label the result).
            title_tag (str): Tag name holding the title ('h1'/'h2').
            summary_class (str): CSS class of the summary paragraph.
            date_class (str): CSS class of the date span.
            date_format (str): strptime format for the date text.
            max_age (timedelta): Discard articles older than this.

        Returns:
            dict | None: Article details, or None when the block is
            malformed, its date is unparseable, or it is too old.
        """
        title_el = block.find(title_tag)
        summary_el = block.find('p', class_=summary_class)
        date_el = block.find('span', class_=date_class)
        link_el = block.find('a')
        # Skip malformed listings instead of crashing the whole scrape.
        if not (title_el and summary_el and date_el and link_el):
            return None
        try:
            date = datetime.strptime(date_el.text, date_format)
        except ValueError:
            return None  # unparseable date: treat the listing as malformed
        if datetime.now() - date > max_age:
            return None  # too old for this view
        return {
            'title': title_el.text,
            # Resolve relative hrefs (e.g. '/story/1') against the source.
            'url': urljoin(source, link_el.get('href', '')),
            'summary': summary_el.text,
            # netloc gives a consistent label regardless of URL path.
            'source': urlparse(source).netloc,
            'date': date_el.text,
        }

    def _process_article(self, url):
        """
        Helper method to process individual article details.

        Args:
            url (str): The URL of the article.

        Returns:
            dict | None: 'title', 'content', 'date', 'source' for the
            article, or None when the fetch fails or the page layout
            is not recognized.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        title_el = soup.find('h1', class_='article-title')
        content_el = soup.find('div', class_='article-content')
        date_el = soup.find('span', class_='article-date')
        if not (title_el and content_el and date_el):
            return None  # unexpected page layout
        return {
            'title': title_el.text,
            'content': content_el.text,
            'date': date_el.text,
            # The site host, not the last URL path segment.
            'source': urlparse(url).netloc,
        }

def main():
    """Demo: fetch one category's top news, then run a keyword search."""
    aggregator = NewsAggregator()

    # Get top news from a specific category
    world_news = aggregator.get_top_news('World')
    print(f"Retrieved {len(world_news)} World news articles.")

    # Search for articles containing a keyword
    search_results = aggregator.search_articles("climate change")
    print(f"Found {len(search_results)} articles mentioning 'climate change'.")


# Example usage:
if __name__ == "__main__":
    main()