"""
TextSummaryGenerator
Generated by Eden via recursive self-improvement
2025-11-01 11:34:19.383098
"""

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import defaultdict
import heapq

# Fetch the NLTK data packages this module relies on. This runs every time the
# module is imported.
# NOTE(review): presumably 'punkt' backs sent_tokenize/word_tokenize and
# 'stopwords' backs stopwords.words(); newer NLTK releases may additionally
# need 'punkt_tab' -- confirm against the installed version.
for _package in ('punkt', 'stopwords'):
    nltk.download(_package)

def preprocess_text(text):
    """
    Tokenizes the input text into words and filters out noise tokens.

    A token is kept when it is longer than two characters and is not an
    English stop word. Stop words are matched case-insensitively, so
    capitalised forms such as "The" are dropped as well. Kept tokens are
    returned with their original casing, in document order, duplicates
    included.

    :param text: The raw text document as a string.
    :return: A list of words from the text, excluding stop words.
    """
    stop_words = set(stopwords.words('english'))
    return [
        token
        for token in word_tokenize(text)
        # The stop-word list is lowercase, so compare against the lowercased
        # token; otherwise sentence-initial stop words slip through.
        if len(token) > 2 and token.lower() not in stop_words
    ]

def sentence_score(sentence_tokens, sentence):
    """
    Calculates the score of a sentence as the number of tokens it contains.

    Matching is case-insensitive: both the sentence and each token are
    lowercased before comparison. A token counts when it occurs anywhere in
    the sentence as a substring, and every entry of ``sentence_tokens`` is
    checked independently, so repeated tokens are counted repeatedly.

    :param sentence_tokens: List of words from the text.
    :param sentence: The raw text sentence as a string.
    :return: A score representing the importance of the sentence (an int,
        0 when no token occurs in the sentence).
    """
    # Lowercase the sentence once instead of on every loop iteration.
    lowered_sentence = sentence.lower()
    # Tokens must be lowercased too, otherwise any token containing an
    # uppercase letter could never match the lowercased sentence.
    return sum(1 for token in sentence_tokens if token.lower() in lowered_sentence)

def generate_summary(text, num_sentences=5):
    """
    Generates a summary of the input text by selecting key sentences.

    Each sentence is scored by the number of distinct tokens it shares with
    the document's filtered token list (see ``preprocess_text``). The
    highest-scoring sentences are selected and emitted in their original
    document order. Sentences with a score of zero are never selected.

    :param text: The raw text document as a string.
    :param num_sentences: Number of top sentences to include in the summary. Default is 5.
    :return: A string containing the generated summary (selected sentences
        joined by single spaces; empty if nothing scores above zero).
    """
    sentences = sent_tokenize(text)
    # Build a set once: membership tests against the raw token list would be
    # a linear scan for every token of every sentence.
    content_tokens = set(preprocess_text(text))

    sentence_scores = {}
    for idx, sentence in enumerate(sentences):
        # Distinct sentence tokens that are also content tokens.
        score = len(content_tokens.intersection(word_tokenize(sentence)))
        if score:
            sentence_scores[idx] = score

    top_indices = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    # Restore document order so the summary reads naturally.
    return ' '.join(sentences[i] for i in sorted(top_indices))

# Demo: summarise a short sample paragraph and print the result.
# The literal is split across lines for readability; the resulting string
# starts and ends with a newline.
text = (
    "\n"
    "Natural language processing (NLP) is a field within the intersection of "
    "computer science and artificial intelligence concerned with the "
    "interactions between computers and human language. The goal of NLP is to "
    "develop methods that enable automatic understanding, analysis, "
    "generation, or translation of natural languages. Some applications "
    "include sentiment analysis, text classification, machine translation, "
    "information retrieval, speech recognition, and more."
    "\n"
)
summary = generate_summary(text)
print(summary)