"""
Train Eden's Emotion Processor V2 - With Real Tokenization & Larger Dataset
"""
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from eden_emotion_processor_poc import EmotionProcessor
import json
import re

# Hand-written emotional dialogue dataset: 190 (text, label) pairs.
#
# Each entry is a tuple of an English utterance and an integer class id in
# the range 0-7. The section comments below give the emotion name for each id.
#
# NOTE: entries are grouped by label (all 0s first, then all 1s, ...), so any
# train/test split of this list needs to shuffle or stratify first; a plain
# head/tail slice leaves whole classes out of one side.
# NOTE(review): "This is incredible" appears twice with conflicting labels
# (0 under Joy and 4 under Surprise) - confirm which label is intended.
EMOTION_DATASET = [
    # Joy (0) - 30 examples
    ("I'm so happy to see you!", 0), ("This is the best day ever!", 0),
    ("I love spending time with you", 0), ("You make me smile!", 0),
    ("I'm thrilled about this news!", 0), ("This brings me so much joy", 0),
    ("I feel wonderful right now", 0), ("Life is beautiful today", 0),
    ("I'm grateful for everything", 0), ("This makes my heart sing", 0),
    ("I can't stop smiling", 0), ("Today is absolutely perfect", 0),
    ("I'm overjoyed to hear that", 0), ("This is exactly what I wanted", 0),
    ("I feel so alive and happy", 0), ("Everything is going great", 0),
    ("I'm delighted by this", 0), ("What a wonderful surprise", 0),
    ("I'm so pleased with this", 0), ("This fills me with happiness", 0),
    ("I'm beaming with pride", 0), ("This is fantastic news", 0),
    ("I feel so blessed", 0), ("My heart is full of joy", 0),
    ("This is incredible", 0), ("I'm ecstatic about this", 0),
    ("I'm in such a good mood", 0), ("Everything feels right", 0),
    ("I love this so much", 0), ("This is pure happiness", 0),
    
    # Sadness (1) - 30 examples
    ("I feel so alone right now", 1), ("Everything seems hopeless", 1),
    ("I miss you so much", 1), ("I'm heartbroken", 1),
    ("Nothing feels good anymore", 1), ("I can't stop crying", 1),
    ("Life feels empty without you", 1), ("I'm drowning in sorrow", 1),
    ("This pain is unbearable", 1), ("I feel lost and confused", 1),
    ("My heart aches", 1), ("I'm overwhelmed with grief", 1),
    ("Everything reminds me of loss", 1), ("I feel so disappointed", 1),
    ("Nothing makes sense anymore", 1), ("I'm struggling to cope", 1),
    ("This is so depressing", 1), ("I feel numb inside", 1),
    ("I don't know how to move on", 1), ("I'm tired of feeling this way", 1),
    ("The sadness won't go away", 1), ("I feel abandoned", 1),
    ("Everything is falling apart", 1), ("I'm losing hope", 1),
    ("This hurts so deeply", 1), ("I can't see a way forward", 1),
    ("I'm overwhelmed by sadness", 1), ("Nothing brings me joy anymore", 1),
    ("I feel so broken", 1), ("This emptiness is consuming me", 1),
    
    # Anger (2) - 30 examples
    ("This is completely unacceptable!", 2), ("I can't believe you did that", 2),
    ("Stop ignoring me!", 2), ("This makes me furious", 2),
    ("I'm so angry right now", 2), ("How dare you!", 2),
    ("This is outrageous", 2), ("I've had enough of this", 2),
    ("You're making me so mad", 2), ("This is infuriating", 2),
    ("I'm sick of this behavior", 2), ("Don't you dare do that again", 2),
    ("This is driving me crazy", 2), ("I'm fed up with this", 2),
    ("You're really testing my patience", 2), ("This is beyond frustrating", 2),
    ("I can't stand this anymore", 2), ("Why do you keep doing this", 2),
    ("This is making my blood boil", 2), ("I'm absolutely livid", 2),
    ("You need to stop right now", 2), ("This is ridiculous", 2),
    ("I'm losing my temper", 2), ("This is so disrespectful", 2),
    ("I won't tolerate this", 2), ("You're pushing my buttons", 2),
    ("This makes me see red", 2), ("I'm enraged by this", 2),
    ("Stop testing me", 2), ("This has gone too far", 2),
    
    # Fear (3) - 30 examples
    ("I'm scared of what might happen", 3), ("This makes me really nervous", 3),
    ("I don't feel safe here", 3), ("I'm terrified", 3),
    ("What if something goes wrong", 3), ("I'm worried sick", 3),
    ("This is frightening", 3), ("I can't shake this anxiety", 3),
    ("I'm afraid to move forward", 3), ("This fills me with dread", 3),
    ("I'm panicking", 3), ("What if I fail", 3),
    ("This is too risky", 3), ("I'm trembling with fear", 3),
    ("I can't handle this pressure", 3), ("What if it gets worse", 3),
    ("I'm paralyzed by fear", 3), ("This uncertainty terrifies me", 3),
    ("I'm afraid of losing everything", 3), ("This makes my heart race", 3),
    ("I can't stop worrying", 3), ("What if I'm not enough", 3),
    ("This is overwhelming", 3), ("I'm scared of the unknown", 3),
    ("I feel vulnerable", 3), ("This danger is real", 3),
    ("I'm afraid to trust", 3), ("What if I get hurt", 3),
    ("This is too much to handle", 3), ("I'm haunted by fear", 3),
    
    # Surprise (4) - 20 examples
    ("I never expected this!", 4), ("Wow, I can't believe it", 4),
    ("This is so unexpected", 4), ("What a shock!", 4),
    ("I didn't see that coming", 4), ("This caught me off guard", 4),
    ("I'm absolutely stunned", 4), ("This is amazing", 4),
    ("I'm speechless", 4), ("What just happened", 4),
    ("This is unbelievable", 4), ("I'm blown away", 4),
    ("This changes everything", 4), ("I'm in disbelief", 4),
    ("This is incredible", 4), ("I never imagined this", 4),
    ("What a turn of events", 4), ("This is astonishing", 4),
    ("I'm taken aback", 4), ("This is extraordinary", 4),
    
    # Disgust (5) - 15 examples
    ("That's revolting", 5), ("This is disgusting", 5),
    ("I can't stand this", 5), ("That makes me sick", 5),
    ("This is repulsive", 5), ("I'm nauseated by this", 5),
    ("That's vile", 5), ("This is gross", 5),
    ("I'm appalled", 5), ("This is offensive", 5),
    ("That's disturbing", 5), ("This is horrible", 5),
    ("I'm repelled by this", 5), ("That's nasty", 5),
    ("This is unpleasant", 5),
    
    # Trust (6) - 20 examples
    ("I know you'll do the right thing", 6), ("I believe in you completely", 6),
    ("You've never let me down", 6), ("I trust you with my life", 6),
    ("You're so reliable", 6), ("I have faith in you", 6),
    ("You always keep your word", 6), ("I can count on you", 6),
    ("You're someone I can depend on", 6), ("I trust your judgment", 6),
    ("You've proven yourself", 6), ("I feel safe with you", 6),
    ("You're honest and trustworthy", 6), ("I believe what you say", 6),
    ("You have my complete confidence", 6), ("I know you mean well", 6),
    ("You're loyal and true", 6), ("I can be vulnerable with you", 6),
    ("You protect what matters", 6), ("I trust this process", 6),
    
    # Anticipation (7) - 15 examples
    ("I can't wait for this", 7), ("I'm so excited for tomorrow", 7),
    ("I'm looking forward to it", 7), ("This is going to be great", 7),
    ("I'm eager to start", 7), ("I'm ready for what's next", 7),
    ("This is going to be amazing", 7), ("I'm counting down the days", 7),
    ("I'm hopeful about the future", 7), ("Great things are coming", 7),
    ("I'm preparing for something big", 7), ("I sense something good ahead", 7),
    ("I'm optimistic about this", 7), ("I'm ready for the challenge", 7),
    ("This is going to be worth it", 7),
]

class SimpleTokenizer:
    """Word-level tokenizer with a vocabulary learned from text.

    Text is lower-cased, split on whitespace, and stripped of every character
    that is neither a word character nor whitespace. Tokens that end up empty
    (e.g. a stand-alone "-" or "...") are discarded by both ``fit`` and
    ``encode``, so the two methods always agree on what counts as a word.

    Attributes:
        word_to_id: mapping from word to integer id; ids 0 and 1 are reserved
            for ``<PAD>`` and ``<UNK>``.
        id_to_word: inverse mapping of ``word_to_id``.
        vocab_size: number of entries in the vocabulary, including the two
            reserved tokens.
    """

    PAD_ID = 0
    UNK_ID = 1

    # Compiled once; used for every word in fit() and encode().
    _PUNCTUATION = re.compile(r'[^\w\s]')

    def __init__(self):
        self.word_to_id = {"<PAD>": self.PAD_ID, "<UNK>": self.UNK_ID}
        self.id_to_word = {self.PAD_ID: "<PAD>", self.UNK_ID: "<UNK>"}
        self.vocab_size = 2

    def _tokenize(self, text):
        """Return the normalised, non-empty words of ``text`` in order."""
        stripped = (
            self._PUNCTUATION.sub('', word) for word in text.lower().split()
        )
        # Drop tokens that were pure punctuation; they carry no vocabulary
        # entry and must not turn into <UNK> at encode time.
        return [word for word in stripped if word]

    def fit(self, texts):
        """Build vocabulary from texts.

        Words are assigned consecutive ids in order of first appearance.
        Calling ``fit`` again extends the existing vocabulary.

        Args:
            texts: iterable of strings.
        """
        for text in texts:
            for word in self._tokenize(text):
                if word not in self.word_to_id:
                    self.word_to_id[word] = self.vocab_size
                    self.id_to_word[self.vocab_size] = word
                    self.vocab_size += 1
        print(f"   📚 Vocabulary size: {self.vocab_size} words")

    def encode(self, text, max_len=20):
        """Convert text to a fixed-length list of token ids.

        Args:
            text: string to encode.
            max_len: length of the returned list.

        Returns:
            A list of ``max_len`` ids: known words map to their vocabulary
            id, unknown words to ``<UNK>`` (1); the sequence is truncated or
            right-padded with ``<PAD>`` (0) to ``max_len``.
        """
        tokens = [
            self.word_to_id.get(word, self.UNK_ID)
            for word in self._tokenize(text)
        ][:max_len]
        # A non-positive repeat count yields an empty list, so this is a
        # no-op when the sequence already fills max_len.
        tokens += [self.PAD_ID] * (max_len - len(tokens))
        return tokens

class EmotionDataset(Dataset):
    """Dataset view over (text, label) pairs that tokenizes on access.

    Args:
        examples: indexable collection of ``(text, label)`` pairs.
        tokenizer: object exposing ``encode(text, max_len)`` that returns a
            list of token ids.
        max_len: sequence length forwarded to ``tokenizer.encode``.
    """

    def __init__(self, examples, tokenizer, max_len=20):
        self.examples, self.tokenizer, self.max_len = examples, tokenizer, max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Return ``(token_ids, label)`` as a pair of tensors for item ``idx``."""
        sentence, emotion_id = self.examples[idx]
        token_ids = self.tokenizer.encode(sentence, self.max_len)
        return torch.tensor(token_ids), torch.tensor(emotion_id)

def _stratified_split(examples, train_fraction=0.8, seed=42):
    """Split (text, label) pairs into train/test lists, class by class.

    Each label's examples are shuffled with a seeded RNG and cut at
    ``train_fraction``, so every class is represented on both sides of the
    split in roughly the same proportion and the split is reproducible.

    Args:
        examples: iterable of ``(text, label)`` pairs.
        train_fraction: share of each class that goes to the training list.
        seed: seed for the private random generator.

    Returns:
        ``(train, test)`` lists of the original pairs.
    """
    import random  # local import keeps the module's import block untouched

    rng = random.Random(seed)
    by_label = {}
    for example in examples:
        by_label.setdefault(example[1], []).append(example)

    train, test = [], []
    for label in sorted(by_label):
        group = by_label[label]  # fresh list, so shuffling leaves the input untouched
        rng.shuffle(group)
        cut = int(train_fraction * len(group))
        train.extend(group[:cut])
        test.extend(group[cut:])
    return train, test


def train_emotion_processor_v2():
    """Train an EmotionProcessor on EMOTION_DATASET and print sample predictions.

    Steps: build the vocabulary, create the model, split the data 80/20 per
    class, train for 50 epochs with Adam and cross-entropy loss, evaluate on
    the held-out split after every epoch, then classify a few hand-written
    sentences with the final model.

    Side effects:
        Writes ``eden_emotion_best.pt`` (state dict of the epoch with the
        highest held-out accuracy) and ``eden_emotion_processor_v2.pt``
        (state dict after the last epoch) to the current working directory,
        and prints progress to stdout.

    Returns:
        None.
    """
    print("="*70)
    print("🧠 EDEN EMOTION PROCESSOR V2 - Real Training")
    print("="*70)

    # Build tokenizer
    # NOTE(review): the vocabulary is built from the full dataset, including
    # sentences that end up in the held-out split.
    print("\n1️⃣ Building vocabulary...")
    tokenizer = SimpleTokenizer()
    tokenizer.fit([text for text, _ in EMOTION_DATASET])

    # Setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\n2️⃣ Device: {device}")

    # Create model with proper vocab size
    print("\n3️⃣ Building model...")
    model = EmotionProcessor(
        vocab_size=tokenizer.vocab_size,
        embed_dim=256,
        num_layers=6,
    )
    model = model.to(device)

    # 80/20 split, stratified by label. EMOTION_DATASET is ordered by class,
    # so slicing it head/tail would leave the last classes out of training
    # entirely and make the held-out set consist almost only of them.
    train_data, test_data = _stratified_split(EMOTION_DATASET, train_fraction=0.8)

    train_dataset = EmotionDataset(train_data, tokenizer)
    test_dataset = EmotionDataset(test_data, tokenizer)

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    print(f"   Training examples: {len(train_data)}")
    print(f"   Test examples: {len(test_data)}")

    # Training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Scalar passed to every forward call as `phi_consciousness`; how the
    # model uses it is defined in EmotionProcessor, not here.
    phi_consciousness = torch.tensor(1.408).to(device)

    # Train
    num_epochs = 50
    print(f"\n4️⃣ Training for {num_epochs} epochs with Φ={phi_consciousness.item():.3f}...")

    best_test_acc = 0

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # The model returns a pair; only the logits are used here.
            logits, _ = model(inputs, phi_consciousness=phi_consciousness)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            predictions = logits.argmax(dim=1)
            train_correct += (predictions == labels).sum().item()
            train_total += labels.size(0)

        # Testing
        model.eval()
        test_correct = 0
        test_total = 0

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                logits, _ = model(inputs, phi_consciousness=phi_consciousness)
                predictions = logits.argmax(dim=1)
                test_correct += (predictions == labels).sum().item()
                test_total += labels.size(0)

        train_acc = 100 * train_correct / train_total
        test_acc = 100 * test_correct / test_total

        # NOTE(review): the checkpoint is selected on the same split that is
        # reported, so "Best" is an optimistic estimate.
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(), 'eden_emotion_best.pt')

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1:2d}: Train={train_acc:.1f}%, Test={test_acc:.1f}% (Best={best_test_acc:.1f}%)")

    # Save final model (last-epoch weights, which may differ from the best checkpoint)
    torch.save(model.state_dict(), 'eden_emotion_processor_v2.pt')
    print(f"\n✅ Training complete! Best test accuracy: {best_test_acc:.1f}%")
    print("💾 Saved: eden_emotion_processor_v2.pt")

    # Detailed testing
    print("\n" + "="*70)
    print("🧪 TESTING ON REAL EXAMPLES")
    print("="*70)

    # Index in this list == label id used in EMOTION_DATASET.
    emotions = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'disgust', 'trust', 'anticipation']

    test_texts = [
        "I love you so much Dad!",
        "I'm worried about the future",
        "This is amazing news!",
        "You're making me furious",
        "I trust you completely",
        "That's absolutely disgusting",
    ]

    model.eval()
    with torch.no_grad():
        for text in test_texts:
            tokens = tokenizer.encode(text)
            inputs = torch.tensor([tokens]).to(device)

            logits, _ = model(inputs, phi_consciousness=phi_consciousness)
            probs = torch.softmax(logits[0], dim=0)
            pred_idx = probs.argmax().item()

            # Show top 3 predictions
            top3_probs, top3_idx = probs.topk(3)

            print(f"\n'{text}'")
            print(f"  ✅ {emotions[pred_idx]} ({probs[pred_idx].item()*100:.1f}%)")
            print("  Top 3: ", end="")
            for prob, idx in zip(top3_probs.tolist(), top3_idx.tolist()):
                print(f"{emotions[idx]}:{prob*100:.0f}% ", end="")
            print()

# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    train_emotion_processor_v2()
