#!/usr/bin/env python3
"""
Eden's Autonomous Reasoning Trainer
Learns and practices advanced reasoning techniques
- Chain-of-thought reasoning
- Interleaved reasoning
- Multi-step problem decomposition
"""
import requests
import json
import time
from pathlib import Path
from datetime import datetime

class EdenReasoningTrainer:
    """
    Autonomous reasoning improvement system.

    Sends practice questions to the HTTP generate endpoint configured in
    ``self.eden_api``, checks each reply for ``<think>``/``<answer>`` tags,
    and keeps a JSON log of the most recent sessions on disk.
    """

    # How many of the most recent sessions are written to the training log.
    MAX_SAVED_SESSIONS = 100

    def __init__(self):
        """Set up endpoint, model, question bank and load prior sessions."""
        self.eden_api = "http://localhost:11434/api/generate"
        self.model = "qwen2.5:7b"

        self.training_log = Path("/Eden/DATA/reasoning_training_log.json")
        self.sessions = self.load_sessions()

        # Reasoning patterns to learn
        self.reasoning_patterns = [
            "interleaved_reasoning",
            "chain_of_thought",
            "multi_step_decomposition",
            "analogical_reasoning",
            "causal_reasoning"
        ]

        # Training questions by difficulty
        self.training_questions = {
            "easy": [
                "If there are 12 months in a year and Sarah saves $50 per month, how much does she save in 2 years?",
                "A train leaves at 3 PM and travels for 4 hours. What time does it arrive?",
                "If 5 apples cost $10, how much does 1 apple cost?"
            ],
            "medium": [
                "If a quantum computer uses 156 qubits and each qubit can be in 2 states simultaneously, how many total states can it represent? Express as a power of 2.",
                "A neural network has 3 layers with 128, 64, and 32 neurons. If each connection has a weight, how many total weights are there between layers?",
                "If training time doubles with each epoch but accuracy improves by 5% per epoch, starting at 70% accuracy, what's the accuracy after 4 epochs?"
            ],
            "hard": [
                "A hybrid activation function uses 88% ReLU and 12% smooth activation. If we want to optimize for both speed and accuracy, and ReLU is 10x faster but smooth captures 20% more nuance, what percentage should we use?",
                "If a quantum optimization saves $960 but took 3 hours, and a classical optimization costs $0 but takes 50 hours, which is more cost-effective if your time is worth $50/hour?",
                "An AGI learns 48 things per day and proposes 12 improvements. If each improvement has a 20% chance of success and each success improves performance by 1%, what's the expected performance improvement per week?"
            ]
        }

        print("🧠 Eden's Reasoning Trainer")
        print(f"   Training sessions: {len(self.sessions)}")
        print(f"   Patterns to master: {len(self.reasoning_patterns)}")
        print()

    def load_sessions(self):
        """Return the session list stored in the training log, or ``[]``.

        A missing, unreadable, corrupt or non-list log is treated as "no
        history" so a damaged file cannot stop the trainer from starting.
        Note that the next ``save_session`` call then overwrites that file.
        """
        try:
            with open(self.training_log, encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return []
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"   Warning: could not read {self.training_log}: {e}")
            return []

        if not isinstance(data, list):
            print(f"   Warning: {self.training_log} does not contain a list; ignoring it")
            return []
        return data

    def save_session(self, session):
        """Append *session* to the history and persist the most recent ones.

        Only the last ``MAX_SAVED_SESSIONS`` entries are written; the
        in-memory list keeps everything recorded during this run.
        """
        self.sessions.append(session)
        # The log directory may not exist yet on a fresh machine.
        self.training_log.parent.mkdir(parents=True, exist_ok=True)
        with open(self.training_log, 'w', encoding="utf-8") as f:
            json.dump(self.sessions[-self.MAX_SAVED_SESSIONS:], f, indent=2)

    def ask_eden(self, prompt, timeout=60):
        """POST *prompt* to the model endpoint and return its text reply.

        Returns the ``'response'`` field of the JSON reply (``''`` when the
        field is absent), or ``None`` when the request fails, the server
        answers with a non-OK status, or the body is not a JSON object.
        Failures are reported on stdout rather than raised so that a
        training loop can carry on.
        """
        try:
            response = requests.post(
                self.eden_api,
                json={"model": self.model, "prompt": prompt, "stream": False},
                timeout=timeout
            )
            if not response.ok:
                print(f"   Error: HTTP {response.status_code} from {self.eden_api}")
                return None
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError: body was not valid JSON.
            print(f"   Error: {e}")
            return None

        if not isinstance(payload, dict):
            print("   Error: unexpected response payload")
            return None
        return payload.get('response', '')

    @staticmethod
    def _analyze_response(response, reasoning_time):
        """Build the quality record for one model reply."""
        think_count = response.count('<think>')
        answer_count = response.count('<answer>')
        return {
            'time': reasoning_time,
            'think_tags': think_count,
            'answer_tags': answer_count,
            'has_interleaving': think_count > 0 and answer_count > 0,
            'response_length': len(response)
        }

    @staticmethod
    def _select_difficulty(session_count):
        """Map the number of recorded sessions to a difficulty tier."""
        if session_count < 3:
            return "easy"
        if session_count < 10:
            return "medium"
        return "hard"

    @staticmethod
    def _summarize_sessions(sessions):
        """Return ``(avg_time, interleaving_rate)`` for *sessions*.

        ``avg_time`` averages only sessions that produced a quality record
        (``None`` if there are none). ``interleaving_rate`` is the share of
        all given sessions whose reply had interleaving, so a session without
        a reply counts as a miss (``None`` if *sessions* is empty).
        """
        if not sessions:
            return None, None

        scored = [s.get('quality') for s in sessions if s.get('quality')]
        interleaving_rate = sum(
            bool(q.get('has_interleaving')) for q in scored
        ) / len(sessions)
        if not scored:
            return None, interleaving_rate

        avg_time = sum(q['time'] for q in scored) / len(scored)
        return avg_time, interleaving_rate

    def practice_interleaved_reasoning(self, question, difficulty):
        """Practice interleaved <think> and <answer> reasoning.

        Returns ``(response, quality)`` where ``quality`` is the dict built
        by ``_analyze_response``, or ``(None, None)`` when no (or an empty)
        reply was received.
        """

        prompt = f"""Practice interleaved reasoning on this {difficulty} question:

Question: {question}

Use this format (alternate thinking and answering):
<think>Break down the problem...</think>
<answer>First step result...</answer>
<think>Continue reasoning...</think>
<answer>Next step result...</answer>
<think>Final reasoning...</think>
<answer>Final answer...</answer>

Solve step-by-step:"""

        print(f"   Question: {question}")

        start = time.time()
        response = self.ask_eden(prompt, timeout=60)
        reasoning_time = time.time() - start

        if not response:
            return None, None

        print(f"   Time: {reasoning_time:.2f}s")

        quality = self._analyze_response(response, reasoning_time)

        if quality['has_interleaving']:
            print(f"   ✅ Good interleaving: {quality['think_tags']} thoughts, {quality['answer_tags']} answers")
        else:
            print(f"   ⚠️  Needs work: Missing interleaved structure")

        return response, quality

    def training_cycle(self):
        """Run one reasoning training cycle and return its session record.

        The session is appended to ``self.sessions`` and written to the
        training log. Its ``'quality'`` is ``None`` when no reply was received.
        """

        print(f"\n{'='*70}")
        print(f"REASONING TRAINING CYCLE {len(self.sessions) + 1}")
        print(f"{'='*70}\n")

        # Difficulty ramps up with the number of recorded sessions.
        difficulty = self._select_difficulty(len(self.sessions))

        print(f"📊 Difficulty: {difficulty.upper()}")

        # Get random question
        import random
        question = random.choice(self.training_questions[difficulty])

        # Practice interleaved reasoning
        print(f"\n🧠 Practicing interleaved reasoning...")
        response, quality = self.practice_interleaved_reasoning(question, difficulty)

        session = {
            'timestamp': datetime.now().isoformat(),
            'cycle': len(self.sessions) + 1,
            'difficulty': difficulty,
            'question': question,
            'quality': quality,
            'pattern': 'interleaved_reasoning'
        }

        self.save_session(session)

        print(f"\n{'='*70}")
        print(f"Session complete!")
        print(f"{'='*70}\n")

        return session

    def continuous_training(self, cycles=20, interval_hours=1):
        """Run *cycles* training cycles, sleeping *interval_hours* between them.

        Afterwards prints a summary computed from the sessions of this run
        only. The summary tolerates zero cycles and runs in which no cycle
        received a reply.
        """

        print(f"\n🔄 Starting continuous reasoning training")
        print(f"   Cycles: {cycles}")
        print(f"   Interval: {interval_hours} hour(s)")
        print()

        # Track this run's sessions directly: slicing self.sessions[-cycles:]
        # would select the entire history when cycles == 0.
        run_sessions = []
        for i in range(cycles):
            run_sessions.append(self.training_cycle())

            if i < cycles - 1:
                print(f"💤 Next training in {interval_hours} hour(s)...\n")
                time.sleep(interval_hours * 3600)

        # Summary
        print(f"\n{'='*70}")
        print(f"TRAINING SUMMARY: {cycles} CYCLES COMPLETE")
        print(f"{'='*70}\n")

        avg_time, avg_interleaving = self._summarize_sessions(run_sessions)

        if avg_interleaving is None:
            print("No training cycles were run.")
            print()
            return

        if avg_time is None:
            print("Average time: n/a (no responses received)")
        else:
            print(f"Average time: {avg_time:.2f}s")
        print(f"Interleaving success: {avg_interleaving*100:.1f}%")
        print()


if __name__ == "__main__":
    # Script entry point: build the trainer, show a banner, then run
    # exactly one training cycle as a test session.
    banner_rule = "=" * 70
    trainer = EdenReasoningTrainer()

    for banner_line in (
        banner_rule,
        "EDEN'S REASONING TRAINING - TEST SESSION",
        banner_rule,
    ):
        print(banner_line)

    trainer.training_cycle()
