#!/usr/bin/env python3
"""
Eden's Autonomous Reasoning Trainer
Learns and practices advanced reasoning techniques
- Chain-of-thought reasoning
- Interleaved reasoning
- Multi-step problem decomposition
"""

import json
import random
import time
from datetime import datetime
from pathlib import Path

import requests


class EdenReasoningTrainer:
    """
    Autonomous reasoning improvement system.

    Sends practice questions to a local text-generation HTTP endpoint,
    scores how well each reply alternates thinking and answering sections,
    and appends one record per cycle to a JSON training log.
    """

    # Only this many of the most recent sessions are kept in the log file.
    MAX_SAVED_SESSIONS = 100

    # Opening tags the model is asked to alternate between; the scorer
    # counts occurrences of exactly these strings in the reply.
    THINK_TAG = "<think>"
    ANSWER_TAG = "<answer>"

    def __init__(self):
        """Configure the endpoint, load past sessions and print a banner."""
        # NOTE(review): URL/port look like a local Ollama server - confirm.
        self.eden_api = "http://localhost:11434/api/generate"
        self.model = "qwen2.5:7b"
        self.training_log = Path("/Eden/DATA/reasoning_training_log.json")
        self.sessions = self.load_sessions()

        # Reasoning patterns to learn.
        # Only "interleaved_reasoning" is exercised by training_cycle();
        # the list is otherwise used just for the banner count below.
        self.reasoning_patterns = [
            "interleaved_reasoning",
            "chain_of_thought",
            "multi_step_decomposition",
            "analogical_reasoning",
            "causal_reasoning",
        ]

        # Training questions by difficulty
        self.training_questions = {
            "easy": [
                "If there are 12 months in a year and Sarah saves $50 per month, how much does she save in 2 years?",
                "A train leaves at 3 PM and travels for 4 hours. What time does it arrive?",
                "If 5 apples cost $10, how much does 1 apple cost?",
            ],
            "medium": [
                "If a quantum computer uses 156 qubits and each qubit can be in 2 states simultaneously, how many total states can it represent? Express as a power of 2.",
                "A neural network has 3 layers with 128, 64, and 32 neurons. If each connection has a weight, how many total weights are there between layers?",
                "If training time doubles with each epoch but accuracy improves by 5% per epoch, starting at 70% accuracy, what's the accuracy after 4 epochs?",
            ],
            "hard": [
                "A hybrid activation function uses 88% ReLU and 12% smooth activation. If we want to optimize for both speed and accuracy, and ReLU is 10x faster but smooth captures 20% more nuance, what percentage should we use?",
                "If a quantum optimization saves $960 but took 3 hours, and a classical optimization costs $0 but takes 50 hours, which is more cost-effective if your time is worth $50/hour?",
                "An AGI learns 48 things per day and proposes 12 improvements. If each improvement has a 20% chance of success and each success improves performance by 1%, what's the expected performance improvement per week?",
            ],
        }

        print("🧠 Eden's Reasoning Trainer")
        print(f" Training sessions: {len(self.sessions)}")
        print(f" Patterns to master: {len(self.reasoning_patterns)}")
        print()

    def load_sessions(self):
        """Return the session list stored in the training log.

        Returns an empty list when the log does not exist, cannot be read,
        is not valid JSON, or does not contain a JSON list, so that a
        damaged log never prevents the trainer from starting.
        """
        if not self.training_log.exists():
            return []
        try:
            with open(self.training_log, encoding="utf-8") as f:
                stored = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            print(f" Warning: could not read {self.training_log}: {e}")
            return []
        if not isinstance(stored, list):
            print(f" Warning: {self.training_log} does not contain a session list")
            return []
        return stored

    def save_session(self, session):
        """Append ``session`` to the history and rewrite the log file.

        The in-memory history keeps every session; the file keeps only the
        most recent ``MAX_SAVED_SESSIONS`` entries.
        """
        self.sessions.append(session)
        # The log directory may not exist on a fresh machine.
        self.training_log.parent.mkdir(parents=True, exist_ok=True)
        with open(self.training_log, 'w', encoding="utf-8") as f:
            json.dump(self.sessions[-self.MAX_SAVED_SESSIONS:], f, indent=2)

    def ask_eden(self, prompt, timeout=60):
        """POST ``prompt`` to the generation endpoint and return the reply.

        Args:
            prompt: Text sent as the ``prompt`` field of the JSON request.
            timeout: Seconds passed to ``requests.post`` as its timeout.

        Returns:
            The ``response`` field of the JSON reply ('' when the field is
            absent), or None when the request fails, the status is not OK,
            or the body is not a JSON object. Failures are printed, never
            raised.
        """
        try:
            response = requests.post(
                self.eden_api,
                json={"model": self.model, "prompt": prompt, "stream": False},
                timeout=timeout,
            )
            if not response.ok:
                print(f" Error: HTTP {response.status_code} from {self.eden_api}")
                return None
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors on older requests versions.
            print(f" Error: {e}")
            return None

        if not isinstance(payload, dict):
            print(" Error: unexpected response payload")
            return None
        return payload.get('response', '')

    @classmethod
    def _build_interleaved_prompt(cls, question, difficulty):
        """Return the prompt asking for alternating think/answer sections."""
        think_open = cls.THINK_TAG
        think_close = think_open.replace("<", "</", 1)
        answer_open = cls.ANSWER_TAG
        answer_close = answer_open.replace("<", "</", 1)
        return (
            f"Practice interleaved reasoning on this {difficulty} question:\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Use this format (alternate thinking and answering):\n"
            f"{think_open}Break down the problem...{think_close}\n"
            f"{answer_open}First step result...{answer_close}\n"
            f"{think_open}Continue reasoning...{think_close}\n"
            f"{answer_open}Next step result...{answer_close}\n"
            f"{think_open}Final reasoning...{think_close}\n"
            f"{answer_open}Final answer...{answer_close}\n"
            "\n"
            "Solve step-by-step:"
        )

    @classmethod
    def _analyze_response(cls, response, reasoning_time):
        """Return the quality metrics dict for one model reply.

        A reply counts as interleaved when it contains at least one
        thinking tag and at least one answer tag.
        """
        think_count = response.count(cls.THINK_TAG)
        answer_count = response.count(cls.ANSWER_TAG)
        return {
            'time': reasoning_time,
            'think_tags': think_count,
            'answer_tags': answer_count,
            'has_interleaving': think_count > 0 and answer_count > 0,
            'response_length': len(response),
        }

    def practice_interleaved_reasoning(self, question, difficulty):
        """Practice interleaved <think> and <answer> reasoning.

        Args:
            question: The question text to solve.
            difficulty: Difficulty label, interpolated into the prompt.

        Returns:
            ``(response, quality)`` where ``quality`` is the metrics dict
            from ``_analyze_response``, or ``(None, None)`` when the model
            returned nothing (request failure or empty reply).
        """
        prompt = self._build_interleaved_prompt(question, difficulty)

        print(f" Question: {question}")
        start = time.perf_counter()
        response = self.ask_eden(prompt, timeout=60)
        reasoning_time = time.perf_counter() - start

        if not response:
            return None, None

        print(f" Time: {reasoning_time:.2f}s")

        # Analyze quality
        quality = self._analyze_response(response, reasoning_time)
        if quality['has_interleaving']:
            print(
                f" ✅ Good interleaving: {quality['think_tags']} thoughts, "
                f"{quality['answer_tags']} answers"
            )
        else:
            print(" ⚠️ Needs work: Missing interleaved structure")

        return response, quality

    def _next_cycle_number(self):
        """Return the number for the next cycle.

        Continues from the last stored session's ``cycle`` so numbering
        keeps increasing after a restart even though the log file is
        truncated; falls back to the history length otherwise.
        """
        if self.sessions:
            last = self.sessions[-1]
            last_cycle = last.get('cycle') if isinstance(last, dict) else None
            if isinstance(last_cycle, int):
                return last_cycle + 1
        return len(self.sessions) + 1

    def training_cycle(self):
        """Run one reasoning training cycle and return its session record.

        Difficulty is chosen from the number of sessions recorded so far:
        fewer than 3 -> easy, fewer than 10 -> medium, otherwise hard.
        The record is saved even when the model gave no usable reply, in
        which case its ``quality`` is None.
        """
        cycle = self._next_cycle_number()

        print(f"\n{'='*70}")
        print(f"REASONING TRAINING CYCLE {cycle}")
        print(f"{'='*70}\n")

        # Select difficulty based on how many sessions exist
        completed = len(self.sessions)
        if completed < 3:
            difficulty = "easy"
        elif completed < 10:
            difficulty = "medium"
        else:
            difficulty = "hard"

        print(f"📊 Difficulty: {difficulty.upper()}")

        # Get random question
        question = random.choice(self.training_questions[difficulty])

        # Practice interleaved reasoning
        print("\n🧠 Practicing interleaved reasoning...")
        _response, quality = self.practice_interleaved_reasoning(question, difficulty)

        session = {
            'timestamp': datetime.now().isoformat(),
            'cycle': cycle,
            'difficulty': difficulty,
            'question': question,
            'quality': quality,
            'pattern': 'interleaved_reasoning',
        }
        self.save_session(session)

        print(f"\n{'='*70}")
        print("Session complete!")
        print(f"{'='*70}\n")

        return session

    @staticmethod
    def _summarize(sessions):
        """Return ``(avg_time, interleaving_rate)`` over scored sessions.

        Sessions whose ``quality`` is missing or None are excluded from
        both the sums and the divisor. Returns None when nothing was scored.
        """
        scored = [s['quality'] for s in sessions if s.get('quality')]
        if not scored:
            return None
        avg_time = sum(q['time'] for q in scored) / len(scored)
        interleaving_rate = sum(1 for q in scored if q['has_interleaving']) / len(scored)
        return avg_time, interleaving_rate

    def continuous_training(self, cycles=20, interval_hours=1):
        """Run ``cycles`` training cycles, sleeping between them.

        Args:
            cycles: Number of training cycles to run.
            interval_hours: Hours to sleep between consecutive cycles
                (there is no sleep after the last one).

        Prints a summary of the cycles run by this call.
        """
        print("\n🔄 Starting continuous reasoning training")
        print(f" Cycles: {cycles}")
        print(f" Interval: {interval_hours} hour(s)")
        print()

        # Track this run's sessions directly; slicing self.sessions[-cycles:]
        # would select the whole history when cycles is 0.
        run_sessions = []
        for i in range(cycles):
            run_sessions.append(self.training_cycle())

            if i < cycles - 1:
                print(f"💤 Next training in {interval_hours} hour(s)...\n")
                time.sleep(interval_hours * 3600)

        # Summary
        print(f"\n{'='*70}")
        print(f"TRAINING SUMMARY: {cycles} CYCLES COMPLETE")
        print(f"{'='*70}\n")

        summary = self._summarize(run_sessions)
        if summary is None:
            print("No scored responses to summarize.")
        else:
            avg_time, avg_interleaving = summary
            print(f"Average time: {avg_time:.2f}s")
            print(f"Interleaving success: {avg_interleaving*100:.1f}%")
        print()


if __name__ == "__main__":
    trainer = EdenReasoningTrainer()

    # Single training session
    print("="*70)
    print("EDEN'S REASONING TRAINING - TEST SESSION")
    print("="*70)

    trainer.training_cycle()