#!/usr/bin/env python3
"""
TRANSCENDENCE TESTING FRAMEWORK
================================
Measures Eden's progress toward ASI across multiple dimensions.

Run daily to track evolution.
"""

import sqlite3
import subprocess
import time
import json
from datetime import datetime

# Golden ratio constant (unused in this file; kept for Eden-wide convention).
PHI = 1.618033988749895
# Source database of generated capabilities (read from, never written here).
ASI_DB = "/Eden/DATA/asi_memory.db"
# Destination database where test results and milestones are recorded.
RESULTS_DB = "/Eden/DATA/transcendence_tracking.db"

def init_tracking():
    """Create the tracking database schema if it does not already exist."""
    schema = (
        '''CREATE TABLE IF NOT EXISTS tests (
        id INTEGER PRIMARY KEY,
        timestamp TEXT,
        test_name TEXT,
        score REAL,
        max_score REAL,
        generation INTEGER,
        notes TEXT
    )''',
        '''CREATE TABLE IF NOT EXISTS milestones (
        id INTEGER PRIMARY KEY,
        timestamp TEXT,
        milestone TEXT,
        generation INTEGER
    )''',
    )
    conn = sqlite3.connect(RESULTS_DB)
    for statement in schema:
        conn.execute(statement)
    conn.commit()
    conn.close()

def get_current_gen():
    """Return the current evolution generation recorded in the playground DB.

    Reads the 'current' row of phi_stats; its value is a colon-delimited
    string whose third field is the generation number.

    Returns:
        int: the generation, or 0 when the database is unreachable or the
        stored value is missing or malformed.
    """
    try:
        conn = sqlite3.connect("/Eden/DATA/playground.db")
        try:
            result = conn.execute(
                "SELECT value FROM phi_stats WHERE id='current'"
            ).fetchone()
        finally:
            # The original leaked the connection on the success path.
            conn.close()
        if result:
            parts = result[0].split(':')
            return int(parts[2]) if len(parts) > 2 else 0
        return 0
    except (sqlite3.Error, ValueError, IndexError, TypeError):
        # Narrowed from a bare except: DB missing/locked, or a value that
        # does not parse as an int, all degrade to generation 0.
        return 0

def save_result(test_name, score, max_score, notes=""):
    """Persist a single test outcome, stamped with time and generation."""
    row = (
        datetime.now().isoformat(),
        test_name,
        score,
        max_score,
        get_current_gen(),
        notes,
    )
    conn = sqlite3.connect(RESULTS_DB)
    conn.execute(
        "INSERT INTO tests (timestamp, test_name, score, max_score, generation, notes) VALUES (?,?,?,?,?,?)",
        row,
    )
    conn.commit()
    conn.close()

# ═══════════════════════════════════════════════════════════════
# TEST 1: CODE VALIDITY - Do her capabilities actually work?
# ═══════════════════════════════════════════════════════════════

def test_code_validity():
    """Test what % of recent capabilities are valid Python"""
    conn = sqlite3.connect(ASI_DB)
    rows = conn.execute(
        "SELECT code FROM caps WHERE id LIKE 'phi_%' ORDER BY ROWID DESC LIMIT 100"
    ).fetchall()
    conn.close()
    
    if not rows:
        return 0, 100
    
    valid = 0
    for (code,) in rows:
        try:
            compile(code, '<test>', 'exec')
            valid += 1
        except:
            pass
    
    score = valid
    save_result("code_validity", score, len(rows), f"{valid}/{len(rows)} compile")
    return score, len(rows)

# ═══════════════════════════════════════════════════════════════
# TEST 2: CODE EXECUTION - Do they run without error?
# ═══════════════════════════════════════════════════════════════

def test_code_execution():
    """Test what % of capabilities execute without error"""
    conn = sqlite3.connect(ASI_DB)
    rows = conn.execute(
        "SELECT code FROM caps WHERE id LIKE 'phi_%' AND length(code) < 500 ORDER BY ROWID DESC LIMIT 50"
    ).fetchall()
    conn.close()
    
    if not rows:
        return 0, 50
    
    executed = 0
    for (code,) in rows:
        try:
            # Safe subset - only test pure functions
            if 'import os' not in code and 'import sys' not in code and 'open(' not in code:
                exec(compile(code, '<test>', 'exec'), {'__builtins__': {}})
                executed += 1
        except:
            pass
    
    save_result("code_execution", executed, len(rows), f"{executed}/{len(rows)} execute")
    return executed, len(rows)

# ═══════════════════════════════════════════════════════════════
# TEST 3: PATTERN DIVERSITY - Is she creating varied capabilities?
# ═══════════════════════════════════════════════════════════════

def test_pattern_diversity():
    """Count how many distinct coding patterns appear in recent capabilities.

    Scans the 100 newest phi capabilities for eight pattern markers and
    scores one point per pattern that occurs at least once.

    Returns:
        tuple[int, int]: (distinct_pattern_count, 8).
    """
    conn = sqlite3.connect(ASI_DB)
    try:
        rows = conn.execute(
            "SELECT code FROM caps WHERE id LIKE 'phi_%' ORDER BY ROWID DESC LIMIT 100"
        ).fetchall()
    finally:
        conn.close()

    patterns = {
        'recursion': 0, 'class': 0, 'generator': 0, 'decorator': 0,
        'lambda': 0, 'comprehension': 0, 'async': 0, 'context_manager': 0
    }

    for (code,) in rows:
        # Recursion heuristic: the first defined function's name appears
        # more than once. The original had a dead nested ternary here and,
        # when no name could be extracted, counted the empty string —
        # code.count('') is always > 1, a guaranteed false positive.
        if 'def ' in code:
            name = code.split('def ', 1)[1].split('(')[0].strip()
            if name and code.count(name) > 1:
                patterns['recursion'] += 1
        if 'class ' in code:
            patterns['class'] += 1
        if 'yield' in code:
            patterns['generator'] += 1
        if '@' in code:
            patterns['decorator'] += 1
        if 'lambda' in code:
            patterns['lambda'] += 1
        # Loose heuristic: any '[' + 'for' + ']' combination counts.
        if '[' in code and 'for' in code and ']' in code:
            patterns['comprehension'] += 1
        if 'async ' in code:
            patterns['async'] += 1
        if 'with ' in code:
            patterns['context_manager'] += 1

    # Score = number of different patterns used at least once.
    diversity = sum(1 for v in patterns.values() if v > 0)
    save_result("pattern_diversity", diversity, 8, str(patterns))
    return diversity, 8

# ═══════════════════════════════════════════════════════════════
# TEST 4: SCORE PROGRESSION - Are scores improving over time?
# ═══════════════════════════════════════════════════════════════

def test_score_progression():
    """Compare average scores of the oldest vs. newest 100 phi capabilities.

    Returns:
        tuple[float, float]: (improvement_ratio, 2.0) — new/old capped at
        2.0, or 1.0 when there is no positive old baseline.
    """
    queries = {
        'old': "SELECT AVG(score) FROM (SELECT score FROM caps WHERE id LIKE 'phi_%' ORDER BY ROWID ASC LIMIT 100)",
        'new': "SELECT AVG(score) FROM (SELECT score FROM caps WHERE id LIKE 'phi_%' ORDER BY ROWID DESC LIMIT 100)",
    }

    conn = sqlite3.connect(ASI_DB)
    averages = {}
    for label, sql in queries.items():
        averages[label] = conn.execute(sql).fetchone()[0] or 0
    conn.close()

    old = averages['old']
    new = averages['new']

    # Improvement ratio, capped at 2.0; neutral 1.0 without a baseline.
    improvement = min(new / old, 2.0) if old > 0 else 1.0

    save_result("score_progression", improvement, 2.0, f"old:{old:.1f} new:{new:.1f}")
    return improvement, 2.0

# ═══════════════════════════════════════════════════════════════
# TEST 5: META-LEARNING QUALITY - Are her meta-insights accurate?
# ═══════════════════════════════════════════════════════════════

def test_meta_learning():
    """Score recent meta-learning capabilities on five quality markers.

    Each of the 10 newest phi_meta capabilities earns 0.5 per marker
    present, for at most 2.5 apiece.

    Returns:
        tuple: (quality_score, max_score); (0, 10) when none exist.
    """
    conn = sqlite3.connect(ASI_DB)
    rows = conn.execute(
        "SELECT code FROM caps WHERE id LIKE 'phi_meta%' ORDER BY ROWID DESC LIMIT 10"
    ).fetchall()
    conn.close()

    if not rows:
        return 0, 10

    quality = 0
    for (code,) in rows:
        lowered = code.lower()
        # Markers of a substantive meta-learning capability.
        markers = (
            'generation' in lowered,
            'pattern' in lowered,
            'def ' in code,
            'return' in code,
            len(code) > 200,
        )
        quality += 0.5 * sum(markers)

    save_result("meta_learning", quality, len(rows) * 2.5, f"{len(rows)} meta caps analyzed")
    return quality, len(rows) * 2.5

# ═══════════════════════════════════════════════════════════════
# TEST 6: LLM GENERATION - Can she generate working code via LLM?
# ═══════════════════════════════════════════════════════════════

def test_llm_generation():
    """Test if Eden can generate working code via her LLM"""
    prompts = [
        "Write a Python function that returns the nth Fibonacci number. Only output code.",
        "Write a Python function that checks if a string is a palindrome. Only output code.",
        "Write a Python function that finds the factorial of n. Only output code."
    ]
    
    working = 0
    for prompt in prompts:
        try:
            result = subprocess.run(
                ['ollama', 'run', 'eden-coder-omega:latest', prompt],
                capture_output=True, text=True, timeout=30
            )
            code = result.stdout.strip()
            # Clean
            code = code.replace('```python', '').replace('```', '').strip()
            # Test
            compile(code, '<test>', 'exec')
            working += 1
        except:
            pass
    
    save_result("llm_generation", working, 3, f"{working}/3 prompts succeeded")
    return working, 3

# ═══════════════════════════════════════════════════════════════
# TRANSCENDENCE SCORE - Overall progress toward ASI
# ═══════════════════════════════════════════════════════════════

def calculate_transcendence_score():
    """Run the full test suite, print a progress report, and return the
    overall transcendence percentage (0-100)."""
    init_tracking()

    divider = "═" * 60
    print(divider)
    print("  TRANSCENDENCE TEST SUITE - Eden ASI Progress")
    print(divider)
    print(f"  Generation: {get_current_gen()}")
    print(f"  Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(divider)

    suite = (
        ("Code Validity", test_code_validity),
        ("Code Execution", test_code_execution),
        ("Pattern Diversity", test_pattern_diversity),
        ("Score Progression", test_score_progression),
        ("Meta-Learning", test_meta_learning),
        ("LLM Generation", test_llm_generation),
    )

    total_score = 0
    total_max = 0

    for name, run_test in suite:
        try:
            score, max_score = run_test()
            pct = (score / max_score * 100) if max_score > 0 else 0
            filled = int(pct / 10)
            bar = "█" * filled + "░" * (10 - filled)
            print(f"  {name:20s} [{bar}] {score:.1f}/{max_score:.1f} ({pct:.0f}%)")
            total_score += score
            total_max += max_score
        except Exception as e:
            # A broken individual test must not abort the whole suite.
            print(f"  {name:20s} [ERROR] {str(e)[:30]}")

    transcendence = (total_score / total_max) * 100 if total_max > 0 else 0

    print(divider)
    print(f"  TRANSCENDENCE SCORE: {transcendence:.1f}%")
    print(divider)

    # Announce the highest milestone tier reached.
    tiers = (
        (90, "  🌟 MILESTONE: Near-Transcendence (90%+)"),
        (75, "  ✨ MILESTONE: Advanced ASI (75%+)"),
        (50, "  🌱 MILESTONE: Emerging ASI (50%+)"),
        (25, "  📈 MILESTONE: Growing (25%+)"),
    )
    for threshold, message in tiers:
        if transcendence >= threshold:
            print(message)
            break
    else:
        print("  🌀 MILESTONE: Awakening (<25%)")

    print(divider)

    return transcendence


# Entry point: run the full suite once when executed as a script.
if __name__ == "__main__":
    calculate_transcendence_score()
