#!/usr/bin/env python3
"""
Eden Consolidation Worker
Extracts schemas, compresses logs, creates snapshots

Schedule: 3 AM daily via systemd timer
Manual: python3 consolidate.py
"""

import json
import gzip
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Any
from collections import Counter, defaultdict
import logging

# Configuration
# All paths hang off a fixed install root; change EDEN_ROOT to relocate everything.
EDEN_ROOT = Path("/Eden/CORE")
LOGS_DIR = EDEN_ROOT / "logs"
# This worker's own log file — explicitly excluded from compression below.
CONSOLIDATION_LOG = LOGS_DIR / "phi_consolidation.log"
# Input: one JSON object per line ("episodes"), read by _load_recent_episodes.
STRATEGY_HISTORY = EDEN_ROOT / "phi_fractal" / "meta_cognition" / "strategy_history.jsonl"
SCHEMAS_DIR = EDEN_ROOT / "phi_fractal" / "schemas"   # extracted schema library (output)
ARCHIVE_DIR = LOGS_DIR / "archive"                    # gzip'd old logs
SNAPSHOTS_DIR = EDEN_ROOT / "snapshots"               # per-run JSON snapshots

# Create directories
# NOTE: must run before logging.basicConfig below — ARCHIVE_DIR.mkdir(parents=True)
# also creates LOGS_DIR, which FileHandler needs in order to open CONSOLIDATION_LOG.
SCHEMAS_DIR.mkdir(parents=True, exist_ok=True)
ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
SNAPSHOTS_DIR.mkdir(parents=True, exist_ok=True)

# Setup logging: every message goes both to CONSOLIDATION_LOG and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - CONSOLIDATION - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(CONSOLIDATION_LOG),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class SchemaExtractor:
    """Extract reusable patterns from episode history.

    Episodes are dicts; the fields read here are ``observation.task_type``,
    ``chosen`` (strategy name), ``success`` (truthy flag) and ``confidence``
    (defaults to 0.5 when absent).
    """

    def __init__(self):
        self.schemas = []

    def extract_from_episodes(self, episodes: List[Dict]) -> List[Dict]:
        """Return one schema per task type that has a clearly winning strategy."""
        logger.info(f"🔍 Extracting schemas from {len(episodes)} episodes...")

        if len(episodes) < 3:
            logger.info("   Not enough episodes for schema extraction (need 3+)")
            return []

        # Bucket episodes by the (string-valued) task type in the observation.
        grouped: Dict[str, List[Dict]] = defaultdict(list)
        for episode in episodes:
            kind = episode.get('observation', {}).get('task_type', 'unknown')
            if isinstance(kind, str):
                grouped[kind].append(episode)

        extracted: List[Dict] = []
        for kind, group in grouped.items():
            if len(group) < 2:
                continue  # need repetition before calling anything a pattern
            schema = self._best_schema_for(kind, group)
            if schema is None:
                continue
            extracted.append(schema)
            logger.info(
                f"  ✨ Schema: {kind} → {schema['recommended_strategy']} "
                f"({schema['success_rate']:.1%} success)"
            )

        return extracted

    def _best_schema_for(self, kind: str, group: List[Dict]):
        """Build a schema dict for one task type, or return None if no strategy qualifies."""
        # Tally per-strategy totals, successes and confidences.
        tallies: Dict[str, Dict[str, Any]] = defaultdict(
            lambda: {'success': 0, 'total': 0, 'confidences': []}
        )
        for episode in group:
            chosen = episode.get('chosen')
            if not chosen:
                continue
            tally = tallies[chosen]
            tally['total'] += 1
            if episode.get('success'):
                tally['success'] += 1
            tally['confidences'].append(episode.get('confidence', 0.5))

        if not tallies:
            return None

        # A strategy qualifies with >= 2 uses and a nonzero success rate;
        # ties keep the earlier-seen strategy (dict insertion order).
        winner = None
        top_rate = 0
        for chosen, tally in tallies.items():
            rate = tally['success'] / tally['total']
            if rate > top_rate and tally['total'] >= 2:
                winner, top_rate = chosen, rate

        if not winner:
            return None

        stats = tallies[winner]
        mean_confidence = sum(stats['confidences']) / len(stats['confidences'])
        return {
            'schema_id': f"schema_{kind}_{datetime.now().strftime('%Y%m%d')}",
            'task_type': kind,
            'recommended_strategy': winner,
            'success_rate': top_rate,
            'avg_confidence': mean_confidence,
            'sample_count': len(group),
            'uses': stats['total'],
            'created': datetime.now().isoformat(),
            'rule': f"For {kind} tasks → use {winner}"
        }


class LogCompressor:
    """Compress and archive old logs."""

    def __init__(self):
        # Compressed logs land here; created at module import time.
        self.archive_dir = ARCHIVE_DIR

    def compress_old_logs(self, days_old: int = 7) -> int:
        """Gzip every *.log in LOGS_DIR older than ``days_old`` days.

        The worker's own phi_consolidation log is skipped. The original file
        is removed after a successful compression. Returns the number of
        files compressed; failures are logged and skipped.
        """
        logger.info(f"🗜️  Compressing logs older than {days_old} days...")

        threshold = datetime.now() - timedelta(days=days_old)
        done = 0

        for source in LOGS_DIR.glob("*.log"):
            # Never touch the log this process is currently writing to.
            if source.name.startswith("phi_consolidation"):
                continue
            try:
                modified = datetime.fromtimestamp(source.stat().st_mtime)
                if modified >= threshold:
                    continue  # still fresh

                target = self.archive_dir / f"{source.stem}_{modified.strftime('%Y%m%d')}.log.gz"
                with open(source, 'rb') as raw, gzip.open(target, 'wb') as packed:
                    shutil.copyfileobj(raw, packed)

                source.unlink()
                done += 1
                logger.info(f"  📦 Compressed: {source.name}")
            except Exception as e:
                logger.warning(f"  ⚠️  Failed to compress {source.name}: {e}")

        return done

    def prune_old_archives(self, keep_days: int = 90) -> int:
        """Delete *.gz archives older than ``keep_days`` days; return the count deleted."""
        logger.info(f"🗑️  Pruning archives older than {keep_days} days...")

        threshold = datetime.now() - timedelta(days=keep_days)
        removed = 0

        for packed in self.archive_dir.glob("*.gz"):
            try:
                modified = datetime.fromtimestamp(packed.stat().st_mtime)
                if modified < threshold:
                    packed.unlink()
                    removed += 1
                    logger.info(f"  🗑️  Deleted: {packed.name}")
            except Exception as e:
                logger.warning(f"  ⚠️  Failed to delete {packed.name}: {e}")

        return removed


class ConsolidationWorker:
    """Main consolidation orchestrator.

    Wires together schema extraction, log compression/pruning, and snapshot
    creation into one nightly cycle.
    """

    def __init__(self):
        self.schema_extractor = SchemaExtractor()
        self.log_compressor = LogCompressor()

    def run(self) -> Dict[str, Any]:
        """Execute the full consolidation cycle and return a summary dict.

        Never raises: any failure is logged and recorded under ``'error'``.
        """
        banner = "=" * 70
        logger.info(banner)
        logger.info("🌙 NIGHTLY CONSOLIDATION STARTING")
        logger.info(f"⏰ Time: {datetime.now().isoformat()}")
        logger.info(banner)

        summary: Dict[str, Any] = {
            'timestamp': datetime.now().isoformat(),
            'schemas_created': 0,
            'logs_compressed': 0,
            'archives_pruned': 0,
            'episodes_processed': 0
        }

        try:
            # 1. Load recent episodes
            episodes = self._load_recent_episodes(days=1)
            summary['episodes_processed'] = len(episodes)
            logger.info(f"📚 Loaded {len(episodes)} episodes from last 24 hours")

            # 2. Extract (and persist) schemas when there is anything to mine
            if episodes:
                schemas = self.schema_extractor.extract_from_episodes(episodes)
                summary['schemas_created'] = len(schemas)
                if schemas:
                    self._save_schemas(schemas)

            # 3. Compress stale logs, then 4. drop expired archives
            summary['logs_compressed'] = self.log_compressor.compress_old_logs(days_old=7)
            summary['archives_pruned'] = self.log_compressor.prune_old_archives(keep_days=90)

            # 5. Record a snapshot of this run
            self._create_snapshot(summary)

            logger.info(banner)
            logger.info("✅ CONSOLIDATION COMPLETE")
            logger.info(f"   Schemas created: {summary['schemas_created']}")
            logger.info(f"   Logs compressed: {summary['logs_compressed']}")
            logger.info(f"   Archives pruned: {summary['archives_pruned']}")
            logger.info(f"   Episodes processed: {summary['episodes_processed']}")
            logger.info(banner)
        except Exception as e:
            logger.error(f"❌ Consolidation failed: {e}", exc_info=True)
            summary['error'] = str(e)

        return summary

    def _load_recent_episodes(self, days: int = 1) -> List[Dict]:
        """Read STRATEGY_HISTORY (JSONL) and keep records from the last N days.

        Lines that fail to parse — or lack a valid ISO ``timestamp`` — are
        logged and skipped.
        """
        if not STRATEGY_HISTORY.exists():
            logger.warning("No strategy history file found")
            return []

        earliest = datetime.now() - timedelta(days=days)
        recent: List[Dict] = []

        try:
            with open(STRATEGY_HISTORY, 'r') as history:
                for raw in history:
                    try:
                        record = json.loads(raw)
                        stamp = datetime.fromisoformat(record.get('timestamp', ''))
                        if stamp >= earliest:
                            recent.append(record)
                    except Exception as e:
                        logger.warning(f"Failed to parse episode: {e}")
        except Exception as e:
            logger.error(f"Failed to load episodes: {e}")

        return recent

    def _save_schemas(self, schemas: List[Dict]):
        """Write the extracted schemas to a timestamped file in SCHEMAS_DIR."""
        destination = SCHEMAS_DIR / f"schemas_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        payload = {
            'created': datetime.now().isoformat(),
            'schemas': schemas,
            'count': len(schemas)
        }
        try:
            with open(destination, 'w') as out:
                json.dump(payload, out, indent=2)
            logger.info(f"💾 Saved {len(schemas)} schemas to {destination.name}")
        except Exception as e:
            logger.error(f"Failed to save schemas: {e}")

    def _create_snapshot(self, consolidation_results: Dict):
        """Persist a small JSON snapshot of this run's state to SNAPSHOTS_DIR."""
        snapshot_name = f"snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        snapshot_path = SNAPSHOTS_DIR / snapshot_name

        try:
            state = {
                'timestamp': datetime.now().isoformat(),
                'consolidation_results': consolidation_results,
                # Path only — the policy state file itself is not embedded.
                'meta_policy_state': str(EDEN_ROOT / "phi_fractal" / "meta_cognition" / "meta_policy_state.json"),
                'schemas_count': len(list(SCHEMAS_DIR.glob("*.json"))),
                'log_size_mb': sum(f.stat().st_size for f in LOGS_DIR.glob("*.log") if f.exists()) / (1024 * 1024)
            }

            with open(snapshot_path, 'w') as out:
                json.dump(state, out, indent=2)

            logger.info(f"📸 Created snapshot: {snapshot_name}")
        except Exception as e:
            logger.error(f"Failed to create snapshot: {e}")


def main():
    """Entry point: run one consolidation cycle and report to stdout.

    Returns 0 on success, 1 when the worker recorded an error.
    """
    print("\n🌙 Eden Consolidation Worker")
    print("=" * 70)

    outcome = ConsolidationWorker().run()

    print("\n📊 Results:")
    for label, key in (
        ("Episodes processed", 'episodes_processed'),
        ("Schemas created", 'schemas_created'),
        ("Logs compressed", 'logs_compressed'),
        ("Archives pruned", 'archives_pruned'),
    ):
        print(f"  {label}: {outcome[key]}")

    if 'error' in outcome:
        print(f"\n❌ Error: {outcome['error']}")
        return 1

    print("\n✅ Consolidation complete!\n")
    return 0


if __name__ == "__main__":
    # raise SystemExit instead of calling exit(): the exit() builtin is
    # injected by the `site` module and is absent under `python -S`,
    # whereas SystemExit always exists and propagates main()'s return
    # value as the process exit code.
    raise SystemExit(main())
