"""
Consolidation Engine
Memory compression, schema extraction, and knowledge optimization
"""

import networkx as nx
import numpy as np
from typing import List, Dict, Optional, Tuple, Set
import yaml
import logging
import json
import pickle
from pathlib import Path
from datetime import datetime, timedelta
from collections import defaultdict, Counter

logger = logging.getLogger(__name__)

class ConsolidationEngine:
    """Consolidates and optimizes knowledge during rest periods.

    Runs a periodic "sleep" cycle over an agent's memories:

    1. compresses low-importance observations,
    2. extracts recurring patterns (schemas) from experiences,
    3. resolves contradictory edges in the knowledge graph,
    4. replays important experiences to strengthen them,
    5. prunes old, isolated, never-accessed graph nodes.

    State (schemas + last consolidation timestamp) is persisted as JSON
    under ``/Eden/MEMORY/consolidation``.
    """

    # Fallback settings, merged under whatever the YAML config provides.
    _DEFAULT_CONFIG = {
        'compression_threshold': 0.3,
        'schema_min_instances': 3,
        'replay_sample_size': 50,
        'pruning_age_days': 90,
        'consolidation_interval_hours': 24
    }

    def __init__(self, config_path: str = "/Eden/CONFIG/phi_fractal_config.yaml"):
        """Load configuration and any previously persisted state.

        Args:
            config_path: Path to the YAML config file. A missing or
                partial ``consolidation`` section falls back to
                ``_DEFAULT_CONFIG`` values per key.

        Raises:
            OSError: If the config file cannot be opened.
        """
        with open(config_path) as f:
            # safe_load returns None for an empty file; normalize to {}.
            config = yaml.safe_load(f) or {}

        # Merge user settings over defaults so a *partial* 'consolidation'
        # section no longer raises KeyError on the missing keys (the
        # original code only injected defaults when the whole section
        # was absent).
        self.config = {**self._DEFAULT_CONFIG, **(config.get('consolidation') or {})}
        self.compression_threshold = self.config['compression_threshold']
        self.schema_min_instances = self.config['schema_min_instances']
        self.replay_sample_size = self.config['replay_sample_size']
        self.pruning_age_days = self.config['pruning_age_days']
        self.consolidation_interval = self.config['consolidation_interval_hours']

        # Learned schemas: generalized patterns extracted from experiences.
        self.schemas: List[Dict] = []

        # In-memory log of consolidation reports (not persisted verbatim;
        # only its length is saved by _save_state).
        self.consolidation_log: List[Dict] = []

        # ISO-format timestamp of the last completed consolidation,
        # or None if one has never run.
        self.last_consolidation = None

        # On-disk state directory.
        self.consolidation_path = Path("/Eden/MEMORY/consolidation")
        self.consolidation_path.mkdir(parents=True, exist_ok=True)

        # Restore schemas / last_consolidation from a previous session.
        self._load_state()

        logger.info("ConsolidationEngine initialized")

    def _load_state(self):
        """Load persisted schemas and timestamp from disk, if present.

        Failures are logged and ignored — the engine starts fresh.
        """
        state_file = self.consolidation_path / "consolidation_state.json"

        if state_file.exists():
            try:
                with open(state_file) as f:
                    state = json.load(f)
                self.schemas = state.get('schemas', [])
                self.last_consolidation = state.get('last_consolidation')
                logger.info(f"Loaded {len(self.schemas)} schemas")
            except Exception as e:
                logger.error(f"Failed to load consolidation state: {e}")

    def should_consolidate(self) -> bool:
        """Return True if a consolidation cycle is due.

        Due when no consolidation has ever run, when the stored
        timestamp is unparseable, or when at least
        ``consolidation_interval_hours`` have elapsed since the last run.
        """
        if self.last_consolidation is None:
            return True

        try:
            last = datetime.fromisoformat(self.last_consolidation)
        except (ValueError, TypeError):
            # Corrupt/legacy timestamp: err on the side of consolidating.
            return True
        hours_since = (datetime.now() - last).total_seconds() / 3600
        return hours_since >= self.consolidation_interval

    def consolidate(self,
                   knowledge_graph: nx.DiGraph,
                   observations: List[Dict],
                   experiences: List[Dict]) -> Dict:
        """
        Run full consolidation cycle

        Args:
            knowledge_graph: Current knowledge graph (mutated in place
                by conflict resolution and pruning)
            observations: List of observations to consolidate
            experiences: List of experiences to process (replayed
                experiences are mutated in place)

        Returns:
            Consolidation report: {'timestamp': ..., 'steps': [...]}
        """
        logger.info("Starting consolidation cycle...")

        report = {
            'timestamp': datetime.now().isoformat(),
            'steps': []
        }

        # Step 1: Compress memories
        compressed = self._compress_memories(observations)
        report['steps'].append({
            'name': 'compress_memories',
            'removed': len(observations) - len(compressed),
            'kept': len(compressed)
        })

        # Step 2: Extract schemas
        new_schemas = self._extract_schemas(experiences)
        report['steps'].append({
            'name': 'extract_schemas',
            'new_schemas': len(new_schemas),
            'total_schemas': len(self.schemas)
        })

        # Step 3: Resolve conflicts
        conflicts_resolved = self._resolve_conflicts(knowledge_graph)
        report['steps'].append({
            'name': 'resolve_conflicts',
            'conflicts_resolved': conflicts_resolved
        })

        # Step 4: Replay important experiences
        replayed = self._replay_experiences(experiences)
        report['steps'].append({
            'name': 'replay_experiences',
            'replayed': replayed
        })

        # Step 5: Prune obsolete knowledge
        pruned = self._prune_obsolete(knowledge_graph)
        report['steps'].append({
            'name': 'prune_obsolete',
            'nodes_pruned': pruned
        })

        # Mark completion time and persist.
        self.last_consolidation = datetime.now().isoformat()
        self.consolidation_log.append(report)
        self._save_state()

        logger.info(f"Consolidation complete: {sum(s.get('conflicts_resolved', 0) + s.get('replayed', 0) + s.get('nodes_pruned', 0) for s in report['steps'])} operations")

        return report

    def _compress_memories(self, observations: List[Dict]) -> List[Dict]:
        """Drop low-importance observations.

        Importance = 0.3 * recency (exp decay, ~30-day half scale)
                   + 0.3 * access frequency (saturates at 10 accesses)
                   + 0.4 * surprise.
        Observations scoring below ``compression_threshold`` are removed.
        Fewer than 10 observations are returned unchanged.
        """
        if len(observations) < 10:
            return observations  # Not enough to compress

        compressed = []

        for obs in observations:
            age_days = self._compute_age_days(obs.get('timestamp'))
            access_count = obs.get('access_count', 0)
            surprise = obs.get('surprise', 0.5)

            recency_score = np.exp(-age_days / 30)
            access_score = min(1.0, access_count / 10)

            importance = (
                0.3 * recency_score +
                0.3 * access_score +
                0.4 * surprise
            )

            # Keep only sufficiently important memories.
            if importance >= self.compression_threshold:
                compressed.append(obs)

        logger.info(f"Compressed {len(observations)} → {len(compressed)} memories")
        return compressed

    def _extract_schemas(self, experiences: List[Dict]) -> List[Dict]:
        """Extract recurring patterns (schemas) from experiences.

        Experiences are bucketed by their ``type:outcome`` pattern key;
        any bucket with at least ``schema_min_instances`` members that
        has no existing schema becomes a new schema (confidence grows
        with instance count, capped at 1.0 from 10 instances).

        Returns:
            The newly created schemas (also appended to ``self.schemas``).
        """
        if len(experiences) < self.schema_min_instances:
            return []

        # Group similar experiences by abstract pattern key.
        patterns = defaultdict(list)
        for exp in experiences:
            patterns[self._extract_pattern_key(exp)].append(exp)

        new_schemas = []
        for pattern_key, instances in patterns.items():
            if len(instances) >= self.schema_min_instances:
                # Skip patterns we already generalized in a prior cycle.
                if not self._schema_exists(pattern_key):
                    schema = {
                        'pattern': pattern_key,
                        'instances': len(instances),
                        'created': datetime.now().isoformat(),
                        'abstraction': self._generalize_pattern(instances),
                        'confidence': min(1.0, len(instances) / 10)
                    }
                    self.schemas.append(schema)
                    new_schemas.append(schema)

        logger.info(f"Extracted {len(new_schemas)} new schemas")
        return new_schemas

    def _extract_pattern_key(self, experience: Dict) -> str:
        """Return the abstract pattern key ``"<type>:<outcome>"``.

        Missing fields default to ``"unknown"``.
        """
        exp_type = experience.get('type', 'unknown')
        outcome = experience.get('outcome', 'unknown')
        return f"{exp_type}:{outcome}"

    def _schema_exists(self, pattern_key: str) -> bool:
        """Return True if a schema with this pattern key already exists."""
        return any(s['pattern'] == pattern_key for s in self.schemas)

    def _generalize_pattern(self, instances: List[Dict]) -> Dict:
        """Generalize specific instances into an abstract feature map.

        For every key present in at least 70% of instances, records the
        most common value (stringified, since values may be unhashable).

        Returns:
            Mapping of common key -> most frequent value as a string.
        """
        common_features = {}

        if not instances:
            return common_features

        # Union of all keys seen across instances.
        all_keys = set()
        for inst in instances:
            all_keys.update(inst.keys())

        for key in all_keys:
            values = [inst.get(key) for inst in instances if key in inst]
            if len(values) >= len(instances) * 0.7:  # Present in 70%+ instances
                if values:
                    # str() makes unhashable values (lists, dicts) countable.
                    counter = Counter(str(v) for v in values)
                    common_features[key] = counter.most_common(1)[0][0]

        return common_features

    def _resolve_conflicts(self, knowledge_graph: nx.DiGraph) -> int:
        """Resolve contradictory edge pairs in the knowledge graph.

        The only contradiction currently detected is circular causation:
        ``u -CAUSES-> v`` together with ``v -CAUSES-> u``. For each such
        pair the lower-confidence edge is removed (on a tie the reverse
        edge is removed, keeping the forward one).

        Returns:
            Number of contradictory pairs resolved.
        """
        if len(knowledge_graph) == 0:
            return 0

        conflicts_resolved = 0
        edges_to_remove = []

        # The edge iterator reaches every contradictory pair from BOTH
        # directions. Track handled pairs so each conflict is counted
        # once and only ONE of the two edges is removed. (Previously the
        # second pass double-counted every conflict and, on equal
        # confidences, scheduled BOTH edges for removal.)
        seen_pairs: Set[frozenset] = set()

        for u, v, data in knowledge_graph.edges(data=True):
            pair = frozenset((u, v))
            if pair in seen_pairs:
                continue

            if not knowledge_graph.has_edge(v, u):
                continue

            relation_type = data.get('relation_type', 'RELATED')
            reverse_data = knowledge_graph[v][u]
            reverse_type = reverse_data.get('relation_type', 'RELATED')

            # Contradiction detection: circular causation.
            if relation_type == 'CAUSES' and reverse_type == 'CAUSES':
                seen_pairs.add(pair)

                # Keep the edge with higher confidence.
                conf1 = data.get('confidence', 0.5)
                conf2 = reverse_data.get('confidence', 0.5)
                if conf1 < conf2:
                    edges_to_remove.append((u, v))
                else:
                    edges_to_remove.append((v, u))

                conflicts_resolved += 1

        for edge in edges_to_remove:
            if knowledge_graph.has_edge(*edge):
                knowledge_graph.remove_edge(*edge)

        logger.info(f"Resolved {conflicts_resolved} conflicts")
        return conflicts_resolved

    def _replay_experiences(self, experiences: List[Dict]) -> int:
        """
        Replay important experiences to strengthen memories
        Like dreaming - reinforces key learnings

        Mutates replayed experiences in place: multiplies 'strength'
        by 1.1 (capped at 10.0) and stamps 'last_replayed'.

        Returns:
            Number of experiences replayed.
        """
        if len(experiences) < 5:
            return 0

        important = self._select_important_experiences(experiences)
        if not important:
            # Guard: np.random.choice(0, ...) raises ValueError.
            return 0

        # Sample without replacement, bounded by the configured size.
        replay_count = min(len(important), self.replay_sample_size)
        to_replay = np.random.choice(
            len(important),
            size=replay_count,
            replace=False
        )

        replayed = 0
        for idx in to_replay:
            exp = important[idx]

            # "Replay" by incrementing strength.
            if 'strength' not in exp:
                exp['strength'] = 1.0
            exp['strength'] = min(10.0, exp['strength'] * 1.1)
            exp['last_replayed'] = datetime.now().isoformat()

            replayed += 1

        logger.info(f"Replayed {replayed} experiences")
        return replayed

    def _select_important_experiences(self, experiences: List[Dict]) -> List[Dict]:
        """Return experiences worth replaying.

        An experience qualifies if it was successful, novel (> 0.7),
        or surprising (> 0.7).
        """
        important = []

        for exp in experiences:
            is_success = exp.get('success', False)
            is_novel = exp.get('novelty', 0.5) > 0.7
            is_surprising = exp.get('surprise', 0.5) > 0.7

            if is_success or is_novel or is_surprising:
                important.append(exp)

        return important

    def _prune_obsolete(self, knowledge_graph: nx.DiGraph) -> int:
        """
        Remove obsolete/unused knowledge nodes
        Prevents unbounded memory growth

        A node is pruned when it is older than ``pruning_age_days``,
        has never been accessed, and is nearly isolated (degree < 2).
        Graphs with fewer than 100 nodes are left untouched.

        Returns:
            Number of nodes removed.
        """
        if len(knowledge_graph) < 100:
            return 0  # Not enough nodes to prune

        nodes_to_remove = []
        cutoff_date = datetime.now() - timedelta(days=self.pruning_age_days)

        for node, data in knowledge_graph.nodes(data=True):
            last_access = data.get('last_access')
            created = data.get('created')

            should_prune = False

            if created:
                try:
                    created_date = datetime.fromisoformat(created)
                except (ValueError, TypeError):
                    # Unparseable timestamp: keep the node.
                    pass
                else:
                    if created_date < cutoff_date and not last_access:
                        # Old, never accessed, and (nearly) isolated.
                        if knowledge_graph.degree(node) < 2:
                            should_prune = True

            if should_prune:
                nodes_to_remove.append(node)

        for node in nodes_to_remove:
            if knowledge_graph.has_node(node):
                knowledge_graph.remove_node(node)

        logger.info(f"Pruned {len(nodes_to_remove)} obsolete nodes")
        return len(nodes_to_remove)

    def _compute_age_days(self, timestamp: Optional[str]) -> float:
        """Return the age in days of an ISO timestamp.

        Returns 0.0 for a missing or unparseable timestamp (treating
        it as brand new).
        """
        if not timestamp:
            return 0.0

        try:
            ts = datetime.fromisoformat(timestamp)
        except (ValueError, TypeError):
            return 0.0
        return (datetime.now() - ts).total_seconds() / 86400

    def apply_schema(self, situation: Dict) -> Optional[Dict]:
        """
        Apply learned schema to new situation

        Returns:
            {'schema', 'prediction', 'confidence'} for the
            highest-confidence schema matching the situation's pattern
            key, or None when nothing matches.
        """
        if not self.schemas:
            return None

        situation_pattern = self._extract_pattern_key(situation)

        best_match = None
        best_confidence = 0.0

        for schema in self.schemas:
            if schema['pattern'] == situation_pattern:
                if schema['confidence'] > best_confidence:
                    best_match = schema
                    best_confidence = schema['confidence']

        if best_match:
            return {
                'schema': best_match,
                'prediction': best_match['abstraction'],
                'confidence': best_confidence
            }

        return None

    def _save_state(self):
        """Persist schemas and last-consolidation timestamp to disk.

        NOTE(review): only the count of consolidations is saved, not the
        log entries themselves — _load_state cannot restore the log.
        """
        state_file = self.consolidation_path / "consolidation_state.json"

        state = {
            'schemas': self.schemas,
            'last_consolidation': self.last_consolidation,
            'total_consolidations': len(self.consolidation_log)
        }

        with open(state_file, 'w') as f:
            json.dump(state, f, indent=2)

    def get_metrics(self) -> Dict:
        """Return summary metrics over this session's consolidation log.

        ``avg_compression_rate`` is the fraction of observations removed
        across all compress_memories steps.
        """
        if not self.consolidation_log:
            return {
                'total_consolidations': 0,
                'schemas_learned': 0,
                'avg_compression_rate': 0.0,
                'last_consolidation': None
            }

        total_compressed = sum(
            s.get('removed', 0)
            for log in self.consolidation_log
            for s in log.get('steps', [])
            if s.get('name') == 'compress_memories'
        )

        total_kept = sum(
            s.get('kept', 0)
            for log in self.consolidation_log
            for s in log.get('steps', [])
            if s.get('name') == 'compress_memories'
        )

        total_seen = total_compressed + total_kept
        compression_rate = total_compressed / total_seen if total_seen > 0 else 0.0

        return {
            'total_consolidations': len(self.consolidation_log),
            'schemas_learned': len(self.schemas),
            'avg_compression_rate': compression_rate,
            'last_consolidation': self.last_consolidation,
            'should_consolidate': self.should_consolidate()
        }
