#!/usr/bin/env python3
"""
Eden Meta-Cognition Loop (MCL)
Top Priority Module: Enables Eden to evaluate, select, and learn from reasoning strategies
"""

import json
import logging
import random
from dataclasses import asdict, dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import numpy as np

# Configuration: every MCL artifact (log, policy state, episode history)
# lives under the Eden CORE tree.
EDEN_ROOT = Path("/Eden/CORE")
META_LOG_PATH = EDEN_ROOT / "logs" / "phi_meta.log"
META_STATE_PATH = EDEN_ROOT / "phi_fractal" / "meta_cognition" / "meta_policy_state.json"
STRATEGY_HISTORY_PATH = EDEN_ROOT / "phi_fractal" / "meta_cognition" / "strategy_history.jsonl"

# Create directories at import time so later writes (log file, state JSON,
# history JSONL) cannot fail on a missing parent. META_STATE_PATH and
# STRATEGY_HISTORY_PATH share the same parent directory.
META_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
META_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

# Setup logging: mirror every record to the module log file and to the
# console (StreamHandler defaults to stderr).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - MCL - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(META_LOG_PATH),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class StrategyType(Enum):
    """Closed set of reasoning strategies the MCL can choose between.

    The string values double as keys in the persisted meta-policy state,
    so they must remain stable across versions.
    """
    DIRECT_REASONING = "direct_reasoning"                # straight logical inference
    ANALOGICAL = "analogical"                            # adapt a known similar solution
    DECOMPOSITION = "decomposition"                      # split into subproblems
    CAUSAL_CHAIN = "causal_chain"                        # build and validate cause-effect links
    COUNTERFACTUAL = "counterfactual"                    # explore "what if" variants
    CONSTRAINT_SATISFACTION = "constraint_satisfaction"  # search the space under constraints
    PATTERN_MATCHING = "pattern_matching"                # match against stored patterns
    SIMULATION = "simulation"                            # mental look-ahead simulation

@dataclass
class Observation:
    """Input observation/problem presented to the meta-cognition loop.

    Attributes:
        content: natural-language problem statement.
        task_type: optional label such as "code_optimization".
        domain: optional label such as "programming".
        complexity: difficulty estimate in [0, 1]; drives context-aware
            strategy boosting downstream.
        metadata: free-form extra data; defaults to a fresh dict.
    """
    content: str
    task_type: Optional[str] = None
    domain: Optional[str] = None
    complexity: float = 0.5
    # default_factory gives each instance its own dict (the idiomatic fix
    # for the mutable-default pitfall the old None+__post_init__ worked around).
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers that explicitly pass metadata=None
        # still get an empty dict, as before.
        if self.metadata is None:
            self.metadata = {}

@dataclass
class BeliefState:
    """Current belief/knowledge state accompanying an observation.

    Attributes:
        confidence: prior confidence in [0, 1].
        uncertainty_estimate: self-assessed uncertainty in [0, 1].
        relevant_schemas: names of known schemas applicable to the task.
        active_goals: currently pursued goal identifiers.
    """
    confidence: float = 0.5
    uncertainty_estimate: float = 0.5
    # default_factory gives each instance its own list (idiomatic fix for
    # the mutable-default pitfall the old None+__post_init__ worked around).
    relevant_schemas: List[str] = field(default_factory=list)
    active_goals: List[str] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: explicit None is still coerced to [].
        if self.relevant_schemas is None:
            self.relevant_schemas = []
        if self.active_goals is None:
            self.active_goals = []

@dataclass
class StrategyResult:
    """Normalized result of executing one reasoning strategy.

    Attributes:
        strategy: which strategy produced this result.
        success: whether the (simulated) execution succeeded.
        confidence: strategy's self-reported confidence in [0, 1].
        execution_time: wall-clock seconds the strategy took.
        outcome: strategy-specific result payload.
        reasoning_trace: human-readable steps taken.
        metadata: free-form extra data; defaults to a fresh dict.
    """
    strategy: StrategyType
    success: bool
    confidence: float
    execution_time: float
    outcome: Any
    reasoning_trace: List[str]
    # default_factory gives each instance its own dict (idiomatic fix for
    # the mutable-default pitfall the old None+__post_init__ worked around).
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: explicit metadata=None still becomes {}.
        if self.metadata is None:
            self.metadata = {}

@dataclass
class MetaPolicyState:
    """Meta-policy weights and statistics, serialized to JSON for persistence.

    All dicts are keyed by StrategyType.value strings.
    """
    strategy_weights: Dict[str, float]         # multiplicative selection weight (floored at 0.1 on update)
    strategy_counts: Dict[str, int]            # how many times each strategy was chosen
    strategy_successes: Dict[str, int]         # how many of those choices succeeded
    strategy_avg_confidence: Dict[str, float]  # running mean of reported confidence
    last_updated: str                          # ISO-8601 timestamp of last save
    total_episodes: int = 0                    # total policy updates recorded

class StrategyBank:
    """Repository of reasoning strategies.

    Each strategy is a callable taking (Observation, BeliefState) and
    returning a dict with keys: 'success', 'confidence', 'outcome',
    'reasoning_trace', and optionally 'metadata'.  Execution here is
    simulated: success is sampled via `random` with probability equal to
    the strategy's self-assessed confidence.
    """

    def __init__(self):
        # Dispatch table: enum member -> bound implementation.
        self.strategies = {
            StrategyType.DIRECT_REASONING: self._direct_reasoning,
            StrategyType.ANALOGICAL: self._analogical_reasoning,
            StrategyType.DECOMPOSITION: self._decomposition,
            StrategyType.CAUSAL_CHAIN: self._causal_chain,
            StrategyType.COUNTERFACTUAL: self._counterfactual,
            StrategyType.CONSTRAINT_SATISFACTION: self._constraint_satisfaction,
            StrategyType.PATTERN_MATCHING: self._pattern_matching,
            StrategyType.SIMULATION: self._simulation
        }

    def execute_strategy(self, strategy: StrategyType, observation: Observation,
                        belief_state: BeliefState) -> StrategyResult:
        """Run one strategy, timing it and normalizing its dict into a
        StrategyResult.

        Never raises: any exception from the strategy is caught, logged,
        and converted into a failed StrategyResult whose trace records
        the error message.
        """
        start_time = datetime.now()
        try:
            result = self.strategies[strategy](observation, belief_state)
            execution_time = (datetime.now() - start_time).total_seconds()
            return StrategyResult(
                strategy=strategy,
                success=result.get('success', False),
                confidence=result.get('confidence', 0.5),
                execution_time=execution_time,
                outcome=result.get('outcome'),
                reasoning_trace=result.get('reasoning_trace', []),
                metadata=result.get('metadata', {})
            )
        except Exception as e:
            logger.error(f"Strategy {strategy.value} failed: {e}")
            # Bug fix: report the actual elapsed time on the failure path
            # instead of hard-coding 0.0, so failure timings are not lost.
            execution_time = (datetime.now() - start_time).total_seconds()
            return StrategyResult(
                strategy=strategy, success=False, confidence=0.0,
                execution_time=execution_time, outcome=None,
                reasoning_trace=[f"Error: {str(e)}"]
            )

    def _direct_reasoning(self, obs: Observation, belief: BeliefState) -> Dict:
        """Straight logical inference; more confident on simple problems."""
        trace = ["Analyzing problem directly", f"Task: {obs.content[:100]}", "Applying logical inference"]
        confidence = 0.7 if obs.complexity < 0.5 else 0.5
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Direct solution: {'success' if success else 'failed'}",
            'reasoning_trace': trace, 'metadata': {'strategy_type': 'direct'}
        }

    def _analogical_reasoning(self, obs: Observation, belief: BeliefState) -> Dict:
        """Adapt a past solution; confidence depends on available schemas."""
        trace = ["Searching for similar problems", f"Found {len(belief.relevant_schemas)} schemas", "Adapting solution"]
        confidence = 0.8 if belief.relevant_schemas else 0.4
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Analogical: {'adapted from past' if success else 'no analogy'}",
            'reasoning_trace': trace, 'metadata': {'schemas_used': len(belief.relevant_schemas)}
        }

    def _decomposition(self, obs: Observation, belief: BeliefState) -> Dict:
        """Split into subproblems; pays off on complex problems."""
        trace = ["Decomposing into subproblems", "Identified 3 subproblems", "Solving each", "Combining solutions"]
        confidence = 0.8 if obs.complexity > 0.6 else 0.5
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Decomposed: {'combined successfully' if success else 'subproblem failed'}",
            'reasoning_trace': trace, 'metadata': {'subproblems': 3}
        }

    def _causal_chain(self, obs: Observation, belief: BeliefState) -> Dict:
        """Build and validate a cause-effect chain toward the goal."""
        trace = ["Building causal chain", "A → B → C → D (goal)", "Validating links"]
        confidence = 0.65
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Causal chain: {'validated' if success else 'broken link'}",
            'reasoning_trace': trace
        }

    def _counterfactual(self, obs: Observation, belief: BeliefState) -> Dict:
        """Explore "what if" variants and compare outcomes."""
        trace = ["Generating counterfactuals", "What if X was different?", "Comparing outcomes"]
        confidence = 0.6
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Counterfactual: {'found better path' if success else 'no improvement'}",
            'reasoning_trace': trace
        }

    def _constraint_satisfaction(self, obs: Observation, belief: BeliefState) -> Dict:
        """Search the solution space subject to identified constraints."""
        trace = ["Identifying constraints", "Searching solution space", "Validating constraints"]
        confidence = 0.7
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Constraint: {'feasible solution' if success else 'no solution'}",
            'reasoning_trace': trace
        }

    def _pattern_matching(self, obs: Observation, belief: BeliefState) -> Dict:
        """Match against stored patterns; confidence depends on schemas."""
        trace = ["Scanning for patterns", f"Database: {len(belief.relevant_schemas)} patterns", "Matching..."]
        confidence = 0.75 if belief.relevant_schemas else 0.3
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Pattern: {'found match' if success else 'no match'}",
            'reasoning_trace': trace
        }

    def _simulation(self, obs: Observation, belief: BeliefState) -> Dict:
        """Mental look-ahead simulation a few steps into the future."""
        trace = ["Running mental simulation", "Simulating 5 steps ahead", "Evaluating outcomes"]
        confidence = 0.65
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Simulation: {'predicted success' if success else 'predicted failure'}",
            'reasoning_trace': trace
        }

class MetaPolicyManager:
    """Manages strategy selection using Thompson sampling.

    Per-strategy Beta posteriors (successes/failures with a uniform
    prior) are sampled to score strategies; scores are further shaped by
    learned weights, average confidence, context boosts, and an
    exploration bonus.  State is persisted as JSON at `state_path` so
    learning survives restarts.
    """

    def __init__(self, state_path: Path = META_STATE_PATH):
        self.state_path = state_path
        self.state = self._load_or_initialize_state()

    def _fresh_state(self) -> MetaPolicyState:
        """Return a uniform prior state over every strategy in StrategyType."""
        strategies = [s.value for s in StrategyType]
        return MetaPolicyState(
            strategy_weights={s: 1.0 for s in strategies},
            strategy_counts={s: 0 for s in strategies},
            strategy_successes={s: 0 for s in strategies},
            strategy_avg_confidence={s: 0.5 for s in strategies},
            last_updated=datetime.now().isoformat(),
            total_episodes=0
        )

    def _load_or_initialize_state(self) -> MetaPolicyState:
        """Load persisted state if present, else build a fresh one.

        Robustness fix: strategies added to StrategyType after the state
        file was written are back-filled with default statistics.
        Without this, select_strategies() — which iterates the loaded
        dicts — would never consider the new strategies.
        """
        if self.state_path.exists():
            try:
                with open(self.state_path, 'r') as f:
                    data = json.load(f)
                state = MetaPolicyState(**data)
                for s in StrategyType:
                    state.strategy_weights.setdefault(s.value, 1.0)
                    state.strategy_counts.setdefault(s.value, 0)
                    state.strategy_successes.setdefault(s.value, 0)
                    state.strategy_avg_confidence.setdefault(s.value, 0.5)
                logger.info(f"Loaded meta-policy: {data['total_episodes']} episodes")
                return state
            except Exception as e:
                # Corrupt/incompatible file: log and fall through to defaults.
                logger.error(f"Failed to load state: {e}")
        return self._fresh_state()

    def save_state(self):
        """Persist the current policy state as pretty-printed JSON."""
        self.state.last_updated = datetime.now().isoformat()
        with open(self.state_path, 'w') as f:
            json.dump(asdict(self.state), f, indent=2)
        logger.info(f"Saved meta-policy: {self.state.total_episodes} episodes")

    def select_strategies(self, n: int = 3, observation: Optional[Observation] = None) -> List[StrategyType]:
        """Select top N strategies using Thompson sampling with context awareness."""
        strategy_scores = {}

        for strategy_name in self.state.strategy_weights.keys():
            successes = self.state.strategy_successes.get(strategy_name, 0)
            failures = self.state.strategy_counts.get(strategy_name, 0) - successes

            # Beta(successes+1, failures+1): Thompson sample under a uniform prior.
            alpha = successes + 1
            beta = failures + 1
            sampled_success_rate = np.random.beta(alpha, beta)

            base_weight = self.state.strategy_weights.get(strategy_name, 1.0)
            avg_confidence = self.state.strategy_avg_confidence.get(strategy_name, 0.5)
            score = sampled_success_rate * base_weight * avg_confidence

            # Context-aware boosting
            if observation:
                # Direct reasoning is best for very simple problems
                if observation.complexity < 0.3 and strategy_name == StrategyType.DIRECT_REASONING.value:
                    score *= 2.0  # Strong boost for simple problems

                # Decomposition for complex problems
                if observation.complexity > 0.7 and strategy_name == StrategyType.DECOMPOSITION.value:
                    score *= 1.5

            # Exploration bonus for unused strategies
            count = self.state.strategy_counts.get(strategy_name, 0)
            if count == 0:
                score *= 1.3  # Give unused strategies a chance
            elif count < 3:
                score *= 1.15  # Boost under-tried strategies

            strategy_scores[strategy_name] = score

        top_strategies = sorted(strategy_scores.items(), key=lambda x: x[1], reverse=True)[:n]
        selected = [StrategyType(name) for name, _ in top_strategies]
        logger.info(f"Selected: {[s.value for s in selected]}")
        return selected

    def update_from_result(self, result: StrategyResult):
        """Fold one strategy result into the policy: bump counts, update
        the running mean confidence, and nudge the selection weight."""
        strategy_name = result.strategy.value
        self.state.strategy_counts[strategy_name] = self.state.strategy_counts.get(strategy_name, 0) + 1

        if result.success:
            self.state.strategy_successes[strategy_name] = self.state.strategy_successes.get(strategy_name, 0) + 1

        # Incremental running mean of reported confidence.
        old_conf = self.state.strategy_avg_confidence.get(strategy_name, 0.5)
        count = self.state.strategy_counts[strategy_name]
        new_conf = (old_conf * (count - 1) + result.confidence) / count
        self.state.strategy_avg_confidence[strategy_name] = new_conf

        learning_rate = 0.1
        # Consistency fix: default like every other lookup in this class
        # instead of risking a KeyError on an unseen strategy name.
        current_weight = self.state.strategy_weights.get(strategy_name, 1.0)

        if result.success:
            new_weight = current_weight + learning_rate * result.confidence
        else:
            new_weight = current_weight - learning_rate * (1 - result.confidence)

        # Floor at 0.1 so no strategy is ever permanently starved.
        self.state.strategy_weights[strategy_name] = max(0.1, new_weight)
        self.state.total_episodes += 1

        logger.info(f"Updated {strategy_name}: weight={new_weight:.3f}")

    def get_report(self) -> Dict[str, Any]:
        """Summarize weight, usage count, and empirical success rate per strategy."""
        report = {'total_episodes': self.state.total_episodes, 'strategies': {}}
        for strategy_name in self.state.strategy_weights.keys():
            count = self.state.strategy_counts.get(strategy_name, 0)
            # Unused strategies report the 0.5 prior rather than dividing by zero.
            success_rate = self.state.strategy_successes.get(strategy_name, 0) / count if count > 0 else 0.5
            report['strategies'][strategy_name] = {
                'weight': self.state.strategy_weights[strategy_name],
                'count': count,
                'success_rate': success_rate
            }
        return report

class MetaCognitionLoop:
    """Main MCL: evaluates strategies, selects best, executes, learns.

    Orchestrates StrategyBank (execution) and MetaPolicyManager
    (selection/learning) and appends each episode to the JSONL history.
    """
    
    def __init__(self):
        self.strategy_bank = StrategyBank()
        self.meta_policy = MetaPolicyManager()
        self.history = []  # in-memory episode log; also appended to disk per episode
        logger.info("🧠 Meta-Cognition Loop initialized")
    
    def process(self, observation: Observation, belief_state: BeliefState, 
                n_candidates: int = 3) -> Dict[str, Any]:
        """Run one meta-cognition episode.

        Selects `n_candidates` strategies via the meta-policy, executes
        each against the observation, keeps the best result (highest
        confidence among successes; ties and all-fail cases resolve to
        the earliest candidate since the sort is stable), updates the
        policy from that single result, and appends the episode to
        STRATEGY_HISTORY_PATH.  Policy state is checkpointed to disk
        every 10 episodes.

        Returns a dict with keys: 'chosen_strategy', 'success',
        'confidence', 'reasoning_trace'.
        """
        logger.info("=" * 70)
        logger.info(f"🎯 NEW TASK: {observation.content[:60]}...")
        
        candidates = self.meta_policy.select_strategies(n_candidates, observation)
        results = []
        
        for strategy in candidates:
            logger.info(f"🔬 Simulating {strategy.value}...")
            result = self.strategy_bank.execute_strategy(strategy, observation, belief_state)
            results.append(result)
            logger.info(f"   → Success: {result.success}, Confidence: {result.confidence:.3f}")
        
        # Failed results score 0, so any success outranks every failure.
        scored = [(r, r.confidence if r.success else 0) for r in results]
        scored.sort(key=lambda x: x[1], reverse=True)
        best_result = scored[0][0]
        
        logger.info(f"✨ CHOSEN: {best_result.strategy.value}")
        
        # NOTE(review): only the chosen result feeds back into the policy;
        # the other candidates' outcomes are discarded.
        self.meta_policy.update_from_result(best_result)
        
        episode = {
            'timestamp': datetime.now().isoformat(),
            'chosen': best_result.strategy.value,
            'success': best_result.success,
            'confidence': best_result.confidence
        }
        self.history.append(episode)
        
        # Append-only JSONL: one episode per line.
        with open(STRATEGY_HISTORY_PATH, 'a') as f:
            f.write(json.dumps(episode) + '\n')
        
        # Periodic checkpoint of the learned policy.
        if len(self.history) % 10 == 0:
            self.meta_policy.save_state()
        
        return {
            'chosen_strategy': best_result.strategy.value,
            'success': best_result.success,
            'confidence': best_result.confidence,
            'reasoning_trace': best_result.reasoning_trace
        }

def test_mcl():
    """Smoke-test the MCL on four problems of varying complexity, then
    print a summary report of the learned meta-policy and persist it."""
    rule = "=" * 70
    print("\n" + rule)
    print("🧠 EDEN META-COGNITION LOOP - TEST MODE")
    print(rule + "\n")

    mcl = MetaCognitionLoop()

    # (observation, belief) pairs spanning trivial to very complex tasks.
    cases = [
        (Observation("Optimize recursive factorial function",
                     task_type="code_optimization", domain="programming", complexity=0.6),
         BeliefState(confidence=0.7, relevant_schemas=["recursion"])),
        (Observation("What causes stock market crashes?",
                     task_type="causal_analysis", domain="economics", complexity=0.8),
         BeliefState(confidence=0.4, uncertainty_estimate=0.7)),
        (Observation("2 + 2 = ?",
                     task_type="arithmetic", domain="math", complexity=0.1),
         BeliefState(confidence=0.95, relevant_schemas=["arithmetic"])),
        (Observation("Design system for 1M requests/sec",
                     task_type="system_design", domain="engineering", complexity=0.9),
         BeliefState(confidence=0.5, uncertainty_estimate=0.6)),
    ]

    for observation, belief in cases:
        outcome = mcl.process(observation, belief, n_candidates=3)
        print(f"\n📝 Problem: {observation.content}")
        print(f"✅ Chosen: {outcome['chosen_strategy']}")
        print(f"💯 Confidence: {outcome['confidence']:.3f}")
        print()

    print("\n" + rule)
    print("📊 META-COGNITION REPORT")
    print(rule)

    report = mcl.meta_policy.get_report()
    print(f"\nTotal episodes: {report['total_episodes']}\n")

    for name, stats in report['strategies'].items():
        if not stats['count']:
            continue  # skip strategies that were never chosen
        print(f"{name}: weight={stats['weight']:.3f}, uses={stats['count']}, success={stats['success_rate']:.1%}")

    mcl.meta_policy.save_state()
    print(f"\n💾 State saved to: {META_STATE_PATH}\n")

def main():
    """CLI entry point: run the self-test when invoked with `test`,
    otherwise print a short usage message."""
    import sys
    args = sys.argv[1:]
    if args and args[0] == "test":
        test_mcl()
        return
    print("Eden Meta-Cognition Loop")
    print("Usage: python3 meta_loop.py test")

# Script entry point: delegate to main() so the module stays importable
# without side effects beyond the top-level config.
if __name__ == "__main__":
    main()
