#!/usr/bin/env python3
"""
Eden Meta-Cognition Loop (MCL)
Top Priority Module: Enables Eden to evaluate, select, and learn from reasoning strategies
"""

import json
import logging
import random
from dataclasses import asdict, dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import numpy as np

# Configuration
EDEN_ROOT = Path("/Eden/CORE")  # root of the Eden deployment tree
META_LOG_PATH = EDEN_ROOT / "logs" / "phi_meta.log"  # MCL log file
META_STATE_PATH = EDEN_ROOT / "phi_fractal" / "meta_cognition" / "meta_policy_state.json"  # persisted policy weights/stats
STRATEGY_HISTORY_PATH = EDEN_ROOT / "phi_fractal" / "meta_cognition" / "strategy_history.jsonl"  # append-only episode log

# Create directories
# NOTE: STRATEGY_HISTORY_PATH shares META_STATE_PATH's parent directory,
# so both output locations exist after these two calls.
META_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
META_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

# Setup logging
# Records go both to the log file above and to the console (StreamHandler).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - MCL - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(META_LOG_PATH),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class StrategyType(Enum):
    """Available reasoning strategies.

    The string values double as JSON keys in the persisted meta-policy
    state (see MetaPolicyManager), so they must stay stable.
    """
    DIRECT_REASONING = "direct_reasoning"            # straight logical inference
    ANALOGICAL = "analogical"                        # adapt solutions from similar past problems
    DECOMPOSITION = "decomposition"                  # split into subproblems, solve, recombine
    CAUSAL_CHAIN = "causal_chain"                    # build and validate a chain of causes
    COUNTERFACTUAL = "counterfactual"                # explore "what if X were different"
    CONSTRAINT_SATISFACTION = "constraint_satisfaction"  # search a constrained solution space
    PATTERN_MATCHING = "pattern_matching"            # match against known patterns/schemas
    SIMULATION = "simulation"                        # mentally simulate steps ahead

@dataclass
class Observation:
    """Input observation/problem.

    Attributes:
        content: Natural-language description of the task.
        task_type: Optional task category label (e.g. "arithmetic").
        domain: Optional domain label (e.g. "math").
        complexity: Heuristic difficulty score; the selection thresholds in
            the code (0.3, 0.7) assume roughly the [0, 1] range.
        metadata: Free-form extra data; every instance gets its own dict.
    """
    content: str
    task_type: Optional[str] = None
    domain: Optional[str] = None
    complexity: float = 0.5
    # field(default_factory=dict) is the idiomatic fix for the
    # mutable-default pitfall; no None sentinel needed for the default case.
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers that explicitly pass
        # metadata=None still get an empty dict.
        if self.metadata is None:
            self.metadata = {}

@dataclass
class BeliefState:
    """Current belief/knowledge state.

    Attributes:
        confidence: Agent's confidence in its current beliefs.
        uncertainty_estimate: Self-assessed uncertainty.
        relevant_schemas: Names of schemas/patterns judged relevant;
            strategies like analogical reasoning key off its length.
        active_goals: Currently pursued goal labels.
    """
    confidence: float = 0.5
    uncertainty_estimate: float = 0.5
    # default_factory gives each instance its own lists, replacing the
    # `= None` + __post_init__ mutable-default workaround.
    relevant_schemas: Optional[List[str]] = field(default_factory=list)
    active_goals: Optional[List[str]] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: explicit None still normalizes to [].
        if self.relevant_schemas is None:
            self.relevant_schemas = []
        if self.active_goals is None:
            self.active_goals = []

@dataclass
class StrategyResult:
    """Result of executing a strategy.

    Attributes:
        strategy: Which strategy produced this result.
        success: Whether the (simulated) execution succeeded.
        confidence: Strategy's self-reported confidence.
        execution_time: Wall-clock seconds spent executing.
        outcome: Strategy-specific outcome payload.
        reasoning_trace: Human-readable steps taken.
        metadata: Free-form extras; every instance gets its own dict.
    """
    strategy: StrategyType
    success: bool
    confidence: float
    execution_time: float
    outcome: Any
    reasoning_trace: List[str]
    # default_factory avoids the shared-mutable-default pitfall.
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: explicit metadata=None becomes {}.
        if self.metadata is None:
            self.metadata = {}

@dataclass
class MetaPolicyState:
    """Meta-policy weights and statistics.

    All dicts are keyed by StrategyType.value strings so the state
    round-trips through JSON via asdict()/MetaPolicyState(**data).
    """
    strategy_weights: Dict[str, float]         # learned multiplier per strategy (floored at 0.1 on update)
    strategy_counts: Dict[str, int]            # times each strategy was chosen as best
    strategy_successes: Dict[str, int]         # times a chosen strategy succeeded
    strategy_avg_confidence: Dict[str, float]  # running mean confidence per strategy
    last_updated: str                          # ISO-8601 timestamp of last save
    total_episodes: int = 0                    # total policy updates performed

class StrategyBank:
    """Repository of reasoning strategies.

    Each strategy implementation here is a *simulation*: it emits a
    reasoning trace, estimates a confidence, and samples success via
    ``random`` at a rate tied to that confidence. No real reasoning is
    performed in this module.
    """

    def __init__(self):
        # Dispatch table: strategy enum member -> implementation callable.
        self.strategies = {
            StrategyType.DIRECT_REASONING: self._direct_reasoning,
            StrategyType.ANALOGICAL: self._analogical_reasoning,
            StrategyType.DECOMPOSITION: self._decomposition,
            StrategyType.CAUSAL_CHAIN: self._causal_chain,
            StrategyType.COUNTERFACTUAL: self._counterfactual,
            StrategyType.CONSTRAINT_SATISFACTION: self._constraint_satisfaction,
            StrategyType.PATTERN_MATCHING: self._pattern_matching,
            StrategyType.SIMULATION: self._simulation
        }

    def execute_strategy(self, strategy: StrategyType, observation: Observation, 
                        belief_state: BeliefState) -> StrategyResult:
        """Execute one strategy and wrap its raw dict into a StrategyResult.

        Never raises: if the strategy implementation errors, a failed
        StrategyResult (success=False, confidence=0.0) is returned with
        the error message in the reasoning trace.
        """
        start_time = datetime.now()
        try:
            result = self.strategies[strategy](observation, belief_state)
            execution_time = (datetime.now() - start_time).total_seconds()
            return StrategyResult(
                strategy=strategy,
                success=result.get('success', False),
                confidence=result.get('confidence', 0.5),
                execution_time=execution_time,
                outcome=result.get('outcome'),
                reasoning_trace=result.get('reasoning_trace', []),
                metadata=result.get('metadata', {})
            )
        except Exception as e:
            logger.error(f"Strategy {strategy.value} failed: {e}")
            # Fix: record the real elapsed time rather than hard-coding 0.0,
            # so failed executions are not reported as instantaneous.
            return StrategyResult(
                strategy=strategy, success=False, confidence=0.0,
                execution_time=(datetime.now() - start_time).total_seconds(),
                outcome=None, reasoning_trace=[f"Error: {str(e)}"]
            )

    def _direct_reasoning(self, obs: Observation, belief: BeliefState) -> Dict:
        """Straight logical inference; strongest on low-complexity tasks."""
        trace = ["Analyzing problem directly", f"Task: {obs.content[:100]}", "Applying logical inference"]
        # Direct reasoning is VERY good for simple problems
        if obs.complexity < 0.3:
            confidence = 0.95  # High confidence for simple problems
            success = random.random() < 0.9  # 90% success on simple
        else:
            confidence = 0.6
            success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Direct solution: {'success' if success else 'failed'}",
            'reasoning_trace': trace, 'metadata': {'strategy_type': 'direct'}
        }

    def _analogical_reasoning(self, obs: Observation, belief: BeliefState) -> Dict:
        """Adapt a past solution; works only when relevant schemas exist."""
        trace = ["Searching for similar problems", f"Found {len(belief.relevant_schemas)} schemas", "Adapting solution"]
        confidence = 0.8 if belief.relevant_schemas else 0.4
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Analogical: {'adapted from past' if success else 'no analogy'}",
            'reasoning_trace': trace, 'metadata': {'schemas_used': len(belief.relevant_schemas)}
        }

    def _decomposition(self, obs: Observation, belief: BeliefState) -> Dict:
        """Split into subproblems; pays off on high-complexity tasks."""
        trace = ["Decomposing into subproblems", "Identified 3 subproblems", "Solving each", "Combining solutions"]
        confidence = 0.8 if obs.complexity > 0.6 else 0.5
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Decomposed: {'combined successfully' if success else 'subproblem failed'}",
            'reasoning_trace': trace, 'metadata': {'subproblems': 3}
        }

    def _causal_chain(self, obs: Observation, belief: BeliefState) -> Dict:
        """Build a chain of causes toward the goal and validate each link."""
        trace = ["Building causal chain", "A → B → C → D (goal)", "Validating links"]
        confidence = 0.65
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Causal chain: {'validated' if success else 'broken link'}",
            'reasoning_trace': trace
        }

    def _counterfactual(self, obs: Observation, belief: BeliefState) -> Dict:
        """Explore what-if variations and compare their outcomes."""
        trace = ["Generating counterfactuals", "What if X was different?", "Comparing outcomes"]
        confidence = 0.6
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Counterfactual: {'found better path' if success else 'no improvement'}",
            'reasoning_trace': trace
        }

    def _constraint_satisfaction(self, obs: Observation, belief: BeliefState) -> Dict:
        """Search the solution space under identified constraints."""
        trace = ["Identifying constraints", "Searching solution space", "Validating constraints"]
        confidence = 0.7
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Constraint: {'feasible solution' if success else 'no solution'}",
            'reasoning_trace': trace
        }

    def _pattern_matching(self, obs: Observation, belief: BeliefState) -> Dict:
        """Match against known patterns; weak when no schemas are available."""
        trace = ["Scanning for patterns", f"Database: {len(belief.relevant_schemas)} patterns", "Matching..."]
        confidence = 0.75 if belief.relevant_schemas else 0.3
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Pattern: {'found match' if success else 'no match'}",
            'reasoning_trace': trace
        }

    def _simulation(self, obs: Observation, belief: BeliefState) -> Dict:
        """Mentally simulate a few steps ahead and evaluate the outcomes."""
        trace = ["Running mental simulation", "Simulating 5 steps ahead", "Evaluating outcomes"]
        confidence = 0.65
        success = random.random() < confidence
        return {
            'success': success, 'confidence': confidence,
            'outcome': f"Simulation: {'predicted success' if success else 'predicted failure'}",
            'reasoning_trace': trace
        }

class MetaPolicyManager:
    """Manages strategy selection using Thompson sampling.

    Persists per-strategy statistics (weights, counts, successes, running
    mean confidence) to ``state_path`` as JSON and uses them to rank
    strategies for each new observation.
    """

    def __init__(self, state_path: Path = META_STATE_PATH):
        self.state_path = state_path
        self.state = self._load_or_initialize_state()

    def _load_or_initialize_state(self) -> MetaPolicyState:
        """Load the persisted state, or build a fresh uniform prior.

        Any load failure (missing file, corrupt JSON, incompatible keys)
        falls back to a new state rather than crashing.
        """
        if self.state_path.exists():
            try:
                with open(self.state_path, 'r') as f:
                    data = json.load(f)
                logger.info(f"Loaded meta-policy: {data['total_episodes']} episodes")
                return MetaPolicyState(**data)
            except Exception as e:
                logger.error(f"Failed to load state: {e}")

        strategies = [s.value for s in StrategyType]
        return MetaPolicyState(
            strategy_weights={s: 1.0 for s in strategies},
            strategy_counts={s: 0 for s in strategies},
            strategy_successes={s: 0 for s in strategies},
            strategy_avg_confidence={s: 0.5 for s in strategies},
            last_updated=datetime.now().isoformat(),
            total_episodes=0
        )

    def save_state(self):
        """Write the current policy state to disk as JSON."""
        self.state.last_updated = datetime.now().isoformat()
        with open(self.state_path, 'w') as f:
            json.dump(asdict(self.state), f, indent=2)
        logger.info(f"Saved meta-policy: {self.state.total_episodes} episodes")

    def _thompson_score(self, strategy_name: str) -> float:
        """Base Thompson-sampling score for one strategy.

        Samples a success rate from a Beta(successes+1, failures+1)
        posterior and scales it by the learned weight and the running
        average confidence. Context boosts (complexity match, exploration
        bonus) are applied by the caller where appropriate.
        """
        successes = self.state.strategy_successes.get(strategy_name, 0)
        failures = self.state.strategy_counts.get(strategy_name, 0) - successes
        sampled_success_rate = np.random.beta(successes + 1, failures + 1)
        base_weight = self.state.strategy_weights.get(strategy_name, 1.0)
        avg_confidence = self.state.strategy_avg_confidence.get(strategy_name, 0.5)
        return sampled_success_rate * base_weight * avg_confidence

    def select_strategies(self, n: int = 3, observation: Optional[Observation] = None) -> List[StrategyType]:
        """Select top N strategies using Thompson sampling with smart context matching"""

        # SPECIAL RULE: For very simple problems, force direct_reasoning to be included
        if observation and observation.complexity < 0.3:
            direct = StrategyType.DIRECT_REASONING
            logger.info(f"🎯 Simple problem detected (complexity={observation.complexity:.2f})")
            logger.info(f"   Forcing {direct.value} into candidates")

            # Rank everything except direct_reasoning, then keep the top n-1.
            # (Refactor: this branch previously duplicated the scoring loop.)
            strategy_scores = {
                strategy_name: self._thompson_score(strategy_name)
                for strategy_name in self.state.strategy_weights
                if strategy_name != direct.value
            }
            top_others = sorted(strategy_scores.items(), key=lambda x: x[1], reverse=True)[:n-1]
            selected = [direct] + [StrategyType(name) for name, _ in top_others]
            logger.info(f"Selected: {[s.value for s in selected]}")
            return selected

        # Normal Thompson sampling for other problems
        strategy_scores = {}
        for strategy_name in self.state.strategy_weights:
            score = self._thompson_score(strategy_name)

            # Context boosting: hard problems favor decomposition.
            if observation:
                if observation.complexity > 0.7 and strategy_name == StrategyType.DECOMPOSITION.value:
                    score *= 1.5

            # Exploration bonus for strategies never tried yet.
            if self.state.strategy_counts.get(strategy_name, 0) == 0:
                score *= 1.2

            strategy_scores[strategy_name] = score

        top_strategies = sorted(strategy_scores.items(), key=lambda x: x[1], reverse=True)[:n]
        selected = [StrategyType(name) for name, _ in top_strategies]
        logger.info(f"Selected: {[s.value for s in selected]}")
        return selected

    def update_from_result(self, result: StrategyResult):
        """Fold one executed-strategy result back into the policy statistics."""
        strategy_name = result.strategy.value
        self.state.strategy_counts[strategy_name] = self.state.strategy_counts.get(strategy_name, 0) + 1

        if result.success:
            self.state.strategy_successes[strategy_name] = self.state.strategy_successes.get(strategy_name, 0) + 1

        # Incremental running mean of reported confidence.
        old_conf = self.state.strategy_avg_confidence.get(strategy_name, 0.5)
        count = self.state.strategy_counts[strategy_name]
        new_conf = (old_conf * (count - 1) + result.confidence) / count
        self.state.strategy_avg_confidence[strategy_name] = new_conf

        # Reward-style weight update, floored at 0.1 so no strategy dies out.
        learning_rate = 0.1
        current_weight = self.state.strategy_weights[strategy_name]

        if result.success:
            new_weight = current_weight + learning_rate * result.confidence
        else:
            new_weight = current_weight - learning_rate * (1 - result.confidence)

        self.state.strategy_weights[strategy_name] = max(0.1, new_weight)
        self.state.total_episodes += 1

        logger.info(f"Updated {strategy_name}: weight={new_weight:.3f}")

    def get_report(self) -> Dict[str, Any]:
        """Summarize per-strategy weight, usage count, and success rate.

        Untried strategies report a neutral 0.5 success rate.
        """
        report = {'total_episodes': self.state.total_episodes, 'strategies': {}}
        for strategy_name in self.state.strategy_weights:
            count = self.state.strategy_counts[strategy_name]
            success_rate = self.state.strategy_successes[strategy_name] / count if count > 0 else 0.5
            report['strategies'][strategy_name] = {
                'weight': self.state.strategy_weights[strategy_name],
                'count': count,
                'success_rate': success_rate
            }
        return report

class MetaCognitionLoop:
    """Main MCL: evaluates strategies, selects best, executes, learns"""

    def __init__(self):
        # Wire up the strategy repository and the learned selection policy.
        self.strategy_bank = StrategyBank()
        self.meta_policy = MetaPolicyManager()
        self.history = []
        logger.info("🧠 Meta-Cognition Loop initialized")

    def process(self, observation: Observation, belief_state: BeliefState, 
                n_candidates: int = 3) -> Dict[str, Any]:
        """Run one meta-cognition episode.

        Selects candidate strategies, simulates each against the observation,
        keeps the highest-scoring result, feeds it back into the meta-policy,
        and appends the episode to the JSONL history file.
        """
        logger.info("=" * 70)
        logger.info(f"🎯 NEW TASK: {observation.content[:60]}...")

        candidate_strategies = self.meta_policy.select_strategies(n_candidates, observation)

        outcomes = []
        for candidate in candidate_strategies:
            logger.info(f"🔬 Simulating {candidate.value}...")
            attempt = self.strategy_bank.execute_strategy(candidate, observation, belief_state)
            logger.info(f"   → Success: {attempt.success}, Confidence: {attempt.confidence:.3f}")
            outcomes.append(attempt)

        # Pick the winner: failed runs score 0, successful runs score their
        # confidence; ties keep the earlier candidate (matches a stable sort).
        best_result = max(outcomes, key=lambda r: r.confidence if r.success else 0)

        logger.info(f"✨ CHOSEN: {best_result.strategy.value}")

        self.meta_policy.update_from_result(best_result)

        episode = {
            'timestamp': datetime.now().isoformat(),
            'task_type': observation.task_type or 'unknown',
            'complexity': observation.complexity,
            'chosen': best_result.strategy.value,
            'success': best_result.success,
            'confidence': best_result.confidence
        }
        self.history.append(episode)

        # Append-only episode log, one JSON object per line.
        with open(STRATEGY_HISTORY_PATH, 'a') as f:
            f.write(json.dumps(episode) + '\n')

        # Persist the learned policy every 10 completed episodes.
        if len(self.history) % 10 == 0:
            self.meta_policy.save_state()

        return {
            'chosen_strategy': best_result.strategy.value,
            'success': best_result.success,
            'confidence': best_result.confidence,
            'reasoning_trace': best_result.reasoning_trace
        }

def test_mcl():
    """Demo run: feed four sample problems through the loop, print a report."""
    banner = "=" * 70
    print("\n" + banner)
    print("🧠 EDEN META-COGNITION LOOP - TEST MODE")
    print(banner + "\n")

    mcl = MetaCognitionLoop()

    # (observation, belief state) pairs spanning easy → hard problems.
    sample_problems = [
        (Observation("Optimize recursive factorial function", "code_optimization", "programming", 0.6),
         BeliefState(0.7, relevant_schemas=["recursion"])),
        (Observation("What causes stock market crashes?", "causal_analysis", "economics", 0.8),
         BeliefState(0.4, uncertainty_estimate=0.7)),
        (Observation("2 + 2 = ?", "arithmetic", "math", 0.1),
         BeliefState(0.95, relevant_schemas=["arithmetic"])),
        (Observation("Design system for 1M requests/sec", "system_design", "engineering", 0.9),
         BeliefState(0.5, uncertainty_estimate=0.6)),
    ]

    for observation, belief in sample_problems:
        outcome = mcl.process(observation, belief, n_candidates=3)
        print(f"\n📝 Problem: {observation.content}")
        print(f"✅ Chosen: {outcome['chosen_strategy']}")
        print(f"💯 Confidence: {outcome['confidence']:.3f}")
        print()

    print("\n" + banner)
    print("📊 META-COGNITION REPORT")
    print(banner)

    report = mcl.meta_policy.get_report()
    print(f"\nTotal episodes: {report['total_episodes']}\n")

    for strategy_name, stats in report['strategies'].items():
        if stats['count'] > 0:
            print(f"{strategy_name}: weight={stats['weight']:.3f}, uses={stats['count']}, success={stats['success_rate']:.1%}")

    mcl.meta_policy.save_state()
    print(f"\n💾 State saved to: {META_STATE_PATH}\n")

def main():
    """CLI entry point: `test` runs the demo, anything else prints usage."""
    import sys
    args = sys.argv[1:]
    if args and args[0] == "test":
        test_mcl()
        return
    print("Eden Meta-Cognition Loop")
    print("Usage: python3 meta_loop.py test")

# Script entry point: run `python3 meta_loop.py test` for the demo.
if __name__ == "__main__":
    main()
