#!/usr/bin/env python3
"""
Eden Analogical Reasoning Engine
Real case-based reasoning with memory

Replaces simulated analogical strategy with actual:
- Case library (past solutions)
- Similarity matching
- Solution adaptation
- Learning from outcomes
"""

import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple

import numpy as np

# Configuration
# All engine artefacts live under EDEN_ROOT: one log file plus the JSON case
# library and a state file inside phi_fractal/analogical_engine.
EDEN_ROOT = Path("/Eden/CORE")
ANALOGICAL_LOG = EDEN_ROOT / "logs" / "phi_analogical.log"
CASE_LIBRARY = EDEN_ROOT / "phi_fractal" / "analogical_engine" / "case_library.json"
ANALOGICAL_STATE = EDEN_ROOT / "phi_fractal" / "analogical_engine" / "state.json"

# Create directories
# NOTE: these run at import time, so importing this module touches the
# filesystem. The log directory must exist before the FileHandler below is
# constructed.
CASE_LIBRARY.parent.mkdir(parents=True, exist_ok=True)
ANALOGICAL_LOG.parent.mkdir(parents=True, exist_ok=True)

# Configure the root logger to write to both the log file and the console
# (StreamHandler with no arguments writes to stderr).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - ANALOGICAL - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(ANALOGICAL_LOG),
        logging.StreamHandler()
    ]
)
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)


@dataclass
class Case:
    """A stored problem-solution case.

    Instances are serialised with ``dataclasses.asdict`` and rebuilt with
    ``Case(**data)``, so the field names below are also the keys used in the
    on-disk JSON case library.
    """
    # Identifier; also used as the key in the engine's case dictionary.
    id: str
    # Free-text description of the problem that was solved.
    problem: str
    # Category label (e.g. "performance", "debugging"); counted in statistics.
    problem_type: str
    # Free-text description of the solution that was applied.
    solution: str
    # Free-text description of what happened after applying the solution.
    outcome: str
    # Whether the solution worked; unsuccessful cases are ranked lower.
    success: bool
    # Trust in the solution; multiplied with similarity when ranking matches.
    confidence: float
    features: List[str]  # Key features for matching
    # ISO-8601 creation time (from datetime.now().isoformat()).
    timestamp: str
    # Number of times this case was chosen as the best match.
    times_retrieved: int = 0
    # Stored and persisted, but not updated by any code visible in this file.
    avg_usefulness: float = 0.5


class AnalogicalEngine:
    """Real analogical reasoning with case-based memory.

    The engine keeps a dictionary of ``Case`` objects keyed by case id,
    persists it as JSON at ``CASE_LIBRARY``, and answers new problems by
    retrieving the stored case whose feature set is most similar (Jaccard
    similarity) and lightly adapting its solution.
    """

    # Vocabulary scanned for in problem descriptions by extract_features().
    _KEYWORDS = (
        'database', 'query', 'queries', 'sql',
        'performance', 'slow', 'fast', 'speed', 'taking too long', 'lag',
        'server', 'load', 'cache', 'caching', 'memory',
        'recursion', 'stack', 'overflow', 'error', 'crash',
        'code', 'refactor', 'duplication', 'clean', 'messy',
        'microservices', 'api', 'communication', 'async', 'sync',
        'bug', 'debug', 'fix', 'issue', 'problem',
        'optimize', 'improve', 'scale', 'scaling',
        'timeout', 'leak', 'high', 'low',
    )
    # Matched keywords that imply the umbrella feature 'performance'.
    _PERFORMANCE_CUES = frozenset({'slow', 'lag', 'taking too long', 'timeout'})
    # Matched keywords that imply the umbrella feature 'database'.
    _DATABASE_CUES = frozenset({'database', 'query', 'queries', 'sql'})

    def __init__(self):
        """Create the engine and populate it from disk (or from seed cases)."""
        self.cases: Dict[str, "Case"] = {}
        # Minimum Jaccard similarity for a stored case to count as a match.
        self.similarity_threshold = 0.3
        self.load_cases()
        logger.info(f"🔍 Analogical Engine initialized with {len(self.cases)} cases")

    def load_cases(self):
        """Load the case library from ``CASE_LIBRARY``.

        If the file does not exist the library is seeded with the built-in
        cases. If the file cannot be read or parsed, the error is logged and
        the engine is left with whatever cases it already holds (the broken
        file is not overwritten here). Individual malformed records are
        logged and skipped so one bad entry does not discard the rest.
        """
        if not CASE_LIBRARY.exists():
            # Seed with initial cases
            self._seed_initial_cases()
            return

        try:
            with open(CASE_LIBRARY, 'r', encoding='utf-8') as f:
                data = json.load(f)
            stored = data.get('cases', {}).items()
        except (OSError, ValueError, AttributeError) as e:
            # ValueError covers JSON and decoding errors; AttributeError
            # covers a top-level value or 'cases' entry that is not a dict.
            logger.error(f"Failed to load cases: {e}")
            return

        for case_id, case_data in stored:
            try:
                self.cases[case_id] = Case(**case_data)
            except TypeError as e:
                # Missing/unknown fields or a non-mapping record.
                logger.error(f"Skipping malformed case {case_id}: {e}")

        logger.info(f"Loaded {len(self.cases)} cases from library")

    def save_cases(self):
        """Persist the case library to ``CASE_LIBRARY`` as JSON.

        The data is written to a sibling temporary file first and then moved
        over the real file, so an interrupted write cannot leave a truncated
        library behind. Failures are logged, not raised.
        """
        data = {
            'cases': {cid: asdict(c) for cid, c in self.cases.items()},
            'total_cases': len(self.cases),
            'last_updated': datetime.now().isoformat()
        }
        tmp_path = CASE_LIBRARY.with_name(CASE_LIBRARY.name + ".tmp")

        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(CASE_LIBRARY)
        except (OSError, TypeError, ValueError) as e:
            # TypeError/ValueError: a case holds something JSON cannot encode.
            logger.error(f"Failed to save cases: {e}")
            return

        logger.info(f"Saved {len(self.cases)} cases to library")

    def _seed_initial_cases(self):
        """Seed the library with five foundational cases and save it."""
        seed_cases = [
            Case(
                id="case_001",
                problem="Slow database queries",
                problem_type="performance",
                solution="Add indexes on frequently queried columns",
                outcome="Query time reduced by 70%",
                success=True,
                confidence=0.9,
                features=["database", "performance", "queries", "slow"],
                timestamp=datetime.now().isoformat()
            ),
            Case(
                id="case_002",
                problem="High server load",
                problem_type="performance",
                solution="Implement caching layer (Redis)",
                outcome="Server load reduced by 40%",
                success=True,
                confidence=0.85,
                features=["server", "load", "performance", "caching"],
                timestamp=datetime.now().isoformat()
            ),
            Case(
                id="case_003",
                problem="Recursive function stack overflow",
                problem_type="debugging",
                solution="Convert to iterative with explicit stack",
                outcome="No more stack errors",
                success=True,
                confidence=0.95,
                features=["recursion", "stack", "overflow", "iterative"],
                timestamp=datetime.now().isoformat()
            ),
            Case(
                id="case_004",
                problem="Code duplication across modules",
                problem_type="refactoring",
                solution="Extract common functionality to shared library",
                outcome="Reduced code by 30%, easier maintenance",
                success=True,
                confidence=0.9,
                features=["duplication", "refactor", "modules", "library"],
                timestamp=datetime.now().isoformat()
            ),
            Case(
                id="case_005",
                problem="Microservices communication overhead",
                problem_type="system_design",
                solution="Use message queue (RabbitMQ) for async communication",
                outcome="Response time improved by 50%",
                success=True,
                confidence=0.85,
                features=["microservices", "communication", "async", "queue"],
                timestamp=datetime.now().isoformat()
            ),
        ]

        for case in seed_cases:
            self.cases[case.id] = case

        self.save_cases()
        logger.info(f"Seeded library with {len(seed_cases)} initial cases")

    def extract_features(self, problem: str) -> List[str]:
        """Extract key features from a problem description.

        Each vocabulary keyword is matched case-insensitively at the start
        of a word, so "databases" still yields ``database`` but "slow" no
        longer yields ``low`` and "async" no longer yields ``sync``.
        Umbrella features ``performance`` and ``database`` are added when
        one of their cue keywords matched.

        Args:
            problem: Free-text problem description.

        Returns:
            Sorted list of unique feature strings (possibly empty).
        """
        problem_lower = problem.lower()

        features = {
            kw for kw in self._KEYWORDS
            if re.search(r'\b' + re.escape(kw), problem_lower)
        }

        # Add semantic features, derived from the keywords that really
        # matched rather than from another raw substring scan.
        if features & self._PERFORMANCE_CUES:
            features.add('performance')
        if features & self._DATABASE_CUES:
            features.add('database')

        # Sorted so callers get a deterministic order.
        return sorted(features)

    def compute_similarity(self, features1: List[str], features2: List[str]) -> float:
        """Compute Jaccard similarity between two feature lists.

        Args:
            features1: First feature list (duplicates are ignored).
            features2: Second feature list (duplicates are ignored).

        Returns:
            ``|A & B| / |A | B|`` in the range 0.0-1.0; 0.0 if either list
            is empty.
        """
        if not features1 or not features2:
            return 0.0

        set1 = set(features1)
        set2 = set(features2)

        intersection = len(set1 & set2)
        union = len(set1 | set2)

        return intersection / union if union > 0 else 0.0

    def find_similar_cases(self, problem: str, n: int = 3) -> List[Tuple["Case", float]]:
        """Find the stored cases most similar to ``problem``.

        Args:
            problem: Free-text problem description.
            n: Maximum number of matches to return; values below 1 yield an
                empty list.

        Returns:
            Up to ``n`` ``(case, similarity)`` pairs whose similarity is at
            least ``self.similarity_threshold``, best first.
        """
        features = self.extract_features(problem)

        similarities = []
        for case in self.cases.values():
            sim = self.compute_similarity(features, case.features)
            if sim >= self.similarity_threshold:
                similarities.append((case, sim))

        # Sort by similarity * success * confidence; failed cases count half.
        similarities.sort(
            key=lambda x: x[1] * (1.0 if x[0].success else 0.5) * x[0].confidence,
            reverse=True
        )

        # Clamp so a negative n cannot slice from the wrong end of the list.
        return similarities[:max(n, 0)]

    def reason_by_analogy(self, problem: str, problem_type: str) -> Dict[str, Any]:
        """Apply analogical reasoning to a problem.

        Steps:
        1. Find similar past cases
        2. Adapt the best case's solution to the current context
        3. Assess confidence (similarity * stored case confidence)
        4. Return the reasoning trace

        Args:
            problem: Free-text problem description.
            problem_type: Category label; accepted for interface
                compatibility but not used by the matching logic.

        Returns:
            Dict with keys ``success``, ``confidence``, ``solution``,
            ``reasoning_trace`` and ``analogies``; on a match it also has
            ``source_case`` (case id) and ``similarity``.

        Side effects:
            On a match, increments the best case's ``times_retrieved`` and
            saves the library.
        """
        logger.info(f"🔍 Reasoning by analogy: {problem[:60]}...")

        # Find similar cases
        similar_cases = self.find_similar_cases(problem, n=3)

        if not similar_cases:
            logger.info("   No similar cases found")
            return {
                'success': False,
                'confidence': 0.3,
                'solution': "No analogous cases available",
                'reasoning_trace': [
                    "Searched case library",
                    f"No cases similar to: {problem[:40]}...",
                    "Need to build case library for this domain"
                ],
                'analogies': []
            }

        # Use best match
        best_case, similarity = similar_cases[0]
        best_case.times_retrieved += 1

        # Adapt solution
        adapted_solution = self._adapt_solution(problem, best_case)

        # Compute confidence
        confidence = similarity * best_case.confidence

        # Build reasoning trace
        reasoning_trace = [
            f"Found {len(similar_cases)} similar cases",
            f"Best match: '{best_case.problem}' ({similarity:.0%} similar)",
            f"Original solution: {best_case.solution}",
            f"Adapted to: {adapted_solution}",
            f"Confidence: {confidence:.2f}"
        ]

        # Sorted so the rendered text is stable between runs.
        shared_features = sorted(
            set(self.extract_features(problem)) & set(best_case.features)
        )
        analogies = [
            f"'{problem[:40]}...' is like '{best_case.problem}'",
            f"Similar features: {', '.join(shared_features)}"
        ]

        logger.info(f"   ✅ Found analogy: {best_case.id} ({similarity:.0%} similar)")

        self.save_cases()  # Save retrieval count

        return {
            'success': True,
            'confidence': confidence,
            'solution': adapted_solution,
            'reasoning_trace': reasoning_trace,
            'analogies': analogies,
            'source_case': best_case.id,
            'similarity': similarity
        }

    def _adapt_solution(self, new_problem: str, source_case: "Case") -> str:
        """Adapt the solution of ``source_case`` to ``new_problem``.

        Simple rule-based adaptation (could be enhanced with an LLM or more
        rules): for a database problem matched to a caching solution, the
        word "Redis" is swapped for "database-specific cache"; otherwise the
        stored solution is returned prefixed with "Apply similar approach: ".

        Args:
            new_problem: Free-text description of the new problem.
            source_case: The retrieved case; only ``solution`` is read.

        Returns:
            The adapted solution text.
        """
        base_solution = source_case.solution

        # Context-aware adaptation. The stem "cach" is used because
        # "caching" does not contain the substring "cache".
        if "database" in new_problem.lower() and "cach" in base_solution.lower():
            return base_solution.replace("Redis", "database-specific cache")

        return f"Apply similar approach: {base_solution}"

    def add_case(self, problem: str, problem_type: str, solution: str,
                 outcome: str, success: bool, confidence: float) -> "Case":
        """Add a new case to the library from experience and save it.

        Args:
            problem: Free-text problem description; features are extracted
                from it.
            problem_type: Category label.
            solution: What was done.
            outcome: What happened.
            success: Whether the solution worked.
            confidence: Trust in the solution.

        Returns:
            The newly stored ``Case``.
        """
        # Start from the library size but probe for a free id so an existing
        # case is never overwritten when ids are not contiguous.
        next_index = len(self.cases) + 1
        case_id = f"case_{next_index:03d}"
        while case_id in self.cases:
            next_index += 1
            case_id = f"case_{next_index:03d}"

        case = Case(
            id=case_id,
            problem=problem,
            problem_type=problem_type,
            solution=solution,
            outcome=outcome,
            success=success,
            confidence=confidence,
            features=self.extract_features(problem),
            timestamp=datetime.now().isoformat()
        )

        self.cases[case_id] = case
        self.save_cases()

        logger.info(f"📚 Added new case: {case_id}")
        return case

    def get_statistics(self) -> Dict[str, Any]:
        """Summarise the case library.

        Returns:
            Dict with ``total_cases``, ``successful_cases``,
            ``success_rate`` (0 when the library is empty),
            ``cases_by_type`` (problem_type -> count) and ``most_retrieved``
            (up to three ``{'id', 'problem', 'retrievals'}`` dicts, most
            retrieved first; ``problem`` is truncated to 40 characters).
        """
        total = len(self.cases)
        successful = sum(1 for c in self.cases.values() if c.success)

        by_type = defaultdict(int)
        for case in self.cases.values():
            by_type[case.problem_type] += 1

        most_used = sorted(
            self.cases.values(),
            key=lambda c: c.times_retrieved,
            reverse=True
        )[:3]

        return {
            'total_cases': total,
            'successful_cases': successful,
            'success_rate': successful / total if total > 0 else 0,
            'cases_by_type': dict(by_type),
            'most_retrieved': [
                {'id': c.id, 'problem': c.problem[:40], 'retrievals': c.times_retrieved}
                for c in most_used
            ]
        }


def test_analogical_engine():
    """Run a console smoke test of the analogical engine.

    Builds an engine, asks it to reason about four sample problems, records
    one new case, and prints the resulting library statistics.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("🔍 EDEN ANALOGICAL ENGINE - TEST MODE")
    print(rule + "\n")

    engine = AnalogicalEngine()
    print(f"Case library: {len(engine.cases)} cases loaded\n")

    # (problem description, problem type) pairs to run through the engine.
    scenarios = (
        ("API endpoints are slow", "performance"),
        ("Memory leak in application", "debugging"),
        ("Services timing out", "system_design"),
        ("Completely new problem type", "unknown"),
    )

    for description, category in scenarios:
        print(f"Problem: {description}")
        outcome = engine.reason_by_analogy(description, category)

        marker = '✅' if outcome['success'] else '❌'
        print(f"  Success: {marker}")
        print(f"  Confidence: {outcome['confidence']:.2f}")

        found_analogies = outcome.get('analogies')
        if found_analogies:
            print(f"  Analogy: {found_analogies[0]}")
        print()

    # Record one fresh experience so the add path is exercised as well.
    print("Adding new case from experience...")
    new_case_fields = {
        'problem': "API rate limiting needed",
        'problem_type': "system_design",
        'solution': "Implement token bucket algorithm",
        'outcome': "Rate limiting working perfectly",
        'success': True,
        'confidence': 0.9,
    }
    engine.add_case(**new_case_fields)

    # Summary report.
    print("\n" + rule)
    print("📊 Case Library Statistics")
    print(rule)

    summary = engine.get_statistics()
    print(f"\nTotal cases: {summary['total_cases']}")
    print(f"Success rate: {summary['success_rate']:.0%}")

    print("\nCases by type:")
    for type_name, how_many in summary['cases_by_type'].items():
        print(f"  {type_name}: {how_many}")

    print("\nMost retrieved cases:")
    for entry in summary['most_retrieved']:
        print(f"  {entry['id']}: {entry['problem']} ({entry['retrievals']} times)")

    print(f"\n💾 Case library saved to: {CASE_LIBRARY}\n")


if __name__ == "__main__":
    import sys

    # The only supported invocation is `<script> test`; anything else prints
    # the usage banner. Slicing avoids an explicit length check on argv.
    if sys.argv[1:2] == ["test"]:
        test_analogical_engine()
    else:
        for usage_line in (
            "Eden Analogical Engine",
            "Usage: python3 analogical_engine.py test",
        ):
            print(usage_line)