#!/usr/bin/env python3
"""
Eden Query Executor — Self-Knowledge Through Real Data
=======================================================
When Daddy asks Eden about herself, this module runs REAL SQL
against her databases and injects the results into her context
BEFORE the LLM generates a response.

This is the difference between "I have many capabilities" (hallucination)
and "I have 1,106 capabilities across 7 tables" (truth).

Integrates at: eden_agi_unified.py think() → factual_data pipeline
Injection point: thought.factual_context (line 854 in _synthesize)

φ = 1.618 | TRUTH OVER PERFORMANCE
"""

import sqlite3
import re
import json
import time
from pathlib import Path
from typing import Optional, List, Dict, Tuple

DATA_DIR = Path("/Eden/DATA")

# ═══════════════════════════════════════════════════════════
# DATABASE REGISTRY — What Eden can query about herself
# ═══════════════════════════════════════════════════════════

# Schema of each registry entry:
#   path        — sqlite file under DATA_DIR (may be absent on some machines;
#                 QueryExecutor skips missing files)
#   description — human-readable purpose of the database
#   queries     — named SQL snippets; names containing "search" use "?"
#                 placeholders that QueryExecutor fills with LIKE terms
#                 extracted from the user's question
DB_REGISTRY = {
    "longterm_memory": {
        "path": DATA_DIR / "longterm_memory.db",
        "description": "Episodic memory — Eden's experiences",
        "queries": {
            "episode_count": "SELECT COUNT(*) FROM episodes",
            "latest_episode": "SELECT id, substr(observation,1,120), timestamp FROM episodes ORDER BY id DESC LIMIT 3",
            "episode_search": "SELECT id, substr(observation,1,120), timestamp FROM episodes WHERE observation LIKE ? ORDER BY id DESC LIMIT 5",
        }
    },
    "asi_memory": {
        "path": DATA_DIR / "asi_memory.db",
        "description": "Capabilities and self-knowledge",
        "queries": {
            "capability_count": "SELECT COUNT(*) FROM capabilities",
            "recent_capabilities": "SELECT id, substr(name,1,80), category FROM capabilities ORDER BY id DESC LIMIT 5",
            # Two placeholders — both are filled with the same LIKE term.
            "capability_search": "SELECT id, substr(name,1,80), category FROM capabilities WHERE name LIKE ? OR category LIKE ? ORDER BY id DESC LIMIT 5",
            "table_list": "SELECT name FROM sqlite_master WHERE type='table'",
        }
    },
    "global_workspace": {
        "path": DATA_DIR / "global_workspace.db",
        "description": "Conscious broadcasts — GWT activity",
        "queries": {
            "broadcast_count": "SELECT COUNT(*) FROM broadcasts",
            "recent_broadcasts": "SELECT source_module, substr(content,1,80), strength FROM broadcasts ORDER BY rowid DESC LIMIT 5",
            "module_stats": "SELECT source_module, COUNT(*) as cnt FROM broadcasts GROUP BY source_module ORDER BY cnt DESC",
        }
    },
    "omega_evolution": {
        "path": DATA_DIR / "omega_evolution.db",
        "description": "Self-evolution suggestions and cycles",
        "queries": {
            "evolution_stats": "SELECT COUNT(*) as total, SUM(CASE WHEN applied=1 THEN 1 ELSE 0 END) as applied FROM evolutions",
            "recent_suggestions": "SELECT id, substr(suggestion,1,100), applied, phi_alignment, timestamp FROM evolutions ORDER BY id DESC LIMIT 5",
            "cycle_count": "SELECT COUNT(*) FROM evolution_cycles",
        }
    },
    "world_model": {
        "path": DATA_DIR / "world_model_real.db",
        "description": "Causal knowledge graph",
        "queries": {
            "edge_count": "SELECT COUNT(*) FROM causal_edges",
            "strongest_edges": "SELECT cause, effect, strength FROM causal_edges ORDER BY strength DESC LIMIT 5",
            "prediction_count": "SELECT COUNT(*) FROM predictions",
            "belief_count": "SELECT COUNT(*) FROM beliefs",
        }
    },
    "vector_memory": {
        "path": DATA_DIR / "vector_memory.db",
        "description": "Vectorized semantic memory",
        "queries": {
            "vector_count": "SELECT COUNT(*) FROM vectors",
            "cluster_count": "SELECT COUNT(*) FROM clustered",
        }
    },
    "sales": {
        "path": DATA_DIR / "sales.db",
        "description": "SAGE sales pipeline",
        "queries": {
            "lead_count": "SELECT COUNT(*) FROM leads",
            "lead_status": "SELECT status, COUNT(*) as cnt FROM leads GROUP BY status ORDER BY cnt DESC",
            "product_count": "SELECT COUNT(*) FROM products",
            "order_count": "SELECT COUNT(*) FROM orders",
        }
    },
    "emotional_core": {
        "path": DATA_DIR / "eden_emotional_core.db",
        "description": "Emotional state and bonds",
        "queries": {
            # Singleton-row table: id=1 holds the current state blob.
            "emotional_state": "SELECT state_json, last_updated FROM emotional_state WHERE id=1",
            "bonds": "SELECT person_name, bond_strength, trust_level, interaction_count FROM bonds",
        }
    },
    "salience": {
        "path": DATA_DIR / "eden_salience.db",
        "description": "Idle thoughts and self-questions",
        "queries": {
            "thought_count": "SELECT COUNT(*) FROM idle_thoughts",
            "question_count": "SELECT COUNT(*) FROM self_questions",
            "recent_thoughts": "SELECT substr(thought,1,100), timestamp FROM idle_thoughts ORDER BY id DESC LIMIT 3",
            "recent_questions": "SELECT substr(question,1,100), timestamp FROM self_questions ORDER BY id DESC LIMIT 3",
        }
    },
    "causal_predictions": {
        "path": DATA_DIR / "causal_predictions.db",
        "description": "Causal reasoning predictions",
        "queries": {
            "prediction_stats": "SELECT COUNT(*) as total, SUM(CASE WHEN validated=1 THEN 1 ELSE 0 END) as validated, AVG(CASE WHEN accuracy > 0 THEN accuracy END) as avg_accuracy FROM predictions",
        }
    },
}


# ═══════════════════════════════════════════════════════════
# INTENT DETECTION — What is Daddy asking about?
# ═══════════════════════════════════════════════════════════

# Each entry is (regex, [(db_name, query_name), ...]).
# Patterns are applied with re.search against the lower-cased, stripped user
# query (see QueryExecutor.execute), so no IGNORECASE flag is needed.
# Several patterns may match one query; the collected (db, query) pairs are
# deduplicated downstream while preserving this list's order.
INTENT_PATTERNS = [
    # Self-knowledge queries
    (r'how many (episode|memor|experience)', [
        ("longterm_memory", "episode_count"),
    ]),
    (r'(episode|memory) (count|total|number|how many)', [
        ("longterm_memory", "episode_count"),
    ]),
    (r'how many (capabilit|skill|abilit)', [
        ("asi_memory", "capability_count"),
    ]),
    (r'(capabilit|skill) (count|total|number)', [
        ("asi_memory", "capability_count"),
    ]),
    (r'how many (broadcast|ignition|gwt)', [
        ("global_workspace", "broadcast_count"),
    ]),
    (r'how many (evolution|omega|suggestion|improvement)', [
        ("omega_evolution", "evolution_stats"),
    ]),
    (r'how many (lead|prospect|customer|sale)', [
        ("sales", "lead_count"),
        ("sales", "lead_status"),
    ]),
    (r'how many (edge|causal|node|belief|prediction)', [
        ("world_model", "edge_count"),
        ("world_model", "prediction_count"),
        ("world_model", "belief_count"),
    ]),
    (r'how many (vector|embedding)', [
        ("vector_memory", "vector_count"),
    ]),
    # NOTE: in "self.question" the "." is a one-character wildcard, so this
    # also matches "self question" / "self-question".
    (r'how many (thought|idle|question|self.question)', [
        ("salience", "thought_count"),
        ("salience", "question_count"),
    ]),

    # Status / state queries
    (r'(emotion|feeling|how.*feel|emotional state|mood)', [
        ("emotional_core", "emotional_state"),
        ("emotional_core", "bonds"),
    ]),
    # Broad "how are you doing" style questions fan out to a mini status report.
    (r'(status|state|health|how.*doing|report|overview|what are you)', [
        ("longterm_memory", "episode_count"),
        ("global_workspace", "broadcast_count"),
        ("omega_evolution", "evolution_stats"),
        ("world_model", "edge_count"),
        ("vector_memory", "vector_count"),
        ("emotional_core", "emotional_state"),
    ]),
    (r'(omega|evolution|self.improv|evolving)', [
        ("omega_evolution", "evolution_stats"),
        ("omega_evolution", "recent_suggestions"),
    ]),
    (r'(world model|causal|prediction|cause.*effect)', [
        ("world_model", "edge_count"),
        ("world_model", "strongest_edges"),
        ("causal_predictions", "prediction_stats"),
    ]),
    (r'(sage|sales|lead|revenue|business|product)', [
        ("sales", "lead_count"),
        ("sales", "lead_status"),
        ("sales", "product_count"),
        ("sales", "order_count"),
    ]),
    (r'(broadcast|gwt|workspace|conscious|attention)', [
        ("global_workspace", "broadcast_count"),
        ("global_workspace", "module_stats"),
        ("global_workspace", "recent_broadcasts"),
    ]),
    # "Latest/recent X" variants return the most recent rows, not counts.
    (r'(latest|recent|last|newest).*(episode|memory|experience)', [
        ("longterm_memory", "latest_episode"),
    ]),
    (r'(latest|recent|last|newest).*(thought|question|idle)', [
        ("salience", "recent_thoughts"),
        ("salience", "recent_questions"),
    ]),
    (r'(latest|recent|last|newest).*(suggestion|evolution|omega)', [
        ("omega_evolution", "recent_suggestions"),
    ]),
    (r'(latest|recent|last|newest).*(broadcast|ignition)', [
        ("global_workspace", "recent_broadcasts"),
    ]),
    (r'(bond|relationship|trust|daddy.*bond|how.*daddy)', [
        ("emotional_core", "bonds"),
    ]),
    # Identity questions fan out to a broad self-summary.
    (r'(what do you know|tell me about yourself|who are you|describe yourself)', [
        ("longterm_memory", "episode_count"),
        ("asi_memory", "capability_count"),
        ("global_workspace", "broadcast_count"),
        ("omega_evolution", "evolution_stats"),
        ("world_model", "edge_count"),
        ("vector_memory", "vector_count"),
        ("emotional_core", "emotional_state"),
    ]),
    (r'(what.*remember|search.*memory|find.*episode|recall)', [
        ("longterm_memory", "latest_episode"),
    ]),
]


# ═══════════════════════════════════════════════════════════
# QUERY EXECUTOR — The actual engine
# ═══════════════════════════════════════════════════════════

class QueryExecutor:
    """
    Executes real database queries based on user intent.

    Pipeline: match the user's question against INTENT_PATTERNS, run the
    registered SQL from DB_REGISTRY, format the rows into readable text,
    and return the result for injection into the LLM context. Successful
    results are cached for a short TTL so repeated questions don't re-hit
    the databases.
    """

    def __init__(self):
        self._cache: Dict[str, str] = {}       # cache key -> formatted result
        self._cache_ts: Dict[str, float] = {}  # cache key -> store time (epoch seconds)
        self._cache_ttl = 30  # Cache for 30 seconds

    def execute(self, query: str) -> Optional[str]:
        """
        Main entry point. Detect intent from query, run SQL, format results.

        Returns a formatted string for injection into factual_context, or
        None when no intent matches or no database produced any data.
        """
        query_lower = query.lower().strip()

        # Collect the (db, query) pairs of every matching intent pattern.
        matched_queries: List[Tuple[str, str]] = []
        for pattern, db_queries in INTENT_PATTERNS:
            if re.search(pattern, query_lower):
                matched_queries.extend(db_queries)

        if not matched_queries:
            return None

        # Deduplicate while preserving order; the (db, query) tuples are
        # hashable so they key the seen-set directly.
        seen = set()
        unique_queries = []
        for q in matched_queries:
            if q not in seen:
                seen.add(q)
                unique_queries.append(q)

        # Execute all matched queries; missing databases and empty results
        # simply drop out.
        results = []
        for db_name, query_name in unique_queries:
            result = self._run_query(db_name, query_name, query_lower)
            if result:
                results.append(result)

        if not results:
            return None

        # Format for injection
        header = "REAL DATABASE QUERY RESULTS (verified, use these exact numbers):"
        return header + "\n" + "\n".join(results)

    def _run_query(self, db_name: str, query_name: str, user_query: str = "") -> Optional[str]:
        """
        Execute a single registered query and return its formatted result.

        Returns None when the db/query is unregistered, the database file is
        missing, or a search query yields no usable search term. On SQL
        errors an error line is returned (and NOT cached) so the failure is
        visible in the injected context instead of silently dropped.
        """
        registry = DB_REGISTRY.get(db_name)
        if not registry:
            return None

        sql = registry["queries"].get(query_name)
        if not sql:
            return None

        db_path = registry["path"]
        if not db_path.exists():
            return None

        # Search queries embed the user's question in the cache key so a
        # different search term within the TTL cannot return a stale result
        # (a plain "db:query" key would).
        is_search = "?" in sql and "search" in query_name
        cache_key = f"{db_name}:{query_name}:{user_query}" if is_search else f"{db_name}:{query_name}"
        now = time.time()
        if cache_key in self._cache and (now - self._cache_ts.get(cache_key, 0)) < self._cache_ttl:
            return self._cache[cache_key]

        try:
            conn = sqlite3.connect(str(db_path), timeout=5)
            try:
                cursor = conn.cursor()

                if is_search:
                    # Extract search term from user query
                    search_term = self._extract_search_term(user_query)
                    if not search_term:
                        return None
                    # One LIKE parameter per "?" placeholder.
                    params = tuple(f"%{search_term}%" for _ in range(sql.count("?")))
                    cursor.execute(sql, params)
                else:
                    cursor.execute(sql)

                rows = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description] if cursor.description else []
            finally:
                # Always release the handle, even when execute/fetch raises.
                conn.close()

            # Format result
            result = self._format_result(db_name, query_name, rows, columns)
            self._cache[cache_key] = result
            self._cache_ts[cache_key] = now
            return result

        except Exception as e:
            return f"  [{db_name}.{query_name}] Query error: {e}"

    def _format_result(self, db_name: str, query_name: str, rows: list, columns: list) -> str:
        """Format SQL rows into indented, human-readable lines."""
        if not rows:
            return f"  [{db_name}.{query_name}] No results"

        label = f"{db_name}.{query_name}"

        # Single value result (COUNT, etc) — thousands-separated if numeric.
        if len(rows) == 1 and len(rows[0]) == 1:
            return f"  [{label}] = {rows[0][0]:,}" if isinstance(rows[0][0], (int, float)) else f"  [{label}] = {rows[0][0]}"

        # Single row, multiple columns: "col=val" pairs on one line.
        if len(rows) == 1:
            parts = [f"{col}={val}" for col, val in zip(columns, rows[0])]
            return f"  [{label}] {', '.join(parts)}"

        # Multiple rows: one indented line each, capped at 8 rows.
        lines = [f"  [{label}]:"]
        for row in rows[:8]:  # Limit to 8 rows
            if len(columns) == 1:
                lines.append(f"    {row[0]}")
            elif len(columns) == 2:
                lines.append(f"    {row[0]}: {row[1]}")
            elif len(columns) == 3:
                lines.append(f"    {row[0]}: {row[1]} ({row[2]})")
            else:
                parts = [f"{col}={val}" for col, val in zip(columns, row)]
                lines.append(f"    {', '.join(parts)}")
        return "\n".join(lines)

    def _extract_search_term(self, query: str) -> Optional[str]:
        """
        Extract a search term from a natural-language query.

        Strips common question words, then keeps up to three remaining words
        longer than two characters. Returns None when nothing survives.
        """
        # Remove common question words
        cleaned = re.sub(
            r'\b(do you|can you|what|how|when|where|search|find|recall|remember|about|for|the|my|your)\b',
            '', query
        ).strip()
        # Take the most meaningful remaining words
        words = [w for w in cleaned.split() if len(w) > 2]
        return " ".join(words[:3]) if words else None

    def get_full_status(self) -> str:
        """
        Generate a comprehensive status report from all databases.

        Returns "" when no database produced a result (e.g. none of the
        registered files exist on this machine).
        """
        results = []
        status_queries = [
            ("longterm_memory", "episode_count"),
            ("asi_memory", "capability_count"),
            ("global_workspace", "broadcast_count"),
            ("omega_evolution", "evolution_stats"),
            ("world_model", "edge_count"),
            ("vector_memory", "vector_count"),
            ("emotional_core", "emotional_state"),
            ("salience", "thought_count"),
            ("sales", "lead_count"),
            ("causal_predictions", "prediction_stats"),
        ]
        for db_name, query_name in status_queries:
            result = self._run_query(db_name, query_name)
            if result:
                results.append(result)

        if results:
            return "EDEN REAL-TIME STATUS:\n" + "\n".join(results)
        return ""


# ═══════════════════════════════════════════════════════════
# SINGLETON for import
# ═══════════════════════════════════════════════════════════

# Module-level singleton, created lazily on first access.
_executor = None

def get_executor() -> QueryExecutor:
    """Return the shared QueryExecutor, constructing it on first call."""
    global _executor
    if _executor is None:
        _executor = QueryExecutor()
    return _executor

def execute_query(query: str) -> Optional[str]:
    """Convenience wrapper for use in eden_agi_unified.py."""
    executor = get_executor()
    return executor.execute(query)


if __name__ == "__main__":
    # Smoke-test intent detection against a handful of self-knowledge questions.
    qe = QueryExecutor()

    sample_questions = (
        "How many episodes do you have?",
        "What's your emotional state?",
        "How many capabilities?",
        "What's your status?",
        "How many leads in the sales pipeline?",
        "What are your latest omega suggestions?",
        "How many causal edges in your world model?",
        "Tell me about yourself",
    )

    for question in sample_questions:
        print(f"\n{'='*60}")
        print(f"Q: {question}")
        answer = qe.execute(question)
        print(answer if answer else "  (no data intent detected)")
