"""
GROUNDING VERIFIER - Prevents Eden from hallucinating
Injects REAL data into prompts BEFORE she speaks

The Problem:
- Eden generates numbers BEFORE code runs
- She makes up "$23,450" and "Lead ABC123"
- Code output comes too late

The Fix:
- Detect data questions
- Query REAL data FIRST
- Inject into prompt BEFORE Ollama generates
- She can only use numbers she's given
"""
import sqlite3
import json
import re

# SQLite database queried for the `leads`, `outreach_queue` and `orders` tables.
SALES_DB = "/Eden/DATA/sales.db"
# SQLite database queried for the `caps` table (reported as capabilities_total).
ASI_DB = "/Eden/DATA/asi_memory.db"
# SQLite database queried for the `whales` table.
WHALE_DB = "/Eden/DATA/whale_crm.db"

def get_real_business_data(sales_db=None, asi_db=None, whale_db=None, ledger_path=None):
    """Get ALL real business data - call this BEFORE Eden speaks.

    Every source is read independently and on a best-effort basis: a failure
    in one source never raises and never wipes out values that were read
    successfully from another source.

    Args:
        sales_db: Path of the sales SQLite database. Defaults to SALES_DB.
        asi_db: Path of the capabilities SQLite database. Defaults to ASI_DB.
        whale_db: Path of the whale SQLite database. Defaults to WHALE_DB.
        ledger_path: Path of the revenue ledger JSON file. Defaults to
            "/Eden/CORE/revenue_ledger.json".

    Returns:
        dict that may contain:
          - leads_total, leads_new, leads_contacted, outreach_sent,
            revenue_collected, top_leads: from the sales database. Keys
            read before a failure are kept; the rest are absent.
          - error: message of the sales database failure, if any.
          - capabilities_total: row count of `caps` (0 on failure).
          - whales_total: row count of `whales` (0 on failure).
          - invoiced_total, invoiced_items: sum of the ledger "amount"
            fields and the raw ledger entries (0 and [] on failure).
    """
    # Resolve defaults lazily so explicit paths never touch the module constants.
    sales_db = SALES_DB if sales_db is None else sales_db
    asi_db = ASI_DB if asi_db is None else asi_db
    whale_db = WHALE_DB if whale_db is None else whale_db
    if ledger_path is None:
        ledger_path = "/Eden/CORE/revenue_ledger.json"

    data = {}

    # Sales data
    conn = None
    try:
        conn = sqlite3.connect(sales_db)
        data["leads_total"] = _fetch_scalar(conn, "SELECT COUNT(*) FROM leads")
        data["leads_new"] = _fetch_scalar(
            conn, "SELECT COUNT(*) FROM leads WHERE status IN ('new', '')"
        )
        data["leads_contacted"] = _fetch_scalar(
            conn, "SELECT COUNT(*) FROM leads WHERE status='contacted'"
        )
        data["outreach_sent"] = _fetch_scalar(
            conn, "SELECT COUNT(*) FROM outreach_queue WHERE status='sent'"
        )

        # Real orders (not test). SUM() yields NULL on an empty set -> report 0.
        revenue = _fetch_scalar(
            conn,
            "SELECT SUM(amount) FROM orders WHERE status NOT IN ('TEST_VOID', 'test')",
        )
        data["revenue_collected"] = revenue if revenue else 0

        # Top 3 REAL leads
        rows = conn.execute("""
            SELECT id, identifier, score, source FROM leads 
            WHERE identifier IS NOT NULL AND identifier != ''
            ORDER BY score DESC LIMIT 3
        """).fetchall()
        data["top_leads"] = [
            {"id": row[0], "identifier": row[1], "score": row[2], "source": row[3]}
            for row in rows
        ]
    except sqlite3.Error as exc:
        data["error"] = str(exc)
    finally:
        # Close even when a query fails so the handle is not leaked.
        if conn is not None:
            conn.close()

    # Capabilities
    conn = None
    try:
        conn = sqlite3.connect(asi_db)
        data["capabilities_total"] = _fetch_scalar(conn, "SELECT COUNT(*) FROM caps")
    except sqlite3.Error:
        data["capabilities_total"] = 0
    finally:
        if conn is not None:
            conn.close()

    # Whale data
    conn = None
    try:
        conn = sqlite3.connect(whale_db)
        data["whales_total"] = _fetch_scalar(conn, "SELECT COUNT(*) FROM whales")
    except sqlite3.Error:
        data["whales_total"] = 0
    finally:
        if conn is not None:
            conn.close()

    # Revenue ledger - handled separately from the whale DB so that a missing
    # or malformed ledger cannot reset a valid whale count (and vice versa).
    try:
        with open(ledger_path, encoding="utf-8") as f:
            ledger = json.load(f)
        data["invoiced_total"] = sum(item.get("amount", 0) for item in ledger)
        data["invoiced_items"] = ledger
    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: file missing/unreadable; ValueError: invalid JSON or bytes;
        # TypeError/AttributeError: ledger is not a list of dicts with
        # numeric amounts.
        data["invoiced_total"] = 0
        data["invoiced_items"] = []

    return data


def _fetch_scalar(conn, sql):
    """Run *sql* on *conn* and return the first column of the first row."""
    return conn.execute(sql).fetchone()[0]


def format_grounding_context(data):
    """Render the real business data as the context block Eden MUST use.

    Reads the keys produced by get_real_business_data() via .get(), so any
    missing counter is shown as 0. At most the first three entries of
    "top_leads" are listed; each one must provide 'id', 'identifier',
    'score' and 'source'. Entries of "invoiced_items" are read with .get().
    Returns the formatted multi-line string.
    """
    # One numbered line per lead, ranks starting at 1.
    lead_lines = [
        f"  {rank}. ID:{lead['id']} - {lead['identifier']} (score:{lead['score']}, source:{lead['source']})\n"
        for rank, lead in enumerate(data.get("top_leads", [])[:3], start=1)
    ]
    top_leads_block = "".join(lead_lines) or "  No leads with identifiers yet\n"

    # One bullet per ledger entry.
    invoice_lines = [
        f"  - {entry.get('client')}: ${entry.get('amount')} ({entry.get('status')})\n"
        for entry in data.get("invoiced_items", [])
    ]
    invoiced_block = "".join(invoice_lines) or "  None\n"

    return f"""
[GROUNDED REALITY - USE ONLY THESE NUMBERS]:
Revenue Collected: ${data.get('revenue_collected', 0)}
Invoiced (unpaid): ${data.get('invoiced_total', 0)}
{invoiced_block}
Leads: {data.get('leads_total', 0)} total ({data.get('leads_new', 0)} new, {data.get('leads_contacted', 0)} contacted)
Outreach Sent: {data.get('outreach_sent', 0)}
Capabilities: {data.get('capabilities_total', 0)}
Whales Tracked: {data.get('whales_total', 0)}

Top 3 Real Leads:
{top_leads_block}
[END GROUNDED DATA - DO NOT MAKE UP OTHER NUMBERS]
"""


def is_data_question(user_input):
    """Return True when the input mentions any business-data keyword.

    Matching is a case-insensitive substring test, so a keyword also
    matches inside a longer word.
    """
    triggers = (
        "how many", "count", "total", "leads", "sales", "revenue",
        "pipeline", "stats", "numbers", "orders", "outreach",
        "capabilities", "caps", "whales", "money", "collected",
    )
    lowered = user_input.lower()
    for trigger in triggers:
        if trigger in lowered:
            return True
    return False


def ground_before_response(user_input):
    """
    Build the grounding context to inject BEFORE Ollama generates.

    Returns the formatted real-data block when the input looks like a
    data question, otherwise an empty string.
    """
    # Non-data questions need no grounding and skip the data fetch.
    if not is_data_question(user_input):
        return ""
    return format_grounding_context(get_real_business_data())


def verify_response(response_text):
    """
    Call this AFTER Ollama generates.
    Checks for obvious hallucinations.

    Args:
        response_text: The generated reply to inspect. None is treated as
            an empty reply.

    Returns:
        dict with "clean" (True when nothing suspicious was found) and
        "issues" (list of human-readable findings).
    """
    issues = []
    text = response_text or ""

    # Get real data
    data = get_real_business_data()
    # The key is absent when the sales DB could not be read; treat that as
    # $0, the same default the grounding context shows to the model.
    real_revenue = data.get("revenue_collected", 0)

    # Check for fake dollar amounts
    for dollar in re.findall(r'\$[\d,]+(?:\.\d{2})?', text):
        try:
            amount = float(dollar.replace('$', '').replace(',', ''))
        except ValueError:
            # The pattern also matches digit-less text such as "$,".
            continue
        if amount > 100 and real_revenue == 0:
            issues.append(f"Claimed {dollar} but real revenue is $0")

    # Check for fake lead IDs (common patterns). The last entry is a raw
    # string so that \d reaches the regex engine without an invalid escape.
    fake_patterns = ["ABC123", "DEF456", "GHI789", "XYZ", r"Lead ID: [A-Z]{3}\d{3}"]
    for pattern in fake_patterns:
        if re.search(pattern, text):
            issues.append(f"Suspicious lead ID pattern: {pattern}")

    return {
        "clean": not issues,
        "issues": issues
    }


if __name__ == "__main__":
    # Manual smoke test: fetch data, render the context, then exercise the
    # question detector and the response verifier.
    divider = "=" * 50

    print("=== GROUNDING VERIFIER TEST ===\n")

    # Data fetch
    data = get_real_business_data()
    print("Real Data:", json.dumps(data, indent=2, default=str))

    # Context generation
    print("\n" + divider)
    print(format_grounding_context(data))

    # Question detection
    print(divider)
    print("Is data question?")
    for sample in ("how many leads", "tell me a joke"):
        print(f"  '{sample}' ->", is_data_question(sample))

    # Response verification
    print("\n" + divider)
    print("Verify response:")
    fake = "We have $23,450 revenue and Lead ID ABC123 is hot!"
    verdict = verify_response(fake)
    print(f"  '{fake}'")
    print(f"  Clean: {verdict['clean']}")
    print(f"  Issues: {verdict['issues']}")
