#!/usr/bin/env python3
"""
Eden Perceptual Agent - Complete Multi-Modal Sensory System
Features: Camera, VLM, Face Recognition, Audio Detection
"""
import sys
sys.path.insert(0, '/Eden/CORE')

import cv2
import json
import time
import requests
import subprocess
import numpy as np
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, List

# Import face recognition
try:
    from eden_face_recognition import EdenFaceRecognition
    FACE_REC_AVAILABLE = True
except ImportError:
    FACE_REC_AVAILABLE = False
    print("[PERCEPTION] Face recognition not available")

class EdenPerceptualAgent:
    """Multi-modal perception agent.

    Combines camera capture, vision-language-model (VLM) scene analysis via a
    local Ollama server, optional face recognition, PulseAudio playback
    detection and user-idle detection into a single world-state dict. Each
    cycle the state is written to /Eden/DATA/world_model.json and appended to
    a bounded history at /Eden/DATA/perception_log.json.
    """

    def __init__(self):
        # Output locations for the current world state and the rolling history.
        self.world_model_path = Path("/Eden/DATA/world_model.json")
        self.perception_log_path = Path("/Eden/DATA/perception_log.json")
        # Local Ollama generate endpoint and the vision model used on frames.
        self.ollama_url = "http://localhost:11434/api/generate"
        self.vision_model = "llava:7b"

        # Camera handle (cv2.VideoCapture) plus availability flag.
        self.camera = None
        self.camera_available = False

        # Optional face-recognition backend (None when the import failed).
        if FACE_REC_AVAILABLE:
            self.face_rec = EdenFaceRecognition()
            print(f"[PERCEPTION] Face recognition loaded: {list(self.face_rec.known_faces.keys())}")
        else:
            self.face_rec = None

        # In-memory perception history, trimmed to the newest max_log_entries.
        self.perception_log: List[Dict] = []
        self.max_log_entries = 1000

        print("[PERCEPTION] Initializing Eden Perceptual Agent...")
        self._initialize_camera()
        print(f"[PERCEPTION] Camera available: {self.camera_available}")

    def _initialize_camera(self):
        """Open the first working camera (index 0, then index 1).

        Sets self.camera / self.camera_available. A VideoCapture that failed
        to open is released before the next index is tried so the device
        handle is not leaked.
        """
        try:
            self.camera = cv2.VideoCapture(0)
            if self.camera.isOpened():
                self.camera_available = True
                # Request full-HD frames for better VLM input quality.
                self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
                self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
                print("[PERCEPTION] Obsbot camera /dev/video0 initialized")
                return

            # Fix: release the failed handle before opening the fallback device.
            self.camera.release()
            self.camera = cv2.VideoCapture(1)
            if self.camera.isOpened():
                self.camera_available = True
                print("[PERCEPTION] Camera /dev/video1 initialized")
                return

            # Fix: don't keep a dead VideoCapture object around.
            self.camera.release()
            self.camera = None
            print("[PERCEPTION] No camera available, using system-based detection")
        except Exception as e:
            print(f"[PERCEPTION] Camera initialization failed: {e}")
            self.camera_available = False

    def capture_frame(self) -> Optional[np.ndarray]:
        """Return one BGR frame from the camera, or None when unavailable or
        the read fails."""
        if not self.camera_available:
            return None

        try:
            ret, frame = self.camera.read()
            if ret:
                return frame
        except Exception as e:
            print(f"[PERCEPTION ERROR] Frame capture failed: {e}")
        return None

    def recognize_faces(self, frame: np.ndarray) -> List[str]:
        """Return "name (confidence%)" labels for faces recognized in frame.

        Returns an empty list when the face-recognition backend is missing or
        raises.
        """
        if not FACE_REC_AVAILABLE or not self.face_rec:
            return []

        try:
            recognized = self.face_rec.recognize_faces(frame, tolerance=0.6)
            return [f"{r['name']} ({r['confidence']:.0%})" for r in recognized]
        except Exception as e:
            print(f"[PERCEPTION] Face recognition error: {e}")
            return []

    def analyze_with_vlm(self, frame: np.ndarray) -> Dict:
        """Describe the frame with the configured VLM via Ollama.

        Falls back to _basic_analysis (brightness only) on JPEG-encoding
        failure, request timeout, non-200 response, or any other error.
        """
        try:
            import base64

            # Fix: check the imencode success flag before using the buffer.
            ok, buffer = cv2.imencode('.jpg', frame)
            if ok:
                image_base64 = base64.b64encode(buffer).decode('utf-8')

                prompt = """Analyze what you see:
1. PERSON: Is there a person? yes/no
2. LIGHTING: bright/moderate/dim/dark
3. ACTIVITY: active/calm/none
4. OBJECTS: What objects are visible? (especially in hands)
5. DESC: One sentence description

Be specific about items held or nearby."""

                payload = {
                    "model": self.vision_model,
                    "prompt": prompt,
                    "images": [image_base64],
                    "stream": False
                }

                response = requests.post(self.ollama_url, json=payload, timeout=30)

                if response.status_code == 200:
                    result = response.json().get('response', '')
                    return self._parse_vlm_response(result)

        except requests.exceptions.Timeout:
            # Slow model: silently fall back to the basic analysis below.
            pass
        except Exception as e:
            print(f"[PERCEPTION] VLM error: {e}")

        return self._basic_analysis(frame)

    def _parse_vlm_response(self, text: str) -> Dict:
        """Extract structured fields from free-form VLM output.

        Matching is substring-based on the uppercased text; the first keyword
        found wins and unmatched fields keep their 'unknown' defaults.
        """
        analysis = {
            'human_present': False,
            'lighting': 'unknown',
            'activity': 'unknown',
            'description': text[:200]  # truncated raw model response
        }

        upper_text = text.upper()
        if 'PERSON: YES' in upper_text or 'PERSON PRESENT' in upper_text:
            analysis['human_present'] = True

        for level in ['BRIGHT', 'MODERATE', 'DIM', 'DARK']:
            if level in upper_text:
                analysis['lighting'] = level.lower()
                break

        for level in ['ACTIVE', 'CALM', 'NONE']:
            if level in upper_text:
                analysis['activity'] = level.lower()
                break

        return analysis

    def _basic_analysis(self, frame: np.ndarray) -> Dict:
        """Fallback analysis without the VLM: classify lighting from the mean
        grayscale brightness of the frame."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        brightness = np.mean(gray)

        # Thresholds on the 0-255 grayscale mean.
        if brightness > 180:
            lighting = 'bright'
        elif brightness > 100:
            lighting = 'moderate'
        elif brightness > 40:
            lighting = 'dim'
        else:
            lighting = 'dark'

        return {
            'human_present': False,
            'lighting': lighting,
            'activity': 'unknown',
            'description': f'Basic analysis: {lighting} (brightness={brightness:.0f})'
        }

    def detect_audio(self) -> str:
        """Return 'active' when any PulseAudio sink input is RUNNING, else
        'silent' (also on any pactl failure)."""
        try:
            result = subprocess.run(
                ['pactl', 'list', 'sink-inputs'],
                capture_output=True,
                text=True,
                timeout=1
            )
            return 'active' if 'RUNNING' in result.stdout else 'silent'
        except (subprocess.SubprocessError, OSError):
            # Fix: narrowed from a bare except; a missing or timed-out pactl
            # simply means we report silence.
            return 'silent'

    def detect_user_activity(self) -> Dict:
        """Report whether the user was active within the last 30 s, based on
        xprintidle's idle time; idle_seconds is None when detection fails."""
        try:
            result = subprocess.run(['xprintidle'], capture_output=True, text=True, timeout=1)
            idle_ms = int(result.stdout.strip())  # xprintidle reports milliseconds
            idle_sec = idle_ms / 1000
            return {'active': idle_sec < 30, 'idle_seconds': idle_sec}
        except (subprocess.SubprocessError, OSError, ValueError):
            # Fix: narrowed from a bare except; ValueError covers unparsable
            # xprintidle output.
            return {'active': False, 'idle_seconds': None}

    def perceive_environment(self) -> Dict:
        """Run one full perception pass and return the world-state dict.

        Merges camera/face/VLM results (when a frame is available) with audio
        and user-activity signals; keyboard/mouse activity also implies a
        human is present.
        """
        timestamp = datetime.now().isoformat()

        # Default state used when no camera frame can be captured.
        world_state = {
            'last_updated': timestamp,
            'human_present': False,
            'recognized_people': [],
            'activity_level': 'unknown',
            'environment': 'No camera access',
            'visual_brightness': 'unknown',
            'audio_volume': 'silent',
            'user_idle_seconds': None,
            'perception_method': 'system'
        }

        frame = self.capture_frame()

        if frame is not None:
            # Face recognition first; any match also implies human_present.
            recognized = self.recognize_faces(frame)
            world_state['recognized_people'] = recognized

            # VLM (or brightness-fallback) scene analysis.
            analysis = self.analyze_with_vlm(frame)
            world_state['human_present'] = analysis['human_present'] or len(recognized) > 0
            world_state['visual_brightness'] = analysis['lighting']
            world_state['activity_level'] = analysis['activity']
            world_state['environment'] = analysis['description']
            world_state['perception_method'] = 'vlm+face_rec' if recognized else 'vlm'

        # Audio playback detection.
        world_state['audio_volume'] = self.detect_audio()

        # User keyboard/mouse activity.
        user_act = self.detect_user_activity()
        world_state['user_idle_seconds'] = user_act['idle_seconds']
        if user_act['active']:
            world_state['human_present'] = True

        return world_state

    def update_world_model(self, world_state: Dict):
        """Overwrite the world-model file with the latest state (best-effort)."""
        try:
            with open(self.world_model_path, 'w') as f:
                json.dump(world_state, f, indent=2)
        except Exception as e:
            print(f"[PERCEPTION ERROR] Failed to write world model: {e}")

    def log_perception(self, world_state: Dict):
        """Append the state to the in-memory history, trim to the newest
        max_log_entries, and persist the whole log (best-effort)."""
        self.perception_log.append(world_state)

        if len(self.perception_log) > self.max_log_entries:
            self.perception_log = self.perception_log[-self.max_log_entries:]

        try:
            with open(self.perception_log_path, 'w') as f:
                json.dump(self.perception_log, f, indent=2)
        except Exception as e:
            print(f"[PERCEPTION ERROR] Failed to write log: {e}")

    def run(self):
        """Main perception loop: perceive, persist, report — one cycle every
        5 seconds until interrupted. Releases the camera on exit."""
        print("[PERCEPTION] Starting continuous perception loop...")
        cycle = 0

        try:
            while True:
                try:
                    cycle += 1

                    # Perceive and persist.
                    world_state = self.perceive_environment()
                    self.update_world_model(world_state)
                    self.log_perception(world_state)

                    # Status line roughly once a minute (12 cycles x 5 s).
                    if cycle % 12 == 0:
                        status = "👤 PRESENT" if world_state['human_present'] else "⭕ ABSENT"
                        people = f" [{', '.join(world_state['recognized_people'])}]" if world_state['recognized_people'] else ""
                        print(f"[PERCEPTION] Cycle {cycle} | {status}{people} | {world_state['visual_brightness']}")

                    # Perception cadence: one sample every 5 seconds.
                    time.sleep(5)

                except KeyboardInterrupt:
                    print("\n[PERCEPTION] Shutting down...")
                    break
                except Exception as e:
                    print(f"[PERCEPTION ERROR] {e}")
                    time.sleep(5)
        finally:
            # Fix: release the camera even if the loop exits abnormally,
            # not only via the KeyboardInterrupt break.
            if self.camera and self.camera_available:
                self.camera.release()
            print("[PERCEPTION] Stopped.")

if __name__ == "__main__":
    # Script entry point: build the agent and start its perception loop.
    EdenPerceptualAgent().run()