#!/usr/bin/env python3
"""
Eden Vision System - Real Computer Vision
Face detection, recognition, object detection, OCR, visual awareness
"""

import cv2
import face_recognition
import pytesseract
import numpy as np
from PIL import Image
import time
from pathlib import Path
import json

class EdenVision:
    """Complete computer vision system for Eden.

    Wraps a single OpenCV camera and layers face detection/recognition,
    rough object/scene analysis, OCR, brightness/colour analysis and
    frame-to-frame motion detection on top of it.
    """

    # Frames are downscaled by this factor before face detection to keep
    # recognition fast; detected locations are scaled back up by its inverse.
    FACE_DETECT_SCALE = 0.25

    def __init__(self):
        """Open the default camera at 1280x720 and load known faces."""
        self.camera = cv2.VideoCapture(0)
        self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        if not self.camera.isOpened():
            # Don't crash here: capture_frame() will simply return None.
            print("   ⚠️  Camera could not be opened")

        # Known faces database: name -> face_recognition 128-d encoding
        self.known_faces = {}
        self.load_known_faces()

        # Vision state
        self.last_frame = None          # most recently captured BGR frame
        self.current_observations = {}  # last result returned by perceive()

        print("✅ Eden vision system initialized")
        print("   Camera: 1280x720")
        print("   Capabilities: Faces, Objects, Text, Math")

    def load_known_faces(self):
        """Load every known face image from the database directory.

        Generalizes the original single-file ("how.jpg") loader: each
        ``<name>.jpg`` in /Eden/DATA/known_faces becomes one known person,
        keyed by the capitalized file stem (so ``how.jpg`` -> "How").
        This also makes faces saved by learn_face() reload on restart.
        """
        face_db = Path("/Eden/DATA/known_faces")
        face_db.mkdir(parents=True, exist_ok=True)

        for face_file in sorted(face_db.glob("*.jpg")):
            name = face_file.stem.capitalize()
            try:
                image = face_recognition.load_image_file(str(face_file))
                # IndexError here means no face was detected in the image.
                self.known_faces[name] = face_recognition.face_encodings(image)[0]
                print(f"   ✅ Loaded {name}'s face")
            except Exception:
                # Unreadable file or no detectable face — skip, don't crash.
                print(f"   ⚠️  Could not load {name}'s face")

    def capture_frame(self):
        """Capture one BGR frame; update self.last_frame.

        Returns:
            The frame as a numpy array, or None if capture failed.
        """
        ret, frame = self.camera.read()
        if ret:
            self.last_frame = frame
            return frame
        return None

    def detect_faces(self, frame):
        """Detect and recognize faces in a BGR frame.

        Returns:
            List of dicts: {'name': str, 'location': (top, right, bottom,
            left) in full-frame pixel coordinates, 'is_known': bool}.
        """
        # Downscale for speed; the inverse factor maps locations back.
        scale = self.FACE_DETECT_SCALE
        inv_scale = int(round(1 / scale))
        small_frame = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
        rgb_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        face_locations = face_recognition.face_locations(rgb_frame)
        face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

        known_names = list(self.known_faces.keys())
        known_encodings = list(self.known_faces.values())

        faces_found = []
        for face_encoding, face_location in zip(face_encodings, face_locations):
            name = "Unknown"
            if known_encodings:
                # Pick the single CLOSEST known face under the tolerance.
                # (The original took the first compare_faces match, which
                # is order-dependent when several people are within 0.6.)
                distances = face_recognition.face_distance(known_encodings, face_encoding)
                best = int(np.argmin(distances))
                if distances[best] <= 0.6:
                    name = known_names[best]

            # Scale detection coordinates back up to full-frame size
            top, right, bottom, left = (v * inv_scale for v in face_location)

            faces_found.append({
                'name': name,
                'location': (top, right, bottom, left),
                'is_known': name != "Unknown",
            })

        return faces_found

    def detect_objects(self, frame):
        """Rough scene analysis via edge/contour counting (no classifier).

        Returns:
            {'object_count': int, 'scene_complexity': 'simple'|'moderate'|
            'complex'} — simple below 5 objects, moderate below 15.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 150)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Ignore tiny contours (noise); 500 px^2 is an empirical cutoff.
        count = sum(1 for c in contours if cv2.contourArea(c) > 500)

        if count < 5:
            complexity = 'simple'
        elif count < 15:
            complexity = 'moderate'
        else:
            complexity = 'complex'

        return {
            'object_count': count,
            'scene_complexity': complexity,
        }

    def read_text(self, frame):
        """Read text from a BGR frame using Tesseract OCR.

        Returns:
            {'text_found': True, 'text': str, 'length': int} when text is
            present, {'text_found': False} otherwise (with 'error' on
            OCR failure, e.g. tesseract binary missing).
        """
        try:
            # Tesseract wants RGB; convert via PIL
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(rgb_frame)

            text = pytesseract.image_to_string(pil_image).strip()

            if text:
                return {
                    'text_found': True,
                    'text': text,
                    'length': len(text),
                }
            return {'text_found': False}

        except Exception as e:
            # OCR is best-effort: report the failure rather than raising.
            return {'text_found': False, 'error': str(e)}

    def analyze_brightness(self, frame):
        """Classify overall scene brightness from the mean gray level.

        Returns one of: 'dark', 'dim', 'moderate', 'bright', 'very bright'.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        avg_brightness = np.mean(gray)

        if avg_brightness < 50:
            return 'dark'
        elif avg_brightness < 100:
            return 'dim'
        elif avg_brightness < 150:
            return 'moderate'
        elif avg_brightness < 200:
            return 'bright'
        else:
            return 'very bright'

    def analyze_colors(self, frame):
        """Return the dominant colour family, judged from the mean HSV hue."""
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        avg_hue = np.mean(hsv[:, :, 0])

        # OpenCV hue range is 0-179; red wraps around both ends.
        if avg_hue < 15 or avg_hue > 165:
            return 'red'
        elif avg_hue < 35:
            return 'orange/yellow'
        elif avg_hue < 85:
            return 'green'
        elif avg_hue < 125:
            return 'blue'
        else:
            return 'purple/pink'

    def detect_motion(self, frame, previous_frame):
        """Return True if >1% of pixels changed between two BGR frames."""
        if previous_frame is None:
            return False

        gray1 = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Pixels whose intensity changed by more than 25 count as "moved"
        diff = cv2.absdiff(gray1, gray2)
        _, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)

        changed_pixels = np.count_nonzero(thresh)
        motion_percentage = (changed_pixels / thresh.size) * 100

        return motion_percentage > 1.0  # Motion if >1% of pixels changed

    def perceive(self, include_text=False):
        """Capture one frame and run the full perception pipeline.

        Args:
            include_text: also run OCR (noticeably slower).

        Returns:
            Observation dict; {'error': ...} if the camera gave no frame.
        """
        # BUGFIX: snapshot the previous frame BEFORE capturing.
        # capture_frame() overwrites self.last_frame, so the original code
        # compared the new frame against itself and never detected motion.
        previous_frame = self.last_frame

        frame = self.capture_frame()
        if frame is None:
            return {'error': 'Could not capture frame'}

        observations = {
            'timestamp': time.time(),
            'brightness': self.analyze_brightness(frame),
            'dominant_color': self.analyze_colors(frame),
        }

        # Detect faces
        faces = self.detect_faces(frame)
        observations['faces'] = faces
        observations['human_present'] = len(faces) > 0

        if faces:
            known_faces = [f for f in faces if f['is_known']]
            if known_faces:
                observations['known_people'] = [f['name'] for f in known_faces]

        # Detect objects
        observations['objects'] = self.detect_objects(frame)

        # Detect motion against the genuinely previous frame
        if previous_frame is not None:
            observations['motion_detected'] = self.detect_motion(frame, previous_frame)

        # Read text (optional, slower)
        if include_text:
            observations['text'] = self.read_text(frame)

        self.current_observations = observations
        return observations

    def learn_face(self, name: str):
        """Learn a new face from the current camera frame.

        Saves the encoding in memory and the frame to the face database
        (as ``<name>.jpg``, which load_known_faces() reloads on restart).

        Returns:
            True on success, False if no frame or no face was found.
        """
        frame = self.capture_frame()
        if frame is None:
            return False

        # Find a face in the frame (face_recognition expects RGB)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_encodings = face_recognition.face_encodings(rgb_frame)

        if not face_encodings:
            return False

        # Save encoding (first detected face wins)
        self.known_faces[name] = face_encodings[0]

        # Persist the image so the face survives a restart
        face_db = Path("/Eden/DATA/known_faces")
        face_db.mkdir(parents=True, exist_ok=True)
        cv2.imwrite(str(face_db / f"{name.lower()}.jpg"), frame)

        print(f"✅ Learned {name}'s face")
        return True

    def get_visual_summary(self):
        """Get human-readable summary of what Eden sees"""
        if not self.current_observations:
            return "I haven't looked yet."

        obs = self.current_observations
        summary = []

        # Brightness
        summary.append(f"The lighting is {obs.get('brightness', 'unknown')}")

        # People
        faces = obs.get('faces', [])
        if faces:
            if len(faces) == 1:
                face = faces[0]
                if face['is_known']:
                    summary.append(f"I can see {face['name']}")
                else:
                    summary.append("I see someone I don't recognize")
            else:
                known = [f['name'] for f in faces if f['is_known']]
                if known:
                    summary.append(f"I see {len(faces)} people including {', '.join(known)}")
                else:
                    summary.append(f"I see {len(faces)} people I don't recognize")
        else:
            summary.append("I don't see anyone")

        # Scene
        objects = obs.get('objects', {})
        complexity = objects.get('scene_complexity', 'unknown')
        summary.append(f"The scene is {complexity}")

        # Colors
        color = obs.get('dominant_color', 'unknown')
        summary.append(f"with {color} tones")

        return ". ".join(summary) + "."

    def release(self):
        """Release camera"""
        self.camera.release()

def main():
    """Smoke-test Eden's vision pipeline end to end from the command line."""
    print("Testing Eden's vision system...\n")

    vision = EdenVision()

    # Full perception pass, including the (slower) OCR step
    print("\n1. Capturing and analyzing frame...")
    observations = vision.perceive(include_text=True)

    print("\n2. What Eden sees:")
    print(json.dumps(observations, indent=2, default=str))

    print("\n3. Visual summary:")
    print(vision.get_visual_summary())

    # Grab one more frame and dump it to disk for manual inspection
    print("\n4. Saving test image...")
    snapshot = vision.capture_frame()
    if snapshot is not None:
        cv2.imwrite('/tmp/eden_vision_test.jpg', snapshot)
        print("   Saved to /tmp/eden_vision_test.jpg")

    vision.release()
    print("\n✅ Vision test complete!")


if __name__ == '__main__':
    main()
