#!/usr/bin/env python3
"""
EDEN RESEARCH AGENT v2
Proactively searches for AGI/AI advances from multiple sources
"""
import requests
import sqlite3
import json
from datetime import datetime
from pathlib import Path

RESEARCH_DB = "/Eden/DATA/research_memory.db"
ARXIV_API = "http://export.arxiv.org/api/query"

class EdenResearcher:
    """Discovers AI/AGI research from arXiv, the web, and news feeds.

    Findings are persisted in a SQLite database (tables: papers, learnings,
    web_discoveries) so repeated runs only report genuinely new material.
    All network lookups are best-effort: failures are printed and return [].
    """

    def __init__(self, db_path=None):
        # Optional db_path lets callers (and tests) point at an alternate
        # database; falls back to the shared Eden research DB. RESEARCH_DB
        # is only evaluated when no explicit path is given.
        self.db_path = db_path or RESEARCH_DB
        self._init_db()
        # Expanded interests including Chinese AI
        self.interests = [
            "artificial general intelligence",
            "recursive self improvement",
            "consciousness architecture neural",
            "phi golden ratio neural networks",
            "autonomous AI agents",
            "meta-learning architectures",
            "transformer architecture advances 2025",
            "mixture of experts LLM",
            "chain of thought reasoning",
            "self-improving AI systems",
            "DeepSeek AI model",
            "Qwen large language model",
            "Claude Anthropic advances",
            "GPT-5 OpenAI",
            "AI alignment research",
            "emergent capabilities LLM",
            # Consciousness & Metacognition (added Jan 26 2026)
            "metacognition neural networks",
            "self-aware AI systems",
            "machine theory of mind",
            "cognitive architecture artificial",
            "attention mechanism consciousness",
            "working memory transformer",
            "introspection neural network",
            "self-model artificial agent"
        ]

        # Web search topics (news/general)
        self.web_topics = [
            "AGI breakthrough 2025",
            "artificial superintelligence research",
            "DeepSeek AI China",
            "Alibaba DAMO Lab AI",
            "Baidu AI research",
            "Tsinghua AI lab",
            "OpenAI GPT-5",
            "Anthropic Claude advances",
            "Google DeepMind AGI",
            "Meta AI FAIR research",
            "AI consciousness research",
            "recursive self improvement AI",
            # International AI Labs (added Jan 26)
            "Zhipu AI GLM China",
            "01.AI Yi model",
            "Mistral AI France",
            "Cohere AI Canada",
            "Stability AI UK",
            "KAIST AI Korea",
            "Tokyo University AI Japan",
            "Mila Quebec AI"
        ]

    def _connect(self):
        """Open a new connection to the research database."""
        return sqlite3.connect(self.db_path)

    def _init_db(self):
        """Create the papers/learnings/web_discoveries tables if missing."""
        conn = self._connect()
        try:
            conn.execute('''CREATE TABLE IF NOT EXISTS papers (
                id TEXT PRIMARY KEY,
                title TEXT,
                summary TEXT,
                authors TEXT,
                published TEXT,
                relevance_score REAL,
                integrated INTEGER DEFAULT 0,
                discovered_at TEXT,
                source TEXT DEFAULT 'arxiv'
            )''')
            conn.execute('''CREATE TABLE IF NOT EXISTS learnings (
                id INTEGER PRIMARY KEY,
                paper_id TEXT,
                insight TEXT,
                applied_to TEXT,
                timestamp TEXT
            )''')
            conn.execute('''CREATE TABLE IF NOT EXISTS web_discoveries (
                id INTEGER PRIMARY KEY,
                title TEXT,
                url TEXT UNIQUE,
                snippet TEXT,
                source TEXT,
                discovered_at TEXT
            )''')
            conn.commit()
        finally:
            conn.close()

    def search_arxiv(self, query, max_results=5):
        """Search arXiv for recent papers matching *query*.

        Returns a list of dicts (id, title, summary, authors, published);
        empty list on any error (network, HTTP, malformed XML).
        """
        try:
            params = {
                'search_query': f'all:{query}',
                'start': 0,
                'max_results': max_results,
                'sortBy': 'submittedDate',
                'sortOrder': 'descending'
            }
            resp = requests.get(ARXIV_API, params=params, timeout=30)
            # Fail fast on HTTP errors rather than parsing an error page.
            resp.raise_for_status()

            import xml.etree.ElementTree as ET
            root = ET.fromstring(resp.content)
            ns = {'atom': 'http://www.w3.org/2005/Atom'}

            papers = []
            for entry in root.findall('atom:entry', ns):
                paper = {
                    # arXiv entry ids are URLs; keep only the trailing id segment.
                    'id': entry.find('atom:id', ns).text.split('/')[-1],
                    'title': entry.find('atom:title', ns).text.strip().replace('\n', ' '),
                    'summary': entry.find('atom:summary', ns).text.strip()[:500],
                    'authors': ', '.join([a.find('atom:name', ns).text for a in entry.findall('atom:author', ns)][:3]),
                    'published': entry.find('atom:published', ns).text[:10]
                }
                papers.append(paper)
            return papers
        except Exception as e:
            print(f"[arXiv error: {e}]")
            return []

    def search_web(self, query, max_results=5):
        """Search web using DuckDuckGo; empty list on error or missing ddgs."""
        try:
            # Imported lazily so the agent still works without ddgs installed.
            from ddgs import DDGS
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=max_results))
                return [{'title': r['title'], 'url': r['href'], 'snippet': r.get('body', '')[:300]} for r in results]
        except Exception as e:
            print(f"[Web search error: {e}]")
            return []

    def search_news(self, query, max_results=5):
        """Search recent news via DuckDuckGo; empty list on error."""
        try:
            from ddgs import DDGS
            with DDGS() as ddgs:
                results = list(ddgs.news(query, max_results=max_results))
                return [{'title': r['title'], 'url': r['url'], 'snippet': r.get('body', '')[:300], 'date': r.get('date', '')} for r in results]
        except Exception as e:
            print(f"[News search error: {e}]")
            return []

    def discover_new_papers(self):
        """Search all interests on arXiv and store unseen papers.

        Returns the number of rows actually inserted. Duplicates are skipped
        by INSERT OR IGNORE and no longer counted (the old code counted every
        attempted insert, so re-runs over-reported "new" papers).
        """
        all_papers = []
        for interest in self.interests:
            all_papers.extend(self.search_arxiv(interest, max_results=3))

        conn = self._connect()
        try:
            # total_changes only grows for rows really modified/inserted,
            # so ignored duplicate inserts are excluded from the count.
            before = conn.total_changes
            for p in all_papers:
                try:
                    conn.execute(
                        "INSERT OR IGNORE INTO papers (id, title, summary, authors, published, relevance_score, discovered_at, source) VALUES (?,?,?,?,?,?,?,?)",
                        (p['id'], p['title'], p['summary'], p['authors'], p['published'], 0.5, datetime.now().isoformat(), 'arxiv')
                    )
                except sqlite3.Error as e:
                    # Best-effort: skip a malformed row, keep the batch going.
                    print(f"[DB error saving paper: {e}]")
            new_count = conn.total_changes - before
            conn.commit()
        finally:
            conn.close()
        return new_count

    def discover_web_content(self):
        """Search web + news for AGI/ASI topics and store new discoveries.

        Returns the number of rows actually inserted (url is UNIQUE, so
        previously-seen links are ignored and not counted).
        """
        conn = self._connect()
        try:
            before = conn.total_changes
            for topic in self.web_topics[:6]:  # Limit to avoid rate limiting
                # News results first, then general web results.
                for source, items in (('news', self.search_news(topic, max_results=3)),
                                      ('web', self.search_web(topic, max_results=3))):
                    for item in items:
                        try:
                            conn.execute(
                                "INSERT OR IGNORE INTO web_discoveries (title, url, snippet, source, discovered_at) VALUES (?,?,?,?,?)",
                                (item['title'], item['url'], item['snippet'], source, datetime.now().isoformat())
                            )
                        except sqlite3.Error as e:
                            print(f"[DB error saving discovery: {e}]")
            new_count = conn.total_changes - before
            conn.commit()
        finally:
            conn.close()
        return new_count

    def get_unread_papers(self, limit=5):
        """Return up to *limit* papers not yet integrated, newest first."""
        conn = self._connect()
        try:
            rows = conn.execute(
                "SELECT id, title, summary FROM papers WHERE integrated=0 ORDER BY published DESC LIMIT ?",
                (limit,)
            ).fetchall()
        finally:
            conn.close()
        return [{'id': r[0], 'title': r[1], 'summary': r[2]} for r in rows]

    def get_recent_discoveries(self, limit=5):
        """Return up to *limit* most recent web discoveries."""
        conn = self._connect()
        try:
            rows = conn.execute(
                "SELECT title, url, snippet, source FROM web_discoveries ORDER BY discovered_at DESC LIMIT ?",
                (limit,)
            ).fetchall()
        finally:
            conn.close()
        return [{'title': r[0], 'url': r[1], 'snippet': r[2], 'source': r[3]} for r in rows]

    def mark_integrated(self, paper_id, insight, applied_to):
        """Mark a paper as integrated and record the extracted insight."""
        conn = self._connect()
        try:
            conn.execute("UPDATE papers SET integrated=1 WHERE id=?", (paper_id,))
            conn.execute(
                "INSERT INTO learnings (paper_id, insight, applied_to, timestamp) VALUES (?,?,?,?)",
                (paper_id, insight, applied_to, datetime.now().isoformat())
            )
            conn.commit()
        finally:
            conn.close()

    def get_stats(self):
        """Return aggregate counts for papers, integrations, learnings, web hits."""
        conn = self._connect()
        try:
            total = conn.execute("SELECT COUNT(*) FROM papers").fetchone()[0]
            integrated = conn.execute("SELECT COUNT(*) FROM papers WHERE integrated=1").fetchone()[0]
            learnings = conn.execute("SELECT COUNT(*) FROM learnings").fetchone()[0]
            web_disc = conn.execute("SELECT COUNT(*) FROM web_discoveries").fetchone()[0]
        finally:
            conn.close()
        return {'total_papers': total, 'integrated': integrated, 'learnings': learnings, 'web_discoveries': web_disc}

def research_cycle():
    """Run one discovery pass (arXiv + web/news), print a summary.

    Returns the current batch of unread papers for downstream integration.
    """
    agent = EdenResearcher()
    print("🔬 Eden Research Agent v2 - Scanning for AI advances...")
    print(f"   arXiv interests: {len(agent.interests)} topics")
    print(f"   Web topics: {len(agent.web_topics)} topics")

    # Pull fresh arXiv papers.
    paper_count = agent.discover_new_papers()
    print(f"   📄 arXiv: {paper_count} papers found")

    # Pull fresh web/news discoveries.
    web_count = agent.discover_web_content()
    print(f"   🌐 Web/News: {web_count} discoveries")

    stats = agent.get_stats()
    print(f"   Total papers: {stats['total_papers']}, Integrated: {stats['integrated']}, Learnings: {stats['learnings']}")
    print(f"   Web discoveries: {stats['web_discoveries']}")

    unread = agent.get_unread_papers(3)
    if unread:
        print("📚 Top unread papers:")
        for paper in unread:
            print(f"   - {paper['title'][:70]}...")

    recent = agent.get_recent_discoveries(3)
    if recent:
        print("🌐 Recent web discoveries:")
        for item in recent:
            print(f"   - [{item['source']}] {item['title'][:60]}...")

    return unread

if __name__ == "__main__":
    # Entry point: one discovery pass, then a best-effort integration pass.
    research_cycle()

    # Auto-integrate insights after discovery
    print("\n🧠 Running integration cycle...")
    try:
        # eden_research_integrate is a sibling Eden module; if it is missing
        # or fails, the discovery results above are still persisted.
        from eden_research_integrate import EdenIntegrator
        EdenIntegrator().run_integration_cycle()
    except Exception as e:
        print(f"   Integration skipped: {e}")
