"""
CVE SCANNER - Autonomous CVE Discovery
========================================
Scans GitHub repos, NVD, OSV for actionable CVEs
"""

import asyncio
import json
import os
import re
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

def _utcnow() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()

class Plugin:
    """Eden plugin that finds security advisories and repos that may be affected.

    The methods in this class query the GitHub advisory and code-search REST
    endpoints, attach a heuristic exploitability score to each advisory, and
    write full-scan results as JSON files under ``data_dir``.
    """

    # GitHub REST endpoints queried by the scanner.
    _ADVISORIES_URL = "https://api.github.com/advisories"
    _CODE_SEARCH_URL = "https://api.github.com/search/code"
    # Per-request timeout in seconds.
    _HTTP_TIMEOUT = 30

    def __init__(self, eden):
        """Store the host handle, read the GitHub token and create ``data_dir``.

        Args:
            eden: Host object handed to the plugin; it is only stored here.
        """
        self.eden = eden
        self.name = "CVEScanner"
        self.version = "1.0.0"
        self.description = "Autonomous CVE discovery and prioritization"
        self.capabilities = [
            "github_advisory_scan",
            "nvd_lookup",
            "osv_query",
            "dependency_analysis",
            "exploitability_scoring"
        ]
        self.event_subscriptions = []
        self._running = False

        # An empty token means requests are sent without an Authorization header.
        self.github_token = os.environ.get('GITHUB_TOKEN', '')
        # Rows produced by the most recent successful advisory scan.
        self.scan_results = []
        self.data_dir = Path('/Eden/BUSINESS/cve_fixer/scans')
        self.data_dir.mkdir(parents=True, exist_ok=True)

    async def initialize(self):
        """Mark the plugin as running and report whether a token is configured."""
        self._running = True
        print("🔍 CVE SCANNER initialized")
        if self.github_token:
            print("   GitHub token: configured")
        else:
            print("   GitHub token: not set (limited scanning)")

    async def shutdown(self):
        """Mark the plugin as stopped."""
        self._running = False

    async def tick(self):
        """Periodic hook; intentionally does nothing."""
        pass

    def _github_headers(self) -> Dict[str, str]:
        """Build the request headers, adding the bearer token when one is set."""
        headers = {
            'Accept': 'application/vnd.github+json',
            'User-Agent': 'Eden-CVE-Scanner/1.0'
        }
        if self.github_token:
            headers['Authorization'] = f'Bearer {self.github_token}'
        return headers

    def _fetch_json(self, url: str) -> Any:
        """Blocking GET of ``url``; return the decoded JSON body."""
        req = urllib.request.Request(url, headers=self._github_headers())
        with urllib.request.urlopen(req, timeout=self._HTTP_TIMEOUT) as resp:
            return json.loads(resp.read().decode('utf-8'))

    async def _get_json(self, url: str) -> Any:
        """Run the blocking fetch in the default executor.

        ``urlopen`` is synchronous; calling it directly inside a coroutine
        would stall the event loop for the duration of the request.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._fetch_json, url)

    @staticmethod
    def _severity_points(table: Dict[str, int], severity: Any,
                         default: int = 10) -> int:
        """Look up ``severity`` case-insensitively; non-strings get ``default``."""
        if not isinstance(severity, str):
            return default
        return table.get(severity.lower(), default)

    @staticmethod
    def _patched_versions(vuln: Dict) -> Any:
        """Return the patched-version info of one vulnerability entry.

        Prefers a non-empty ``patched_versions`` value; otherwise falls back to
        ``first_patched_version`` (a string, or an object with an
        ``identifier`` key) wrapped in a list.
        """
        patched = vuln.get('patched_versions')
        if patched:
            return patched
        first = vuln.get('first_patched_version')
        if isinstance(first, dict):
            first = first.get('identifier')
        return [first] if first else []

    @staticmethod
    def _parse_advisories(data: List[Any], ecosystem: str) -> List[Dict]:
        """Flatten an advisory list into one row per affected package.

        Entries that are not dicts are skipped, and ``null`` values for
        ``vulnerabilities`` or ``package`` are treated as empty so a single
        malformed advisory does not discard the whole scan.

        Args:
            data: Decoded JSON list of advisories.
            ecosystem: Fallback ecosystem when a package entry does not name one.

        Returns:
            A list of dicts with keys ``cve_id``, ``severity``, ``summary``,
            ``ecosystem``, ``package``, ``vulnerable_range``,
            ``patched_versions``, ``published`` and ``url``.
        """
        results = []
        for adv in data:
            if not isinstance(adv, dict):
                continue
            cve_id = adv.get('cve_id') or adv.get('ghsa_id') or 'UNKNOWN'

            for vuln in adv.get('vulnerabilities') or []:
                if not isinstance(vuln, dict):
                    continue
                pkg = vuln.get('package') or {}
                results.append({
                    'cve_id': cve_id,
                    'severity': adv.get('severity') or 'unknown',
                    'summary': adv.get('summary', ''),
                    'ecosystem': pkg.get('ecosystem', ecosystem),
                    'package': pkg.get('name', ''),
                    'vulnerable_range': vuln.get('vulnerable_version_range', ''),
                    'patched_versions': Plugin._patched_versions(vuln),
                    'published': adv.get('published_at', ''),
                    'url': adv.get('html_url', ''),
                })
        return results

    async def scan_github_advisories(self, ecosystem: str = "pip",
                                      severity: str = "high",
                                      limit: int = 20) -> List[Dict]:
        """Scan GitHub Security Advisories.

        Args:
            ecosystem: Value sent as the ``ecosystem`` query parameter.
            severity: Value sent as the ``severity`` query parameter.
            limit: Value sent as the ``per_page`` query parameter.

        Returns:
            One row per (advisory, affected package); see
            ``_parse_advisories`` for the keys. On any error the error is
            printed and an empty list is returned; ``scan_results`` is only
            replaced on success.
        """
        query = urllib.parse.urlencode({
            'ecosystem': ecosystem,
            'severity': severity,
            'per_page': limit,
        })
        url = f"{self._ADVISORIES_URL}?{query}"

        try:
            data = await self._get_json(url)
            if not isinstance(data, list):
                raise ValueError(
                    f"unexpected advisory payload: {type(data).__name__}")
            results = self._parse_advisories(data, ecosystem)
        except Exception as e:
            # Best-effort boundary: report the failure and let the caller continue.
            print(f"   Advisory scan error: {e}")
            return []

        self.scan_results = results
        return results

    async def find_affected_repos(self, package: str, ecosystem: str = "pip",
                                   min_stars: int = 100) -> List[Dict]:
        """Find GitHub repos whose manifest file mentions ``package``.

        Args:
            package: Package name used as the search term.
            ecosystem: Selects the manifest filename searched; unknown values
                fall back to ``requirements.txt``.
            min_stars: Inserted into the query as ``stars:>=N``.

        Returns:
            One dict per distinct repository with keys ``repo``, ``stars``,
            ``file_path`` and ``url``. On any error the error is printed and
            an empty list is returned.
        """
        search_file = {
            'pip': 'requirements.txt',
            'npm': 'package.json',
            'maven': 'pom.xml',
            'go': 'go.mod',
            'rust': 'Cargo.toml',
        }.get(ecosystem, 'requirements.txt')

        # NOTE(review): GitHub's code-search documentation does not appear to
        # list `stars:` as a qualifier, and code-search results may omit
        # `stargazers_count`. If so, `stars` below is always 0 and the
        # star filter in hunt_whales never matches - confirm against the API.
        query = f"{package} filename:{search_file} stars:>={min_stars}"
        url = f"{self._CODE_SEARCH_URL}?q={urllib.parse.quote(query)}&per_page=30"

        try:
            data = await self._get_json(url)
            items = data.get('items') or []

            repos = []
            seen = set()
            for item in items:
                if not isinstance(item, dict):
                    continue
                repo = item.get('repository') or {}
                repo_name = repo.get('full_name', '')
                # Several files of one repo can match; keep the first hit only.
                if not repo_name or repo_name in seen:
                    continue
                seen.add(repo_name)
                repos.append({
                    'repo': repo_name,
                    'stars': repo.get('stargazers_count') or 0,
                    'file_path': item.get('path', ''),
                    'url': repo.get('html_url', ''),
                })

            return repos

        except Exception as e:
            # Best-effort boundary: report the failure and let the caller continue.
            print(f"   Repo search error: {e}")
            return []

    async def score_exploitability(self, cve_id: str, severity: str,
                                    has_poc: bool = False,
                                    in_wild: bool = False) -> Dict:
        """Score exploitability of a CVE on a 0-100 scale.

        Args:
            cve_id: Identifier echoed back in the result.
            severity: Severity label, matched case-insensitively; unknown or
                non-string values contribute 10 points.
            has_poc: Adds 20 points, unless ``in_wild`` is also set.
            in_wild: Adds 35 points and takes precedence over ``has_poc``.

        Returns:
            Dict with ``cve_id``, ``score``, ``exploitability`` (``confirmed``
            at 70 or more, ``likely`` at 40 or more, otherwise ``unknown``)
            and ``factors``.
        """
        sev_scores = {'critical': 40, 'high': 30, 'medium': 15, 'low': 5}
        score = self._severity_points(sev_scores, severity)
        factors = [f"severity:{severity}"]

        # Known exploits: active exploitation outranks a mere proof of concept.
        if in_wild:
            score += 35
            factors.append("in_wild:true")
        elif has_poc:
            score += 20
            factors.append("has_poc:true")

        score = min(100, max(0, score))

        if score >= 70:
            exploitability = 'confirmed'
        elif score >= 40:
            exploitability = 'likely'
        else:
            exploitability = 'unknown'

        return {
            'cve_id': cve_id,
            'score': score,
            'exploitability': exploitability,
            'factors': factors,
        }

    async def full_scan(self, ecosystems: Optional[List[str]] = None,
                         min_stars: int = 100) -> Dict:
        """Run full CVE scan pipeline and save the leads to ``data_dir``.

        For each ecosystem the first 5 advisory rows are used, and for each of
        those the first 3 matching repositories become leads.

        Args:
            ecosystems: Ecosystems to scan; defaults to ``['pip', 'npm']``.
            min_stars: Passed through to ``find_affected_repos``.

        Returns:
            Dict with ``leads_found``, ``ecosystems_scanned``, ``scan_file``
            (path of the JSON file written) and ``leads``.
        """
        if ecosystems is None:
            ecosystems = ['pip', 'npm']

        all_leads = []

        for eco in ecosystems:
            print(f"   Scanning {eco} advisories...")
            advisories = await self.scan_github_advisories(ecosystem=eco)

            for adv in advisories[:5]:  # Limit to top 5 per ecosystem
                pkg = adv.get('package', '')
                if not pkg:
                    continue

                print(f"   Finding repos using {pkg}...")
                repos = await self.find_affected_repos(pkg, eco, min_stars)

                # The score depends only on the advisory, so compute it once
                # rather than once per repository.
                exp = await self.score_exploitability(
                    adv['cve_id'],
                    adv['severity']
                )

                for repo in repos[:3]:  # Top 3 repos per package
                    all_leads.append({
                        'cve_id': adv['cve_id'],
                        'repo': repo['repo'],
                        'ecosystem': eco,
                        'package': pkg,
                        'vulnerable_spec': adv.get('vulnerable_range', ''),
                        'severity': adv['severity'],
                        'exploitability': exp['exploitability'],
                        'stars': repo['stars'],
                        'evidence': adv.get('url', ''),
                    })

                await asyncio.sleep(1)  # Rate limit

        # Save results
        scan_file = self.data_dir / f"scan_{int(time.time())}.json"
        scan_file.write_text(json.dumps({
            'timestamp': _utcnow(),
            'ecosystems': ecosystems,
            'leads': all_leads,
        }, indent=2), encoding='utf-8')

        return {
            'leads_found': len(all_leads),
            'ecosystems_scanned': ecosystems,
            'scan_file': str(scan_file),
            'leads': all_leads,
        }

    def get_info(self):
        """Return plugin metadata plus the row count of the last advisory scan."""
        return {
            'name': self.name,
            'version': self.version,
            'description': self.description,
            'capabilities': self.capabilities,
            'last_scan_results': len(self.scan_results),
        }

    async def hunt_whales(self, min_stars: int = 5000,
                          severity: str = "critical",
                          ecosystems: Optional[List[str]] = None) -> List[Dict]:
        """
        WHALE HUNTING MODE
        Find high-value CVE targets: big repos + critical vulns

        Args:
            min_stars: Minimum star count a repository needs to be reported.
            severity: Advisory severity to scan for.
            ecosystems: Ecosystems to scan; defaults to
                ``['npm', 'pip', 'go', 'maven']``.

        Returns:
            Matching targets sorted by ``whale_score``, highest first.
        """
        if ecosystems is None:
            ecosystems = ['npm', 'pip', 'go', 'maven']

        whales = []

        print(f"🐋 WHALE HUNTING: stars>{min_stars}, severity={severity}")

        for eco in ecosystems:
            print(f"   Scanning {eco}...")

            advisories = await self.scan_github_advisories(
                ecosystem=eco,
                severity=severity,
                limit=10
            )

            for adv in advisories:
                pkg = adv.get('package', '')
                if not pkg:
                    continue

                # Find BIG repos using this vulnerable package
                repos = await self.find_affected_repos(
                    package=pkg,
                    ecosystem=eco,
                    min_stars=min_stars
                )

                for repo in repos:
                    # Re-check locally; the search query is not trusted to
                    # have applied the star threshold.
                    if repo['stars'] < min_stars:
                        continue
                    whales.append({
                        'cve_id': adv['cve_id'],
                        'repo': repo['repo'],
                        'stars': repo['stars'],
                        'ecosystem': eco,
                        'package': pkg,
                        'severity': adv['severity'],
                        'vulnerable_range': adv.get('vulnerable_range', ''),
                        'evidence': adv.get('url', ''),
                        'whale_score': self._calc_whale_score(repo['stars'], adv['severity']),
                    })
                    print(f"   🐋 WHALE: {repo['repo']} ({repo['stars']}⭐) - {adv['cve_id']}")

                await asyncio.sleep(1)  # Rate limit

        # Sort by whale score (biggest first)
        whales.sort(key=lambda x: x['whale_score'], reverse=True)

        print(f"🐋 Found {len(whales)} whales")
        return whales

    def _calc_whale_score(self, stars: int, severity: str) -> int:
        """Calculate whale value score from star count and severity.

        Stars contribute 40/60/80/100 points at the 5k/10k/20k/50k thresholds
        (0 below 5k); severity contributes 5-50 points, or 10 when the label is
        unknown or not a string.
        """
        if stars >= 50000:
            score = 100
        elif stars >= 20000:
            score = 80
        elif stars >= 10000:
            score = 60
        elif stars >= 5000:
            score = 40
        else:
            score = 0

        sev_scores = {'critical': 50, 'high': 35, 'medium': 20, 'low': 5}
        return score + self._severity_points(sev_scores, severity)

