#!/usr/bin/env python3
"""
EDEN ENTERPRISE SCANNER
Multi-tool security analysis - whale-ready
"""
import subprocess
import tempfile
import shutil
import json
import os
import re
from datetime import datetime
from pathlib import Path

class EnterpriseScanner:
    def __init__(self, repo_url):
        """Prepare an empty scan session for ``repo_url``.

        Nothing is cloned or scanned here; the attributes below are filled
        in later by ``clone_repo``, the ``scan_*`` methods and
        ``run_full_scan``.
        """
        # Per-severity counters, incremented by add_finding().
        self.stats = dict.fromkeys(
            ("critical", "high", "medium", "low", "info"), 0
        )
        self.findings = []      # finding dicts appended by add_finding()
        self.tools_used = []    # names of the scanners that have run
        self.repo_url = repo_url
        self.repo_path = None   # temporary clone directory, set by clone_repo()
        self.scan_time = None   # elapsed seconds, set by run_full_scan()
    
    def clone_repo(self, timeout=120):
        """Shallow-clone the target GitHub repository into a temp directory.

        Args:
            timeout: Maximum number of seconds to wait for ``git clone``.

        Returns:
            True when the clone succeeded and ``self.repo_path`` points at the
            checkout; False otherwise. On any failure no temporary directory
            is left behind and ``self.repo_path`` is None.
        """
        # Validate the URL *before* creating anything on disk. The repo
        # segment stops at '/', '?' or '#' so query strings and fragments do
        # not end up in the clone URL.
        match = re.match(
            r'https?://github\.com/([^/]+)/([^/?#]+)',
            (self.repo_url or '').strip(),
        )
        if not match:
            return False

        owner, repo = match.groups()
        # Strip only a trailing ".git"; str.replace would also mangle names
        # such as "foo.github.io".
        if repo.endswith('.git'):
            repo = repo[:-4]
        if not repo:
            return False
        clone_url = f"https://github.com/{owner}/{repo}.git"

        print(f"📥 Cloning {owner}/{repo}...")

        self.repo_path = tempfile.mkdtemp(prefix='eden_enterprise_')
        try:
            result = subprocess.run(
                ['git', 'clone', '--depth', '50', clone_url, self.repo_path],
                capture_output=True, text=True, errors='replace',
                timeout=timeout
            )
            cloned = result.returncode == 0
            if not cloned:
                print(f"❌ Clone failed: {result.stderr.strip()}")
        except (subprocess.SubprocessError, OSError) as e:
            # TimeoutExpired (a SubprocessError) or git missing (an OSError).
            print(f"❌ Clone failed: {e}")
            cloned = False

        if not cloned:
            # Do not leak the temp directory when the clone did not succeed.
            shutil.rmtree(self.repo_path, ignore_errors=True)
            self.repo_path = None
        return cloned
    
    def scan_secrets(self):
        """Deep secret detection with multiple patterns"""
        print("🔐 Scanning for secrets...")
        self.tools_used.append("secret_scanner")
        
        SECRET_PATTERNS = [
            # API Keys
            (r'(?i)(api[_-]?key|apikey)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})', 'API Key Exposure', 'HIGH'),
            (r'(?i)(secret[_-]?key|secretkey)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})', 'Secret Key Exposure', 'CRITICAL'),
            
            # AWS
            (r'AKIA[0-9A-Z]{16}', 'AWS Access Key ID', 'CRITICAL'),
            (r'(?i)aws_secret_access_key\s*[=:]\s*["\']?([a-zA-Z0-9/+=]{40})', 'AWS Secret Key', 'CRITICAL'),
            
            # Database
            (r'(?i)(mysql|postgres|mongodb|redis)://[^:]+:[^@]+@', 'Database Connection String', 'CRITICAL'),
            (r'(?i)(password|passwd|pwd)\s*[=:]\s*["\']([^"\']{8,})["\']', 'Hardcoded Password', 'HIGH'),
            
            # Tokens
            (r'ghp_[a-zA-Z0-9]{36}', 'GitHub Personal Access Token', 'CRITICAL'),
            (r'gho_[a-zA-Z0-9]{36}', 'GitHub OAuth Token', 'CRITICAL'),
            (r'(?i)bearer\s+[a-zA-Z0-9_\-\.]+', 'Bearer Token', 'HIGH'),
            (r'(?i)(jwt|token)\s*[=:]\s*["\']?ey[a-zA-Z0-9_\-]+\.ey[a-zA-Z0-9_\-]+', 'JWT Token', 'HIGH'),
            
            # Private Keys
            (r'-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----', 'Private Key', 'CRITICAL'),
            (r'-----BEGIN PGP PRIVATE KEY BLOCK-----', 'PGP Private Key', 'CRITICAL'),
            
            # Cloud
            (r'(?i)(azure|aws|gcp)[_-]?(key|secret|token|credential)', 'Cloud Credential Reference', 'MEDIUM'),
            (r'sk-[a-zA-Z0-9]{48}', 'OpenAI API Key', 'CRITICAL'),
            (r'(?i)stripe[_-]?(secret|api)[_-]?key', 'Stripe Key Reference', 'HIGH'),
        ]
        
        SKIP_DIRS = {'.git', 'node_modules', 'venv', '__pycache__', '.venv', 'vendor', 'dist', 'build'}
        SCAN_EXTENSIONS = {'.py', '.js', '.ts', '.jsx', '.tsx', '.env', '.yml', '.yaml', '.json', '.xml', '.conf', '.config', '.rb', '.php', '.go', '.java', '.sh', '.bash'}
        
        for root, dirs, files in os.walk(self.repo_path):
            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
            
            for f in files:
                ext = Path(f).suffix.lower()
                if ext not in SCAN_EXTENSIONS and f not in ['.env', '.env.local', '.env.production']:
                    continue
                
                fpath = os.path.join(root, f)
                rel_path = os.path.relpath(fpath, self.repo_path)
                
                try:
                    with open(fpath, 'r', errors='ignore') as file:
                        for i, line in enumerate(file, 1):
                            for pattern, desc, severity in SECRET_PATTERNS:
                                if re.search(pattern, line):
                                    self.add_finding(
                                        severity=severity,
                                        category="Secrets",
                                        title=desc,
                                        file=rel_path,
                                        line=i,
                                        description=f"Potential {desc.lower()} detected",
                                        evidence=self.sanitize_evidence(line.strip()[:100]),
                                        recommendation=f"Remove hardcoded credential and use environment variables or secrets manager"
                                    )
                except:
                    pass
    
    def scan_dependencies(self):
        """Check for vulnerable dependencies"""
        print("📦 Scanning dependencies...")
        self.tools_used.append("dependency_scanner")
        
        # Python - requirements.txt, setup.py, Pipfile
        req_files = ['requirements.txt', 'requirements-dev.txt', 'requirements-prod.txt']
        for req_file in req_files:
            req_path = os.path.join(self.repo_path, req_file)
            if os.path.exists(req_path):
                self.scan_python_deps(req_path)
        
        # Node - package.json
        pkg_path = os.path.join(self.repo_path, 'package.json')
        if os.path.exists(pkg_path):
            self.scan_node_deps(pkg_path)
        
        # Go - go.mod
        go_path = os.path.join(self.repo_path, 'go.mod')
        if os.path.exists(go_path):
            self.scan_go_deps(go_path)
    
    def scan_python_deps(self, req_path):
        """Check Python dependencies for known vulnerabilities"""
        VULN_PACKAGES = {
            'django<2.2': ('CRITICAL', 'Django <2.2 has multiple security vulnerabilities'),
            'django<3.2': ('HIGH', 'Django <3.2 is end-of-life'),
            'flask<2.0': ('MEDIUM', 'Flask <2.0 has known security issues'),
            'requests<2.20': ('HIGH', 'Requests <2.20 vulnerable to CVE-2018-18074'),
            'urllib3<1.24.2': ('HIGH', 'urllib3 CRLF injection vulnerability'),
            'pyyaml<5.4': ('CRITICAL', 'PyYAML arbitrary code execution'),
            'jinja2<2.11.3': ('HIGH', 'Jinja2 XSS vulnerability'),
            'pillow<8.1.1': ('HIGH', 'Pillow multiple vulnerabilities'),
            'cryptography<3.3': ('MEDIUM', 'Cryptography security updates'),
            'paramiko<2.4.1': ('HIGH', 'Paramiko authentication bypass'),
        }
        
        try:
            with open(req_path) as f:
                for i, line in enumerate(f, 1):
                    line = line.strip().lower()
                    if not line or line.startswith('#'):
                        continue
                    
                    for vuln_pattern, (severity, desc) in VULN_PACKAGES.items():
                        pkg = vuln_pattern.split('<')[0]
                        if line.startswith(pkg):
                            self.add_finding(
                                severity=severity,
                                category="Dependencies",
                                title=f"Vulnerable package: {pkg}",
                                file=os.path.relpath(req_path, self.repo_path),
                                line=i,
                                description=desc,
                                evidence=line,
                                recommendation=f"Upgrade {pkg} to latest stable version"
                            )
        except:
            pass
    
    def scan_node_deps(self, pkg_path):
        """Check Node.js dependencies"""
        VULN_PACKAGES = {
            'lodash': ('4.17.21', 'HIGH', 'Prototype pollution'),
            'axios': ('0.21.1', 'HIGH', 'SSRF vulnerability'),
            'minimist': ('1.2.6', 'CRITICAL', 'Prototype pollution'),
            'node-fetch': ('2.6.7', 'HIGH', 'Exposure of sensitive information'),
            'express': ('4.17.3', 'MEDIUM', 'Open redirect'),
        }
        
        try:
            with open(pkg_path) as f:
                data = json.load(f)
                
            deps = {**data.get('dependencies', {}), **data.get('devDependencies', {})}
            
            for pkg, version in deps.items():
                if pkg in VULN_PACKAGES:
                    safe_version, severity, desc = VULN_PACKAGES[pkg]
                    # Simple version check (would need proper semver in production)
                    self.add_finding(
                        severity=severity,
                        category="Dependencies",
                        title=f"Potentially vulnerable: {pkg}",
                        file="package.json",
                        line=0,
                        description=f"{desc}. Current: {version}, Safe: >={safe_version}",
                        evidence=f'"{pkg}": "{version}"',
                        recommendation=f"Run: npm update {pkg}"
                    )
        except:
            pass
    
    def scan_go_deps(self, go_path):
        """Placeholder for Go module scanning; records no findings.

        ``go_path`` is accepted but not read. The author's note says a
        production version would integrate with govulncheck.
        """
        return None
    
    def scan_code_security(self):
        """Static analysis for security issues"""
        print("🔍 Scanning code for vulnerabilities...")
        self.tools_used.append("sast_scanner")
        
        VULN_PATTERNS = {
            'python': [
                (r'eval\s*\(', 'CRITICAL', 'Dangerous eval() usage', 'Code Injection', 'eval() can execute arbitrary code. Use ast.literal_eval() for safe parsing.'),
                (r'exec\s*\(', 'CRITICAL', 'Dangerous exec() usage', 'Code Injection', 'exec() can execute arbitrary code. Avoid or sanitize input.'),
                (r'subprocess\.call\s*\([^)]*shell\s*=\s*True', 'HIGH', 'Shell injection risk', 'Command Injection', 'shell=True with user input allows command injection. Use shell=False with list args.'),
                (r'os\.system\s*\(', 'HIGH', 'OS command injection risk', 'Command Injection', 'os.system() is vulnerable to injection. Use subprocess with shell=False.'),
                (r'pickle\.loads?\s*\(', 'HIGH', 'Unsafe deserialization', 'Deserialization', 'pickle can execute arbitrary code. Use json or safer alternatives.'),
                (r'yaml\.load\s*\([^)]*\)', 'HIGH', 'Unsafe YAML loading', 'Deserialization', 'Use yaml.safe_load() instead of yaml.load().'),
                (r'\.execute\s*\(\s*["\'].*%.*["\']', 'CRITICAL', 'SQL Injection', 'Injection', 'String formatting in SQL queries. Use parameterized queries.'),
                (r'\.execute\s*\(\s*f["\']', 'CRITICAL', 'SQL Injection (f-string)', 'Injection', 'F-strings in SQL queries. Use parameterized queries.'),
                (r'\.raw\s*\(|\.extra\s*\(', 'MEDIUM', 'Django raw SQL', 'Injection', 'Raw SQL in Django. Ensure input is sanitized.'),
                (r'render_template_string\s*\(', 'HIGH', 'Server-side template injection', 'Injection', 'render_template_string with user input allows SSTI.'),
                (r'DEBUG\s*=\s*True', 'MEDIUM', 'Debug mode enabled', 'Configuration', 'Debug mode exposes sensitive information in production.'),
                (r'verify\s*=\s*False', 'HIGH', 'SSL verification disabled', 'Configuration', 'Disabling SSL verification allows MITM attacks.'),
                (r'ALLOWED_HOSTS\s*=\s*\[\s*["\*\'"]', 'MEDIUM', 'Wildcard allowed hosts', 'Configuration', 'Wildcard ALLOWED_HOSTS can enable host header attacks.'),
            ],
            'javascript': [
                (r'eval\s*\(', 'CRITICAL', 'Dangerous eval() usage', 'Code Injection', 'eval() executes arbitrary code. Avoid entirely.'),
                (r'innerHTML\s*=', 'HIGH', 'XSS via innerHTML', 'XSS', 'innerHTML with user input causes XSS. Use textContent or sanitize.'),
                (r'document\.write\s*\(', 'HIGH', 'XSS via document.write', 'XSS', 'document.write with user input causes XSS.'),
                (r'\.html\s*\([^)]*\$', 'HIGH', 'jQuery XSS risk', 'XSS', '.html() with user input causes XSS. Use .text() or sanitize.'),
                (r'dangerouslySetInnerHTML', 'HIGH', 'React XSS risk', 'XSS', 'dangerouslySetInnerHTML can cause XSS. Sanitize input first.'),
                (r'child_process\.exec\s*\(', 'CRITICAL', 'Command injection', 'Command Injection', 'exec() with user input allows command injection. Use execFile().'),
                (r'new\s+Function\s*\(', 'HIGH', 'Dynamic function creation', 'Code Injection', 'new Function() is similar to eval(). Avoid with user input.'),
                (r'localStorage\.(setItem|getItem)', 'LOW', 'Sensitive data in localStorage', 'Storage', 'localStorage is accessible to XSS. Avoid storing sensitive data.'),
                (r'password.*=.*["\'][^"\']+["\']', 'HIGH', 'Hardcoded password', 'Secrets', 'Hardcoded credentials in client-side code.'),
            ],
        }
        
        for root, dirs, files in os.walk(self.repo_path):
            dirs[:] = [d for d in dirs if d not in {'.git', 'node_modules', 'venv', '__pycache__', 'dist', 'build'}]
            
            for f in files:
                ext = Path(f).suffix.lower()
                lang = None
                
                if ext == '.py':
                    lang = 'python'
                elif ext in ['.js', '.jsx', '.ts', '.tsx']:
                    lang = 'javascript'
                
                if not lang:
                    continue
                
                fpath = os.path.join(root, f)
                rel_path = os.path.relpath(fpath, self.repo_path)
                
                try:
                    with open(fpath, 'r', errors='ignore') as file:
                        content = file.read()
                        lines = content.split('\n')
                        
                        for pattern, severity, title, category, rec in VULN_PATTERNS.get(lang, []):
                            for i, line in enumerate(lines, 1):
                                if re.search(pattern, line):
                                    self.add_finding(
                                        severity=severity,
                                        category=category,
                                        title=title,
                                        file=rel_path,
                                        line=i,
                                        description=title,
                                        evidence=line.strip()[:100],
                                        recommendation=rec
                                    )
                except:
                    pass
    
    def scan_configuration(self):
        """Check for misconfigurations"""
        print("⚙️ Scanning configurations...")
        self.tools_used.append("config_scanner")
        
        # Docker
        dockerfile = os.path.join(self.repo_path, 'Dockerfile')
        if os.path.exists(dockerfile):
            self.scan_dockerfile(dockerfile)
        
        # GitHub Actions
        workflows_dir = os.path.join(self.repo_path, '.github', 'workflows')
        if os.path.exists(workflows_dir):
            for f in os.listdir(workflows_dir):
                if f.endswith(('.yml', '.yaml')):
                    self.scan_github_actions(os.path.join(workflows_dir, f))
    
    def scan_dockerfile(self, dockerfile):
        """Check Dockerfile for security issues"""
        try:
            with open(dockerfile) as f:
                content = f.read()
                lines = content.split('\n')
            
            for i, line in enumerate(lines, 1):
                # Running as root
                if re.search(r'^USER\s+root', line, re.I):
                    self.add_finding('HIGH', 'Configuration', 'Container runs as root', 
                                    'Dockerfile', i, 'Running as root increases attack surface',
                                    line.strip(), 'Add USER directive with non-root user')
                
                # Latest tag
                if re.search(r'FROM\s+\S+:latest', line, re.I):
                    self.add_finding('MEDIUM', 'Configuration', 'Using :latest tag',
                                    'Dockerfile', i, 'Latest tag is mutable and unpredictable',
                                    line.strip(), 'Pin to specific version')
                
                # Secrets in ENV
                if re.search(r'ENV\s+.*(PASSWORD|SECRET|KEY|TOKEN)', line, re.I):
                    self.add_finding('HIGH', 'Secrets', 'Secret in Dockerfile ENV',
                                    'Dockerfile', i, 'Secrets in ENV are visible in image history',
                                    self.sanitize_evidence(line.strip()), 'Use runtime secrets or Docker secrets')
        except:
            pass
    
    def scan_github_actions(self, workflow_path):
        """Check GitHub Actions for security issues"""
        rel_path = os.path.relpath(workflow_path, self.repo_path)
        
        try:
            with open(workflow_path) as f:
                content = f.read()
                lines = content.split('\n')
            
            for i, line in enumerate(lines, 1):
                # Script injection
                if re.search(r'\$\{\{\s*github\.event\.(issue|comment|pull_request)\.', line):
                    self.add_finding('HIGH', 'Configuration', 'GitHub Actions script injection risk',
                                    rel_path, i, 'Untrusted input in workflow can lead to injection',
                                    line.strip(), 'Sanitize github.event.* inputs before use')
                
                # Hardcoded secrets
                if re.search(r'(password|token|key|secret)\s*:', line, re.I) and '${{' not in line:
                    self.add_finding('CRITICAL', 'Secrets', 'Potential hardcoded secret in workflow',
                                    rel_path, i, 'Secrets should use GitHub Secrets',
                                    self.sanitize_evidence(line.strip()), 'Use ${{ secrets.NAME }}')
        except:
            pass
    
    def add_finding(self, severity, category, title, file, line, description, evidence, recommendation):
        """Record one finding and bump the counter for its severity.

        The finding gets a 1-based sequential ``id`` and status ``"open"``.
        The counter is only incremented when the lower-cased severity is one
        of the keys already present in ``self.stats``.
        """
        record = dict(
            id=len(self.findings) + 1,
            severity=severity,
            category=category,
            title=title,
            file=file,
            line=line,
            description=description,
            evidence=evidence,
            recommendation=recommendation,
            status="open",
        )
        self.findings.append(record)

        # Unknown severities are stored but not counted.
        bucket = severity.lower()
        if bucket in self.stats:
            self.stats[bucket] = self.stats[bucket] + 1
    
    def sanitize_evidence(self, text):
        """Mask secret-looking values in ``text`` before it enters a report.

        Redacts, in order: passwords embedded in ``scheme://user:pass@host``
        URLs, bearer tokens, long (20+ character) quoted or assigned tokens
        including base64-style characters, and any value assigned to a key
        whose name contains password/passwd/pwd/secret/token/key. The masks
        deliberately over-match: hiding a harmless value is preferable to
        leaking a credential.

        Args:
            text: The raw evidence line.

        Returns:
            The redacted text, truncated to at most 150 characters.
        """
        # Key name containing a credential word, an optional closing quote
        # (JSON style), then the assignment operator.
        key_prefix = r'((?:password|passwd|pwd|secret|token|key)\w*["\']?\s*[=:]\s*)'

        # Password part of connection-string URLs.
        text = re.sub(r'(://[^\s:/@]+:)[^\s@]+@', r'\1[REDACTED]@', text)
        # Bearer tokens are separated from the keyword by whitespace only.
        text = re.sub(r'(?i)(bearer\s+)[A-Za-z0-9_\-.=+/]+', r'\1[REDACTED]', text)
        # Long quoted tokens; '/', '+', '=' and '.' cover base64 and JWTs.
        text = re.sub(r'(["\'])[A-Za-z0-9_\-/+=.]{20,}(["\'])', r'\1[REDACTED]\2', text)
        # Long unquoted tokens after '=' or ':' (YAML); the lookahead keeps
        # the "//" of a URL scheme from being treated as a value.
        text = re.sub(r'([=:]\s*)(?!//)[A-Za-z0-9_\-/+]{20,}', r'\1[REDACTED]', text)
        # Values of credential-named keys, whatever their length.
        text = re.sub(r'(?i)' + key_prefix + r'(["\'])(?:(?!\2).)*\2',
                      r'\1\2[REDACTED]\2', text)
        text = re.sub(r'(?i)' + key_prefix + r'(?!["\'\[])[^\s"\',;]+',
                      r'\1[REDACTED]', text)
        return text[:150]
    
    def calculate_risk_grade(self):
        """Turn the severity counters into a letter grade from A to F.

        Starts at 100 and subtracts a fixed penalty per finding (critical 25,
        high 15, medium 5, low 1), clamps the result to 0..100 and maps it to
        A (>=90), B (>=80), C (>=70), D (>=60) or F.
        """
        penalties = (('critical', 25), ('high', 15), ('medium', 5), ('low', 1))
        deduction = sum(self.stats[level] * weight for level, weight in penalties)
        score = min(100, max(0, 100 - deduction))

        # Highest band first; the first floor the score reaches wins.
        for floor, letter in ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D')):
            if score >= floor:
                return letter
        return 'F'
    
    def run_full_scan(self):
        """Run complete enterprise security scan"""
        start_time = datetime.now()
        
        print(f"\n{'='*60}")
        print(f"  🛡️ EDEN ENTERPRISE SECURITY SCAN")
        print(f"  Target: {self.repo_url}")
        print(f"{'='*60}\n")
        
        # Clone
        if not self.clone_repo():
            return None
        
        try:
            # Run all scans
            self.scan_secrets()
            self.scan_dependencies()
            self.scan_code_security()
            self.scan_configuration()
            
            self.scan_time = (datetime.now() - start_time).total_seconds()
            
            # Calculate grade
            grade = self.calculate_risk_grade()
            
            print(f"\n{'='*60}")
            print(f"  ✅ SCAN COMPLETE")
            print(f"  Time: {self.scan_time:.1f}s | Tools: {len(self.tools_used)}")
            print(f"{'='*60}")
            print(f"\n  RISK GRADE: {grade}")
            print(f"\n  FINDINGS:")
            print(f"    🔴 Critical: {self.stats['critical']}")
            print(f"    🟠 High:     {self.stats['high']}")
            print(f"    🟡 Medium:   {self.stats['medium']}")
            print(f"    🟢 Low:      {self.stats['low']}")
            print(f"\n  Total: {len(self.findings)} issues")
            
            return {
                "repo": self.repo_url,
                "grade": grade,
                "stats": self.stats,
                "findings": self.findings,
                "scan_time": self.scan_time,
                "tools_used": self.tools_used,
                "scanned_at": datetime.now().isoformat()
            }
            
        finally:
            # Cleanup
            if self.repo_path and os.path.exists(self.repo_path):
                shutil.rmtree(self.repo_path, ignore_errors=True)

def scan_repo(repo_url):
    """Run a full scan of ``repo_url`` with a fresh ``EnterpriseScanner``.

    Returns whatever ``EnterpriseScanner.run_full_scan`` returns.
    """
    return EnterpriseScanner(repo_url).run_full_scan()

if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        result = scan_repo(sys.argv[1])
        if result:
            # Save results
            output = Path(f"/Eden/PRO/scans/scan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
            try:
                # The scans directory may not exist yet on a fresh machine.
                output.parent.mkdir(parents=True, exist_ok=True)
                output.write_text(json.dumps(result, indent=2), encoding="utf-8")
            except OSError as e:
                print(f"❌ Could not save results: {e}")
                sys.exit(1)
            print(f"\n📄 Results saved: {output}")
        else:
            # run_full_scan returns None when the repository cannot be cloned;
            # report it and signal failure to the calling shell.
            print("❌ Scan failed: repository could not be cloned")
            sys.exit(1)
    else:
        print("Usage: python3 enterprise_scanner.py <github_repo_url>")