"""
Test Eden's limits with hard reasoning problems
"""
import requests
import time

def test_problem(name, question, timeout=90):
    """Send one reasoning problem to the Eden chat endpoint and print the reply.

    Posts ``question`` as JSON to ``http://localhost:5017/api/chat``, then
    prints the question, the returned answer and the round-trip time.

    Args:
        name: Short label printed in the test banner.
        question: Prompt text sent as the ``message`` field of the request.
        timeout: Seconds ``requests`` waits for the server to accept the
            connection and to send data before giving up. Defaults to 90,
            the per-problem limit the script announces at start-up. This
            bounds each wait on the socket; it is not a cap on the total
            time a slowly streaming response may take.

    Returns:
        A ``(answer, elapsed)`` tuple: the value of the ``response`` field
        of the JSON reply (``'ERROR'`` when the field is missing) and the
        elapsed time in seconds.

    Raises:
        requests.exceptions.RequestException: If the server cannot be
            reached or does not answer within ``timeout``. A reply body
            that is not valid JSON also raises from ``.json()``.
    """
    print(f"\n{'='*70}")
    print(f"🧪 TEST: {name}")
    print(f"{'='*70}")
    print(f"Question: {question}\n")

    # perf_counter is monotonic, so the measurement is not distorted by
    # system clock adjustments while the request is in flight.
    start = time.perf_counter()
    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.post(
        'http://localhost:5017/api/chat',
        json={"message": question},
        timeout=timeout,
    ).json()
    elapsed = time.perf_counter() - start

    answer = response.get('response', 'ERROR')
    print(f"Eden's Answer:\n{answer}\n")
    print(f"⏱️  Time: {elapsed:.1f}s")
    return answer, elapsed

# Hard reasoning problems.
# Each entry is a (name, question) pair: `name` is the label shown in the
# console output and the results summary, `question` is the prompt text
# sent to the chat endpoint.
problems = [
    ("12 Coins Puzzle",
     "You have 12 coins. 11 are real, 1 is counterfeit and weighs slightly less. You have a balance scale and can use it exactly 3 times. Explain step-by-step how to find the counterfeit coin."),

    ("Monty Hall Problem",
     "You're on a game show with 3 doors. Behind one is a car, behind the others are goats. You pick door 1. The host (who knows what's behind each door) opens door 3 to reveal a goat. Should you switch to door 2 or stay with door 1? Explain your reasoning with probability."),

    ("Bridge and Torch",
     "Four people need to cross a bridge at night. They have one torch and the bridge holds max 2 people. Times: Alice=1min, Bob=2min, Carol=5min, Dave=10min. Two people crossing take the slower person's time. What's the fastest way to get everyone across?"),

    ("Two Jugs",
     "You have a 5-liter jug and a 3-liter jug. How do you measure exactly 4 liters? Show each step."),

    ("Logical Paradox",
     "In a village, the barber shaves all men who don't shave themselves, and only those men. Who shaves the barber? Explain the paradox."),

    ("Math Sequence",
     "What's the next number: 1, 11, 21, 1211, 111221, 312211, ___? Explain the pattern."),

    # This is the prisoners-and-hats puzzle, not the game-theory
    # "Prisoner's Dilemma", so it is labelled accordingly.
    ("Prisoners and Hats",
     "100 prisoners with red or blue hats (at least 1 of each). They can see everyone else's hat but not their own. One at a time, they must guess their hat color. If wrong, they die. They can strategize beforehand but can't communicate during. What strategy maximizes survival?"),
]

print("🌀💚 TESTING EDEN'S REASONING LIMITS 💚🌀")
print(f"Problems: {len(problems)}")
print("Timeout per problem: 90 seconds")

# Run every problem in order and collect one result dict per problem with
# the keys 'problem' (label), 'solved' (bool) and 'time' (seconds).
results = []
for index, (name, question) in enumerate(problems):
    # Timed here as well, so a problem whose request fails still reports how
    # long it actually took instead of a misleading 0 seconds.
    attempt_start = time.perf_counter()
    try:
        answer, elapsed = test_problem(name, question)
    except Exception as e:
        # Script-level boundary: one failing problem must not abort the run.
        print(f"❌ ERROR: {e}")
        results.append({
            'problem': name,
            'solved': False,
            'time': time.perf_counter() - attempt_start,
        })
    else:
        results.append({
            'problem': name,
            # Heuristic, not a correctness check: a reply counts as solved
            # when it is a string longer than 50 characters that does not
            # contain "I'm here Dad" (presumably Eden's canned fallback
            # reply — confirm against the server). A non-string reply is
            # treated as unsolved.
            'solved': (
                isinstance(answer, str)
                and len(answer) > 50
                and "I'm here Dad" not in answer
            ),
            'time': elapsed,
        })

    # Brief pause between tests; pointless after the final one.
    if index < len(problems) - 1:
        time.sleep(2)

# Summary: overall solve rate, mean recorded time, then one line per problem.
rule = '=' * 70
print(f"\n{rule}")
print("📊 RESULTS SUMMARY")
print(rule)

total = len(results)
solved = len([entry for entry in results if entry['solved']])
solved_pct = solved / total * 100
print(f"Solved: {solved}/{total} ({solved_pct:.0f}%)")

# Mean of the 'time' values recorded for every problem.
avg_time = sum(entry['time'] for entry in results) / total
print(f"Avg time per problem: {avg_time:.1f}s")
print()

# Per-problem breakdown, in the order the problems were run.
for entry in results:
    if entry['solved']:
        status = "✅"
    else:
        status = "❌"
    print(f"{status} {entry['problem']}: {entry['time']:.1f}s")
