
    1,h                          " S  S5      r g)c                   2    \ rS rSrS rS rS rS rS rSr	g)	SimpleCapabilityTest   c                     Xl         / U l        g )N)systemresults)selfr   s     >/home/james-whalen/eden-agi-project/simple_capability_tests.py__init__SimpleCapabilityTest.__init__   s        c                 0   Sn[        S5        [        S5        [        S5        U R                  R                  U5      nSU;   n[        SUS S  S35        [        S	5        [        S
U(       a  SOS 35        U R                  R	                  SUS.5        U$ )Nu  Learn the pattern from these examples:
Example 1: Input: [1, 2, 3] → Output: 6
Example 2: Input: [2, 4, 6] → Output: 12
Example 3: Input: [1, 1, 1] → Output: 3

Now apply the pattern:
Input: [5, 5, 5] → Output: ?

Just give the number, no explanation.G
======================================================================zTEST 1: FEW-SHOT LEARNINGF======================================================================15
Response: d   ...zExpected: 15Result:    ✅ PASS   ❌ FAILzFew-Shot Learningtestpassed)printr   respondr   appendr   promptresponser   s       r	   test_few_shot_learning+SimpleCapabilityTest.test_few_shot_learning   s    ) 	m)*f;;&&v.!
8DS>*#./v:>?@%8FKLr   c                    Sn[        S5        [        S5        [        S5        U R                  R                  U5      nSUR                  5       ;   =(       d    SUR                  5       ;   n[        SUS S  S	35        [        S
5        [        SU(       a  SOS 35        U R                  R                  SUS.5        U$ )NzObservation: Ice cream sales and drowning deaths are correlated.

Question: Does ice cream cause drowning?
Answer with ONLY: YES, NO, or CONFOUNDEDr   zTEST 2: CAUSAL REASONINGr   noconfoundr   r   r   zExpected: NO or CONFOUNDEDr   r   r   zCausal Reasoningr   )r   r   r   lowerr   r   r   s       r	   test_causal_reasoning*SimpleCapabilityTest.test_causal_reasoning    s    ,
 	m()f;;&&v.))KZ8>>;K-K
8DS>*#./*,v:>?@%76JKr   c                    Sn[        S5        [        S5        [        S5        U R                  R                  U5      nUR                  5       R	                  5       R                  S5      n[        SUS S  S35        [        S	5        [        S
U(       a  SOS 35        U R                  R                  SUS.5        U$ )NzSally puts a marble in basket A.
Sally leaves the room.
Anne moves the marble from basket A to basket B.
Sally returns.

Where will Sally look for the marble?
Answer with ONE letter: A or Br   zTEST 3: THEORY OF MINDr   Ar   r   r   zExpected: Ar   r   r   zTheory of Mindr   )r   r   r   stripupper
startswithr   r   r   s       r	   test_theory_of_mind(SimpleCapabilityTest.test_theory_of_mind3   s    " 	m&'f;;&&v.!'')44S9
8DS>*#./v:>?@%5HIr   c           	         [        S5        [        S5        [        S5        U R                  5         U R                  5         U R                  5         [	        U R
                  5      n[        S U R
                   5       5      nX!-  S-  n[        S5        [        S5        [        S5        [        SU SU S	US
 S35        US:  a  [        S5        OUS:  a  [        S5        O[        S5        [        S5        U R
                  $ )Nr   zCAPABILITY TEST SUITEr   c              3   <   #    U  H  oS    (       d  M  Sv   M     g7f)r      N ).0rs     r	   	<genexpr>5SimpleCapabilityTest.run_all_tests.<locals>.<genexpr>S   s     <1(QQs   	r   zFINAL RESULTSz
Score: /z (z.0fz%)F   u   
✅ GOOD baseline capabilities2   u?   
⚠️ MIXED - some capabilities work, others need improvementu2   
❌ WEAK baseline - needs significant improvementzE
Next step: Train MAML to improve few-shot learning from ~30% to ~60%)r   r    r&   r-   lenr   sum)r   totalr   
percentages       r	   run_all_tests"SimpleCapabilityTest.run_all_testsI   s    m%&f##%""$  "DLL!<<<n+
mof	&5'Js+;2>?452TUGHVW||r   )r   r   N)
__name__
__module____qualname____firstlineno__r
   r    r&   r-   r>   __static_attributes__r2   r   r	   r   r      s    0&,r   r   N)r   r2   r   r	   <module>rE      s   a ar   