
    Wh                     T    S r SSKrSSKrSSKJr  SSKr " S S5      r " S S5      rg)z
Experience Replay - Store and replay old examples to prevent forgetting
This is how DeepMind's DQN learned to play Atari games
    N)dequec                       \ rS rSrSrSS\4S jjrS\S\R                  S\R                  4S jr	SS
\S\4S jjr
S\4S jrSrg	)ExperienceBuffer
   z@Store experiences from all tasks and sample them during trainingmax_sizec                 :    Xl         [        US9U l        0 U l        g )Nmaxlen)r   r   buffertask_buffers)selfr   s     ]/home/james-whalen/eden-agi-project/real_capabilities/learning/continual/experience_replay.py__init__ExperienceBuffer.__init__   s     8,    task_idinputstargetsc                    [        X#5       H  u  pEU R                  R                  UUR                  5       UR                  5       S.5        XR                  ;  a#  [        U R                  S-  S9U R                  U'   U R                  U   R                  UR                  5       UR                  5       S.5        M     g)zAdd experience to buffer)r   inputtargetr   r	   )r   r   N)zipr   appendcloner   r   r   )r   r   r   r   inptargs         r   addExperienceBuffer.add   s    V-ICKK"**,   ///-2$--2:M-N!!'*g&--**,/  .r   N
batch_sizec                    Ub  X R                   ;   a  U R                   U   nOU R                  n[        U5      U:  a  [        U5      n[        R                  " [        U5      U5      n[        R                  " U Vs/ s H  oUS   PM	     sn5      n[        R                  " U Vs/ s H  oUS   PM	     sn5      nXg4$ s  snf s  snf )zSample batch of experiencesr   r   )r   r   lenrandomsamplelisttorchstack)r   r   r   r   experiencesexpr   r   s           r   r#   ExperienceBuffer.sample#   s    7.?.?#?&&w/F [[Fv;#VJmmDL*=kBks'lkBC++D8}DE CDs   =B>&Cc                     XR                   ;  a  gU R                   U   n[        R                  " U Vs/ s H  o3S   PM	     sn5      n[        R                  " U Vs/ s H  o3S   PM	     sn5      nXE4$ s  snf s  snf )z Get all data for a specific task)NNr   r   )r   r%   r&   )r   r   r   r(   r   r   s         r   get_task_dataExperienceBuffer.get_task_data6   so    +++""7+f=fs'lf=>++?8}?@ >?s   A5A:)r   r   r   )i'  N)__name__
__module____qualname____firstlineno____doc__intr   r%   Tensorr   r#   r+   __static_attributes__ r   r   r   r   
   sR    J 
3  u|| " s &	S 	r   r   c                   @    \ rS rSrSrS
S\4S jjrSS\S\4S jjrSr	g	)ReplayContinualLearnerA   zQ
Continual learner with experience replay
Combines EWC + Replay for best results
replay_ratioc                 :    Xl         [        5       U l        X l        g r-   )modelr   replay_bufferr:   )r   r<   r:   s      r   r   ReplayContinualLearner.__init__G   s    
-/(r   r   epochsc                    [         R                  R                  U R                  R	                  5       SS9n[        SS 35        [        SU S35        [        SU R                  S 35        [        S 5        [        U5       GH9  nU R                  R                  5         S	nS	nS	nU GH  u  pUR                  5         U R                  R                  X)U
5        US	:  a  [        U R                  R                  5      S	:  ar  [        [        U	5      U R                  -  5      nUS	:  aK  U R                  R                  U5      u  p[         R                   " X/5      n	[         R                   " X/5      n
U R                  U	5      n[         R"                  R$                  R'                  X5      nU R                  R(                  S	:  a  U R                  R+                  5       OS	nUU-   nUR-                  5         UR/                  5         UUR1                  5       -  nUR3                  S
5      u  nnXR5                  S	5      -  nUUR7                  U
5      R9                  5       R1                  5       -  nGM     SU-  U-  nU[        U5      -  nUS
-   S-  S	:X  d  GM  [        SUS
-    SU SUS SUS S3	5        GM<     U R                  R;                  X5        U R                  R=                  U5        U R                  =R(                  S
-  sl        [        SU S35        [        S[        U R                  R                  5       S35        g)z$Train with replay of old experiencesgMbP?)lr
z<============================================================zTraining Task z WITH Experience ReplayzReplay Ratio: z.0%r      g      Y@   zEpoch /z	 - Loss: z.4fz - Acc: z.2f%u	   ✅ Task z training completez   Buffer size: z experiencesN)r%   optimAdamr<   
parametersprintr:   rangetrain	zero_gradr=   r   r!   r   r3   r#   catnn
functionalcross_entropytasks_learnedewc_lossbackwardstepitemmaxsizeeqsumcompute_fisher_informationsave_optimal_params)r   train_loaderr   r?   	optimizerepoch
total_losscorrecttotalr   r   replay_sizereplay_inputsreplay_targetsoutputs	task_lossewc_penaltyloss_	predictedaccuracyavg_losss                         r   train_with_replay(ReplayContinualLearner.train_with_replayL   s   KK$$TZZ%:%:%<$G	6(mwi'>?@t005676]EJJJGE#/##% ""&&w@ Q;3t'9'9'@'@#AA#E"%c&kD4E4E&E"FK"Q8<8J8J8Q8QR]8^5 "'F+B!C"'))W,E"F **V, "HH//==gO	 8<zz7O7ORS7Sdjj113YZ !;.  diik)
&{{1~9a(9<<0446;;==G $0J g~-H!C$55H	Q!#uQwiq	(3xPXY\~]^_`_ #d 	

--lD

&&w/

  A% 	'"456 T%7%7%>%>!? @MNr   )r<   r=   r:   N)g333333?)   )
r.   r/   r0   r1   r2   floatr   r3   rn   r5   r6   r   r   r8   r8   A   s3    
)E )
@Os @OC @O @Or   r8   )	r2   r%   numpynpcollectionsr   r"   r   r8   r6   r   r   <module>ru      s1       5 5nKO KOr   