
    h                        S SK r S SKJr  S SKJr  S SKJrJr  S SKr	S SK
r
S SKJr  S SKJrJr  \R                   " \5      rSS\S\S	\4S
 jjrSS\
R,                  S\
R,                  S\\   S	\
R,                  4S jjrSS\
R,                  S\
R,                  S\S	\
R,                  4S jjrSS\
R,                  S\
R,                  S\S	\
R,                  4S jjr " S S5      r " S S5      r    S S\\\4   S\\\\
R>                     \
R>                  4      S\\
R@                     S\\
RB                     S\\
RD                     S	\
R,                  4S jjr#g)!    N)Mapping)contextmanager)OptionalUnion)logging)is_torch_npu_availableis_torch_xpu_availablenestedsepreturnc                 \   ^^ S[         S[        S[         SS4UU4S jjm0 nT" U SU5        U$ )z>Flatten dictionary and concatenate nested keys with separator.nestprefixintor   Nc                    > U R                  5        HI  u  p4TU;   a  [        ST SU S35      e[        U[        5      (       a  T" XAU-   T-   U5        MC  XBX-   '   MK     g )Nzseparator 'z' not allowed to be in key '')items
ValueError
isinstancer   )r   r   r   kvrecurser   s        B/home/james-whalen/.local/lib/python3.13/site-packages/trl/core.pyr   flatten_dict.<locals>.recurse    sc    JJLDAax ;se3OPQsRS!TUU!W%%A:+T2#$VZ  !     )dictstr)r
   r   flatr   s    ` @r   flatten_dictr       sA    %d %C %t % % % DFBKr   valuesmaskaxisc                     Ub!  X-  R                  US9UR                  US9-  $ X-  R                  5       UR                  5       -  $ )z,Compute mean of tensor with a masked values.)r#   )sum)r!   r"   r#   s      r   masked_meanr&   .   sJ    """-d0CCC""$txxz11r   unbiasedc                     [        X5      nX-
  n[        US-  U5      nU(       a,  UR                  5       nUS:X  a  [        S5      eXfS-
  -  nXW-  nU$ )z.Compute variance of tensor with masked values.   r   zThe sum of the mask is zero, which can happen when `mini_batch_size=1`;try increase the `mini_batch_size` or `gradient_accumulation_steps`   )r&   r%   r   )r!   r"   r'   meancentered_valuesvariancemask_sumbessel_corrections           r   
masked_varr0   6   sh    v$DmO?A-t4H88:q=V  %15/Or   
shift_meanc                     [        X5      [        X5      pCX-
  [        R                  " US-   5      -  nU(       d  XS-  nU$ )z!Whiten values with masked values.g:0yE>)r&   r0   torchrsqrt)r!   r"   r1   r+   varwhiteneds         r   masked_whitenr7   I   s<    F):f+C#S4Z!88HOr   c                   8    \ rS rSrSrS\S\4S jrS\4S jrSrg	)
LengthSamplerR   z
Samples a length
	min_value	max_valuec                 6    [        [        X5      5      U l        g N)listranger!   )selfr;   r<   s      r   __init__LengthSampler.__init__W   s    567r   r   c                 T    [         R                  R                  U R                  5      $ r>   )nprandomchoicer!   )rA   s    r   __call__LengthSampler.__call__Z   s    yy,,r   )r!   N)	__name__
__module____qualname____firstlineno____doc__intrB   rH   __static_attributes__ r   r   r9   r9   R   s&    8# 8# 8-# -r   r9   c                   2    \ rS rSrSr\\S 5       5       rSrg)PPODecorators^   Fc              #   r  #    S v   U R                   (       Ga  [        5       (       aI  [        R                  " 5         [        R
                  R                  5         [        R                  " 5         g [        5       (       aI  [        R                  " 5         [        R                  R                  5         [        R                  " 5         g [        R                  R                  5       (       aI  [        R                  " 5         [        R                  R                  5         [        R                  " 5         g g g 7fr>   )optimize_device_cacher	   gccollectr3   xpuempty_cacher   npucudais_available)clss    r   empty_device_cache PPODecorators.empty_device_cachea   s      	$$$%''

		%%'

'))

		%%'

((**



&&(

 + %s   D5D7rQ   N)	rJ   rK   rL   rM   rV   classmethodr   r_   rP   rQ   r   r   rS   rS   ^   s"    !  r   rS   shape	generatordevicedtypelayoutc                 j   UnU S   nU=(       d    [         R                  nU=(       d    [         R                  " S5      nUb  [        U[        5      (       d  UR                  R
                  OUS   R                  R
                  nXrR
                  :w  a.  US:X  a(  SnUS:w  a  [        R                  SU SU SU S35        O'XrR
                  :w  a  US	;   a  [        S
U SU S35      e[        U[        5      (       a  [        U5      S:X  a  US   n[        U[        5      (       a`  SU SS -   n [        U5       Vs/ s H  n[         R                  " XU   XSUS9PM     n	n[         R                  " U	SS9R                  U5      n	U	$ [         R                  " XXSUS9R                  U5      n	U	$ s  snf )zA helper function to create random tensors on the desired `device` with the desired `dtype`. When
passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor
is always created on the CPU.
r   cpuNmpszBThe passed generator was created on 'cpu' even though a tensor on zB was expected. Tensors will be created on 'cpu' and then moved to zl. Note that one can probably slightly speed up this function by passing a generator that was created on the z device.)r\   rY   zCannot generate a z! tensor from a generator of type .r*   )r*   )rc   rd   re   rf   )dim)r3   stridedrd   r   r?   typeloggerwarningr   lenr@   randncatto)
rb   rc   rd   re   rf   rand_device
batch_sizegen_device_typeilatentss
             r   randn_tensorry   t   s    KqJ$u}}F*u||E*F7A)T7R7R)**//XabcXdXkXkXpXpkk)o.FKXY_X` aKKQ( Sggmfn o +?0R1&9Z[jZkklmnn )T""s9~':aL	)T""uQRy  :&
& KK1k_ef& 	 
 ))G+..v6 N ++ebhillmstN
s   >#F0)/r>   )T)NNNN)$rW   collections.abcr   
contextlibr   typingr   r   numpyrE   r3   
accelerater   transformersr   r	   
get_loggerrJ   rn   r   r   r    Tensorboolr&   r0   r7   r9   rS   tupler?   	Generatorrd   re   rf   ry   rQ   r   r   <module>r      s   
 # % "    G 
		H	% C $ "2 2ELL 2 2Z_ZfZf 2u|| 5<< 4 SXS_S_ &%,, ell  X]XdXd 	- 	- 0 JN%)#'%)..d5??3U__DEF. U\\". EKK 	.
 U\\". \\.r   