
    oiT,                       % S r SSKJr  SSKrSSKJr  SSKJrJrJ	r	J
r
Jr  SSKr SSKJr  Sr\" 5       rS\S	'   S!S
 jr    S"S jr " S S\R.                  5      rSqS rS rS rS rSS.   S#S jjrS r      S$S jr SSS.     S%S jjr!SS.         S&S jjr"      S'S jr#SS.       S(S jjr$/ S Qr%g! \ a     SSKJr   N! \ a    Sr  Nf = ff = f))zDUtilities for enabling packed (padding-free) batches across Unsloth.    )annotationsN)OrderedDict)AnyIterableOptionalSequenceTuple)BlockDiagonalCausalMask    z-OrderedDict[Tuple[Tuple[int, ...], int], Any]_XFORMERS_MASK_CACHEc                ,    U b  U S::  a  g[        U 5      $ )Nr   intsliding_windows    O/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/utils/packing.py_window_cache_keyr   (   s    1!4~    c                   [         c  g [        U5      nX4n[        R                  U5      nUb  [        R	                  U5        U$ [         R
                  " [        U 5      5      nU(       a#  Ub   [        US5      (       a  UR                  US9nU[        U'   [        [        5      [        :  a  [        R                  SS9  U$ )Nmake_local_attentionwindow_sizeF)last)_XFormersBlockMaskr   r   getmove_to_endfrom_seqlenslisthasattrr   len_XFORMERS_MASK_CACHE_MAXSIZEpopitem)lengthsr   
window_key	cache_keycachedmasks         r   _get_cached_block_maskr(   .   s     !">2J%I!%%i0F((3**4=9Dd&749O+P+P((z(B&*#
 #??$$E$2Kr   c                  "    \ rS rSrSrSS jrSrg)_TrlPackingWarningFilterF   )zattention implementation is notzkernels-communityc                l   ^ UR                  5       m[        U4S jU R                   5       5      (       + $ )Nc              3  ,   >#    U  H	  oT;   v   M     g 7fN ).0	substringmessages     r   	<genexpr>2_TrlPackingWarningFilter.filter.<locals>.<genexpr>N   s     L^	G+^s   )
getMessageany	to_filter)selfrecordr2   s     @r   filter_TrlPackingWarningFilter.filterL   s)    ##%LT^^LLLLr   r/   N)r9   zlogging.LogRecordreturnbool)__name__
__module____qualname____firstlineno__r7   r:   __static_attributes__r/   r   r   r*   r*   F   s    I
Mr   r*   Fc                 z    [         (       a  g [        R                  " S5      R                  [	        5       5        Sq g )Nztrl.trainer.sft_trainerT)_TRL_FILTER_INSTALLEDlogging	getLogger	addFilterr*   r/   r   r   _ensure_trl_warning_filterrH   T   s/    /0::;S;UV r   c                    U c  g[        U S5      (       a  [        U SS5        [        U SS5      nUc  gU" 5        H  n[        U5        M     g)zJMark a module hierarchy so padding-free batches can exceed max_seq_length.Nmax_seq_length _unsloth_allow_packed_overlengthTchildren)r   setattrgetattrmark_allow_overlength)modulerL   childs      r   rO   rO   \   sP    ~v'((:DAvz40He$ r   c                L    [        5         [        U SS5        [        U SS5        g)z7Mutate an ``SFTConfig`` so TRL prepares packed batches.packingTpadding_freeNrH   rM   configs    r   configure_sample_packingrX   i   s      FIt$FND)r   c                2    [        5         [        U SS5        g)zMMutate an ``SFTConfig`` so TRL enables padding-free batching without packing.rT   TNrU   rV   s    r   configure_padding_freerZ   p   s     FND)r   seq_lengths)sequence_lengths_keyc                 ^^ U b  Uc  [        S5      e[        U 5        [        US5      (       a,  [        UR                  S5      (       a  SUR                  l        [        USS5      nUb  [        US5      (       d  g[        USS5      (       a  g[        US	5      (       a  S
Ul        [        US5      (       a  S
Ul        UR                  mSUU4S jjnXCl        S
Ul	        g)zAEnable runtime support for packed batches on an existing trainer.Nz"model and trainer must not be Noneargsremove_unused_columnsFdata_collator
torch_call_unsloth_packing_wrappedrT   Treturn_position_idsc                t  > T" U 5      nU (       a  [        U S   [        5      (       a  / nU  HC  nUR                  T5      n[        U[        5      (       d  M+  UR	                  S U 5       5        ME     U(       a=  [
        R                  " U[
        R                  S9US'   SU;   a  UR                  S5        U$ )Nr   c              3  8   #    U  H  n[        U5      v   M     g 7fr.   r   )r0   lengths     r   r3   Ienable_sample_packing.<locals>.torch_call_with_lengths.<locals>.<genexpr>   s     &Ivs6{{   dtypepacked_seq_lengthsattention_mask)	
isinstancedictr   r   extendtorchtensorint32pop)examplesbatchr[   exampler#   original_torch_callr\   s        r   torch_call_with_lengths6enable_sample_packing.<locals>.torch_call_with_lengths   s    #H-
8A;55%'K#!++&:;gx00&&&I&II $ .3ll/*+ $u,II./r   rt   zSequence[dict])

ValueErrorrO   r   r^   r_   rN   rT   rc   ra   rb   )modeltrainerr\   collatorrx   rw   s     `  @r   enable_sample_packingr   v   s     }=>>% wGGLL:Q$R$R-2*w6Hwx>>x3U;;x(( $x.//'+$"--   2(,H%r   c                   ^ [        USS5      nUb$  [        USS5      (       d  [        USS5      (       d  g[        U 5        [        US5      (       a  SUl        UR                  mS	U4S jjnX2l        SUl        g)
zQInject seq-length metadata when padding-free batching is enabled without packing.r`   N%_unsloth_padding_free_lengths_wrappedFrT   rc   Tc                \  > / nU (       al  [        U S   [        5      (       aT  U  HN  nUR                  S5      nUc&  UR                  S5      nUc  M-  [        U5      /nX2S'   UR	                  U5        MP     T" U 5      nU(       a&  [
        R                  " U[
        R                  S9US'   U$ )Nr   r[   	input_idsri   rk   )rm   rn   r   r    ro   rp   rq   rr   )rt   r[   rv   r#   idsru   rw   s         r   %torch_call_with_padding_free_metadataKenable_padding_free_metadata.<locals>.torch_call_with_padding_free_metadata   s    !#
8A;55#!++m4?!++k2C{ "3xjG-4M*""7+ $ $H-*/,,+E&' r   rz   )rN   rO   r   rc   ra   r   )r|   r}   r~   r   rw   s       @r   enable_padding_free_metadatar      sy    w6H8DeLLx77% x.//'+$"--* @59H2r   c                   U R                  S5      nUc  gUR                  U[        R                  SS9n[        R                  " UR                  5       S-   [        R                  US9nSUS'   [        R                  " US[        R                  USS S9  [        UR                  5       R                  5       5      nX4U4$ )	zBReturn packed sequence metadata expected by the attention kernels.rk   NT)devicerj   non_blocking   rj   r   r   )dimrj   out)
r   torp   rr   emptynumelcumsumr   maxitem)kwargsr   r[   r#   
cu_seqlens
max_seqlens         r   get_packed_info_from_kwargsr      s     **12KnnfekkRVnWGW]]_q0%++PVWJJqM	LL5;;jnMW[[]'')*J
**r   )r   	base_maskc               H   [         c  g U bh  U u  n  nUR                  S[        R                  5      nUR	                  5       S:X  a  g [        S UR                  5        5       5      n[        Xa5      nU$ UnUb)  US:  a#  Ub   [        US5      (       a  UR                  US9nU$ )Ncpur   c              3  8   #    U  H  n[        U5      v   M     g 7fr.   r   )r0   xs     r   r3   3build_xformers_block_causal_mask.<locals>.<genexpr>   s     @(?1A(?rh   r   r   )
r   r   rp   rr   r   tupletolistr(   r   r   )seq_infor   r   r[   _lengths_tensorr#   r'   s           r    build_xformers_block_causal_maskr      s     !$Q$u{{;!Q&@(=(=(?@@%g> K  &" 455,,>,JDKr   r   c                  U u  n  n[        UR                  5       R                  5       5      n[        R                  " Xf4[        S5      UUS9nSnUR                  5        H  n	[        U	5      n	U	S::  a  M  [        R                  " X4XS9n
[        R                  " [        R                  " X4US9SS9R                  5       nU
R                  U[        S5      5      n
Uba  US:  a[  X:  aV  [        R                  " XS9nUR                  S5      UR                  S5      -
  nX:  nU
R                  U[        S5      5      n
XXU	-   2XU	-   24'   X-  nM     UR                  S5      R                  S5      $ )Nz-infr   r   r   r   )diagonal)r   sumr   rp   fullfloatr   zerostriuonesr=   masked_fillarange	unsqueeze)r   rj   r   r   r[   r   total_tokensr'   offsetrf   blockupperidxdistwindow_masks                  r    build_sdpa_packed_attention_maskr      si    !KA{(--/0L::	$f	D F$$&VQ;V,eM

JJ'&9a

$& 	 !!%v7&"',,v7C==#cmmA&66D0K%%k5=AECHVvo%v'??@' '( >>!&&q))r   c               <   U c  g [        U [        R                  5      (       a  U R                  U[        R                  S9nO#[        R
                  " X[        R                  S9nUR                  S:w  a  UR                  S5      nUR                  5       S:X  a  g U$ )N)r   rj   r   r   )	rm   rp   Tensorr   int64rq   ndimreshaper   )r[   r   r#   s      r   _normalize_packed_lengthsr   %  sx    
 +u||,,..&%++.F,,{U[[Q||q//"%}}!Nr   i)ignore_indexc                  [        XR                  S9nUc  gU R                  S5      nUR                  S   n[        R
                  " USS9S-
  nXe:  n[        R                  " U5      (       d  Xg   nUR                  5       S:X  a  gX$U'   g)zKMark final token of every packed sample so CE ignores boundary predictions.r   Fr   r   )r   r   T)r   r   r   shaperp   r   allr   )shift_labelsr[   r   r#   flatr   boundary_positionsvalids           r   mask_packed_sequence_boundariesr   7  s     (>Q>QRG#D::a=LgQ7!;-E99U/6!Q&+	r   )	rX   rZ   r   r   rO   r   r   r   r   )r   Optional[int]r<   r   )r#   zTuple[int, ...]r   r   )r\   strr<   None)r   rn   r   torch.devicer<   0Optional[Tuple[torch.Tensor, torch.Tensor, int]])r   r   r   r   r   zOptional[Any])
r   z&Tuple[torch.Tensor, torch.Tensor, int]rj   ztorch.dtyper   r   r   r   r<   torch.Tensor)r[   r   r   r   r<   zOptional[torch.Tensor])r   r   r[   r   r   r   r<   r=   )&__doc__
__future__r   rE   collectionsr   typingr   r   r   r   r	   rp   xformers.ops.fmha.attn_biasr
   r   	Exceptionxformers.attn_biasr!   r   __annotations__r   r(   Filterr*   rD   rH   rO   rX   rZ   r   r   r   r   r   r   r   __all__r/   r   r   <module>r      s    K "  # ; ; "  " FQm C S!0Mw~~ M  !
%** !.	-- 	--
 
--`&:R+++ 6+, %)#	> " 	B %)$*4$* $* 	$*
 "$* $*N  	, 	 	
 
.
a	  ""T "!""s)   C C&CC"C&!C""C&