
    oi                        S /r SSKrSSKrSSKrSSKrSSKJrJrJ	r	J
r
Jr  SSKJr  SSKJr  \" 5       r\R$                  " SS\S9S	 5       r \R$                  " SS\S9S
 5       r \\S'   \R$                  " SS\S9     S:S\R*                  S\R*                  S\R*                  S\S\S\S\S\S\R*                  4S jj5       r\\S'   S\R*                  S\S\S\R*                  4S jr \\S'   S\R*                  S\R*                  S\S\S\R*                  4
S jr \\S '   S!\R*                  S"\S\R*                  4S# jr \\S$'    S;S%\R*                  S&\R*                  S'\S\R*                  4S( jjr\\S)'     S<S* jr\\S+'   S=S, jr \\S-'   S. r \\S/'   S0\R@                  " \5       3\S1'   \S1   RC                  S2S35      \S1'    " S4 S5\RD                  RF                  5      r$ \$\S5'    S>S6 jr% \%\S7'   SS8K&J'r'  \'\S9'   g)?RL_REPLACEMENTS    N)UnionCallableOptionalListDict   )DEVICE_TYPE)torch_compile_optionsT)dynamic	fullgraphoptionsc                     U R                  [        R                  5      n [        R                  " U SUR	                  S5      S9R                  S5      n[        R                  " U SS9nX#-
  nU$ )Ndimindexr   )totorchfloat32gather	unsqueezesqueeze	logsumexp)logitsr   selected_logitslogsumexp_valuesper_token_logpss        U/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/rl_replacements.pyselective_log_softmaxr!      s\    YYu}}%Fll6U__R=PQYYZ\]O vR8%8O    c                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )Nr      r   chunksr   r   r   r	   )r   chunkreshapeshapezipr   r   r   r   r   r   appendconcat)
r   r   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexr   r   r   s
             r    chunked_selective_log_softmaxr2   ,   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYr"   r!   hidden_stateslm_headr   r&   logit_scale_multiplylogit_scale_dividelogit_softcappingtemperaturereturnc                 >   U R                  SU R                  S   5      nUR                  S5      n	[        R                  " XSS9n
[        R                  " XSS9n/ n[	        X5       H  u  pUR                  UR                  5      UR                  5       -  nUS:w  a  X-  nUS:w  a  X-  nUS:w  a  U[        R                  " X-  5      -  nUR                  [        R                  5      nUS:w  a  X-  n[        R                  " USUR                  S5      S9R                  S5      n[        R                  " USS9nUU-
  nUR                  U5        M     [        R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )	Nr   r   r%                 ?r   r   r	   )r(   r)   r   r'   r*   r   dtypettanhr   r   r   r   r   r+   r,   )r3   r4   r   r&   r5   r6   r7   r8   flat_hidden_states
flat_indexchunked_hidden_statesr.   r/   chunk_hidden_statesr1   r0   r   r   r   s                      r    +chunked_hidden_states_selective_log_softmaxrD   A   s    '..r=3F3Fr3JKr"J!KK(:qQKK
qAM,/0E,U(*--gmm<wyy{J3&'>L$'<L#'%**\5U*VVL#u}}5#'5L,,|;CXCXY[C\]eefhi ??<R@),<<""?3% -V(  ,,':;-55}7J7J17M}ObObcdOe6fgr"   grpo_selective_log_softmax	input_idslogits_to_keeppad_token_idc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zq
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens
r	   z8logits_to_keep must be smaller than the sequence length.Nr   )r)   
ValueErrorsum)rF   rG   rH   prompt_sectionpadding_maskpad_token_countss         r    calculate_pad_tokens_in_promptrO   p   sX     ++STTq"2N?"223N"2L#''A'.r"   rO   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
devicer   r	   )r)   rU   r   aranger   )rP   rQ   rR   rH   
batch_sizecompletion_lenrU   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               r     create_completion_attention_maskr^      si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.Jr"   r^   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
r	   T)r   
descendingstable)r   argsortr   )r_   r`   masksorted_indicespacked_tensors        r    left_pack_paddingrh      s8     D]]4Q4MNLLN;Mr"   rh   logprob_tensorattention_mask	pad_valuec                    U R                   nU R                  u  pEUR                  S   n[        R                  " UR                  UU R                  US9n[        R
                  " USS9n[        R                  " XSS9n	UR                  S5      U	-   n
[        R                  " XCS9R                  S5      R                  U
5      nX:  nX   nX   nX   nXX4'   U$ )z>
Aligns a log probability tensor with a given attention mask.
r	   )
fill_valuer=   rU   r   rT   )	rU   r)   r   fullr=   argmaxrV   r   	expand_as)ri   rj   rk   rU   rW   logprob_seq_lenmask_seq_lenpadded_logprobsleft_pad_countscolsdest_indicesrow_indices
valid_mask
valid_rows
valid_cols
valid_valss                   r    align_logprobs_with_maskr|      s     ""F"0"6"6J!''*Ljj""	O ll>q9O<<7D #,,Q/$6L ,,z9CCAFPPQ]^K
 ,J
 (J)J+J /9J*+r"   r|   c                 *   Uc  [        SUS-  5      nOUn[        R                  R                  5       (       a(  [        R                  R	                  5       u  pxUS-  S-  n	Sn
[        R
                  " U SSS[        R                  S9nX-  U-  U-  U
-  nX-  U-  U-  U-  U-  U
-  nX-  nX-   nUW	:*  n[        R                  " US	S
9nUR                  S   S:X  a  SU4$ US   R                  5       n[        UU   R                  5       5      nUU4$ )Nr$      i   @g?r   r   cpu)rU   r=   F)as_tuple)maxr   cudais_availablemem_get_inforV   r   nonzeror)   itemint)total_input_rowsseq_lenhidden_size
vocab_sizedtype_bytes
multiplierfinal_m
free_bytes_limit_gbbytes_to_gbb_vals	hidden_gbbase_logits	logits_gbtotal_mem_gbrx   valid_indicesbest_idxfinal_bs                       r    autotune_batch_and_chunksr      s+    aD)zz  

//1
'*C/K\\*Ar%u}}UF!K/+=LI+v5?*L{Z^iiK%I(L)JMM*u=M1"'zQ$$&H&"'')*GGr"   grpo_autotune_batch_and_chunksc                 n   [         R                  " U 5      R                  R                  5       n0 nUR                  5        H  nXS;   d  M
  X   XE'   M     UnUb[  U HU  n[	        X%5      (       d  M  [        X%5      nUc  M%  [        U5      [        [        4;   d  M@  [        U5      S:w  d  MQ  XaU'   MW     U$ )Nr   )
inspect	signature
parameterskeyshasattrgetattrtypelisttuplelen)SamplingParamsgeneration_kwargsvllm_sampling_paramsgood_sampling_params_keysnew_generation_kwargskeyoverwrited_keys          r    grpo_update_SamplingParamsr     s     ' 1 1. A L L Q Q S  %%'+):)?!& ( .',C+11!()=!C!-43GDRW>3Y^abp^quv^v-;c*	 -
 r"   r   c                 n	  ^^# UR                  SS5      n	UR                  SS5      n
UR                  SS5      nUR                  SS5      nUR                  SS 5      nUR                  S	S
5      nUR                  SS 5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  S5      n[        R                  " 5          U(       a4  Ub1  [        R                  " UT-  U-
  5      n[        R
                  " UUS9nS S S 5         US:w  a!  [        R                  " X-
  5      X-
  -
  S-
  nO>US:X  a"  UR                  UR                  S5      S5      nO[        R                  " U5      nUb  X-
  nOXR                  5       -
  nUS
:X  a  UnOZUS:X  aE  UT-  R                  S5      TR                  S5      R                  SS9-  nUR                  S5      nO[        SU S35      e[        R                  " U5      n[        R
                  " USU
-
  SU-   5      nUb(  [        R
                  " UUS9UR                  S5      -  nOUUR                  S5      -  n UUR                  S5      -  n[        R                  " UU5      * nU(       aI  UbF  UW-  n[        R                  " 5          [        R                  " X#-
  5      nUT-  nUT-  nS S S 5        OH[        R                  " / 5      R                  5       n[        R                  " / 5      R                  5       nUS:w  a  UUU-  -   nTR                  [        R                   5      mTR                  S5      m#U	S:X  aG  UT-  R                  S5      TR                  S5      R                  SS9-  R#                  5       nUU-  nOU	S:X  a7  UT-  R                  5       TR                  5       R                  SS9-  nUU-  nOdU	S:X  a.  UT-  R                  5       UR                  S5      U-  -  nUU-  nO0U	S:X  a  UU-  nUT-  R                  5       U-  nO[        SU	 35      eUU#4S  jn U " U5      u  n!n"UU!U"UW4$ ! , (       d  f       GNo= f! , (       d  f       GNf= f)!N	loss_typegrpoepsilon_lowg?epsilon_highmax_completion_lengthi    deltaimportance_sampling_leveltokennum_items_in_batch#current_gradient_accumulation_stepsr	   num_processesuse_vllmFvllm_importance_sampling_capg       @r   )r   r;   r<   sequencer   )minz#Unknown importance sampling level: z-. Possible values are 'token' and 'sequence'.bnpodr_grpodapozUnknown loss type: c                 2  > [         R                  " 5          TR                  5       nU R                  S   S:X  a  XR                  5       4sS S S 5        $ U T-  R	                  S5      T-  nUR                  5       nX4sS S S 5        $ ! , (       d  f       g = f)Nr	   )r   inference_modemeanr)   rK   )xcompletion_lengthmean_kl_per_rewardmean_klre   n_mask_per_rewards       r    masked_batch_mean,grpo_compute_loss.<locals>.masked_batch_mean  s|    !!# 1 6 6 8wwqzQ(&&(2 $#
 '($h^^A%69J%J",113(1 $##s   4B)B
B)getr   r   no_gradexpclamp	new_zerossize
zeros_likedetachrK   rJ   r   absr_   r   r   r   )$refnewoldsampling_per_token_logpsrF   re   beta
advantageskwargsr   r   r   r   r   r   r   r   r   r   r   importance_sampling_ratiokl_i	log_ratiolog_importance_weightscoef_1coef_2loss_1loss_2loss_iflat_is_ratioloss
normalizerr   r   r   r   s$        `                             @r    grpo_compute_lossr   "  s    

;/I**]C0K::nc2L"JJ'>EJJw%E &

+F P$8$?*0**5Z\]*^'JJ2Mzz*e,H#)::.Lc#R ##B'I	0<(-		3:AY2Y(Z%(-)/K)%	 
 	 s{yy#sy1C7 %
2==!a0D##C(D I	**,&	 G+!*	"j	0"+d"2!7!7!;dhhrl>P>PUX>P>Y!Y!7!A!A"!E12K1L M 
 	

 ii./F[[[!l2BCFV/*2F2Fq2II*..q11 j**1--Fii''F,833]]_IIc<=EDLE5<M _
 R '')R(//1s{$+%775==!D F$##B'$((2,*<*<*<*EEKKM99	f	""$txxz'7'7C'7'@@99	i	""$A9N(NO99	f	'6
""$z1.yk:;;
2 "34!8w"GUMAAO 
z _s   /<R#R%
R"%
R4r   zR@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options)
grpo_compute_loss_slowzdef grpo_compute_losszdef grpo_compute_loss_slowc                   :    \ rS rSr\SS j5       r \S 5       rSrg)UnslothEfficientGRPOi  Nc                 |  ^	^^!^"^#^$^%^& Tc  0 mU	U4S jm& UR                   n[        R                  " U5      n[        R                  " SUS9m$[        R                  " SUS9m![        R                  " SUS9m%/ m"/ m#U!U"U#U$U%U&4S jn [        R                  " USS[
        S9n[        R                  " XSS9n[        R                  " XSS9nUb  [        R                  " X+SS9nOS /U-  nUb  [        R                  " X;SS9nOS /U-  nUb  [        R                  " XKSS9nOS /U-  n[        R                  " XkSS9n[        R                  " X{SS9n[        R                  " XSS9nU
b  U
R                  5       OS	n[        UUUUUUUU5       H  u  nnnnnnnn U" UUUUUUU UU5	        M      UR                  U5        T$R                  U5        T!R                  U5        T%R                  U5        Ub+  [        R                  " T"SS
9m"[        R                  " T#SS
9m#OS m"S m#U R                  U5        T$T!T%T"T#4$ )Nc           
      b   > [        UU UUUUTU40 TD6u  ppnX-  nXR                  5       XX44$ N)r   r   )	new_logps	old_logps	ref_logpsr   rF   re   r   scalingr   r   r   r   r   scaled_lossr   extra_kwargss                 r    compute_loss2UnslothEfficientGRPO.forward.<locals>.compute_loss  sX    EV(
F 
FBDW] .K0AE aaar"   r	   rT   c	           
         > [         R                  R                  TSSS9" XX#XEXg5      u  u  n	u  n
u  ppnTR                  U5        TR                  U5        TR                  U5        TR	                  U5        TR	                  U5        XS S & g )N)r   T)argnumshas_aux)r   funcgrad_and_valueadd_r+   )new_logps_jold_logps_jref_logps_jsampling_per_token_logps_jinput_ids_jmask_jadvantages_jr   grad_inputs_jchunk_grad_input
chunk_lossunscaled_losschunk_completion_lengthchunk_mean_klchunk_deltachunk_flat_is_ratioaccumulated_completion_lengthaccumulated_deltaaccumulated_flat_is_ratioaccumulated_lossaccumulated_mean_klr   s                   r    accumulate_chunk6UnslothEfficientGRPO.forward.<locals>.accumulate_chunk  s     LQ  LV  LV  Le  Le Le L Q\fr	L}  I  "I*  /H}Wd  tG
 ..}=)../FG..}=00=%001DE/!r"   T)r   r   r   r   r%   r<   r   )rU   r   
empty_likezeroscompiler   r'   	get_scaler*   div_catsave_for_backward)'ctx
_new_logps
_old_logps
_ref_logps_sampling_per_token_logpsr4   
_input_ids_mask_advantagesr   scalern_chunksr   rU   grad_inputsr  grad_inputs_chunksr   r   r   r   rF   re   r   r   r  r   r   r   r  r  r  r  r  r  r  r  r  r   s'            `  `                    @@@@@@r    forwardUnslothEfficientGRPO.forward  sv   L	b" 	!!&&z2(-A(G(-A(G%(-A(G(*(*!	0 	0, 	 ==+
 #[[VWX[[aH	!Z!LI)I!Z!LI)I$0(-4Mhi(j$(,v'8$"[[VWX	"[[VWX"[[VWX
 )/(:&""$ "Iy)E]_hjnpz{ D]KkC]_jlr  uA *
 |* 	**84**84%**84**84$0 %		*; C(-		2KQR(S% $(,%k*)%
 	
r"   c                 :    U R                   u  nUS S S S S S S S S S S 4$ r   )saved_tensors)r  grad_outputdcompletion_lengthdmean_klddeltaddflat_is_ratio
grad_inputs          r    backwardUnslothEfficientGRPO.backward6  s0    ))D$dD$dDRVX\]]r"    )Nr	   N)__name__
__module____qualname____firstlineno__staticmethodr'  r1  __static_attributes__r3  r"   r    r   r     s5    @
 @
B 	^ ^ 	r"   r   c	                   ^N^O UR                   u  pU	R                  SS 5      nU	R                  SS 5      nU	R                  SS 5      nU	R                  SS 5      n[        U SS5      (       a  U	R                  SS 5      OS nU	R                  SS	5      nU	R                  S
S5      nU	R                  SS5      nU	R                  SS5      nU	R                  SS5      nU	R                  SS 5      nUb  U R                  OS U	S'   U R
                  U	S'   [        SU
S-   5       Vs/ s H  nU
U-  S:X  d  M  UPM     nnUS:X  a  U
nU[        [        R                  " UU5      [        U5      S-
  5         n[        U S5      (       dt  [        R                  R                  SS5      S:X  a  [        R                  O[        R                   U l        [        R                  R                  SS5      S:X  a  S U l         S[        R                  S'   U R$                  R'                  5       R(                  nU R"                  [        R                  [        R                   4;   a  SOSnUR                   S   nUR                   S   nUR                   S   nUR                   S   nU R*                  R,                  Gc  [        U S5      (       d  SU l        [1        UUUUUU R*                  R2                  5      u  nn UU-  U R*                  l        U U R*                  l        U R*                  R,                  nU R*                  R2                  n OU R4                  U R6                  -  S:X  aG  U R*                  R,                  nU R*                  R2                  n U ?U R*                  ?U R*                  ?OU R,                  nU R*                  R2                  n OpU R*                  R,                  U:  a  UnOU R*                  R,                  nU R*                  R2                  c  [9        SUS -  5      n OU R*                  R2                  n UGcB  [;        XU R<                  R>                  5      n!Ub  UR                   S   U-
  n"O:Ub  UR                   S   U-
  n"O$[        R8                  " U!5      RA                  5       n"[C        XR<                  R>                  5      nUS S 2UU"-   * S 24   n#[E        U#U!U"U R<                  R>                  5      RG                  URH                  5      nU R
                  (       a"  Ub  [        U SS5      (       a  [K        UU5      nOS nXR<                  R>                  :g  nURG                  URH                  5      nOUS S 2U* S 24   n#U RL                  RO                  U R$                  SS!9n$U$RQ                  5        HD  n%[        U%S"5      (       d  M  [        U%RR                  S#5      (       d  M3  SU%RR                  l*        MF      / n&[        RV                  " UUSS$9n'[        RV                  " U#USS$9n(S% n)SS K,n*UR                   S   n+U*R[                  U+U-  5      n,/ n-/ n'/ n./ n// n0Sn1[        SU+U,5       H  n2U2U,-   n3U-R]                  UU2U3 5        U'R]                  UU2U3 5        Ub  Ub  UU2U3 n4U/R]                  U45        U4R_                  SS&9Ra                  5       RA                  5       n5U1n6U1U5-   n7U.R]                  UU6U7 5        Ub  U0R]                  UU6U7 5        OU0R]                  S 5        U7n1M  U.R]                  S 5        U/R]                  S 5        U0R]                  S 5        M     Ub3  [c        U[        Rd                  5      (       d  U V8s/ s H  n8U8/PM     n9n8O	U)" UU5      n9[g        U-U'U.U/U0U9U(5      n:U R"                  c  [i        5       n;OF[        Rj                  Rm                  U R$                  Rn                  Rp                  U R"                  S'9n;S/S( jmO " UO4S) jS*[        Rr                  Rt                  5      mN   S0UN4S+ jjn<U: H  u  n=n>n?n@nAnBnCU;   Uc9  U$" U=U>U?W@WAWBS,9Rv                  nDUDS S 2UW"-   S-   * S 2S S 24   nDUDS S 2S S2S S 24   nDO'U$" U=U>U?W@WAWBUS-   S-9Rv                  nDUDS S 2S S2S S 24   nDU<" WDUWCU=R                   S   U -  UUUUUS.9	nE[        Rx                  R{                  5         S S S 5        U&R]                  WE5        M     [        R|                  " U&SS&9nFU;   [~        R                  UFUUUUU#UUU R                  U RL                  R                  UU	5      u  nGnHnInJnKS S S 5        S[        R                  S'   WGWHWIWJWK4$ s  snf s  sn8f ! , (       d  f       N= f! , (       d  f       NC= f)1Npixel_valuesimage_grid_thwpixel_attention_maskimage_sizes#vllm_importance_sampling_correctionFr   r8   r<   r5   r;   r6   r7   rR   r   r   r   r	   r   _autocast_dtypeACCELERATE_MIXED_PRECISIONfp16UNSLOTH_FORCE_FLOAT3201UNSLOTH_RETURN_HIDDEN_STATES       _has_autotunedTr$   r~   )keep_fp32_wrapper_hf_hookio_same_decicer%   c                 >    U c  S /U-  $ [         R                  " XSS9$ )Nr   r%   )r   r'   )r_   r&   s     r    chunk_optional-grpo_accumulated_loss.<locals>.chunk_optional  s$    >6F?"{{6a88r"   r   )device_typer=   c                 (    U c  g U R                  XS9$ )Nnon_blocking)r   )r_   rU   rS  s      r    	to_device(grpo_accumulated_loss.<locals>.to_device  s    >$yyy;;r"   c                   F   > \ rS rSrSr\U 4S j5       r\U 4S j5       rSrg)<grpo_accumulated_loss.<locals>.Unsloth_Offloaded_Log_Softmaxi  z?
Manual Gradient Checkpointing/CPU Offloading for Log Softmax.
c	                 B  > T
" USSS9U l         UR                  U l        UR                  U l        X l        UR                  U l        X0l        XEXgU4U l        [        R                  " 5          [        XU/U R                  Q76 n	S S S 5        U	$ ! , (       d  f       W	$ = f)Nr   TrR  )saved_hidden_statesrU   r=   r4   requires_gradlm_head_requires_gradr   argsr   r   rD   )r  r3   r4   r   r&   r5   r6   r7   r8   outputrT  s             r    r'  Dgrpo_accumulated_loss.<locals>.Unsloth_Offloaded_Log_Softmax.forward  s    
 '0uSW&XC#&--CJ%++CI!K(/(=(=C%I6H]hiCHD!E47HH !
 M !
 Ms   -B
Bc                   > T" U R                   U R                  5      nUR                  U R                  5      nUR	                  S5        U R
                  nU R                  n[        R                  " 5          [        X#U/U R                  Q76 nS S S 5        [        R                  R                  WU5        UR                  U R                  (       a  UR                  OS S S S S S S 4$ ! , (       d  f       N_= f)NT)rY  rU   r   r=   requires_grad_r4   r   r   enable_gradrD   r\  autogradr1  gradr[  )r  r+  r3   r4   r   r]  rT  s         r    r1  Egrpo_accumulated_loss.<locals>.Unsloth_Offloaded_Log_Softmax.backward  s    %c&=&=szzJM),,SYY7M((.kkG IIE""$D!E47HH %
 NN##FK8 "" # 9 9t	 	 %$s   8C))
C7r3  N)	r4  r5  r6  r7  __doc__r8  r'  r1  r9  )rT  s   r    Unsloth_Offloaded_Log_SoftmaxrW    s0    	 
	 
	( 
	 
	r"   rf  c	           
         > UR                   S   S::  a  US::  d  US:X  a  [        U UUUUUUU5      $ T	R                  XX#XEXg5      $ )Nr	   i      )r)   rD   apply)
r3   r4   r   r&   r5   r6   r7   r8   rW   rf  s
            r    efficient_log_softmax4grpo_accumulated_loss.<locals>.efficient_log_softmax8  sf     KKNd"zQ:q=>$"!	 	 166$! r"   )rF   rj   r;  r<  r=  r>  )rF   rj   r;  r<  r=  r>  rG   )r&   r5   r6   r7   r8   rW   )T)rH  r;   r;   r;   r	   rh  )Hr)   r   r   popr   r   ranger   npsearchsortedr   r   osenvironr   float16bfloat16r@  modelget_output_embeddingsweightr\  unsloth_grpo_mini_batchrI  r   unsloth_logit_chunk_multiplier_stepr   r   rO   processing_classrH   r   rh   r^   r   r=   r|   acceleratorunwrap_modelmodulesrK  rL  r'   mathceilr+   prodrK   
isinstanceTensorr*   nullcontextampautocastrU   r   rb  Functionr   r   synchronizer  r   ri  r   r#  matmulnew_hidden_statesr>   old_hidden_statesr   )PtrainerrF   rj   rG   completion_maskr   r   r   r$  r   bszqlenr;  r<  r=  r>  r   r8   r5   r6   r7   prev_max_left_padr   ifactorsr4   r   
total_rowsr   
hidden_dim	vocab_dimBr   rQ   rR   rP   unwrapped_modelmoduleall_logprobs_listattention_mask_chunkscompletion_ids_chunksrN  r~  total_samplesrW   input_ids_chunkspixel_values_chunksimage_grid_thw_chunkspixel_attention_mask_chunkscurrent_pixel_idxstartend
grid_slicebatch_pixel_countstart_pixel_idxend_pixel_idxr   image_sizes_chunkszipped_inputs
autocasterrj  input_ids_chunkattention_mask_chunkpixel_values_chunkimage_grid_thw_chunkpixel_attention_mask_chunkimage_sizes_chunkcompletion_idsnew_hidden_states_chunklogprobs_chunknew_logprobsr   r   r   r   r   
new_logits
old_logitsrf  rT  sP                                                                                 @@r    grpo_accumulated_lossr  ?  s	    IC::nT2LZZ 06N!::&<TB**]40KOVW^  aF  HM  PN  PNvzz*DdK  TX**]C0K!::&<cB!::&:C@!::&93?!::na8 	

-t4AUmUyW-Q-Q  @DF)* ))F:37+<+QsQw!|q+G<2~#xs2??7H=s7|A~NOH7-..35::>>B^`f3gkq3q%--w|  xF  xF::>>1373>Z^@W14BJJ-.mm113::G//EMM5>>3RR"XZK#Jooa Gq!Ja I||++3w 011%)G"5GZKItItMAz 4>q=GLL0:DGLL744A DDJ]]WHHHAM44A DDJ&4;//A DDJ<<//*<A44A<<66>Q40J DDJ%CI_f_w_w  `E  `E  &F"  $??1->L"$??1->L 99%?@EEGL%i1I1I1V1VW	(nl.J,K,L)LM:;OQkmy  |C  |T  |T  |a  |a  b  e  e  ft  fz  fz  { 8 DQXZ  BG  JH  JH'?@XZi'j$'+$#'?'?'L'LL'**>+?+?@(^O,<)<=))66w}}Z_6`O!))+6:&&76??DT+U+U-2FOO* , 	!KKqaH!KK(<QAN9
 OOA&M=1,-J"$q-4j 	% 45$$^E#%>?%,*B'c2J!((4 !+B 7 ; ; = B B D/O-0AAM&&|OM'RS#/+22(G ,2248 -  &&t,!((.'..t4? 5B z+u||'L'L1<=tf=+K;#M & ]
YY''gmm6J6J6O6OY`YpYp'q
<8(?(? 8t EGILMN> 
	"'.=$3)='9)=/I&7/ f , /Fa>\hKhklKlImInqrFr.s+.Ea"ai.P+.=$3)='9)=/I&7)7!);/ f , /Fa"ai.P+!6+"*003J>)='9&7 +!"
" 

&&(O P $$^4S 
V 99.A6L	AUA[A[$ LL&&B
>% 
" 25BJJ-."GUMAAs	 =R >b X 
s,   f0ff$$B f)8Af:)
f7	:
gr  )sft_prepare_datasetr  )r$   r;   r;   r;   r<   )r;   )rG  Nr   )r   )(__all__r   r   rp  numpyrn  typingr   r   r   r   r   rP  r
   temporary_patches.commonr   dictr   r  r!   r2   r  r   floatrD   rO   r^   rh   r|   r   r   r   	getsourcereplacerb  r  r   r  dataset_utilsr  r3  r"   r    <module>r     s'  $    	  8 8 $ ;& 4;PR S  4;PR S  +H' (4;PR
 "% #"*<<*\\* <<* 	*
  * * * * \\* S*X 1\, -||  \\	$ 4R0 1,, %  	
 \\4 6V2 3ell C ELL  '8# $
 /LL/LL/ / \\	/b /G* + &P 5N0 1" 0J, -ABD ,=# $Y*+,. ( ) ,-55$ ( )J	5>>22 J	V *>& ' g	P +@' ( .)<% &r"   