
    oi?                        S SK r SSKJr  S SKrS SKrS SKrS SKJr  \ R                  R                  R                  rS SKJr  SSKJr  SSKJrJrJr  S SKrS SKr\R0                  R3                  S5      c  S	qO\R6                  R9                  S
S5      S:H  q [4        (       a  S SKJr   \S:X  ad  \ R>                  RA                  5       u  r!r"\" \ R                  5      \" S5      :  a(  \!S::  a  \"S:  d  \" \5      \" S5      :  d
   S SK#J$r$  Sq%OS	q%O"\S:X  a
   S SK#J$r$  Sq%O\S:X  a
   S SK#J$r$  Sq%OS	q% / SQr&SSK'J(r(  S*S jr) S r* \S:X  a  \ RV                  RX                  O\ R>                  RX                  r-     S+S\ R\                  S\ R\                  S\ R\                  S\/S\/S\0S \1S!\04S" jjr2        S,S\ R\                  S#\ R                  Rf                  S\ R\                  S\/S\/S\0S \1S$\1S%\1S&\ R\                  4S' jjr4 \5" 5       q6S q7S S(K8J9r9  \ Rt                  Rv                  r;\ Rt                  Rx                  r<S-S) jr=g!   S	q% GNB= f!   S	q% GN)= f!   S	q% GN3= f).    N   )Version)Optional)__version__)DEVICE_TYPE)UNSLOTH_ENABLE_LOGGINGtorch_compile_optionsloggerunsloth_studioFUNSLOTH_STUDIO_DISABLED0)unsloth_efficient_ce_losscuda2.4.0      z3.0.0)linear_cross_entropyThipxpu)patch_loss_functionspost_patch_loss_functionHAS_CUT_CROSS_ENTROPYfused_linear_cross_entropyfast_linear_cross_entropy_unsloth_get_batch_samplesunsloth_fused_ce_loss)r   c                 Z  ^ ^  SS K n SS[        S[        4U 4S jjjm  SS[        S[        S[        4U4S jjjn [        [        R
                  5      [        S5      :  a  [        R                  " U5      nO"U(       a  [        R                  " US	S
[        S9n SS K	nUR                  R                  R                  nX4S'   [        UR                  R                  R                   S5      (       a  [        UR                  R                  R                   R"                  S5      (       aM  UR                  R                  R                   R"                  R$                  UR                  R                  l         [        S5        S[&        R(                  S'   g !   [        S5         g = f)Nr   zNUnsloth: Cannot patch loss functions - update transformers for faster modules!num_items_in_batchignore_indexc                    > US:X  a
  T" U UUS9nU$ Ub  SOSn[        U UUUS9nUS:X  a:  [        R                  " U5      (       a  UR                  UR                  5      nXR-  nU$ )N)logitslabelsn_itemssummean)r   	reduction)!torch_nn_functional_cross_entropytorch	is_tensortodevice)sourcetargetr   r   kwargslossr'   _fast_cross_entropy_losss          P/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/loss_utils.pyunsloth_fixed_cross_entropy9patch_loss_functions.<locals>.unsloth_fixed_cross_entropy^   s    4+  ,D$  "4!?VI4+(	D E!??#566);)>)>t{{)K&0    
vocab_sizec                 |   > Uc  g U n[         R                  " U5      nUSSS 24   USS S24'   XGS'   T	" XgX440 UD6nU$ )N.r   ).r8   )r)   
empty_like)
r"   r#   r6   r   r   r/   shift_logitsshift_labelsr0   r3   s
            r2   UnslothForCausalLMLoss4patch_loss_functions.<locals>.UnslothForCausalLMLossv   s]     >$''/!'QRS#2#X ,W*<GYrkqrr5   r   TF)dynamic	fullgraphoptionsForCausalLMfget__wrapped__z&Unsloth: Patched cross entropy losses.1UNSLOTH_PATCHED)Nr!   )transformers.loss.loss_utilsprintintr   r)   r   _disable_dynamocompiler	   transformers.modeling_utilsr0   
loss_utilsLOSS_MAPPINGhasattrmodeling_utilsPreTrainedModelloss_functionrB   rC   osenviron)r1   torch_compiletransformersr<   rM   r3   s   `    @r2   r   r   T   sx   + 	 be  * 	 ^b	$'	=@	WZ	 	 	!!"WW%55!&!6!67M!N	!&"+	"
 	 '$$//<<L"8 |**::HH&QQ++;;IINNP]^^''77EEJJVV 	##33A	
23$'BJJ !E^_s   F F*c                     U n[        US5      (       a6   UR                  5       Ul         UR                  n[        US5      (       a  M6   UR                  5       Ul        U $ !    N<= f!    U $ = f)Nmodel)rN   rQ   rW   )rW   current_models     r2   r   r      s}    M
-
)
)	 +8*E*E*GM' 	%++ -
)
) 	'4'B'B'D	$L	
 DLs   A" A) "A&)A.hidden_states	lm_weightr#   r   r   r'   logit_softcappingaccuracy_thresholdc                 T   Ub5  [         R                  " U5      (       a  UR                  U R                  SS9nUb  SOSnUS:X  a  S n[	        UR                  5         [        U R                  UR                  5      UUUUUSUS9nS S S 5        Ub  WU-  nW$ ! , (       d  f       N= f)NTnon_blockingr%   r&   r   )targetsr   softcapr'   shift
filter_eps)r)   r*   r+   r,   current_devicer   dtype)	rY   rZ   r#   r   r   r'   r[   r\   r0   s	            r2   r   r      s     %%//:L*M*M/22=3G3GX\2]+7VIA40		((	)#Y__-!',$-	
 
* %d5G.GtK 
*	)s   *B
B'lm_headlogit_scale_multiplylogit_scale_divideattention_maskc
                     Ub5  [         R                  " U5      (       a  UR                  U R                  SS9nUb  SOSnUS:X  a  S nUS:w  a  Un
OUS:w  a  SU-  n
OS n
[	        U UUSUU
UUSU	S9
nUb  X-  nU$ )	NTr^   r%   r&   r   g      ?i   )
rY   rf   r#   rb   r'   logit_scaler[   r   
chunk_sizeri   )r)   r*   r+   r,   r   )rY   rf   r#   r   r   r'   r[   rg   rh   ri   rk   r0   s               r2   r   r      s     %%//:L*M*M/22=3G3GX\2]+7VIA40q *	q	 ..$%!-#'D %d.GtKr5   )ParallelModec                 T   / nS nU R                   n[        US5      (       a  UR                  5       nUR                  R                  n	U	[
        ;  GaA  Sn
Sn [        US5      (       d  GO[        UR                  S5      (       d  GO UR                  n[        US5      (       a<  UR                  n[        US5      (       a  UR                  n[        US5      (       a  Un UR                  nSU;   d  SU;   a  SnU(       d  S	U;   d  S
U;   aX  [        R                  " U5      R                  R                  5       n[        U5      S   R                  [        R                  :H  n
O![        US5      (       d  OUR                   nGM0   X4[
        U	'   O[
        U	   u  p [!        U5       H  n U[#        U5      /-  nM      U
(       Ga  ['        U5      S:  Ga  SUS   ;   Ga   / nU H  nUS   nUSSS 24   S:g  nSU;   a  US   n[)        US5        [+        US5        SU;   a,  US   n[)        US5        [+        US5        UUSSS 24   S:g  -  nSU;   a  US   n[)        US5        [+        US5        UR-                  5       nUR/                  S5      nUb-  U[0        R2                  " US:  5      R5                  5       S-
  -  nUR7                  U5        M      [-        U5      nU R8                  R:                  (       a)  U R<                  R?                  U5      R-                  5       n[0        R@                  " U5      (       a  Ub  URC                  U5      n[E        U R8                  SS5      S:  a\  U R8                  RF                  [H        RJ                  :X  a4  URM                  S5      RO                  U R8                  RP                  5      n [V        (       a  [X        RZ                  " SU 35        Xg4$ ! [$         a       GMG  f = f! [R         a  n[U        U5      eS nAff = f)Nget_base_modelFTforward__qualname__rC   ForConditionalGenerationVisionText2TextCausalLM_fast_forwardr8   rW   r   r#   .r   r!   	input_idsri   token_type_idspacked_seq_lengthsn_gpuzUnsloth: num_items_in_batch = ).rW   rN   ro   	__class____name__ALLOWED_NUM_ITEMS_IN_BATCHrp   rC   rq   inspect	signature
parametersvaluestuplekind_VAR_KEYWORDrangenextStopIterationlenmark_staticmark_dynamicr%   getr)   count_nonzeroitemappendargsaverage_tokens_across_devicesacceleratorgatherr*   r+   getattrparallel_moderm   NOT_DISTRIBUTED	unsqueezerepeatry   	ExceptionRuntimeErrorr   r
   info)selfepoch_iteratornum_batchesr,   r   r/   batch_samplesr   m
model_name
has_kwargsis_vlmrp   rC   namer~   _token_countsxr#   token_countrv   ri   rw   countseq_lengths	exceptions                              r2   r   r     s   M 	

Aq"##%%J33
1i((%199n55uiiG w..%11;66"-"9"9K{N;;"-''D)T15F$5Nt+$/F#--g6AAHHJ	"9-b166':N:NN
1g&&A- . 	2<1E":.7
C
 ;	d>233M  
 	 c-(1,]1=M1M&	*L"8%c12g$6!# !+IA. A.#q(%&'7%8N3 3N37$;q$@AK#q(%&'7%8N3 3#)ee$89* U00qAFFH1LLE##E*- #. !$\!2yy66%)%5%5%<%<=O%P%T%T%V"122%);)>)>v)F&499gq1A5$)):Q:QUaUqUq:q *<)E)Ea)H)O)OPTPYPYP_P_)`& 	45G4HIJ" ,,A  		V  	*y))	*s+   /O:&G,P :
P
	P

P'P""P')T)Nr!   r&   r   auto)Nr!   r&   r   r   r   N)N)>r)   utilsr   rR   math	functoolstypingr   nn
functionalcross_entropyr(   tritonr   triton_version r   temporary_patches.commonr   r	   r
   r}   importlib.util	importlibutil	find_specUNSLOTH_STUDIO_ENABLEDrS   r   unsloth_studio.lossesr   r   get_device_capabilitymajorminorcut_cross_entropyr   r   __all__fused_lossesr   r   r   r   r,   rd   TensorrH   strfloatr   Linearr   dictr|   TRAINING_ITERATIONStransformers.training_argsrm   _dynamor   r   r    r5   r2   <module>r      s  "   	   $)HH$7$7$E$E ! 0  [ [  >>,-5"ZZ^^,EsKsR  &::335LE5!!"gg&66z	n%(88	*>$(! !&E&: $ E&: $ "  0F(N    &1E%9!!uzz?P?P
  $#%!"%  	
    <  "&!%!'#$#$#$*.& <<& 88??& !<<& 	&
 & & !& !& !& !<<&N  "V    4 }}((}}))q-d k
	*$)!& %
& %s$   
I I -I$ II!$I+