
    oiK                     V   S SK 7  S SKJr  SSKJr  SSKJrJr  SSKJ	r	  SSK
JrJrJrJr   SS	KJrJrJrJrJrJrJr  SSKJr   SSKJrJr  S#S jr        S$S\R>                  S\ \!   S\ \R>                     S\ \RD                     S\ \#\R>                        S\$S\$S\ \RD                     S\ \#\R>                  \R>                  4      S\#\R>                  \ \R>                     \ \#\R>                        4   4S jjr%        S$S\R>                  S\ \!   S\ \R>                     S\ \RD                     S\ \#\R>                        S\ \$   S\ \$   S\ \RD                     S\ \#\R>                  \R>                  4      4S jjr&SSK'J(r)  Sr*\RV                  RX                  RZ                  r.\R^                  r0  S%S\R>                  S\ \#\R>                        4S jjr1 S#S  jr2 " S! S"\35      r4g!   \" \5      r\\" S
5      :  d  \" S\ S35      e GN= f!   \r\r GN= f)&   )*)__version__    )dtype_from_config)
_get_dtypeVersion   )get_packed_info_from_kwargs)AttentionConfigAttentionContextrun_attentionselect_attention_backend)CohereAttentionCohereDecoderLayerCohereModelCohereForCausalLMCohereRotaryEmbeddingapply_rotary_pos_emb	repeat_kvz4.42z&Unsloth: Your transformers version of z does not support Cohere.
The minimum required version is 4.42.3.
Try `pip install --upgrade "transformers>=4.42.3"`
to obtain the latest transformers build, then restart this session.)*_prepare_4d_causal_attention_mask_for_sdpa)CohereSdpaAttentionCohereFlashAttention2Nc                 :   UR                  [        R                  SS9nX1R                  SSS9-  nUR	                  5       R                  SSS9nX@R
                  -  nX4R                  5       -  nU R                  US S & X2-  nUR                  UR                  5      $ )NT)copy)keepdim)	totorchfloat32meansquarevariance_epsilonrsqrt_weightdtype)selfX
out_weightXXvariances        O/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/models/cohere.pyfast_layernorm_inferencer,   >   s    	
emmD	)B&&t&
$$Byy{d3H%%%H//
BKKJqMB55>    hidden_statescausal_maskattention_maskposition_idspast_key_valueoutput_attentions	use_cachepadding_maskposition_embeddingsreturnc
                 l   [        U S5      (       a  U ?U ?U ?U ?U ?U ?U ?U ?U ?	UR                  5       u  pnU R                  R                  nU R                  nU R                  R                  nU R                  nUU-  U:X  d   eU R!                  X5      u  nnnUR#                  XUU5      R%                  SS5      nUR#                  XUU5      R%                  SS5      nUR#                  XUU5      R%                  SS5      n['        UUR(                  5      nU R*                  (       a,  [-        U R.                  U5      n[-        U R0                  U5      nUR2                  S   nUb  UUS   R2                  S   -  nU	(       a  U	u  nnO3U R4                  R7                  UUR(                  R8                  5      u  nnUb  UOUR;                  S5      n[=        UUUUU5      u  nnUb2  [>        RA                  US   U/SS9n[>        RA                  US   U/SS9nU(       a  UU4OS nUS L=(       a    US L n[C        U5      n[E        UUUSS	0S
S	[G        U SS 5      S.S9n[I        UUUUUURJ                  UUUS9	n[M        UUUUUS9nURO                  XUU-  5      n U RQ                  U U 5      n S n!U U!U4$ )Npaged_attentionr   r	   r   r1   )dimcausalTg        softmax_scale)	dropout_pr<   r=   )backend
n_kv_headsn_groupsflash_dense_kwargsflash_varlen_kwargs)	bszq_len
kv_seq_lenn_headshead_dimrequires_gradseq_infor0   r/   )configcontextQKV))hasattrpaged_attention_Kpaged_attention_Vr9   temp_QAtemp_KVRH_Q	attentionq_norm_out_weightk_norm_out_weightsizerK   num_attention_headsnum_key_value_groupsnum_key_value_headsrH   	apply_qkvview	transposer
   deviceuse_qk_normfast_layernorm_compiledq_normk_normshape
rotary_emb
get_cachedindexgetfast_rope_embeddingr   catr   r   getattrr   rI   r   reshapeapply_o)"r&   r.   r/   r0   r1   r2   r3   r4   r5   r6   argskwargsrD   rE   _rG   rA   r@   rH   rM   rN   rO   rJ   rF   cossinrope_position_ids
use_varlenr?   attention_configrL   Aattn_outputattn_weightss"                                     r+   CohereAttention_fast_forwardrz   J   s    t&''"" LLIN""!&&(MCkk--G((H00J}}H G+++nnT1GAq!	s7H-771=A	s:x0::1a@A	s:x0::1a@A*6188<H#DKK3#DKK3J!nQ'--b11
 &S??--j!((..IS %0fjj6P  q!S#/@ADAq!II~a(!,AI6II~a(!,AI6(aVdN %@.D*@J&z2G&&-$T?DA

 %33'!
G 	/7qVWXA))C((:;K,,t[1KLn44r-   c
                 >   U(       a  [        U S5      (       a  [        R                  U R                  R                  R
                  [        R                  SS9nUn[        U R                  X5      nU R                  " SUUUUUUUUS.UD6u  pn[        U R                  U5      nX-  nUU-  nUnONUn[        U R                  U5      nU R                  " SUUUUUUUUS.UD6u  pnU R                  U5      nX-   U-   nU4nU(       a  UU4-  nU(       a  UU4-  nU$ )N_flag_for_generationcuda:0r%   r`   )r.   r/   r0   r1   r2   r3   r4   r5    )rP   r   emptyinput_layernormr$   re   r   r,   	self_attnfast_swiglu_inferencemlprb   )r&   r.   r/   r0   r1   r2   r3   r4   r5   r6   ro   rp   r(   residualhidden_states_attentionself_attn_weightspresent_key_valuehidden_states_mlpoutputss                      r+   CohereDecoderLayer_fast_forwardr      sy    W$  [[  ''--u}}x ! 


 !0  -
 IM 
I
)%+'+ 1!'
I
 
I
E4E 2$((MJ+%%  /0D0DmTHL 
I
)%+'+ 1!'
I
 
I
E4E !HH]3 :=NNG%''%''Nr-   )sqrt   c           
         UnUR                  5       u  pxn	Uu  pUR                  nU R                  R                  nU R                  nU R                  R
                  nU R                  nU R                  R                  nUU-  nU
R                  S   nUS-   nU(       Ga2  [        R                  [        U-   S-   SXU4USS9U l        U R                  S S 2S4   U l        U R                  S S 2S4   U l        U
R                  SSSS5      U R                  S U& UR                  SSSS5      U R                  S U& [        R                  SUSU4USS9U l        [        R                  SUSUU-  4USS9U l        [        R                  X}SU4USS9U l        UU:w  a  [        R                  SUU4USS9U l        O U R                   S   S S 2S S 2S U24   U l        [        R                  X}S[        U-   4USS9U l        S[+        U R                  5      -  U l        US-  U l        U R0                  (       a  [        R                  U R2                  R4                  R                  [        R6                  SS9U l        [        R                  U R:                  R4                  R                  [        R6                  SS9U l        OS U l        S U l        OUU R                  R                  S   :  a  U R                  R?                  U R                  R                  S   [        -   SUUU45        U R                  S S 2S4   U l        U R                  S S 2S4   U l        U R(                  R?                  X}SU R(                  R                  S	   [        -   45        [A        U RB                  X`R                   S   S
9n[A        U RD                  X`R"                  S   S
9n[A        U RF                  X`R"                  S   S
9nURI                  USUU5      RK                  SS5      nURI                  USUU5      RK                  SS5      nURI                  USUU5      RK                  SS5      nU R0                  (       aB  [M        U R2                  UU R8                  5      n[M        U R:                  UU R<                  5      nU RN                  RQ                  UURR                  RT                  5      u  nnUU   RW                  S5      nUU   RW                  S5      nU R.                  nU R$                  nUS S 2S S 2S S 2US 24   US S 2S S 2S S 2S U24'   US S 2S S 2S S 2S U24   US S 2S S 2S S 2US 24'   [        RY                  US S 2S S 2S S 2S U24   US S 2S S 2S S 2S U24   S
9  UU-  nUR[                  UU5        US S 2S U2S S 2S S 24   nUS S 2S S 2S S 2US 24   US S 2S S 2S S 2S U24'   US S 2S S 2S S 2S U24   US S 2S S 2S S 2US 24'   [        RY                  US S 2S S 2S S 2S U24   US S 2S S 2S S 2S U24   S
9  UU-  nUR[                  UU5        UR                  SSSS5      U R                  U'   UR                  SSSS5      U R                  U'   U R                  S U R                  SSSS5      nU R                  S U R                  SSSS5      n[]        U R                  SS 5      nUb.  UU:  a(  SU-
  nUS S 2S S 2US 2S S 24   nUS S 2S S 2US 2S S 24   n OUUn nUR                  u    nn!nUS:w  an  US S 2S S 2S S S 2S S 24   R_                  XUU!U5      nU S S 2S S 2S S S 2S S 24   R_                  XUU!U5      n URa                  X}U!U5      nU Ra                  X}U!U5      n US:X  ak  UU R,                  -  n[c        UURK                  SS5      U R(                  S S 2S S 2S S 2S U!24   S
9n"[e        U"S	[        R6                  S9U"S S & [c        U"U US
9n"O[g        UUU USS9n"U"RK                  SS5      n"U"Ra                  USU5      n"[A        U Rh                  U"U R&                  S
9n"U"UU44$ )Nr:   r   r	   r}   r~   r      g      ?r   )outsliding_window)r;   r%   F)	attn_mask	is_causal)5rY   r%   rK   rZ   r[   r\   rH   hidden_sizere   r   r   KV_CACHE_INCREMENTr9   rQ   rR   permuterS   rT   rU   temp_OrV   	math_sqrtscalarhalf_head_dimra   rc   r$   r   rW   rd   rX   resize_fast_linear_forwardq_projk_projv_projr^   r_   r,   rf   rg   r`   rh   	unsqueezenegaddcmul_rl   expandrm   torch_matmultorch_nn_functional_softmaxscaled_dot_product_attentiono_proj)#r&   r.   r2   r1   
do_prefillr0   XnrD   rq   hdK1V1r%   rG   rA   r@   rH   r   attention_sizeseq_lenrF   QnKnVnrr   rs   hrU   RH_Kr   slicing_tokensKnnVnn
cached_lenrw   s#                                      r+   &CohereAttention_fast_forward_inferencer      sR    
B##%JCBFBHHEkk--G((H00J}}H ++))Kx'NhhrlG1J ${{')A-q#8L  +  

 "&!5!5ad!;!%!5!5ad!;+-::aAq+Ax(+-::aAq+Ax({{Q' # 
 {{Q
X-. # 
 KK1h'   
	
 [(++C%ux & DK ,,q/!Q*<=DK107:; % 

 Idmm44%]%*[[""((%--( &1 &D" &+[[""((%--( &1 &D" &*D"%)D"	t++11!4	4$$$$**1-0BB	
 "&!5!5ad!;!%!5!5ad!;1dnn22269KKL	
 
T[["LLO	DB	T[["LLO	DB	T[["LLO	DB	a(	+	5	5a	;B	aX	.	8	8A	>B	aX	.	8	8A	>B%dkk2t7M7MN%dkk2t7M7MN ))*biiooFHC
l

%
%a
(C
l

%
%a
(CA99D1aAB;DAq"1"1aBQB;DAq!"	IId1aBQB;tAq!RaRK'8I9#IBKKc	;J;1D 1aAB;DAq"1"1aBQB;DAq!"	IId1aBQB;tAq!RaRK'8I9#IBKKc
 ')jjAq!&<D7#&(jjAq!&<D7#				,	4	4Q1a	@B				,	4	4Q1a	@B T[[*:DAN!j>&A^+A~)*A~)*rS ))Aq*a1}!Qa"#**Xz8
 !Qa"#**Xz8
 kk#
H=kk#
H= ax
dkka#4>>!Q;J;:N+O
 +R
! Cr*(Sn%
 	
AqA			#q.)ADKK$++>Ar2h;r-   c                   ^  [        U 4S j[        [        5       5       5      nUS S 2S T R                  24   nT R                  R                  U5      nUR                  [        [        T R                  5      5      5      nUR                  u  pxn	US   S   R                  S   n
US:w  a$  [        UXx4UU
[        T R                  SS 5      S9nOS n/ n[        T R                  R                  5       H  u  p[        USS5      n[        XU5      u  pcUn[!        UR"                  XeU   5      n[%        UR&                  UX,   UU[)        UR&                  S5      (       + S	9u  nn[+        UR,                  U5      nUU-  nUU-  nUnUR/                  U5        M     [!        T R                  R0                  XeW   5      n[3        UU/ / S
9$ )Nc              3      >#    U  Hf  n[         R                  TR                  R                  S    R                  R
                  [         R                  [         R                  U5      S9v   Mh     g7f)r   r~   N)r   
empty_likemodellayersr   r$   r   r`   ).0xr&   s     r+   	<genexpr>5CohereModel_fast_forward_inference.<locals>.<genexpr>  s\       %A 	JJa 0077MM\\!_ 	 	

 %s   A.A1r   r:   r   r   )r   _per_layer_device_indexr9   )r.   r2   r1   r0   r   )last_hidden_statepast_key_valuesr.   
attentions)tuplerangeDEVICE_COUNTmax_seq_lengthr   embed_tokensr   r   r   rK   re   r   rl   	enumerater   move_to_devicer,   r   r   r   rP   r   r   appendnormBaseModelOutputWithPast)r&   	input_idsr   r1   r0   out_weightsr.   rD   rE   r   r   next_decoder_cacheidxdecoder_layerdevice_indexr   r   r   r   s   `                  r+   "CohereModel_fast_forward_inferencer     s      |$ K !2t22223IJJ++I6M!$$Z0A$++0N%OPM"((NCa #))"-G
axCL$T[[2BDI
 '

(9(9:}.GK&4'
# !0))=l:S
 3'' -!0!5+!/!()@)@BS!TT 	3!2 2-2C2C]S++%% !!"343 ;4 -

L(AM #),	 r-   c                   $    \ rS rSr\S 5       rSrg)FastCohereModeli  c                     [        S[        [        [        S9u  pU b(  [	        U[        5       5        [        U 5      [        l        [        [        l	        [        [        l	        [        [        l	        [        [        l	        [        [        l	        [!        ["        5      [$        l	        [&        [(        l	        [+        [$        5        SS Kn[        UR.                  R0                  R2                  l        g )Ncohere)
model_namerope_modulescaled_rope_moduleattention_moduler   )patch_linear_scalingLlamaRotaryEmbedding!LlamaLinearScalingRotaryEmbeddingr   execglobalseval__init__rz   forwardr   r   r   r   LlamaModel_fast_forwardr   CausalLM_fast_forwardr   r   PeftModel_fast_forwardPeftModelForCausalLM!fix_prepare_inputs_for_generation*transformers.models.cohere.modeling_coheremodelsr   modeling_coherer   )	init_namefunctiontransformerss      r+   	pre_patchFastCohereModel.pre_patch  s    2!.!B.	
	  79%'+IO$">&B#(D%%D"5$9.%
! (>$)*;<9 ! 	""22H 	r-   r   N)__name__
__module____qualname____firstlineno__staticmethodr   __static_attributes__r   r-   r+   r   r     s     r-   r   )N)NNNNFFNN)FN)5llama_utilsr   unsloth_zoo.hf_utilsr   unsloth_zoo.utilsr   r   utils.packingr
   utils.attention_dispatchr   r   r   r   r   r   r   r   r   r   r   r   transformers_versionImportError%transformers.modeling_attn_mask_utilsr   r   r   r,   r   TensorOptionalBlockDiagonalCausalMask
LongTensorTupleboolrz   r   mathr   r   r   nn
functionalsoftmaxr   matmulr   r   r   FastLlamaModelr   r   r-   r+   <module>r     s     2 1 7 
  &
, 6:-1/348#/3GK_5<<_5 12_5 U\\*	_5
 5++,_5 U5<<01_5 _5 _5 5++,_5 "%ell(B"CD_5 5<<%,,/%:M1NNO_5L 6:-1/348(- %/3GKC<<C 12C U\\*	C
 5++,C U5<<01C  ~C ~C 5++,C "%ell(B"CDCL # #hh1199 || m<<m U5<<01mn CLn [
"#7876?245I4J KR S
 	
 3$,)+s   G7 H 7"HH(