
    cCi&                        S r SSKJrJrJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJrJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJrJr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'  SSK(J)r)  \RT                  " \+5      r, " S S\RZ                  5      r. " S S\5      r/ " S S\ 5      r0 " S S\%5      r1 " S S\$5      r2 " S S\!5      r3 " S  S!\"5      r4 " S" S#\#5      r5/ S$Qr6g)%zPyTorch Starcoder2 model.    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )	MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelMistralRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   v   ^  \ rS rSrS\4U 4S jjrS\\\R                        S\R                  4S jr
SrU =r$ )Starcoder2MLP7   configc                 D  > [         TU ]  5         UR                  n[        R                  " X!R
                  UR                  S9U l        [        R                  " UR
                  X!R                  S9U l        [        UR                     U l        UR                  U l        g )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr	   
hidden_actactresidual_dropout)selfr$   	embed_dim	__class__s      k/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr)   Starcoder2MLP.__init__8   sq    &&	IIi)A)AX	ii 8 8)//Z&++, & 7 7    hidden_statesreturnc                     U R                  U5      nU R                  U5      nU R                  U5      n[        R                  R                  XR                  U R                  S9nU$ )Nptraining)r.   r1   r/   r   
functionaldropoutr2   r>   )r3   r9   s     r6   forwardStarcoder2MLP.forward@   sX    		-0/M2--m?T?T_c_l_l-mr8   )r1   r.   r/   r2   )__name__
__module____qualname____firstlineno__r    r)   r   tupletorchFloatTensorrA   __static_attributes____classcell__r5   s   @r6   r"   r"   7   s>    8/ 8XeE4E4E.F%G EL]L]  r8   r"   c                   b  ^  \ rS rSrSS\S\\   4U 4S jjjr\" SSSS9  SS	\	R                  S
\\	R                  \	R                  4   S\\	R                     S\\   S\\	R                     S\\   S\\	R                  \\	R                     \\\	R                        4   4S jj5       rSrU =r$ )Starcoder2AttentionH   r$   	layer_idxc                 r  > [         TU ]  XS9  UR                  U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l	        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g )Nr$   rP   r&   )r(   r)   r2   r   r+   r*   num_attention_headshead_dimr-   q_projnum_key_value_headsk_projv_projo_projr3   r$   rP   r5   s      r6   r)   Starcoder2Attention.__init__I   s    < & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur8   past_key_valuepast_key_valuesz4.58)new_nameversionr9   position_embeddingsattention_maskcache_positionkwargsr:   c           
         UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pUb$  XUS.nUR                  XU R                  U5      u  p[        nU R                  R                  S:w  a  [        U R                  R                     nU" U U	U
UU4U R                  (       d  SOU R                  U R                   [#        U R                  SS 5      S.UD6u  nnUR$                  " / UQSP76 R'                  5       nU R)                  U5      n[*        R,                  R/                  UU R0                  U R                  S	9nUU4$ )
Nr   r   )sincosrb   eagerg        sliding_window)r@   scalingri   r<   )shaperT   rU   view	transposerW   rX   r   updaterP   r   r$   _attn_implementationr   r>   attention_dropoutrj   getattrreshape
contiguousrY   r   r?   r@   r2   )r3   r9   r`   ra   r]   rb   rc   input_shapehidden_shapequery_states
key_statesvalue_statesrg   rf   cache_kwargsattention_interfaceattn_outputattn_weightss                     r6   rA   Starcoder2Attention.forwardQ   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &#&nUL'6'='=jX\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ "));;;;FFHkk+.mm++4004== , 
 L((r8   )rW   rY   rU   r2   rX   )N)NN)rC   rD   rE   rF   r    r   intr)   r   rH   TensorrG   r
   
LongTensorr   r   rA   rJ   rK   rL   s   @r6   rN   rN   H   s    v/ vHSM v v %0A6R ,059.)||.) #5<<#=>.) !.	.)
 "%.) !!1!12.) -..) 
u||Xell3XeELL>Q5RR	S.) S.)r8   rN   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )Starcoder2DecoderLayer   r$   rP   c                   > [         TU ]  X5        [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  S9U l        g )NrR   eps)r(   r)   rN   	self_attnr"   mlpr   	LayerNormr*   norm_epsiloninput_layernormpost_attention_layernormrZ   s      r6   r)   Starcoder2DecoderLayer.__init__   sf    +,FP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r8   )r   r   r   r   )	rC   rD   rE   rF   r    r~   r)   rJ   rK   rL   s   @r6   r   r      s     b/ bC b br8   r   c                       \ rS rSrSrg)Starcoder2RotaryEmbedding    NrC   rD   rE   rF   rJ   r   r8   r6   r   r          r8   r   c                   <  ^  \ rS rSrS\4U 4S jjr\" 5              SS\\R                     S\\R                     S\\R                     S\\\\\R                     4      S\\R                     S	\\   S
\\R                     S\\   S\4S jj5       rSrU =r$ )Starcoder2Model   r$   c           	      <  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR                  UR                  S9U l        UR                  U l        g s  snf )Nr   )r(   r)   r   
ModuleListrangenum_hidden_layersr   layersr   r*   r   normembedding_dropoutrZ   s      r6   r)   Starcoder2Model.__init__   sy     mmHMfNfNfHghHg9#F6Hgh
 LL!3!39L9LM	!'!9!9 is   B	input_idsra   position_idsr]   inputs_embeds	use_cacherb   rc   r:   c                    US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcD  Ub  UR	                  5       OSn	[
        R                  " XUR                  S   -   UR                  S9nUc  UR                  S5      nU R                  R                  c  [        O[        n
U
" U R                  UUUUUS9nUn[        R                  R                  XR                   U R"                  S9nU R%                  X5      nU R&                  S U R                  R(                    H  nU" U4UUUUUUS.UD6nM     U R+                  U5      n[-        UU(       a  US	9$ S S	9$ )
Nz:You must specify exactly one of input_ids or inputs_embeds)r$   r   r   )device)r$   input_embedsra   rb   r]   r   r<   )ra   r   r]   r   rb   r`   )last_hidden_stater]   )
ValueErrorembed_tokensr   r$   get_seq_lengthrH   arangerk   r   	unsqueezeri   r   r   r   r?   r@   r   r>   
rotary_embr   r   r   r   )r3   r   ra   r   r]   r   r   rb   rc   past_seen_tokensmask_functioncausal_maskr9   r`   decoder_layers                  r6   rA   Starcoder2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &--33dmm . 

 #oomJ![[)H4;;+H+HIM)	*) /#-$7	 	M J 		-0&+/8O
 	
>B
 	
r8   )r   r   r   )NNNNNNN)rC   rD   rE   rF   r    r)   r   r   rH   r   r   r   r
   listrI   boolr   r   r   rA   rJ   rK   rL   s   @r6   r   r      s    :/ :  151537KO59$(59?
E,,-?
 !.?
 u//0	?

 "%tE4E4E/F(F"GH?
   1 12?
 D>?
 !!1!12?
 +,?
 
!?
 ?
r8   r   c                       \ rS rSrSrg)Starcoder2ForCausalLM   r   Nr   r   r8   r6   r   r      r   r8   r   c                       \ rS rSrSrg)#Starcoder2ForSequenceClassification   r   Nr   r   r8   r6   r   r      r   r8   r   c                       \ rS rSrSrg) Starcoder2ForTokenClassification   r   Nr   r   r8   r6   r   r      r   r8   r   )r   r   Starcoder2PreTrainedModelr   r   )7__doc__typingr   r   r   rH   r   transformers.utils.genericr   activationsr	   cache_utilsr
   r   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   mistral.modeling_mistralr   r   r   r   r   r   r   r   r   configuration_starcoder2r    
get_loggerrC   loggerModuler"   rN   r   r   r   r   r   r   __all__r   r8   r6   <module>r      s   (   , ,   9 ! . R B 7 5 & 0 0
 
 
 7 
		H	%BII "8)* 8)vb0 b	 6 	I
l I
X	. 		*J 		'D 	r8   