
    cCi                        S SK JrJr  S SKrS SKJr  S SKJr  SSKJrJ	r	  SSK
JrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJr  SSKJr  SSKJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'  SSK(J)r)  \RT                  " \+5      r, " S S\#5      r- " S S\5      r. " S S\5      r/ " S S\%5      r0 " S S\$5      r1 " S S\ 5      r2 " S S \"5      r3 " S! S"\!5      r4 " S# S$\\05      r5/ S%Qr6g)&    )CallableOptionalN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)deprecate_kwarg   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                   (   ^  \ rS rSrU 4S jrSrU =r$ )
MistralMLP%   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     e/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/mistral/modular_mistral.pyr)   MistralMLP.__init__&   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )r/   r-   r.   )__name__
__module____qualname____firstlineno__r)   __static_attributes____classcell__r2   s   @r3   r#   r#   %   s    Y Yr5   r#   c                   6  ^  \ rS rSrS\S\4U 4S jjr\" SSSS9  SS	\R                  S
\
\R                  \R                  4   S\\R                     S\\   S\\R                     S\\   S\
\R                  \\R                     4   4S jj5       rSrU =r$ )MistralAttention-   r1   	layer_idxc                 h  > [         TU ]  X5        [        USS 5      =(       d    UR                  UR                  -  U l        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  UR                  U R
                  -  SS9U l
        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  U R
                  -  UR                  SS9U l        g )Nhead_dimFr&   )r(   r)   getattrr+   num_attention_headsrB   r   r*   q_projnum_key_value_headsk_projv_projo_projr0   r1   r@   r2   s      r3   r)   MistralAttention.__init__.   s    +
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr5   past_key_valuepast_key_valuesz4.58)new_nameversionhidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc           
      `   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pUb$  XUS.nUR                  XU R                  U5      u  p[        nU R                  R                  S:w  a  [        U R                  R                     nU" U U	U
UU4U R                  (       d  SOU R                  U R                   [#        U R                  SS 5      S.UD6u  nnUR$                  " / UQSP76 R'                  5       nU R)                  U5      nUU4$ )	Nr    r   )sincosrS   eagerg        sliding_window)dropoutscalingr[   )shaperB   rE   view	transposerG   rH   r   updater@   r   r1   _attn_implementationr   trainingattention_dropoutr]   rC   reshape
contiguousrI   )r0   rP   rQ   rR   rM   rS   rT   input_shapehidden_shapequery_states
key_statesvalue_statesrY   rX   cache_kwargsattention_interfaceattn_outputattn_weightss                     r3   forwardMistralAttention.forward6   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &#&nUL'6'='=jX\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ "));;;;FFHkk+.L((r5   )rB   rG   rI   rE   rH   )NN)r6   r7   r8   r9   r!   intr)   r   torchTensortupler   r   
LongTensorr   r   rp   r:   r;   r<   s   @r3   r>   r>   -   s    l} l l %0A6R ,059*)||*) #5<<#=>*) !.	*)
 "%*) !!1!12*) -.*) 
u||Xell33	4*) S*)r5   r>   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )MistralDecoderLayerd   r1   r@   c                 `   > [         TU ]  X5        [        XS9U l        [	        U5      U l        g )N)r1   r@   )r(   r)   r>   	self_attnr#   mlprJ   s      r3   r)   MistralDecoderLayer.__init__e   s(    +)Mf%r5   )r|   r{   )	r6   r7   r8   r9   r!   rr   r)   r:   r;   r<   s   @r3   rx   rx   d   s    &} & & &r5   rx   c                       \ rS rSr\\S.rSrg)MistralPreTrainedModelk   )rP   
attentions N)r6   r7   r8   r9   rx   r>   _can_record_outputsr:   r   r5   r3   r   r   k   s    ,&r5   r   c                      \ rS rSr\" 5       \       SS\\R                     S\\R                     S\\R                     S\\
   S\\R                     S\\   S	\\R                     S
\\   S\4S jj5       5       rSrg)MistralModelr   N	input_idsrR   position_idsrM   inputs_embeds	use_cacherS   rT   rU   c                    US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcD  Ub  UR	                  5       OSn	[
        R                  " XUR                  S   -   UR                  S9nUc  UR                  S5      nU R                  R                  c  [        O[        n
U
" U R                  UUUUUS9nUnU R                  X5      nU R                  S U R                  R                    H  nU" U4UUUUUUS.UD6nM     U R!                  U5      n[#        UU(       a  US9$ S S9$ )	Nz:You must specify exactly one of input_ids or inputs_embeds)r1   r   r    )device)r1   input_embedsrR   rS   rM   r   )rR   r   rM   r   rS   rQ   )last_hidden_staterM   )
ValueErrorembed_tokensr	   r1   get_seq_lengthrs   aranger^   r   	unsqueezer[   r
   r   
rotary_emblayersnum_hidden_layersnormr   )r0   r   rR   r   rM   r   r   rS   rT   past_seen_tokensmask_functioncausal_maskrP   rQ   decoder_layers                  r3   rp   MistralModel.forwards   s|    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &"oomJ![[)H4;;+H+HIM)	*) /#-$7	 	M J 		-0&+/8O
 	
>B
 	
r5   r   )NNNNNNN)r6   r7   r8   r9   r   r   r   rs   rv   rt   r   FloatTensorboolr   r   r   rp   r:   r   r5   r3   r   r   r   s     151537+/59$(599
E,,-9
 !.9
 u//0	9

 "%9
   1 129
 D>9
 !!1!129
 +,9
 
!9
  9
r5   r   c                       \ rS rSrSrg)MistralForCausalLM   r   Nr6   r7   r8   r9   r:   r   r5   r3   r   r          r5   r   c                       \ rS rSrSrg)MistralForTokenClassification   r   Nr   r   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg) MistralForSequenceClassification   r   Nr   r   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg)MistralForQuestionAnswering   r   Nr   r   r5   r3   r   r      s    X[r5   r   )r   r   r   r   r   r   )7typingr   r   rs   r   transformers.utils.genericr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.deprecationr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr!   
get_loggerr6   loggerr#   r>   rx   r   r   r   r   r   r   __all__r   r5   r3   <module>r      s    %   9 . R B 8 5 & @ @ 0   1 
		H	%Y Y4)~ 4)n&+ &1 <
: <
~	) 		$? 		'E 	 \"=?U [r5   