
    cCib-                        S SK JrJr  S SKrS SKJr  SSKJrJr  SSK	J
r
  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJr  SSKJrJrJrJrJrJ r J!r!J"r"  SSK#J$r$  \RJ                  " \&5      r'Sr(Sr) " S S\5      r* " S S\5      r+ " S S\5      r, " S S\ 5      r- " S S\5      r. " S S\5      r/ " S S \5      r0 " S! S"\5      r1/ S#Qr2g)$    )CallableOptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )CLIPMLP)LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   ,  ^  \ rS rSrS\S\4U 4S jjr\" SSSS9  SS	\R                  S
\
\R                  \R                  4   S\\R                     S\\   S\\R                     S\
\R                  \\R                     4   4S jj5       rSrU =r$ )PhiAttention$   config	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        U ?[        U R                  UR                  -  5      U l        UR                   U l        U R                   (       ay  [        R"                  " UR                  UR
                  -  UR$                  SS9U l        [        R"                  " UR                  UR
                  -  UR$                  SS9U l        g g )NTbias)epselementwise_affine)super__init__nnLinearhidden_sizenum_attention_headshead_dimq_projnum_key_value_headsk_projv_projdenseo_projintpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r    	__class__s      ]/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/phi/modular_phi.pyr'   PhiAttention.__init__%   s`   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0L0L LM"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	     past_key_valuepast_key_values4.58new_nameversionhidden_statesposition_embeddingsattention_maskcache_positionreturnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nU R                  (       a"  U R                  U	5      n	U R                  U
5      n
Uu  pU	SS U R                  24   U	SU R                  S 24   pU
SS U R                  24   U
SU R                  S 24   nn[        UUX5      u  nn[        R                  " X4SS9n	[        R                  " UU4SS9n
Ub$  XUS.nUR                  XU R                  U5      u  p[         nU R"                  R$                  S:w  a  [&        U R"                  R$                     nU" U U	U
UU4U R(                  (       d  SOU R*                  U R,                  S	.UD6u  nnUR.                  " / UQSP76 R1                  5       nU R3                  U5      nUU4$ )
Nr   r   .)dim)sincosrJ   eagerg        )dropoutscaling)shaper,   r-   view	transposer/   r0   r6   r9   r:   r5   r   torchcatupdater    r   r   _attn_implementationr   trainingattention_dropoutrS   reshape
contiguousr1   )r<   rG   rH   rI   rB   rJ   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statesrP   rO   	query_rot
query_passkey_rotkey_passcache_kwargsattention_interfaceattn_outputattn_weightss                         r>   forwardPhiAttention.forward6   s_    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J& 1 1 1112d//112 
 s/d////0sD--//0 
 2)WcO	7 yy)!8bAYY2;
&#&nUL'6'='=jX\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7	%
  $}}C$2H2HLL	%
 	%
!\ "));;;;FFHjj-L((r@   )r1   r:   r/   r9   r-   r6   r5   r0   )NN)__name__
__module____qualname____firstlineno__r   r3   r'   r   rW   Tensortupler   r   
LongTensorrm   __static_attributes____classcell__r=   s   @r>   r   r   $   s    y S " %0A6R ,059;)||;) #5<<#=>;) !.	;)
 "%;) !!1!12;) 
u||Xell33	4;) S;)r@   r   c                       \ rS rSrSrg)PhiMLPu    Nro   rp   rq   rr   rv   r|   r@   r>   rz   rz   u       r@   rz   c                     ^  \ rS rSrS\S\4U 4S jjr\" SSSS9       SS	\R                  S
\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\
\R                     S\
\\R                  \R                  4      S\\R                  \
\\R                  \R                  4      4   4S jj5       rSrU =r$ )PhiDecoderLayery   r   r    c                   > [         TU ]  5         [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        g )N)r    r$   )r&   r'   r   	self_attnrz   mlpr(   r7   r*   r8   input_layernormDropoutresid_pdropresid_dropoutr;   s      r>   r'   PhiDecoderLayer.__init__z   s[    %fB&>!||F,>,>FDYDYZZZ(:(:;r@   rA   rB   rC   rD   rG   rI   position_idsoutput_attentions	use_cacherJ   rH   rK   c	                     Un
U R                  U5      nU R                  " SUUUUUUUUS.U	D6u  pU R                  U5      nU R                  U R                  U5      5      nX-   U
-   nU4nU(       a  X4-  nU$ )N)rG   rI   r   rB   r   r   rJ   rH   r|   )r   r   r   r   )r<   rG   rI   r   rB   r   r   rJ   rH   r_   residualattn_outputsself_attn_weightsfeed_forward_hidden_statesoutputss                  r>   rm   PhiDecoderLayer.forward   s     !,,]; +/.. 
+
')%+/) 3
+
 
+
' )),7%)%7%78O%P"$AHL "++Gr@   )r   r   r   r   )NNNFFNN)ro   rp   rq   rr   r   r3   r'   r   rW   rs   r   ru   r   boolrt   FloatTensorrm   rv   rw   rx   s   @r>   r   r   y   s   <y <S < %0A6R 2637+/,1$)59KO%||% !.% u//0	%
 "%% $D>% D>% !!1!12% &eELL%,,,F&GH% 
u  (51B1BEDUDU1U+V"WW	X% S%r@   r   c                       \ rS rSrSrg)PhiRotaryEmbedding   r|   Nr}   r|   r@   r>   r   r      r~   r@   r   c                     ^  \ rS rSrS\4U 4S jjr         SS\\R                     S\\R                     S\\R                     S\\
   S\\R                     S	\\   S
\\   S\\   S\\R                     S\\   S\4S jjrSrU =r$ )PhiModel   r   c           	      h  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        U ?g s  snf )Nr   )r&   r'   r(   
ModuleListrangenum_hidden_layersr   layersr   
embd_pdropembed_dropoutr7   r*   r8   final_layernormnormr;   s      r>   r'   PhiModel.__init__   s     mmAFvG_G_A`aA`I_V/A`a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B/	input_idsrI   r   rB   inputs_embedsr   r   output_hidden_statesrJ   r_   rK   c
                 2   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUS L US L-  (       a  [	        S5      eU R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUc  U R                  U5      nU(       a  Uc  [        U R                   S9nU	cD  Ub  UR                  5       OSn[        R                  " XUR                  S   -   UR                  S9n	Uc  U	R!                  S5      n[#        U R                   UUU	UUS9nU R%                  U5      nUnU R'                  X5      nU(       a  S	OS nU(       a  S	OS nU R(                  S U R                   R*                    H7  nU(       a  X4-  nU" U4UUUUUU	US
.U
D6nUS   nU(       d  M.  UUS   4-  nM9     U R-                  U5      nU(       a  X4-  n[/        UU(       a  UOS UUS9$ )Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.F)r   r   r   )device)r   input_embedsrI   rJ   rB   r   r|   )rI   r   rB   r   r   rJ   rH   )last_hidden_staterB   rG   
attentions)r   r   r   r   
ValueErrorgradient_checkpointingr[   loggerwarning_onceembed_tokensr   get_seq_lengthrW   arangerT   r   	unsqueezer   r   
rotary_embr   r   r   r
   )r<   r   rI   r   rB   r   r   r   r   rJ   r_   past_seen_tokenscausal_maskrG   rH   all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                      r>   rm   PhiModel.forward   sF    2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	-t";<YZZ&&4==Yj I  --i8M0*$++>O!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L(;;&))+%
 **=9% #oomJ #7BD0d![[)H4;;+H+HIM#!%55!)
*) /"3#-$7
 
M *!,M  =#3"55' J* ,,];  !11&+/8Od+%	
 	
r@   )r   r   r   )	NNNNNNNNN)ro   rp   rq   rr   r   r'   r   rW   ru   rs   r   r   r   r   r   r
   rm   rv   rw   rx   s   @r>   r   r      s    y  151537+/59$(,0/359^
E,,-^
 !.^
 u//0	^

 "%^
   1 12^
 D>^
 $D>^
 'tn^
 !!1!12^
 +,^
 
!^
 ^
r@   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PhiForCausalLMi  c                    > [         TU ]  U5        [        R                  " UR                  UR
                  SS9U l        g )NTr"   )r&   r'   r(   r)   r*   
vocab_sizelm_head)r<   r   r=   s     r>   r'   PhiForCausalLM.__init__  s0     yy!3!3V5F5FTRr@   )r   )ro   rp   rq   rr   r'   rv   rw   rx   s   @r>   r   r     s    S Sr@   r   c                       \ rS rSrSrg)PhiForSequenceClassificationi  r|   Nr}   r|   r@   r>   r   r     r~   r@   r   c                       \ rS rSrSrg)PhiForTokenClassificationi#  r|   Nr}   r|   r@   r>   r   r   #  r~   r@   r   )PhiPreTrainedModelr   r   r   r   )3typingr   r   rW   torch.nnr(   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_phir   
get_loggerro   r   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr   rz   r   r   r   r   r   r   __all__r|   r@   r>   <module>r      s    %   . / 9 6 & 0 0 (	 	 	 ) 
		H	%' N)> N)b	W 	.0 .b	- 	h
z h
VS% S	#A 		 ; 	r@   