
    ΅iM              #       n   % S r SSKrSSKJr  SSKJrJr  SSKJr  SSK	r	SSK
Jr  \" S5      r\" S5      r0 r\\	R                   R"                  \4   \S	'   \" 1 S
k5      rS\S\S\\\4   S\\\\4   /\\\4   4   4S jr S9SSSS.S\	R0                  S\	R0                  S\	R0                  S\\	R0                     S\S\S\S\	R0                  4S jjjr\" SS\5       S9SSSS.S\	R0                  S\	R0                  S\	R0                  S\\	R0                     S\S\S\S\	R0                  4S jjj5       rS\\   S\S\4S jrS \	R0                  S!\S\S\	R0                  4S" jrS#\	R0                  S$\	R0                  S%\S&\S\\   S'\S\	R0                  4S( jrS%\S&\SS4S) jr S#\	R0                  S$\	R0                  S%\S&\S\\   S\	R0                  4S* jr!   S:SSSSSS+SS,.S#\	R0                  S$\	R0                  S-\	R0                  S.\\	R0                     S/\\	R0                     S0\\	R0                     S1\S2\S3\S'\S\\   S4\S5\\   S\"\	R0                  \	R0                  \	R0                  \	R0                  4   4S6 jjjr#\" S7S\#5         S:SSSSSS+SS,.S#\	R0                  S$\	R0                  S-\	R0                  S.\\	R0                     S/\\	R0                     S0\\	R0                     S1\S2\S3\S'\S\\   S4\S5\\   S\"\	R0                  \	R0                  \	R0                  \	R0                  4   4S8 jjj5       r$g);zImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)OptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                 l   ^ ^^ S[         [        [        4   S[         [        [        4   4UU U4S jjnU$ )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    > ST 3n[         R                  R                  ST SU 3SS9" U 5      nU [        [	        [	        [         R
                  R                  T5      U5      '   UR                  T5        U$ )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr
   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   s      N/home/james-whalen/.local/lib/python3.13/site-packages/torch/onnx/ops/_impl.py	decorator_onnx_op.<locals>.decorator'   s|    =/*==**WIQxj) + 

  	wwuyy~~w'GRS 	y)    )r   r   r	   )r   r   r   r$   s   ``` r#   _onnx_opr'   "   s5    
	R( 	Xb"f-= 	 	 r&   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr(   r)   r*   c                "    U R                  5       $ )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r+   r,   r-   r.   r(   r)   r*   s          r#   _rotary_embedding_23_fake_implr1   5   s     779r&   RotaryEmbedding   c                  ^^^^^^^^^ U R                   m[        T5      nTS   mTS   mTb  [        R                  " TR	                  5       S:H  U4S j5        [        R                  " TR                   S   T:H  UU4S j5        [        R                  " TR                   S   T:H  UU4S j5        [        R                  " TR	                  5       S:H  =(       a    TR	                  5       S:H  UU4S	 j5        OG[        R                  " TR	                  5       S
:H  =(       a    TR	                  5       S
:H  UU4S j5        US:X  a  [        R
                  " U S5      n OHUS
:X  aB  [        R                  " US:g  U4S j5        TS   nX-  n	TTXY/n
[        R                  " X
5      n [        R                  " [        U R                   5      S:H  S 5        U R                   S
   n	US:X  a  U	nU SS2SS2SS2SU24   nU SS2SS2SS2US24   nUS-  mTb  TT   mTT   mOTmTm[        R                  " TR                   S   T:H  =(       a    TR                   S   T:H  UUU4S j5        [        R                  " TR                   S   T:H  =(       a    TR                   S   T:H  UUU4S j5        [        R                  " TR                   S   T:H  UU4S j5        [        R                  " TR                   S   T:H  UU4S j5        [        R                  " TS5      m[        R                  " TS5      mU(       a%  USS2SS2SS2SSS24   nUSS2SS2SS2SSS24   nO[        R                  " USSS9u  pTU-  TU-  -
  nTU-  TU-  -   nU(       ag  [        R                  " US5      n[        R                  " US5      n[        R                  " UU4SS9n[        R                  " UUR                   5      nO[        R                  " UU4SS9n[        R                  " X4SS9nUS
:X  a  [        R                  " UT5      $ [        R
                  " US5      $ )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                  "   > ST R                    3$ )Nz6position_ids must be 2D when provided. Received shape shape)r.   s   r#   <lambda>%rotary_embedding_23.<locals>.<lambda>Z   s    L\M_M_L`ar&   c                  .   > ST  STR                   S    3$ )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r8   )
batch_sizer.   s   r#   r:   r;   ^   s"    LZLXdeqewewxyezd{|r&   r   c                  .   > ST ST R                   S    3$ )Nz;position_ids second dim (sequence) must match x.shape[-2] (r=   r   r8   )r.   sequence_lengths   r#   r:   r;   b   s;    QRaQbbno{  pB  pB  CD  pE  oF  Gr&   c                  <   > ST R                    STR                    3$ )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r8   r,   r-   s   r#   r:   r;   f   $     ((1'88J9??J[]r&      c                  <   > ST R                    STR                    3$ )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape rB   r8   rC   s   r#   r:   r;   l   rD   r&      )r   r6   r   rE   c                     > ST  3$ )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r#   r:   r;   y   s    abmanor&   c                      g)Nzx should be a 4D tensor by nowr   r   r&   r#   r:   r;      s    ,Lr&   c                  0   > STR                    ST  ST S3$ )Nzcos has shape  but expected (batch=, seq=, ...)r8   )r>   cosr@   s   r#   r:   r;      "    .+@FSbRccijr&   c                  0   > STR                    ST  ST S3$ )Nzsin has shape rL   rM   rN   r8   )r>   r@   sins   r#   r:   r;      rP   r&   c                  0   > ST R                   S    ST S3$ )NzLast dimension of cos cache (rS   ') should match rotary_embedding_dim/2 ().r8   )rO   rotary_embedding_dim_halfs   r#   r:   r;      ,    /		">ef  fA  AC  Dr&   c                  0   > STR                   S    ST  S3$ )NzLast dimension of sin cache (rS   rU   rV   r8   )rW   rR   s   r#   r:   r;      rX   r&   dim)
r9   lenr   _checkr[   permutereshape	unsqueezechunkcat)r+   r,   r-   r.   r(   r)   r*   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr>   rO   rI   rW   r@   rR   s    ```               @@@@@@r#   rotary_embedding_23ro   C   s    ''K[!JQJ!"oO !#a	
 	q!Z/|	
 	q!_4 G	
 	MMOq 9Y]]_%9]	
 	MMOq 9Y]]_%9]	
 Q MM!\*	qNo	
 "!n,	)G	MM!'	LLQWW"$LM
I q (Aq////0HQ12334L 4 9 
 
 	LL		!
"Fsyy|'Fj 
LL		!
"Fsyy|'Fj 
LL		"22 	D 
LL		"22 	D //QC //QC
 aAqt!tm$aAqt!tm$Xqb1 8cBhD8cBhD  tR(tR())T4Lb9==(..A99dD\r2YY/R8FQ}}V[11 ==..r&   scalere   c                 >    U b  U $ S[         R                  " U5      -  $ )z/Get the scale factor for attention computation.g      ?)mathsqrt)rp   re   s     r#   _get_scale_factorrt      s     %5GC$))I2F,FGr&   tensorr>   c                     U R                   S   U R                   S   pCXB-  nU R                  XX%5      R                  SS5      R                  5       $ )z1Reshape 3D tensor to 4D for multi-head attention.r   r6   )r9   view	transpose
contiguous)ru   r>   r)   r@   rd   re   s         r#   _reshape_3d_to_4drz      sH     $*<<?FLLO[(IJF	1a	r&   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          US:X  a  [        XX#U5      $ [        R                  " [        R                  " XR	                  SS5      5      5      $ )z1Get QK output tensor based on the specified mode.r   r5   rS   )_compute_qk_output_for_mode_0r   
zeros_likematmulrx   )r{   r|   r}   r~   rp   r   s         r#   _get_qk_output_for_aten_spdar      sH     !,%U
 	

 QB0C DEEr&   c                 L   ^ ^ [         R                  " T T-  S:H  UU 4S j5        g)z-Validate Group Query Attention configuration.r   c                     > ST ST  S3$ )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )r~   r}   s   r#   r:   -_validate_gqa_configuration.<locals>.<lambda>   s    - 344YZnYooxyr&   N)r   r]   )r}   r~   s   ``r#   _validate_gqa_configurationr      s"     
LL22a7yr&   c                     UnX#:w  a  X#-  nUR                  USS9n[        X@R                  S   5      n[        R                  " U5      nX-  n	XX-  n
[
        R                  " XR                  SS5      5      $ )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r   rZ   rE   r5   rS   )repeat_interleavert   r9   rr   rs   r   r   rx   )r{   r|   r}   r~   rp   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r#   r   r      sw     H2+C&&}!&<$UGGAJ7L<(J~H$H<<"4"4R"<==r&           )	is_causalkv_num_headsq_num_headsr   rp   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                   U R                   S   n[        U R                   5      S:X  a  U R                   S   nU R                   nUb4  UUUR                   S   UR                   S   -   UR                   S   U-  4nO#UUUR                   S   UR                   S   U-  4nUnUUUUS   4nOU R                   S   nU R                   nUbK  UR                   S   UR                   S   UR                   S   UR                   S   -   UR                   S   4nOUR                   nUnU R                   S   U R                   S   U R                   S   US   4n[        R                  " XR                  U R
                  S9n[        R                  " UUR                  UR
                  S9n[        R                  " UUR                  UR
                  S9n[        R                  " UU R                  U R
                  S9nUUUU4$ )z@Fake implementation for Attention-23 for torch.compile purposes.r   rE   r   r6   dtypedevice)r9   r\   r   emptyr   r   )r{   r|   r   r   r   r   r   r   r   r   rp   r   r   r>   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shapern   present_keypresent_value	qk_outputs                          r#   _attention_23_fake_implr     s   " J 177|qGGAJww q!AGGAJ.
l*	! 

l*	! 0 a 	
 GGAJww 

q!AGGAJ.
	! !"/ GGAJGGAJGGAJa 	
 [[WWQXXFF++/qwwqxxPKKK 3177188TMO177188LI;y88r&   	Attentionc                	   Su  pn[        U R                  5      nU R                  S   n[        U R                  5      S:X  a]  [        R                  " US:g  =(       a    US:g  S 5        U R                  S   n[	        U UU5      n [	        UUU5      n[	        UUU5      n[        R                  " [        U R                  5      S:H  =(       a7    [        UR                  5      S:H  =(       a    [        UR                  5      S:H  S 5        U R                  U   n[        U
U5      n
Ub  [        R                  " XA/US	9OUR                  5       nUb  [        R                  " XR/US	9OUR                  5       nUUp!U R                  U   nUR                  U   nU R                  U   nUR                  U   nUS
:H  =(       a?    U	S:H  =(       a3    USL =(       a(    USL =(       d    UR                  [        R                  :H  n[        UU5        U(       aL  [        R                  R                  R                  U UUUS
UU
[        UU:g  5      S9n[        U UUUU
U	5      nGO]UU:w  a%  UU-  nUR                  UUS	9nUR                  UUS	9n[        R                   " UUU R                  U R"                  S9nU(       ay  [        R                  " USL S 5        [        R$                  " [        R&                  " UU[        R                  U R"                  S95      nUR)                  U) [+        S5      5      nUb@  UR                  [        R                  :X  a  UR)                  U) [+        S5      5      nOUU-   n[        XR                  S   5      n[,        R.                  " U5      n U U -  n!UU -  n"[        R0                  " U!U"R3                  SS5      5      n#U#nU#U-   n$U	S:X  a  U$nUS
:  a  U[        R4                  " U$U-  5      -  n$U	S:X  a  U$nUbu  U[6        ;   aU  U$R                  n%U$R9                  [:        R<                  U   5      n$[        R>                  " U$SS	9n&U&R9                  U%5      n&O+[        R>                  " U$SS	9n&O[        R>                  " U$SS	9n&U	S:X  a  U&n[        R0                  " U&U5      nUS:X  a1  UR3                  SS5      RA                  5       RC                  UUS5      nUUUU4$ )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r   r6   rE   r   rE   c                      g)Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r&   r#   r:   attention_23.<locals>.<lambda>  s    Qr&   r   rG   c                      g)Nz'Q, K, and V should be 4D tensors by nowr   r   r&   r#   r:   r     s    9r&   NrZ   r   )r   	dropout_pr   rp   
enable_gqar   c                      g)Nz'Cannot use both is_causal and attn_maskr   r   r&   r#   r:   r     s    +Tr&   z-infr5   rS   r6   )"r\   r9   r   r]   rz   rt   rb   r0   r   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatrr   rs   r   rx   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxry   rw   )'r{   r|   r   r   r   r   r   r   r   r   rp   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr>   r   q_head_sizer   r   r}   r~   kv_sequence_lengthcan_use_sdparn   r   r   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs'                                          r#   attention_23r   c  s   & ,3(L !''lOJ 177|q12!2Q	
 GGAJa[9a\:a\:	LLAGGEc!''la/ECLA4E9 ''(#Ke[1E
  			8-\2WWY  ! 			:/|4WWY  q '',/77<0-. 	3 	A!Q&	A%	A $?)//UZZ"?	    35IJ$$AA#';; B 
 1 !
	 "66/3GGM##M|#DA##M|#DA KK1
	
 LLT!#T  **

%&**88	K "--{lE&MJI  %**,%119*eFmL	 &	1	 )
; YY|,
z>z> !<<(2D2DR2LM %	 ()3 A%$I S="UZZw0F%GGL A%$I ( $QQ!-!3!3+#==>OP  #]]<R@
']]>:
"]]<R@
|<J A%"I j!, ! Q"--/44ZARTVW 	 ;y88r&   )N)NNN)%__doc__rr   collections.abcr   typingr   r   typing_extensionsr   r   torch.onnx.opsr   r   r	   r
   dict_ops
OpOverload__annotations__	frozensetr   strintr'   Tensorr   r1   ro   r   rt   rz   r   r   r   tupler   r   r   r&   r#   <module>r      s,    $ $ '  * t_T] AC UZZ22H<= B091 -!$19"b&1AxB (2r6"223. ,0	  !|||| || 5<<(	    \\ 

R!?@
 ,0	D/  !D/||D/||D/ ||D/ 5<<(	D/ D/ D/ D/ \\D/ AD/NHXe_ H H H

LL
&)
69

\\
F||F||F F 	F
 E?F F \\F$47	>||>||> > 	>
 E?> \\>4 )-'+)-Q9 !"!'+Q9||Q9||Q9 ||Q9 %	Q9
 u||$Q9 &Q9 Q9 Q9 Q9 Q9 E?Q9 Q9  }Q9 5<<u||U\\ABQ9h 
+r23
 )-'+)-9 !"!'+9||9||9 ||9 %	9
 u||$9 &9 9 9 9 9 E?9 9  }9 5<<u||U\\AB9 49r&   