
    biV                        S SK Jr  S SKJrJr  S SKJ	r	J
r
  / SQr\
S 5       r\
S 5       r\
S 5       r\
S 5       r\ " S	 S
5      5       r\
S 5       r\
S 5       r\	S 5       r\	S 5       r\
S 5       r\
S 5       r\
S 5       r\
S 5       rg)    )
_aggregate)_core	_standard)constexpr_functionjit)pack2unpack2packunpackfmaFloat2Tensorc           	      R    [         R                  " SSX/[         R                  SSS9$ )Nz'
        add.f32x2 $0, $1, $2;
        =l,l,lT   dtypeis_purer
   ttglinline_asm_elementwiseint64abs     t/home/james-whalen/.local/lib/python3.13/site-packages/triton/experimental/gluon/language/nvidia/blackwell/float2.py
_add_f32x2r      1    &&	 		
jj	 	    c           	      R    [         R                  " SSX/[         R                  SSS9$ )Nz'
        sub.f32x2 $0, $1, $2;
        r   Tr   r   r   r   s     r   
_sub_f32x2r       r   r   c           	      R    [         R                  " SSX/[         R                  SSS9$ )Nz'
        mul.f32x2 $0, $1, $2;
        r   Tr   r   r   r   s     r   
_mul_f32x2r"   +   r   r   c           	      T    [         R                  " SSXU/[         R                  SSS9$ )Nz.
        fma.rn.f32x2 $0, $1, $2, $3;
        z=l,l,l,lTr   r   r   r   r   cs      r   
_fma_f32x2r&   9   s3    &&	 		
q	jj	 	r   c                       \ rS rSr% \R
                  \S'   \S\R
                  4S j5       r\	S 5       r
\	S 5       r\	S 5       r\	S\R                  4S j5       rS	rg
)r   G   valuec                     Xl         g Nr)   )selfr)   s     r   __init__Float2Tensor.__init__K   s    
r   c                     [         R                  " [        U[        5      S5        [        [	        U R
                  UR
                  5      5      $ Nzrhs must be a Float2Tensor)r   static_assert
isinstancer   r   r)   r-   rhss     r   __add__Float2Tensor.__add__O   4    :c<8:VWJtzz399=>>r   c                     [         R                  " [        U[        5      S5        [        [	        U R
                  UR
                  5      5      $ r1   )r   r2   r3   r   r    r)   r4   s     r   __sub__Float2Tensor.__sub__T   r8   r   c                     [         R                  " [        U[        5      S5        [        [	        U R
                  UR
                  5      5      $ r1   )r   r2   r3   r   r"   r)   r4   s     r   __mul__Float2Tensor.__mul__Y   r8   r   axisc                 \    [        [        R                  " U R                  U[        S95      $ )N)r?   
combine_fn)r   r   reducer)   r   )r-   r?   s     r   sumFloat2Tensor.sum^   s    DKK

*UVVr   r,   N)__name__
__module____qualname____firstlineno__r   tensor__annotations__r   r.   r   r6   r:   r=   	constexprrC   __static_attributes__ r   r   r   r   G   s    ;;dkk   	? 	? 	? 	? 	? 	? 	W W 	Wr   r   c           	      h    [         R                  " SSX/[         R                  SSS9n[        U5      $ )Nz)
        mov.b64 $0, { $1, $2 };
        z=l,r,rTr   r   )r   r   r   r   )x0x1r)   s      r   r   r   c   s;    ''	 		jj	E r   c           	          [         R                  " SSU R                  /[         R                  [         R                  /SSS9$ )Nz)
        mov.b64 { $0, $1 }, $2;
        z=r,=r,lTr   r   )r   r   r)   float32)xs    r   r	   r	   r   s>    &&	 		
	||T\\*	 	r   c                    U  Vs/ s H  o"PM     n nX   S:  d
   SU 35       eX==   S-  ss'   U R                  US-   S5        [        [        [        U 5      5      5      nU[        U5      S-
     X1S-      sX1S-   '   U[        U5      S-
  '   [        R
                  " U 5      [        R
                  " U5      4$ s  snf )N   z'not enough elements to pack along axis r   )insertlistrangelenr   tupleshaper?   dpermutes       r   _get_split_shaper_      s    1QE;!MFtfMM	KAK	LL15U$%G3:3w<!;K3Lg]^V^N_0G1Hws7|a/0::edjj111 s   B;c                    U  Vs/ s H  o"PM     n nX==   S-  ss'   [        [        [        U 5      5      5      nUR                  US-   [        U5      5        [        R
                  " U 5      [        R
                  " U5      4$ s  snf )NrU   r   )rW   rX   rY   rV   r   rZ   r[   s       r   _get_join_shapera      sm    1QE	K1K5U$%GNN4!8S\*::edjj111	 s   Bc                     [        U R                  U5      nU R                  " US   6 R                  " US   6 R	                  5       u  p4[        X45      $ )Nr   r   )r_   r\   reshaper^   splitr   )rS   r?   sprO   rP   s        r   r
   r
      sH    )!''48BYY1&&1.446FB=r   c                     U R                   R                  n[        X!5      n[        U 5      u  pE[        R
                  " XE5      R                  " US   6 R                  " US   6 $ )Nr   r   )r)   r\   ra   r	   r   joinr^   rc   )rS   r?   r\   re   rO   rP   s         r   r   r      sQ    GGMME(5BQZFB99R$$be,44be<<r   c                     [         R                  " UR                  [         R                  :H  S5        [        R
                  " U R                  U[         R                  S9n[        X"5      $ )Nzfill_value must be a float32)r   )r   r2   r   rR   stdlib	full_liker)   r   )rS   
fill_valuefills      r   rj   rj      sI    z''4<<79WXAGGZt||DDr   c                 j    [        [        U R                  UR                  UR                  5      5      $ r+   )r   r&   r)   r$   s      r   r   r      s"    
177AGGQWW=>>r   N)triton.language.corer   	aggregate"triton.experimental.gluon.languager   r   r   ri   "triton.experimental.gluon._runtimer   r   __all__r   r    r"   r&   r   r   r	   r_   ra   r
   r   rj   r   rM   r   r   <module>rs      s!   8 Q F 
 
 
 
 
 
 
 
 W W W6   
 
 2 2 2 2   = =   ? ?r   