
    bin                         S SK JrJrJrJrJr  S SKrS SKJr  SSK	J
r  SSKJrJrJrJrJrJr  S SKJrJr  S SKJrJr  \" S	5      r\4S
\S\/ \4   4S jjrS rSS jrS\l          " S S5      r! " S S\\   5      r"g)    )SequenceListTypeVarTupleCallableN)TritonSemantic   )_core)
AutoLayoutDistributedLayoutDistributedLinearLayoutSliceLayoutSharedLayoutCoalescedLayout)GluonOpBuildercompute_tmem_reg_layout)flatten_values_to_irunflatten_ir_valuesTensorTycondmsg_fnc                 ,    U (       d  U" U" 5       5      eg N )r   r   categorys      f/home/james-whalen/.local/lib/python3.13/site-packages/triton/experimental/gluon/language/_semantic.py_checkr      s    vx       c                 T    [        U [        5      =(       a    [        S U  5       5      $ )Nc              3   B   #    U  H  n[        U[        5      v   M     g 7fr   
isinstanceint).0is     r   	<genexpr>_is_int_list.<locals>.<genexpr>   s     .Q5az!S/A/A5   )r"   r   allvalues    r   _is_int_listr,      s    eX&Q3.Q5.Q+QQr   c                   ^^^^^^ [        [        T[        5      S 5        [        TS;   U4S j5        [        [        T[        5      U4S j5        [        TS:  =(       a    TTS-
  -  S:H  S 5        [	        T5      m[        [        S	 T 5       5      U4S
 j5        [        T5      n[        US:H  S 5        Uc  / nTS:H  nU(       a  SOTmU(       a"  U H  n[        [        U5      U:H  S 5        M     [        U TUTTU5      m[        TS LUUU4S j5        U(       Ga  TS   n	TR                  (       dX  [        TR                  S   SU	S-  /:H  U4S j5        TR                  R                  SU	S-  /5        SS/TR                  S'   T$ TR                  S   SU	S-  /:w  a  U R                  mS[        TR                  5      -  n
[        U
ST-  :  U4S j5        TR                  nS HC  n[        TU5      n[        U5       H%  u  pUSU	S-  /:X  d  M  X   US   sUS'   X'   Ts  s  $    ME      SU 35       eT$ )Nc                      g)Nzinstr_variant must be a stringr   r   r   r   <lambda>*_compute_tmem_reg_layout.<locals>.<lambda>   s    3Sr   )32x32b16x64b16x128b16x256b16x32bx232x32b_splitnc                     > ST  3$ )Nzunknown instr_variant: r   )instr_variants   r   r/   r0      s    ,]O<r   c                  "   > S[        T 5      < 3$ )Nz!num_warps must be an int but got type	num_warpss   r   r/   r0      s    1RSWXaSbRe/fr      r	   r   c                      g)Nz)num_warps must be a power of two and >= 4r   r   r   r   r/   r0      s    Jur   c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   r!   )r$   dims     r   r&   +_compute_tmem_reg_layout.<locals>.<genexpr>   s     5uz#s##ur(   c                     > ST  3$ )Nz#shape entries must be ints but got r   shapes   r   r/   r0      s    Adejdk?lr      c                      g)Nzexpected a 2D tensorr   r   r   r   r/   r0      s    4r   r6   r1   c                      g)Nzcga_layout basis rank mismatchr   r   r   r   r/   r0   (   s    /Or   c                     > ST  ST ST 3$ )NzTMEM layout 'z' unsupported for shape z and num_warps r   )atom_variantr=   rE   s   r   r/   r0   3   s    ]<.0H_h^ijr   c                     > ST  3$ )NzJsplitn with 1 register requires the last lane basis to be [0, N / 2]. Got r   )
layout_objs   r   r/   r0   ;   s    ghrgstr       c                     > SST -   ST  S3$ )NzETo be able to `tmem.load` into `tl.split` you need to have more than rN    z-bit registers, as you need to use the instruction 32x32b.x1 twice. You can always load into instr_variant="32x32b" and then convert_layout to this layout otherwise.r   )bitwidths   r   r/   r0   B   s$     3h'q
 3]3]r   )
lane_bases
warp_basesz6splitn requires at least one basis of [0, N / 2]. Got )r   r"   strr#   listr)   lenr   	reg_basesrR   appendprimitive_bitwidthgetattr	enumerate)
element_tyrE   layoutr=   r8   
cga_layoutranksplitnbasisNnum_regrW   	bases_strbasesr%   rJ   rQ   rM   s    ` ``          @@@r   _compute_tmem_reg_layoutrf      sS   
:mS)+ST
=cc<>
:i%'fg
9>@yIM:q@BuvKE
35u557lmu:D
41945
o-F%8=LE3u:%'OP   )J :T!jl !H## :((,AF;tv  ''AF4)*AJ!!"%$ # !!"%!Q!V4!44HZ1122G".( +]^ #,,I9	
I6 )% 0HAAF+27(IbM/	"ux)) !1 : \RSYRZ[[5r   Tc                   .    \ rS rSrS\4S jrS rS rSrg)GluonCallerContextU   r=   c                     Xl         g r   r<   )selfr=   s     r   __init__GluonCallerContext.__init__W   s    "r   c                      SU R                    3$ )N_NWr<   rk   s    r   mangleGluonCallerContext.mangleZ   s    T^^$%%r   c                 Z    UR                  SUR                  U R                  5      5        g )Nzttg.num-warps)set_attrget_int32_attrr=   )rk   fnbuilders      r   initialize_callee$GluonCallerContext.initialize_callee]   s    
OW%;%;DNN%KLr   r<   N)	__name__
__module____qualname____firstlineno__r#   rl   rq   rx   __static_attributes__r   r   r   rh   rh   U   s    ## #&Mr   rh   c            
         ^  \ rS rSr% \R
                  r\r\\S'   S\4S jr	S r
S rS\\   S\\   4S jrS	\S
\S\4S jrS\S\S\4U 4S jjrS\S\\\4   4U 4S jjrS	\S\\   S\4U 4S jjrS	\S\\   S\4S jrS\S\S\4U 4S jjrU 4S jrS	\S\\   S\4U 4S jjrS rS rS@S jrS rS  rS! rS" r S# r!S$ r"S% r#S& r$S' r%S( r&S) r'S* r(S+ r)\*S, 5       r+S-\,\   S
\S.\S\\S/4   4S0 jr-S-\,\   S
\S\\S/4   4S1 jr.S	\S2\S3\S\4S4 jr/S\S\S\S\4S5 jr0S6\S7\S
\S\4S8 jr1S6\S\4S9 jr2S:\,\   S;\,\   4S< jr3S= r4S> r5S?r6U =r7$ )AGluonSemantica   rw   c                     Xl         g r   rw   )rk   rw   s     r   rl   GluonSemantic.__init__g   s    r   c                     U/ :X  a  UnO0[         R                  " X#U R                  R                  U5      5      nU R	                  X5      $ r   )ttgldistributed_typerw   get_gluon_layout_from_tensortensor)rk   handle	scalar_tyrE   tys        r   _wrap_handle_infer_layout'GluonSemantic._wrap_handle_infer_layoutj   s?    B;B&&y9b9bci9jkB{{6&&r   c                 x    U R                  UR                  UR                  R                  UR                  5      $ r   )r   r   r;   scalarrE   )rk   r   s     r   _wrap_tensor_infer_layout'GluonSemantic._wrap_tensor_infer_layoutq   s,    --fmmV[[=O=OQWQ]Q]^^r   	lhs_shape	rhs_shapec                 `   [        U5      [        U5      :w  a  [        SU SU 35      e/ n[        U5       Hs  u  pEX$   nUS:X  a  UR                  U5        M"  US:X  d  Xe:X  a  UR                  U5        M@  [        S[	        U5      -   S-   [	        U5      -   S-   [	        U5      -   5      e   U$ )N!Cannot broadcast, rank mismatch: , r	   z?Cannot make_shape_compatible: incompatible dimensions at index : z and )rV   
ValueErrorr[   rX   rT   )rk   r   r   	ret_shaper%   leftrights          r   _broadcast_shapesGluonSemantic._broadcast_shapest   s    y>S^+@2i[YZZ	 +GALEqy  '1*%-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a , r   inputaxisreturnc                   ^^^ TR                    Vs/ s H  n[        R                  " U5      PM     nnUR                  TS5        TS:  a  T[	        TR                   5      -  m[        [        TR                  [        R                  5      U4S j5        TR                  R                  m[        [        T[        [        [        45      U4S j5        [        [        T[        [        45      =(       d    TR                  T:H  UU4S j5        U R                  R                  TR                   T5      nU R#                  UTR                  R$                  U5      $ s  snf )Nr	   r   c                  $   > ST R                   < 3$ Nz=expected expand_dims input to be a distributed_type but got: r:   r   s   r   r/   +GluonSemantic.expand_dims.<locals>.<lambda>       VW\WaWaVder   c                     > ST  3$ )Nz;expected expand_dims input to have a SliceLayout, but got: r   r]   s   r   r/   r      s    TU[T\]r   c                  (   > ST  STR                    3$ )Nz7expected expand_dims input layout to be sliced in axis z	 but got rA   )r   r]   s   r   r/   r      s    MdVS\]c]g]g\hir   )rE   r   _unwrap_if_constexprinsertrV   r   r"   r;   r   r]   r   r   r   rA   rw   create_expand_dimsr   r   r   )rk   r   r   x	dst_shaper   r]   s    ``   @r   expand_dimsGluonSemantic.expand_dims   s   ;@;;G;aT..q1;	Gq!!8C$$Dz%**d&;&;<e	g""z&;
O"LM]	_v
O<=StASi	k 00tD--fejj6G6GSS! Hs    Eabc                    > U R                  X5      u  p[        UR                  / :g  S 5        [        TU ]  X5      nU R                  U5      $ )Nc                      g)NzCannot join scalars in gluonr   r   r   r   r/   $GluonSemantic.join.<locals>.<lambda>   s    &Dr   )broadcast_impl_valuer   rE   superjoinr   )rk   r   r   r+   	__class__s       r   r   GluonSemantic.join   sG    ((.qww"}DEQ"--e44r   c                 j   > [         TU ]  U5      u  p#U R                  U5      U R                  U5      4$ r   )r   splitr   )rk   r   lhsrhsr   s       r   r   GluonSemantic.split   s5    7=#--c2D4R4RSV4WWWr   dimsc                 D   > [         TU ]  X5      nU R                  U5      $ r   )r   permuter   )rk   r   r   r+   r   s       r   r   GluonSemantic.permute   s"    ,--e44r   rE   c                   ^^^ [        [        TR                  [        R                  5      U4S j5        TR                  R                  5       m[        [        T5      [        T5      :H  UU4S j5        TT:X  a  T$ [        T5       H3  u  p4TU   U:w  d  M  US:w  d  M  [        STU    SU SU ST ST 3
5      e   [        R                  " TR                  R                  TTR                  R                  5      nU R                  R                  TR                  UR                  U R                  5      5      nU R                  Xe5      $ )	Nc                  $   > ST R                   < 3$ r   r:   r   s   r   r/   4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>   r   r   c                     > ST ST  3$ )Nr   r   r   )rE   	src_shapes   r   r/   r      s    7XYbXccefkel5mr   r	   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension r   r   )r   r"   r;   r   r   get_block_shapesrV   r[   r   r   r]   rw   create_broadcastr   to_irr   )rk   r   rE   r%   itemret_tyr   r   s    ``    @r   broadcast_impl_shape"GluonSemantic.broadcast_impl_shape   s"   z%**d&;&;<e	gJJ//1	s9~U+-mnIL +GAQx4DAI #VW\]^W_V` aCCG& I%%&Cr)Bug"? @ @ ,
 &&uzz'8'8%ARARS..u||V\\$,,=WX{{6**r   r   r   c                   >^^	 UR                   mUR                   m	TR                  5       (       a  T	R                  5       (       d  [        T
U ]  X5      $ [	        [        T[        R                  5      U4S j5        [	        [        T	[        R                  5      U	4S j5        TR                  5       nT	R                  5       nU R                  X45      n[        TR                  [        5      n[        T	R                  [        5      nU(       a$  U(       d  U R                  UT	R                  5      nOjU(       a$  U(       d  U R                  UTR                  5      nO?TR                  T	R                  :w  a%  [        STR                   ST	R                   35      eU R                  X5      nU R                  X%5      nX4$ )Nc                     > ST < 3$ )Nz@expected broadcast left input to be a distributed_type but got: r   )lhs_tys   r   r/   4GluonSemantic.broadcast_impl_value.<locals>.<lambda>   s    YZ`Ycdr   c                     > ST < 3$ )NzAexpected broadcast right input to be a distributed_type but got: r   )rhs_tys   r   r/   r      s    Z[aZder   zLayout mismatch in broadcast: z vs )r;   is_blockr   r   r   r"   r   r   r   r   r]   r   set_auto_layoutr   r   )rk   r   r   r   r   r   is_lhs_autois_rhs_autor   r   r   s           @@r   r   "GluonSemantic.broadcast_impl_value   sT     (9(97/99z&$"7"78d	fz&$"7"78e	g ++-	++-	**9@	 
; 
;{&&sFMM:C&&sFMM:C]]fmm+=fmm_DQWQ^Q^P_`aa''7''7xr   c                    > X!-
  /nUc
  [        5       n[        R                  " [        R                  XC5      n[        TU ]  XUS9$ )N)r   )r   r   r   int32r   arange)rk   startendr]   rE   r   r   s         r   r   GluonSemantic.arange   sA    >\F&&tzz5Aw~e~88r   r   can_reorderc                 j   > [        U(       + S 5        [        TU ]	  XU5      nU R                  U5      $ )Nc                      g)Nz%can_reorder is not supported in gluonr   r   r   r   r/   'GluonSemantic.reshape.<locals>.<lambda>   s    (Or   )r   r   reshaper   )rk   r   r   r   r+   r   s        r   r   GluonSemantic.reshape   s1    ; OP+>--e44r   c                    [        U5      S:X  a  U$ [        R                  " UR                  X#5      nU R                  R                  UR                  U R                  5      UR                  5      n[        R                  " XT5      $ )Nr   )	rV   r   r   dtyperw   create_splatr   r   r   )rk   r+   rE   r]   r   r   s         r   splatGluonSemantic.splat   s^    u:?L&&u{{EB**6<<+Eu||T{{6**r   c                 b    U R                  X#5      nUc
  [        5       nU R                  XQU5      $ r   )make_scalarr   r   )rk   rE   r+   r   r]   r   s         r   fullGluonSemantic.full   s/    !!%/>\Fzz&00r   c                   ^^ UR                   m[        [        T[        R                  5      U4S j5        [        [        T[        R
                  5      U4S j5        [        R                  " TR                  TR                  T5      nUR                  U R                  5      nU(       a  U R                  R                  XQR                  5      (       d_  [        STR                   ST SU R                  TR                  TR                  5       SU R                  TTR                  5       35      eU R                  R                  XQR                  5      n[        R                   " Xd5      $ )Nc                     > ST < 3$ )Nz@expected convert_layout input to be a distributed_type but got: r   )r   s   r   r/   .GluonSemantic.convert_layout.<locals>.<lambda>   s    YZ\Y_`r   c                     > ST  3$ Nz4expected 'layout' to be a DistributedLayout but got r   r   s   r   r/   r          MfXVr   zlayout conversion from z to z) is not trivial.
The linear layouts are:

)r;   r   r"   r   r   r   r\   rE   r   rw   is_convert_layout_trivialr   	TypeErrorr]   to_linear_layoutcreate_convert_layoutr   )rk   r+   r]   assert_trivialr   	ret_ty_irr   r   s     `    @r   convert_layoutGluonSemantic.convert_layout   s   ZZz"d334`	bz&$"8"89V	X&&r}}bhhGLL.	$,,"H"HT`T`"a"a5bii[VH M88<8M8MbiiY[YaYa8b7cce#44VRXXFGI J J 33I||L{{6**r   c                 .  ^^^ [        [        T[        R                  5      U4S j5        [        [	        T5      U4S j5        [        [        T[        R
                  5      U4S j5        [        R                  " TTTT5      nUb@  U R                  R                  UR                  U R                  5      UR                  5      nO4U R                  R                  UR                  U R                  5      5      n[        R                  " UTTTT5      $ )Nc                     > ST  3$ )Nz,expected 'element_ty' to be a dtype but got r   )r\   s   r   r/   /GluonSemantic.allocate_shared.<locals>.<lambda>   s    =ijtiu;vr   c                     > ST  3$ Nz1all elements of 'shape' must be integers but got r   rD   s   r   r/   r         ._`e_f,gr   c                     > ST  3$ Nz/expected 'layout' to be a SharedLayout but got r   r   s   r   r/   r         HQr   )r   r"   r   r   r,   r   shared_memory_descriptor_typerw   create_local_allocr   r   shared_memory_descriptor)rk   r\   rE   r]   r+   r   r   s    ```   r   allocate_sharedGluonSemantic.allocate_shared   s    z*djj13vw|E"$ghz&$"3"34Q	S//
E65Q\\44RXXdll5KU\\ZF\\44RXXdll5KLF,,VZPUVVr   c                 V  ^ [        [        T[        R                  5      U4S j5        [        R                  " UR
                  UR                  T5      nU R                  R                  UR                  U R                  5      UR                  5      n[        R                  " XC5      $ )Nc                     > ST  3$ r   r   r   s   r   r/   +GluonSemantic.shared_load.<locals>.<lambda>  r   r   )r   r"   r   r   r   r   rE   rw   create_local_loadr   r   r   )rk   mem_descr]   r   r   s     `  r   shared_loadGluonSemantic.shared_load  sp    z&$"8"89V	X&&x~~x~~vN//T\\0JHOO\{{6**r   c                 X  ^^ [        [        T[        R                  5      U4S j5        [        TR                  TR                  :H  UU4S j5        [        TR
                  TR
                  :H  UU4S j5        U R                  R                  TR                  TR                  5        g )Nc                      > S[        T 5       3$ )Nz+expected 'value' to be a tensor, but got a r:   r*   s   r   r/   ,GluonSemantic.shared_store.<locals>.<lambda>  s    9deijoepdq7rr   c                  >   > STR                    ST R                    S3$ )Nzsource shape z and destination shape  must matchrD   r  r+   s   r   r/   r        u{{m3J8>>JZZefr   c                  >   > STR                    ST R                    S3$ )Nzsource dtype z and destination dtype r  r   r  s   r   r/   r    r  r   )	r   r"   r   r   rE   r   rw   create_local_storer   )rk   r  r+   s    ``r   shared_storeGluonSemantic.shared_store  sk    z%-/rsu{{hnn,f	hu{{hnn,f	h''Fr   c                    [        U[        R                  5      (       d  [        S[	        U5       35      e[        U[        R
                  5      (       d  [        S[	        U5       35      eUR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUR                  UR                  [        UR                  5      * S  :w  a%  [        S	UR                   S
UR                   35      eUR                  R                  U R                  5      nUR                  R                  U R                  5      nU R                  R                  X4[        UR                  5      UR                  R                   5      $ )NzIbank_conflicts expects the register layout to be a distributed_type, got zTbank_conflicts expects the shared layout to be a shared_memory_descriptor_type, got zregister shape z and shared shape r  z$mismatched dtypes between register (z) and shared (z	) layoutsz,bank_conflicts NYI for subslices. Got shape z and alloc_shape )r"   r   r   r   r;   r
  rE   r   r\   alloc_shaperV   r]   _to_irrw   get_shared_bank_conflictsrU   rY   )rk   distr_ty	shared_tyreg_attrshared_attrs        r   bank_conflictsGluonSemantic.bank_conflicts  s   (D$9$9::[\`ai\j[kln n )T%G%GHHfgklugvfwx  >>Y__,x~~.>>PQZQ`Q`Paalmnn8#6#666x7J7J6K>ZcZnZnYooxy  ??i33S5I4I4JKK>y>OO`ajavav`wx  ??))$,,7&&--dll;||55hTRZR`R`Ma6>6I6I6\6\^ 	^r   c                   ^ [        [        T[        [        45      U4S j5        [        U[        5      (       d  [	        U5      n[
        R                  " T5      m[        T[        [        45      (       a  [
        R                  " T5      $ [
        R                  " U R                  R                  TR                  U R                  5      U5      5      $ )Nc                      > S[        T 5       3$ )Nz2Expected a DistributedLayout or SharedLayout, got r:   r   s   r   r/   0GluonSemantic.to_linear_layout.<locals>.<lambda>1  s    KDQWL>Zr   )r   r"   r   r   rU   r   r   r   r   	constexprrw   r   r$  )rk   r]   rE   s    ` r   r   GluonSemantic.to_linear_layout/  s    z&#4l"CDZ	\ %&&KE**62fz+BCDD>>&))~~dll;;FMM$,,<WY^_``r   c                 N    U R                   R                  UR                  5        g r   )rw   create_local_deallocr   )rk   r  s     r   shared_deallocGluonSemantic.shared_dealloc=  s    ))(//:r   c                   ^^ TR                   n[        [        T[        5      U4S j5        [        [        UR                  [
        5      U4S j5        U R                  R                  TR                  U R                  5      TR                  5      n[        R                  " UR                  UR                  T5      nU R                  XE5      $ )Nc                     > ST  3$ )Nz9set_auto_layout must set to a distributed layout but got r   r   s   r   r/   /GluonSemantic.set_auto_layout.<locals>.<lambda>C  s    RSYRZ[r   c                  6   > ST R                   R                   3$ )Nz4set_auto_layout input must have auto layout but got )r;   r]   r*   s   r   r/   r7  E  s    MejjN_N_M`ar   )r;   r   r"   r   r]   r   rw   create_set_auto_layoutr$  r   r   r   r\   rE   r   )rk   r+   r]   src_tyr   res_tys    ``   r   r   GluonSemantic.set_auto_layout@  s    z&"34[	]z&--4a	c44V]]4<<5PRWR^R^_&&v'8'8&,,O{{6**r   c                 P  ^^^ [        [        T[        5      U4S j5        [        [        T[        5      U4S j5        [        [        T[        5      U4S j5        S/UR                  -  nTUT'   [	        UR
                  5      nTUT'   UR                  n[        R                  " UR                  XgUR                  R                  5      nU R                  n	U	R                  UR                  U	5      UR                  U5      n
[        R                   " U
40 UR"                  D6$ )Nc                     > ST  3$ )Nz&expected 'start' to be an int but got r   )r   s   r   r/   -GluonSemantic.memdesc_slice.<locals>.<lambda>K  s    1WX]W^/_r   c                     > ST  3$ )Nz'expected 'length' to be an int but got r   )lengths   r   r/   r?  L  s    2YZ`Ya0br   c                     > ST  3$ )Nz$expected 'dim' to be an int but got r   r   s   r   r/   r?  M  s    /STWSX-Yr   r   )r   r"   r#   r_   rU   rE   r]   r   r
  r   r;   r#  rw   create_memdesc_subslicer   r   r  __dict__)rk   r  r   rA  rA   offsetsrE   r]   r   rw   r   s     ```      r   memdesc_sliceGluonSemantic.memdesc_sliceJ  s    z%%'_`z&#&(bcz#s#%YZ#%X^^$c
//x}}OhOhi,,00'1BHOOU\],,VCr{{CCr   c                   ^ U R                  T5      m[        TR                  [        R                  :H  U4S j5        UR
                  SS  nU R                  T5      R                  mUR                  n[        R                  " UR                  X4U5      nU R                  nUR                  UR                  U5      UR                  T5      n[        R                  " U40 UR                  D6$ )Nc                  "   > ST R                    3$ )Nz%expected 'index' to be int32 but got r:   indexs   r   r/   -GluonSemantic.memdesc_index.<locals>.<lambda>Z  s    3XY^YcYcXd1er   r	   )	to_tensorr   r;   r   r   rE   r   r]   r
  r   rw   create_memdesc_indexr   r  rD  )rk   r  rK  rE   r]   r   rw   r   s     `     r   memdesc_indexGluonSemantic.memdesc_indexX  s    u%uzzTZZ')efqr"u%,,//uU,,--bhhw.?RWX,,VCr{{CCr   c                 r  ^^ [        [        T5      U4S j5        [        [        T5      [        TR                  5      :H  UU4S j5        T Vs/ s H  nTR                  U   PM     nnTR                  R
                  nUS [        U5      TR                  -
   nUT Vs/ s H   o5[        U5      TR                  -
  S  U   PM"     sn-  nU R                  R                  TR                  T5      nU R                  R                  U5      n[        R                  " UTR                  UXhS9$ s  snf s  snf )Nc                     > ST  3$ )Nz1all elements of 'order' must be integers but got r   )orders   r   r/   -GluonSemantic.memdesc_trans.<locals>.<lambda>d  r  r   c                  <   > ST R                    S[        T5       S3$ )Nzsource rank (z) and order length (z) must match)r_   rV   )r  rS  s   r   r/   rT  g  s    mHMM?2Fs5zlR^_r   r\   rE   r#  r]   )r   r,   rV   rE   r;   r#  r_   rw   create_memdesc_transr   get_gluon_layout_from_memdescr   r  r   )	rk   r  rS  r%   rE   r#  new_alloc_shaper   r]   s	    ``      r   memdesc_transGluonSemantic.memdesc_transc  s   |E"$ghJ#hnn--_	a -22Eq"E2mm//%&Gs;'7(--'GHW\]W\RSK(88==(H(IJ1MW\]]228??EJ;;FC,,VV[9HY 	Y 3 ^s   D/!'D4c                   ^^ [        [        T5      U4S j5        [        [        R                  " T5      [        R                  " TR                  5      :H  UU4S j5        U R
                  R                  TR                  T5      nU R
                  R                  U5      nTR                  R                  n[        U5      TR                  -
  nUS U [        T5      -   n[        R                  " UTR                   TUUS9$ )Nc                     > ST  3$ r  r   rD   s   r   r/   /GluonSemantic.memdesc_reshape.<locals>.<lambda>t  r  r   c                  (   > ST R                    ST 3$ )Nz)memdesc_reshape total elements mismatch: z -> rD   )r  rE   s   r   r/   r^  w  s    @ 'tE74r   rV  )r   r,   mathprodrE   rw   create_memdesc_reshaper   rX  r;   r#  rV   r_   rU   r   r  r   )rk   r  rE   r   r]   r#  
prefix_lenrY  s    ``     r   memdesc_reshapeGluonSemantic.memdesc_reshapes  s    |E"$ghIIe		(.. 995	
 44X__eL;;FCmm//%5
%kz2T%[@,,~~'
 	
r   c                   ^^^ [        [        T[        R                  5      U4S j5        [        [	        T5      U4S j5        [        [        T[        R
                  5      U4S j5        [        R                  " TTTT5      nU R                  R                  UR                  U R                  5      UR                  5      n[        R                  " U40 UR                  D6$ )Nc                     > ST  3$ )Nz'expected 'dtype' to be a dtype but got r   r  s   r   r/   3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  s    8_`e_f6gr   c                     > ST  3$ r  r   rD   s   r   r/   rh    r  r   c                     > ST  3$ r  r   r   s   r   r/   rh    r	  r   )r   r"   r   r   r,   r   r
  rw   create_memdesc_reinterpretr   r   r  rD  )rk   r  r   rE   r]   r   r   s     ```  r   memdesc_reinterpret!GluonSemantic.memdesc_reinterpret  s    z%,.gh|E"$ghz&$"3"34Q	S//ufeL88$,,9OQYQ`Q`a,,VCr{{CCr   c                 f    U(       a  [         R                  " X#U5      nOUnU R                  X5      $ r   )r   r   r   )rk   r   r   r   r]   r;  s         r   wrap_tensorGluonSemantic.wrap_tensor  s+    **9HFF{{1%%r   c                 ,  ^^^ U  H5  m[        [        TR                  [        R                  5      U4S j5        M7     U  Vs/ s H  oR                  R
                  PM     snmTS   m[        [        U4S jTSS   5       5      U4S j5        g s  snf )Nc                  $   > ST R                   < 3$ Nz#expected distributed_type but got: r:   )r   s   r   r/   2GluonSemantic._check_same_layout.<locals>.<lambda>  s    HklmlrlrkuFvr   r   c              3   ,   >#    U  H	  oT:H  v   M     g 7fr   r   )r$   ll0s     r   r&   3GluonSemantic._check_same_layout.<locals>.<genexpr>  s     0Kq7Ks   r	   c                     > ST  3$ )Nz3Expected inputs to have matching layouts, but got: r   )layoutss   r   r/   rt    s    LWIVr   )r   r"   r;   r   r   r]   r)   )xsr   rw  rz  s    `@@r   _check_same_layout GluonSemantic._check_same_layout  sp    A:affd&;&;<>vw *,-"Q66=="-QZs0GABK00V	X .s   Binputsreverse.c                   ^ ^^^ TS   R                   R                  m[        T5      nU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X%-  nT H$  nUR                   R                  T:X  a  M   S5       e   T R                  R	                  T Vs/ s H  ofR
                  PM     snX$5      mU" T5        TR                  5       (       d   e[        UUU U4S j[        [        T5      5       5       5      $ s  snf )Nr   z
scan axis z must be < inputs rank ()z(all scan inputs must have the same shapec              3      >#    U  H>  nTR                  TR                  U5      TU   R                  R                  T5      v   M@     g 7fr   r   
get_resultr;   r   )r$   r%   r~  scan_oprk   rE   s     r   r&   1GluonSemantic.associative_scan.<locals>.<genexpr>  sD      )' **7+=+=a+@&)..BWBWY^__'   AA	)	r;   rE   rV   rw   create_scanr   verifytuplerange)	rk   r~  r   region_builder_fnr  r_   tr  rE   s	   ``     @@r   associative_scanGluonSemantic.associative_scan  s    q	$$5zu#t#Wz$7OPTvUV%WW#!8LDA66<<5(T*TT(  ,,**f+EfHHf+EtU'"~~ )3v;') ) 	)	 ,Fs   C:c                   ^ ^^^^^	^
 Tc  [        U 4S jT 5       5      mSmTS   R                  R                  m
[        T
5      m[	        STs=:*  =(       a    T:  Os  UU4S j5        T R                  T5        [        T
5       VVs/ s H  u  pEUT:w  d  M  UPM     snnm	[        U
4S jT 5       5      (       d   S5       eT R                  R                  T Vs/ s H  ofR                  PM     snT5      mU" T5        TR                  5       (       d   e[        UUU	U 4S j[        [        T5      5       5       5      $ s  snnf s  snf )Nc              3   n   >#    U  H*  nTR                  XR                  R                  /S S9v   M,     g7f)F)r   N)r   numelr+   )r$   r  rk   s     r   r&   *GluonSemantic.reduction.<locals>.<genexpr>  s+     _X^ST4<<GGMM?<NX^s   25r   c                     > ST ST  3$ )Nz/expected reduction axis to be in the range [0, z
) but got r   r   r_   s   r   r/   )GluonSemantic.reduction.<locals>.<lambda>  s    +Z[_Z``jkojp)qr   c              3   T   >#    U  H  oR                   R                  T:H  v   M     g 7fr   )r;   rE   )r$   r  rE   s     r   r&   r    s     9&Q66<<5(&s   %(z-all reduction inputs must have the same shapec              3      >#    U  H>  nTR                  TR                  U5      TU   R                  R                  T5      v   M@     g 7fr   r  )r$   r%   r~  	reduce_opr   rk   s     r   r&   r    sD      )' **9+?+?+BF1INNDYDY[dee'r  )r  r;   rE   rV   r   r|  r[   r)   rw   create_reducer   r  r  )rk   r~  r   r  r%   sr  r_   r  r   rE   s   ```    @@@@r   	reductionGluonSemantic.reduction  s   <_X^__FDq	$$5zqD4!qr'#,U#3A#341qDyQ#3A	9&999j;jj9LL..&/I&Q&/I4P	)$!!!! )3v;') ) 	) B 0Js   	EEE	num_binsmaskc                    [        [        UR                  5      S:H  S 5        [        UR                  R	                  5       S 5        [        US LS 5        UbN  U R                  X15      u  p1[        UR                  R                  R                  5       S 5        UR                  nUR                  U R                  5      nU R                  R                  UR                  X#U5      nU R                  U[        R                  U/U5      $ )Nr	   c                      g)Nz histogram only supports 1D inputr   r   r   r   r/   )GluonSemantic.histogram.<locals>.<lambda>  s    .Pr   c                      g)Nz%histogram only supports integer inputr   r   r   r   r/   r    s    -Tr   c                      g)Nz'histogram requires a destination layoutr   r   r   r   r/   r    s    +Tr   c                      g)Nz"Mask must have boolean scalar typer   r   r   r   r/   r    s    7[r   )r   rV   rE   r   is_intr   r;   r   is_boolr   r$  rw   create_histogramro  r   r   )rk   r   r  r  r]   layout_attrr   s          r   	histogramGluonSemantic.histogram  s    s5;;1$&PQu{{!!#%TUvT!#TU33D@KD499##++-/[\;;DmmDLL1..u||X[Y

XJGGr   c           	         [        US LS 5        [        US 5        [        [        UR                  5      S:H  S 5        [        R                  " UR
                  R                  UR                  S   UR                  S   -   /U5      nU R                  U R                  R                  UR                  UR                  UR                  U R                  5      5      U5      $ )Nc                      g)Nz!cat requires a destination layoutr   r   r   r   r/   #GluonSemantic.cat.<locals>.<lambda>  s    +Nr   c                      g)Nz;current implementation of `cat` always may reorder elementsr   r   r   r   r/   r    s    $ar   r	   c                      g)Nzcat requires a rank-1 inputr   r   r   r   r/   r    s    ,Ir   r   )r   rV   rE   r   r   r;   r   r   rw   
create_catr   r   )rk   r   r   r   r]   ret_types         r   catGluonSemantic.cat  s    vT!#NO{abs399~"$IJ((399Q<#))TU,;V:WY_`{{4<<223::szz8>>Z^ZfZfKghjrssr   srcrK  c                   ^^^^ [        [        TR                  [        R                  5      U4S j5        [        [        TR                  [        R                  5      U4S j5        [        TR                  R
                  R                  5       U4S j5        [        TR                  R                  5      m[        [        TR                  R                  5      T:H  S 5        [        T* Ts=:*  =(       a    T:  Os  UU4S j5        TS:  a  TT-  m[        T5       HL  nUT:X  a  M  [        TR                  R                  U   TR                  R                  U   :H  U4S j5        MN     U R                  R                  TR                  TR                  T5      nU R                  UTR                  R
                  TR                  R                  TR                  R                  5      $ )Nc                  $   > ST R                   < 3$ rs  r:   )r  s   r   r/   &GluonSemantic.gather.<locals>.<lambda>  s    FijmjrjriuDvr   c                  $   > ST R                   < 3$ rs  r:   rJ  s   r   r/   r    s    <UZZNKr   c                  8   > ST R                   R                  < 3$ )Nz&expected integer scalar type but got: )r;   r   rJ  s   r   r/   r    s    5[\a\f\f\m\m[p3qr   c                      g)Nz0source and index tensors must have the same rankr   r   r   r   r/   r    s    6hr   c                     > ST  ST S3$ )Nzgather axis z must be < source rank (r  r   r  s   r   r/   r    s    |D6AYZ^Y__`-ar   r   c                     > ST  S3$ )Nz
index dim z( must match the corresponding source dimr   )r   s   r   r/   r    s    *TF*RSr   )r   r"   r;   r   r   r   r  rV   rE   r  rw   create_gatherr   ro  r]   )rk   r  rK  r   dgatherr_   s    ```  @r   r  GluonSemantic.gather  sS   z#((D$9$9:<vwz%**d&;&;<K	Muzz  '')+qr388>>"s5::##$,.hiu##t#%ab!8DLDtADy

  #sxx~~a'88S  ++CJJdK9I9I5::K\K\]]r   c                     U R                   R                  UR                  UR                  U R                   5      U5      n[	        UR
                  R                  5      nXS==   S-  ss'   U R                  XBU5      $ )NrF   )rw   create_fp4_to_fpr   r   rU   r;   rE   r   )rk   r  	elem_typer   resultrE   s         r   	fp4_to_fpGluonSemantic.fp4_to_fp  s]    ..szz9??4<<;XZ^_SXX^^$q--fGGr   worker_num_warpsworker_num_regsc           	      .  ^ U H4  u  nm[        [        T[        [        R                  45      U4S j5        M6     [	        U5      S:  d   S5       eUS   u  pg[	        U5      S-
  nUSS  n	U[	        U5      :X  d   SU S[	        U5       S35       eU[	        U5      :X  d   SU S[	        U5       S35       eU R
                  n
U
R                  5       nU
R                  5       nU
R                  U5        UR                  Xg0 S	9n/ nUb  [        U5      nU
R                  U5        U Vs/ s H  oR                  5       PM     nnU	 VVs/ s H  u  nn[        U5      PM     nnn[        U/ 5      nU
R                  U5        U
R                  UUU5      nUR!                  5       R#                  U5        UR%                  U5        U
R'                  UR)                  5       / 5        U
R+                  U5      nU Vs/ s H  nUR                  5       PM     nnSn[-        U	5       H  u  nu  nm[/        UU   S
9nU
R'                  UR1                  U5      U5      nUU   n[3        [	        U5      5       Vs/ s H  nUR5                  UU-   5      PM     nn[7        UT Vs/ s H  nUR8                  PM     sn5      nUR                  UU0 US9  U
R;                  5         U[	        U5      -  nM     U
R=                  UR?                  5       5        [3        [	        U5      5       Vs/ s H  nURA                  U5      PM     nnUc  g [        [7        X Vs/ s H  oR8                  PM     sn5      5      $ s  snf s  snnf s  snf s  snf s  snf s  snf s  snf )Nc                      > S[        T 5       3$ )Nz9function arguments must be a tuple of arguments, but got r:   )argss   r   r/   /GluonSemantic.warp_specialize.<locals>.<lambda>  s    VW[\`WaVbcr   r	   z8expected at least one function for the default partitionr   zwarp specialize got z partitions but z warp countsz register counts)kwargsr<   )r  caller_context)!r   r"   r  r   rV   rw   get_insertion_point	new_blockset_insertion_point_to_startcall_JitFunctionr   create_warp_yieldget_typesumrestore_insertion_pointcreate_warp_specializeget_default_region	push_backset_requested_registerscreate_block_with_parentget_partition_op_holder!create_warp_specialize_partitionsr[   rh   
get_regionr  get_argumentr   r;   create_warp_returnset_insertion_point_afterget_operationr  )rk   functions_and_argsr  r  	generator_default_partitiondefault_argsnum_partitionsworkersrw   	insert_ptdefault_blockdefault_resultsmlir_resultsrresult_typesr  worker_args	mlir_argsws_oppartitions_oparg	arg_typesarg_itr%   funcr  blockj
block_argss                    `             r   warp_specializeGluonSemantic.warp_specialize  s   )GAt:dUDJJ$78ce * %&!+g-gg+*<Q*?'/014$QR("
 
 	f!.!11A#FVBWAXXde	f 
 "
 
 	i!.!11A#oBVAWWgh	i 
 ,,//1	  ))+,,];#445F]_4`&/@L!!,/.:;l

l; BIIga+D1IR(	''	2..|YHXY  ",,];%%o6 	(()F)F)H"MAA.Q/89yS\\^y	9(1OA|d/:J1:MNN44]5M5Ma5PR[\E#AIBGIBWXBWQ%,,VaZ8BWJX,Zd9Sds#((d9STJ&&tZSa&b&&(c)n$F  2 	))%*=*=*?@5:3|;L5MN5M((+5MN"(7X17XYZZ; < J : Y9S O 8Ys*   *M3
M8:M>4NN$NNc                 j    [         R                  " U R                  R                  R                  5      $ r   )r   r/  rw   optionsnum_ctasrp   s    r   r   GluonSemantic.num_ctas6  s!    ~~dll22;;<<r   c                    UR                   bK  [        UR                   [        5      (       d   e[        R                  " UR                   R
                  5      $ [        R                  " U R                  R                  R
                  5      $ r   )r  r"   rh   r   r/  r=   rw   r  )rk   r  s     r   r=   GluonSemantic.num_warps9  sb    ##/i668JKKKK>>)":":"D"DEE~~dll22<<==r   r   )F)8rz   r{   r|   r}   r   r   langr   __annotations__rl   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r  r  r   r*  r   r3  r   rF  rO  rZ  rd  rl  ro  staticmethodr|  r   r  r  r  r  r  r  r  r   r=   r~   __classcell__)r   s   @r   r   r   a   s   [[FD '_49 c  T T T T&5h 58 5 5Xx XE(H*<$= X5X 5U3Z 5H 5+( +5: +( +  x H :95X 5$s) 5$ 5
+1+
W+G^4a;+D	DY 
,D& X X)x'9 ) )"&)+03+?)*) 2 )# )UZ[ceh[hUi )(
Hx 
H3 
Hh 
HS[ 
Htx th tT th t^( ^8 ^3 ^8 ^,HX H8 H:[HSM :[dlmpdq :[x=> >r   r   r   )#typingr   r   r   r   r   r`  triton.language.semanticr    r
   r   _layoutsr   r   r   r   r   r   triton._C.libtriton.gluon_irr   r   triton.compiler.code_generatorr   r   r   r   r  rT   r   r,   rf   __triton_builtin__rh   r   r   r   r   <module>r     s    ; ;  3  x x P T: <F ! !xC0 !
R:z /3  +	M 	M\>N8, \>r   