
    bi                       S SK Jr  S SKJrJrJr  S SKJr  S SKJ	s  J
s  Js  Jr  S SKJrJr  S SKJrJr  \(       a  S SKJr  S SKJr  / S	Qr\" S
S9 " S S\R.                  5      5       r\ " S S\R2                  5      5       r\ S       SS jj5       r\ S     SS jj5       r\ S SS jj5       r\SSS jj5       rg)    )annotations)ListTupleTYPE_CHECKING)	dataclassN)PaddedSharedLayoutSwizzledSharedLayout)builtin_unwrap_if_constexpr)ir)shared_memory_descriptor)
async_load
async_waitmake_tensor_descriptortensor_descriptortensor_descriptor_typeT)eqc                  t    \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   SS
 jrSS jrSS jrSS jr	SS jr
Srg)r      z!The type for a tensor descriptor.zttgl.block_type
block_typezttgl.tuple_type
shape_typestrides_type)PaddedSharedLayout | SwizzledSharedLayoutlayoutc                <    SU R                    SU R                   S3$ )Nztensor_descriptor<z, >)r   r   selfs    l/home/james-whalen/.local/lib/python3.13/site-packages/triton/experimental/gluon/language/amd/gfx1250/tdm.py__str__tensor_descriptor_type.__str__   s     #DOO#4Bt{{m1EE    c                    X   nUS-  nU R                   R                  X5      u  pBU R                  R                  X5      u  pR[        X4XP5      nXb4$ )N   )r   _unflatten_irr   r   )r   handlescursorhandleshapestridesvalues          r   r%   $tensor_descriptor_type._unflatten_ir   sT    !55gF++99'J!&?}r"   c                    U R                   R                  R                  5       nUR                  U R                   R	                  U5      UU R
                  R                  U5      5      $ N)r   
element_tyis_int_signed!get_tensor_descriptor_layout_typeto_irr   _to_ir)r   builder	is_signeds      r   r3   tensor_descriptor_type._to_ir$   sT    OO..<<>	88OO!!'*KKw'
 	
r"   c                    UR                  U R                  U5      5        U R                  R                  X5        U R                  R                  X5        g r.   )appendr3   r   _flatten_ir_typesr   )r   r4   outs      r   r9   (tensor_descriptor_type._flatten_ir_types,   s=    

4;;w'())'7++G9r"   c           	         SU R                   R                  5        SU R                  R                  5        SU R                  R                  5        SU R                  R                  5        S3	$ )NTD_)r   mangler   r   r   r   s    r   r?   tensor_descriptor_type.mangle1   sb    DOO**,-Qt/E/E/G.H$J[J[JbJbJdIeefgkgrgrgygyg{f||~r"    N)returnstr)r&   List[ir.value]r'   intrB   zTuple[tensor_descriptor, int])r4   
ir.builderrB   zir.type)r4   rF   r:   zList[ir.type]rB   None)__name__
__module____qualname____firstlineno____doc____annotations__r    r%   r3   r9   r?   __static_attributes__rA   r"   r   r   r      s8    +!!55F
:
@r"   r   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   SS
 jr\S 5       r\S 5       r	\S 5       r
\S 5       rSrg)r   5   z4A descriptor representing a tensor in global memory.zir.valuer(   z
ttgl.tupler)   r*   r   typec                    UR                  U R                  5        U R                  R                  U5        U R                  R                  U5        g r.   )r8   r(   r)   _flatten_irr*   )r   r&   s     r   rS   tensor_descriptor._flatten_ir>   s6    t{{#

w'  )r"   c                .    U R                   R                  $ r.   )rQ   r   r   s    r   r   tensor_descriptor.block_typeC   s    yy###r"   c                B    U R                   R                  R                  $ r.   )rQ   r   r)   r   s    r   block_shapetensor_descriptor.block_shapeG   s    yy##)))r"   c                B    U R                   R                  R                  $ r.   )rQ   r   r/   r   s    r   dtypetensor_descriptor.dtypeK   s    yy##...r"   c                .    U R                   R                  $ r.   )rQ   r   r   s    r   r   tensor_descriptor.layoutO   s    yyr"   rA   N)r&   rD   rB   rG   )rH   rI   rJ   rK   rL   rM   rS   propertyr   rX   r[   r   rN   rA   r"   r   r   r   5   sr    >
  *
 $ $ * * / /    r"   r   c                   [        U5      nSUs=::  a  S::  d  O   SU S35       e[        U5      U:X  d   SU S[        U5       35       e[        U5      U:X  d   SU S[        U5       35       e[        U R                  [        R                  5      (       d   S	5       e[        U5      n[        U[        [        45      (       d   S
5       e[        U[        5      (       a  UR                  S:X  d   S5       eU R                  nUR                  USS9nUR                  USS9n	[        R                  " U5      n[        R                  " U5      n[        R                  " U R                  R                  U5      n
[        XR                  UR                  U5      nUR!                  S5      nUR"                  R%                  UR'                  UR"                  5      XxX5      n[)        XX+5      $ )a  Make a tensor descriptor object.

Args:
    base (tensor): base pointer of the tensor in global memory.
    shape (List[int]): shape of the tensor.
    strides (List[int]): strides of the tensor.
    block_shape (List[int]): block shape of the tensor.
    layout (PaddedSharedLayout | SwizzledSharedLayout): the layout of the tensor in shared memory.

Returns:
    tensor_descriptor: the created tensor descriptor object
r$      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got zExpected base to be a pointerzBExpected layout to be a PaddedSharedLayout or SwizzledSharedLayoutz3Expected max_phase to be 1 for SwizzledSharedLayoutFrequire_i64Tzero)len
isinstancer[   ttglpointer_typer   r   r	   	max_phaser(   _convert_to_ir_valuestupler   rQ   r/   r   _str_to_padding_optionr4   create_make_tensor_descriptorr3   r   )baser)   r*   rX   r   	_semanticndimbase_handleshape_handlesstride_handlesr   rQ   paddingr(   s                 r   r   r   T   s     u:D>>O=dV;OO>w<4R9TF2CCL>!RR{t#m'DTFJ^_bcj_k^l%mm#djj$"3"344U6UU4!&)Ff13GHII MLMI&.//1$[&[[$++K33Eu3MM44W$4ONJJuEjj!G!5!5{CJ!*jj',,OD..v6G<<T[[IZIZ=[]h=KVF VG::r"   c                2   UR                  USS9nUR                  U5      nUR                  n[        U5      nUb  UR                  O[        R
                  R                  5       nUR                  R                  U R                  XbR                  UU5        g)a  Load a block of tensor specified in tensor descriptor from global memory to shared memory asynchronously.

Args:
    src (tensor_descriptor): the source tensor descriptor.
    offsets (List[int]): the offsets from the base pointer in the tensor descriptor.
    dest (shared_memory_descriptor): the shared memory destination to store the loaded data.
    pred (bool, optional): Predicate to enable or disable the load. Defaults to True.
    mbarrier (shared_memory_descriptor, optional): The barrier object to signal "arrive" on.
Frb   N)	rj   	to_tensorr(   r   rg   r   r+   r4   %create_async_tdm_copy_global_to_local)	srcoffsetsdestpredmbarrierro   offset_handlespred_handlembarrier_handles	            r   r   r      s     44W%4PNt$D++K#H-H)1)=hoo477==?O;;CJJXcXcep<KMr"   c                    UR                  USS9nUR                  R                  U R                  XBR                  5        g)aW  Store a block of tensor specified in tensor descriptor from shared memory to global memory asynchronously.

Args:
    dest (tensor_descriptor): the destination tensor descriptor.
    offsets (List[int]): the offsets from the base pointer in the tensor descriptor.
    src (shared_memory_descriptor): the shared memory source to load the data.
Frb   N)rj   r4   %create_async_tdm_copy_local_to_globalr(   )rz   ry   rx   ro   r}   s        r   async_storer      s9     44W%4PN;;DKKYcYcdr"   c                P    [        U 5      n UR                  R                  U 5        g)zWait for the outstanding asynchronous tensor operations to complete.

Args:
    num_outstanding (int): number of outstanding async tensor operations to wait for.
N)r   r4   create_async_tdm_wait)num_outstandingro   s     r   r   r      s"     +?;O++O<r"   r.   )rn   zttgl.tensorr)   "List[ttgl.constexpr | ttgl.tensor]r*   r   rX   zList[ttgl.constexpr]r   r   rB   r   )TNN)rx   r   ry   r   rz   r   r{   boolr|   r   rB   rG   )rz   r   ry   r   rx   r   rB   rG   )r   N)rB   rG   )
__future__r   typingr   r   r   dataclassesr   (triton.experimental.gluon.language._coreexperimentalgluonlanguage_corerg   +triton.experimental.gluon.language._layoutsr   r	   r
   r   	triton._Cr   r   __all__	base_typer   
base_valuer   r   r   r   r   rA   r"   r   <module>r      s   " - - ! 7 7 ` RQ
o d!@T^^ !@ !@H      < 	 Y](;$F(;Ui(;#L(;ar(; 	(;V 	W[MM,DM`dM 	M( 	
e#'
e 	
e 	= 	=r"   