
    biL                        S SK Jr  S SKJrJr  S SKJrJr  S SKJ	r	  S SK
Jr  SS/r\" SS	9 " S
 S\5      5       r\" SS	9 " S S\5      5       rg)    )annotations)	dataclassfield)ListOptional)_unwrap_if_constexpr)DistributedLayoutAMDMFMALayoutAMDWMMALayoutT)frozenc                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
rS\S'   S
rS\S'   \" \	S9r
S\S'   U 4S jrS rSS jrS rS r\S 5       rSrU =r$ )r
      aV  
Represents a layout for AMD MFMA (matrix core) operations.

Args:
    version (int): The GPU architecture.
    instr_shape (List[int]): The shape in the form of (M, N, K) of the matrix.
    transposed (bool): Indicates the result tensor is transposed so that each thread holds consecutive elements in the same row instead of column, which is good for chained dot and global write.
    warps_per_cta (List[int]): The warp layout in the block.
    element_bitwidth Optional(int): Bit width of the output element type. Supported values are 32 and 64. Defaults to 32.
    tiles_per_warp Optional(List[int]): The tile layout within a warp. Defaults to unit tile layout, i.e., single tile on all dimensions.
    cga_layout (Optional[List[List[int]]]): Bases describing CTA tiling.

Current supported versions:

- 1: gfx908
- 2: gfx90a
- 3: gfx942
- 4: gfx950
intversion	List[int]instr_shapebool
transposedwarps_per_ctaNzOptional[int]element_bitwidthOptional[List[int]]tiles_per_warpdefault_factoryList[List[int]]
cga_layoutc                  > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        U R                  c  [        R                  U SS5        U R                  c.  [        R                  U SS/[        U R                  5      -  5        [        R                  U S	U R                  5        U R                  5         g )
Nr   r   r   r   r   r          r   )super__setattr__r   r   r   r   r   r   r   objectlenr   verify)self	__class__s    i/home/james-whalen/.local/lib/python3.13/site-packages/triton/experimental/gluon/language/amd/_layouts.py__post_init__AMDMFMALayout.__post_init__,   s   I';DLL'IJM+?@P@P+QRL*>t*OPO-A$BTBT-UV.0DTEZEZ0[\,.B4CVCV.WX  (t%7<&t%5sSASAS=T7TU4t?    c           	         UR                  U R                  U R                  U R                  U R                  U R
                  U R                  U R                  5      $ N)get_amd_mfma_layoutr   r   r   r   r   r   r   r%   builders     r'   _to_irAMDMFMALayout._to_ir<   sN    **LLOOOO!!
 	
r*   c                   S nU" U R                   (       a:  U R                    Vs/ s H"  nSR                  [        [        U5      5      PM$     snOS 5      nSU R                   SU" U R
                  5       SU R                   SU" U R                  5       SU R                   SU" U R                  5       SU S3$ s  snf )Nc                H    U c  gSR                  [        [        U 5      5      $ N _joinmapstrxs    r'   	stringify'AMDMFMALayout.mangle.<locals>.stringifyI       y88CQK((r*   ~MFMA_r6   _MFMA)
r   r8   r9   r:   r   r   r   r   r   r   r%   r=   vecr   s       r'   mangleAMDMFMALayout.mangleG   s    	)
 W[WfWf4??S?CS# 7?Slpq
t||nAi0@0@&A%B!DOOCTTUV_`d`r`rVsUttuvz  wL  wL  vM  MN  OX  Y]  Yl  Yl  Om  Nn  no  pz  o{  {@  A  	A  Ts   )B=c                  ^ U R                   S:  a  U R                   S::  d   S5       e[        U R                  5      S:X  d   S5       eSS/SS/SS/SS//nU R                  S	S
 U;   d   SU R                   35       eU R                  S;   d   S5       e[        U R                  5      m[        U4S jU R                   5       5      (       d   S5       eg )Nr      z#version must be in the [1, 4] range   z,instr_shape must follow the (M, N, K) formatr      @   r      zinvalid intrinsic shape )r   rK   z!element bitwidth must be 32 or 64c              3  @   >#    U  H  n[        U5      T:H  v   M     g 7fr,   r#   .0rD   ranks     r'   	<genexpr>'AMDMFMALayout.verify.<locals>.<genexpr>Y        ?3s8t#   cga_layout basis rank mismatch)r   r#   r   r   r   allr   )r%   valid_shapesrQ   s     @r'   r$   AMDMFMALayout.verifyQ   s    ||q T\\Q%6]8]]64##$)Y+YY)R2r(RGaW=!$4c8PQUQaQaPb6cc4$$0U2UU04%%&?t???aAaa?r*   c                T   [        U R                  [        U R                  5      U R                  [        U R
                  5      U R                  (       a  U R                  OS U R                  (       a  [        U R                  5      OS [        S U R                   5       5      45      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr,   tuplerP   rD   s     r'   rR   )AMDMFMALayout.__hash__.<locals>.<genexpr>c        8%**   )	hashr   r]   r   r   r   r   r   r   r%   s    r'   __hash__AMDMFMALayout.__hash__[   s{    LL$""#OO$$$%%)%:%:D!!*.*=*=E$%%&4888
  	r*   c                ,    [        U R                  5      $ r,   r#   r   rc   s    r'   rQ   AMDMFMALayout.rankf       4%%&&r*    returnr:   )__name__
__module____qualname____firstlineno____doc____annotations__r   r   r   listr   r(   r0   rE   r$   rd   propertyrQ   __static_attributes____classcell__r&   s   @r'   r
   r
      su    & L&*m**.N'."'"=J= 	
Ab	 ' 'r*   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S	rS
\S'   S	rS
\S'   \" \	S9r
S\S'   U 4S jrS rSS jrS rS r\S 5       rSrU =r$ )r   k   a  
Represents a layout for AMD WMMA (matrix core) operations.

Args:
    version (int): Indicates the GPU architecture.
    transposed (bool): Indicates the result tensor is transposed.
    warps_per_cta (List[int]): Number of warps per CTA.
    instr_shape (Optional[List[int]]): Instruction shape (M, N, K). Defaults to (16, 16, 16).
    cga_layout (Optional[List[List[int]]]): Bases describing CTA tiling.

Current supported versions:

- 1: RDNA3; e.g., gfx1100, gfx1101
- 2: RDNA4; e.g., gfx1200, gfx1201
- 3: gfx1250
r   r   r   r   r   r   Nr   r   r   r   r   r   c                P  > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        U R                  c  S/[        U R
                  5      -  nO[        U R                  5      n[         TU ]  SU5        U R                  b  [        U R                  5      O/ SQn[         TU ]  S[        U5      5        [        R                  U SU R                  5        U R                  5         g )	Nr   r   r   r   r   )rJ   rJ   rJ   r   r   )r    r!   r   r   r   r   r   r#   r   r"   r   r$   )r%   r   r   r&   s      r'   r(   AMDWMMALayout.__post_init__   s    I';DLL'IJL*>t*OPO-A$BTBT-UV&S3t'9'9#::N1$2E2EFN,n=@D@P@P@\*4+;+;<bnM+?+LM4t?r*   c                    UR                  U R                  U R                  U R                  U R                  U R
                  U R                  5      $ r,   )get_amd_wmma_layoutr   r   r   r   r   r   r.   s     r'   r0   AMDWMMALayout._to_ir   sE    **LLOOOO
 	
r*   c                j   S nU" U R                   (       a:  U R                    Vs/ s H"  nSR                  [        [        U5      5      PM$     snOS 5      nSU R                   SU R
                   SU" U R                  5       SU" U R                  5       SU" U R                  5       SU S3$ s  snf )Nc                H    U c  gSR                  [        [        U 5      5      $ r4   r7   r;   s    r'   r=   'AMDWMMALayout.mangle.<locals>.stringify   r?   r*   r@   WMMA_r6   _WMMA)	r   r8   r9   r:   r   r   r   r   r   rC   s       r'   rE   AMDWMMALayout.mangle   s    	)
 W[WfWf4??S?CS# 7?Slpq
t||nAdoo%6a	$BTBT8U7VVWXabfbubuXvWwwx  zC  DH  DT  DT  zU  yV  VW  Xb  Wc  ch  i  	i  Ts   )B0c                   ^ U R                   S:  a  U R                   S::  d   S5       e[        U R                  5      m[        U4S jU R                   5       5      (       d   S5       eg )Nr   rI   z#version must be in the [1, 3] rangec              3  @   >#    U  H  n[        U5      T:H  v   M     g 7fr,   rN   rO   s     r'   rR   'AMDWMMALayout.verify.<locals>.<genexpr>   rT   rU   rV   )r   r#   r   rW   r   )r%   rQ   s    @r'   r$   AMDWMMALayout.verify   sW    ||q T\\Q%6]8]]64%%&?t???aAaa?r*   c                >   [        U R                  U R                  [        U R                  5      U R
                  (       a  [        U R
                  5      OS U R                  (       a  [        U R                  5      OS [        S U R                   5       5      45      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr,   r\   r^   s     r'   rR   )AMDWMMALayout.__hash__.<locals>.<genexpr>   r`   ra   )rb   r   r   r]   r   r   r   r   rc   s    r'   rd   AMDWMMALayout.__hash__   sr    LLOO$$$%*.*=*=E$%%&4'+'7'7E$""#T888
  	r*   c                ,    [        U R                  5      $ r,   rg   rc   s    r'   rQ   AMDWMMALayout.rank   ri   r*   rj   rk   )rm   rn   ro   rp   rq   rr   r   r   r   rs   r   r(   r0   rE   r$   rd   rt   rQ   ru   rv   rw   s   @r'   r   r   k   so      L'+K$+*.N'."'"=J="
ib ' 'r*   N)
__future__r   dataclassesr   r   typingr   r   triton.language.corer   +triton.experimental.gluon.language._layoutsr	   __all__r
   r   rj   r*   r'   <module>r      sl    " ( ! 5 I  $X'% X' X'v $O'% O' O'r*   