
    bi-                     f    S SK r S SKJr  S SKJr  S SKJr  SSKJr  SSK	J
r
  SS	K	Jr  S
 rS rS rg)    N)knobs)_core)_check   )_unwrap_if_constexpr)DotOperandLayout   )AMDWMMALayoutc                   ^  [        US LS 5        UR                  R                  n[        [        U[        5      =(       a    UR
                  T :H  U 4S j5        UR                  R                  n[        [        U[        5      =(       a:    [        UR                  [        5      =(       a    UR                  R
                  T :H  S 5        UR                  R                  n[        [        U[        5      =(       a:    [        UR                  [        5      =(       a    UR                  R
                  T :H  S 5        g )Nc                      g)Nzacc is required r       e/home/james-whalen/.local/lib/python3.13/site-packages/triton/experimental/gluon/language/amd/_ops.py<lambda>_verify_wmma.<locals>.<lambda>   s    $5r   c                     > ST  3$ )Nz@Expected layout to be an instance of AMDWMMALayout with version r   )versions   r   r   r      s    RSZR[\r   c                      g)NzOExpected a's layout to be a DotOperandLayout with parent matching AMDWMMALayoutr   r   r   r   r   r          ar   c                      g)NzOExpected b's layout to be a DotOperandLayout with parent matching AMDWMMALayoutr   r   r   r   r   r      r   r   )r   typelayout
isinstancer
   r   r   parent)r   abaccr   a_layoutb_layouts   `      r   _verify_wmmar       s    
3d?56XX__F
6=)Gfnn.G\^ vv}}H
8-. 	/:hoo}3] 	/OO##w.ac
 vv}}H
8-. 	/:hoo}3] 	/OO##w.acr   c           	          [        XX#5        UR                  XU[        R                  R                  SUR
                  S9R                  n[        R                  " XSR                  5      $ )zAShared implementation for AMD WMMA operations for Gluon builtins N)input_precisionmax_num_imprecise_acc	out_dtype)
r    dotr   languagefp32_defaultdtypehandlettgltensorr   )r   r   r   r   semanticr)   s         r   _wmmar-   !   sT    Q$\\!U^^5P5Phl$'II  //5v ;;vxx((r   c	                    ^ ^^^^ S mUU UUU4S jn	U	" SX5      nU	" SXE5      nTR                  T XTXEUSSS[        R                  S9n
[        R                  " U
R                  UR
                  5      $ )zEShared implementation for AMD WMMA scaled and MFMA scaled operation. c                     UR                   R                   Vs/ s H  o3PM     nnUnUR                  S:X  a  SOSnU S:X  a  US   U-  nUS-  US'   U$ US   U-  nUS-  US'   US   US   sUS'   US'   U$ s  snf )Ne2m1r   r	   r       )r   shapevalue)op_idxoperandformatsoperand_shapescale_shapeunpack_factorks           r   _get_scale_shape%_mma_scaled.<locals>._get_scale_shape-   s    $+LL$6$67$6q$67##\\V3Q;B-/A2gKO
  B-/A2gKO/:2B,KO[_ 8s   A7c                   > U S:X  a  TOT	nT" XU5      n[        U[        R                  5      (       aQ  UR                  R                  S:w  a7  [
        R                  " U5      UR                  R                  :X  d   S5       eU$ T
" UR                  R                  U5      n[        U5      nUc  SOUnTR                  XF[        R                  U5      $ )Nr   r	   zIncompatible scale shape   )r   r*   r+   numelr5   mathprodr   r   r   fulluint8)r6   scaler8   r7   r;   scale_layoutscale_valuer>   r   r   scale_fnr,   s          r   #_create_and_broadcast_default_scale8_mma_scaled.<locals>._create_and_broadcast_default_scale:   s    {!&v?eT[[))ekk.?.?1.D 99[)U[[->->>Z@ZZ>L 3 3[A*51)1d{}}[tzz<PPr   r   r	   FT)	fast_math
lhs_k_pack
rhs_k_packr$   )
dot_scaledr*   float32r+   r)   r   )r   a_scalea_formatr   b_scaleb_formatr   rJ   r,   rK   outputr>   s   `  `   ``  @r   _mma_scaledrW   *   sv    Q Q 2!WGG1!WGG  Gq'S\anr,0DLL ! JF;;v}}chh//r   )rC   tritonr   "triton.experimental.gluon.languager   r*   ,triton.experimental.gluon.language._semanticr   r   _layoutsr   r
   r    r-   rW   r   r   r   <module>r\      s*      < ? ( ' #c*)#0r   