
    ΅iD                        % S r SSKrSSKrSSKrSSKJrJrJrJrJ	r	J
r
  SSKrSSKrSSKrSSKr\R                  R!                  5       r\=(       a    \R                  R%                  5       S:  r\(       a  \R(                  " S5      OSr\(       a
  \" S 5      rO	\" S 5      r\" S 5      r\" S	 5      r\" S
 5      r\" S 5      r\" S 5      r\" S 5      r\" S 5      r\" S 5      r\" S 5      r\" S 5      r \" S 5      r!\" S 5      r"\" S 5      r#\" S 5      r$\" S 5      r%\" S 5      r&S r'S r(S r)S r*S r+S r,S r-S r.\" S  5      r/\0\1S!'   \" S" 5      r2\0\1S#'   \" S$ 5      r3\0\1S%'   \" S& 5      r4\0\1S''   \=(       a    \(       + r5\0\1S('   \" S) 5      r6\0\1S*'   \" S+ 5      r7\0\1S,'   \" S- 5      r8\0\1S.'   S/ r9S0 r:S1 r;S2 r<\" S3 5      r=\0\1S4'   \" S5 5      r>\0\1S6'   \" S7 5      r?\0\1S8'   \" S9 5      r@\0\1S:'   \(       a    SSKArB\BR                  R                  5       rDOS;rDS;qGS< rH\R                  S= 5       rJ\R                  SSS> j5       rK\R                  S? 5       rLSSS@SA.SB jjrMSC rNSD rOSE rPSF rQSG rRSH rS\R" 5       rT\S" 5       rUSI\R                  R                  S4SJ jrXSI\R                  \R                  R                  S4SK jrZSL r[SM r\SN r]SO r^SP r_SQr`\R                  " \R                  R                  SL =(       a
    \P" 5       \`:  SRR                  " \`6 5      re\(       d"  \R                  R!                  5       (       a   egg! \E\F4 a    S;rDS;r GN:f = f)Tz>This file is allowed to initialize CUDA context when imported.    N)LazyVal
TEST_NUMBATEST_WITH_ROCM	TEST_CUDA
IS_WINDOWSIS_MACOS   zcuda:0c                      [         $ N)r        ]/home/james-whalen/.local/lib/python3.13/site-packages/torch/testing/_internal/common_cuda.py<lambda>r      s    r   c                      [         =(       a@    [        R                  R                  R	                  [        R
                  " S[        S95      $ )N      ?device)r   torchbackendscudnnis_acceptabletensorCUDA_DEVICEr   r   r   r   r      s1    !wu~~/C/C/Q/QRWR^R^_ajuRv/w!wr   c                  l    [         (       a(  [        R                  R                  R	                  5       $ S$ )Nr   )
TEST_CUDNNr   r   r   versionr   r   r   r   r      s$    zzU^^%9%9%A%A%C%XWX%Xr   c                      [         R                  R                  (       a<  [        S [         R                  R                  R	                  S5      S S  5       5      $ S$ )Nc              3   8   #    U  H  n[        U5      v   M     g 7fr   int).0vs     r   	<genexpr><lambda>.<locals>.<genexpr>   s     %W6Vc!ff6V   .r	   r   r   )r   r   hiptuplesplitr   r   r   r   r      sE    [`[h[h[l[l%Wemm6G6G6M6Mc6RSUTU6V%W W xrx xr   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N)      r   cudais_availableget_device_capabilityr   r   r   r   r      ,    ejj557hEJJ<\<\<^bh<hhr   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N)   r   r.   r   r   r   r   r      r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N   r   r.   r   r   r   r   r      r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N)r7   r,   r.   r   r   r   r   r       r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N)   r   r.   r   r   r   r   r   !   r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ Nr:   	   r.   r   r   r   r   r   "   r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ N)r>   r   r.   r   r   r   r   r   #   r2   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ N)
   r   r.   r   r   r   r   r   $   ,    uzz668jUZZ=]=]=_cj=jjr   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:  $ )N)   r   r.   r   r   r   r   r   %   rD   r   c                     [         R                  R                  5       =(       a    [         R                  R                  5       S:H  =(       a)    [	        [         R
                  R                  S S 5      S:  =(       dQ    [         R                  R                  5       S:H  =(       a)    [	        [         R
                  R                  S S 5      S:  $ )N)   r   r	      )rC      )r   r/   r0   r1   r    r   r   r   r   r   r   '   s    %**113 iJJ446'Agc%--J\J\]_^_J`FaegFg hJJ446'Afc%--J\J\]_^_J`FadfFfir   c                      [         R                  R                  5       =(       a.    [         R                  R                  5       S;   =(       d    [        $ )N))r7   r	   )r:   r7   )r   r/   r0   r1   IS_THORr   r   r   r   r   *   s2    EJJ335}5::;[;[;]aq;q;|u|}r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:H  $ r<   r.   r   r   r   r   r   +   ,    %**113d

8X8X8Z^d8ddr   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:H  $ r@   r.   r   r   r   r   r   ,   rN   r   c                      [         R                  R                  5       =(       a!    [         R                  R                  5       S:H  $ rB   r.   r   r   r   r   r   -   s,    5::224f9Y9Y9[_f9ffr   c                   ^ [         R                  R                  5       (       d  g[         R                  R                  S5      R                  n[
        R                  R                  SU5      m[        U4S jU  5       5      $ )NFr/   /PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDEc              3   ,   >#    U  H	  oT;   v   M     g 7fr   r   )r!   archeffective_archs     r   r#   +evaluate_gfx_arch_within.<locals>.<genexpr>6   s     <)$~%)s   )	r   r/   r0   get_device_propertiesgcnArchNameosenvirongetany)	arch_listgcn_arch_namerU   s     @r   evaluate_gfx_arch_withinr_   /   s\    ::""$$JJ44V<HHMZZ^^$UWdeN <)<<<r   c                      [        / SQ5      $ )N)gfx940gfx941gfx942gfx950r_   r   r   r   CDNA3OrLaterrf   8   s    #$LMMr   c                      [        SS/5      $ )Ngfx90arc   re   r   r   r   CDNA2OrLaterri   ;   s    #Xx$899r   c                      [         (       a:  / SQn [        R                  R                  SS5      S:w  a  U / SQ-  n [	        U 5      $ [
        (       a  [        (       + =(       a    [        $ g)Nrh   rc   gfx1100gfx1201rd   'TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL0gfx1101gfx1102gfx1150gfx1151gfx1200F)r   rY   rZ   r[   r_   r   r   SM80OrLaterr]   s    r   *evaluate_platform_supports_flash_attentionrx   >   sO    ~H	::>>CSISPPPI'	22y~-+-r   c                  j    [         (       a(  [        R                  R                  R	                  5       $ g)NF)r   r   r   r/   is_ck_sdpa_availabler   r   r   "evaluate_platform_supports_ck_sdpar{   H   s#    ~~~""7799r   c                      [         (       a:  / SQn [        R                  R                  SS5      S:w  a  U / SQ-  n [	        U 5      $ [
        (       a  gg)Nrk   rn   ro   rp   TF)r   rY   rZ   r[   r_   r   rw   s    r   .evaluate_platform_supports_efficient_attentionr}   N   sE    ~H	::>>CSISPPPI'	22yr   c                  R    [         (       + =(       a    [        =(       a	    [        S:  $ )Ni_ )r   rv   TEST_CUDNN_VERSIONr   r   r   *evaluate_platform_supports_cudnn_attentionr   X   s    QKQ5G55PQr   c                     [         (       a  g[        5       S:  d  g[        R                  R                  R                  [        R                  R                  R                  5      n U c  g[        U R                  S5      S   5      S:  $ )NFrF   r:   r&   r   i:  )	r   _get_torch_cuda_versionr   utilscollect_envget_nvidia_driver_versionrunr    r*   )driver_versions    r   (evaluate_platform_supports_green_contextr   [   sk    z"$/[[,,FFu{{G^G^GbGbcN~##C(+,33r   c                      [        5       $ r   )rx   r   r   r   r   r   e       :d:fr   !PLATFORM_SUPPORTS_FLASH_ATTENTIONc                      [        5       $ r   )r}   r   r   r   r   r   f   s    <j<lr   #PLATFORM_SUPPORTS_MEM_EFF_ATTENTIONc                      [        5       $ r   )r   r   r   r   r   r   g   r   r   !PLATFORM_SUPPORTS_CUDNN_ATTENTIONc                  B    [         =(       d    [        =(       d    [        $ r   )r   r   r   r   r   r   r   r   i   s    :[ ;V2S;V2U;Vr   !PLATFORM_SUPPORTS_FUSED_ATTENTIONPLATFORM_SUPPORTS_FUSED_SDPAc                  (    [         =(       a    [        $ r   )r   rv   r   r   r   r   r   o   s    y/H[/Hr   PLATFORM_SUPPORTS_BF16c                      [        5       $ r   )r   r   r   r   r   r   q   s    8`8br   PLATFORM_SUPPORTS_GREEN_CONTEXTc                      [        5       $ r   )r{   r   r   r   r   r   s       2T2Vr   PLATFORM_SUPPORTS_CK_SDPAc                     [         R                  R                  5       (       a  [         R                  R                  (       as  S/n [
        S:  a  U R                  S/5        [
        S:  a  U R                  S5        U  H2  nU[         R                  R                  S5      R                  ;   d  M2    g   g	[        =(       d!    [         R                  R                  5       S:H  $ g	)
Ngfx94)r4   r-   gfx120)r4   r,   gfx95r   Tr=   F)r   r/   r0   r   r(   ROCM_VERSIONextendappendrW   rX   SM90OrLaterr1   archsrT   s     r   evaluate_platform_supports_fp8r   u   s    zz  ==IEv%hZ(v%W%5::;;A>JJJ 
  N%**"B"B"D"NNr   c                  v   [         R                  R                  5       (       a  [         R                  R                  (       a_  S[         R
                  R                  5       ;  a  gS/n U  H2  nU[         R                  R                  S5      R                  ;   d  M2    g   g[        =(       a    [        (       + $ g)NUSE_FBGEMM_GENAIFrc   r   T)r   r/   r0   r   r(   
__config__showrW   rX   r   SM100OrLaterr   s     r   +evaluate_platform_supports_fp8_grouped_gemmr      s    zz  ==!)9)9)>)>)@@JE5::;;A>JJJ 
  3|#33r   c                     [         R                  R                  5       (       a]  [         R                  R                  (       a8  [
        S:  a,  S[         R                  R                  S5      R                  ;   $  g[        $ g)Nr6   rd   r   F)	r   r/   r0   r   r(   r   rW   rX   r   r   r   r   "evaluate_platform_supports_mx_gemmr      s_    zz  ==v%5::#C#CA#F#R#RRR &   r   c                      [         R                  R                  5       (       aO  [         R                  R                  (       d0  S[         R
                  R                  5       ;   n U =(       a    [        $ g)Nr   F)r   r/   r0   r   r(   r   r   IS_SM100)built_with_fbgemm_genais    r   -evaluate_platform_supports_mxfp8_grouped_gemmr      sI    zz  ):):"48H8H8M8M8O"O&383r   c                      [        5       $ r   )r   r   r   r   r   r      r   r   PLATFORM_SUPPORTS_MX_GEMMc                      [        5       $ r   )r   r   r   r   r   r      s    .L.Nr   PLATFORM_SUPPORTS_FP8c                      [        5       $ r   )r   r   r   r   r   r      s    ;f;hr   "PLATFORM_SUPPORTS_FP8_GROUPED_GEMMc                      [        5       $ r   )r   r   r   r   r   r      s    =j=lr   $PLATFORM_SUPPORTS_MXFP8_GROUPED_GEMMFc                      [         (       d   S5       e[        (       dI  [        [        R                  R                  5       5       H  n [        R                  " SSU  3S9  M     Sqg g )Nz?CUDA must be available when calling initialize_cuda_context_rngrJ   zcuda:r   T)r   __cuda_ctx_rng_initializedranger   r/   device_countrandn)is    r   initialize_cuda_context_rngr      sS    9WWW9%%uzz..01AKKE!+. 2%)"	 &r   c               #     #    [         R                  R                  R                  R                  n  S[         R                  R                  R                  l        [         R                  R
                  R                  S S S SS9   S v   S S S 5        U [         R                  R                  R                  l        g ! , (       d  f       N8= f! U [         R                  R                  R                  l        f = f7f)NFenabled	benchmarkdeterministic
allow_tf32r   r   r/   matmulr   r   flagsold_allow_tf32_matmuls    r   tf32_offr      s     !NN//66AAF05""-^^!!''TXej'k l 1F""- lk 1F""-5   /C;AC B<
C *C;<
C
C +C88C;c              #   6  #    [         R                  R                  R                  R                  nU R
                  n S[         R                  R                  R                  l        Xl        [         R                  R                  R                  S S S SS9   S v   S S S 5        U[         R                  R                  R                  l        X0l        g ! , (       d  f       N>= f! U[         R                  R                  R                  l        X0l        f = f7f)NTr   )r   r   r/   r   r   	precisionr   r   )selftf32_precisionr   old_precisions       r   tf32_onr      s     !NN//66AANNM'04""-'^^!!''TXei'j k 1F""-&	 kj 1F""-&s5   ;DAC% CC% $0D
C"C% %1DDc               #     #    [         R                  R                  R                  R                  n  S[         R                  R                  R                  l        [         R                  R
                  R                  SSSSS9   Sv   SSS5        U [         R                  R                  R                  l        g! , (       d  f       N8= f! U [         R                  R                  R                  l        f = f7f)z}
Context manager to temporarily enable TF32 for CUDA operations.
Restores the previous TF32 state after exiting the context.
TNr   r   r   s    r   tf32_enabledr      s      "NN//66AAF04""-^^!!''D ( 
 

 1F""-
 

 1F""-r   T)only_ifc                0   ^ ^^^ S mU 4S jmUUU4S jnU$ )Nc                 Z    [        5          U" 5         S S S 5        g ! , (       d  f       g = fr   r   )r   function_calls     r   with_tf32_disabled+tf32_on_and_off.<locals>.with_tf32_disabled  s    ZO ZZs   
*c                 `   > [        U T5         U" 5         S S S 5        g ! , (       d  f       g = fr   )r   )r   r   r   s     r   with_tf32_enabled*tf32_on_and_off.<locals>.with_tf32_enabled
  s    T>*O +**s   
-c                    >^ ^ [         R                  " T 5      R                  n[        UR	                  5       5      m[
        R                  " T 5      UU UUU4S j5       nU$ )Nc                    >^ TR                  [        TU SS95        [        R                  R	                  5       =(       a    TnST;   a/  U=(       a&    [        R
                  " TS   5      R                  S:H  nST;   a/  U=(       a&    TS   [        R                  [        R                  1;   nU(       a#  T" TS   UU4S j5        T" TS   UU4S j5        g T" S	0 TD6  g )
NFstrictr   r/   dtyper   c                     > T " S0 TD6$ Nr   r   fkwargss   r   r   Ctf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>  s    1;v;r   c                     > T " S0 TD6$ r   r   r   s   r   r   r     s    !+f+r   r   )	updatezipr   r/   is_tf32_supportedr   typefloat32	complex64)argsr   cond	arg_namesr   r   r   r   s    ` r   wrapped1tf32_on_and_off.<locals>.wrapper.<locals>.wrapped  s    MM#ie<=:://1=gD6!OfX.>!?!D!D!N& UU]]EOO4T!T"6&>3FG!&.2EFFr   )inspect	signature
parametersr)   keys	functoolswraps)r   paramsr   r   r   r   r   s   `  @r   wrapper tf32_on_and_off.<locals>.wrapper  sP    ""1%00&++-(				 	 
	 r   r   )r   r   r   r   r   s   `` @@r   tf32_on_and_offr     s    & Nr   c                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                  ^   > [        5          T" U 0 UD6sS S S 5        $ ! , (       d  f       g = fr   r   )r   r   r   s     r   r   with_tf32_off.<locals>.wrapped)  s    Zd%f% ZZs   
,)r   r   )r   r   s   ` r   with_tf32_offr  (  s%    __Q& & Nr   c                  b   S[         R                  R                  5       ;  a  g[         R                  R                  5       R                  S5      n [         R                  R                  5       U [	        S5      -   S  R                  S5      S   n[        S UR                  S5       5       5      $ )NMagmar'   zMagma 
r   c              3   8   #    U  H  n[        U5      v   M     g 7fr   r   r!   xs     r   r#   %_get_magma_version.<locals>.<genexpr>5  s     8!7AQ!7r%   r&   )r   r   r   findlenr*   r)   )positionversion_strs     r   _get_magma_versionr  0  s    e&&++--$$&++H5H""'')(S]*B*CDJJ4PQRSK8!2!23!7888r   c                      [         R                  R                  c  g[        [         R                  R                  5      n [	        S U R                  S5       5       5      $ )Nr'   c              3   8   #    U  H  n[        U5      v   M     g 7fr   r   r	  s     r   r#   *_get_torch_cuda_version.<locals>.<genexpr>;       9!8AQ!8r%   r&   )r   r   r/   strr)   r*   )cuda_versions    r   r   r   7  sE    }}!u}}))*L9!3!3C!8999r   c                      [         (       a  [        R                  R                  c  g[	        [        R                  R                  5      n U R                  SSS9S   n [        S U R                  S5       5       5      $ )Nr'   -rJ   maxsplitr   c              3   8   #    U  H  n[        U5      v   M     g 7fr   r   r	  s     r   r#   *_get_torch_rocm_version.<locals>.<genexpr>B  r  r%   r&   r   r   r   r(   r  r*   r)   )rocm_versions    r   _get_torch_rocm_versionr  =  sc    >U]]..6u}}(()L%%cA%6q9L9!3!3C!8999r   c                      [         (       + $ r   )r   r   r   r   !_check_cusparse_generic_availabler!  D  s    r   c                  2   [         (       d  g[        R                  R                  (       d  g[	        [        R                  R                  5      n U R                  SSS9S   n [        S U R                  S5       5       5      nUS L =(       d    US:  (       + $ )	NFr  rJ   r  r   c              3   8   #    U  H  n[        U5      v   M     g 7fr   r   r	  s     r   r#   5_check_hipsparse_generic_available.<locals>.<genexpr>O  s     G/F!s1vv/Fr%   r&   )r,   rJ   r  )r  rocm_version_tuples     r   "_check_hipsparse_generic_availabler&  G  s}    >==u}}(()L%%cA%6q9LG|/A/A#/FGG"d*I.@6.IJJr   r/   c                    [         R                  R                  [         R                  R                  SS5      [         R                  R                  SS5      5      R	                  U S9n[         R                  R                  [         R                  R                  SS5      [         R                  R                  SS5      5      R	                  U S9n[         R
                  " 5          [        UR                  5       UR                  5       SS9 H  u  pVUR                  U5        M     S S S 5        SS0nUb  UR                  U5        U" UR                  5       40 UD6nU" UR                  5       40 UD6n	X4X4$ ! , (       d  f       NX= f)Nr:   r   Tr   lrr   )
r   nn
SequentialLineartono_gradr   r   copy_r   )
r   optimizer_ctoroptimizer_kwargsmod_controlmod_scalingcsr   opt_controlopt_scalings
             r   !_create_scaling_models_optimizersr7  W  s/    ((%%ehhooa&;UXX__QPQ=RSVV^dVeK((%%ehhooa&;UXX__QPQ=RSVV^dVeK	..0+2H2H2JSWXDAGGAJ Y 
 C[F#&' !7!7!9DVDK !7!7!9DVDK[== 
s   (AE<<
F
c           
         [         R                  " SXS9[         R                  " SXS94[         R                  " SXS9[         R                  " SXS94[         R                  " SXS9[         R                  " SXS94[         R                  " SXS9[         R                  " SXS94/n[         R                  R                  5       R	                  U 5      nSn[        XUS9XEU4-   $ )N)r:   r:   )r   r   r	   )r   r/  r0  )r   r   r)  MSELossr,  r7  )r   r   r/  r0  dataloss_fn	skip_iters          r   _create_scaling_caser=  i  s    [[u<ekk&X]>mn[[u<ekk&X]>mn[[u<ekk&X]>mn[[u<ekk&X]>mnpD
 hh ##F+GI,GW		"# #r   c                 H    [         (       d  U $ [        R                  " U 5      $ r   )IS_SM89unittestexpectedFailurefuncs    r   xfailIfSM89rD  x  s    w4BH$<$<T$BBr   c                 d    [         (       a$  [        5       S:  a  [        R                  " U 5      $ U $ )zUxfail on SM89 only for CUDA < 13. On CUDA 13+, test should pass on all architectures.)rI   r   )r?  r   r@  rA  rB  s    r   xfailIfSM89PreCUDA13rF  {  s(    w*,w6''--Kr   c                 H    [         (       d  U $ [        R                  " U 5      $ r   )r   r@  rA  rB  s    r   xfailIfSM100OrLaterrH        #|4G)A)A$)GGr   c                 H    [         (       d  U $ [        R                  " U 5      $ r   )SM120OrLaterr@  rA  rB  s    r   xfailIfSM120OrLaterrL    rI  r   c                 ^    [         (       d  [        (       d  U $ [        R                  " U 5      $ r   )r   	IS_JETSONr@  rA  rB  s    r   xfailIfDistributedNotSupportedrO    s      II4RH4L4LT4RRr   r   z2Requires CUDA {}.{} to match Tritons ptxas version)gh㈵>)f__doc__r   r   
torch.cuda$torch.testing._internal.common_utilsr   r   r   r   r   r   r   
contextlibrY   r@  r/   is_initialized"CUDA_ALREADY_INITIALIZED_ON_IMPORTr   TEST_MULTIGPUr   r   r   r   r   SM53OrLaterSM60OrLaterSM70OrLaterSM75OrLaterrv   SM89OrLaterr   r   rK  rL   rN  r?  IS_SM90r   r_   rf   ri   rx   r{   r}   r   r   r   bool__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   
numba.cudanumbar0   TEST_NUMBA_CUDAImportErrorRuntimeErrorr   r   contextmanagerr   r   r   r   r  r  r   r  r!  r&  TEST_CUSPARSE_GENERICTEST_HIPSPARSE_GENERICoptimSGDr7  floatr=  rD  rF  rH  rL  rO  TRITON_PTXAS_VERSIONskipIfr   r(   formatrequires_triton_ptxas_compatr   r   r   <module>rn     s   F    u u   	  &+ZZ%>%>%@ " <ejj5571<(1ell8$t*+JwxJXY xyhihihihihihihijkjk
 i j }~	
d
e
d
efg=N:R4 +22f*g !4 g,34l,m #T m*12f*g !4 g*1 3V +W !4 W &/%E~3E d E&'HI  I(/0b(c  c")*V"W 4 W #**V"W 4 W%&NO t O+23h+i "D i-45l-m $d m**113
 O # * F F 
' 
' F FXD F9::	K :; ;=  .4EKKOOfj >$ !'ekk%++//lp #CHHS  'u}}/@/@D/H/}MdMfi}M}/c/j/j  mA  0B C  *zz((***** *O & 
s   M M,+M,