
    h5                        % S SK r S SKrS SKJr  S SKJrJr  S SKrS SKJ	r	  \R                  " 5       r\R                  \S'    " S S\ R                  5      r " S S	\ R                  5      r\ " S
 S5      5       r\" 5       r\R&                  r\R(                  r\" SS9 " S S5      5       r\" SS9 " S S5      5       r " S S\ R                  5      r\" SS9 " S S5      5       rg)    N)	dataclass)OptionalUnion)is_MI300loggerc                   "    \ rS rSrSrSrS rSrg)ScalingType   dynamicdisabledc                 V    U [         R                  L a  gU [         R                  L d   eg)Ndyndis)r	   DYNAMICDISABLEDselfs    O/home/james-whalen/.local/lib/python3.13/site-packages/torchao/float8/config.py	short_strScalingType.short_str   s*    ;&&&;/////     N)__name__
__module____qualname____firstlineno__r   r   r   __static_attributes__r   r   r   r	   r	      s    G Hr   r	   c                   &    \ rS rSrSrSrSrS rSrg)ScalingGranularity!   zE
Defines the granularity of scaling strategies for casting to float8

tensorwiseaxiswisec                 V    U [         R                  L a  gU [         R                  L d   eg)Ntenaxs)r   
TENSORWISEAXISWISEr   s    r   r   ScalingGranularity.short_str,   s,    %000-66666r   r   N)	r   r   r   r   __doc__r&   r'   r   r   r   r   r   r   r   !   s    
 J Hr   r   c                   N    \ rS rSrSr\R                  r\R                  r	S r
Srg)Float8TypeConfig4   z
Configuration for selecting the preferred float8 type pair, either e4m3fn/e5m2 or e4m3fnuz/e5m2fnuz.

Currently, ROCm supports 1. fnuz variants in MI300. 2. OCP F8 variants in MI350/Navi4.
c                     [         R                  R                  (       a_  [         R                  R	                  5       (       a;  [        5       (       a+  [         R                  U l        [         R                  U l	        g g g g )N)
torchversionhipcudais_availabler   float8_e4m3fnuz
e4m3_dtypefloat8_e5m2fnuz
e5m2_dtyper   s    r   __post_init__Float8TypeConfig.__post_init__B   sN    ==!8!8!:!:xzz#33DO#33DO @J!:r   )r4   r6   N)r   r   r   r   r)   r.   float8_e4m3fnr4   float8_e5m2r6   r7   r   r   r   r   r+   r+   4   s&     $$J ""J4r   r+   T)frozenc                       \ rS rSr% Sr\R                  r\\S'   \	R                  r\	\S'   Sr\\R                     \S'   S rS rS	rg)

CastConfigN   z;
Configuration for maybe casting a single tensor to float8
scaling_typescaling_granularityNtarget_dtypec                     [         S[        S0U R                     nU R                  R	                  5        SU R
                  R	                  5        SU 3$ )Ne4m3e5m2_)r4   r6   rA   r?   r   r@   )r   dtypes     r   r   CastConfig.short_strX   sU    VZ89J9JK##--/0$2J2J2T2T2V1WWXY^X_``r   c                    U R                   [        R                  L a$  U R                  [        R
                  L d   S5       eU R                  b=  U R                  R                  (       a  U R                  R                  S:X  d   S5       eg g )NzGonly dynamic scaling type is supported for axiswise scaling granularity   z)must specify a 8-bit floating-point dtype)	r@   r   r'   r?   r	   r   rA   is_floating_pointitemsizer   s    r   r7   CastConfig.__post_init__\   s    ##'9'B'BB$$(;(;; Y;   (//D4E4E4N4NRS4S	76	7 
4S )r   r   )r   r   r   r   r)   r	   r   r?   __annotations__r   r&   r@   rA   r   r.   rF   r   r7   r   r   r   r   r=   r=   N   sI     !, 3 3L+3.@.K.K+K*.L(5;;'.a7r   r=   c                   (    \ rS rSr% SrSr\\S'   Srg)Float8GemmConfigf   z"
Configuration for a float8 gemm.
Fuse_fast_accumr   N)	r   r   r   r   r)   rQ   boolrM   r   r   r   r   rO   rO   f   s     !ND r   rO   c                        \ rS rSrSrSrSrSrg)Float8LinearRecipeNamer   r!   rowwiserowwise_with_gw_hpr   N)r   r   r   r   r&   ROWWISEROWWISE_WITH_GW_HPr   r   r   r   rT   rT   r   s    J
 G .r   rT   c                   V   \ rS rSr% Sr\" 5       r\\S'   Sr\	\   \S'   \" 5       r
\\S'   Sr\	\   \S'   \" 5       r\\S'   Sr\	\   \S	'   \" S
S9r\\S'   \" 5       r\\S'   \" 5       r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   S r\S\\\4   SS 4S j5       rSrg)Float8LinearConfig   zQ
Configuration for converting a `torch.nn.Linear` module to float8
for training.
cast_config_inputN!cast_config_input_for_grad_weightcast_config_weight!cast_config_weight_for_grad_inputcast_config_grad_output'cast_config_grad_output_for_grad_weightT)rQ   gemm_config_outputgemm_config_grad_inputgemm_config_grad_weightFenable_fsdp_float8_all_gatherpad_inner_dimemulate!force_recompute_fp8_weight_in_bwdround_scales_to_power_of_2c                 |   U R                   c!  [        R                  U SU R                  5        U R                  c!  [        R                  U SU R
                  5        U R                  c!  [        R                  U SU R                  5        U R
                  R                  [        R                  :w  a/  U R                  (       a   SU R
                  R                   35       eU R                  nU R
                  nU R                  nU R                   nU R                  nU R                  nXS4X5S4XFS44 HM  u  pxn	UR                  [        R                  L n
UR                  [        R                  L nX:X  a  ME   SU	 35       e   XS	[        4X%S
[        4X6S[         44 Hr  u  pxpUR"                  c  [        R                  USU5        UR"                  c  [        R                  USU5        UR"                  UR"                  :X  a  Mj   U S35       e   U R$                  (       a  [&        R)                  S5        g g )Nr^   r`   rb   zPenable_fsdp_float8_all_gather only supports tensorwise scaling granularity, got output
grad_inputgrad_weightz#incompatible operand precision for inputweightgrad_outputrA   z< must be cast to the same dtype in both matmuls it's used inz`config.force_recompute_fp8_weight_in_bwd` is deprecated and will be removed in a future release. Please see https://github.com/pytorch/ao/issues/2251 for more details.)r^   object__setattr__r]   r`   r_   rb   ra   r@   r   r&   rf   r?   r	   r   r4   r6   rA   ri   r   warning)r   cc_icc_wcc_gocc_i_gwcc_w_gicc_go_gwcc1cc2	gemm_nameis_disabled_1is_disabled_2operand_namedefault_dtypes                 r   r7    Float8LinearConfig.__post_init__   sR    11994;Q;Q 11994;R;R 77?9,, ""66:L:W:WW99 bcgczcz  dO  dO  cP  Q9
 %%&&,,8888??
 "\*.$
Ci
  ,,0D0DDM,,0D0DDM 1 5i[A1$
 GZ0Hj1mZ86
1Cl '""3F'""3F##s'7'77 . \]76
 11NN { 2r   recipe_namereturnc           
      R   [        U 5      [        :X  a<  [         Vs/ s H  oR                  PM     nnX;   d   SU  SU 35       e[        U 5      n U [        R                  L a
  [        5       $ U [        R                  L a`  [        [        R                  [        S9n[        [        R                  [        S9n[        [        R                  [        S9n[        UUUSS9$ U [        R                  L a  [        [        R                  S9n[        [        R                  S9n[        [        R                  [        S9n[        [        R                  S9n[        [        R                  S9n[        [        R                  [        S9n[        UUUUUUSS	9$ [        S
U  35      es  snf )z
Input: `Float8LinearRecipeName` value, or a string representing a `Float8LinearRecipeName` value
Output: a `Float8LinearConfig` configured to implement the specified recipe
zrecipe_name z not in valid names )r@   rA   T)r]   r_   ra   rj   )r@   )r?   )r?   rA   )r]   r_   ra   r^   r`   rb   rj   zunknown recipe_name )typestrrT   valuer&   r[   rX   r=   r   r'   r4   rY   r	   r   AssertionError)	r   nvalid_namesru   rv   rw   ry   rx   rz   s	            r   from_recipe_name#Float8LinearConfig.from_recipe_name  s    #,BC,Bq77,BKC- {m+?}M- 1=K0;;;%''2:::$6$?$?jD $6$?$?jD $6$?$?jE &"&#'(-+/  2EEE2D2M2MND2D2M2MND $6$?$?jE !5G5R5RSG !k.B.BCG!(11
H &"&#'(-29298@+/  !#7}!EFFq Ds   F$r   )r   r   r   r   r)   r=   r]   rM   r^   r   r_   r`   ra   rb   rO   rc   rd   re   rf   rR   rg   rh   ri   rj   r7   staticmethodr   rT   r   r   r   r   r   r   r[   r[      s   * %/Lz0>B%x
';B%/\
1>B%x
';B*4,Z6DH+Xj-AH ,<4+P(P/?/A,A0@0B-B +0!4/  M4 GT
 /4%t3 (-,BH @G1367@G	@G @Gr   r[   )enumloggingdataclassesr   typingr   r   r.   torchao.utilsr   	getLoggerr   LoggerrM   Enumr	   r   r+   type_configr4   r6   r=   rO   rT   r[   r   r   r   <module>r      s      ! "  " **, ,$))  & 4 4 4(  ##
##
 $7 7 7. $! ! !.TYY .0 $IG IG IGr   