
    h3                        S r SSKJr  SSKrSSKJrJr  SSKJr  SSK	J
r
Jr  SSKJrJrJr  SSKJr  \R$                  R&                   " S	 S
\R(                  R*                  5      5       r " S S\R.                  R0                  5      rg)zD
A simple module swap UX for a float8 version of `torch.nn.Linear`.
    )OptionalN)Float8LinearConfigScalingType)tensor_already_casted_to_fp8)get_maybe_axiswise_dimhp_tensor_to_float8_dynamic)GemmInputRoleLinearMMConfigScaledMMConfig)!WeightWithDynamicFloat8CastTensorc            	       t    \ rS rSrSr\S\R                  S\R                  S\S\	4S j5       r
\S 5       rS	rg
)matmul_with_hp_or_float8_args   z
Like torch.matmul, but with the arguments in either high precision or float8.
* if the arguments are in high precision, they are cast to float8 according
  to the specified config
* if the arguments are in float8, we assume the cast honored the config
input_hpweight_hp_tlinear_mm_configconfigc                    U R                  X5        X0l        X@l        Un[        U5      (       a  UnOUR                  R
                  [        R                  L a  UnOm[        UUR                  R                  U[        R                  UR                  R                  [        SUR                  R                  5      UR                  S9n[        U5      (       a  UnOUR                  R
                  [        R                  L a  UnOm[        UUR                  R                  U[        R                   UR                  R                  [        SUR                  R                  5      UR                  S9nUR"                  nUR%                  SUS   5      n	[&        R(                  " X5      n
U
R$                  " / US S QU
R"                  S   P76 n
U
$ )Ngemm_input_rolescaling_granularityaxiswise_dimround_scales_to_power_of_2r   )save_for_backwardr   r   r   cast_config_inputscaling_typer   DISABLEDr   target_dtyper	   INPUTr   r   r   cast_config_weightWEIGHTshapereshapetorchmm)ctxr   r   r   r   cinput_maybe_fp8weight_maybe_fp8_t
orig_shapeinput_maybe_fp8_reshapedres_bitss              V/home/james-whalen/.local/lib/python3.13/site-packages/torchao/float8/float8_linear.pyforward%matmul_with_hp_or_float8_args.forward%   s    	h4/
'11&O  --1E1EE&O9##00  - 3 3$%$7$7$K$K3++?? ,-+G+G
O (44!,!!..+2F2FF!,!<$$11  - 4 4$%$8$8$L$L3q++?? ,-+G+G
" %**
#2#:#:2z"~#N 884I##IZ_IhnnR6HI    c                 b   U R                   u  p#U R                  nUR                  nUR                  SUS   5      n[	        U5      (       a  UnOUR
                  R                  [        R                  L a  UnOw[        UUR
                  R                  U R                  [        R                  UR
                  R                  [        SUR
                  R                  5      UR                   S9n[	        U5      (       a  UnOUR"                  R                  [        R                  L a  UnOw[        UUR"                  R                  U R                  [        R$                  UR"                  R                  [        SUR"                  R                  5      UR                   S9n[&        R(                  " UUR+                  5       5      n	U	R                  " / US S QU	R                  S   P76 n	UR                  n
UR                  SU
S   5      n[	        U5      (       a  UnOUR,                  R                  [        R                  L a  UnOw[        UUR,                  R                  U R                  [        R                  UR,                  R                  [        SUR,                  R                  5      UR                   S9n[	        U5      (       a  UnOUR.                  R                  [        R                  L a  UnOw[        UUR.                  R                  U R                  [        R0                  UR.                  R                  [        SUR.                  R                  5      UR                   S9n[&        R(                  " UR+                  5       U5      nSnXR+                  5       /UQ7$ )Nr   r   r   )NN)saved_tensorsr   r#   r$   r   cast_config_grad_outputr   r   r   r   r   r   r	   GRAD_OUTPUTr   r   r   !cast_config_weight_for_grad_inputr"   r%   r&   t'cast_config_grad_output_for_grad_weight!cast_config_input_for_grad_weightr    )r'   grad_outputr   r   r(   grad_output_orig_shapegrad_output_reshaped#grad_output_reshaped_maybe_fp8_dim0weight_t_maybe_fp8_dim0
grad_inputinput_hp_orig_shapeinput_hp_reshaped#grad_output_reshaped_maybe_fp8_dim1input_reshaped_maybe_fp8_dim1grad_weightempty_gradss                   r.   backward&matmul_with_hp_or_float8_args.backward]   sO    # 1 1JJ "-!2!2*2227Mb7QR ((<==2F/&&33{7K7KK2F/2M$))66$$ - 9 9$%$=$=$Q$Q311EE ,-+G+G
3/ (44&1#00==AUAUU&1#&A33@@$$ - 4 4$%$G$G$[$[3;;OO ,-+G+G
'# XX/#%%'

  '' 
#CR(
*4*:*:2*>

 'nn$,,R1DR1HI ((<==2F/55BB##$ 3G/2M$99FF$$ - 9 9$%$M$M$a$a3q@@TT ,-+G+G
3/ ((9::,=)00==AUAUU,=),G!33@@$$ - 3 3$%$G$G$[$[3q::NN ,-+G+G
-) hh/113)

 !==?8[88r1    N)__name__
__module____qualname____firstlineno____doc__staticmethodr%   Tensorr
   r   r/   rF   __static_attributes__rH   r1   r.   r   r      s`     5,,5 \\5 )	5
 #5 5n n9 n9r1   r   c                      ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrU 4S jr	\
 SS\\   4S	 jj5       rS
rU =r$ )Float8Linear   z
Note: this is **not** a public API and is only intended to be used
inside of this repository. Please file an issue if you would benefit
from this being a public API.

A wrapper around a `torch.nn.Linear` module which does fp8 compute.
c           
        > UR                  S5      n[        TU ]  " U0 UD6  UR                  R                  U l        UR                  R                  U l        UR                  R                  U l	        X0l
        [        [        UR                  U R                  R                  R                  SU R                  R                   5      [        UR                  U R                  R"                  R                  SU R                  R                   5      [        UR                  U R                  R$                  R                  SU R                  R                   5      5      U l        g)z^
Additional arguments on top of `torch.nn.Linear`'s arguments:
* `config`: Float8LinearConfig
r   FN)popsuper__init__r   r   scaling_type_inputr!   scaling_type_weightr4   scaling_type_grad_outputr   r
   r   emulategemm_config_outputuse_fast_accumpad_inner_dimgemm_config_grad_inputgemm_config_grad_weightr   )selfargskwargsr   	__class__s       r.   rW   Float8Linear.__init__   s    H%$)&) #)":":"G"G#)#<#<#I#I (.(F(F(S(S% ...==))	 22AA))	 33BB))	!!
r1   inputreturnc                 v   [         R                  " 5       (       a&  [         R                  " 5       nUR                  U5      n[        R                  UU R                  R                  5       U R                  U R                  5      nU R                  b'  X0R                  R                  UR                  5      -   nU$ N)r%   is_autocast_enabledget_autocast_gpu_dtypetor   applyweightr7   r   r   biasdtype)ra   rf   autocast_dtypeoutputs       r.   r/   Float8Linear.forward   s     $$&& #99;NHH^,E.44KKMMO!!KK	
 99 iill6<<88Fr1   c                   > U R                   nSUR                  R                  5        3nSUR                  R                  5        3nSUR                  R                  5        3nX#U/nUR
                  UR                  :w  a,  UR                  SUR
                  R                  5        35        UR                  UR                  :w  a,  UR                  SUR                  R                  5        35        UR                  UR                  :w  a,  UR                  SUR                  R                  5        35        SR                  U5      n[        TU ]-  5        SU S	3nU$ )
Nzi:zw:zgo:zi_gw:zw_gi:zgo_gw:,z, cast_configs=")r   r   	short_strr!   r4   r9   appendr6   r8   joinrV   
extra_repr)	ra   r(   cicwcgopartscast_config_strsrd   s	           r.   rz   Float8Linear.extra_repr  sA   KK!%%//123!&&00234A--779:;..!2E2EELL5!D!D!N!N!P QRS..!2F2FFLL5!D!D!N!N!P QRS448Q8QQLLBBLLNOP ((5/w!#$OO3DAFr1   r   c                 Z   Uc
  [        5       n[        R                  " S5         U " UR                  UR                  SUS9nSSS5        UR
                  Wl        UR                  Ul        UR                  (       a  UR                  R                  [        R                  L d   e[        R                  R                  [        UR
                  UR                  UR                   R                  R"                  5      UR
                  R$                  S9Ul        U$ ! , (       d  f       N= f)z
Create an nn.Linear with fp8 compute from a regular nn.Linear

Args:
    mod (torch.nn.Linear): nn.Linear to convert
    config (Optional[Float8LinearConfig]): configuration for conversion to float8
NmetaF)ro   r   )requires_grad)r   r%   devicein_featuresout_featuresrn   ro   enable_fsdp_float8_all_gatherr!   r   r   DYNAMICnn	Parameterr   r   r   r   r   )clsmodr   new_mods       r.   
from_floatFloat8Linear.from_float%  s     >')F\\&!  	G " xx //,,99[=P=PPPP"XX//1NN,,NN55BB
 &nn:: 0 GN 7 "!s   D
D*)r   r   rZ   rX   rY   ri   )rI   rJ   rK   rL   rM   rW   r%   rO   r/   rz   classmethodr   r   r   rP   __classcell__)rd   s   @r.   rR   rR      sW    %
NU\\ ell ($  04) +,) )r1   rR   )rM   typingr   r%   torchao.float8.configr   r    torchao.float8.distributed_utilsr   #torchao.float8.float8_scaling_utilsr   r   %torchao.float8.float8_training_tensorr	   r
   r   torchao.float8.fsdp_utilsr   _dynamoallow_in_graphautogradFunctionr   r   LinearrR   rH   r1   r.   <module>r      sv      A I 
 H o9ENN$;$; o9 o9d@588?? @r1   