
    +h=              
          S r SSKrSSKrSSKJr  SSKJrJrJrJrJ	r	  SSK
Jr  SSKJrJrJrJrJrJr  SS	KJr  \(       a  SS
KJr  \" 5       (       a  SSKrSSKJr  \" SS5      (       aq  \R4                  \R6                  \R8                  \R:                  \R<                  \R>                  \R@                  \RB                  \RD                  \RF                  4
r$O#\R4                  \R6                  \R8                  4r$\" 5       (       a  SSK%J&r&  S r'\" 5       (       a/  \" SS5      (       a!  \" 5       (       a  \" SS5      (       a  \'" 5         \RP                  " \)5      r*S r+S r, " S S\5      r-g)z
Adapted from
https://github.com/huggingface/transformers/blob/3a8eb74668e9c2cc563b2f5c62fac174797063e0/src/transformers/quantizers/quantizer_torchao.py
    N)fnmatch)TYPE_CHECKINGAnyDictListUnion)version   )get_module_from_nameis_torch_availableis_torch_versionis_torchao_availableis_torchao_versionlogging   )DiffusersQuantizer)
ModelMixinz>=z2.5)	quantize_c                  `   [         R                  S4[         R                  S4[         R                  S4[         R                  S4[         R
                  S4[         R                  S4[         R                  S4/n  SS	KJ	n  SS
K
Jn  SSKJn  SSKJnJn  U R#                  XSXBU/5        [         R.                  R1                  U S9  g ! [$        [&        4 a4  n[(        R+                  S5        [(        R-                  U5         S nAN]S nAff = f! [         R.                  R1                  U S9  f = f)Nztorch.uint1ztorch.uint2ztorch.uint3ztorch.uint4ztorch.uint5ztorch.uint6ztorch.uint7r   )	NF4Tensor)Float8AQTTensorImpl)UInt4Tensor)UintxAQTTensorImplUintxTensorzhUnable to import `torchao` Tensor objects. This may affect loading checkpoints serialized with `torchao`)safe_globals)torchuint1uint2uint3uint4uint5uint6uint7torchao.dtypesr   #torchao.dtypes.floatx.float8_layoutr   !torchao.dtypes.uintx.uint4_layoutr   !torchao.dtypes.uintx.uintx_layoutr   r   extendImportErrorModuleNotFoundErrorloggerwarningdebugserializationadd_safe_globals)r   r   r   r   r   r   es          h/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/quantizers/torchao/torchao_quantizer.py_update_torch_safe_globalsr2   I   s    	m$	m$	m$	m$	m$	m$	m$LH,KAU[7I`ijk 	,,,,G ,- v	
 	Q	 	,,,,Gs*   ;.C D*DD DD D-z2.6.00.7.0c                 (   SSK Jn  SSKJn  [	        X5      (       a*  U R
                  R                   SU R                  5        S3$ [	        X5      (       a<  U R
                  R                   SU R                   S[        U R                  5       S3$ g )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
r$   r5   7torchao.quantization.linear_activation_quantized_tensorr6   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr5   r6   s      r1   r>   r>   q   s    4g&00""++,Af.G.G.I-J!LL&::""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;    c                    [        U R                  5      nUc7  SU R                  R                  S    SU R                  R                  S    S3$ SU R                  R                  S    SU R                  R                  S    SU 3$ )Nzin_features=   z, out_features=r   z, weight=Noner9   )r>   rA   shape)selfrA   s     r1   _linear_extra_reprrG   |   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjrB   c                   @  ^  \ rS rSrSrSrS/rU 4S jrS rS r	SS	 jr
S
\\\\\4   4   S\\\\\4   4   4S jrSSSSS\S\\\4   S\4
S jrSSSSS\SSS\\\4   S\\   4S jrS r/ 4SSS\\   4S jjrS S jrS!S jr\S 5       r\S\4S j5       rSrU =r$ )"TorchAoHfQuantizer   zB
Diffusers Quantizer for TorchAO: https://github.com/pytorch/ao/.
Ftorchaoc                 (   > [         TU ]  " U40 UD6  g N)super__init__)rF   quantization_configkwargsr<   s      r1   rO   TorchAoHfQuantizer.__init__   s    ,77rB   c                    [        5       (       d  [        S5      e[        R                  " [        R
                  R                  S5      5      nU[        R                  " S5      :  a  [        SU S35      eSU l        UR                  SS 5      n[        U[        5      (       aK  SUR                  5       ;   d  S	UR                  5       ;   a#  U R                  (       a  [        S
5      eSU l        U R                  (       aw  UR                  SS 5      nU(       a]  [        R                  " [        R
                  R                  S5      5      nU[        R                  " S5      :  a  [        SU S35      eg g g )NziLoading a TorchAO quantized model requires the torchao library. Please install with `pip install torchao`r   r3   zOThe minimum required version of `torchao` is 0.7.0, but the current version is z/. Please upgrade with `pip install -U torchao`.F
device_mapcpudiskzYou are attempting to perform cpu/disk offload with a pre-quantized torchao model This is not supported yet. Please remove the CPU or disk device from the `device_map` argument.Tweights_onlyz2.5.0zlIn order to use TorchAO pre-quantized model, you need to have torch>=2.5.0. However, the current version is .)r   r)   r	   parse	importlibmetadataRuntimeErroroffloadgetr;   dictvaluespre_quantized
ValueError)rF   argsrQ   torchao_versionrT   rW   torch_versions          r1   validate_environment'TorchAoHfQuantizer.validate_environment   se   #%%{  "--	(:(:(B(B7(KLW]]733abqar  sb  c  ZZd3
j$''
))++v9J9J9L/L%%$z 
 $(DL!::nd;L 'i.@.@.H.H.Q R 7==#99& G  HU  GV  VW  X  :  rB   c                 :   U R                   R                  nUR                  S5      (       d  UR                  S5      (       a0  Ub-  U[        R                  :w  a  [
        R                  SU S35        Uc%  [
        R                  S5        [        R                  nU$ )Nintuintz%You are trying to set torch_dtype to zu for int4/int8/uintx quantization, but only bfloat16 is supported right now. Please set `torch_dtype=torch.bfloat16`.a  Overriding `torch_dtype` with `torch_dtype=torch.bfloat16` due to requirements of `torchao` to enable model loading in different precisions. Pass your own `torch_dtype` to specify the dtype of the remaining non-linear layers, or pass torch_dtype=torch.bfloat16, to remove this warning.)rP   
quant_type
startswithr   bfloat16r+   r,   )rF   torch_dtyperk   s      r1   update_torch_dtype%TorchAoHfQuantizer.update_torch_dtype   s    --88
  '':+@+@+H+H&;%..+H;K= Ie f
 NNx
  ..KrB   returnc                    U R                   R                  nUR                  S5      (       d  UR                  S5      (       a  [        R                  $ US:X  a4  U R                   R
                  R                  S[        R                  5      $ UR                  S5      (       a{  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  S.[        US   5         $ UR                  S5      (       d  UR                  S	5      (       a  [        R                   $ [#        U[$        5      (       a  U$ / S
Qn['        SU S[$         S35      e)Nint8int4uintx_weight_onlydtyperj   )rD   r   r
               rw   floatfp)autobalancedbalanced_low_0
sequentialz$You have set `device_map` as one of zr on a TorchAO quantized model but a suitable target dtype could not be inferred. The supported target_dtypes are: z. If you think the dtype you are using should be supported, please open an issue at https://github.com/huggingface/diffusers/issues.)rP   rk   rl   r   rs   quant_type_kwargsr^   uint8r   r   r   r    r!   r"   r#   ri   rm   r;   'SUPPORTED_TORCH_DTYPES_FOR_QUANTIZATIONrb   )rF   target_dtyperk   possible_device_mapss       r1   adjust_target_dtype&TorchAoHfQuantizer.adjust_target_dtype   sC   --88
  ((J,A,A&,I,I::..++==AA'5;;WW""6**;;;;;;;;;;;;;; *Q- " " ""7++z/D/DT/J/J>>!l$KLL  T23G2H IGGnFo p@A
 	
rB   
max_memoryc                 `    UR                  5        VVs0 s H
  u  p#X#S-  _M     nnnU$ s  snnf )Ng?)items)rF   r   keyvals       r1   adjust_max_memory$TorchAoHfQuantizer.adjust_max_memory   s5    5?5E5E5GH5Gc9n5G
H Is   *modelr   param_valueztorch.Tensor
param_name
state_dictc                   ^ UR                  SS 5      n[        U4S jU R                   5       5      (       a  gUS:X  a  U R                  (       a  g[	        UT5      u  px[        U[        R                  R                  5      =(       a    US:H  $ )Nparam_devicec              3   J   >#    U  H  oS -   T;   =(       d    UT:H  v   M     g7f)rX   N ).0r   r   s     r1   	<genexpr>>TorchAoHfQuantizer.check_if_quantized_param.<locals>.<genexpr>   s'     gKfCc	Z'?SJ->?Kfs    #FrU   rA   )	popanymodules_to_not_convertr]   r   r;   r   nnLinear)	rF   r   r   r   r   rQ   r   moduletensor_names	      `     r1   check_if_quantized_param+TorchAoHfQuantizer.check_if_quantized_param   sp     zz.$7g4KfKfgggU"t|| #7uj"IFfehhoo6TK8<STrB   target_deviceztorch.deviceunexpected_keysc                    [        X5      u  pU R                  (       az  [        R                  R	                  UR                  US95      UR                  U	'   [        U[        R                  5      (       a!  [        R                  " [        U5      Ul        gg[        R                  R	                  U5      R                  US9UR                  U	'   [        XR                  R                  5       5        g)z
Each nn.Linear layer that needs to be quantized is processed here. First, we set the value the weight tensor,
then we move it to the target device. Finally, we quantize the module.
)deviceN)r   ra   r   r   	Parameterto_parametersr;   r   types
MethodTyperG   
extra_reprr   rP   get_apply_tensor_subclass)
rF   r   r   r   r   r   r   rQ   r   r   s
             r1   create_quantized_param)TorchAoHfQuantizer.create_quantized_param   s     35E /4hh.@.@WdAe.fF{+&")),,$)$4$45G$P! - /4hh.@.@.M.P.PXe.P.fF{+f66PPRSrB   c                     SSSSS.nU R                   R                  nUR                  5        H  u  p4[        X#5      (       d  M  Us  $    [	        SU< 35      e)a  
This factor is used in caching_allocator_warmup to determine how many bytes to pre-allocate for CUDA warmup.
- A factor of 2 means we pre-allocate the full memory footprint of the model.
- A factor of 4 means we pre-allocate half of that, and so on

However, when using TorchAO, calculating memory usage with param.numel() * param.element_size() doesn't give
the correct size for quantized weights (like int4 or int8) That's because TorchAO internally represents
quantized tensors using subtensors and metadata, and the reported element_size() still corresponds to the
torch_dtype not the actual bit-width of the quantized data.

To correct for this:
- Use a division factor of 8 for int4 weights
- Use a division factor of 4 for int8 weights
   rw   )zint4_*zint8_*zuint*zfloat8*zUnsupported quant_type: )rP   rk   r   r   rb   )rF   map_to_target_dtyperk   patternr   s        r1   get_cuda_warm_up_factor*TorchAoHfQuantizer.get_cuda_warm_up_factor  sb    $ *+a!PQR--88
%8%>%>%@!Gz++## &A 3J>BCCrB   keep_in_fp32_modulesc                 \   U R                   R                  U l        [        U R                  [        5      (       d  U R                  /U l        U R                  R	                  U5        [        U[
        5      (       ad  [        UR                  5       5      S:  aG  UR                  5        VVs/ s H  u  pVUS;   d  M  UPM     nnnU R                  R	                  U5        U R                   Vs/ s H	  oc  M  UPM     snU l        U R                   UR                  l         g s  snnf s  snf )NrD   )rV   rU   )
rP   r   r;   listr(   r_   lenkeysr   config)	rF   r   rT   r   rQ   r   valuekeys_on_cpur   s	            r1   $_process_model_before_weight_loading7TorchAoHfQuantizer._process_model_before_weight_loading3  s     '+&>&>&U&U#$55t<<+/+F+F*GD'##**+?@ j$''C
0A,BQ,F1;1A1A1C`1C:3uP_G_31CK`''..{; =A<W<W&n<W&v<W&n#+/+C+C( a 'os   -D#=D#/D)9D)c                     U$ rM   r   )rF   r   s     r1   #_process_model_after_weight_loading6TorchAoHfQuantizer._process_model_after_weight_loadingN  s    rB   c                    U(       a  [         R                  S5        g[        R                  " [        R
                  R                  S5      5      [        R                  " S5      :  nU(       d  [         R                  S5        U R                  (       a-  U R                  R                  c  [         R                  S5        gU$ )Nzftorchao quantized model does not support safe serialization, please set `safe_serialization` to False.Fhuggingface_hubz0.25.0zMtorchao quantized model is only serializable after huggingface_hub >= 0.25.0 a  The model contains offloaded modules and these modules are not quantized. We don't recommend saving the model as we won't be able to reload them.If you want to specify modules to not quantize, please specify modules_to_not_convert in the quantization_config.)	r+   r,   r	   rY   rZ   r[   r]   rP   r   )rF   safe_serialization_is_torchao_serializables      r1   is_serializable"TorchAoHfQuantizer.is_serializableQ  s    NNx #*==1C1C1K1KL]1^#_cjcpcpd
 $
  (NNjk<<D44KKSNND ''rB   c                 L    U R                   R                  R                  S5      $ )Nrs   )rP   rk   rl   rF   s    r1   is_trainableTorchAoHfQuantizer.is_trainablei  s    ''22==fEErB   c                     g)NTr   r   s    r1   is_compileable!TorchAoHfQuantizer.is_compileablem  s    rB   )r   r]   )r   torch.dtyperq   r   )r   r   rM   )r=   
__module____qualname____firstlineno____doc__requires_calibrationrequired_packagesrO   rf   ro   r   r   strr   ri   r   r   boolr   r   r   r   r   r   r   propertyr   r   __static_attributes____classcell__)r<   s   @r1   rI   rI      sd    !"8 D*
BDeCHo1E,F 4PSUZ[^`c[cUdPdKe UU $U 	U
 cNU 
U(TT $T 	T
 &T cNT cT6D: +-	DD #3i	D6(0 F F   rB   rI   ).r   rZ   r   r   typingr   r   r   r   r   	packagingr	   utilsr   r   r   r   r   r   baser   models.modeling_utilsr   r   torch.nnr   rs   float8_e4m3fnfloat8_e5m2r   r   r   r    r!   r"   r#   r   torchao.quantizationr   r2   
get_loggerr=   r+   r>   rG   rI   r   rB   r1   <module>r      s?  
    8 8   & 3 e$$
 JJKKKKKKKKKKKKKK3
/" JJ3
/ .H: w''4))  
		H	%Pkk+ krB   