
    cCi|                         S SK JrJr  SSKJr  SSKJr  \(       a  SSKJr  SSK	J
r
JrJrJr  SSKJr  \" 5       (       a  S S	Kr\R"                  " \5      r " S
 S\5      rg	)    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                      ^  \ rS rSrSrSrSrSrS/rS\	4U 4S jjr
S rSS
 jrSSSSS\SS4S jr  SS jrSS jrS\\   S\S	\\   4S jr\SS\S   4S jj5       rSS jrSSS\S	\4S jrSrU =r$ )FPQuantHfQuantizer!   z
Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTfp_quantquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      d/home/james-whalen/.local/lib/python3.13/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   FPQuantHfQuantizer.__init__+   s    ,77#6     c                 z   [         R                  R                  5       (       d  [        S5      e[	        5       (       d&  U R
                  R                  (       d  [        S5      eU R
                  R                  (       a  [        R                  S5        [        5       (       d  [        S5      eUc&  U R
                  R                  (       d  [        S5      e[        U[        5      (       aP  SUR                  5       ;   d  SUR                  5       ;   a'  U R
                  R                  (       d  [        S5      eg g g )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr
   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environment'FPQuantHfQuantizer.validate_environment/   s   zz&&((%b  $%%d.F.F.Y.Y S  ##66NN ] %&&ghhd&>&>&Q&QF 
 z4((*++--:;L;L;N1N,,??d  @ 2O )r   returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a  [	        SU S35      eU$ )NzJ`dtype` is None. Setting `dtype=torch.bfloat16` for qutlass compatibility.zInvalid `dtype` z=. fp_quant quantization only supports `dtype=torch.bfloat16`.)r'   infor!   bfloat16r)   )r   dtypes     r   update_dtypeFPQuantHfQuantizer.update_dtypeQ   sM    =KKdeNNE  enn$/w6stuur   modelr	   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                    [        X5      u  pgUR                  S5      (       aA  [        R                  R	                  UR                  U5      SS9Ul        S Ul        S Ul        g UR                  S5      (       aI  [        R                  R	                  UR                  U5      5      Ul        S Ul        S Ul        S Ul	        g [        R                  R	                  UR                  U5      5      Ul        UR                  5         g )Nz.qweightF)requires_gradz	.dqweight)r   endswithr!   nn	Parametertoqweightweightdqweightscalespre_forward)r   r7   r8   r9   r:   r   module_s           r   create_quantized_param)FPQuantHfQuantizer.create_quantized_paramZ   s     );	 z**"XX//}-# 0 FN !FM"FO{++#hh001NOFO FM!FN FM **;>>-+HIr   c                     SSK Jn  SSKJn  U" UU" U R                  5      S9  U R                  UR
                  l        g )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r   rK   integrations.fp_quantrL   r   config)r   r7   r   rK   rL   s        r   $_process_model_before_weight_loading7FPQuantHfQuantizer._process_model_before_weight_loading   s7    
 	:A$#89Q9Q#R	
 ,0+C+C(r   c                     U$ r    )r   r7   r   s      r   #_process_model_after_weight_loading6FPQuantHfQuantizer._process_model_after_weight_loading   s    r   missing_keysprefixc                   ^^	 SSK Jn  UR                  5        VVs1 s H  u  pV[        Xd5      (       d  M  UiM     snnm	S[        S[
        4U	U4S jjnU Vs/ s H  o" U5      (       a  M  UPM     sn$ s  snnf s  snf )Nr   FPQuantLinearkeyr0   c                    >^ ^ T R                  S5      (       d  T R                  S5      (       a  gT ST  3m[        UU 4S jT 5       5      $ )Nz.weightz.biasF.c              3   D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   rS   ).0namefull_keyr[   s     r   	<genexpr>QFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>   s      R>4s{6dh&66>s    )r=   any)r[   ra   fp_quant_namesrW   s   `@r   should_exclude>FPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude   sD    ||I&&#,,w*?*? 3%(HR>RRRr   )r   rZ   named_modulesr*   strbool)
r   r7   rV   rW   rZ   r`   rF   rf   r[   re   s
      `     @r   update_missing_keys&FPQuantHfQuantizer.update_missing_keys   su    *383F3F3Hn3H<4JW]Lm$3Hn	S 	S 	S 	S  ,G|>#3F|GG o Hs   A6A6A<-A<c                 j    U R                   R                  nU(       d  [        R                  S5        U$ )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr'   r(   )r   r7   	trainables      r   is_trainableFPQuantHfQuantizer.is_trainable   s0    ,,AA	NN E r   c                     g)NTrS   )r   safe_serializations     r   is_serializable"FPQuantHfQuantizer.is_serializable   s    r   c                 X    SSK Jn  [        X5      u  pV[        XT5      (       a  US;   a  gg)Nr   rY   )rB   rA   rC   TF)r   rZ   r   r*   )r   r7   r9   r   rZ   rF   tensor_names          r   param_needs_quantization+FPQuantHfQuantizer.param_needs_quantization   s+    *25Ef,,@a1ar   )r   )r4   torch.dtyper0   rz   )r7   r	   r   )__name__
__module____qualname____firstlineno____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r.   r5   ri   rH   rP   rT   listrk   propertyr   rp   rt   rj   rx   __static_attributes____classcell__)r   s   @r   r   r   !   s     !'+$#7,C 7 D$ $ $$ 	$
 &$LD DHtCy H# HRVWZR[ H (+<"=  .? S _c  r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r!   
get_loggerr{   r'   r   rS   r   r   <module>r      sM    +  2 0 \ \ ? 			H	%R Rr   