
    cCi                         S SK JrJrJr  S SKJr  SSKJr  \" 5       (       a  SSKr\R                  " \
5      r " S S\5      rg)	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                      ^  \ rS rSrSrSrS/rS\4U 4S jjrS r	SS	 jr
S
 rS rS r\S 5       rS\4S jrSS\4S jjrSrU =r$ )CompressedTensorsHfQuantizer   zu
Quantizer for the compressed_tensors package.  Loads and restores models to
quantized state with compressed_tensors
Tcompressed_tensorsquantization_configc                    > [         TU ]  " U40 UD6  [        5       (       d  [        S5      eUR	                  5         SSKJn  UR                  U5      U l        UR                  U l	        Xl
        g )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r	   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       n/home/james-whalen/.local/lib/python3.13/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   %CompressedTensorsHfQuantizer.__init__$   si    ,77.003  	%%'B)AABUV1@@#6     c                 l    [        5       (       d  [        S5      e[        5       (       d  [        S5      eg )Nr   z;torch is required for using compressed-tensors quantization)r   r   r   )r   argsr   s      r   validate_environment1CompressedTensorsHfQuantizer.validate_environment7   s9    .003  "##[\\ $r   returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a  [         R                  S5        U$ )NzELoading model using torch.float16 for compressed-tensors quantizationzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)loggerinfotorchfloat16)r   dtypes     r   update_dtype)CompressedTensorsHfQuantizer.update_dtypeA   sD    =KK_`MME  emm#KKtur   c                    SSK Jn  U R                  R                  nU" XU R                  5        U R                  R
                  (       d  U R                  R                  (       a  U R                  R                  US9  g g )Nr	   )apply_quantization_configmodel)compressed_tensors.quantizationr.   r   r   r   is_quantization_compressedis_sparsification_compressedcompress_model)r   r0   r   r.   ct_quantization_configs        r   $_process_model_before_weight_loadingACompressedTensorsHfQuantizer._process_model_before_weight_loadingI   s`    M!%!D!D 	"%ATATU$$??''DDOO***7 Er   c                     U R                   R                  (       a  U R                  (       a  U R                   R                  (       a  U R                  R                  US9  gg)z3Decompress loaded model if necessary - need for qatr/   N)r   r2   r   r3   r   decompress_model)r   r0   r   s      r   #_process_model_after_weight_loading@CompressedTensorsHfQuantizer._process_model_after_weight_loadingV   sE     $$??H[H[%%BBOO,,5,9 Cr   c                     SSSSSS.nUR                  5       bD  UR                  5       R                  b)  UR                  5       R                  R                  U5        U$ )Nlocal_colwiselocal_rowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_plans      r   update_tp_plan+CompressedTensorsHfQuantizer.update_tp_plan^   s_    @OFU>MDS@O
 !!#/F4J4J4L4_4_4k""$77>>Or   c                     g)NT r   s    r   is_trainable)CompressedTensorsHfQuantizer.is_trainablek       r   c                 h    U R                   (       + =(       d    U R                  R                  (       + $ )z7Loaded Models can carry out quantization aware training)r   r   r2   rH   s    r   is_qat_trainable-CompressedTensorsHfQuantizer.is_qat_trainableo   s'     &&&ad.F.F.a.a*aar   c                     g)z>Models quantized using compressed tensors can be saved to diskTrG   )r   safe_serializations     r   is_serializable,CompressedTensorsHfQuantizer.is_serializablet   rK   r   )r   r   r   )r*   torch.dtyper$   rS   )N)__name__
__module____qualname____firstlineno____doc__requires_calibrationrequired_packagesr   r   r"   r+   r6   r:   rD   propertyrI   boolrM   rQ   __static_attributes____classcell__)r   s   @r   r   r      ss    
  -.7,C 7&]8:  b$ b
$  r   r   )utilsr   r   r   utils.quantization_configr   baser   r(   
get_loggerrT   r&   r   rG   r   r   <module>rc      s@     Q P ?  			H	%[; [r   