
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from ..base import DiffusersQuantizer


if TYPE_CHECKING:
    from ...models.modeling_utils import ModelMixin

from ...utils import (
    get_module_from_name,
    is_accelerate_available,
    is_accelerate_version,
    is_gguf_available,
    is_gguf_version,
    is_torch_available,
    logging,
)


if is_torch_available() and is_gguf_available():
    import torch

    from .utils import (
        GGML_QUANT_SIZES,
        GGUFParameter,
        _dequantize_gguf_and_restore_linear,
        _quant_shape_from_byte_shape,
        _replace_with_gguf_linear,
    )


logger = logging.get_logger(__name__)


class GGUFQuantizer(DiffusersQuantizer):
    use_keep_in_fp32_modules = True

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

        self.compute_dtype = quantization_config.compute_dtype
        self.pre_quantized = quantization_config.pre_quantized
        self.modules_to_not_convert = quantization_config.modules_to_not_convert

        if not isinstance(self.modules_to_not_convert, list):
            self.modules_to_not_convert = [self.modules_to_not_convert]

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"):
            raise ImportError(
                "Loading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`"
            )
        if not is_gguf_available() or is_gguf_version("<", "0.10.0"):
            raise ImportError(
                "To load GGUF format files you must have `gguf` installed in your environment: `pip install gguf>=0.10.0`"
            )

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        # Leave headroom for buffers that are created during quantization.
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if target_dtype != torch.uint8:
            logger.info(f"target_dtype {target_dtype} is replaced by `torch.uint8` for GGUF quantization")
        return torch.uint8

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = self.compute_dtype
        return torch_dtype

    def check_quantized_param_shape(self, param_name, current_param, loaded_param):
        loaded_param_shape = loaded_param.shape
        current_param_shape = current_param.shape
        quant_type = loaded_param.quant_type

        # GGUF stores quantized tensors as packed byte buffers; recover the logical
        # shape from the byte shape before comparing against the model parameter.
        block_size, type_size = GGML_QUANT_SIZES[quant_type]

        inferred_shape = _quant_shape_from_byte_shape(loaded_param_shape, type_size, block_size)
        if inferred_shape != current_param_shape:
            raise ValueError(
                f"{param_name} has an expected quantized shape of: {inferred_shape}, but received shape: {loaded_param_shape}"
            )

        return True

    def check_if_quantized_param(
        self,
        model: "ModelMixin",
        param_value: Union["GGUFParameter", "torch.Tensor"],
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        if isinstance(param_value, GGUFParameter):
            return True

        return False

    def create_quantized_param(
        self,
        model: "ModelMixin",
        param_value: Union["GGUFParameter", "torch.Tensor"],
        param_name: str,
        target_device: "torch.device",
        state_dict: Optional[Dict[str, Any]] = None,
        unexpected_keys: Optional[List[str]] = None,
    ):
        module, tensor_name = get_module_from_name(model, param_name)
        if tensor_name not in module._parameters and tensor_name not in module._buffers:
            raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")

        # The GGUFParameter already carries the quantized data, so loading reduces
        # to moving it onto the target device.
        if tensor_name in module._parameters:
            module._parameters[tensor_name] = param_value.to(target_device)
        if tensor_name in module._buffers:
            module._buffers[tensor_name] = param_value.to(target_device)

    def _process_model_before_weight_loading(
        self,
        model: "ModelMixin",
        device_map,
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        state_dict = kwargs.get("state_dict", None)

        self.modules_to_not_convert.extend(keep_in_fp32_modules)
        self.modules_to_not_convert = [module for module in self.modules_to_not_convert if module is not None]

        # Swap eligible linear layers for GGUF-aware linear layers so the quantized
        # weights can be loaded without dequantizing them first.
        _replace_with_gguf_linear(
            model, self.compute_dtype, state_dict, modules_to_not_convert=self.modules_to_not_convert
        )

    def _process_model_after_weight_loading(self, model: "ModelMixin", **kwargs):
        return model

    @property
    def is_serializable(self):
        return False

    @property
    def is_trainable(self) -> bool:
        return False

    @property
    def is_compileable(self) -> bool:
        return True

    def _dequantize(self, model):
        is_model_on_cpu = model.device.type == "cpu"
        if is_model_on_cpu:
            logger.info(
                "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device."
            )
            device = (
                torch.accelerator.current_accelerator()
                if hasattr(torch, "accelerator")
                else torch.cuda.current_device()
            )
            model.to(device)

        model = _dequantize_gguf_and_restore_linear(model, self.modules_to_not_convert)
        if is_model_on_cpu:
            model.to("cpu")
        return model