
    oixD                       S SK Jr  S SKrS SKrS SKJrJrJr  S SKrS SK	J
r
  S SKJr  S SKJrJr  S SKJrJr  S SKJr  S S	KJr  S S
KJrJrJr   " S S5      rSS jr\R:                  " 5       SSS jj5       r\R:                  " 5       SS j5       r " S S5      r \R:                  " 5          S     SS jj5       r!g)    )annotationsN)CallableOptionalUnion)clear_device_cache)snapshot_download)HFValidationErrorLocalEntryNotFoundError)SafetensorError	safe_open)cached_file)get_checkpoint_shard_files)is_bnb_4bit_availableis_bnb_availableis_xpu_availablec                  l   ^  \ rS rSrS
U 4S jjr\SS j5       r\SS j5       rS rS r	S r
S rS	rU =r$ )NFQuantizer#   c                  > [         TU ]  " U0 UD6  Xl        X l        X0l        X@l        U R                  S:X  a?  U R                  U R                  S9U l        U R                  R                  U5      U l        g U R                  S:X  a?  U R                  U R                  S9U l        U R                  R                  U5      U l        g [        S5      e)Nnormal)num_bitsuniformz-Other quantization methods not supported yet.)super__init__r   devicemethod
block_sizecreate_normal_mapnorm_lookup_tabletocreate_uniform_mapNotImplementedError)selfr   r   r   r   argskwargs	__class__s          P/home/james-whalen/.local/lib/python3.13/site-packages/peft/utils/loftq_utils.pyr   NFQuantizer.__init__$   s    $)&) $;;("%)%;%;T]]%;%SD"%)%;%;%>%>v%FD"[[I%%)%<%<dmm%<%TD"%)%;%;%>%>v%FD"%&UVV    c                    U (       aX  [         R                  " SSSUS-
  -  5      n[         R                  " SSSUS-
  -  5      n[         R                  " X#SS  /5      nU$ [         R                  " SSSU-  5      nU$ )Nr         )torchlinspacecat)	symmetricr   negativepositivetables        r'   r!   NFQuantizer.create_uniform_map3   sv    ~~b!Q8a<-@AH~~aA(Q,,?@HIIx!"67E  NN2q!X+6Er)   c                    SSK Jn  SU-  nU(       a~  UR                  [        R
                  " SU -
  XS-   5      5      R                  5       n/ n[        [        U5      S-
  5       H$  nUR                  SXW   -  SXWS-      -  -   5        M&     UnOUR                  [        R
                  " U SUS-  S-   5      S S 5      R                  5       nS/n	UR                  [        R
                  " U SUS-  5      S S 5      * R                  5       n
X-   U
-   n[        R                  " U5      nUR                  5       R                  nXfR                  5       -  nU$ ! [         a    [        S5      ef = f)Nr   )normzMThe required package 'scipy' is not installed. Please install it to continue.r,   r-   g      ?r+   )scipy.statsr7   ImportErrorppfr.   r/   tolistrangelenappendTensorsortvaluesmax)offsetr1   r   r7   
variationsvrA   indexv1v2v3s              r'   r   NFQuantizer.create_normal_map?   sP   	o( [
F
FNKLSSUAFs1vz*cAHnsQqy\/AAB +A %..jAo6IJ3BOPWWYBB88ENN63
aH"MNNVVXB"Aa%%**,)  	omnn	os   E E0c                4   [         R                  " U5      R                  5       nX-  nUR                  S5      n[         R                  " U R
                  5      R                  SS5      n[         R                  " XE-
  5      n[         R                  " USS9nXr4$ )Nr+   r-   dim)r.   absrB   	unsqueezetensorr   reshapeargmin)r#   weightmax_absweight_normedweight_normed_expanded
L_reshapedabs_diffqweights           r'   quantize_tensorNFQuantizer.quantize_tensorY   s    ))F#'')(!.!8!8!< \\$"8"89AA!RH
 993@A ,,xR0r)   c                    UR                  5       nU R                  U   nXB-  nUR                  UR                  5      nU$ )N)flattenr   rQ   shape)r#   rY   rT   qweight_flattenrU   rS   s         r'   dequantize_tensorNFQuantizer.dequantize_tensori   s=    !//+..?(.r)   c           	        [        UR                  5      S:w  a"  [        S[        UR                  5       S35      eUR                  S   UR                  S   -  U R                  -  S:w  a9  [        SUR                  S    SUR                  S    SU R                   S	35      eUR                  u  p#UR                  nUR                  5       nUR                  S
U R                  5      nU R                  S:X  a!  UR                  5       R                  S
S9S   nO>U R                  S:X  a#  UR                  S
S9SUR                  S
S9-  -   nO[        S5      eUR                  S
5      nXg-  nUR                  S
5      nU R                  R                  SS
5      n	[        R                  " X-
  5      n
[        R                   " U
S
S9nUR                  S
SU R"                  -  5      n[        R$                  " X#-  S-  U R"                  -  S4[        R&                  US9n[)        SU R"                  -  5       H:  nUS S 2U4   XR"                  -  -  US S 2U4'   US S 2S4==   US S 2U4   -  ss'   M<     XUR                  4$ )Nr,   +Only support 2D matrix, but your input has  dimensions.r   r-   zWeight with shape (z x z!) is not dividable by block size .r+   r   rL   r   g      @zMethod not supported yet.   dtyper   )r=   r^   
ValueErrorr   r   r]   rQ   r   rN   rB   meanstdr"   rO   r   r.   rR   r   zerosuint8r<   )r#   rS   MNr   weight_flattenweight_block
weight_maxweight_divabsrW   rX   rY   qweight_packis                 r'   quantize_blockNFQuantizer.quantize_blocks   sW   v||!J3v||K\J]]ijkk<<?V\\!_,t>!C%fll1o%6c&,,q/9J K226//1B!E 
 ||  )%--b$//B;;("%))+//B/7:J[[I%%**r*2S<;K;KPR;K;S5SSJ%&ABB))"-
$1%//3++33Ar:
99]78,,xR0 //"a4==&89{{AEQJ$>#B%++^de qDMM)*A#AqDMQ->>GAqDMA'!Q$-/ + 55r)   c                >   UR                   n[        R                  " UR                  S   SU R                  -  4[        R
                  US9n[        SU R                  -  5       H  nUR                  [        R                  5      SU R                  -  -  nUR                  [        R                  5      nU R                  U   R                  5       US S 2U4'   XR                  -	  nM     UR                  SU R                  5      nX-  nUR                  U5      nU$ )Nr   rf   rg   r,   r+   )r   r.   rl   r^   r   float32r<   r    longr   squeezerQ   r   )	r#   rY   rr   weight_shaper   rS   ru   lookup_table_idxrq   s	            r'   dequantize_blockNFQuantizer.dequantize_block   s    gmmA.T]]0BC5==aghqDMM)*A&zz%**54==8HH/225::>112BCKKMF1a4L.G	 + ~~b$//:*-r)   )r   r   r   r   r   )r,   cudar   @   )F   )g+ew?Fr,   )__name__
__module____qualname____firstlineno__r   staticmethodr!   r   rZ   r`   rv   r~   __static_attributes____classcell__)r&   s   @r'   r   r   #   sK    W 	 	  2  '6R r)   r   c                   [        U R                  5       5      nUS:w  a  [        SU S35      e[        R                  R                  U SS9u  p4nU[        R                  " [        R                  " U5      SS2SU24   5      -  n[        R                  " [        R                  " U5      SU2SS24   5      U-  nXgX4XQS.$ )	zf
:param weight: The matrix to decompose, of shape (H, W) :param reduced_rank: the final rank :return:
r,   rc   rd   F)full_matricesNr   )LRUSVhreduced_rank)r=   sizeri   r.   linalgsvdsqrtdiag)rS   r   matrix_dimensionr   r   r   r   r   s           r'   _low_rank_decompositionr      s     6;;=)1FGWFXXdeff ||e<HA"	UZZ

1a<&789:A

5::a=<!234r9A"SSr)   c                
   [        5       (       a  SS KnO[        S5      eUS;  a  [        S5      eUS::  a  [        S5      eU R                  5       u  pVU R                  nU R
                  n[        R                  " SU SU SU S	U S
U 3
5        [        5       (       a  US;   a  [        XSSS9n	Un
O[        5       (       a  SOSn
U R                  U
[        R                  S9n U R                  5       n[        U5       H  n[!        5         US:X  az  [        5       (       ak  UR"                  R%                  UR                  S5      SSSS9R                  U
5      nUR&                  R)                  UR*                  UR,                  5      nO(W	R/                  U5      u  nnnU	R1                  UUU5      nX-
  n[3        XS9nUS   US   US   nnnU [        R4                  " UU5      -
  nM     WWnnWR                  XxS9UU4$ )Nr   z>bitsandbytes is not available, please install it to use LoftQ.)r,   r   rf   z&Only support 2, 4, 8 bits quantizationz+Number of iterations must be greater than 0z	Weight: (z, z
) | Rank: z | Num Iter: z | Num Bits: )r,   rf   r   r   )r   r   r   r   xpur   r   rh   r   cpuFnf4)requires_gradcompress_statistics
quant_typer   r   r   r   )r   bitsandbytesri   r   r   rh   logginginfor   r   r   r    r.   ry   cloner<   r   nn
Params4bit
functionaldequantize_4bitdataquant_staterv   r~   r   mm)rS   r   r   num_iterbnbout_feature
in_featurer   rh   	quantizercompute_deviceresru   rY   dequantized_weightquantized_weightrT   r^   outputr   r   lora_Alora_Bs                          r'   
loftq_initr      s    "YZZy ABB1}FGG$kkmK]]FLLELL
K=:,jmT\S]]jksjtu !""h&&8^`a	"2"4"4&YYnEMMYBF
,,.C8_q=244ff''uUZ_ ( b   "%!?!?gNaNa!b/8/G/G/L,gu!*!;!;<LgW\!]) )H#C[&+vn7Ml1uxx1~%# & FF   <ffLLr)   c                r   SS K nUS:w  a  [        S5      e[        5       (       d  [        S5      e[        5       (       a  SOSnUR                  R                  U R                  U R                  5      nUR                  U[        R                  S9nX-
  n[        5         [        XsS9nUS	   US
   US   p:n	X4$ )Nr   r   z0Only 4 bit quantization supported at the moment.z0bitsandbytes 4bit quantization is not available.r   r   r   r   r   r   r   )r   ri   r   r   r   r   r   r   r    r.   ry   r   r   )rY   rS   r   r   r   r   r   residualr   r   r   s              r'   _loftq_init_newr      s    1}KLL ""KLL.00UfN77gFYFYZYYnEMMYBF*H$XIFfSk6.3I,A4Kr)   c                  $    \ rS rSrSrS rS rSrg)_SafetensorLoaderi  z
Simple utility class that loads tensors with safetensors from a single file or sharded files.

Takes care of file name normalization etc.

c                   Uc)   [        UR                  R                  R                  SS9nSnUR                  U5      (       d  [        R                  R                  X$5      nX l        [        UR                  5       SS 5      U l        SU l        SU l        S U l        [        R                  R'                  U5      (       d  UR)                  [        R                  R*                  5      S	   n [-        U[/        US
5      5      u  pgSU l        U Vs0 s H/  oR)                  [        R                  R*                  5      S   U_M1     n	nUS   R5                  5        VV
s0 s H
  u  pXU
   _M     sn
nU l        g g ! [        [
        4 a  n[        S5      UeS nAf[         a  n[        S5      UeS nAff = f! [0         a  n[3        SU S35      UeS nAff = fs  snf s  sn
nf )NT)local_files_onlyzThe provided model does not appear to be a transformers model or is a local model. In this case, you must pass the model_path argument that points to the safetensors file.zNThe model.safetensors file must be present on disk, but it could not be found.zmodel.safetensorsbase_model_prefixbase_model.model.Fr   zmodel.safetensors.index.jsonzCould not find file for zA, ensure that there is a (sharded) safetensors file of the model.r+   
weight_map)r   
base_modelconfig_name_or_pathAttributeErrorr	   ri   r
   endswithospathjoin
model_pathgetattrget_base_modelr   prefix
is_shardedr   exists
rpartitionsepr   r   OSErrorFileNotFoundErroritems)r#   
peft_modelr   excsuffixpar_dirresolved_archive_filesharded_metadatakfile_maprE   s              r'   r   _SafetensorLoader.__init__  s   
.z/D/D/K/K/Y/Ylpq
 %""6**j9J$!()B)B)DFY[_!`)ww~~j)) ++BGGKK8;G:T[2PQ;7% #DOBWXBWQRWW[[1"5q8BWHX:J<:X:^:^:`a:`$!q1+~:`aDO *) #$56  a  +  d,  '.zl:{| YasG   (E= 0F: 6G!G!=F7FF7&F22F7:
GGGc                |   U R                   (       d  U R                  nOU R                  U   n[        USSS9 n UR	                  U5      nS S S 5        U$ ! [
         aI  nU R                  (       a1  U[        U R                  5      S-   S  nUR	                  U5      n S nANVUeS nAff = f! , (       d  f       W$ = f)Nptr   )	frameworkr   r-   )r   r   r   r   
get_tensorr   r   r=   )r#   name	file_pathfrP   r   s         r'   r   _SafetensorLoader.get_tensor6  s    I-IyD?1	d+ @  # ))D$:$: ;a ? ABD\\$/FI @? s4   B,A
B) =B$B,"B$$B))B,,
B;)r   r   r   r   r   N)r   r   r   r   __doc__r   r   r    r)   r'   r   r     s    'bRr)   r   c                   [        5       (       d  [        S5      eSSKJn  SnSn[	        X5      nU R                  5        GH  u  p[        X5      (       d  M  UR                  U5      (       d  [        S5      eSnU[        U5      S nUR                  US	-   5      n
U	R                  U   n[        U	R                  U
S
US9u  pU(       d<  XR                  U   R                  l        XR                   U   R                  l        M  U	R                  U   R                  R                  nU	R                   U   R                  R                  nXR                  U   R                  l        XR                   U   R                  l        U" X5      nU(       d:  XR                  U   R                  l        XR                   U   R                  l        AAGM     U(       d  [        S5      eg)aB  
Replace the LoRA weights of a model quantized with bitsandbytes, using the LoftQ technique.

The replacement is done on the fly by loading in the non-quantized weights from a locally stored safetensors model
file and initializing the LoRA weights such that the quantization error between the original and quantized weights
is minimized.

As lazy loading is not possible with pickle, normal PyTorch checkpoint files cannot be supported.

Depending on the model size, calling this function may take some time to finish.

Args:
    peft_model (`PeftModel`):
        The model to replace the weights of. Must be a quantized PEFT model with LoRA layers.
    model_path (`Optional[str]`):
        The path to the model safetensors file. If the model is a Hugging Face model, this will be inferred from
        the model's config. Otherwise, it must be provided.
    adapter_name (`str`):
        The name of the adapter to replace the weights of. The default adapter name is "default".
    callback (`Optional[Callable[[PeftModel, str], bool]]`):
        A callback function that will be called after each module is replaced. The callback function should take
        the model and the name of the current module as input and return a boolean indicating whether the
        replacement should be kept. If the callback returns False, the replacement will be rolled back. This can be
        very useful to confirm that the LoftQ initialization actually decreases the quantization error of the
        model. As an example, this callback could generate logits for given input and compare it with the logits
        from the original, non-quanitzed model with the same input, and only return `True` if there is an
        improvement. As this is a greedy optimization, it's possible that calling this function multiple times
        yields incremental improvements.
zHbitsandbytes must be installed and the model must be quantized in 4bits.r   )
Linear4bitr   Fz8The passed model does not appear to be a valid PeftModelTNz.weightr   )r   r   z%No bnb LoRA module found on the model)r   ri   peft.tuners.lorar   r   named_modules
isinstance
startswith	TypeErrorr=   r   rr   rS   r   r   r   )r   r   adapter_namecallbackr   r   	any_matchsafetensor_loaderr   modulerP   r   r   r   lora_A_beforelora_B_beforeshould_replaces                    r'   replace_lora_weights_loftqr   J  s   H !""cdd+ !FI)*A #002&--v&&VWW	CKM""--dY.>?xx-(Yef6<MM,'..36<MM,'..3l3::??l3::??28l#**/28l#**/!*36CMM,'..36CMM,'..3=; 3> @AA r)   )    )r-   )rS   z'Union[torch.Tensor, torch.nn.Parameter]r   intr   r   )r   r   r   r   )NdefaultN)r   zOptional[str]r   strr   z0Optional[Callable[[torch.nn.Module, str], bool]])"
__future__r   r   r   typingr   r   r   r.   accelerate.utils.memoryr   huggingface_hubr   huggingface_hub.errorsr	   r
   safetensorsr   r   transformers.utilsr   transformers.utils.hubr   peft.import_utilsr   r   r   r   r   no_gradr   r   r   r   r   r)   r'   <module>r	     s   $ #  	 , ,  6 - M 2 * = W WG GTT" .M .Mb  (B BJ  !%!AE	NBNB NB ?	NB NBr)   