
    oi                        S r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
  SSKJr  SS	KJr  SS
KJr            SS jrg)zD
This module contains the implementation of the LoraPlus optimizer.
    )annotations)
attrgetterN)	Optimizer)ALL_LAYERNORM_LAYERS)get_parameter_names   )	PeftModel)	Embeddingc                  [        U [        5      nU Vs/ s H  nSU;  d  M  UPM     nn0 0 0 0 S.nU R                  5        H{  u  phUR                  (       d  M  [	        U5      " U 5      n	[        U	[        5      (       a	  XS   U'   MG  SU;   d  UR                  S:X  a  Xe;   a	  XS   U'   Mk  XS   U'   Mt  XS   U'   M}     X$S	'   UR                  S
S5      n
UR                  SS5      n[        US   R                  5       5      U
US.[        US   R                  5       5      U
US.[        US   R                  5       5      U
X#-  S.[        US   R                  5       5      SX#-  S./nU" U40 UD6n/ SQnUR                  U;   au  SSKnUR                  R                  R                  5       nU R!                  5        H9  n	[        U	["        R                  5      (       d  M$  UR%                  U	SSS05        M;     U$ s  snf )u  
Creates a LoraPlus optimizer.

Efficient Low Rank Adaptation of Large Models: https://huggingface.co/papers/2402.12354

Reference: https://github.com/nikhil-ghosh-berkeley/loraplus/

Args:
    model (`torch.nn.Module`): The model to be optimized.
    optimizer_cls (`torch.optim.Optimizer`): The optimizer class to be used.
    lr (`float`): The learning rate to be used for the optimizer.
    loraplus_lr_ratio (`float`):
        The ratio of learning ηB/ηA where ηA (lr) is passed in as the optimizer learning rate. Should be ≥1. Should
        be set in tandem with the optimizer learning rate (lr); should be larger when the task is more difficult
        and the model needs to update its features to learn well. In this case, it helps to make the learning rate
        slightly smaller (e.g., by a factor of 2) than typical vanilla LoRA learning rates
    loraplus_lr_embedding (optional `float`):
        If LoRA modules are added to embedding layers your can specify a different learning rate for them. Default
        value 1e-6.
    kwargs (`dict`): Additional keyword arguments to be passed to the optimizer.

Returns:
    `torch.optim.Optimizer`: An instance of the specified optimizer class configured with the model's parameters
    organized into groups with custom learning rates.
bias)groupAgroupBgroupB_no_decay	embeddingr   lora_B   r   r   r   lrloraplus_weight_decayg        loraplus_lr_embeddinggư>)paramsweight_decayr   )Adam8bit	AdamW8bitPagedAdam8bitPagedAdamW8bitr   Nweight
optim_bits    )r   r   named_parametersrequires_gradr   
isinstancer
   ndimpoplistvalues__name__bitsandbytesoptimGlobalOptimManagerget_instancemodulesnnregister_module_override)modeloptimizer_clsr   loraplus_lr_ratiokwargsdecay_parametersnameparam_groupsparammoduler   r   optimizer_grouped_parameters	optimizereight_bit_namesr'   managers                    R/home/james-whalen/.local/lib/python3.13/site-packages/peft/optimizers/loraplus.pycreate_loraplus_optimizerr<       s!   : +52FG)9P)9V4=O)9P	L --/""D!%(fi((.3%d+q'/4X&t,8=./5+0"4( 0 4L"JJ'>D"JJ'>E <188:;1	
 <4;;=>1'	
 <188:;1(	
 <(9:AACD(	
!$ . :EfEIRO0$$77DDFmmoF&",,//00LRTCUV & w Qs
   
G?G?)
r.   r	   r/   ztype[Optimizer]r   floatr0   r=   returnr   )__doc__
__future__r   operatorr   torch.nnr,   torch.optimr   transformers.pytorch_utilsr   transformers.trainer_pt_utilsr   
peft_modelr	   tuners.lora.layerr
   r<        r;   <module>rJ      sW    #   ! ; = " )YY%4Y=BYW\YYrI   