
    oiM                        S SK Jr  S SKrS SKJr  S SKrS SKJr  S SKJr  SSK	J
r
  SrS	r " S
 S\R                  5      rSS jrSS jrSS jr S         SS jjrg)    )annotationsN)Any)nn)PreTrainedModel   )ArrowConfigtask_gks_c                     ^  \ rS rSrSrU 4S jr\R                  " 5       S 5       rSS jr	\R                  " 5       S 5       r
\R                  " 5       S 5       rSS jrS	 rS
rU =r$ )ArrowLoraLinearLayer   zW
This class represent the main logic of the arrow routing algorithm for linear layers.
c                Z  > [         TU ]  5         Xl        SU l        UR                  U l        UR
                  U l        UR                  U l        UR                  R                  5       U l        UR                  U l
        UR                  U l        SU l        / U l        Xl        SU l        g )NFT)super__init__in_features_protos_readytop_krouter_temperaturetemperaturerng_seedtask_adapter_namescopygks_adapter_namesuse_gksgks_donegks_added_adapter_namescast_input_dtype_enabled)selfr   arrow_config	__class__s      P/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/lora/arrow.pyr   ArrowLoraLinearLayer.__init__#   s    &"!''
'::$--++002 	 ** 	 $++')$&(,%    c                   UR                  5        Vs/ s HM  nX2;   d  M
  US:w  d  M  UR                  S5      (       a#  U[        S5      S R                  5       (       a  MK  UPMO     nn[	        U R
                  5      [	        U5      :X  a  g[        U R
                  5      [        U5      :  a)  U Vs/ s H  oUU R
                  ;  d  M  UPM     snU l        UR                  5       U l        SU l        gs  snf s  snf )zv
Called when adapters are added/removed/renamed so Arrow can refresh its internal state before the next forward
pass.
arrow_routerr
   NF)	keys
startswithlenisdigitsortedr   r   r   r   )r   lora_Alora_Bkall_ts_adapter_namesxs         r!   on_adapter_change&ArrowLoraLinearLayer.on_adapter_change7   s     [[] 
"{  N2 <=LL<P<PUVWZ[aWbWdUeUmUmUo " 	  
 $))*f5I.JJ t&&'#.B*CC7K+p7K!X\XoXoOoA7K+pD( #7";";"="! 
 ,qs!   	C4C45C4!C42C9	C9c                j   UR                  [        R                  5      nUR                  [        R                  5      nUR                  U-  nSnU R                  bL  [        R
                  " UR                  R                  S9nUR                  [        U R                  5      5        [        R                  " UR                  S5      UR                  UR                  US9n	XR                  5       U-   -  n	[        U5       H,  n
UR                  XuU	-  -  -  nXR                  5       U-   -  n	M.     U	$ )u  
Computes the top *right* singular vector of ΔW = B @ A without forming ΔW.

Theory:
    For any matrix M, the right singular vectors are the eigenvectors of Mᵀ M. If ΔW = B @ A (with A ∈
    ℝ^{r×in}, B ∈ ℝ^{out×r}), then
        ΔWᵀ ΔW = (B @ A)ᵀ (B @ A) = Aᵀ (Bᵀ B) A ∈ ℝ^{in×in}.
    Therefore, the dominant right singular vector of ΔW is the dominant eigenvector of M := Aᵀ (Bᵀ B) A. We
    find it by *power iteration* on the linear operator
        v ↦ Aᵀ (Bᵀ B) (A v),
    which avoids materializing ΔW (out×in) or M (in×in). The result lives in the input/token space (size =
    in_features), which is exactly what Arrow needs. (Right singular vectors ≡ eigenvectors of MᵀM; power
    iteration converges to the dominant eigenvector under mild conditions.)
=============================== Practical notes:
    - We perform all iteration in float32 for numerical stability, then cast back
    to the LoRA dtype/device before storing/using the prototype.
    - Convergence is checked with a simple fixed-iter cap (`iters`) and/or
    `allclose` tolerance (`tol`).
    - The returned vector is unique up to sign (±), as with any singular vector.
    Downstream code should be sign-invariant.
N)devicer   )dtyper3   	generator)totorchfloat32Tr   	Generatorr3   typemanual_seedintrandnsizer4   normrange)r   ABitersepsA32B32Cgenv_ws               r!   top_right_singular_vec_from_BA3ArrowLoraLinearLayer.top_right_singular_vec_from_BAP   s    0 dd5==!dd5==!EECK ==$//9COOC./ KK399SZZSVWC uAAg'AVVX^$A 
 r#   c                p   U R                   (       a  g/ nU R                   He  nX   R                  nX$   R                  nU R                  XV5      nUR	                  UR
                  UR                  S9nUR                  U5        Mg     [        R                  " USS9n	U R                  SU	SS9  SU l         g)	a  
Computes a prototype vector for each LoRA module in every layer by applying Singular Value Decomposition (SVD)
to the `lora_A` matrix and extracting the top right singular vector.

These prototypes are later used to calculate the cosine similarity between each input token and each expert.
The resulting similarity scores serve as coefficients to compute a weighted average of the corresponding LoRA
modules, effectively routing each token through its most relevant experts.

** This prototype computation is done is done once for all experts and is re-done on newly added adapters.**

Args:
    lora_A : Matrices A in LoRA layer.
    lora_B (optional): Matrices B in LoRA layer. Defaults to None.
N)r4   r3   r   dim
prototypesF)
persistentT)r   r   weightrM   r6   r4   r3   appendr7   stackregister_buffer)
r   r+   r,   protosnamerB   rC   proto32protoproto_stacks
             r!   build_prototypes%ArrowLoraLinearLayer.build_prototypes}   s    " ++D##A##A 99!?GJJQWWQXXJ>EMM%  , kk&a0 	\;5I!r#   c                n   U R                   (       d  gU R                  (       a  U R                  (       d  g[        R                  " U R
                   Vs/ s H  o1U   R                  PM     snSS9R                  S5      n[        R                  " U R
                   Vs/ s H  o2U   R                  PM     snSS9R                  S5      nU R                  SL ab  U R                   HQ  nX   R                  R                  R                  U5        X&   R                  R                  R                  U5        MS     OaU R                   HQ  nX   R                  R                  R                  U5        X&   R                  R                  R                  U5        MS     SU l        / U l        gs  snf s  snf )a  
This function performs General Knowledge Subtraction. It takes an average of provided general_adapters, and
subtract it from each task_adapter. This subtraction tries to purify the task adapters, based on
"forgetting-via-negation" principle. Forgetting-via-negation is a task-arithmetic operation, explained in:
https://huggingface.co/papers/2212.04089 The task adapters will be more focused and isolated, enhancing the
performance on new tasks.

Args:
    lora_A : Matrices A in LoRA layer.
    lora_B : Matrices A in LoRA layer.
Nr   rP   FT)r   r   r   r7   rV   r   rT   meanr   datasub_)r   r+   r,   navg_Aavg_BrY   s          r!   gen_know_sub!ArrowLoraLinearLayer.gen_know_sub   s]    ||]]4#?#? KK4;Q;Q R;Qa!1!1;Q RXYZ__E KK4;Q;Q R;Qa!1!1;Q RXYZ__E
 }}% 33DL'',,11%8L'',,11%8 4 !88DL'',,11%8L'',,11%8 9
 !DM+-D() !S !Ss   F-F2c                t    Uc  g[        U SS5      nU(       a  UR                  U:X  a  U$ UR                  US9$ )aT  
Whether to cast the dtype of the input of the forward method.

Usually, we want to enable this to align the input dtype with the dtype of the weight, but by setting
layer.cast_input_dtype=False, this can be disabled if necessary.

Enabling or disabling can be managed via the peft.helpers.disable_lora_input_dtype_casting context manager.
Nr   T)r4   )getattrr4   r6   )r   r/   r4   r   s       r!   _cast_input_dtype&ArrowLoraLinearLayer._cast_input_dtype   s@     9#*41KT#R (agg.>Htt%t  r#   c                   U R                  XU R                  S      R                  R                  5      nUR                  GtpgnUR                  SU5      n	U	R                  S5      U R                  R                  S5      p[        R                  " U R                   Vs/ s H  oU   PM	     snU	R                  U	R                  S9n[        R                  " XR                  R                  -  5      n[        R                  " XR                  SS9u  nnU	R                  X4[!        S5      5      nUR#                  SUU5        [        R$                  " UU R&                  -  SS9n[        R(                  " U R                   Vs/ s H  oU   R                  PM     snSS9n[        R(                  " U R                   Vs/ s H  oU   R                  PM     snSS9n[        R*                  " SU	U5      n[        R*                  " SUU5      nUUR                  SSS5      -  n[        R*                  " S	UU5      nU" U5      nUR                  S5      nUR
                  " U/UQUP76 $ s  snf s  snf s  snf )
u  
Applies Arrow routing inside a LoRA layer.

Steps:
1. Compute cosine similarity between each token representation and all adapter prototypes.
2. Select the top-k experts per token and normalize their scores with a softmax.
3. Project tokens into each selected expert’s low-rank space (A weights).
4. Map back to the output space (B weights).
5. Aggregate expert outputs via the weighted sum of their contributions.
6. Apply dropout, scaling, and return the reshaped delta.

- Conceptually, this is a Mixture-of-Experts (MoE) over LoRA adapters,
where coefficients are derived from prototype similarity.

Returns:
    delta: LoRA output adjustment computed by Arrow routing.
r   )r3   r4   r   rP   z-infztf, erf -> terzter, eor -> teozte, teo -> to)rj   r   rT   r4   shapeviewr?   rR   r7   tensorr3   absr9   topkr   new_fullfloatscatter_softmaxr   rV   einsum)r   r/   r+   r,   dropoutscalingrC   restF_intoktErc   scales_tenssimtop_vidx
full_scorecoeffA_stackB_stackzy
delta_flatdeltaout_dims                             r!   forwardArrowLoraLinearLayer.forward   s   $ ""1T-D-DQ-G&H&O&O&U&UV$ffRxx{DOO0031 ll!%!8!89!8AQZ!89::))
 iioo///0 ZZZZQ7
s\\1&%-8
AsE*j4+;+;;C ++9P9PQ9PAay//9PQWXY++9P9PQ9PAay//9PQWXY LL)38 LL*Aw7   B** \\/5!<
 
#//"%zz!,d,G,,M :  RQs    I,I1I6)r   r   r   r   r   r   r   r   r   r   r   )   g:0yE>)r4   ztorch.dtype)__name__
__module____qualname____firstlineno____doc__r   r7   no_gradr0   rM   r]   rf   rj   r   __static_attributes____classcell__)r    s   @r!   r   r      sr    -( ]]_# #0+Z ]]_!" !"F ]]_&. &.P!"?- ?-r#   r   c                   SnU GHF  n[        5       nSnSnU R                  5        H  u  px[        US5      (       d  M  X8R                  ;   d  M)  UR                  U   R                  n	UR
                  U   R                  n
UR                  S5      S   nUR                  U5        U	R                  S   nU	R                  U
R                  4nM     Uc  XVUS.nM  XRS   :w  a  [        SU S	U S
US    35      eXbS   :w  a  [        SU SU S
US    35      eXBS   :w  d  GM   [        SU S[        U5       S[        US   5       35      e   [        US   5      nU[        US   5      4$ )z
After loading all adapters into `model`, check they share:
  - the same LoRA rank (r)
  - identical weight shapes
  - identical sets of target_modules
Returns (sorted list of target module names, agreed rank r).
Nr+   .rm   r   )rshapesmodulesr   [z] rank mismatch: z != r   z] shape mismatch: r   z-] target_modules mismatch:
  this adapter -> z
  reference   -> )setnamed_moduleshasattrr+   rT   r,   splitaddrn   
ValueErrorr*   r=   )modeladapter_names	referencerY   curr_modulescurr_rcurr_shapes	full_namemodulerB   rC   mod_nameagreed_moduless                r!   %check_loaded_lora_compatibility_arrowr     s    Iu!&!4!4!6Ivx((T]]-BMM$'..MM$'..$??3/3  * ww0 "7 $UI3' 1TF*;F84	RUGW!XYY11 1TF*<[MiX`NaMb!cdd33 v ))/)=(> ?((.y/C(D'EG / : Ii01N3y~...r#   c           	        SSK Jn  Sn SSKnUR                  R                  nSn SSKJn  UR                  UR                  4nUb  Xc4-   nUb  Xe4-   n/ nU R                  5        H  u  p[        U	S5      (       d  M  U Hv  n
U
[        U	S0 5      ;   d  M  [        U	SS5      =(       d    [        U	SS5      nUb  UOU	n[        X5      (       a  MP  UR                  X[        U5      R                  45        Mx     M     U(       aC  S/nU H   u  pnUR                  SU
 S	U S
U 35        M"     [!        SR#                  U5      5      eg! [         a     GN3f = f! [         a     GN;f = f)z
Validate that every module holding LoRA weights for any of `adapter_names` is Linear-like: nn.Linear,
bitsandbytes.nn.Linear4bit, nn.Conv1d, or transformers.models.gpt2.modeling_gpt2.Conv1D. If not, raise.
r   N)Conv1Dr+   
base_layeroriginal_modulezzLoRA adapters must only target Linear-like layers (nn.Linear, nn.Conv1d, HF Conv1D, or bitsandbytes.nn.Linear4bit). Found:z  - adapter 'z' on module 'z
' of type 
)torch.nnr   bitsandbytes
Linear4bitImportError&transformers.models.gpt2.modeling_gpt2r   LinearConv1dr   r   ri   
isinstancerU   r;   r   	TypeErrorjoin)r   r   r   r   bnbHFConv1Dallowed_types	offendersr   r   rY   baselayer_to_checklinestnames                  r!   )ensure_adapters_target_linear_layers_onlyr   H  s   
 J"VV&&
 HM YY		*M%5%3I"002	68$$%768R88"6<>j'&RceiBjD-1-=T6N%nDD!(($4;O;X;X)YZ & 3 W
 '0"DULL=mI;jQVPWXY '0		%()) 7    s"   E
 E 

EE
E)(E)c                   [         R                  R                  U 5      (       aU  [         R                  R                  [         R                  R	                  U S5      5      (       d  [        SU  S35      eU S4$ U R                  S5      R                  S5      n[        U5      S:  a>  SR	                  USS 5      n[        U5      S:  a  SR	                  USS 5      nX#4$ US4$ U S4$ )aU  
Resolve a user-provided adapter `path` into (model_id, subfolder).

Supports:
  - Local path to a folder that contains `adapter_config.json`
  - Hub path with subfolder, e.g. "user/repo/ts_expert_0[/more/...]", which becomes:
        model_id="user/repo", subfolder="ts_expert_0[/more/...]"
  - Plain Hub repo id "user/repo" (no subfolder)
zadapter_config.jsonzLocal adapter path 'z)' does not contain 'adapter_config.json'.N/   )	ospathisdirisfiler   r   stripr   r(   )r   partsmodel_id	subfolders       r!   _resolve_adapter_sourcer   y  s     
ww}}Tww~~bggll41FGHH3D69bcddTzJJsO!!#&E
5zQ88E"1I&u:>qr+I&&~:r#   c                   Ub  [        U5      S:X  a  [        S5      eSSKJnJn  [        US   5      u  px[         S3n	[        U5      n
Ub
  SU
;  a  XS'   UR                  " U 4UU	S.U
D6n[        S[        U5      5       HJ  n[         U 3n[        X   5      u  p[        U5      nUb  SU;  a  UUS'   UR                  " SUUS.UD6  ML     [        [        U5      5       Vs/ s H  n[         U 3PM     snUl        UR                  (       a  Ub  [        U5      S:X  a  [        S5      e[        [        U5      5       HJ  n[         U 3n[        X<   5      u  p[        U5      nUb  SU;  a  UUS'   UR                  " SUUS.UD6  ML     [        [        U5      5       Vs/ s H  n[         U 3PM     snUl        O/ Ul        [        XR                  UR                  -   S	9u  nn[!        XR                  UR                  -   S	9  U" UUUS
9nUR#                  SUS9  UR%                  S5        U$ s  snf s  snf )Nr   zF`task_specific_adapter_paths` should contain at least one adapter path)
LoraConfig	PeftModel0r   )r   adapter_namer   zDYou should provide general LoRA paths if you want to use GenKnowSub.)r   )r   target_modulesr   r%   )r   peft_config )r(   r   peftr   r   r   TASK_ADAPTER_PREFIXdictfrom_pretrainedrA   load_adapterr   r   GKS_ADAPTER_PREFIXr   r   r   add_adapterset_adapter)
base_modeltask_specific_adapter_pathsr   general_adapter_pathsadapter_kwargsr   r   	model_id0sub0initial_ts_expert_namefirst_kwargsr   its_expert_namemidsubmore_kwargsgen_expert_name
gks_kwargsr   r   
router_cfgs                         r!   create_arrow_modelr     s    #*c2M.NRS.Sabb*-.I!.LMOI 34A6'LK|;$([!%%+ 	E 1c567/04*+F+IJ>*?{+='*K$ 	
'	
 	
 8 MRRUVqRrLs&tLsq*=)>qc'BLs&tL# (C0E,F!,Kcdds012A!3 4QC8O./D/GHHCn-J;j#@*-
;' ,  3 OTTWXmTnNo)pNo-?,@*DNo)p&)+&=<<|?]?]]NA .<<|?]?]] !%
J
 
>zJ	n%LI 'u  *qs   &H:(H?)r   	list[str])r   strreturnztuple[str, str | None])N)
r   r   r   r   r   r   r   zlist[str] | Noner   r   )
__future__r   r   typingr   r7   r   transformersr   configr   r   r   Moduler   r   r   r   r   r   r#   r!   <module>r      s    # 	    (    |-299 |-~(/V.*b< /3	II!*I I ,	I
 Ir#   