
    oi6                     ,    S SK Jr  S SKr " S S5      rg)    )OptionalNc                      \ rS rSrSr        SS\\   S\\   S\\   S\\   S\S	\\   S
\S\\   4S jjr	S r
S rS rS\R                  4S jr\S\\R                  \R                  \R                  4   4S j5       r\SS\\R                  \R                  4   4S jj5       rSS jrSS jrS\R                  4S jr\SS\S\S\4S jj5       rSrg)IncrementalPCA   a%  
An implementation of Incremental Principal Components Analysis (IPCA) that leverages PyTorch for GPU acceleration.
Adapted from https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/decomposition/_incremental_pca.py

This class provides methods to fit the model on data incrementally in batches, and to transform new data based on
the principal components learned during the fitting process.

Args:
    n_components (int, optional): Number of components to keep. If `None`, it's set to the minimum of the
        number of samples and features. Defaults to None.
    copy (bool): If False, input data will be overwritten. Defaults to True.
    batch_size (int, optional): The number of samples to use for each batch. Only needed if self.fit is called.
        If `None`, it's inferred from the data and set to `5 * n_features`. Defaults to None.
    svd_driver (str, optional): name of the cuSOLVER method to be used for torch.linalg.svd. This keyword
        argument only works on CUDA inputs. Available options are: None, gesvd, gesvdj, and gesvda. Defaults to
        None.
    lowrank (bool, optional): Whether to use torch.svd_lowrank instead of torch.linalg.svd which can be faster.
        Defaults to False.
    lowrank_q (int, optional): For an adequate approximation of n_components, this parameter defaults to
        n_components * 2.
    lowrank_niter (int, optional): Number of subspace iterations to conduct for torch.svd_lowrank.
        Defaults to 4.
    lowrank_seed (int, optional): Seed for making results of torch.svd_lowrank reproducible.
Nn_componentscopy
batch_size
svd_driverlowrank	lowrank_qlowrank_niterlowrank_seedc	                     Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        S U l        U R                  (       a  U R                  5         g g )N)
r   r   r	   r
   r   r   r   r   n_features__validate_lowrank_params)	selfr   r   r	   r
   r   r   r   r   s	            T/home/james-whalen/.local/lib/python3.13/site-packages/peft/utils/incremental_pca.py__init__IncrementalPCA.__init__.   sO     )	$$"*(<<))+     c                     U R                   c-  U R                  c  [        S5      eU R                  S-  U l         g U R                   U R                  :  a  [        S5      eg )NzKn_components must be specified when using lowrank mode with lowrank_q=None.   z8lowrank_q must be greater than or equal to n_components.)r   r   
ValueError)r   s    r   r   'IncrementalPCA._validate_lowrank_paramsG   sY    >>!  ( !noo!..2DN^^d///WXX 0r   c                 T    [         R                  R                  USU R                  S9$ )NF)full_matricesdriver)torchlinalgsvdr
   r   Xs     r   _svd_fn_fullIncrementalPCA._svd_fn_fullO   s"    ||tOOr   c                 P   U R                   S Ln[        R                  R                  US9   U(       a   [        R                  " U R                   5        [        R
                  " XR                  U R                  S9u  p4nX4UR                  4sS S S 5        $ ! , (       d  f       g = f)N)enabled)qniter)	r   r   randomfork_rngmanual_seedsvd_lowrankr   r   mH)r   r"   seed_enabledUSVs         r   _svd_fn_lowrankIncrementalPCA._svd_fn_lowrankR   su    ((4\\""<"8!!$"3"34''^^4CUCUVGA!:	 988s   A!B
B%returnc                 R   [         R                  [         R                  /n[        U[         R                  5      (       d$  [         R
                  " U[         R                  S9nO!U R                  (       a  UR                  5       nUR                  u  p4U R                  c  OWU R                  U:  a  [        SU R                   SU S35      eU R                  U:  a  [        SU R                   SU 35      eUR                  U;  a  UR                  [         R                  5      nU$ )z
Validates and converts the input data `X` to the appropriate tensor format.

Args:
    X (torch.Tensor): Input data.

Returns:
    torch.Tensor: Converted to appropriate format.
)dtypezn_components=z invalid for n_features=z<, need more rows than columns for IncrementalPCA processing.z6 must be less or equal to the batch number of samples )r   float32float64
isinstanceTensortensorr   cloneshaper   r   r6   to)r   r"   valid_dtypes	n_samples
n_featuress        r   _validate_dataIncrementalPCA._validate_dataZ   s    u}}5!U\\**Qemm4AYY	A !	$+ 1 122J:, WM M  * 1 122hirhst  77,&U]]#Ar   c                    U R                   S   S:X  a  XU4$ US:  a  Uc  [        S5      eUc  [        S5      e[        R                  " U R                   S   /U R                  S9nX4-   nUc<  [        R
                  " U R                   S   [        R                  U R                  S9nOX-  nU R                  S[        R                  S9nXg-   U-  nXt-  n	X	-
  n
U
R                  S[        R                  S9R                  5       nU
R                  5         U
R                  S[        R                  S9nXU-  -  nUc  X-  nO<X#-  nUR                  5       U-  nUU-   X-  Xo-  U-
  R                  5       -  -   nUU-  nXU4$ )a"  
Computes the incremental mean and variance for the data `X`.

Args:
    X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
    last_mean (torch.Tensor): The previous mean tensor with shape (n_features,).
    last_variance (torch.Tensor): The previous variance tensor with shape (n_features,).
    last_sample_count (torch.Tensor): The count tensor of samples processed before the current batch.

Returns:
    Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Updated mean, variance tensors, and total sample count.
r   z6last_mean should not be None if last_sample_count > 0.z:last_variance should not be None if last_sample_count > 0.device   )r6   rF   )dimr6   )r=   r   r   r;   rF   zerosr8   sumsquaresquare_double)r"   	last_meanlast_variancelast_sample_countnew_sample_countupdated_sample_countlast_sumnew_sumupdated_meanTtemp
correctionnew_unnormalized_varianceupdated_variancelast_unnormalized_variancelast_over_new_countupdated_unnormalized_variances                    r   _incremental_mean_and_var(IncrementalPCA._incremental_mean_and_var}   s     771:?->>>q   !YZZ$ !]^^ <<QXXF0C{{1771:U]]188TH 4H%%AU]]%3 *.BB&uXX!5==X9@@B
$(HH%--H$H!!2B%BB! 8O)6)J&"3":":"<?O"O*+,%<@^ah@h?p?p?rrs *
  =?SS/CCCr   c                    U(       aZ  [         R                  " [         R                  " U 5      SS9n[         R                  " X[	        U R
                  S   5      4   5      nOZ[         R                  " [         R                  " U5      SS9n[         R                  " U[	        UR
                  S   5      U4   5      nXSU R
                  S    R                  SS5      -  n XR                  SS5      -  nX4$ )a  
Adjusts the signs of the singular vectors from the SVD decomposition for deterministic output.

This method ensures that the output remains consistent across different runs.

Args:
    u (torch.Tensor): Left singular vectors tensor.
    v (torch.Tensor): Right singular vectors tensor.
    u_based_decision (bool, optional): If True, uses the left singular vectors to determine the sign flipping.
        Defaults to True.

Returns:
    Tuple[torch.Tensor, torch.Tensor]: Adjusted left and right singular vectors tensors.
r   rH   rG   N)r   argmaxabssignranger=   view)uvu_based_decisionmax_abs_colssignsmax_abs_rowss         r   	_svd_flipIncrementalPCA._svd_flip   s       <<		!!<LJJquQWWQZ/@!@ABE <<		!!<LJJqqwwqz!2L!@ABE	<QWWQZ %%a,,	ZZAtr   c                    U(       a  U R                  U5      nUR                  u  p4U R                  c
  SU-  U l        U R                  X0R                  U R                  =(       d    SS9 H  nU R                  X   SS9  M     U $ )a$  
Fits the model with data `X` using minibatches of size `batch_size`.

Args:
    X (torch.Tensor): The input data tensor with shape (n_samples, n_features).
    check_input (bool, optional): If True, validates the input. Defaults to True.

Returns:
    IncrementalPCA: The fitted IPCA model.
   r   )min_batch_sizeF)check_input)rB   r=   r	   gen_batchesr   partial_fit)r   r"   rs   r@   rA   batchs         r   fitIncrementalPCA.fit   s     ##A&A !	??"*nDO%%iQUQbQbQgfg%hEQX59 i r   c                    [        U S5      (       + nU(       a  U R                  U5      nUR                  u  pEU(       aZ  SU l        SU l        [
        R                  " S/UR                  S9U l        XPl	        U R                  (       d  [        XE5      U l
        XPR                  :w  a  [        S5      eU R                  XR                  U R                  U R                  5      u  pgnU(       a  X-  nO[
        R                  " USS9n	X-  n[
        R                  " U R                  R!                  5       U-  U-  5      n
XR                  U	-
  -  n[
        R"                  " U R$                  R'                  S5      U R(                  -  UU45      nU R*                  (       a  U R-                  U5      u  pnOU R/                  U5      u  pnU R1                  XSS	9u  pUS
-  US-
  -  nUS
-  [
        R2                  " Xx-  5      -  nXl        USU R                   U l        USU R                   U l        X`l        Xpl        USU R                   U l        USU R                   U l        U R                  XE4;  a#  XR                  S R                  5       U l        U $ [
        R                  " SUR                  S9U l        U $ )a3  
Incrementally fits the model with batch data `X`.

Args:
    X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
    check_input (bool, optional): If True, validates the input. Defaults to True.

Returns:
    IncrementalPCA: The updated IPCA model after processing the batch.
components_Nr   rE   z]Number of features of the new batch does not match the number of features of the first batch.ra   )rb   rG   F)rj   r   rG   g        )hasattrrB   r=   mean_var_r   r;   rF   n_samples_seen_r   r   minr   r^   meansqrtrM   vstacksingular_values_rg   rz   r   r2   r#   rn   rJ   explained_variance_explained_variance_ratio_noise_variance_)r   r"   rs   
first_passr@   rA   col_meancol_varn_total_samplescol_batch_meanmean_correction_factormean_correctionr/   r0   Vtexplained_varianceexplained_variance_ratios                    r   ru   IncrementalPCA.partial_fit   s    !}55
##A&A !	 DJDI#(<<AHH#ED )$$$'	$>!)))o  .2-K-Kzz499d&:&:.
*? MA"ZZq1NA%*ZZ1E1E1L1L1NQ`1`dm0m%n"4

^8STO))..w7$:J:JJ#A <<++A.HA"((+HA"qu=T_q%89#$a4%))G4M*N#N .1 1 12 !"5D$5$5 6
	#56I8I8I#J )ABUDDUDU)V&Y$;;#56G6G6I#J#O#O#QD   $)<<AHH#ED r   c                     XR                   -
  n[        R                  " UR                  5       U R                  R
                  5      R                  UR                  5      $ )aT  
Applies dimensionality reduction to `X`.

The input data `X` is projected on the first principal components previously extracted from a training set.

Args:
    X (torch.Tensor): New data tensor with shape (n_samples, n_features) to be transformed.

Returns:
    torch.Tensor: Transformed data tensor with shape (n_samples, n_components).
)r|   r   mmrM   rz   rV   r>   r6   r!   s     r   	transformIncrementalPCA.transform-  sA     

Nxx
D$4$4$6$67::177CCr   nrr   c              #      #    Sn[        [        X-  5      5       H   nX1-   nXR-   U :  a  M  [        X55      v   UnM"     X0:  a  [        X05      v   gg7f)a  Generator to create slices containing `batch_size` elements from 0 to `n`.

The last slice may contain less than `batch_size` elements, when `batch_size` does not divide `n`.

Args:
    n (int): Size of the sequence.
    batch_size (int): Number of elements in each batch.
    min_batch_size (int, optional): Minimum number of elements in each batch. Defaults to 0.

Yields:
    slice: A slice of `batch_size` elements.
r   N)rf   intslice)r   r	   rr   start_ends         r   rt   IncrementalPCA.gen_batches<  s^      s1?+,A$C#a'##E - 9/! s   AA)r	   rz   r   r   r   r   r   r   r   r|   r   r   r~   r   r   r
   r}   )NTNNFN   N)T)r   )__name__
__module____qualname____firstlineno____doc__r   r   boolstrr   r   r#   r2   r   r:   rB   staticmethodtupler^   rn   rw   ru   r   rt   __static_attributes__ r   r   r   r      sZ   6 '+#$($(#'&*,sm, tn, SM	,
 SM, , C=, , sm,2YP!5<< !F 6D	u||U\\5<<7	86D 6Dp %ell8R2S  2,ENDell D "s " "S " "r   r   )typingr   r   r   r   r   r   <module>r      s     ~" ~"r   