
    hS"                        S SK Jr   S SKJr  S SKrS SKJr  S SK	Js  J
r  S SKJr   " S S\R                  5      r " S S\5      rg! \ a	    S SKJr   NHf = f)	    )annotations)SelfN)Modulec                  ^   ^  \ rS rSrSU 4S jjrSS jr\S	S j5       r\S	S j5       rSr	U =r
$ )
TiedTranspose   c                .   > [         TU ]  5         Xl        g N)super__init__linear)selfr   	__class__s     w/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/sparse_encoder/models/SparseAutoEncoder.pyr   TiedTranspose.__init__   s        c                    U R                   R                  b   e[        R                   " XR                   R                  R	                  5       S 5      $ r
   )r   biasFweightt)r   xs     r   forwardTiedTranspose.forward   s<    {{'''xx;;--//1488r   c                J    U R                   R                  R                  5       $ r
   )r   r   r   r   s    r   r   TiedTranspose.weight   s    {{!!##%%r   c                .    U R                   R                  $ r
   )r   r   r   s    r   r   TiedTranspose.bias   s    {{r   )r   )r   z	nn.Linearr   torch.Tensorreturnr!   )r"   r!   )__name__
__module____qualname____firstlineno__r   r   propertyr   r   __static_attributes____classcell__r   s   @r   r   r      s5    9 & &    r   r   c                    ^  \ rS rSrSr/ SQrS1r     S             SU 4S jjjrSS jrSSS jjr	SS jr
SSS	 jjrSSS
 jjr S     SS jjrSSS jjr\     S             SS jj5       rS rS S jrSrU =r$ )!SparseAutoEncoder!   a  
This module implements the Sparse AutoEncoder architecture based on the paper:
Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation, https://arxiv.org/abs/2503.01776

This module transforms dense embeddings into sparse representations by:

1. Applying a multi-layer feed-forward network
2. Applying top-k sparsification to keep only the largest values
3. Supporting auxiliary losses for training stability (via k_aux parameter)

Args:
    input_dim: Dimension of the input embeddings.
    hidden_dim: Dimension of the hidden layers. Defaults to 512.
    k: Number of top values to keep in the final sparse representation. Defaults to 8.
    k_aux: Number of top values to keep for auxiliary loss calculation. Defaults to 512.
    normalize: Whether to apply layer normalization to the input embeddings. Defaults to False.
    dead_threshold: Threshold for dead neurons. Neurons with non-zero activations below this threshold are considered dead. Defaults to 30.
)	input_dim
hidden_dimkk_aux	normalizedead_thresholdmax_active_dimsc                $  >^ ^ [         TT ]  5         UT l        UT l        TT l        [
        R                  " [        R                  " U5      5      T l	        [
        R                  " XSS9T l        [
        R                  " [        R                  " U5      5      T l        [        T R                  5      T l        UT l        UT l        UT l        T   T R%                  S[        R                  " U[        R&                  S95        UU 4S jnUT l        g )NF)r   stats_last_nonzero)dtypec                P   > TR                   T:  nU =R                  U-  sl        U $ r
   )r6   data)r   	dead_maskr3   r   s     r   auxk_mask_fn0SparseAutoEncoder.__init__.<locals>.auxk_mask_fnQ   s&    //.@IFFiFHr   )r   r   r.   r/   r3   nn	Parametertorchzerospre_biasLinearencoderlatent_biasr   decoderr0   r1   r2   register_bufferlongr;   )	r   r.   r/   r0   r1   r2   r3   r;   r   s	   `     ` r   r   SparseAutoEncoder.__init__9   s     	"$,U[[%;<"$))I"N<<J(?@&3DLL&A
"15;;zQVQ[Q[3\]	
 )r   c                    XR                   -
  n[        R                  " XR                  R                  U R
                  5      nU$ )z
:param x: input data (shape: [batch, input_dim])
:param latent_slice: slice of latents to compute
    Example: latent_slice = slice(0, 10) to compute only the first 10 latents.
:return: autoencoder latents before activation (shape: [batch, hidden_dim])
)rA   r   r   rC   r   rD   )r   r   latents_pre_acts      r   encode_pre_act SparseAutoEncoder.encode_pre_actX   s6     ((1ll&9&94;K;KLr   c                `    UR                  SSS9nX-
  nUR                  SSS9nXU-   -  nXU4$ )NT)dimkeepdim)meanstd)r   r   epsmurR   s        r   LNSparseAutoEncoder.LNc   sB    VVDV)FeeDe)sOczr   c                z    U R                   (       d  U[        5       4$ U R                  U5      u  pnU[        X#S94$ )N)rT   rR   )r2   dictrU   )r   r   rT   rR   s       r   
preprocessSparseAutoEncoder.preprocessj   s6    ~~df9WWQZ
s$"&&&r   c                   Uc  U R                   n[        R                  " XSS9n[        R                  " U5      nUR	                  SUR
                  UR                  5        [        R                  " U5      n[        R                  " U R                  5      nUR                  SUR
                  R                  S5      UR                  S:  R                  UR                  5      R                  S5      5        U =R                  SUR                  SS9-
  -  sl	        U =R                  S-  sl	        SnU R                  (       a  U(       a  [        R                  " U R!                  U5      U R                  S9n	[        R                  " U5      n
U
R	                  SU	R
                  U	R                  5        [        R                  " U
5      nXh4$ )	zl
:param x: input data (shape: [batch, input_dim])
:return: autoencoder latents (shape: [batch, hidden_dim])
NrN   )r0   rO   r   h㈵>   )max)inputr0   )r0   r?   topk
zeros_likescatter_indicesvaluesr   relur6   scatter_add_reshapetor7   clampr1   r;   )r   r   r0   compute_auxr`   z_topk	latents_ktmplatents_auxkaux_topkz_auxks              r   top_kSparseAutoEncoder.top_kp   s_   
 9Azz!b)!!!$DLL$++6FF6N	t667LL  $[[4##CII.66r:	

 	1syyQy'7#771$ ::+zz''***H %%a(FOOB 0 0(//B66&>L&&r   c                    U R                  U5      U R                  -   nU R                  (       a  Uc   eX2S   -  US   -   nU$ )zy
:param latents: autoencoder latents (shape: [batch, hidden_dim])
:return: reconstructed data (shape: [batch, n_inputs])
rR   rT   )rE   rA   r2   )r   latentsinforets       r   decodeSparseAutoEncoder.decode   sH     ll7#dmm3>>###U#d4j0C
r   c                4   Ub  UOU R                   nUS   n[        R                  " 5       (       a<  U R                  U5      u  pEU R	                  U5      nU R                  XcSS9u  pxXqS'   U$ U R                  U5      u  pEU R	                  U5      nU R                  Xc5      u  pyU R                  USU-  5      u  pU R                  Xu5      nU R                  X5      nU R                  X5      nUR                  UUU
U	UUUXR                  -
  S.5        XqS'   U$ )Nsentence_embeddingF)rj      )sentence_embedding_backbonesentence_embedding_encodedsentence_embedding_encoded_4kauxiliary_embeddingdecoded_embedding_kdecoded_embedding_4kdecoded_embedding_auxdecoded_embedding_k_pre_bias)	r0   r?   is_inference_mode_enabledrY   rK   rq   rw   updaterA   )r   featuresr4   r0   r   ru   rJ   rl   _rn   
latents_4krecons_k	recons_4k
recons_auxs                 r   r   SparseAutoEncoder.forward   s%     /:O)* **,,ooa(GA"11!4O::oe:LLI-6)*O//!$--a0"&**_"@	

?AE:
;;y/KK
1	[[4
 	/0.=1;'3'/(1)308==0H		
 *3%&r   c                D    U R                  U5        U R                  XS9  g )N)safe_serialization)save_configsave_torch_weights)r   output_pathr   s      r   saveSparseAutoEncoder.save   s!    %Sr   c                v    UUUUUS.nU R                   " SSU0UD6n	U " S0 U	D6n
U R                  " SXS.UD6n
U
$ )N)	subfoldertokencache_folderrevisionlocal_files_onlymodel_name_or_path)r   model )load_configload_torch_weights)clsr   r   r   r   r   r   kwargs
hub_kwargsconfigr   s              r   loadSparseAutoEncoder.load   s]     #(  0

 U4FU*Uf&&h:Lh]ghr   c                *    SU R                  5        S3$ )NzSparseAutoEncoder())get_config_dictr   s    r   __repr__SparseAutoEncoder.__repr__   s    #D$8$8$:#;1==r   c                    U R                   $ )z
Get the dimension of the sentence embedding. Warning: the number of non-zero elements in the embedding is only k out of the hidden_dim.

Returns:
    int: Dimension of the sentence embedding
)r/   r   s    r    get_sentence_embedding_dimension2SparseAutoEncoder.get_sentence_embedding_dimension   s     r   )r;   r3   rE   rC   r/   r.   r0   r1   rD   r2   rA   )      r   F   )r.   intr/   r   r0   r   r1   r   r2   boolr3   r   r"   Noner    )r\   )r   r!   rS   float)r   r!   )NT)r   r!   r0   
int | Nonerj   r   r"   r!   r
   )rt   r!   r"   r!   )r   dict[str, torch.Tensor]r4   r   r"   r   )T)r   r   r"   r   ) NNNF)r   strr   r   r   zbool | str | Noner   
str | Noner   r   r   r   r"   r   )r"   r   )r#   r$   r%   r&   __doc__config_keysforward_kwargsr   rK   rU   rY   rq   rw   r   r   classmethodr   r   r   r(   r)   r*   s   @r   r,   r,   !   s4   & [K'(N
  )) ) 	)
 ) ) ) 
) )>	''B PT'/'BL'	 'RT  #'#'#!&  !	
 !   
 ,> r   r,   )
__future__r   typingr   ImportErrortyping_extensionsr?   torch.nnr=   torch.nn.functional
functionalr   #sentence_transformers.models.Moduler   r   r,   r   r   r   <module>r      sS    "'     6 BII  $K K9  '&'s   A AA