
    hu%                        S SK Jr  S SKJr  S SKrS SKJr  S SKJs  Jr	  S SK
Jr  S SKJr  SS jr " S S\R                  5      r " S	 S
\R                  5      rg)    )annotations)IterableN)"SparseMultipleNegativesRankingLoss)SparseEncoderc                    UR                  SS9n[        R                  " X5      [        R                  " USSS24   R                  UR                  5      U5      -  nU$ )z
:param reconstruction: output of Autoencoder.decode (shape: [batch, n_inputs])
:param original_input: input of Autoencoder.encode (shape: [batch, n_inputs])
:return: normalized mean squared error (shape: [1])
r   )dimN)meanFmse_lossbroadcast_toshape)reconstructionoriginal_inputoriginal_input_meanlosss       m/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/sparse_encoder/losses/CSRLoss.pynormalized_mean_squared_errorr      s\     )--!-4::n5

D!G$11.2F2FG9 D K    c                  J   ^  \ rS rSrSSU 4S jjjrS	S jrS
S jrS rSrU =r	$ )CSRReconstructionLoss   c                :   > [         TU ]  5         Xl        X l        g)a  
CSRReconstructionLoss implements the reconstruction loss component for Contrastive Sparse Representation (CSR) models.

This loss ensures that the sparse encoding can accurately reconstruct the original model embeddings through
three components:

1. A primary reconstruction loss (L_k) that measures the error between the original embedding and its
   reconstruction using the top-k sparse components.
2. A secondary reconstruction loss (L_4k) that measures the error using the top-4k sparse components.
3. An auxiliary loss (L_aux) that helps to learn residual information.

Args:
    model: SparseEncoder model with autoencoder components
    beta: Weight for the auxiliary loss component (L_aux)

References:
    - For more details, see the paper "Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation"
      https://arxiv.org/abs/2503.01776

Requirements:
    1. The model must be configured to output the necessary reconstruction components
    2. Used with SparseEncoder models that implement compositional sparse autoencoding

Relations:
    - Used as a component within :class:`CSRLoss` combined with a contrastive loss

Example:
    - This loss is never used standalone, but instead used within the :class:`CSRLoss` class. See that loss for more details.
N)super__init__modelbeta)selfr   r   	__class__s      r   r   CSRReconstructionLoss.__init__   s    < 	
	r   c                    [        S5      e)Nz[CSRReconstructionLoss is not intended to be used standalone. Use it within CSRLoss instead.)NotImplementedError)r   sentence_featuress     r   forwardCSRReconstructionLoss.forward?   s    !i
 	
r   c                `   SnSnSnU Hp  nUS   nUS   nUS   nUS   n	US   n
[         R                  " Xg5      n[         R                  " Xh5      n[        XU
R                  5       -
  5      nX+-  nX<-  nXM-  nMr     [	        U5      nUS:  a  X.-  nX>-  nXN-  nUUS-  U R
                  U-  S	.$ )
z
Compute the CSRReconstruction loss from embeddings.

Args:
    outputs: List of dictionaries containing sentence embeddings and their sparse representations

Returns:
    total_loss: The total reconstruction loss value
g        sentence_embedding_backbonedecoded_embedding_kdecoded_embedding_4kdecoded_embedding_auxdecoded_embedding_k_pre_biasr   g       @)reconstruction_loss_kreconstruction_loss_4kreconstruction_loss_aux)r
   r   r   detachlenr   )r   outputs	total_L_k
total_L_4ktotal_L_auxfeaturesxrecons_k	recons_4k
recons_auxreconsk_pre_biasL_kL_4kL_auxnum_columnss                  r   compute_loss_from_embeddings2CSRReconstructionLoss.compute_loss_from_embeddingsD   s     	
  H67A 56H !78I!"9:J'(FG **Q)C ::a+D 2*BRBYBYB[>[\E IJ K'  , 'l?$I%J&K &/&03&6'+yy;'>
 	
r   c                    SU R                   0$ )d
Get the configuration dictionary.

Returns:
    Dictionary containing the configuration parameters
r   )r   r   s    r   get_config_dict%CSRReconstructionLoss.get_config_dictw   s     		""r   )r   r   )      ?)r   r   r   floatreturnNone)r"   !Iterable[dict[str, torch.Tensor]]rG   dict[str, torch.Tensor])r0   zlist[dict[str, torch.Tensor]]rG   rJ   )
__name__
__module____qualname____firstlineno__r   r#   r>   rC   __static_attributes____classcell__r   s   @r   r   r      s#       D

1
f# #r   r   c                  d   ^  \ rS rSrSSU 4S jjjr S	     S
S jjrS r\SS j5       rSr	U =r
$ )CSRLoss   c                   > [         TU ]  5         Xl        X0l        X@l        [        XS9U l        Ub  X l        g[        US9U l        g)a  
CSRLoss implements a combined loss function for Contrastive Sparse Representation (CSR) models.

This loss combines two components:

1. A reconstruction loss :class:`CSRReconstructionLoss` that ensures the sparse representation can faithfully
    reconstruct the original embedding.
2. A main loss, which in the paper is a :class:`SparseMultipleNegativesRankingLoss` that ensures semantically
    similar sentences have similar representations.

The total loss is linear combination of the two losses.

Args:
    model: SparseEncoder model
    loss: The principal loss function to use can be any of the SparseEncoder losses except flops loss and CSRReconstruction loss.
        If None, the default loss is used, which is the SparseMultipleNegativesRankingLoss.
    beta: Weight for the L_aux component in the reconstruction loss. Default is 0.1.
    gamma: Weight for the main loss component (MNRL a.k.a. InfoNCE by default). Default is 1.0.

References:
    - For more details, see the paper "Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation"
    https://arxiv.org/abs/2503.01776

Requirements:
    1. Input requirements depend on the chosen loss
    2. Uses autoencoder components of the SparseEncoder model

Relations:
    - Uses :class:`CSRReconstructionLoss` for the reconstruction component

Example:
    ::

        from datasets import Dataset
        from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

        model = SparseEncoder("sentence-transformers/all-MiniLM-L6-v2")
        train_dataset = Dataset.from_dict(
            {
                "anchor": ["It's nice weather outside today.", "He drove to work."],
                "positive": ["It's so sunny.", "He took the car to the office."],
                "negative": ["It's quite rainy, sadly.", "She walked to the store."],
            }
        )
        loss = losses.CSRLoss(model, beta=0.1, gamma=1.0)

        trainer = SparseEncoderTrainer(model=model, train_dataset=train_dataset, loss=loss)
        trainer.train()
)r   r   N)r   )	r   r   r   r   gammar   reconstruction_lossr   r   )r   r   r   r   rV   r   s        r   r   CSRLoss.__init__   sF    d 	
	
 $9u#P  ,D	2T[`2a	r   c                   U Vs/ s H  o0R                  U5      PM     nnU Vs/ s H  oUS   PM	     nnU R                  R                  U5      nU R                  R                  Xb5      n[	        U[
        5      (       a+  UR                  5        H  u  pXR                  -  Xy'   M     U$ XR                  -  US'   U$ s  snf s  snf )Nsentence_embedding	base_loss)r   rW   r>   r   
isinstancedictitemsrV   )r   r"   labelssentence_featurer0   outputrZ   lossesr[   keyvalues              r   r#   CSRLoss.forward   s     IZZHY4D::./HYZIPQv%9:Q))FFwOII::;MV	i&&'oo/
#jj0 0
  #,jj"8F; [Qs
   B>Cc                J    U R                   U R                  U R                  S.$ )rA   r   rV   r   rg   rB   s    r   rC   CSRLoss.get_config_dict   s     		DJJ		JJr   c                    g)Na  
@misc{wen2025matryoshkarevisitingsparsecoding,
      title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
      author={Tiansheng Wen and Yifei Wang and Zequn Zeng and Zhong Peng and Yudi Su and Xinyang Liu and Bo Chen and Hongwei Liu and Stefanie Jegelka and Chenyu You},
      year={2025},
      eprint={2503.01776},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2503.01776},
}
 rB   s    r   citationCSRLoss.citation   s    
r   )r   rV   r   r   rW   )Ng?rE   )r   r   r   znn.Module | Noner   rF   rV   rF   )N)r"   rI   r_   ztorch.Tensor | NonerG   rJ   )rG   str)rK   rL   rM   rN   r   r#   rC   propertyrk   rO   rP   rQ   s   @r   rS   rS      sN    9b 9bx cg!BL_	 $K  r   rS   )r   torch.Tensorr   ro   rG   ro   )
__future__r   collections.abcr   torchtorch.nnnntorch.nn.functional
functionalr
   Nsentence_transformers.sparse_encoder.losses.SparseMultipleNegativesRankingLossr   2sentence_transformers.sparse_encoder.SparseEncoderr   r   Moduler   rS   rj   r   r   <module>rz      sL    " $     M
b#BII b#Jcbii cr   