from __future__ import annotations

from collections.abc import Iterable

from torch import Tensor

from sentence_transformers import util
from sentence_transformers.losses.CoSENTLoss import CoSENTLoss
from sentence_transformers.sparse_encoder.SparseEncoder import SparseEncoder


class SparseCoSENTLoss(CoSENTLoss):
    def __init__(self, model: SparseEncoder, scale: float = 20.0, similarity_fct=util.pairwise_cos_sim) -> None:
        """
This class implements CoSENT (Cosine Sentence).
It expects that each of the InputExamples consists of a pair of texts and a float-valued label, representing
the expected similarity score between the pair.

It computes the following loss function:

``loss = log(1 + exp(s(k,l) - s(i,j)) + exp(...))``, where ``(i,j)`` and ``(k,l)`` are any of the input pairs in the
batch such that the expected similarity of ``(i,j)`` is greater than that of ``(k,l)``. The summation is over all
possible pairs of input pairs in the batch that satisfy this condition.
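
As a rough, self-contained sketch (names here are illustrative; the actual computation is the one
inherited from :class:`CoSENTLoss`), the formula can be written as::

    import torch

    # Sketch of the CoSENT objective for a batch of predicted pair similarities
    # ``scores`` and expected similarities ``labels`` (hypothetical helper, not library API).
    def cosent(scores: torch.Tensor, labels: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
        scores = scores * scale
        # diff[i, j] = s(i) - s(j) for every combination of input pairs
        diff = scores[:, None] - scores[None, :]
        # keep only combinations where pair j is expected to be more similar than pair i
        mask = labels[:, None] < labels[None, :]
        diff = diff - (~mask).float() * 1e12  # masked-out terms vanish after exp()
        # prepend a zero: exp(0) = 1 supplies the leading "1 +" term
        zero = torch.zeros(1, device=diff.device)
        return torch.logsumexp(torch.cat([zero, diff.flatten()]), dim=0)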

Args:
    model: SparseEncoder
    similarity_fct: Function to compute the PAIRWISE similarity
        between embeddings. Default is
        ``util.pairwise_cos_sim``.
    scale: Output of similarity function is multiplied by scale
        value. Represents the inverse temperature.

References:
    - For further details, see: https://kexue.fm/archives/8847

Requirements:
    - Needs to be used in SpladeLoss or CSRLoss as a loss function.
    - Sentence pairs with corresponding similarity scores in range of the similarity function. Default is [-1,1].

Inputs:
    +--------------------------------+------------------------+
    | Texts                          | Labels                 |
    +================================+========================+
    | (sentence_A, sentence_B) pairs | float similarity score |
    +--------------------------------+------------------------+

Relations:
    - :class:`SparseAnglELoss` is SparseCoSENTLoss with ``pairwise_angle_sim`` as the metric, rather than ``pairwise_cos_sim``.
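
    For illustration, that relation roughly amounts to the following (a sketch, assuming ``model``
    and ``losses`` as in the example below)::

        from sentence_transformers import util

        # conceptually equivalent constructions:
        losses.SparseAnglELoss(model)
        losses.SparseCoSENTLoss(model, similarity_fct=util.pairwise_angle_sim)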

Example:
    ::

        from datasets import Dataset

        from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

        model = SparseEncoder("distilbert/distilbert-base-uncased")
        train_dataset = Dataset.from_dict(
            {
                "sentence1": ["It's nice weather outside today.", "He drove to work."],
                "sentence2": ["It's so sunny.", "She walked to the store."],
                "score": [1.0, 0.3],
            }
        )
        loss = losses.SpladeLoss(
            model=model, loss=losses.SparseCoSENTLoss(model), document_regularizer_weight=5e-5, use_document_regularizer_only=True
        )

        trainer = SparseEncoderTrainer(model=model, train_dataset=train_dataset, loss=loss)
        trainer.train()
cosine)scalesimilarity_fct)similarity_fn_namesuper__init__)selfmodelr   r   	__class__s       v/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/sparse_encoder/losses/SparseCoSENTLoss.pyr   SparseCoSENTLoss.__init__   s#    v $, w>RR    c                    [        S5      e)NzMSparseCoSENTLoss should not be used alone. Use it with SpladeLoss or CSRLoss.)AttributeError)r   sentence_featureslabelss      r   forwardSparseCoSENTLoss.forwardK   s    lmmr    )r   r   r   floatreturnNone)r   zIterable[dict[str, Tensor]]r   r   r!   r   )