
    h`%                    j    S SK Jr  S SKrS SKJrJr  S SKJr  S SKJr   " S S\R                  5      r
g)    )annotationsN)Tensornn)CrossEncoder)fullnamec                  ~   ^  \ rS rSr\R
                  " 5       4SU 4S jjjrS	S jrS
S jrS r	\
SS j5       rSrU =r$ )MarginMSELoss
   c                  > [         TU ]  5         Xl        X l        [        R
                  " S0 UD6U l        [        U R                  [        5      (       d8  [        U R                  R                   S[        U R                  5       S35      eU R                  R                  S:w  a9  [        U R                  R                   SU R                  R                   S35      eg)a#  
Computes the MSE loss between ``|sim(Query, Pos) - sim(Query, Neg)|`` and ``|gold_sim(Query, Pos) - gold_sim(Query, Neg)|``.
This loss is often used to distill a cross-encoder model from a teacher cross-encoder model or gold labels.

In contrast to :class:`~sentence_transformers.cross_encoder.losses.MultipleNegativesRankingLoss`, the two passages do not
have to be strictly positive and negative, both can be relevant or not relevant for a given query. This can be
an advantage of MarginMSELoss over MultipleNegativesRankingLoss.

.. note::

    Be mindful of the magnitude of both the labels and what the model produces. If the teacher model produces
    logits with Sigmoid to bound them to [0, 1], then you may wish to use a Sigmoid activation function in the loss.

Args:
    model (:class:`~sentence_transformers.cross_encoder.CrossEncoder`): A CrossEncoder model to be trained.
    activation_fn (:class:`~torch.nn.Module`): Activation function applied to the logits before computing the loss.
    **kwargs: Additional keyword arguments passed to the underlying :class:`torch.nn.MSELoss`.

References:
    - Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation: https://arxiv.org/abs/2010.02666
    - `Cross Encoder > Training Examples > Distillation <../../../examples/cross_encoder/training/distillation/README.html>`_

Requirements:
    1. Your model must be initialized with `num_labels = 1` (a.k.a. the default) to predict one class.
    2. Usually uses a finetuned CrossEncoder teacher M in a knowledge distillation setup.

Inputs:
    +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
    | Texts                                          | Labels                                                                                     | Number of Model Output Labels |
    +================================================+============================================================================================+===============================+
    | (query, passage_one, passage_two) triplets     | gold_sim(query, passage_one) - gold_sim(query, passage_two)                                | 1                             |
    +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
    | (query, passage_one, passage_two) triplets     | [gold_sim(query, passage_one), gold_sim(query, passage_two)]                               | 1                             |
    +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
    | (query, positive, negative_1, ..., negative_n) | [gold_sim(query, positive) - gold_sim(query, negative_i) for i in 1..n]                    | 1                             |
    +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
    | (query, positive, negative_1, ..., negative_n) | [gold_sim(query, positive), gold_sim(query, negative_1), ..., gold_sim(query, negative_n)] | 1                             |
    +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+

Relations:
    - :class:`MSELoss` is similar to this loss, but without a margin through the negative pair.

Example:
    ::

        from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainer, losses
        from datasets import Dataset

        student_model = CrossEncoder("microsoft/mpnet-base")
        teacher_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L12-v2")
        train_dataset = Dataset.from_dict({
            "query": ["What are pandas?", "What is the capital of France?"],
            "positive": ["Pandas are a kind of bear.", "The capital of France is Paris."],
            "negative": ["Pandas are a kind of fish.", "The capital of France is Berlin."],
        })

        def compute_labels(batch):
            positive_scores = teacher_model.predict(list(zip(batch["query"], batch["positive"])))
            negative_scores = teacher_model.predict(list(zip(batch["query"], batch["negative"])))
            return {
                "label": positive_scores - negative_scores
            }

        train_dataset = train_dataset.map(compute_labels, batched=True)
        loss = losses.MarginMSELoss(student_model)

        trainer = CrossEncoderTrainer(
            model=student_model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
z? expects a model of type CrossEncoder, but got a model of type .   z; expects a model with 1 output label, but got a model with z output labels.N )super__init__modelactivation_fnr   MSELossloss_fct
isinstancer   
ValueError	__class____name__type
num_labels)selfr   r   kwargsr   s       r/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/cross_encoder/losses/MarginMSELoss.pyr   MarginMSELoss.__init__   s    T 	
*

,V,$**l33>>**+ ,++/

+;*<A? 
 ::  A%>>**+ ,((,

(=(='>oO  &    c           	     0   US   nUS   nUSS  n[        U5      n[        U[        5      (       a  [        R                  " U5      nUR
                  U[        U5      S-   4:X  a$  US S 2S4   R                  S5      US S 2SS 24   -
  nUR
                  U4:X  a  UR                  S5      nUR
                  U[        U5      4:w  a'  [        SUR
                   SU[        U5      4 S35      e[        [        X45      5      nU R                  U5      n/ n	U H7  n
[        [        X:5      5      nU	R                  U R                  U5      5        M9     UR                  S5      [        R                  " U	SS9-
  nU R                  XR                  5       5      nU$ )Nr   r      zLabels shape z does not match expected shape z. Ensure that your dataset labels/scores are 1) lists of differences between positive scores and negatives scores (length `num_negatives`), or 2) lists of positive and negative scores (length `num_negatives + 1`).)dim)lenr   listtorchstackshape	unsqueezer   ziplogits_from_pairsappendr   float)r   inputslabelsanchors	positives	negatives
batch_sizepositive_pairspositive_logitsnegative_logits_listnegativenegative_pairsmargin_logitslosss                 r   forwardMarginMSELoss.forwardf   s   )1I	12J	\
 fd##[[(F<<JI(:;; AqD\++A.12>F <<J=(%%a(F<<JI77~-LjZ]^gZhMiLj k0 0  c'5600@!!H!#g"89N ''(>(>~(NO " (11!4u{{CW]^7__}}]LLN;r   c                    U R                   R                  USSSS9nUR                  U R                   R                  5        U R                   " S0 UD6S   R	                  S5      nU R                  U5      $ )z
Computes the logits for a list of pairs using the model.

Args:
    pairs (list[tuple[str, str]]): A list of pairs of strings (query, passage).

Returns:
    Tensor: The logits for the pairs.
Tpt)padding
truncationreturn_tensorsr   r   )r   	tokenizertodeviceviewr   )r   pairstokenslogitss       r   r*   MarginMSELoss.logits_from_pairs   ss     %%	 & 
 			$**##$%f%a(--b1!!&))r   c                0    S[        U R                  5      0$ )Nr   )r   r   r   s    r   get_config_dictMarginMSELoss.get_config_dict   s    Xd&8&89
 	
r   c                    g)NuY  
@misc{hofstätter2021improving,
    title={Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation},
    author={Sebastian Hofstätter and Sophia Althammer and Michael Schröder and Mete Sertkan and Allan Hanbury},
    year={2021},
    eprint={2010.02666},
    archivePrefix={arXiv},
    primaryClass={cs.IR}
}
r   rK   s    r   citationMarginMSELoss.citation   s    	r   )r   r   r   )r   r   r   z	nn.ModulereturnNone)r-   zlist[list[str]]r.   zTensor | list[Tensor]rQ   r   )rF   zlist[tuple[str, str]]rQ   r   )rQ   str)r   
__module____qualname____firstlineno__r   Identityr   r:   r*   rL   propertyrO   __static_attributes____classcell__)r   s   @r   r	   r	   
   s=    GI{{} Y Yv%N*(

 
 
r   r	   )
__future__r   r%   r   r   0sentence_transformers.cross_encoder.CrossEncoderr   sentence_transformers.utilr   Moduler	   r   r   r   <module>r_      s&    "   I /gBII gr   