
    h                    b    S SK Jr  S SKJrJr  S SKJr  S SKJr   " S S\R                  5      r
g)    )annotations)Tensornn)CrossEncoder)fullnamec                  `   ^  \ rS rSr\R
                  " 5       4SU 4S jjjrSS jrS rSr	U =r
$ )MSELoss	   c                  > [         TU ]  5         Xl        X l        [        R
                  " S0 UD6U l        [        U R                  [        5      (       d8  [        U R                  R                   S[        U R                  5       S35      eU R                  R                  S:w  a9  [        U R                  R                   SU R                  R                   S35      eg)a9  
Computes the MSE loss between the computed query-passage score and a target query-passage score. This loss
is used to distill a cross-encoder model from a teacher cross-encoder model or gold labels.

Args:
    model (:class:`~sentence_transformers.cross_encoder.CrossEncoder`): A CrossEncoder model to be trained.
    activation_fn (:class:`~torch.nn.Module`): Activation function applied to the logits before computing the loss.
    **kwargs: Additional keyword arguments passed to the underlying :class:`torch.nn.MSELoss`.

.. note::

    Be mindful of the magnitude of both the labels and what the model produces. If the teacher model produces
    logits with Sigmoid to bound them to [0, 1], then you may wish to use a Sigmoid activation function in the loss.

References:
    - Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation: https://arxiv.org/abs/2010.02666
    - `Cross Encoder > Training Examples > Distillation <../../../examples/cross_encoder/training/distillation/README.html>`_

Requirements:
    1. Your model must be initialized with `num_labels = 1` (a.k.a. the default) to predict one class.
    2. Usually uses a finetuned CrossEncoder teacher M in a knowledge distillation setup.

Inputs:
    +-----------------------------------------+-----------------------------+-------------------------------+
    | Texts                                   | Labels                      | Number of Model Output Labels |
    +=========================================+=============================+===============================+
    | (sentence_A, sentence_B) pairs          | similarity score            | 1                             |
    +-----------------------------------------+-----------------------------+-------------------------------+

Relations:
    - :class:`MarginMSELoss` is similar to this loss, but with a margin through a negative pair.

Example:
    ::

        from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainer, losses
        from datasets import Dataset

        student_model = CrossEncoder("microsoft/mpnet-base")
        teacher_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L12-v2")
        train_dataset = Dataset.from_dict({
            "query": ["What are pandas?", "What is the capital of France?"],
            "answer": ["Pandas are a kind of bear.", "The capital of France is Paris."],
        })

        def compute_labels(batch):
            return {
                "label": teacher_model.predict(list(zip(batch["query"], batch["answer"])))
            }

        train_dataset = train_dataset.map(compute_labels, batched=True)
        loss = losses.MSELoss(student_model)

        trainer = CrossEncoderTrainer(
            model=student_model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
z? expects a model of type CrossEncoder, but got a model of type .   z; expects a model with 1 output label, but got a model with z output labels.N )super__init__modelactivation_fnr   r	   loss_fct
isinstancer   
ValueError	__class____name__type
num_labels)selfr   r   kwargsr   s       l/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/cross_encoder/losses/MSELoss.pyr   MSELoss.__init__
   s    z 	
*

,V,$**l33>>**+ ,++/

+;*<A? 
 ::  A%>>**+ ,((,

(=(='>oO  &    c                   [        U5      S:w  a  [        S[        U5       S35      e[        [        US   US   5      5      nU R                  R                  USSSS9nUR                  U R                  R                  5        U R                  " S
0 UD6S   R                  S	5      nU R                  U5      nU R                  XRR                  5       5      nU$ )N   zMMSELoss expects a dataset with two non-label columns, but got a dataset with z	 columns.r   r   Tpt)padding
truncationreturn_tensorsr   )lenr   listzipr   	tokenizertodeviceviewr   r   float)r   inputslabelspairstokenslogitslosss          r   forwardMSELoss.forwardX   s    v;!_`cdj`k_lluv  SF1I./%%	 & 
 			$**##$%f%a(--b1##F+}}V\\^4r   c                0    S[        U R                  5      0$ )Nr   )r   r   )r   s    r   get_config_dictMSELoss.get_config_dictk   s    Xd&8&89
 	
r   )r   r   r   )r   r   r   z	nn.ModulereturnNone)r.   zlist[list[str]]r/   r   r9   r   )r   
__module____qualname____firstlineno__r   Identityr   r4   r7   __static_attributes____classcell__)r   s   @r   r	   r	   	   s(    GI{{} L L\&
 
r   r	   N)
__future__r   torchr   r   0sentence_transformers.cross_encoder.CrossEncoderr   sentence_transformers.utilr   Moduler	   r   r   r   <module>rF      s#    "  I /e
bii e
r   