
    h                    Z    S SK Jr  S SKJr  S SKJr  S SKJr  S SKJ	r	J
r
   " S S\	5      rg)	    )annotations)Literal)nn)CrossEncoder)
LambdaLossNoWeightingSchemec                     ^  \ rS rSrSSSS\R
                  " 5       S4               S
U 4S jjjrSU 4S jjr\SS j5       r	S	r
U =r$ )RankNetLoss   Ng      ?g|=binaryc                >   > [         TU ]  U[        5       UUUUUUS9  g)a  
RankNet loss implementation for learning to rank. This loss function implements the RankNet algorithm,
which learns a ranking function by optimizing pairwise document comparisons using a neural network.
The implementation is optimized to handle padded documents efficiently by only processing valid
documents during model inference.

Args:
    model (CrossEncoder): CrossEncoder model to be trained
    sigma (float): Score difference weight used in sigmoid (default: 1.0)
    eps (float): Small constant for numerical stability (default: 1e-10)
    activation_fn (:class:`~torch.nn.Module`): Activation function applied to the logits before computing the
        loss. Defaults to :class:`~torch.nn.Identity`.
    mini_batch_size (int, optional): Number of samples to process in each forward pass. This has a significant
        impact on the memory consumption and speed of the training process. Three cases are possible:
        - If ``mini_batch_size`` is None, the ``mini_batch_size`` is set to the batch size.
        - If ``mini_batch_size`` is greater than 0, the batch is split into mini-batches of size ``mini_batch_size``.
        - If ``mini_batch_size`` is <= 0, the entire batch is processed at once.
        Defaults to None.

References:
    - Learning to Rank using Gradient Descent: https://icml.cc/Conferences/2015/wp-content/uploads/2015/06/icml_ranking.pdf
    - `Cross Encoder > Training Examples > MS MARCO <../../../examples/cross_encoder/training/ms_marco/README.html>`_

Requirements:
    1. Query with multiple documents (pairwise approach)
    2. Documents must have relevance scores/labels. Both binary and continuous labels are supported.

Inputs:
    +----------------------------------------+--------------------------------+-------------------------------+
    | Texts                                  | Labels                         | Number of Model Output Labels |
    +========================================+================================+===============================+
    | (query, [doc1, doc2, ..., docN])       | [score1, score2, ..., scoreN]  | 1                             |
    +----------------------------------------+--------------------------------+-------------------------------+

Recommendations:
    - Use :class:`~sentence_transformers.util.mine_hard_negatives` with ``output_format="labeled-list"``
      to convert question-answer pairs to the required input format with hard negatives.

Relations:
    - :class:`~sentence_transformers.cross_encoder.losses.LambdaLoss` can be seen as an extension of this loss
      where each score pair is weighted. Alternatively, this loss can be seen as a special case of the
      :class:`~sentence_transformers.cross_encoder.losses.LambdaLoss` without a weighting scheme.
    - :class:`~sentence_transformers.cross_encoder.losses.LambdaLoss` with its default NDCGLoss2++ weighting
      scheme anecdotally performs better than the other losses with the same input format.

Example:
    ::

        from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainer, losses
        from datasets import Dataset

        model = CrossEncoder("microsoft/mpnet-base")
        train_dataset = Dataset.from_dict({
            "query": ["What are pandas?", "What is the capital of France?"],
            "docs": [
                ["Pandas are a kind of bear.", "Pandas are kind of like fish."],
                ["The capital of France is Paris.", "Paris is the capital of France.", "Paris is quite large."],
            ],
            "labels": [[1, 0], [1, 1, 0]],
        })
        loss = losses.RankNetLoss(model)

        trainer = CrossEncoderTrainer(
            model=model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
)modelweighting_schemeksigmaepsreduction_logactivation_fnmini_batch_sizeN)super__init__r   )	selfr   r   r   r   r   r   r   	__class__s	           p/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/cross_encoder/losses/RankNetLoss.pyr   RankNetLoss.__init__   s3    ^ 	.0''+ 	 		
    c                *   > [         TU ]  5       nUS	 U$ )zw
Get configuration parameters for this loss function.

Returns:
    Dictionary containing the configuration parameters
r   )r   get_config_dict)r   configr   s     r   r   RankNetLoss.get_config_dictf   s      (*%&r   c                    g)NaR  
@inproceedings{burges2005learning,
  title={Learning to Rank using Gradient Descent},
  author={Burges, Chris and Shaked, Tal and Renshaw, Erin and Lazier, Ari and Deeds, Matt and Hamilton, Nicole and Hullender, Greg},
  booktitle={Proceedings of the 22nd international conference on Machine learning},
  pages={89--96},
  year={2005}
}
 )r   s    r   citationRankNetLoss.citationq   s    r   r"   )r   r   r   
int | Noner   floatr   r&   r   zLiteral['natural', 'binary']r   znn.Module | Noner   r%   returnNone)r'   z#dict[str, float | int | str | None])r'   str)__name__
__module____qualname____firstlineno__r   Identityr   r   propertyr#   __static_attributes____classcell__)r   s   @r   r
   r
      s     6>*,++-&*X
X
 X
 	X

 X
 4X
 (X
 $X
 
X
 X
t	 	 	r   r
   N)
__future__r   typingr   torchr   #sentence_transformers.cross_encoderr   *sentence_transformers.cross_encoder.lossesr   r   r
   r"   r   r   <module>r7      s"    "   < Tp* pr   