
    hp                       S SK Jr  S SKrS SKJrJr  S SKJr  S SKJ	r	J
r
  S SKJr  S SKJrJr  S SKJr  \" 5       (       a  S S	KJrJrJrJrJr  \R0                  " \5      r\	(       a  S S
KJr   " S S\5      r\ " S S\5      5       rSS jrg)    )annotationsN)	dataclassfield)Path)TYPE_CHECKINGAny)	ModelCard)$SentenceTransformerModelCardCallback SentenceTransformerModelCardData)is_datasets_available)DatasetDatasetDictIterableDatasetIterableDatasetDictValue)CrossEncoderc                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )CrossEncoderModelCardCallback   c                $   > [         TU ]  U5        g N)super__init__)selfdefault_args_dict	__class__s     h/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/cross_encoder/model_card.pyr   &CrossEncoderModelCardCallback.__init__   s    *+     )r   dict[str, Any]returnNone)__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__r   s   @r   r   r      s    , ,r   r   c                  ,  ^  \ rS rSr% Sr\" SS9rS\S'   \" S S9rS	\S
'   \" SSS9r	S\S'   \" SSSS9r
S\S'   \" SSS9rS\S'   \" \" \5      R                  S-  SSS9rS\S'   \" SSSS9rS\S'   SS jrS U 4S jjrS!S jrS"S jrS"S jrSrU =r$ )#CrossEncoderModelCardData   a2  A dataclass storing data used in the model card.

Args:
    language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
        e.g. "en" or ["en", "de", "nl"]
    license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
        or "cc-by-nc-sa-4.0"
    model_name (`Optional[str]`): The pretty name of the model, e.g. "CrossEncoder based on answerdotai/ModernBERT-base".
    model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
        e.g. "tomaarsen/ce-mpnet-base-ms-marco".
    train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
        e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
    eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
        e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
    task_name (`str`): The human-readable task the model is trained on,
        e.g. "semantic search and paraphrase mining".
    tags (`Optional[List[str]]`): A list of tags for the model,
        e.g. ["sentence-transformers", "cross-encoder"].
    local_files_only (`bool`): If True, don't attempt to find dataset or base model information on the Hub.
        Defaults to False.

.. tip::

    Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
    include it in your model cards.

Example::

    >>> model = CrossEncoder(
    ...     "microsoft/mpnet-base",
    ...     model_card_data=CrossEncoderModelCardData(
    ...         model_id="tomaarsen/ce-mpnet-base-allnli",
    ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
    ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
    ...         license="apache-2.0",
    ...         language="en",
    ...     ),
    ... )
N)defaultstr	task_namec                 
    / SQ$ )N)zsentence-transformerszcross-encoderrerankerr    r    r   r   <lambda>"CrossEncoderModelCardData.<lambda>H   s     !
r   )default_factoryzlist[str] | NonetagsF)r.   initzlist[list[str]] | Nonepredict_exampleT)r.   r7   reprzbool | Noneir_modelpipeline_tagzmodel_card_template.mdr   template_pathzCrossEncoder | Nonemodelc                `   [        U[        5      (       a  U[        UR                  5       5      S      n[        U[        [
        45      (       a  g[        U5      S:X  a  gUR                  R                  5        VVs/ s H|  u  p#[        U[        5      (       a  UR                  S;   dP  [        US5      (       d  M=  [        UR                  [        5      (       d  M^  UR                  R                  S;   d  Mz  UPM~     nnn[        U5      S:  a  gUS   nUS   n[        US   U   5      n[        US   U   5      nUSS U   n	USS U   n
U[        L a  U
S   SS n
U	S   /[        U
5      -  n	U[        L a'  [        X5       VVs/ s H  u  pX/PM
     snnU l        ggs  snnf s  snnf )z
We don't set widget examples, but only load the prediction example.
This is because the Hugging Face Hub doesn't currently have a Sentence Ranking
or Text Classification widget that accepts pairs, which is what CrossEncoder
models require.
r   N>   stringlarge_stringfeature         )
isinstancer   listkeysr   r   lenfeaturesitemsr   dtypehasattrrA   typer/   zipr8   )r   datasetcolumnrA   columnsquery_columnanswer_column
query_typeanswer_typequeriesanswersqueryresponses                r   set_widget_examples-CrossEncoderModelCardData.set_widget_examplesZ   s    g{++d7<<>2156Gg1DEFFw<1 $+#3#3#9#9#;	
#;7E**w}}@Z/Z+	 
 w6  OO))-GG #; 	 	
 w<!qz
'!*\23
71:m45"1+l+"1+m, $aj!nGqzlS\1GMPQXMb#cMb/%U$5Mb#cD  7	
8 $ds   ><F$>F$F$;F$
F*c                   > [         TU ]  U5        U R                  c  UR                  S:X  a  SOSU l        U R                  c  UR                  S:X  a  SOSU l        g g )NrC   z"text reranking and semantic searchztext pair classificationztext-rankingztext-classification)r   register_modelr0   
num_labelsr;   )r   r=   r   s     r   r]   (CrossEncoderModelCardData.register_model   s`    u%>>!8=8H8HA8M4Sm N $272B2Ba2GMbD %r   c                8    U R                   R                  U5      $ r   )r=   	tokenizer)r   textkwargss      r   tokenize"CrossEncoderModelCardData.tokenize   s    zz##D))r   c                    g r   r    r   s    r   run_usage_snippet+CrossEncoderModelCardData.run_usage_snippet   s     	r   c                \    U R                   R                  U R                   R                  S.$ )N)model_max_lengthmodel_num_labels)r=   
max_lengthr^   rg   s    r   get_model_specific_metadata5CrossEncoderModelCardData.get_model_specific_metadata   s&     $

 5 5 $

 5 5
 	
r   )r;   r8   r0   )rO   zDataset | DatasetDictr"   r#   )r"   r#   )rb   zstr | list[str]r"   r!   )r"   r!   )r$   r%   r&   r'   __doc__r   r0   __annotations__r6   r8   r:   r;   r   __file__parentr<   r=   rZ   r]   rd   rh   rn   r(   r)   r*   s   @r   r,   r,      s    &R 4(Is("
D
  /4Du.MO+M!$UGHkG d7L#7X(=(=@X(X_dkpqM4q "'t%e!LEL-d^c*

 
r   r,   c                    [         R                  " U R                  U R                  R                  SS9nUR                  $ )Nu   🤗)	card_datar<   hf_emoji)r	   from_templatemodel_card_datar<   content)r=   
model_cards     r   generate_model_cardr{      s:    ((''u7L7L7Z7ZekJ r   )r=   r   r"   r/   ) 
__future__r   loggingdataclassesr   r   pathlibr   typingr   r   huggingface_hubr	    sentence_transformers.model_cardr
   r   sentence_transformers.utilr   datasetsr   r   r   r   r   	getLoggerr$   logger0sentence_transformers.cross_encoder.CrossEncoderr   r   r,   r{   r    r   r   <module>r      st    "  (  % % s <ZZ			8	$M,$H ,
 C
 @ C
 C
Lr   