
    hV                        S SK Jr  S SKrS SKJrJr  S SKJr  S SKJ	r	J
r
  S SKJrJr  S SKJrJrJr  S SKJrJrJr  \	(       a  S S	KJr  \R0                  " \5      r " S
 S\5      r\ " S S\5      5       rg)    )annotationsN)	dataclassfield)Path)TYPE_CHECKINGAny)$SentenceTransformerModelCardCallback SentenceTransformerModelCardData)AsymModuleRouter)SparseAutoEncoderSparseStaticEmbeddingSpladePooling)SparseEncoderc                      \ rS rSrSrg)SparseEncoderModelCardCallback    N)__name__
__module____qualname____firstlineno____static_attributes__r       i/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/sparse_encoder/model_card.pyr   r      s    r   r   c                    ^  \ rS rSr% Sr\" SS9rS\S'   \" S S9rS	\S
'   \" SSS9r	S\S'   \" SSS9r
S\S'   \" \" \5      R                  S-  SSS9rS\S'   \" SSSS9rS\S'   \" SSSS9rS\S'   SU 4S jjrSS jrSS jrSrU =r$ )SparseEncoderModelCardData   a  A dataclass storing data used in the model card.

Args:
    language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
        e.g. "en" or ["en", "de", "nl"]
    license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
        or "cc-by-nc-sa-4.0"
    model_name (`Optional[str]`): The pretty name of the model, e.g. "SparseEncoder based on answerdotai/ModernBERT-base".
    model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
        e.g. "tomaarsen/se-mpnet-base-ms-marco".
    train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
        e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
    eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
        e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
    task_name (`str`): The human-readable task the model is trained on,
        e.g. "semantic search and sparse retrieval".
    tags (`Optional[List[str]]`): A list of tags for the model,
        e.g. ["sentence-transformers", "sparse-encoder"].
    local_files_only (`bool`): If True, don't attempt to find dataset or base model information on the Hub.Add commentMore actions
        Defaults to False.
    generate_widget_examples (`bool`): If True, generate widget examples from the evaluation or training dataset,
        and compute their similarities. Defaults to True.

.. tip::

    Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
    include it in your model cards.

Example::

    >>> model = SparseEncoder(
    ...     "microsoft/mpnet-base",
    ...     model_card_data=SparseEncoderModelCardData(
    ...         model_id="tomaarsen/se-mpnet-base-allnli",
    ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
    ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
    ...         license="apache-2.0",
    ...         language="en",
    ...     ),
    ... )
N)defaultstr	task_namec                 
    / SQ$ )N)zsentence-transformerszsparse-encodersparser   r   r   r   <lambda>#SparseEncoderModelCardData.<lambda>E   s     !
r   )default_factoryzlist[str] | NonetagsF)r    initzlist[list[str]] | Nonepredict_examplepipeline_tagzmodel_card_template.md)r    r)   reprr   template_pathSparse Encoder
model_typezSparseEncoder | Nonemodelc                "  > [         TU ]  U5        U R                  c  SU l        U R                  c  SU l        UR	                  5        Vs/ s H&  n[        U[        5      (       d  M  UR                  PM(     nn/ n[        U;   d
  [        U;   a  US/-  n[        U;   a  US/-  n[        U;   a  US/-  n[        U;   a  US/-  nU R                  [        [        R                   U5      5        US/-  nSR#                  U5      U l        g s  snf )	Nz$semantic search and sparse retrievalzfeature-extraction
AsymmetriczInference-freeSPLADECSRr.    )superregister_modelr"   r+   modules
isinstancer   	__class__r   r   r   r   r   add_tagsmapr!   lowerjoinr/   )selfr0   moduleall_modulesr/   r:   s        r   r7   )SparseEncoderModelCardData.register_modelW   s    u%>>!CDN$ 4D6;mmodoFTZ\bIc'v''od
;&K"7<.(J K/+,,JK'8*$J+5'!Jc#))Z01'((
((:.! es   D(Dc           	        SnU R                   R                  (       a]  SSSSS.R                  U R                   R                  U R                   R                  R                  SS5      R	                  5       5      nU R                   R                  5       U R                   R                  5       [        U R                   5      U[        U R                   SS 5      S	.$ )
NzDot ProductzCosine SimilarityzEuclidean DistancezManhattan Distance)cosinedot	euclidean	manhattan_r5   max_active_dims)model_max_lengthoutput_dimensionalitymodel_stringsimilarity_fn_namerI   )	r0   rM   getreplacetitleget_max_seq_length get_sentence_embedding_dimensionr!   getattr)r?   rM   s     r   get_model_specific_metadata6SparseEncoderModelCardData.get_model_specific_metadataq   s    *::((-$11	"
 c$**//1N1N1V1VWZ\_1`1f1f1hi  !%

 = = ?%)ZZ%P%P%R

O"4&tzz3DdK
 	
r   c                    U R                   $ )N)r/   )r?   s    r   get_default_model_name1SparseEncoderModelCardData.get_default_model_name   s    r   )r/   r+   r"   )r0   r   returnNone)rY   zdict[str, Any])rY   rZ   )r   r   r   r   __doc__r   r"   __annotations__r(   r*   r+   r   __file__parentr-   r/   r0   r7   rT   rW   r   __classcell__)r:   s   @r   r   r      s    (V 4(Is("
D
  /4Du.MO+M d7L#7X(=(=@X(X_dkpqM4q$45uMJM #(5u"MEM/4
" r   r   )
__future__r   loggingdataclassesr   r   pathlibr   typingr   r    sentence_transformers.model_cardr	   r
   sentence_transformers.modelsr   r   r   +sentence_transformers.sparse_encoder.modelsr   r   r   2sentence_transformers.sparse_encoder.SparseEncoderr   	getLoggerr   loggerr   r   r   r   r   <module>rk      se    "  (  % s = = o oP			8	$	%I 	 l!A l lr   