from __future__ import annotations

import inspect
import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable

from sentence_transformers.backend import load_onnx_model, load_openvino_model

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer, MT5Config, PretrainedConfig, T5Config
from transformers.utils.import_utils import is_peft_available
from transformers.utils.peft_utils import find_adapter_config_file

from sentence_transformers.models.InputModule import InputModule

logger = logging.getLogger(__name__)

if TYPE_CHECKING and is_peft_available():
    from peft import PeftConfig


def _save_pretrained_wrapper(_save_pretrained_fn: Callable, subfolder: str) -> Callable[..., None]:
    def wrapper(save_directory: str | Path, **kwargs) -> None:
        os.makedirs(Path(save_directory) / subfolder, exist_ok=True)
        return _save_pretrained_fn(Path(save_directory) / subfolder, **kwargs)

    return wrapper


class Transformer(InputModule):
    """Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (regardless of
            whether the model itself is cased)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
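
    Example:
        A minimal usage sketch: this module is typically the first module of a
        SentenceTransformer pipeline (the checkpoint name below is only an
        illustration):

        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.models import Pooling, Transformer

            transformer = Transformer("sentence-transformers/all-MiniLM-L6-v2", max_seq_length=256)
            pooling = Pooling(transformer.get_word_embedding_dimension(), pooling_mode="mean")
            model = SentenceTransformer(modules=[transformer, pooling])
            embeddings = model.encode(["An example sentence", "Another one"])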
    """

    config_file_name: str = "sentence_bert_config.json"
    config_keys: list[str] = ["max_seq_length", "do_lower_case"]
    save_in_root: bool = True

    def __init__(
        self,
        model_name_or_path: str,
        max_seq_length: int | None = None,
        model_args: dict[str, Any] | None = None,
        tokenizer_args: dict[str, Any] | None = None,
        config_args: dict[str, Any] | None = None,
        cache_dir: str | None = None,
        do_lower_case: bool = False,
        tokenizer_name_or_path: str | None = None,
        backend: str = "torch",
    ) -> None:
        super().__init__()
        self.do_lower_case = do_lower_case
        self.backend = backend
        if model_args is None:
            model_args = {}
        if tokenizer_args is None:
            tokenizer_args = {}
        if config_args is None:
            config_args = {}

        config, is_peft_model = self._load_config(model_name_or_path, cache_dir, backend, config_args)
        self._load_model(model_name_or_path, config, cache_dir, backend, is_peft_model, **model_args)

        # Only feature names in this set are forwarded to the underlying model in `forward`
        forward_params = list(inspect.signature(self.auto_model.forward).parameters)
        self.model_forward_params = set(forward_params) | {
            "input_ids",
            "inputs_embeds",
            "attention_mask",
            "token_type_ids",
        }

        if max_seq_length is not None and "model_max_length" not in tokenizer_args:
            tokenizer_args["model_max_length"] = max_seq_length
        self.tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_args,
        )

        # No max_seq_length set. Try to infer it from the model configuration
        if max_seq_length is None:
            if (
                hasattr(self.auto_model, "config")
                and hasattr(self.auto_model.config, "max_position_embeddings")
                and hasattr(self.tokenizer, "model_max_length")
            ):
                max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)

        self.max_seq_length = max_seq_length

        if tokenizer_name_or_path is not None:
            self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__

    def _load_config(
        self, model_name_or_path: str, cache_dir: str | None, backend: str, config_args: dict[str, Any]
    ) -> tuple[PeftConfig | PretrainedConfig, bool]:
        """Loads the transformers or PEFT configuration

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            config_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers config.

        Returns:
            tuple[PretrainedConfig, bool]: The model configuration and a boolean indicating whether the model is a PEFT model.
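
        Example:
            A rough sketch of the decision this method makes (the repository ids
            are illustrative, not real checkpoints):

            ::

                # A PEFT adapter repository ships an adapter_config.json, so
                # find_adapter_config_file returns a path and the PEFT branch is taken:
                #   module._load_config("some-user/some-lora-adapter", None, "torch", {})
                #   -> (PeftConfig(...), True)
                # A plain transformers checkpoint has no adapter config:
                #   module._load_config("bert-base-uncased", None, "torch", {})
                #   -> (BertConfig(...), False)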
        """
        if (
            find_adapter_config_file(
                model_name_or_path,
                cache_dir=cache_dir,
                token=config_args.get("token"),
                revision=config_args.get("revision"),
                local_files_only=config_args.get("local_files_only", False),
            )
            is not None
        ):
            if not is_peft_available():
                raise Exception(
                    "Loading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`."
                )
            if backend != "torch":
                raise ValueError(
                    "PEFT models can currently only be loaded with the `torch` backend. To use other backends, "
                    'load the model with `backend="torch"`, call `model.transformers_model.merge_and_unload()`, '
                    "save that model with `model.save_pretrained()` and then load the model with the desired backend."
                )
            from peft import PeftConfig

            return PeftConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir), True

        return AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir), False

    def _load_model(
        self,
        model_name_or_path: str,
        config: PeftConfig | PretrainedConfig,
        cache_dir: str,
        backend: str,
        is_peft_model: bool,
        **model_args,
    ) -> None:
        """Loads the transformers or PEFT model into the `auto_model` attribute

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            config ("PeftConfig" | PretrainedConfig): The model configuration.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            is_peft_model (bool): Whether the model is a PEFT model.
            model_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers model.
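
        Example:
            A rough sketch of the dispatch (the checkpoint name is illustrative):

            ::

                # torch backend: T5Config/MT5Config route to the encoder-only loaders,
                # everything else goes through AutoModel.from_pretrained
                module._load_model("bert-base-uncased", config, None, "torch", False)
                # onnx / openvino backends delegate to the sentence_transformers.backend
                # loaders with task_name="feature-extraction"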
        """
        if backend == "torch":
            # When loading a PEFT adapter, some kwargs only apply to the adapter
            # and must not be forwarded to the base model
            if is_peft_model:
                for adapter_only_kwarg in ("revision",):
                    model_args.pop(adapter_only_kwarg, None)

            if isinstance(config, T5Config):
                self._load_t5_model(model_name_or_path, config, cache_dir, **model_args)
            elif isinstance(config, MT5Config):
                self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args)
            else:
                self.auto_model = AutoModel.from_pretrained(
                    model_name_or_path, config=config, cache_dir=cache_dir, **model_args
                )
        elif backend == "onnx":
            self.auto_model = load_onnx_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        elif backend == "openvino":
            self.auto_model = load_openvino_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        else:
            raise ValueError(f"Unsupported backend '{backend}'. `backend` should be `torch`, `onnx`, or `openvino`.")

    def _load_t5_model(self, model_name_or_path: str, config: T5Config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from T5"""
        from transformers import T5EncoderModel

        T5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = T5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def _load_mt5_model(self, model_name_or_path: str, config: MT5Config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from MT5"""
        from transformers import MT5EncoderModel

        MT5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = MT5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def __repr__(self) -> str:
        return f"Transformer({dict(self.get_config_dict(), architecture=self.auto_model.__class__.__name__)})"

    def forward(self, features: dict[str, torch.Tensor], **kwargs) -> dict[str, torch.Tensor]:
        """
        Forward pass through the transformer model.

        This method processes the input features through the underlying transformers model
        and returns the token embeddings along with any other relevant outputs.

        Notes:
            - Only passes arguments that are expected by the underlying transformer model

        Args:
            features (dict[str, torch.Tensor]): Input features dictionary containing at least
                'input_ids' and 'attention_mask'. May also contain other tensors required by
                the underlying transformer model.
            **kwargs: Additional keyword arguments to pass to the underlying transformer model.

        Returns:
            dict[str, torch.Tensor]: Updated features dictionary containing the input features, plus:
                - 'token_embeddings': Token-level embeddings from the transformer model
                - 'attention_mask': Possibly modified attention mask if using PeftModel with prompt learning
                - 'all_layer_embeddings': If the model outputs hidden states, contains embeddings from all layers
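
        Example:
            A shape sketch under assumed inputs (batch of 2, sequence length 8,
            hidden size 384; all values are purely illustrative):

            ::

                features = module.tokenize(["Hello world", "How are you?"])
                # input_ids, attention_mask: torch.Size([2, 8])
                features = module(features)
                features["token_embeddings"].shape  # -> torch.Size([2, 8, 384])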
        """
        trans_features = {key: value for key, value in features.items() if key in self.model_forward_params}

        outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
        token_embeddings = outputs[0]
        features["token_embeddings"] = token_embeddings

        # If the AutoModel is wrapped with a PeftModelForFeatureExtraction, then it may have added
        # virtual tokens, and the attention mask needs to be extended to cover them
        if is_peft_available():
            from peft import PeftModelForFeatureExtraction

            if (
                isinstance(self.auto_model, PeftModelForFeatureExtraction)
                and self.auto_model.active_peft_config.is_prompt_learning
            ):
                batch_size = token_embeddings.size(0)
                attention_mask = features["attention_mask"]
                prefix_attention_mask = torch.ones(
                    batch_size, self.auto_model.active_peft_config.num_virtual_tokens, device=attention_mask.device
                )
                features["attention_mask"] = torch.cat((prefix_attention_mask, attention_mask), dim=1)

        if self.auto_model.config.output_hidden_states and "hidden_states" in outputs:
            features["all_layer_embeddings"] = outputs["hidden_states"]

        return features

    def get_word_embedding_dimension(self) -> int:
        return self.auto_model.config.hidden_size

    def tokenize(
        self, texts: list[str] | list[dict] | list[tuple[str, str]], padding: str | bool = True
    ) -> dict[str, torch.Tensor]:
        """Tokenizes a text and maps tokens to token-ids"""
        output = {}
        if isinstance(texts[0], str):
            to_tokenize = [texts]
        elif isinstance(texts[0], dict):
            to_tokenize = []
            output["text_keys"] = []
            for lookup in texts:
                text_key, text = next(iter(lookup.items()))
                to_tokenize.append(text)
                output["text_keys"].append(text_key)
            to_tokenize = [to_tokenize]
        else:
            batch1, batch2 = [], []
            for text_tuple in texts:
                batch1.append(text_tuple[0])
                batch2.append(text_tuple[1])
            to_tokenize = [batch1, batch2]

        # Strip whitespace
        to_tokenize = [[str(s).strip() for s in col] for col in to_tokenize]

        # Lowercase if requested
        if self.do_lower_case:
            to_tokenize = [[s.lower() for s in col] for col in to_tokenize]

        output.update(
            self.tokenizer(
                *to_tokenize,
                padding=padding,
                truncation="longest_first",
                return_tensors="pt",
                max_length=self.max_seq_length,
            )
        )
        return output

    def save(self, output_path: str, safe_serialization: bool = True) -> None:
        self.auto_model.save_pretrained(output_path, safe_serialization=safe_serialization)
        self.tokenizer.save_pretrained(output_path)
        self.save_config(output_path)

    @classmethod
    def load(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
        trust_remote_code: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
        backend: str = "torch",
        **kwargs,
    ) -> Self:
        init_kwargs = cls._load_init_kwargs(
            model_name_or_path=model_name_or_path,
            subfolder=subfolder,
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
            trust_remote_code=trust_remote_code,
            model_kwargs=model_kwargs,
            tokenizer_kwargs=tokenizer_kwargs,
            config_kwargs=config_kwargs,
            backend=backend,
        )
        return cls(model_name_or_path=model_name_or_path, **init_kwargs)

    @classmethod
    def _load_init_kwargs(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
        trust_remote_code: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
        backend: str = "torch",
        **kwargs,
    ) -> dict[str, Any]:
        config = cls.load_config(
            model_name_or_path=model_name_or_path,
            subfolder=subfolder,
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )

        # Hub-related arguments that also have to reach the model, tokenizer, and config loaders
        hub_kwargs = {
            "subfolder": subfolder,
            "token": token,
            "revision": revision,
            "local_files_only": local_files_only,
            "trust_remote_code": trust_remote_code,
        }

        if "model_args" not in config:
            config["model_args"] = {}
        if "tokenizer_args" not in config:
            config["tokenizer_args"] = {}
        if "config_args" not in config:
            config["config_args"] = {}

        config["model_args"].update(hub_kwargs)
        config["tokenizer_args"].update(hub_kwargs)
        config["config_args"].update(hub_kwargs)

        # Arguments passed by the caller take the highest priority
        if model_kwargs:
            config["model_args"].update(model_kwargs)
        if tokenizer_kwargs:
            config["tokenizer_args"].update(tokenizer_kwargs)
        if config_kwargs:
            config["config_args"].update(config_kwargs)

        return {**config, "cache_dir": cache_folder, "backend": backend}

    @classmethod
    def load_config(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        config_filename: str | None = None,
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
    ) -> dict[str, Any]:
        config_filenames = (
            [config_filename]
            if config_filename
            else [
                "sentence_bert_config.json",
                "sentence_roberta_config.json",
                "sentence_distilbert_config.json",
                "sentence_camembert_config.json",
                "sentence_albert_config.json",
                "sentence_xlm-roberta_config.json",
                "sentence_xlnet_config.json",
            ]
        )
        for config_filename in config_filenames:
            config = super().load_config(
                model_name_or_path=model_name_or_path,
                subfolder=subfolder,
                config_filename=config_filename,
                token=token,
                cache_folder=cache_folder,
                revision=revision,
                local_files_only=local_files_only,
            )
            if config:
                break

        # Don't allow configuration files to set trust_remote_code
        if "model_args" in config and "trust_remote_code" in config["model_args"]:
            config["model_args"].pop("trust_remote_code")
        if "tokenizer_args" in config and "trust_remote_code" in config["tokenizer_args"]:
            config["tokenizer_args"].pop("trust_remote_code")
        if "config_args" in config and "trust_remote_code" in config["config_args"]:
            config["config_args"].pop("trust_remote_code")

        return config
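
# A minimal end-to-end sketch of this module used standalone (the checkpoint name and
# texts are illustrative; tensor shapes depend on the chosen checkpoint):
#
#     module = Transformer("sentence-transformers/all-MiniLM-L6-v2")
#     features = module.tokenize(["Hello world", "How are you?"])
#     features = module(features)  # adds "token_embeddings" to the features dict
#     print(features["token_embeddings"].shape)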