
    hO                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKrS SK	J
r
  \(       a
  S SKrS SKJr  \R                  " \5      r " S S\
5      rg)    )annotationsN)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                     ^  \ rS rSrSr    S             S	U 4S jjjr S
         SS jjr      SS jr\SS j5       r	Sr
U =r$ )MSEEvaluatorFromDataFrame   ug  
Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

Args:
    dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
        Columns are the respective language codes::

        [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
         {'en': 'My second sentence', ...}]
    teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
    combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
        First entry in a tuple is the source language. The sentence in the respective language will be fetched from
        the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
        will be fetched from the dataframe and passed to the student model
    batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
    name (str, optional): The name of the evaluator. Defaults to "".
    write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
    truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
        current truncation dimension. Defaults to None.
c                  > [         TU ]  5         X0l        XPl        X@l        U(       a  SU-   nSU-   S-   U l        SS/U l        SU l        X`l        Xpl	        0 U l
        [        R                  S5        [        5       nU R                   H  u  p/ n/ nU Hl  nX   R                  5       S:w  d  M  X   R                  5       S:w  d  M3  UR                  X   5        UR!                  X   5        UR!                  X   5        Mn     X4U R                  X4'   U R                  R!                  U	 S	U
 35        M     [#        U5      nU R%                  X(5      n['        X5       VVs0 s H	  u  nnUU_M     snnU l        g s  snnf )
N_mse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddings -)super__init__combinationsname
batch_sizecsv_filecsv_headersprimary_metric	write_csvtruncate_dimdataloggerinfosetstripaddappendlistembed_inputszipteacher_embeddings)self	dataframeteacher_modelr   r   r   r   r   all_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowall_src_embeddingssentemb	__class__s                    t/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.pyr   "MSEEvaluatorFromDataFrame.__init__*   ss    	(	$:D(4/.@#W-,"(	01"u"&"3"3HMM =&&(B.3=3F3F3HB3N(,,S];!((7!((7	 ! 0=.LDIIx*+##xj($<= #4  $$89!..}S>ABV>k"l>ks49>k"l"ls   #E=c           
        UR                  5         / nU R                   H  u  pgU R                  Xg4   u  p[        R                  " U V
s/ s H  oR
                  U
   PM     sn
5      n[        R                  " U R                  X5      5      nX-
  S-  R                  5       nUS-  nUR                  U5        [        R                  SU R                   SU SU S35        [        R                  SUS 35        M     Ub  U R                  (       a  [        R                  " US	S
9  [        R                  R!                  X R"                  5      n[        R                  R%                  U5      n['        USU(       a  SOSSS9 n[(        R*                  " U5      nU(       d  UR-                  U R.                  5        UR-                  X4/U-   5        S S S 5        S[        R                  " U5      R1                  5       * 0nU R3                  UU R                  5      nU R5                  UUX45        U$ s  sn
f ! , (       d  f       Nk= f)N   d   zMSE evaluation on z dataset - r   :zMSE (*100):	4fT)exist_okr   awzutf-8)newlinemodeencodingr   )evalr   r   npasarrayr&   r$   meanr"   r   r   r   r   osmakedirspathjoinr   isfileopencsvwriterwriterowr   itemprefix_name_to_metrics store_metrics_in_model_card_data)r'   modeloutput_pathr   r   
mse_scoresr+   r,   r-   r.   r1   src_embeddingstrg_embeddingsmsecsv_pathoutput_file_existsfrL   metricss                      r4   __call__"MSEEvaluatorFromDataFrame.__call__W   s    	


"&"3"3H+/99h5I+J(MZZS`(aS`4)@)@)FS`(abNZZ(9(9%(OPN"39??AC3JCc"KK,TYYK{8*AhZWXYZKK-Bx01 #4 "t~~KKd3ww||K?H!#!9h8JPS^efjkA)OOD$4$45 ;< g "BGGJ$7$<$<$>#>?--gtyyA--eWeK3 )b gfs   
H3
AH88
Ic                X    UR                   " U4U R                  SU R                  S.UD6$ )NT)r   convert_to_numpyr   )encoder   r   )r'   rQ   	sentenceskwargss       r4   r$   &MSEEvaluatorFromDataFrame.embed_inputs{   s:     ||
!**	

 
 	
    c                    g)NzKnowledge Distillation )r'   s    r4   description%MSEEvaluatorFromDataFrame.description   s    'rc   )
r   r   r   r   r   r   r   r&   r   r   )   r   TN)r(   zlist[dict[str, str]]r)   r   r   zlist[tuple[str, str]]r   intr   strr   boolr   z
int | None)Nrl   )
rQ   r   rR   z
str | Noner   ri   r   ri   returnzdict[str, float])rQ   r   r`   zstr | list[str] | np.ndarrayrm   z
np.ndarray)rm   rj   )__name__
__module____qualname____firstlineno____doc__r   r[   r$   propertyrf   __static_attributes____classcell__)r3   s   @r4   r   r      s    4 #'+m'+m ++m ,	+m
 +m +m +m !+m +m\ ik"("7A"QT"be"	"H
"
 0

 

 ( (rc   r   )
__future__r   rK   loggingrE   typingr   numpyrB   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerrn   r   r   re   rc   r4   <module>r}      sB    " 
  	    PM			8	$w( 1 w(rc   