
    hbb                        S SK Jr  S SKrS SKrS SKrS SKrS SKJrJr  S SK	r
S SKrS SKJr  S SKJr  S SKJr  S SKJr  \(       a  S SKJr  \R*                  " \5      r " S	 S
\5      rg)    )annotationsN)TYPE_CHECKINGCallable)Tensor)trange)SentenceEvaluator)SimilarityFunction)SentenceTransformerc                  Z  ^  \ rS rSrSrSS/S// SQ/ SQS/SSS	S
SSSSSSSS4                                           SU 4S jjjrS r   S         SS jjr   S       SS jjr   S           SS jjr	SS jr
S r\S 5       rS rSrU =r$ )InformationRetrievalEvaluator   an  
This class evaluates an Information Retrieval (IR) setting.

Given a set of queries and a large corpus set. It will retrieve for each query the top-k most similar document. It measures
Mean Reciprocal Rank (MRR), Recall@k, and Normalized Discounted Cumulative Gain (NDCG)

Args:
    queries (Dict[str, str]): A dictionary mapping query IDs to queries.
    corpus (Dict[str, str]): A dictionary mapping document IDs to documents.
    relevant_docs (Dict[str, Set[str]]): A dictionary mapping query IDs to a set of relevant document IDs.
    corpus_chunk_size (int): The size of each chunk of the corpus. Defaults to 50000.
    mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
    ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
    accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
    precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
    map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
    show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
    batch_size (int): The batch size for evaluation. Defaults to 32.
    name (str): A name for the evaluation. Defaults to "".
    write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
    truncate_dim (int, optional): The dimension to truncate the embeddings to. Defaults to None.
    score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to the ``similarity`` function from the ``model``.
    main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
    query_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
    query_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
    corpus_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
    corpus_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
    write_predictions (bool): Whether to write the predictions to a JSONL file. Defaults to False.
        This can be useful for downstream evaluation as it can be used as input to the :class:`~sentence_transformers.sparse_encoder.evaluation.ReciprocalRankFusionEvaluator` that accept precomputed predictions.

Example:
    ::

        import random
        from sentence_transformers import SentenceTransformer
        from sentence_transformers.evaluation import InformationRetrievalEvaluator
        from datasets import load_dataset

        # Load a model
        model = SentenceTransformer('all-MiniLM-L6-v2')

        # Load the Touche-2020 IR dataset (https://huggingface.co/datasets/BeIR/webis-touche2020, https://huggingface.co/datasets/BeIR/webis-touche2020-qrels)
        corpus = load_dataset("BeIR/webis-touche2020", "corpus", split="corpus")
        queries = load_dataset("BeIR/webis-touche2020", "queries", split="queries")
        relevant_docs_data = load_dataset("BeIR/webis-touche2020-qrels", split="test")

        # For this dataset, we want to concatenate the title and texts for the corpus
        corpus = corpus.map(lambda x: {'text': x['title'] + " " + x['text']}, remove_columns=['title'])

        # Shrink the corpus size heavily to only the relevant documents + 30,000 random documents
        required_corpus_ids = set(map(str, relevant_docs_data["corpus-id"]))
        required_corpus_ids |= set(random.sample(corpus["_id"], k=30_000))
        corpus = corpus.filter(lambda x: x["_id"] in required_corpus_ids)

        # Convert the datasets to dictionaries
        corpus = dict(zip(corpus["_id"], corpus["text"]))  # Our corpus (cid => document)
        queries = dict(zip(queries["_id"], queries["text"]))  # Our queries (qid => question)
        relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids])
        for qid, corpus_ids in zip(relevant_docs_data["query-id"], relevant_docs_data["corpus-id"]):
            qid = str(qid)
            corpus_ids = str(corpus_ids)
            if qid not in relevant_docs:
                relevant_docs[qid] = set()
            relevant_docs[qid].add(corpus_ids)

        # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
        ir_evaluator = InformationRetrievalEvaluator(
            queries=queries,
            corpus=corpus,
            relevant_docs=relevant_docs,
            name="BeIR-touche2020-subset-test",
        )
        results = ir_evaluator(model)
        '''
        Information Retrieval Evaluation of the model on the BeIR-touche2020-test dataset:
        Queries: 49
        Corpus: 31923

        Score-Function: cosine
        Accuracy@1: 77.55%
        Accuracy@3: 93.88%
        Accuracy@5: 97.96%
        Accuracy@10: 100.00%
        Precision@1: 77.55%
        Precision@3: 72.11%
        Precision@5: 71.43%
        Precision@10: 62.65%
        Recall@1: 1.72%
        Recall@3: 4.78%
        Recall@5: 7.90%
        Recall@10: 13.86%
        MRR@10: 0.8580
        NDCG@10: 0.6606
        MAP@100: 0.2934
        '''
        print(ir_evaluator.primary_metric)
        # => "BeIR-touche2020-test_cosine_map@100"
        print(results[ir_evaluator.primary_metric])
        # => 0.29335196224364596
iP  
   )         r   d   F     TNc                  > [         TU ]  5         / U l        U H:  nUU;   d  M  [        UU   5      S:  d  M  U R                  R	                  U5        M<     U R                   Vs/ s H  nUU   PM
     snU l        [        UR                  5       5      U l        U R                   Vs/ s H  nUU   PM
     snU l	        UU l
        UU l        UU l        UU l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        Xl        Xl        Xl        Xl        Xl        U(       a,  [5        [        U R2                  R                  5       5      5      O/ U l        U(       a  [9        U5      OS U l        Xl        U(       a  SU-   nSU-   S-   U l        SS/U l         U RC                  U R6                  5        UU l"        U RD                  (       a  SU-   S-   U l#        g g s  snf s  snf )Nr   _z Information-Retrieval_evaluationz_results.csvepochstepsz_predictions.jsonl)$super__init__queries_idslenappendquerieslistkeys
corpus_idscorpusquery_promptquery_prompt_namecorpus_promptcorpus_prompt_namerelevant_docscorpus_chunk_sizemrr_at_k	ndcg_at_kaccuracy_at_kprecision_recall_at_kmap_at_kshow_progress_bar
batch_sizename	write_csvscore_functionssortedscore_function_namesr	   main_score_functiontruncate_dimcsv_filecsv_headers_append_csv_headerswrite_predictionspredictions_file)selfr   r"   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r6   r2   r5   r#   r$   r%   r&   r:   qidcid	__class__s                           x/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/evaluation/InformationRetrievalEvaluator.pyr   &InformationRetrievalEvaluator.__init__}   s   0 	Cm#M#,>(?!(C  '',  150@0@A0@0@Av{{}-.2oo>osvc{o>(!2*"4*!2 "*%:" !2$	".Q`F40D0D0I0I0K+L$Mfh!Na#56I#Jgk (:D?$FW#W-  !:!:;!2!!$F$MPd$dD! "G B ?s   %G (G%c                X   U GH#  nU R                    H#  nU R                  R                  U SU 35        M%     U R                   HC  nU R                  R                  U SU 35        U R                  R                  U SU 35        ME     U R                   H#  nU R                  R                  U SU 35        M%     U R
                   H#  nU R                  R                  U SU 35        M%     U R                   H#  nU R                  R                  U SU 35        M%     GM&     g )Nz
-Accuracy@z-Precision@z-Recall@z-MRR@z-NDCG@z-MAP@)r+   r8   r   r,   r)   r*   r-   )r<   r4   
score_nameks       r@   r9   1InformationRetrievalEvaluator._append_csv_headers   s   .J''  '':,j(DE ( //  '':,k!(EF  '':,hqc(BC 0 ]]  '':,eA3(?@ # ^^  '':,fQC(@A $ ]]  '':,eA3(?@ # /    c                   US:w  a  US:X  a  SU 3nOSU SU S3nOSnU R                   b  USU R                    S3-  n[        R                  S	U R                   S
U S35        U R                  cJ  UR
                  UR                  0U l        UR
                  /U l        U R                  U R                  5        U R                  " U/UQ7SU0UD6nUGb   U R                  (       Ga  [        R                  " USS9  [        R                  R                  X R                  5      n	[        R                  R!                  U	5      (       dG  [#        U	SSS9n
U
R%                  SR                  U R&                  5      5        U
R%                  S5        O[#        U	SSS9n
X4/nU R                   H  nU R(                   H  nUR+                  X   S   U   5        M     U R,                   H5  nUR+                  X   S   U   5        UR+                  X   S   U   5        M7     U R.                   H  nUR+                  X   S   U   5        M     U R0                   H  nUR+                  X   S   U   5        M     U R2                   H  nUR+                  X   S   U   5        M     M     U
R%                  SR                  [5        [6        U5      5      5        U
R%                  S5        U
R9                  5         U R:                  (       d  U R<                  ce  [?        U R                   Vs/ s H!  oX   S   [?        U R0                  5         4PM#     snS S9S   nU S[?        U R0                  5       3U l        O3U R<                  R@                   S[?        U R0                  5       3U l        URC                  5        VVVVVVs0 s H_  u  pURC                  5         HE  u  nnURC                  5         H*  u  nnU SURE                  S S![7        U5      -   5       3U_M,     MG     Ma     nnnnnnnU RG                  UU R                  5      nU RI                  UUX45        U$ s  snf s  snnnnnnf )"Nz after epoch z
 in epoch z after z stepsr   z (truncated to )z5Information Retrieval Evaluation of the model on the z dataset:output_pathT)exist_okwutf-8modeencoding,
a
accuracy@kprecision@krecall@kmrr@kndcg@kmap@kc                    U S   $ )Nr    xs    r@   <lambda>8InformationRetrievalEvaluator.__call__.<locals>.<lambda>  s    !A$rF   )keyr   z_ndcg@r   z@k@)%r6   loggerinfor0   r2   similarity_fn_name
similarityr4   r9   compute_metricesr1   osmakedirspathjoinr7   isfileopenwriter8   r+   r   r,   r)   r*   r-   mapstrcloseprimary_metricr5   maxvalueitemsreplaceprefix_name_to_metrics store_metrics_in_model_card_data)r<   modelrK   r   r   argskwargsout_txtscorescsv_pathfOutoutput_datar0   rD   score_functionvalues_dictmetric_namevaluesrt   metricss                       r@   __call__&InformationRetrievalEvaluator.__call__   s!    B;{)%1&ugWUG6BG():):(;1==GKDII;V^_f^gghij'$)$<$<e>N>N#OD ).)A)A(BD%$$T%>%>?&&uWW+WPVW "t~~~KKd3ww||K?H77>>(++H3A

388D$4$456

4  H3A .K11++A&&v|L'A!'DE , 33A&&v|M'B1'EF&&v|J'?'BC 4 A&&v|G'<Q'?@ ' A&&v|H'=a'@A ( A&&v|G'<Q'?@ ' 2" JJsxxC 567JJtJJL""''/!$UYUnUnoUnTFL23t~~3FGHUno&" " *8(8s4>>?R>S&T#)-)A)A)G)G(HsSWSaSaObNc&d# 06||~
 
/=+'2'8'8':#V"LLN5 a 3 3D#A, GHI5P + J': J/= 	 
 --gtyyA--eWeK p
s   )(Q#A&Q(c                	   Uc  Un[        [        U R                  5      [        U R                  5      [        U R                  5      [        U R                  5      [        U R
                  5      5      nU R                  UU R                  SU R                  U R                  S9n0 nU R                   H*  n[        [        U5      5       V	s/ s H  n	/ PM     sn	Xx'   M,     [        S[        U R                  5      U R                  SU R                   (       + S9 GH  n
[#        XR                  -   [        U R                  5      5      nUc4  U R                  UU R                  X SU R$                  U R&                  S9nOX:U nU R                  R)                  5        GH  u  pU" Xl5      n[*        R,                  " U[#        U[        US   5      5      SSS	S
9u  nnUR/                  5       R1                  5       nUR/                  5       R1                  5       n[        [        U5      5       H  n[3        UU   UU   5       Hj  u  nnU R4                  U
U-      n[        Xx   U   5      U:  a   [6        R8                  " Xx   U   UU45        ML  [6        R:                  " Xx   U   UU45        Ml     M     GM     GM     U HZ  n[        [        Xx   5      5       H=  n[        [        Xx   U   5      5       H  nXx   U   U   u  nnUUS.Xx   U   U'   M     M?     M\     U R<                  (       a  Ub  U H  nU R>                  RA                  SSU S35      n[B        RD                  RG                  UU5      nSn[I        UUSS9 n[        [        Xx   5      5       Hb  nU RJ                  U   nU R                  U   nXx   U   n[M        US SS9nUUUS.nURO                  [P        RR                  " U5      S-   5        Md     S S S 5        M     [T        RW                  S[        U R                  5       35        [T        RW                  S[        U R                  5       S35        U R                   Vs0 s H  oU RY                  Xx   5      _M     nnU RZ                   H/  n[T        RW                  SU 35        U R]                  UU   5        M1     U$ s  sn	f ! , (       d  f       GM  = fs  snf )Nquery)encode_fn_nameprompt_namepromptr   zCorpus Chunks)descdisabledocumentr   TF)dimlargestr3   )	corpus_idscorez.jsonlr   rM   rN   rO   c                    U S   $ Nr   r\   r]   s    r@   r_   @InformationRetrievalEvaluator.compute_metrices.<locals>.<lambda>  s    '
rF   ra   reverse)query_idr   resultsrS   z	Queries: zCorpus: zScore-Function: )/rs   r)   r*   r+   r,   r-   embed_inputsr   r$   r#   r2   ranger   r   r"   r(   r.   minr&   r%   ru   torchtopkcputolistzipr!   heapqheappushheappushpopr:   r;   rv   rh   rj   rk   rm   r   r3   rn   jsondumpsrc   rd   compute_metricsr4   output_scores)r<   ry   corpus_modelcorpus_embeddingsrK   max_kquery_embeddingsqueries_result_listr0   r   corpus_start_idxcorpus_end_idxsub_corpus_embeddingsr   pair_scorespair_scores_top_k_valuespair_scores_top_k_idx	query_itrsub_corpus_idr   r   doc_itrbase_filename	json_pathrP   r   r   
query_textr   
predictionr}   s                                  r@   rg   .InformationRetrievalEvaluator.compute_metrices%  s     L""#**+
  ,,LL"..$$ - 
 !((D5:3?O;P5Q(R5Q5Q(R% ) !'s4;;!7!7o[_[q[qWq!
 !!14J4J!JCPTP[P[L\]N !((,(9(9 KK 0@#- $ 7 7-- ): )% ):>(Z% )-(<(<(B(B(D$,-=U CH**UCA,?!@aQU^cC?(*? ,D+G+G+I+P+P+R((=(A(A(C(J(J(L%!&s+;'<!=I03-i8:RS\:]1,u %)OO4D}4T$U	 28CDuL!NN+>+DY+ORWYbQcd!--.A.G	.RUZ\eTfg1 "> )E%!
X (D"3':'@#AB	$S)<)B9)M%NOG':'@'KG'T$E9R[fkDl'-i8A  P C ( !!k&=+ $ 5 5 = =h!D6QWHX YGGLLmD	)$AT%*3/B/H+I%J	#'#3#3I#>%)\\)%<
"5";I"F #)6JTX"Y )1%/'.&
 

4::j#9D#@A &K BA ,, 	iDLL 1234hs4;;/034 UYThThiThD,,-@-FGGThi --DKK*4&12vd|, . o )Sv BA* js   SA=S2S*
S'	c           
         Uc  UR                   nO%US:X  a  UR                  nOUS:X  a  UR                  nW" U4UUU R                  U R                  SU R
                  S.UD6$ )Nr   r   T)r   r   r/   r.   convert_to_tensorr6   )encodeencode_queryencode_documentr/   r.   r6   )r<   ry   	sentencesr   r   r   r{   	encode_fns           r@   r   *InformationRetrievalEvaluator.embed_inputs  sz     !Iw&**Iz)--I	
#"44"**	
 	
 		
rF   c           	        U R                    Vs0 s H  o"S_M     nnU R                   Vs0 s H  o"/ _M     nnU R                   Vs0 s H  o"/ _M     nnU R                   Vs0 s H  o"S_M     nnU R                   Vs0 s H  o"/ _M     nnU R                   Vs0 s H  o"/ _M     nn[        [        U5      5       GH  n	U R                  U	   n
[        X   S SS9nU R                  U
   nU R                    H'  nUSU  H  nUS   U;   d  M  X===   S-  ss'     M%     M)     U R                   HU  nSnUSU  H  nUS   U;   d  M  US-  nM     XM   R                  X-  5        X]   R                  U[        U5      -  5        MW     U R                   H9  n[        USU 5       H$  u  nnUS   U;   d  M  Xm==   SUS-   -  -  ss'     M7     M;     U R                   Hj  nUSU  Vs/ s H  nUS   U;   a  SOSPM     nnS/[        U5      -  nU R                  UU5      U R                  UU5      -  nX}   R                  U5        Ml     U R                   He  nSnSn[        USU 5       H!  u  nnUS   U;   d  M  US-  nUUUS-   -  -  nM#     U[        U[        U5      5      -  nX   R                  U5        Mg     GM     U H"  nX2==   [        U R                  5      -  ss'   M$     U H  n[        R                   " XB   5      XB'   M     U H  n[        R                   " XR   5      XR'   M     U H  n[        R                   " Xr   5      Xr'   M     U H"  nXb==   [        U R                  5      -  ss'   M$     U H  n[        R                   " X   5      X'   M     UUUUUUS.$ s  snf s  snf s  snf s  snf s  snf s  snf s  snf )	Nr   c                    U S   $ r   r\   r]   s    r@   r_   ?InformationRetrievalEvaluator.compute_metrics.<locals>.<lambda>  s    AgJrF   Tr   r   r   g      ?)rU   rV   rW   rY   rX   rZ   )r+   r,   r)   r*   r-   r   r   r   r3   r'   r   	enumeratecompute_dcg_at_kr   r   npmean)r<   r   rD   num_hits_at_kprecisions_at_krecall_at_kMRRndcg	AveP_at_kr   r   top_hitsquery_relevant_docsk_valhitnum_correctranktop_hitpredicted_relevancetrue_relevances
ndcg_valuesum_precisionsavg_precisions                          r@   r   -InformationRetrievalEvaluator.compute_metrics  s   '+'9'9:'9!A'9:*.*D*DE*DQb5*DE&*&@&@A&@"u&@A!]]+]!t]+#~~.~!2~.$(MM2MqUM	2 s#678I''	2H 1<BV`deH"&"4"4X"> ++#Ae,C;'+>>%,1, - , 33#Ae,C;'+>>#q( -  &--k.AB"))+<O8P*PQ 4 !*8Ae+<!=ID#;'+>>
cTAX&66
 "> ' [cdefk[l'[lPW-1DDA!K[l $ ' $%#,?(@"@!223FNQUQfQf#UR 
 "":. ( !"!*8Ae+<!=ID#;'+>>#q(&+*BB "> !/UC@S<T1U U ''6 'Y 9p ADLL 11  !A!#);!<O ! AWW[^4KN  Aggdg&DG  AFc$,,''F  A779<0IL  (*#
 	
e ;EA+.2H's(   N+N0N5#N:?N?O(O	c                   US    H0  n[         R                  SR                  X!S   U   S-  5      5        M2     US    H0  n[         R                  SR                  X!S   U   S-  5      5        M2     US    H0  n[         R                  SR                  X!S   U   S-  5      5        M2     US    H-  n[         R                  S	R                  X!S   U   5      5        M/     US
    H-  n[         R                  SR                  X!S
   U   5      5        M/     US    H-  n[         R                  SR                  X!S   U   5      5        M/     g )NrU   zAccuracy@{}: {:.2f}%r   rV   zPrecision@{}: {:.2f}%rW   zRecall@{}: {:.2f}%rX   zMRR@{}: {:.4f}rY   zNDCG@{}: {:.4f}rZ   zMAP@{}: {:.4f})rc   rd   format)r<   r}   rD   s      r@   r   +InformationRetrievalEvaluator.output_scores  s@   %AKK.55a9Ma9PSV9VWX & &AKK/66q:OPQ:RUX:XYZ ' 
#AKK,33Aj7I!7Ls7RST $ AKK(//'?13EFG ! !AKK)0084DQ4GHI " AKK(//'?13EFG !rF   c                    Sn[        [        [        U 5      U5      5       H$  nX U   [        R                  " US-   5      -  -  nM&     U$ )Nr      )r   r   r   r   log2)
relevancesrD   dcgis       r@   r   .InformationRetrievalEvaluator.compute_dcg_at_k%  sC    s3z?A./Aa=2771q5>11C 0
rF   c                Z    0 n/ SQnU H  n[        X5      c  M  [        X5      X'   M      U$ )N)r6   r#   r$   r%   r&   )getattr)r<   config_dictconfig_dict_candidate_keysra   s       r@   get_config_dict-InformationRetrievalEvaluator.get_config_dict,  s;    &
" .Ct!-#*4#5  . rF   )r+   r/   r"   r(   r!   r%   r&   r7   r8   r5   r-   r)   r0   r*   r,   r;   rr   r   r   r#   r$   r'   r4   r2   r.   r6   r1   r:   ),r   dict[str, str]r"   r   r'   zdict[str, set[str]]r(   intr)   	list[int]r*   r   r+   r   r,   r   r-   r   r.   boolr/   r   r0   rp   r1   r   r6   z
int | Noner2   z4dict[str, Callable[[Tensor, Tensor], Tensor]] | Noner5   zstr | SimilarityFunction | Noner#   
str | Noner$   r   r%   r   r&   r   r:   r   returnNone)NrH   rH   )
ry   r
   rK   r   r   r   r   r   r   dict[str, float])NNN)ry   r
   r   zTensor | NonerK   r   r   r   )ry   r
   r   zstr | list[str] | np.ndarrayr   r   r   r   r   r   r   z
np.ndarray)r   zlist[object])__name__
__module____qualname____firstlineno____doc__r   r9   r   rg   r   r   r   staticmethodr   r   __static_attributes____classcell__)r?   s   @r@   r   r      s\   cT "'!d "t#0+8"e"'#'PT?C#'(,$()-"'-BeBe Be +	Be
 Be Be Be !Be  )Be Be  Be Be Be Be !Be  N!Be" =#Be$ !%Be& &'Be( ")Be* '+Be,  -Be. 
/Be BeHA* #'P"P  P 	P
 P 
Pj +/"&t"t )	t
  t 
tt &*"&!
"
 0
 #	

  
 
 

4[
zH&   rF   r   )
__future__r   r   r   loggingrh   typingr   r   numpyr   r   r   tqdmr   2sentence_transformers.evaluation.SentenceEvaluatorr   *sentence_transformers.similarity_functionsr	   )sentence_transformers.SentenceTransformerr
   	getLoggerr   rc   r   r\   rF   r@   <module>r     sN    "    	 *     P IM			8	$a$5 arF   