
    D_i                     L   % S r SSKJr  SSKrSSKrSSKrSSKJrJrJ	r	  SSK
JrJrJr  SSKJr  SSKrSSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  \(       a  SSKJ r   SSK!J"r"  \RF                  " \$5      r%\RL                  " 5       r'S\(S'   SS jr) " S S\5      r*g)z2A tracer that runs evaluators over completed runs.    )annotationsN)FutureThreadPoolExecutorwait)TYPE_CHECKINGAnycast)UUID)EvaluationResultEvaluationResults)	langchain)run_copy)
BaseTracer)tracing_v2_enabled)_get_executor)Sequence)Runz)weakref.WeakSet[EvaluatorCallbackHandler]_TRACERSc                 Z    [        [        5       H  n U c  M  U R                  5         M     g)zWait for all tracers to finish.N)listr   wait_for_futures)tracers    [/home/james-whalen/.local/lib/python3.13/site-packages/langchain_core/tracers/evaluation.pywait_for_all_evaluatorsr      s"    x.##% !    c                  b  ^  \ rS rSr% SrSrS\S'   SrS\S'    S	\S
'    SrS\S'    Sr	S\S'    \
R                  " 5       rS\S'    SrS\S'    SrS\S'    S\S'   S\S'        S"               S#U 4S jjjrS$S jr\    S%S j5       r S&       S'S jjrS(S jrS)S  jrS!rU =r$ )*EvaluatorCallbackHandler&   zTracer that runs a run evaluator whenever a run is persisted.

Attributes:
    client : Client
        The LangSmith client instance used for evaluating the runs.
evaluator_callback_handlerstrnameNUUID | None
example_idzlangsmith.Clientclient  Sequence[langsmith.RunEvaluator]
evaluatorszThreadPoolExecutor | Noneexecutorzweakref.WeakSet[Future]futuresTboolskip_unfinished
str | Noneproject_namez-dict[tuple[str, str], list[EvaluationResult]]logged_eval_resultszthreading.Locklockc                .  >^  [         TT ]  " S0 UD6  [        U[        5      (       a  [	        U5      OUT l        U=(       d    [        R                  " 5       T l        UT l	        Uc  [        5       T l        O7US:  a*  [        US9T l        [        R                  " T U 4S j5        OST l        [        R                  [         S      " 5       T l        UT l        UT l        0 T l        [*        R,                  " 5       T l        [0        R3                  T 5        g)a  Create an EvaluatorCallbackHandler.

Args:
    evaluators : Sequence[RunEvaluator]
        The run evaluators to apply to all top level runs.
    client : LangSmith Client, optional
        The LangSmith client instance to use for evaluating the runs.
        If not specified, a new instance will be created.
    example_id : Union[UUID, str], optional
        The example ID to be associated with the runs.
    skip_unfinished: bool, optional
        Whether to skip unfinished runs.
    project_name : str, optional
        The LangSmith project name to be organize eval chain runs under.
    max_concurrency : int, optional
        The maximum number of concurrent evaluators to run.
Nr   )max_workersc                 J   > [        ST R                  5      R                  SS9$ )Nr   T)r   )r	   r(   shutdownselfs   r   <lambda>3EvaluatorCallbackHandler.__init__.<locals>.<lambda>g   s     14==AJJPTJUr   r%   )super__init__
isinstancer    r
   r#   langchain_tracer
get_clientr$   r'   r   r(   r   weakreffinalizeWeakSetr   r)   r+   r-   r.   	threadingLockr/   r   add)	r5   r'   r$   r#   r+   r-   max_concurrencykwargs	__class__s	   `       r   r9   !EvaluatorCallbackHandler.__init__@   s    6 	"6" *:s ; ;D 	 = 0 ; ; =$")ODMq .?KDMU
 !DMvd|46.(#% NN$	Tr   c           	     x    U R                   c  U R                  R                  X5      nU/n[        U R                   S/U R                  S9 nUR                  (       a%  U R                  R                  UR                  5      OSnUR                  UUS9nU R                  UUUR                  (       a  UR                  R                  OSS9nSSS5        [        UR                  5      nU R                     W HO  n	[        [!        U	SUR                  5      5      n
U R"                  R%                  X4/ 5      R'                  U	5        MQ     SSS5        g! , (       d  f       N= f! [         a7    [        R                  SUR                  UR                  R                  5        e f = f! , (       d  f       g= f)zEvaluate the run in the project.

Args:
    run: The run to be evaluated.
    evaluator: The evaluator to use for evaluating the run.
Neval)r-   tagsr$   )example)source_run_idzError evaluating run %s with %starget_run_id)r-   r$   evaluate_runr   reference_example_idread_example_log_evaluation_feedback
latest_runid	Exceptionlogger	exceptionrE   __name__r    r/   getattrr.   
setdefaultappend)r5   run	evaluatoreval_resulteval_resultscbreference_exampleevaluation_resultr#   resrun_ids              r   _evaluate_in_project-EvaluatorCallbackHandler._evaluate_in_projectr   s   	  ("kk66sF +}#!..fXdkk // KK,,S-E-EF "
 %.$:$:- %; %!
  $<<%68mm"--"2"2  =  2 112
YY#WS/366BC((33V4H"MTT $ Y5 $  	1##,,
 	 Ys9   AE' BEE' 7AF+
E$ E' $E' 'AF(+
F9c                    [        U [        5      (       a  U /nU$ [        U [        5      (       a  SU ;   a  U S   nU$ S[        U 5       S3n[	        U5      e)NresultszInvalid evaluation result type z1. Expected EvaluationResult or EvaluationResults.)r:   r   dicttype	TypeError)rf   results_msgs      r   _select_eval_results-EvaluatorCallbackHandler._select_eval_results   sr     g/00yH  &&9+?y)H 	 2$w- AC C  C. r   c                   U R                  U5      nU H  n0 nUR                  (       a  0 UR                  EUEn[        USS 5      nUc  UR                  nU R                  R                  UUR                  UR                  UR                  UR                  UR                  UUR                  =(       d    U[        R                  R                  R                  S9	  M     U$ )NrL   )scorevaluecomment
correctionsource_inforK   feedback_source_type)rl   evaluator_inforW   rR   r$   create_feedbackkeyro   rp   rq   rr   rK   	langsmithschemasFeedbackSourceTypeMODEL)r5   evaluator_responserZ   rK   rf   ra   source_info_run_id_s           r   rP   1EvaluatorCallbackHandler._log_evaluation_feedback   s     ++,>?C+-L!!E#"4"4EEc?D9G&&KK''iiii>>(!//@=%.%6%6%I%I%O%O ( 
 $ r   c                   U R                   (       a2  UR                  (       d!  [        R                  SUR                  5        g[        U5      nU R                  Ul        U R                   Hb  nU R                  c  U R                  X#5        M#  U R                  R                  U R                  R                  U R                  X#5      5        Md     g)zGRun the evaluator on the run.

Args:
    run: The run to be evaluated.
zSkipping unfinished run %sN)r+   outputsrT   debugrR   r   r#   rN   r'   r(   rc   r)   rB   submit)r5   rZ   run_r[   s       r   _persist_run%EvaluatorCallbackHandler._persist_run   s     LL5svv>}$(OO!I}}$))$:  MM(()B)BDT	 )r   c                .    [        U R                  5        g)z!Wait for all futures to complete.N)r   r)   r4   s    r   r   )EvaluatorCallbackHandler.wait_for_futures   s    T\\r   )	r$   r'   r#   r(   r)   r/   r.   r-   r+   )NNTr'   N)r'   r&   r$   zlangsmith.Client | Noner#   zUUID | str | Noner+   r*   r-   r,   rC   z
int | NonerD   r   returnNone)rZ   r   r[   zlangsmith.RunEvaluatorr   r   )rf   $EvaluationResult | EvaluationResultsr   list[EvaluationResult])N)r|   r   rZ   r   rK   r"   r   r   )rZ   r   r   r   r   r   )rV   
__module____qualname____firstlineno____doc__r!   __annotations__r#   r'   r(   r=   r?   r)   r+   r-   r9   rc   staticmethodrl   rP   r   r   __static_attributes____classcell__)rE   s   @r   r   r   &   sT    -D#,"J"2E35J058*.H'.C'.'8G$8A OT H#L*#JFF

 +/(, $#/&*040 (0 &	0
 0 !0 $0 0 
0 0d*X 5	 & &*	@  #	
 
 6& r   r   r   )+r   
__future__r   loggingr@   r=   concurrent.futuresr   r   r   typingr   r   r	   uuidr
   rx   langsmith.evaluation.evaluatorr   r   langchain_core.tracersr   r;   langchain_core.tracers._compatr   langchain_core.tracers.baser   langchain_core.tracers.contextr    langchain_core.tracers.langchainr   collections.abcr   langchain_core.tracers.schemasr   	getLoggerrV   rT   r?   r   r   r   r   r%   r   r   <module>r      sv    8 "    ? ? + +   N @ 3 2 = :(2			8	$6=oo6G
3 G&xz xr   