
    h$                     $   S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKrS SKrSSKJr  SSKJr  SSKJrJr  SSKJr  S	S
KJrJrJrJrJr  SSSS.r\R<                  " SSSS.S9\" SSS9\" SSSS9\" SSS9\" SSS9\" SSS9\" \S   SSSS9\" SSS S!S9\" \S"   S#S$S%S9\" \S&   S'S(S)S9\" SS*S+S,S-S94
S.\S/\S0\S1\S2\S\ S3\	\   S"\ S&\!S4\!4S5 jj5       r"\S   \S"   \S&   SS6.S.\S/\S0\S1\S2\S\ S"\ S&\!S7\!S8\
\#\#\\#\#4   4   4S9 jjr$g):    N)Path)AnyDictListOptionalTuple   )util)Errors)MultiLabel_TextCategorizerTextCategorizer)Corpus   )ArgOptappimport_code	setup_gpu   F)n_trialsuse_gpugold_preproczfind-thresholdT)allow_extra_argsignore_unknown_options)context_settings.zModel name or path)helpz3Location of binary evaluation data in .spacy format)r   existsz&Name of pipe to examine thresholds forz7Key of threshold attribute in component's configurationzMetric to optimizer   z
--n_trialsz-nz0Number of trials to determine optimal thresholdsz--codez-czNPath to Python file with additional code (registered functions) to be importedr   z--gpu-idz-gzGPU ID or -1 for CPUr   z--gold-preprocz-GzUse gold preprocessingz	--verbosez-Vz-VVz/Display more information for debugging purposesmodel	data_path	pipe_namethreshold_key
scores_key	code_pathverbosec
                     U	(       a-  [         R                  R                  [        R                  5        [        U5        [        U UUUUUUUSS9	  g)au  
Runs prediction trials for a trained model with varying thresholds to maximize
the specified metric. The search space for the threshold is traversed linearly
from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
(the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
returns all results).

This is applicable only for components whose predictions are influenced by
thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note
that the full path to the corresponding threshold attribute in the config has to
be provided.

DOCS: https://spacy.io/api/cli#find-threshold
F)	r   r    r!   r"   r#   r   r   r   silentN)r
   loggersetLevelloggingDEBUGr   find_threshold)
r   r    r!   r"   r#   r   r$   r   r   r%   s
             R/home/james-whalen/.local/lib/python3.13/site-packages/spacy/cli/find_threshold.pyfind_threshold_clir.      sI    @ W]]+	#!
    )r   r   r   r'   r'   returnc                  ^^ [        XhS9  [        R                  " U5      nUR                  5       (       d  [        R
                  R                  SUSS9  [        R                  " U 5      n	X)R                  ;  a0  [        [        R                  R                  X)R                  S95      eU	R                  U5      n
[        U
S5      (       d  [        [        R                  5      e[!        U
5      ["        :X  a  [        R
                  R%                  S5        U(       d'  [        R
                  R'                  SU S	U S
U S3S9  [)        XS9n[+        U" U	5      5      nUR-                  S5      nS[.        [0        [2        4   S[4        [0           S[6        S[.        [0        [2        4   4S jnS[.        [0        [2        4   S[4        [0           S[0        S[.        [0        [2        4   4U4S jjm0 mSU/UQnSn[8        R:                  " SSU5      n[=        [        R>                  RA                  SU /US95        U GHb  n[        R                  " U U" T" U	RB                  USRE                  U5      5      RG                  5       UU5      S9n	[        U
S5      (       a.  [I        U	R                  U5      SU" [K        U
S5      UU5      5        U	RM                  U5      nUU;  a=  [        R
                  R                  SU S3S[+        URO                  5       5       3SS 9  UU   TU'   [Q        TU   [6        [R        45      (       d"  [        R
                  R                  S!U S"3SS9  [=        [        R@                  " [U        US#5      [U        TU   S#5      /US95        GMe     [W        TRO                  5       U4S$ jS%9n[Y        [[        TR]                  5       5      5      S:X  a<  [        R
                  R%                  S&[Q        U
[^        5      (       a  US';   a  S(OS)S*9  O'U(       d   [=        S+[U        US,S-9 S.U S/TU    S35        UTU   T4$ )0a  
Runs prediction trials for models with varying thresholds to maximize the specified metric.
model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
data_path (Path): Path to file with DocBin with docs to use for threshold search.
pipe_name (str): Name of pipe to examine thresholds for.
threshold_key (str): Key of threshold attribute in component's configuration.
scores_key (str): Name of score to metric to optimize.
n_trials (int): Number of trials to determine optimal thresholds.
use_gpu (int): GPU ID or -1 for CPU.
gold_preproc (bool): Whether to use gold preprocessing. Gold preprocessing helps the annotations align to the
    tokenization, and may result in sequences of more consistent length. However, it may reduce runtime accuracy due
    to train/test skew.
silent (bool): Whether to print non-error-related output to stdout.
RETURNS (Tuple[float, float, Dict[float, float]]): Best found threshold, the corresponding score, scores for all
    evaluated thresholds.
)r'   zEvaluation data not foundr   )exits)nameoptsscorerzThe `textcat` component doesn't use a threshold as it's not applicable to the concept of exclusive classes. All thresholds will yield the same results.zOptimizing for z for component 'z' with z trials.)title)r   .configkeysvaluer0   c                 d    U[         R                  " [        R                  USS U 5      US   '   U $ )zSet item in nested dictionary. Adapted from https://stackoverflow.com/a/54138200.
config (Dict[str, Any]): Configuration dictionary.
keys (List[Any]): Path to value to set.
value (float): Value to set.
RETURNS (Dict[str, Any]): Updated dictionary.
Nr   )	functoolsreduceoperatorgetitem)r8   r9   r:   s      r-   set_nested_item'find_threshold.<locals>.set_nested_item   s4     KP	))49f=d2hGr/   full_keyc           
        > US   U ;  aK  [         R                  R                  SU SUS   / S3SUS   / S[        U R	                  5       5       3SS9  US   [        U5      S:  a  T" XS      USS	 U5      0$ XS      0$ )
a  Filters provided config dictionary so that only the specified keys path remains.
config (Dict[str, Any]): Configuration dictionary.
keys (List[Any]): Path to value to set.
full_key (str): Full user-specified key.
RETURNS (Dict[str, Any]): Filtered dictionary.
r   zFailed to look up `z` in config: sub-key z not found.zMake sure you specified z: correctly. The following sub-keys are available instead: r   r6   textr2   N)wasabimsgfaillistr9   len)r8   r9   rB   filter_configs      r-   rK   %find_threshold.<locals>.filter_config   s     7& JJOO+H:5JDQRG9+U`a/a	{:t&')	   G4y1} #6q'?DHhG
 	
 Q
 	
r/   
components)
   rN   r   	Threshold)widths)r8   cfgzFailed to look up score `z` in evaluation results.z`Make sure you specified the correct value for `scores_key`. The following scores are available: rD   zReturned score for key 'zG' is not numeric. Threshold optimization only works for numeric scores.   c                    > TU    $ )N )keyscoress    r-   <lambda> find_threshold.<locals>.<lambda>   s	    r/   )rU   z?All scores are identical. Verify that all settings are correct.)cats_macro_fcats_micro_f z\Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`.)r6   rE   z
Best threshold:    )ndigitsz with z
 value of )0r   r
   ensure_pathr   rF   rG   rH   
load_modelcomponent_namesAttributeErrorr   E001formatget_pipehasattrE1045typer   warninfor   rI   splitr   strr   r   floatnumpylinspaceprinttablesrowr8   joincopysetattrgetattrevaluater9   
isinstanceintroundmaxrJ   setvaluesr   )r   r    r!   r"   r#   r   r   r   r'   nlppipecorpusdev_datasetconfig_keysr@   config_keys_fulltable_col_widths
thresholds	thresholdeval_scoresbest_thresholdrK   rV   s                        @@r-   r,   r,   G   s   : g%  +I

3YaH
//%
 C+++KKI4G4GH
 	
 <<	"D4""V\\**Dz_$

M	

 

#J</?	{'RZQ[ \  	 	
 I9Fvc{#K%%c*K
S#X
&*3i
8=
	c3h

S#X
&*3i
;>
	c3h
. "$F$i>+>1h/J	&--

[ZL:CS

TU	oo"JJ 0#((;K2L$& 	
 4Y'e 4k9M ll;/[(JJOO1*=UV";#3#3#5679	   (
3y&+eS\::JJOO*:, 7   
 	JJy!$eF9,=q&AB'	
G  T -DFN 3v}} A%

S t%?@@!AA 
 p 	 	
 $U>1%E$FfZLXbcijxcybzz{| 6.1699r/   )%r<   r*   r>   pathlibr   typingr   r   r   r   r   rm   wasabi.tablesrF   r[   r
   errorsr   pipeliner   r   trainingr   _utilr   r   r   r   r   	_DEFAULTScommandrk   rx   boolr.   rl   r,   rT   r/   r-   <module>r      s       3 3     B  8 8 	 */4P S34#$Ybfg#KLS'`a#$89	*-|THz{ #D(D  @P  !Qy+ZDZ[Y~68H$Umn{D%>op)) ) 	)
 ) ) ) ~) ) ) )	)f j)Y'">2b:b:b: b: 	b:
 b: b: b: b: b: 5%eUl++,b:r/   