
    h                     b   S SK r S SKJr  S SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  S S	KJrJrJr  S
rSS\S/004SS\SS/004/rSS\S/004SS\/ SQ004SS\/ 004/r\4S jr\ R.                  R1                  S/ SQ/ SQ/ SQ4/ SQ/ SQ/ SQ4/ SQ/ SQ/ SQ4/ SQ/ SQ/ SQ4\ R2                  " SS// SQS S!/5      \ R2                  " / S"Q/ SQ/ S#Q5      \ R2                  " S$S%// SQS!S&/5      /5      S' 5       rS( rS) r\ R.                  R1                  S*/ S+Q5      S, 5       rS- rg).    N)Config)util)English)Language)span_finder_default_config)Doc)Example)fix_random_seedmake_tempdirregistrypytestzWho is Shaka Khan?spans)      zI like London and Berlin.r            I like London and Berlin)r   r   )r   r    c                     / nU H@  n[         R                  " U R                  US   5      US   5      nUR                  U5        MB     U$ )Nr      )r	   	from_dictmake_docappend)nlpdatatrain_examplestegs        _/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/pipeline/test_span_finder.pymake_examplesr#      sJ    Ns||AaD11Q48b!      z4tokens_predicted, tokens_reference, reference_truths)Mon.-June16)Mon.r'   r(   r)   )r   r   r+   r+   r   r   r+   )r*   r'   Juner)   )r+   r+   r   r   r   r   r+   )r*   r'   r(   16)r*   z-Junze 16)r+   r+   r+   r+   z	Mon.-Juner)   r0   r+   )Mon.-r(   r)   )r+   r,   r+   r4   zJune 16r/   c                 :   [        5       n[        UR                  U S/[        U 5      -  S9n[        UR                  US/[        U5      -  S9n[	        XE5      nUR
                  R                  SS5      /UR
                  R                  [        '   UR                  SS[        0S9nUR                  5         UR                  R                  nUR                  UR                  :w  a6  [        R                  " [         SS	9   UR#                  U/U5        S S S 5        g UR#                  U/U5      u  p[        U	5      [        U 5      :X  d   eUR$                  R&                  R)                  XR$                  R+                  U5      5        g ! , (       d  f       g = f)
NF)wordsspaces   	   span_finder	spans_keyconfigz*must match between reference and predictedmatch)r   r   vocablenr	   	reference	char_spanr   	SPANS_KEYadd_pipe
initializemodelopstextr   raises
ValueError_get_aligned_truth_scoresxptestingassert_array_equalasarray)tokens_predictedtokens_referencereference_truthsr   	predictedrB   exampler:   rH   truth_scoresmaskss              r"   test_loss_alignment_examplerX   '   sb   R *C		)5'C@P<Q2QI 		)5'C@P<Q2QI i+G*1*;*;*E*Ea*K)LGI&,,}k95M,NKNN




C~~']]J
 117)SA
 	%??	3OL|$4 5555FFNN%%lFFNNCS4TU
 
 	s   F
Fc                     [        5       n U " S5      U " S5      /nUS   SS /US   R                  [        '   US   SS /US   R                  [        '   SnU H  nU[        U5      -  nM     [	        5       R                  [        5      R                  5       n[        R                  " U5      S   nUR                  US	9  UR                  U5      n[        U5      U:X  d   e[        US   5      S
:X  d   eg )NThis is an example.This is the second example.r         r   r8   rG   )X   )r   r   rD   rA   r   from_strr   interpolater   resolverF   predict)r   docstotal_tokensdocr=   rG   predictionss          r"   test_span_finder_modelrh   g   s    
*C%&,I(JKD $Q!~DGMM) $Q!~DGMM)LC   X9:FFHFV$W-E	t--%K{|+++{1~!###r$   c                  ^   [        5       n U " S5      U " S5      /nUS   SS /US   R                  [        '   US   SS /US   R                  [        '   U R                  SS	[        0S
9nU R	                  5         [        UR                  U5      5      n[        US   R                  ;   d   eg )NrZ   r[   r   r\   r]   r   r8   r:   r;   r<   )r   r   rD   rE   rF   listpipe)r   rd   r:   s      r"   test_span_finder_componentrl   |   s    
*C%&,I(JKD $Q!~DGMM) $Q!~DGMM),,}k95M,NKNN  &'DQ%%%r$   z"min_length, max_length, span_count))r   r   r   )NN   )r_   N   )Nr   r_   )r_   r\   r_   c                 T  ^ ^ [        5       nU" S5      nT S:X  aA  TS:X  a;  [        R                  " [        SS9   UR	                  STT [
        S.S9nS S S 5        g UR	                  STT [
        S.S9nUR                  5         / SQnUR                  U/U5        UR                  [
           (       d   e[        UR                  [
           5      U:X  d   eTc  [        S	5      mT c  S
m [        UU 4S jUR                  [
            5       5      (       d   eg ! , (       d  f       g = f)Nz1Me and Jenny goes together like peas and carrots.r   z"Both 'min_length' and 'max_length'r>   r:   )
max_length
min_lengthr;   r<   )
r/   r+   r,   r+   r+   r+   r,   r+   r0   r+   infr   c              3   `   >#    U  H#  nT[        U5      s=:*  =(       a    T:*  Os  v   M%     g 7fN)rA   ).0spanrp   rq   s     r"   	<genexpr>4test_set_annotations_span_lengths.<locals>.<genexpr>   s'     VAUzSY44*44AUs   +.)r   r   rJ   rK   rE   rD   rF   set_annotationsr   rA   floatall)rq   rp   
span_countr   rf   r:   scoress   ``     r"   !test_set_annotations_span_lengthsr~      s'   
 *C
A
BCQ:?]]:-QR,,",",!* ' K S 	,,$$"
  K NNF v.99Ysyy#$
222 5\

V9AUVVVVVY SR 	s   D
D'c                  .  ^ [        S5        [        5       n U R                  SS[        0S9n[	        U 5      mU R                  U4S jS9nUR                  R                  S5      S:X  d   e[        S	5       H  n0 nU R                  TX$S
9  M     WS   S:  d   eSnU " U5      nUR                  [           n[        U5      S:X  d   e[        U Vs/ s H  oR                  PM     sn5      1 Sk:X  d   e[        5        n	U R                  U	5        [         R"                  " U	5      n
U
" U5      nUR                  [           n[        U5      S:X  d   e[        U Vs/ s H  oR                  PM     sn5      1 Sk:X  d   e S S S 5        U R%                  T5      nS[         S3U;   d   eUS[         S3   S:X  d   eUS[         S3   S:X  d   eU " S5      n[        UR                  [           5      S:X  d   eg s  snf s  snf ! , (       d  f       N= f)Nr   r:   r;   r<   c                     > T $ rt    )r   s   r"   <lambda>%test_overfitting_IO.<locals>.<lambda>   s    Nr$   )get_examplesnOr_   2   )sgdlossesgMbP?r   r\   >   BerlinLondonLondon and Berlinspans__f_pg      ?_rg      ?r   r   )r
   r   rE   rD   r#   rF   rG   get_dimrangeupdater   rA   setrI   r   to_diskr   load_model_from_pathevaluate)r   r:   	optimizerir   	test_textrf   r   rv   tmp_dirnlp2doc2spans2r}   r   s                 @r"   test_overfitting_IOr      s   A
)C,,}k95M,NK"3'N,BCI$$T*a///2Y

>y
@  - 5((( +I
i.CIIi Eu:??e,ed		e,- 2    
7G((1II&6{a&1&$II&12 7
 
 	
 
 
 \\.)FI;b!V+++F9+R()T111F9+R()S000 h-Csyy#$)))9 - 2 
s%   G< AHH
2HH
H)r   	thinc.apir   spacyr   spacy.lang.enr   spacy.languager   spacy.pipeline.span_finderr   spacy.tokensr   spacy.trainingr	   
spacy.utilr
   r   r   rD   
TRAIN_DATATRAIN_DATA_OVERLAPPINGr#   markparametrizeparamrX   rh   rl   r~   r   r   r$   r"   <module>r      s      ! # A  " > >	Gi'%;<=#	9w123
 Gi'%;<="	9:;< 'Ir?	#$  '  : ,'4	
 ,'4	
 ,+4	
 )',	

 	$'V	

 	#+$	

 	i 'V	
?$'PVQ'PV0$*& (G0W	0Wf/*r$   