
    h:j                     P   S SK r S SKrS SKrS SKJr  S SKrS SKJrJrJ	r	  S SK
r
S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJr  S S
KJrJrJrJr  S SKJr  SSKJ r J!r!   S SK"r"\"RF                  " S5        \"RH                  " S5        S r&S r'S r(S r)\RT                  S 5       r+S r,S r-S r.S r/S r0S r1S r2S r3S r4\RT                  S 5       r5\RT                  S 5       r6\RT                  S 5       r7\Rp                  Rs                  SSS /5      S! 5       r:\Rp                  Rs                  SSS /5      S" 5       r;\Rp                  Rs                  SSS /5      S# 5       r<\Rp                  Rs                  SSS /5      S$ 5       r=\Rp                  Rs                  SSS /5      S% 5       r>\Rp                  Rs                  SSS /5      S& 5       r?\Rp                  Rs                  SSS /5      S' 5       r@\Rp                  R                  \Rp                  Rs                  SSS /5      S( 5       5       rBS) rCS* rDS+ rES, rFS- rGS. rH\Rp                  Rs                  S// S0Q5      S1 5       rI\Rp                  Rs                  S// S2Q5      S3 5       rJ\Rp                  Rs                  S4S5SS6S7/\\/5      S8 5       rKS9 rL\Rp                  Rs                  SSS /5      S: 5       rMS; rN\Rp                  R                  \P" \	" 5       \5      (       + S<S=9S> 5       rQS? rRS@ rSg! \% a     GNf = f)A    N)mock)CupyOpsNumpyOpsget_current_ops)GermanEnglish)Language)Scorer)DocSpan)Example)find_matching_languageignore_errorraise_errorregistry)Vocab   )add_vecs_to_vocabassert_docs_equalc                 <    SU R                   ;   a  [        S5      eU $ )N2zno dice)text
ValueErrordocs    S/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/test_language.pyevil_componentr      s    
chh##J    c                 Z    U R                   R                  S5      (       d
  SU S   l        U $ )N4T)r   
startswithis_sent_startr   s    r   perhaps_set_sentencesr%   %   s(    88s## $BJr   c                 H    U R                  S5      (       d  [        S5      eU $ )N
SENT_STARTzno sents)has_annotationr   r   s    r   assert_sents_errorr)   +   s#    l++$$Jr   c                 T    [         R                  " S5      nUR                  SU 5        g )NspacyzTrouble with component %s.)logging	getLoggerwarning)	proc_nameprocdocseloggers        r   
warn_errorr4   1   s!    w'F
NN/;r   c                      [        [        5       5      n U R                  S5      nS H  nUR                  U5        M     U R	                  5         U $ )NtextcatPOSITIVENEGATIVE)r
   r   add_pipe	add_label
initialize)nlpr6   labels      r   r=   r=   6   sC    
57
Cll9%G)%  *NNJr   c                    SnSSSS.0nSS0n[        U R                  UR                  S5      S	9n[        R                  " XB5      nU R                  U/5        [        R                  " [        5         U R                  U5        S S S 5        [        R                  " [        5         U R                  X45        S S S 5        [        R                  " [        5         U R                  XB45        S S S 5        [        R                  " [        5         [        R                  " US 5      nS S S 5        [        R                  " [        5         [        R                  " XC5      nS S S 5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       N{= f! , (       d  f       g = f)
Nhello worldcats      ?        r7   LABELT words)r   vocabsplitr   	from_dictupdatepytestraises	TypeErrorr   KeyError)r=   r   annotswrongkeyannotsr   examples         r   test_language_updaterS   @   s*   D3C89Ft_N
ciitzz#
/C,GJJy 
y	!

7 
" 
y	!

D>" 
" 
y	!

C=! 
" 
z	"##C. 
#	x	 ##C8 
!	  
"	! 
"	! 
"	! 
#	"	 	 s<   3E+'E<FFF/+
E9<
F

F
F,/
F=c                 @   SnSSSSS.00n[        U R                  UR                  S5      S9n[        R                  " X25      nU R                  U/5      nUS	   S
:  d   eU R                  S U4 5       5      nUS	   S
:  d   e[        R                  " [        5         U R                  U5        S S S 5        [        R                  " [        5         U R                  X4/5        S S S 5        [        R                  " [        5         U R                  X24/5        S S S 5        [        R                  " [        5         U R                  X/5        S S S 5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       Nf= f! , (       d  f       g = f)Nr@   doc_annotationrA   rB   rC   r7   rE   rF   speedr   c              3   $   #    U  H  ov   M     g 7fN ).0egs     r   	<genexpr>)test_language_evaluate.<locals>.<genexpr>c   s     1y"ys   )	r   rH   rI   r   rJ   evaluaterL   rM   rN   )r=   r   rP   r   rR   scoress         r   test_language_evaluater`   Z   s9   Dcs)K LMF
ciitzz#
/C,G\\7)$F'?Q \\1y11F'?Q 
y	!W 
" 
y	!tn%& 
" 
y	!sm_% 
"	y	!d^$ 
"	! 
"	! 
"	! 
"	!	!	!s0    EE-
E> F
E*-
E;>
F
Fc                     [         R                  " S5      S 5       nSnSSSS.0n[        [        5       5      n U " U5      nU R                  S5        U R	                  [
        R                  " XC5      /5        g)	zkTest that docs are processed correctly within Language.pipe if the
component doesn't expose a .pipe method.test_evaluate_no_pipec                     U $ rX   rY   r   s    r   pipe#test_evaluate_no_pipe.<locals>.pipex       
r   r@   rA   rB   rC   r7   N)r
   	componentr   r:   r^   r   rJ   )r=   rd   r   rP   r   s        r   rb   rb   t   st     /0 1 D3C89F
57
C
d)CLL()LL'##C012r   c                    [        U 5      nUR                  S5      nS H  nUR                  U5        M     UR                  5         SSSS.0nUR	                  S5      n[
        R                  " XT5      nUR                  U/5      nUR                  S5      R                  nU H  nUS   R                  U5      b  M   e   UR                  R                  R                  5        H!  n	X;  d  M
  US   R                  U	5      c  M!   e   g)	z8Test that evaluate works with a multilabel textcat pipe.textcat_multilabelFEATUREREQUESTBUGQUESTIONrA   rB   )rk   rn   r@   cats_f_per_typeN)r
   r:   r;   r<   make_docr   rJ   r^   get_pipelabelsget	referencerA   keys)
en_vocabr=   ri   r>   rP   r   rR   r_   rr   keys
             r    test_evaluate_textcat_multilabelrx      s    
8
C&:;:$$U+ ;NN#378F
,,}
%C,G\\7)$F\\./66F'(,,U3???   %%**,+,005=== -r   c                    [        U 5      nUR                  S5      nS H  nUR                  U5        M     UR                  S5      nS H  nUR                  U5        M     UR                  5         SSSSSSSS.0nUR	                  S	5      n[
        R                  " Xe5      nUR                  U/5      nUR                  UR                  S
   5      R                  n	U	 H  nUS   R                  U5      b  M   e   UR                  R                  R                  5        H!  n
X;  d  M
  US   R                  U
5      c  M!   e   g)zxTest that evaluate evaluates the final textcat component in a pipeline
with more than one textcat or textcat_multilabel.r6   r7   ri   rj   rA   rB   rC   r8   r9   rk   rn   r8   r9   r@   r"   ro   N)r
   r:   r;   r<   rp   r   rJ   r^   rq   
pipe_namesrr   rs   rt   rA   ru   )rv   r=   r6   r>   ri   rP   r   rR   r_   rr   rw   s              r   $test_evaluate_multiple_textcat_finalr|      s?    8
Cll9%G)%  *&:;:$$U+ ;NN 	
	F ,,}
%C,G\\7)$F\\#..,-44F'(,,U3???   %%**,+,005=== -r   c                   ^ S m[         R                  R                  S5      U4S j5       n[        U 5      nUR	                  SSSS00S9nS H  nUR                  U5        M     UR	                  S	5      nS
 H  nUR                  U5        M     UR                  5         SSSSSSSS.0nUR                  S5      n[        R                  " Xv5      nUR                  U/5      n	SU	;   d   eUR                  S5      R                  n
[        U	S   R                  5       5      [        U
5      :X  d   eSU	;   d   eUR                  S	5      R                  n
[        U	S   R                  5       5      [        U
5      :X  d   eg)z[Test that evaluate can evaluate multiple textcat components separately
with custom scorers.c                     [         R                  " U S4SS0UD6nUR                  5        VVs0 s H  u  p4SU 3U_M     snn$ s  snnf )NrA   multi_labelFcustom_)r   
score_catsitems)exampleskwargsr_   kvs        r   custom_textcat_scoreEtest_evaluate_multiple_textcat_separate.<locals>.custom_textcat_score   sY    ""
 
 	
 .4\\^<^TQ'!q ^<<<s   Atest_custom_textcat_scorerc                     > T $ rX   rY   )r   s   r   make_custom_textcat_scorerKtest_evaluate_multiple_textcat_separate.<locals>.make_custom_textcat_scorer   s	    ##r   r6   scorerz@scorersconfigr7   ri   rj   rA   rB   rC   rz   r@   custom_cats_f_per_typero   N)r+   r   scorersr
   r:   r;   r<   rp   r   rJ   r^   rq   rr   setru   )rv   r   r=   r6   r>   ri   rP   r   rR   r_   rr   r   s              @r   'test_evaluate_multiple_textcat_separater      s   = ^^89$ :$ 8
Cll:'CDE  G *%  *&:;:$$U+ ;NN 	
	F ,,}
%C,G\\7)$F#v---\\)$++Fv./44673v;FFF&&&\\./66Fv'(--/0CK???r   c                 0    U =R                   S-  sl         U $ )Nr   )vectorr   s    r   vector_modification_piper      s    JJ!OJJr   c                 $    SU R                   S'   U $ )Nbarfoo)	user_datar   s    r   userdata_piper      s     CMM%Jr   c                 J    [        U SSSS9nU =R                  U4-  sl        U $ )Nr   r   FIRST)r>   )r   ents)r   spans     r   ner_piper      s'    Q)DHHHJr   c                  $    S/ SQ4S/ SQ4S/ SQ4/$ )Nr+   )g皙ɿ333333ӿworld)r   r   gٿrd   )gffffff?g?g?rY   rY   r   r   sample_vectorsr      s(     
$%	$%	! r   c                 .   [         R                  " S[        S9  [         R                  " S[        S9  [         R                  " S[        S9  [        U R                  U5        U R                  S5        U R                  S5        U R                  S5        U $ )N&test_language_vector_modification_pipefunctest_language_userdata_pipetest_language_ner_pipe)r
   rg   r   r   r   r   rH   r:   )r=   r   s     r   nlp2r     st    07O 4=I/h?cii0LL9:LL)*LL./Jr   c                      / SQn U $ )N)zHello world.zThis is spacy.z-You can use multiprocessing with pipe method.zPlease try!rY   )datas    r   textsr     s    D Kr   	n_process   c                     [        5       n[        U[        5      (       d  US:  aL  US-  nU Vs/ s H
  o@" U5      PM     nnU R                  X!SS9n[	        Xe5       H  u  px[        Xx5        M     g g s  snf )Nr   
   r   
batch_size)r   
isinstancer   rd   zipr   )	r   r   r   opsr   	expectedsr1   r   expected_docs	            r   test_language_piper      sr    

C#x  IM
,12EDT$ZE	2yyyB!$T!5Cc0 "6 %22s   A3c                 R  ^  [        5       n[        U[        5      (       d  US:  a  [        R                  " U5      n[        R
                  " U5      u  pVU 4S jU 5       nT R                  XaSS9nSn	[        R                  " [        X5      U	5       H  u  p[        X5        M     g g )Nr   c              3   4   >#    U  H  nT" U5      v   M     g 7frX   rY   )rZ   r   r   s     r   r\   ,test_language_pipe_stream.<locals>.<genexpr>3  s     3FDT$ZZFs   r      )
r   r   r   	itertoolscycleteerd   islicer   r   )r   r   r   r   stream_textstexts0texts1r   r1   n_fetchr   r   s   `           r   test_language_pipe_streamr   ,  s    

C#x  IM u-"|43F3	yyyC!*!1!1#d2F!PCc0 "Q %2r   c                    [        5       n[        U[        5      (       d  U S:  Ga2  [        5       nUR	                  S5        UR                  5         SS/n[        R                  " [        5         U" US   5        SSS5        [        R                  " [        5         [        UR                  X0S95        SSS5        UR                  [        5        [        R                  " [        5         [        UR                  X0S95        SSS5        UR                  [        5        [        UR                  X0S95      n[        U5      S:X  d   eU" US   5        gg! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       N{= f)z3Test that the error handling of nlp.pipe works wellr   merge_subtokensz-Curious to see what will happen to this text.zAnd this one.r   Nr   )r   r   r   r	   r:   r<   rL   rM   r   listrd   set_error_handlerr   r   lenr   r   r=   r   r1   s        r    test_language_pipe_error_handlerr   ;  s    
C#x  IMi&'@/R]]:&aM ']]:&%56 'k*]]:&%56 ' 	l+CHHUH894yA~~E!H# %2 '&&& '&s$   0EE+.E<
E(+
E9<
F
c                 J   [         R                  " S[        S9  [        5       n[	        U[
        5      (       d  US:  Ga=  [        5       nUR                  S5        / SQn[        R                  " [        5         [        UR                  U5      5        SSS5        UR                  [        5        [        R                   " S5      n["        R$                  R'                  US5       n[        UR                  XAS95      nUS	:X  aI  UR)                  5         UR*                  S:X  d   e[-        U5      UR*                  -   [-        U5      :X  d   eU Vs/ s H  oR.                  PM     sn/ S
Q:X  d   e SSS5        gg! , (       d  f       N= fs  snf ! , (       d  f       g= f)zETest the error handling of a custom component that has no pipe methodmy_evil_componentr   r   )TEXT 111TEXT 222TEXT 333TEXT 342TEXT 666Nr+   r.   r   r   )r   r   r   r
   rg   r   r   r   r   r	   r:   rL   rM   r   r   rd   r   r4   r,   r-   r   patchobjectassert_called
call_countr   r   )	rv   r   r   r=   r   r3   mock_warningr1   r   s	            r   'test_language_pipe_error_handler_customr   S  sD    *@

C#x  IMi()L]]:&%! ' 	j)""7+ZZvy1\ <=D A~**,#..!3334y<#:#::c%jHHH(,-HH-1UUUU 21 %2 '&  . 21s+   9E>'A,FF(F>
FF
F"c           	         [         R                  " S[        S9  [        5       n[	        U[
        5      (       d  US:  Gaz  [        5       nUR                  S5        / SQn[        R                  " [        5         [        UR                  USS95        SSS5        UR                  [        5        [        R                   " S5      n["        R$                  R'                  US	5       n[        UR                  USUS
95      nUS:X  aI  UR)                  5         UR*                  S:X  d   e[-        U5      UR*                  -   [-        U5      :X  d   eUS   S   R.                  US   S   4S:X  d   eUS   S   R.                  US   S   4S:X  d   eUS   S   R.                  US   S   4S:X  d   e SSS5        gg! , (       d  f       GN/= f! , (       d  f       g= f)z8Test the error handling of nlp.pipe with input as tuplesr   r   r   )r   o   )r      r   iM  )r   iV  r   i  T)	as_tuplesNr+   r.   )r   r   r   r   r   r   r   r   )rv   r   r   r=   r   r3   r   tupless           r   0test_language_pipe_error_handler_input_as_tuplesr   o  s    *@

C#x  IMi()
 ]]:&%401 'j)""7+ZZvy1\#((5DI(NOF A~**,#..!3336{\%<%<<E
JJJ1IaL%%vay|48IIII1IaL%%vay|48IIII1IaL%%vay|48IIII 21 %2 '& 21s   9F;&C
G;
G

Gc           	         [         R                  " S[        S9  [         R                  " S[        S9  [	        5       n[        U[        5      (       d  US:  a  [        S5       Vs/ s H  n[        U5       S3PM     nn[        5       nUR                  S5        UR                  S5        UR                  5         [        R                  " [        5         [        UR!                  XASS95      nS	S	S	5        UR#                  [$        5        [        UR!                  XASS95      n['        U5      S
:X  d   eg	g	s  snf ! , (       d  f       NT= f)z4Test the error handling of a component's pipe methodmy_perhaps_sentencesr   r)   r   d   z is enough. Doner   r   NY   )r
   rg   r%   r)   r   r   r   rangestrr	   r:   r<   rL   rM   r   r   rd   r   r   r   )rv   r   r   ir   r=   r1   s          r   %test_language_pipe_error_handler_piper     s    -4IJ+2DE

C#x  IM6;CjAjCF8+,jAi+,)*]]:&KLD ' 	l+CHHUBHGH4yB %2A
 '&s   %D8D==
Ec                 &   [        5       n[        U[        5      (       d  U S:  a  [        5       nSUl        SS/S-  n[
        R                  " [        5         [        UR                  X0S95        SSS5        [        Ul        U S:X  a<  [
        R                  " [        5         [        UR                  X0S95        SSS5        g[        UR                  X0S95      n[        U5      S:X  d   egg! , (       d  f       N= f! , (       d  f       g= f)	$Test the error handling for make_docr   r   1234567890123456789012345r   Nr   r   r   r   r   r	   
max_lengthrL   rM   r   r   rd   r   default_error_handlerr   r   s        r   0test_language_pipe_error_handler_make_doc_actualr     s    
 
C#x  IMi'1B6]]:&%56 '$0!>z*SXXeX9: +* <=Dt9>!> %2 '& +*s   C1$D1
C?
Dc                    [        5       n[        U[        5      (       d  U S:  a  [        5       nSUl        SS/S-  n[
        R                  " [        5         [        UR                  X0S95        SSS5        [        Ul        [        UR                  X0S95      n[        U5      S:X  d   egg! , (       d  f       ND= f)r   r   r   r   r   r   Nr   r   r   s        r   3test_language_pipe_error_handler_make_doc_preferredr     s    
 
C#x  IMi'1B6]]:&%56 '$0!CHHUH894yA~~ %2 '&s   B//
B=c                    ^^	^
^^ Sn SmSmSm
SmSm	[         R                  " U  S35      U4S j5       n[         R                  " U  S35      U4S j5       n[         R                  " U  S35      U
4S j5       n[         R                  " U  S	35      U4S
 j5       n[         R                  " U  S35      U	4S j5       nS/SU  S30SU  S30SU  S30S.SSS00SU  S	30SU  S30S.S.n[        R                  " U5      nUR                  R
                  S:X  d   eUR                  S   S:X  d   eUR                  S   S:X  d   eSUR                  ;  d   eSUR                  ;  d   eUR                  S/:X  d   eU" S5      (       d   eUR                  5         UR                  S   S:X  d   eUR                  S   S:X  d   e[        TTT
TT	/5      (       d   eg )N+test_language_from_config_before_after_initF_beforec                     > U4S jn U $ )Nc                 D   > SmU [         L d   eSU R                  l        U $ )NTr   )r	   Defaultsr   )lang_cls
ran_befores    r   before_creationbtest_language_from_config_before_after_init.<locals>.make_before_creation.<locals>.before_creation  s)    Jw&&&$)H!Or   rY   )r  r  s    r   make_before_creationItest_language_from_config_before_after_init.<locals>.make_before_creation  s    	 r   _afterc                     > U4S jn U $ )Nc                    > Sm[        U [        5      (       d   eU R                  / :X  d   eU R                  R                  S:X  d   eSU R
                  S'   U $ )NTr   r   r   r	   r{   r   r   meta)r=   	ran_afters    r   after_creation`test_language_from_config_before_after_init.<locals>.make_after_creation.<locals>.after_creation  sW    Ic7++++>>R'''<<##u,,,#CHHUOJr   rY   )r  r  s    r   make_after_creationHtest_language_from_config_before_after_init.<locals>.make_after_creation  s    	 r   _after_pipelinec                     > U4S jn U $ )Nc                    > Sm[        U [        5      (       d   eU R                  S/:X  d   eU R                  R                  S:X  d   eU R
                  S   S:X  d   eSU R
                  S'   U $ )NTsentencizerr   r   bazr	  )r=   ran_after_pipelines    r   after_pipeline_creationrtest_language_from_config_before_after_init.<locals>.make_after_pipeline_creation.<locals>.after_pipeline_creation  sp    !%c7++++>>m_444<<##u,,,88E?e+++#CHHUOJr   rY   )r  r  s    r   make_after_pipeline_creationQtest_language_from_config_before_after_init.<locals>.make_after_pipeline_creation  s    	 '&r   _before_initc                     > U4S jn U $ )Nc                 *   > SmSU R                   S'   U $ )NTbeforebefore_initr
  )r=   ran_before_inits    r   r  Ztest_language_from_config_before_after_init.<locals>.make_before_init.<locals>.before_init  s    "O&.CHH]#Jr   rY   )r  r   s    r   make_before_initEtest_language_from_config_before_after_init.<locals>.make_before_init  s    	 r   _after_initc                     > U4S jn U $ )Nc                 *   > SmSU R                   S'   U $ )NTafter
after_initr  )r=   ran_after_inits    r   r(  Xtest_language_from_config_before_after_init.<locals>.make_after_init.<locals>.after_init  s    !N%,CHH\"Jr   rY   )r(  r)  s    r   make_after_initDtest_language_from_config_before_after_init.<locals>.make_after_init  s    	 r   r  
@callbacks)pipeliner  r  r  factory)r  r(  )r=   
componentsr<   r   r   r  r  r(  r   r  r'  )
r   	callbacksr	   from_configr   r   r
  r{   r<   all)namer  r  r  r"  r+  r   r=   r  r)  r  r  r   s           @@@@@r   r   r     sH   8DJION4&() * 4&(
 )
 4&01' 2' 4&-. / 4&,- . ' ,g.>?+vV_=(4o6N'O	
 %y-&@A(TF,*?@'D6)=>
F 

f
%C<<u$$$88E?e###88E?e###(((sxx'''>>m_,,,v;;;NN88M"h...88L!W,,,	Y 2O^T   r   c                  (   Sn [         R                  " U  S3S S9  [         R                  " U  S3S S9  [         R                  " U  S3S S9  [         R                  " U  S3S	 S9  U  S3U  S34 HD  nS
SSU000n[        R                  " [        5         [
        R                  " U5        SSS5        MF     U  S3U  S34 HD  nS
SSU000n[        R                  " [        5         [
        R                  " U5        SSS5        MF     U  S3U  S34 HD  nS
SSU000n[        R                  " [        5         [
        R                  " U5        SSS5        MF     g! , (       d  f       M  = f! , (       d  f       M  = f! , (       d  f       M|  = f)z=Check that an error is raised if function doesn't return nlp.3test_language_from_config_before_after_init_invalid_before1c                      S $ )Nc                     g rX   rY   r=   s    r   <lambda>Wtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.<locals>.<lambda>,  s    4r   rY   rY   r   r   r;  Etest_language_from_config_before_after_init_invalid.<locals>.<lambda>,  s    7Gr   r   _before2c                      S $ )Nc                     U " 5       $ rX   rY   r:  s    r   r;  r<  -  s    35r   rY   rY   r   r   r;  r=  -  s    7Hr   _after1c                      S $ )Nc                     g rX   rY   r:  s    r   r;  r<  .  s    $r   rY   rY   r   r   r;  r=  .  s    6Fr   c                      S $ )Nc                     [         $ rX   r   r:  s    r   r;  r<  /  s    'r   rY   rY   r   r   r;  r=  /  s    6Ir   r=   r  r-  N_after2r  r  )r   r1  rL   rM   r   r	   r2  )r4  callback_namer   s      r   r6  r6  )  s   @D$x(/GH$x(/HI$w'.FG$w'.IJ!F(+vX->?+lM-JKL]]:&' '& @ "F'*tfG,<=*\=,IJK]]:&' '& > "F'*tfG,<=3lM5RST]]:&' '& > '& '& '&s$   E(E0:F
E-	0
E?	
F	c                       " S S5      n [         R                  " S5      nU " UR                  5      Ul        SnU" U5      nUR                  U:X  d   eg)z3Test the custom whitespace tokenizer from the docs.c                        \ rS rSrS rS rSrg)?test_language_whitespace_tokenizer.<locals>.WhitespaceTokenizeriB  c                     Xl         g rX   rH   )selfrH   s     r   __init__Htest_language_whitespace_tokenizer.<locals>.WhitespaceTokenizer.__init__C  s    Jr   c                     UR                  S5      nS/[        U5      -  n[        U5       H  u  pEUS:X  d  M  SX$'   SX4'   M     US   S:X  a  USS nUSS nOSUS'   [        U R                  X#S9$ )NrE   T Fr"   r   )rG   spaces)rI   r   	enumerater   rH   )rM  r   rG   rR  r   words         r   __call__Htest_language_whitespace_tokenizer.<locals>.WhitespaceTokenizer.__call__F  s    JJsOEVc%j(F$U+2:"EH %FI ,
 RyCa""r
tzz>>r   rL  N)__name__
__module____qualname____firstlineno__rN  rU  __static_attributes__rY   r   r   WhitespaceTokenizerrJ  B  s    		?r   r\  enz?   What's happened to    me? he thought. It wasn't a dream.    N)r+   blankrH   	tokenizerr   )r\  r=   r   r   s       r   "test_language_whitespace_tokenizerr`  ?  sL    ? ?* ++d
C'		2CMLD
d)C88tr   c                    ^ Sn  " S S5      m[         R                  " U 5      SS[        4U4S jjj5       nSSSU 000n[        R                  " U5      nU" S	5      nU Vs/ s H  oUR
                  PM     snS
S/:X  d   e[        UR                  S	/5      5      S   nU Vs/ s H  oUR
                  PM     snS
S/:X  d   egs  snf s  snf )zFTest that a fully custom tokenizer can be plugged in via the registry.test_language_custom_tokenizerc                   $    \ rS rSrSrS rS rSrg)7test_language_custom_tokenizer.<locals>.CustomTokenizerib  zEDummy "tokenizer" that splits on spaces and adds prefix to each word.c                 2    UR                   U l         X l        g rX   )rH   prefix)rM  r=   rf  s      r   rN  @test_language_custom_tokenizer.<locals>.CustomTokenizer.__init__e  s    DJ Kr   c                     UR                  S5       Vs/ s H  o R                   U 3PM     nn[        U R                  US9$ s  snf )NrE   rF   )rI   rf  r   rH   )rM  r   rT  rG   s       r   rU  @test_language_custom_tokenizer.<locals>.CustomTokenizer.__call__i  s@    8<

3H}TF+EHtzz// Is   A)rf  rH   N)rW  rX  rY  rZ  __doc__rN  rU  r[  rY   r   r   CustomTokenizerrd  b  s    S	!	0r   rk  rf  c                    >^  UU 4S jnU$ )Nc                    > T" U TS9$ )N)rf  rY   )r=   rk  rf  s    r   create_tokenizerYtest_language_custom_tokenizer.<locals>.custom_create_tokenizer.<locals>.create_tokenizero  s    "3v66r   rY   )rf  rn  rk  s   ` r   custom_create_tokenizer?test_language_custom_tokenizer.<locals>.custom_create_tokenizerm  s    	7  r   r=   r_  z@tokenizersr@   _hello_worldr   N)_)r   
tokenizersr   r	   r2  r   r   rd   )r4  rp  r   r=   r   trk  s         @r   rb  rb  ^  s    +D	0 	0       kM4#89:F


f
%C
m
C CqFFC Xx$8888
sxx(
)!
,C CqFFC Xx$8888 ! s   !C#C	c                  8   SSS00n [         R                  " [        5         [        R                  " U 5        SSS5        [         R                  " [        5         [
        R                  " U 5        SSS5        g! , (       d  f       NH= f! , (       d  f       g= f)z~Test that calling Language.from_config raises an error and lang defined
in config needs to match language-specific subclasses.r=   langr]  N)rL   rM   r   r
   r2  r   r   s    r   &test_language_from_config_invalid_langry  |  sc     fd^$F	z	"V$ 
#	z	"6" 
#	" 
#	"	"	"s   A:B:
B
Bc                      [         R                  " S5      n U R                  S   S   S:X  d   eSSS00nSS0n[         R                  " SXS9n U R                  S   S   S:X  d   eU R                  S   S:X  d   eg )	Nr]  trainingdropoutg?g?r4  my_custom_model)r   r
  )r+   r^  r   r
  )r=   r   r
  s      r   test_spacy_blankr~    s    
++d
C::j!),3339c*+F%&D
++d6
5C::j!),33388F0000r   zlang,target)r]  r]  frafrfrer  iwhemoromulxxnonbzpt-BRptr  r  zzh-Hanszh)zzh-HantN)zxxNc                 &    [        U 5      U:X  d   eg)zW
Test that we can look up languages by equivalent or nearly-equivalent
language codes.
N)r   )rx  targets     r   test_language_matchingr    s    , "$'6111r   )
r  r  r  r  r  r  r  r  r  r  c                 T    [         R                  " U 5      nUR                  U:X  d   eg)z
Test that we can get spacy.blank in various languages, including codes
that are defined to be equivalent or that match by CLDR language matching.
N)r+   r^  rx  )rx  r  r=   s      r   test_blank_languagesr    s$    ( ++d
C88vr   valueFxyc                     Sn[         R                  " [        5       n[        U 5        S S S 5        U[	        WR
                  5      ;   d   eg ! , (       d  f       N*= f)Nzinvalid value)rL   rM   r   r
   r   r  )r  err_fragmentr2   s      r    test_language_init_invalid_vocabr    s@    "L	z	"a 
#3qww<''' 
#	"s   A
Ac                 N   [        [        5       5      nUR                  S5      nS H  nUR                  U5        M     UR	                  5         SnXAR
                  R                  ;  d   eX@R
                  R                  ;  d   eUR
                  R                  R                  U5        UR
                  R                  R                  5       U R
                  R                  R                  5       :w  d   eUR
                  R                  R                  5       n[        R                  " [        5         U R                  SSUS9  S S S 5        X@R
                  R                  ;   d   eUR
                  R                  R                  5       U:X  d   eg ! , (       d  f       NT= f)Nr6   r7   thisisalongstringtextcat2)r4  source)r
   r   r:   r;   r<   rH   stringsaddvectorsto_bytesrL   warnsUserWarning)r   r=   r6   r>   long_stringvectors_bytess         r    test_language_source_and_vectorsr    s8   
57
Cll9%G)%  *NN%Kii/////jj00000II+&99%%'4::+=+=+F+F+HHHHII%%..0M	k	"ij= 
# **,,,,,99%%'=888 
#	"s   6F
F$c                 p   / SQnU Vs/ s H  o0R                  U5      PM     nn[        S U 5       5      (       a   eU " US   5      nUR                  US   :X  d   e[        UR                  5      S:  d   e[        [        5       [        5      (       d  US:  a  [        R                  " 5          [        R                  " S5        U R                  XAS9nU Vs/ s H  oUR                  PM     snU:X  d   e[        S U 5       5      (       d   e S S S 5        g g s  snf s  snf ! , (       d  f       g = f)N)rA   dogszguinea pigsc              3   L   #    U  H  n[        UR                  5      v   M     g 7frX   r   rA   rZ   r   s     r   r\   ,test_pass_doc_to_pipeline.<locals>.<genexpr>  s     1DS3sxx==D   "$r   r   errorr   c              3   L   #    U  H  n[        UR                  5      v   M     g 7frX   r  r  s     r   r\   r    s     5s388}}r  )rp   anyr   r   rA   r   r   r   warningscatch_warningssimplefilterrd   r3  )r=   r   r   r   r1   r   s         r   test_pass_doc_to_pipeliner    s   +E+0154LL5D11D11111
d1g,C88uQxsxx=1/#X..)a- $$&!!'*88D86D(,-HH-666555555	 '& 3@ 2 . '&s#   D1*D'D"0"D'"D''
D5c                    SS/n[         R                  " [        5         U " U5        S S S 5        [        [	        U R                  U5      5      5      S:X  d   e/ SQn[         R                  " [        5         [	        U R                  U5      5        S S S 5        [         R                  " [        5         U " U5        S S S 5        g ! , (       d  f       N= f! , (       d  f       NK= f! , (       d  f       g = f)NzThis is a text.zThis is another.r   )r   r      )rL   rM   r   r   r   rd   )r=   str_listint_lists      r   test_invalid_arg_to_pipeliner    s    !#56H	z	"H 
#tCHHX&'(A---H	z	"SXXh  
#	z	"H 
#	" 
#	" 
#	"	"	"s#   	C7C4	C(
C
C%(
C6ztest requires GPU)reasonc                    US-  nU R                  USSS9n[        R                  " [        SS9   [        R                  " [
        5         U H  nM     S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr   r   r   zmultiprocessing with GPU modelsmatch)rd   rL   r  r  rM   r   )r   r   r1   rt  s       r    test_multiprocessing_gpu_warningr    sh     BJE99UaA96D	k)J	K]]:&  ' 
L	K&& 
L	Ks#   A7
A&A7&
A4	0A77
Bc                    [         R                  " S[        S9  U R                  S5        [        R
                  " [        SS9   [         R                  " S[        S9  S S S 5        [        R
                  " [        SS9   [         R                  " S[        S9  S S S 5        g ! , (       d  f       NJ= f! , (       d  f       g = f)Nr   r   znot permittedr  zmy.evil.component.v1)r
   rg   r   r:   rL   rM   r   r/  r:  s    r   test_dot_in_factory_namesr  	  s    *@LL$%	z	91G 
: 
z	9/nE 
:	9 
:	9 
:	9s   B"?B3"
B03
Cc                  t   [        5       n [        R                  " S5      S 5       nU R                  S5        U " S5        U R	                  S5        [        R                  " S5      S 5       nU R                  S5        [
        R                  " [        SS9   U " S5        SSS5        g! , (       d  f       g= f)	zJTest that an error is raised if components return a type other than a
doc.test_component_good_pipec                     U $ rX   rY   r   s    r   	good_pipe(test_component_return.<locals>.good_pipe  rf   r   r   test_component_bad_pipec                     U R                   $ rX   )r   r   s    r   bad_pipe'test_component_return.<locals>.bad_pipe!  s    xxr   zinstead of a Docr  N)r	   r
   rg   r:   remove_piperL   rM   r   )r=   r  r  s      r   test_component_returnr    s     )C23 4 LL+,KOO./12 3 LL*+	z);	<F 
=	<	<s   	B))
B7)Tr   r,   r  unittestr   rL   	thinc.apir   r   r   r+   spacy.lang.der   spacy.lang.enr	   spacy.languager
   spacy.scorerr   spacy.tokensr   r   spacy.trainingr   
spacy.utilr   r   r   r   spacy.vocabr   utilr   r   torchset_num_threadsset_num_interop_threadsImportErrorr   r%   r)   r4   fixturer=   rS   r`   rb   rx   r|   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   xfailr   r   r6  r`  rb  ry  r~  r  r  r  r  r  r  skipifr   r  r  r  rY   r   r   <module>r     s        8 8    ! #  " " R R  6	 
!	!!!$
<
  94%43 >(>D1@h

   
 
   q!f-1 .1 q!f-1 .1 q!f- .. q!f-V .V6 q!f-J .J> q!f- .( q!f-" ."( q!f- . \~(,>9<#1 "2#"2  5$c
He"LM( N(9( q!f-6 .6"	 ?$g..7J  Fs  		s   &(L L%$L%