
    h                        S SK r S SKrS SKJr  S SKJr  S SKJrJrJ	r	J
r
  S SKrS SKrS SKJr  S SKJr  S SKJr  S SKrS SKJr  S S	KJrJr  S S
KJrJrJr  S SKJr  S SKJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  S SK'J(r(J)r)  S SK*J+r+  S SK,J-r-  S SK.J/r/J0r0J1r1  S SK2J3r3  S SK4J5r5J6r6  S SK7J8r8  S SK9J:r:  S SK;J<r<  S SK=J>r>  S SK?J@r@  S SKAJBrBJCrC  S SKDJErE  S SKFJGrGJHrHJIrI  S SKJJKrKJLrLJMrM  S SKNJOrOJPrPJQrQJRrR  SSKSJTrT  \R                  R                  S5      S  5       rW\R                  R                  S!5      S" 5       rX\R                  R                  S#5      S$ 5       rY\R                  R                  S%5      \R                  R                  S&/ S'Q5      S(\[S)\[4S* j5       5       r\S+ r]S, r^\R                  R                  S-S.S//5      S0 5       r_S1 r`S2 raS3 rb\R                  R                  S4S5S6/S7S804S9/S7S804S5S:/S7S:04S;/S7S:04/ S<QS=S>S?.4S5S@/S=S>S?.4/ SAQSBS=SCSD.4/ SEQSBS=SCSD.4/5      SF 5       rc\R                  R                  SGSH// SIQ/5      SJ 5       rd\R                  R                  SG/ SKQS7//5      SL 5       reSM rf\R                  R                  SNSOSP/5      \R                  R                  SQ/ SRQ/ / SSQ/ STQSUSV//5      \R                  R                  SWSXSY/5      \R                  R                  SZS=SC/5      S[ 5       5       5       5       rgS\ rh\R                  R                  S]/ S^Q5      S_ 5       ri\R                  R                  S]/ S`Q5      Sa 5       rjSb rkSc rl\R                  R                  Sd/ SeQ5      Sf 5       rmSg rn\R                  R                  \R                  R                  Sh/ SiQ5      Sj 5       5       rpSk rqSl rr\R                  R                  SdSmSU/5      Sn 5       rsSo rtSp ruSq rvSr rw\R                  R                  Ss/ StQ5      Su 5       rxSv rySw rzSx r{Sy r|Sz r}S{ r~S| rS} rS~ rS rS rS rS rS rS rg)    N)Counter)Path)AnyDictListTuple)NoSuchOption)SpecifierSet)Config)about)download_moduleinfo)parse_config_overridesstring_to_listwalk_directory)apply)_compile_gold_get_distribution_get_kl_divergence_get_labels_from_model_get_labels_from_spancat_get_span_characteristics_get_spans_length_freq_dist_print_span_characteristics)get_compatibilityget_version)render_parses)find_threshold)RECOMMENDATIONSfill_configinit_config)_init_labels)_is_permitted_package_nameget_third_party_dependencies)get_model_pkgs)English)Dutch)Language)RecommendationSchema)DocDocBin)Span)Exampledocs_to_jsonoffsets_to_biluo_tags)conll_ner_to_docsconllu_to_docsiob_to_docs)ENV_VARSget_minor_versionload_configload_model_from_config   )make_tempdiri9  c                      Sn [        [        U 5      5      n[        US    Vs/ s H  o"R                  R                  PM     sn5      (       a   eUS   R                  S5      (       a   egs  snf )zX
conllu_to_docs should not raise an exception if the HEAD column contains an
underscore
a%  
1	[	_	PUNCT	-LRB-	_	_	punct	_	_
2	This	_	DET	DT	_	_	det	_	_
3	killing	_	NOUN	NN	_	_	nsubj	_	_
4	of	_	ADP	IN	_	_	case	_	_
5	a	_	DET	DT	_	_	det	_	_
6	respected	_	ADJ	JJ	_	_	amod	_	_
7	cleric	_	NOUN	NN	_	_	nmod	_	_
8	will	_	AUX	MD	_	_	aux	_	_
9	be	_	AUX	VB	_	_	aux	_	_
10	causing	_	VERB	VBG	_	_	root	_	_
11	us	_	PRON	PRP	_	_	iobj	_	_
12	trouble	_	NOUN	NN	_	_	dobj	_	_
13	for	_	ADP	IN	_	_	case	_	_
14	years	_	NOUN	NNS	_	_	nmod	_	_
15	to	_	PART	TO	_	_	mark	_	_
16	come	_	VERB	VB	_	_	acl	_	_
17	.	_	PUNCT	.	_	_	punct	_	_
18	]	_	PUNCT	-RRB-	_	_	punct	_	_
r   ENT_IOBN)listr1   allheadihas_annotation)
input_datadocsts      N/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/test_cli.py*test_cli_converters_conllu_empty_heads_nerrD   0   sg    J( z*+Dd1g.gFFHHg.////Aw%%i00000 /s   A,i<  c                      [        5       n [        R                  " U R                  S5      0 5      nU R	                  U/5        g )N )r(   r-   	from_dictmake_docevaluate)nlpexamples     rC   test_issue4924rL   Q   s2    
*CR 0"5GLL'    i  c                  .   SSS/S.SS0SS0S.S.n [         R                  " U 5      n[        5        nUS-  nUR                  U5        S/ S	QS.S
[	        U5      0S
[	        U5      0SS0S	.S.n[        U5      nUS-  nUR                  U5        US-  n[        XeSS9  [        U5      nSSS5        WS   S   S
   [	        W5      :X  d   eUS   S   S
   [	        U5      :X  d   eUS   S   S   S:X  d   eSUS   S   ;   d   eg! , (       d  f       Nb= f)zETest that fill-config doesn't turn sourced components into factories.entok2vectagger)langpipelinefactory)rP   rQ   )rJ   
components
test_model)rP   rQ   nersourcerW   zbase.cfgz
config.cfgT)silentNrU   model)r&   from_configr8   to_diskstrr   r    r5   )
source_cfg
source_nlpdir_pathsource_pathbase_cfg	base_pathoutput_path
filled_cfgs           rC   test_issue7055rf   X   s_    9h*?@!9- (+
J $$Z0J	8-;' .JK$c+&67#S%56!5)
 (#z)	#-K48 -
# 
$ l#I.x8C<LLLLl#H-h73{;KKKKl#E*95>>>j.u5555+ 
s   A8D
Di1  zfactory,output_file))depszparses.html)entszentities.html)spansz
spans.htmlrT   output_filec                    [        5        nSSSS.SSSS.SS	S
S./SSSSSS.SSSSS.SSS	S
S./0SSSSSSSSSSS.	SSSSSSSSSS.	SSS S!S"S#S$S%SS.	S&S'S(S)S)S*S+S,S-S.	S-S.S/S0S"S1S2S3SS.	S4S5S6S)S)S7S8S9S-S.	S:SSS)S)S;S<S,S=S.	S=S>S?S0S"S@SAS%S4S.	SSBSCSDSESFSGSHSS.	SSSIS0SJS@SKSLS-S.	SMSNSS0SJS@SOSPSS.	SQSRSSSDSESTSUSHSVS.	SVS
SWS0SJSXSYSZS-S.	S[S\SS0S"SXS]SPSVS.	S^S_S`SDSESaSbSHSS.	/Sc.n[        R                  " Sd5      n[        UR                  5      R                  U5      n[        ShU/USSSe.U Sf0D6  X!-  R                  5       (       d   e SgSgSg5        gg! , (       d  f       gg= f)izD
Test if all displaCy types (ents, dep, spans) produce an HTML file
6   nam_adj_country,   )endlabelstartS   nam_liv_personE   d   nam_pro_title_bookV   scrF   )ro   kb_idrp   rq   uk   Niedawno czytał em nową książkę znakomitego szkockiego medioznawcy , Briana McNaira - Cultural Chaos .r      ADVz
Degree=Posniedawnoadvmodr7   )	idrq   ro   tagposmorphlemmadepr=   	      PRAETVERBzYAnimacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Actu   czytaćROOT         AGLTNOUNz-Animacy=Inan|Case=Ins|Gender=Masc|Number=Singemiobj         ADJz*Case=Acc|Degree=Pos|Gender=Fem|Number=Singnowyamod         SUBSTzCase=Acc|Gender=Fem|Number=Singu	   książkaobj       +   z8Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singznakomitacl   z7Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singszkockiy   7   B   z,Animacy=Hum|Case=Gen|Gender=Masc|Number=SingmedioznawcaC   D   INTERPPUNCTzPunctType=Comm,punctK   PROPNBriannmod
   L   McNairflat   T   U   zPunctType=Dash-   ^   z-Animacy=Inan|Case=Nom|Gender=Masc|Number=SingCulturalconj   _   Chaos   e   f   zPunctType=Peri.)rh   ri   texttokenspl)rA   rd   
model_namelimitTN )r8   spacyblankr*   vocab	from_jsonr   is_file)rT   rj   tmp_dirdoc_jsonrJ   docs         rC   test_issue12566r   {   sv    
7 %6D%5C&:RH 6GRTU6FQST"!#!5!#		0 1QuUUalv  @H  RS  V1R  Zu  @I  RX  bc  f2b  ZI  TX  ag  qr  u2bu  XD  OU  ^d  no  r2bZ{  GR  [`  jk  n2bu  XR  ]g  pu  @  C2bu  XQ  \f  ou  @  C2b  [I  Ta  jp  z{  ~2b'\lwz  DK  UV  Y2b  \J  U\  ek  uv  yBr''  ]K  V^  gm  wx  {Br(7]mx{  EL  VX  [Br''  ]L  Wa  jp  z{  ~Bs76  ]L  W^  gm  wy  |CHW_oz}  GN  XY  \!)'
T kk$#))n&&x0 	 	
G!	
HOQU	
 %..0000i 
s   D6E
Ec                      [        5       n U R                  S5        [        5        nU R                  U5        [	        US/S9nUS   S:X  d   eUS   S/:X  d   e S S S 5        g ! , (       d  f       g = f)NtextcatrF   )excluderR   nlrU   )r'   add_piper8   r\   r   )rJ   r   raw_datas      rC   test_cli_infor      sh    
'CLL	7G".4'''%)444	 
s   4A$$
A2c                     / SQn SR                  U 5      n[        [        USS95      n[        U5      S:X  d   e[	        U5      /nUS   S   S:X  d   e[        US   S   5      S:X  d   e[        US   S   S   S   5      S:X  d   eUS   S   S   S   S   n[        US	   5      S
:X  d   eUS	   nU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eUS   S   S   S    Vs/ s H  owS   US   US   4PM     nn[        US   USS9n	U	/ SQ:X  d   eg s  snf s  snf s  snf s  snf s  snf )N)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	Oz/2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	B-PERz.3	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	I-PERuC   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	O
r7   n_sentsr   r~   
paragraphs	sentencesr   r   orth)DommerFinn	Eilertsen   avstårr   )r   r   r   r   r=   )r7   r   r   r   )apposnsubjnamer   entitiesr   Omissing)r   zB-PERzL-PERr   joinr;   r1   lenr.   r/   
linesr@   converted_docs	convertedsentr   rB   eent_offsets
biluo_tagss
             rC   "test_cli_converters_conllu_to_docsr      s   E 5!J.Q?@N~!###n-.IQ<"""y|L)*a///y|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&v!fIv&*TTTT$%feHf%)KKKK%&v!fIv&-777$%feHf%)KKKK$-aL$>q$A*$M$Mq1qtQqT$M   '~a'8+sSJ5555 '%&%s   6E(E-0E2E76E<r   )L1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	name=OzB2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|name=B-PERz33	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	name=I-PERV   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=No|name=Oz%5	.	$.	PUNCT	_	_	4	punct	_	name=B-BAD)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	_z@2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|NE=B-PERz13	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	NE=L-PERuO   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=Noz#5	.	$.	PUNCT	_	_	4	punct	_	NE=B-BADc           	      *   SR                  U 5      n[        [        USSSS.S95      n[        U5      S:X  d   e[	        U5      /nUS   S   S:X  d   e[        US   S	   5      S:X  d   eUS   S	   S   S
   S:X  d   e[        US   S	   S   S   5      S:X  d   eUS   S	   S   S   S   n[        US   5      S:X  d   eUS   nU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eUS   S	   S   S    Vs/ s H  owS   US   US   4PM     nn[        US   USS9n	U	/ SQ:X  d   eg s  snf s  snf s  snf s  snf s  snf )Nr   r7   PERSONrF   )PERBAD)r   ner_mapr   r~   r   rawu   Dommer FinnEilertsen avstår. r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r=   )r7   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   )r   zB-PERSONzL-PERSONr   r   r   r   s
             rC   /test_cli_converters_conllu_to_docs_name_ner_mapr      s   ( 5!Jz1hr6RSN ~!###n-.IQ<"""y|L)*a///Q<%a(/3SSSSy|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&v!fIv&*YYYY$%feHf%)TTTT%&v!fIv&*;;;;$%feHf%)TTTT$-aL$>q$A*$M$Mq1qtQqT$M   '~a'8+sSJ@@@@ '%&%s   
E<'FF!F
Fc            	         / SQn SR                  U 5      n[        [        USSSS95      n[        U5      S:X  d   e[	        U5      /nUS   S   S:X  d   e[        US   S   5      S:X  d   eUS   S   S   S	   S
:X  d   e[        US   S   S   S   5      S:X  d   eUS   S   S   S   S   n[        US   5      S:X  d   eUS   nU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eU Vs/ s H  ofS   PM	     sn/ SQ:X  d   eUS   S   S   S    Vs/ s H  owS   US   US   4PM     nn[        US   USS9n	U	/ S Q:X  d   eg s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf )!N)r   z2-3	FE	_	_	_	_	_	_	_	_z42	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	name=B-PERzC3	Eilertsen	Eilertsen	X	_	Gender=Fem|Tense=past	2	name	_	name=I-PERr   z!5	.	$.	PUNCT	_	_	4	punct	_	name=Or   r7   T)r   merge_subtokensappend_morphologyr   r~   r   r   u   Dommer FE avstår. r   r   r   r   )r   FEr   r   r   )z*NOUN__Definite=Ind|Gender=Masc|Number=Singz#PROPN_X__Gender=Fem,Masc|Tense=pastz&VERB__Mood=Ind|Tense=Pres|VerbForm=Finr   r   )r   r   r   r   r   )z$Definite=Ind|Gender=Masc|Number=SingzGender=Fem,Masc|Tense=pastz Mood=Ind|Tense=Pres|VerbForm=FinrF   r   )dommerzFinn Eilertsenu   avståz$.r=   )r7   r7   r   r   r   )r   r   r   r   r   r   r   r   )r   zU-PERr   r   r   r   s
             rC   ,test_cli_converters_conllu_to_docs_subtokensr    s   E 5!J44	
N
 ~!###n-.IQ<"""y|L)*a///Q<%a(/3HHHHy|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&v!fIv&*JJJJ$%feHf% *    %%feHf%)KKKK &'1gJ' ,    !''1gJ'+WWWW%&v!fIv&-777$%feHf%)LLLL$-aL$>q$A*$M$Mq1qtQqT$M   '~a'8+sSJ1111+ '% &' (&%s0   G)GG#G$ G)G.:G3#G8c                  F   / SQn SR                  U 5      n[        [        USS95      n[        U5      S:X  d   e[	        U5      nUS   S:X  d   e[        US   5      S:X  d   e[        US   S   S	   5      S
:X  d   e[        SS
5       HI  nUS   S   S	   U   n[        US   5      S:X  d   eUS   n/ SQnU Vs/ s H  oS   PM	     snU:X  a  MI   e   [        US   R                  5      S:X  d   eUS   R                   H  n	U	R                  S;   a  M   e   g s  snf )N)zAI|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|OzAI|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|Oz^I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Oz^I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Or   r   r   r7   r~   r   r   r   r   r   rz   IlikeLondonandNewYorkCityr   r   New York Cityr  )r   r;   r2   r   r.   rangerh   r   )
r   r@   r   r   r>   r   r   expectedrB   ents
             rC   test_cli_converters_iob_to_docsr  @  sM   E 5!J+j"=>N~!###^,IT?ay&'1,,,y&q)+671<<<1a[&q)+6q94>"a'''hM#)*6a&	6*h666  ~a %%&!+++a %%xx6666 & +s   <Dc                  B   / SQn SR                  U 5      n[        [        USS95      n[        U5      S:X  d   e[	        U5      nUS   S:X  d   e[        US   5      S:X  d   e[        US   S   S	   5      S
:X  d   e[        SS
5       HG  nUS   S   S	   U   n[        US   5      S:X  d   eUS   nU Vs/ s H  owS   PM	     sn/ SQ:X  a  MG   e   [        US   R                  5      S:X  d   eUS   R                   H  nUR                  S;   a  M   e   g s  snf )N).z-DOCSTART- -X- O OrF   zI	Ozlike	OzLondon	B-GPEzand	Oz	New	B-GPEz
York	I-GPEz
City	I-GPEz.	OrF   zI Ozlike OzLondon B-GPEzand Oz	New B-GPEz
York I-GPEz
City I-GPEz. OrF   zI PRP Oz
like VBP OzLondon NNP B-GPEzand CC OzNew NNP B-GPEzYork NNP I-GPEzCity NNP I-GPEz. . OrF   z	I PRP _ Ozlike VBP _ OzLondon NNP _ B-GPEz
and CC _ OzNew NNP _ B-GPEzYork NNP _ I-GPEzCity NNP _ I-GPEz. . _ OrF   z	I	PRP	_	Ozlike	VBP	_	OzLondon	NNP	_	B-GPEz
and	CC	_	OzNew	NNP	_	B-GPEzYork	NNP	_	I-GPEzCity	NNP	_	I-GPEz.	.	_	Or   r   r   r7   r~   r   r   r   r   r   rz   r   r  r  )r   r;   r0   r   r.   r  rh   r   )	r   r@   r   r   r>   r   r   rB   r  s	            rC   %test_cli_converters_conll_ner_to_docsr  Y  sK   /E` 5!J+JCDN~!###^,IT?ay&'1,,,y&q)+671<<<1a[&q)+6q94>"a'''h#)*6a&	6*.hhhh  ~a %%&",,,a %%xx6666 & +s   8Dzargs,expected--x.foo10x.foor   z
--x.foo=10barz--x.foo=bar)r  --x.barbazTr  )r  x.barz--x.bar=baz)r  10.1r  z--x.bazfalseg333333$@F)r  r  x.baz)r  r  r  z--x.baz=falsec                 &    [        U 5      U:X  d   eg N)r   )argsr  s     rC   test_parse_config_overridesr%    s      "$'8333rM   r$  z--foo)r  r  z--bazc                     [         R                  " [        5         [        U 5        S S S 5        g ! , (       d  f       g = fr#  )pytestraisesr	   r   r$  s    rC   #test_parse_config_overrides_invalidr*    s"    	|	$t$ 
%	$	$   0
>)r  r  r  c                     [         R                  " [        5         [        U 5        S S S 5        g ! , (       d  f       g = fr#  )r'  r(  
SystemExitr   r)  s    rC   %test_parse_config_overrides_invalid_2r.    s"    	z	"t$ 
#	"	"r+  c                     Sn U [         R                  [        R                  '   [	        / 5      n[        U5      S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   SL d   eUS	   S
:X  d   eS[         R                  [        R                  '   [	        / S S90 :X  d   e[        R                  " [        5         [	        / 5        S S S 5        S[         R                  [        R                  '   [        R                  " [        5         [	        / 5        S S S 5        [         R                  [        R                  	 g ! , (       d  f       N}= f! , (       d  f       N?= f)Nz2--x.foo bar --x.bar=12 --x.baz false --y.foo=hellor   r  r  r  r   r!  Fzy.foohelloz--x)env_varzhello world)	osenvironr3   CONFIG_OVERRIDESr   r   r'  r(  r-  )	overridesresults     rC   test_parse_cli_overridesr7    s   DI,5BJJx(()#B'Fv;!'?e###'?b   '?e###'?g%%%,1BJJx(()!"d3r999	z	"r" 
#,9BJJx(()	z	"r" 
#


8,,- 
#	" 
#	"s   6D9E
9
E

ErR   rO   r   rS   )rQ   parserrW   )rW   r   sentencizer)morphologizerspancatentity_linkerspancat_singlelabeltextcat_multilabeloptimize
efficiencyaccuracypretrainingc                 ~    [        U UUUSS9n[        U[        5      (       d   eU(       a  SUS   S'   [        USS9  g )NF)rR   rS   r?  rB  gpuzmy_data.jsonlpathsraw_textT	auto_fill)r!   
isinstancer   r6   )rR   rS   r?  rB  configs        rC   test_init_configrK    sM     F ff%%%%&5w
#6T2rM   c                  d    [         R                  " 5        H  u  p[        S0 UD6(       a  M   e   g )Nr   )r   itemsr)   )rR   datas     rC   test_model_recommendationsrO    s+    %++-
#+d++++ .rM   value)parser,textcat,tagger parser, textcat ,tagger rQ  rR  z  "parser"," textcat " ,"tagger "z  'parser',' textcat ' ,'tagger '[parser,textcat,tagger]z["parser","textcat","tagger"]z$[" parser" ,"textcat ", " tagger " ]rS  z[ parser, textcat , tagger]z['parser','textcat','tagger']z$[' parser' , 'textcat', ' tagger ' ]c                 (    [        U SS9/ SQ:X  d   eg )NFintify)r8  r   rQ   r   rP  s    rC   test_string_to_listrY    s    * %.2QQQQrM   )z1,2,3z[1,2,3]z["1","2","3"]z[" 1" ,"2 ", " 3 " ]z[' 1' , '2', ' 3 ' ]c                 L    [        U SS9/ SQ:X  d   e[        U SS9/ SQ:X  d   eg )NFrU  )123T)r7   r   r   rW  rX  s    rC   test_string_to_list_intifyr^    s.     %./AAA%-:::rM   c                      [        S[        R                  -   5      n SU l        [        R                  U ;   a@  Sn[	        5       n[        X5      n[        [        R                  5      [        U5      :X  d   eg g )N==Fen_core_web_sm)r
   r   __version__prereleasesr   r   r4   )specr   compatibilityversions       rC   test_download_compatibilityrg    sh    u0001DDD %
)+j8 !2!237H7QQQQ	 !rM   c                     [        S[        R                  -   5      n SU l        [        R                  U ;   aQ  [	        5       u  p[        [        R                  5      nUR                  U0 5      n[        U5      S:  d   eSU;   d   eg g )Nr`  Fr   ra  )r
   r   rb  rc  r%   r4   getr   )rd  
model_pkgscompatspacy_versioncurrent_compats        rC   !test_validate_compatibility_tablern  #  s~    u0001DDD +-
)%*;*;<M26>"Q&&&>111 !rM   component_name)rW   r   r;  rQ   c                 D   [        5       nUR                  U 5      nS H  nUR                  U5        M     [        UR	                  U 5      R
                  5      S:X  d   e[        5        n[        X5        [        SU /SSS9nSSU S	U  S
3S.0US   S   U '   [        USS9n[        UR	                  U 5      R
                  5      S:X  d   eUR                  5         [        UR	                  U 5      R
                  5      S:X  d   e S S S 5        g ! , (       d  f       g = f)N)T1T2T3T4r   r   r@  F)rR   rS   r?  rD  labelszspacy.read_labels.v1/z.json)z@readerspath
initializerU   TrG  r   )r'   r   	add_labelr   get_piperu  r8   r"   r!   r6   rx  )ro  rJ   	componentrp   r   rJ  nlp2s          rC   test_init_labelsr}  .  s   
'C^,I)E" *s||N+223q888	7S"$%!	
 2")1^$4E:>
|\*>: &f=4==0778A===4==0778A===' 
s   *BD
Dc            	      l   [        5       n U R                  S5        [        U R                  5      / :X  d   e[        5       n U R                  SSSSSSS.0S	9  [        U R                  5      / :X  d   e[         R                  " S
5      S 5       nU R                  S
5        [        U R                  5        g )NrQ   r   rZ   zspacy.TextCatBOW.v1Tr7   F)z@architecturesexclusive_classes
ngram_sizeno_output_layerrJ  third_party_testc                     S $ )Nc                     U $ r#  r   )xs    rC   <lambda>Itest_get_third_party_dependencies.<locals>.test_factory.<locals>.<lambda>g  s    rM   r   )rJ   r   s     rC   test_factory7test_get_third_party_dependencies.<locals>.test_factorye  s    rM   )r'   r   r$   rJ  rT   )rJ   r  s     rC   !test_get_third_party_dependenciesr  L  s     'CLL'

3r999 'CLL"7%)#(
   (

3r999 ]]%& ' LL#$ ,rM   zfactory_name,pipe_name))rW   rW   )rW   my_ner)r;  r;  )r;  
my_spancatc                 `   Sn[        5       nUR                  XS9nU H  nUR                  U5        M     UR                  5         UR	                  U5      R
                  U:X  d   eU S:X  a(  [        U5      UR                     [        U5      :X  d   eg [        X05      [        U5      :X  d   eg )N)AB)r   r;  )
r&   r   ry  rx  rz  ru  r   keysetr   )factory_name	pipe_nameru  rJ   piperp   s         rC   test_get_labels_from_modelr  n  s     F
)C<<<5Du NN<<	"))V333y ',TXX6#f+EEE%c8CKGGGrM   c                      [        S5      S:X  d   e[        S5      S:X  d   e[        S5      S:X  d   e[        S5      S:X  d   e[        S5      S:X  d   e[        S5      S:X  d   e[        S5      S:X  d   eg )	Nu   Meine_BäumeF_packagepackage_z.packagezpackage.z-packagezpackage-)r#   r   rM   rC   test_permitted_package_namesr    s    %n5>>>%j1U:::%j1U:::%j1U:::%j1U:::%j1U:::%j1U:::rM   c                  x   [        5       n [        U R                  / SQS9n[        U R                  / SQ/ SQ/ SQS9n[        X5      n[	        U/S/U S5      nUS	   S
:X  d   e[        U R                  / SQS9n[        U R                  / SQ/ SQ/ SQS9n[        X5      n[	        U/S/U S5      nUS	   S:X  d   eg )N)Tokenr   r  r  r  words)r  r   r  )TFT)r   r   B-ENT)r  sent_startsrh   rW   Tboundary_cross_entsr   )r   r  zI-ENTr7   )r&   r*   r   r-   r   )rJ   predrefegrN  s        rC   test_debug_data_compile_goldr    s    
)Csyy EFD
		-' 	C 
	B"wT2D%&!+++syy EFD
		-'$	C 
	B"wT2D%&!+++rM   r;  c                 L   [        5       nSn[        UR                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        UR                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        X45      n[        U/U /US	5      nUS
   U   [        SSS.5      :X  d   eUS   U   S/S/S.:X  d   eUS   U   [        USSS5      /[        USSS5      /S.:X  d   eUS   U   USS /USS /S.USS /USS /S.S.:X  d   eg )Nrx   WelcometotheBankofChinar   r  r   r   ORGr   GPETr;  r7   )r  r  spans_lengthspans_per_typesb_per_typer   r   )rq   ro   r   )r&   r*   r   r,   ri   r-   r   r   )ro  rJ   	spans_keyr  r  r  rN  s          rC   &test_debug_data_compile_gold_for_spansr    sz   
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi		B"/d;D	?9%11E)FFFF	*qc1#.FFFF !),S!Q&'S!Q&'1    y)a(S1XJ7a(S1XJ7.   rM   c                      [        5       n [        U R                  / SQS9[        U R                  S/S9/n[        SSSS.5      n[	        USS9nX2:X  d   eg )	N)r  r  r  r  r        ?      ?)chinabankr  T)	normalize)r&   r*   r   r   r   )rJ   rA   r  freq_distributions       rC   &test_frequency_distribution_is_correctr    s[    
)CCII45CIIgY'D
 d$?@H)$$?(((rM   c                      [        SSS.5      n [        SSSSS.5      n[        X5      nSn[        R                  " X#SS	9(       d   eg )
Nr  r  )abg333333?g?)r  r  cdgX2ı.?gMbP?)rel_tol)r   r   mathisclose)pqr6  r  s       rC   )test_kl_divergence_computation_is_correctr    sG    c%&AdDt<=A%FH<<$777rM   c                     [        5       n Sn[        U R                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        U R                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        X#5      nU/n[        US	/U S
5      n[        XVUS9n1 SkR                  UR                  5       5      (       d   eUS   S:X  d   eUS   S:X  d   eg )Nrx   r  r  r   r   r  r   r  r;  Texamplescompiled_goldr  >   bdsdlengths
min_lengthr7   
max_length)
r&   r*   r   r,   ri   r-   r   r   issubsetkeysrJ   r  r  r  r  r  rN  span_characteristicss           rC   *test_get_span_characteristics_return_valuer    s    
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi		BtHI;T:D4 #++,@,E,E,GHHHH-222-222rM   c                  |   [        5       n Sn[        U R                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        U R                  / SQS9n[        USSS5      [        USSS5      /UR                  U'   [        X#5      nU/n[        US	/U S
5      n[        XVUS9n[        U5        g)zDTest if interface between two methods aren't destroyed if refactoredrx   r  r  r   r   r  r   r  r;  Tr  N)	r&   r*   r   r,   ri   r-   r   r   r   r  s           rC   0test_ensure_print_span_characteristics_wont_failr    s    
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi		BtHI;T:D4   45rM   	threshold)F   P   r   Z   r   c                 p    / SQ/ SQ/ SQS.n[        X5      n[        UR                  5       5      U :  d   eg )Nr7   r   r   r   r   r   r   r   r   r7   r   r   span_type_1span_type_2span_type_3)r   sumvalues)r  sample_span_lengths
span_freqss      rC   4test_span_length_freq_dist_threshold_must_be_correctr     s>     $##
 --@LJz  "#y000rM   c                      / SQ/ SQ/ SQS.n Sn[        X5      n[        UR                  5       5      U:  d   e[        UR	                  5       5      / SQ:X  d   eg )Nr  r  r  r  r  )r   r7   r   r   r   )r   r  r  r;   r  )r  r  r  s      rC   1test_span_length_freq_dist_output_must_be_correctr    s[    ###
 I,-@LJz  "#y000
!"o555rM   c            	      t    [        5        n U S-  n[        XSSSS5        S S S 5        g ! , (       d  f       g = f)Nz
test.spacyblank:enr   r7   )r8   r   )	data_pathoutputs     rC   test_applycli_empty_dirr    s-    	9\)iVQ: 
s   )
7c            	      T   [        5        n U S-  n[        R                  " S5      nU" S5      n[        5       nUR	                  U S-  5        [        XSSSS5        UR                  U5        UR	                  U S-  5        [        XSSSS5        S S S 5        g ! , (       d  f       g = f)Ntestout.spacyrO   testing apply cli.testin.spacyr  r   r7   )r8   r   r   r+   r\   r   add)r  r  rJ   r   docbins        rC   test_applycli_docbinr    s    	9_,kk$&'y>12iVQ:

3y>12iVQ: 
s   BB
B'c            	         [        5        n U S-  nSSS./nSS0/n[        R                  " U S-  U5        [        XSSS	S	5        [        R                  " U S
-  U5        [        XSSS	S	5        S S S 5        g ! , (       d  f       g = f)Nr  Testing apply cli.   )fieldr  r  234
test.jsonlr  r7   ztest2.jsonl)r8   srslywrite_jsonlr   )r  r  rN  data2s       rC   test_applycli_jsonlr   +  s    	9_,.s;<5!")l2D9iWa;)m3U;iWa; 
s   A#A77
Bc            	          [        5        n U S-  n[        U S-  S5       nUR                  S5        S S S 5        [        XSSSS5        S S S 5        g ! , (       d  f       N&= f! , (       d  f       g = f)Nr  ztest.foowr  r  r   r7   )r8   openwriter   )r  r  ftests      rC   test_applycli_txtr  6  s\    	9_,)j(#.%KK,- /iVQ:	 
.. 
s!   A#AA#
A 	A##
A1c            	         [        5        n U S-  nSn[        R                  " S5      nU" U5      nSU0/n[        R                  " U S-  U5        [        5       nUR                  U5        UR                  U S-  5        [        U S-  S5       nUR                  U5        S S S 5        [        XS	SS
S
5        [        [        5       R                  U5      R                  UR                  5      5      n[        U5      S:X  d   eU H  nUR                   U:X  a  M   e   S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr  zTesting apply clirO   r   r  r  ztest.txtr  r  r7   r   )r8   r   r   r  r  r+   r  r\   r  r  r   r;   	from_diskget_docsr   r   r   )	r  r  r   rJ   r   
jsonl_datar  r  r6  s	            rC   test_applycli_mixedr  >  s   	9_,"kk$$itn%
)l2J?

3y>12)j(#.%KK /iVQ:fh((099#))DE6{aC88t### ! 
 /. 
s+   BD8D' A6D8D8'
D5	1D88
Ec            	         [         R                  " SSS9  Sn [        5        nUS-  n[        R                  " S5      nU" S5      nXR
                  l        [        SS	9nUR                  U5        UR                  US
-  5        [        XSSSS5        [        [        5       R                  U5      R                  UR                  5      5      nUS   R
                  R                  U :X  d   e S S S 5        g ! , (       d  f       g = f)Nextr   )default)r  r   r  rO   r  T)store_user_datar  r  rF   r7   )r*   set_extensionr8   r   r   _r  r+   r  r\   r   r;   r  r	  r   )valr  r  rJ   r   r  r6  s          rC   test_applycli_user_datar  S  s    eQ'
C	9_,kk$&'	-

3y>12iRA6fh((099#))DEay{{#%%% 
s   CC77
Dc                 H  ^ S[         S[        [           4S jm SS[        [        [        [
        [        [        4   4   S4   S[        [         [        [           4   4U4S jjjn[        5        nU" 5       u  p4[        U Vs/ s H  oUR                  PM     snS9R                  US-  5        [        5        nUR                  U5        [        UUS-  S	S
SSS9u  pxn	U[        U	R                  5       5      :X  d   eU	S   S:X  d   e S S S 5        U" S0 445      u  p:[        5        nUR                  U5        [        UUS-  SS
SSS9u  pxn	U[        U	R                  5       5      :X  d   eU	S   S:X  d   e S S S 5        U" S0 445      u  p:[        5        nUR                  U5        [        UUS-  S	S
SSS9(       d   e S S S 5        U" 5       u  p:[        5        nUR                  U5        [        R                  " [         5         [        UUS-  SS
SSS9  S S S 5        S S S 5        S S S 5        g s  snf ! , (       d  f       GN8= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       NX= f! , (       d  f       Na= f! , (       d  f       g = f)NrJ   returnc                     / nSSSSS.SS/0S.4SSSSS.SS	/0S.44 H?  nU R                  US
   5      nUR                  [        R                  " X2S   5      5        MA     U$ )Nz/I am angry and confused in the Bank of America.      ?        )ANGRYCONFUSEDHAPPYrx   )r   .   r  )catsri   z$I am confused but happy in New York.)   #   r  r   r7   )rH   appendr-   rG   )rJ   rA   rB   r   s       rC   make_examples.test_cli_find_threshold.<locals>.make_examplesd  s      B&)sSI"_$56 7&)sSI"_$56
A  ,,qt$CKK))#t45#
& rM   rU   .c                    >^ [        5       nUR                  SSSS0S9  U  H  u  p#UR                  X#S9  M     T" U5      mUR                  U4S jS9  [        S	5       H  nUR	                  T5        M     UT4$ )
Nr>  tc_multir  g?)r  r   rJ  r  c                     > T $ r#  r   )new_exampless   rC   r  ;test_cli_find_threshold.<locals>.init_nlp.<locals>.<lambda>  s    rM   )get_examplesr   )r&   r   rx  r  update)rU   new_nlpcfncomp_configr>   r&  r!  s        @rC   init_nlp)test_cli_find_threshold.<locals>.init_nlp|  s     )-% 	 	
 !+CS5 !+ %W-(<=qANN<(  $$rM   )rA   z
docs.spacyr$  r  cats_macro_fT)rZ   r  r  threshold_key
scores_keyrY   r  r  r;  
spans_sc_fr>  r  )r   )r(   r   r-   r   r]   r   r   r8   r+   	referencer\   r   maxr  r'  r(  AttributeError)capsysr-  docs_dirrJ   r  rK   nlp_dirbest_threshold
best_scoreresr  r!  s              @rC   test_cli_find_thresholdr<  c  sz   8 W 2 >@%%T#s(^ 34c9:%	xg&	'% %* 
8 !
h?h7&&h?@HH|#	
 ^wKK .<"\1$))/+N SZZ\!2222s8s?"?  Ir?,-^wKK .<"\1#)'/+N SZZ\!2222s8s?"?  0"578^wKK !"\1$))    ^wKK ~.!&5!"-- / g 

 @ ^ ^ ^ /. ^g 
s   2JH8
!J;AH=J+AI<J*I J!,JI1J'J8J=
I	J
I	J 
I.	*J1
I?;J
J	J
J!c                     [        5        n / SQnU H  n[        X-  5      R                  5         M      [        [	        U 5      5      S:X  d   e[        [	        U S S95      S:X  d   e[        [	        U SS95      S:X  d   e[        [	        U SS95      S:X  d   e[        [	        U SS95      S	:X  d   e[        [	        U S
S95      S:X  d   e S S S 5        g ! , (       d  f       g = f)N)z	data1.iobz	data2.iobz
data3.jsonzdata4.conllzdata5.conllzdata6.conllz	data7.txtr   )suffixjsonr7   iobr   conllr   pdfr   )r8   r   touchr   r   )r  filesfs      rC   test_walk_directoryrF    s    	1
 AK  N1%&1,,,N1T23999N1V45!;;;N1U34:::N1W561<<<N1U34:::' 
s   C C
C#c            	      
   SS/ SQ04SS/ SQ04/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US   5      5        M@     [        US/US	5      n[        US
   5      S:X  d   eg )NShe likes green eggslemmas)sher
  greeneggEat blue ham)eatbluehamr   r7   trainable_lemmatizerTlemmatizer_treesr   )r(   r   r-   rG   rH   r   r   )r  rJ   train_examplesrB   rN  s        rC   *test_debug_data_trainable_lemmatizer_basicrT    s    	(,K!LM	($:;<H *CNg//QqT0BAaDIJ  *@)A3MD t&'(A---rM   c            	          SS/ SQ04S/ SQ/ SQS.4/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US	   5      5        M@     [        US
/US5      nUS   S:X  d   eg )NrH  rI  )rF   r
  rK  rF   zHe hates green eggs)HehatesrK  eggs)rF   rW  r   rK  rF   )r  rI  r   r7   rQ  Tpartial_lemma_annotationsr   r(   r   r-   rG   rH   r   )partial_examplesrJ   rS  rB   rN  s        rC   ,test_debug_data_trainable_lemmatizer_partialr]    s     
 (,E!FG "=7	
	 *CNg//QqT0BAaDIJ  *@)A3MD+,111rM   c            	          SS/ SQ04SS/ SQ04/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US   5      5        M@     [        US/US	5      nUS
   S:X  d   eg )NrH  rI  )nor_  r_  r_  rM  )r_  r_  r_  r   r7   rQ  Tn_low_cardinality_lemmasr   r[  )low_cardinality_examplesrJ   rS  rB   rN  s        rC   4test_debug_data_trainable_lemmatizer_low_cardinalityrb    s    	(,D!EF	($678  *CN%g//QqT0BAaDIJ & *@)A3MD*+q000rM   c            	          S0 4S0 4/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US   5      5        M@     [        US/US5      nUS   S:X  d   eg )	NrH  rM  r   r7   rQ  Tno_lemma_annotationsr   r[  )unannotated_examplesrJ   rS  rB   rN  s        rC   2test_debug_data_trainable_lemmatizer_not_annotatedrf    s    	$	 *CN!g//QqT0BAaDIJ " *@)A3MD&'1,,,rM   c                      SSK Jn   SSKJn   g )Nr   project_run)	spacy.cliri  spacy.cli.project.runrh  s    rC   test_project_api_importsrl  *  s    %1rM   c                     U R                  [        SS 5        [        R                  " SSS9  [        R                  " [
        5         [        R                  " SSS9  SSS5        g! , (       d  f       g= f)ziTest that we can't tell spacy download to get an arbitrary model by using a
relative path in the filenamerun_commandc                     g r#  r   )cmds    rC   r  5test_download_rejects_relative_urls.<locals>.<lambda>3  s    DrM   zen_core_web_sm-3.7.1T)directz../en_core_web_sm-3.7.1N)setattrr   downloadr'  r(  r-  )monkeypatchs    rC   #test_download_rejects_relative_urlsrv  /  sS     8HI 3DA	z	"  !:4H 
#	"	"s   A''
A5)r  r2  collectionsr   pathlibr   typingr   r   r   r   r'  r  clickr	   packaging.specifiersr
   	thinc.apir   r   r   rj  r   r   spacy.cli._utilr   r   r   spacy.cli.applyr   spacy.cli.debug_datar   r   r   r   r   r   r   r   spacy.cli.downloadr   r   spacy.cli.evaluater   spacy.cli.find_thresholdr   spacy.cli.init_configr   r    r!   spacy.cli.init_pipeliner"   spacy.cli.packager#   r$   spacy.cli.validater%   spacy.lang.enr&   spacy.lang.nlr'   spacy.languager(   spacy.schemasr)   spacy.tokensr*   r+   spacy.tokens.spanr,   spacy.trainingr-   r.   r/   spacy.training.convertersr0   r1   r2   
spacy.utilr3   r4   r5   r6   utilr8   markissuerD   rL   rf   parametrizer]   r   r   r   r   r  r  r  r%  r*  r.  r7  rK  rO  rY  r^  rg  rn  r}  r  slowr  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r<  rF  rT  r]  rb  rf  rl  rv  r   rM   rC   <module>r     s    	   ) )    -    + R R !	 	 	 > , 3 K K 0 V - !  # . $ " G G T T W W  41 1@ 4  46 6D 5Q81S 81s 81	 
81v56: 	
	
&A'&A2/2d72A7H  T	WbM*
'2'
U	gu-.
7E*+	&$(GH
]	#te%DE	;tVZej=kl	8DSWbg:hi44 7)-H!IJ% K%
 ";gY!GH% I%
.& $.#
)5	 45	 lJ%?@u63 7 A	 /3,
 (R)(R ;;
R2 )+RS> T>:-D H H;,2 )I7L+MN O0	)83*6& &:;1 <1	6;;<;$*& k\;.. 2,1-2
	IrM   