
    hA                        S SK r S SKrS SKrS SKJr  S SKrS SKJrJrJr  S SK	J
r
  S SKJr  S SKJrJrJrJrJrJrJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJ r J!r!  SSK"J#r#  \\/r$\RJ                  S 5       r&\RJ                  S 5       r'\RJ                  S 5       r(\RR                  RU                  S5      S 5       r+\RR                  RU                  S5      S 5       r,\RR                  RU                  S5      S 5       r-\RR                  RU                  S5      S 5       r.\RR                  RU                  S5      S 5       r/\RR                  RU                  S5      S 5       r0\RR                  RU                  S5      S 5       r1\RR                  RU                  S5      S 5       r2\RR                  Rg                  S\$5      S 5       r4\RR                  Rg                  S\$5      S  5       r5\RR                  Rg                  S\$5      S! 5       r6S" r7S# r8S$ r9S% r:\RR                  RU                  S&5      S' 5       r;\RR                  Rg                  S\$5      S( 5       r<S) r=S* r>S+ r?S, r@g)-    N)Linear)Vocabloadregistry)English)Language)DependencyParserEntityRecognizerEntityRulerSentenceRecognizerTaggerTextCategorizerTrainablePipe)DEFAULT_PARSER_MODEL)DEFAULT_SENTER_MODEL)DEFAULT_TAGGER_MODEL)DEFAULT_SINGLE_TEXTCAT_MODEL)Span)ensure_path
load_model   )make_tempdirc                     SSSSSSS.nS[         0n[        R                  " US	S
9S   n[        X40 UD6nUR	                  S5        U$ )NF   d            ?        learn_tokensmin_action_frequpdate_with_oracle_cut_size
beam_widthbeam_update_probbeam_densitymodelTvalidatensubj)r   r   resolver	   	add_labelen_vocabconfigcfgr&   parsers        g/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/serialize/test_serialize_pipeline.pyr0   r0       sc     '*F (
)CS409Eh88F
WM    c                 p    SSSSSSS.nS[         0n[        R                  " US	S
9S   n[        X40 UD6nU$ )NFr   r   r   r   r   r   r&   Tr'   )r   r   r*   r	   r,   s        r1   blank_parserr4   1   sU     '*F (
)CS409Eh88FMr2   c                 t    S[         0n[        R                  " USS9S   n[        X5      n[        X5      nX44$ Nr&   Tr'   )r   r   r*   r   )r-   r/   r&   tagger1tagger2s        r1   taggersr9   A   sA    (
)CS409EX%GX%Gr2   i  c                      [        5       n U R                  S5      nUR                  S5        U R                  5         [	        U R                  SS/5      5        g )NtaggerAhi )r   add_piper+   
initializelistpipe)nlpr;   s     r1   test_issue3456rD   J   sF     )C\\(#F
SNN4*	r2   i  c                 4   SSS.SSS0SS0/S.SSS0/S.SS	S
S./S.SSSS./n[        U S9n[        X!SS9nUR                  5       n[        U5      [        U5      :X  d   e[        UR                  5      S:X  d   eUR
                  (       d   e[        U5      nUR                  U5      n[        U5      [        U5      :X  d   e[        UR                  5      S:X  d   eUR
                  UR
                  :X  d   eUR                  UR                  :X  d   eg )NHELLOhello worldlabelpatternBYELOWERbyeORTHCOMPLEXfoo*rN   OPTECH_ORGApplea1rI   rJ   idvocabTpatternsoverwrite_ents   )r   r   to_byteslenlabels	overwrite
from_bytes
ent_id_sep)r-   r\   rC   rulerruler_bytes	new_rulers         r1   test_issue_3526_1rh   T   s,    m4gu%57G$HI'8&9:%s)C(DE=H 
"CtDE.."Ku:X&&&u||!!!???C I$$[1Iy>SZ'''y A%%%%//1115#3#3333r2   c                    SSS.SSS0SS0/S.SSS0/S.SS	S
S./S.SSSS./n[        U S9n[        X!SS9n[        R                  " UR                  5      n[        U5      nUR                  U5      n[        U5      [        U5      :X  d   eUR                   H  nXeR                  ;   a  M   e   UR                  UR                  Ld   eg )NrF   rG   rH   rK   rL   rM   rN   rO   rP   rQ   rR   rT   rU   rV   rW   rY   Tr[   )r   r   srslymsgpack_dumpsr\   rc   r`   rb   )r-   r\   rC   re   bytes_old_stylerg   rJ   s          r1   test_issue_3526_2rm   k   s     m4gu%57G$HI'8&9:%s)C(DE=H 
"CtDE))%..9OC I$$_5Iy>SZ'''>>,,,,, "eoo555r2   c                    SSS.SSS0SS0/S.SSS0/S.SS	S
S./S.SSSS./n[        U S9n[        X!SS9n[        5        nUS-  n[        R                  " UR                  S5      UR                  5        [        U5      R                  U5      nUR                   H  nXvR                  ;   a  M   e   [        U5      [        U5      :X  d   eUR                  UR                  Ld   e S S S 5        g ! , (       d  f       g = f)NrF   rG   rH   rK   rL   rM   rN   rO   rP   rQ   rR   rT   rU   rV   rW   rY   Tr[   entity_rulerz.jsonl)
r   r   r   rj   write_jsonlwith_suffixr\   	from_diskr`   rb   )r-   r\   rC   re   tmpdirout_filerg   rJ   s           r1   test_issue_3526_3ru      s	    m4gu%57G$HI'8&9:%s)C(DE=H 
"CtDE	6N*(..x8%..I$..x8	~~G00000 &9~U+++""%//999 
s   A,C529C55
Dc                    [        U S9nSSS./nSS0nUR                  SUS9nUR                  U5        [        5        nUR	                  U5        UR                  S5      nUR                  SSS./:X  d   eUR                  SL d   e[        U5      nUR                  S5      nUR                  SSS./:X  d   eUR                  SL d   e S S S 5        g ! , (       d  f       g = f)	NrY   ORGrU   rH   r]   Tro   r.   )	r   r?   add_patternsr   to_diskget_piper\   rb   r   )r-   rC   r\   r.   re   rs   nlp2rg   s           r1   test_issue_3526_4r}      s    

"CG45H%FLLL7E	x 	6F^,~~Eg"F!GGGG$&&&F|MM.1	!!'&J%KKKK""d*** 
s   BC
C$i  c                  \   [        5       n U R                  S5      nUR                  S5        U R                  5         SSS.SSS0SS	0/S./nU R                  S
SS9nUR	                  U5        U " S5      nUR
                  S   R                  S:X  d   e[        5        n[        U5      nUR                  5       (       d  UR                  5         U R                  U5        [        U5      nU" S5      nUR
                  S   R                  S:X  d   e SSS5        g! , (       d  f       g= f)z@Test that serialization of an EntityRuler before NER works fine.ner
SOME_LABELMY_ORGrU   rH   MY_GPElowersan	franciscoro   )beforeWhat do you think about Apple ?r   N)r   r?   r+   r@   ry   entslabel_r   r   existsmkdirrz   r   )	rC   r   r\   re   doc1d
output_dirr|   doc2s	            r1   test_issue4042r      s    )C
,,u
CMM,NN w/%(87K:P'QRH
 LLL6E	x 01D99Q<(***	1 ^
  ""J*%56yy|""h... 
s   A4D
D+c                  2   [        5       n U R                  S5      nUR                  S5        U R                  5         U " S5      n[	        UR
                  5      S:X  d   eSUR
                  ;   d   e[        USSSS9n[        UR                  5      U/-   Ul        UR                  S5        U" U5        [	        UR
                  5      S	:X  d   eSUR
                  ;   d   eSUR
                  ;   d   e[        5        n[        U5      nUR                  5       (       d  UR                  5         UR                  U5        0 nU R                  SUS
9nUR                  U5        [	        UR
                  5      S	:X  d   e SSS5        g! , (       d  f       g= f)z
Test that serialization of an NER works fine when new labels were added.
This is the second bug of two bugs underlying the issue 4042.
r   r   r   r         r   )rI   r   rx   N)r   r?   r+   r@   r`   ra   r   rA   r   r   r   r   r   rz   create_piperr   )nlp1ner1r   	apple_entr   r   r.   ner2s           r1   test_issue4042_bug2r      sU    9D==DNN< OO12Dt{{q   4;;&&&T1ax0ITYY9+-DINN8Jt{{q   4;;&&&t{{"""	1 ^
  ""Z f5z"4;;1$$$ 
s   >B F
Fiu  c                      [        SS9n [        U S9nSS0nUR                  SUS9n[        5        nUS-  R	                  S	5       n[
        R                  " X55        UR                  S   S:X  d   e S
S
S
5        US-  R	                  S5       n[
        R                  " U5      nUR                  S   S:X  d   e S
S
S
5        S
S
S
5        g
! , (       d  f       N`= f! , (       d  f       N(= f! , (       d  f       g
= f)z(Ensure the pickling of the NER goes welltest_vocab_add_vector)vectors_namerY   r"   o   r   rx   zner.pklwbNrb)	r   r   r   r   openpickledumpr/   r   )rZ   rC   r.   r   tmp_pathfile_r   s          r1   test_issue4725_1r      s     67E

C%sF //%/
/C	8"((.%KK#7789S@@@ / "((.%;;u%D889:cAAA / 
.. /. 
s;   C/,C3C/,C<C/
C	C/
C,	(C//
C=Parserc                 $   S[         0n[        R                  " USS9S   nU" X5      nU" X5      nUR                  UR	                  S/S95      nUR	                  S/S9nUR	                  S/S9n[        U5      [        U5      :X  d   eXg:X  d   eg )Nr&   Tr'   rZ   exclude)r   r   r*   rc   r_   r`   )r-   r   r/   r&   r0   
new_parserbytes_2bytes_3s           r1   %test_serialize_parser_roundtrip_bytesr      s    (
)CS409EH$F(J&&vy'IJJ!!7)!4Goowio0Gw<3w<'''r2   c                    [        5       nSnX!R                  ;  d   eS[        0n[        R                  " USS9S   nU " X5      nUR                  U5        X%R                  R                  ;   d   e[        5       nX&R                  ;  d   eU " Xd5      nUR                  UR                  S/S95      nX'R                  R                  ;   d   eg )N
FunnyLabelr&   Tr'   rZ   r   )	r   stringsr   r   r*   r+   rZ   rc   r_   )r   vocab1rI   r/   r&   parser1vocab2parser2s           r1   test_serialize_parser_stringsr     s    WFE&&&(
)CS409EV#GeMM)))))WF&&&V#G  !1!17)!1!DEGMM)))))r2   c                    S[         0n[        R                  " USS9S   nU" X5      n[        5        nUS-  nUR	                  U5        U" X5      nUR                  U5      nUR                  SS/S9nUR                  SS/S9n	[        U5      [        U	5      :X  d   eX:X  d   e S S S 5        g ! , (       d  f       g = f)Nr&   Tr'   r0   rZ   r   )r   r   r*   r   rz   rr   r_   r`   )
r-   r   r/   r&   r0   r   	file_pathparser_dparser_bytesparser_d_bytess
             r1   $test_serialize_parser_roundtrip_diskr     s    (
)CS409EH$F	1L	y!(*%%i0/AB!**GW3E*F< C$7777--- 
s   A3B00
B>c                    U R                   SLd   eUR                   SLd   eUR                  R                  U R                  R                  :w  d   eU R                  S/S9nUR                   R                  S   " UR                   U R                  R                  5        UR                  U5        UR                   SLd   eUR                  R                  U R                  R                  :X  d   eg )NTrZ   r   resize_output)r&   movesn_movesr_   attrsrc   )r0   r4   
bytes_datas      r1   test_to_from_bytesr   '  s    <<t###T)))%%)=)===='3J_-l.@.@&,,BVBVWJ'T)))%%)=)====r2   c                 R   US   nUR                  5       nUR                  U5      nUR                  5       U:X  d   eS[        0n[        R                  " USS9S   n[        X5      R                  U5      nUR                  5       n[        U5      [        U5      :X  d   eXs:X  d   eg )Nr   r&   Tr'   )r_   rc   r   r   r*   r   r`   )r-   r9   r7   	tagger1_br/   r&   new_tagger1new_tagger1_bs           r1   %test_serialize_tagger_roundtrip_bytesr   3  s    ajG  "I  +G***(
)CS409E)44Y?K((*M}Y///%%%r2   c                    Uu  p#[        5        nUS-  nUS-  nUR                  U5        UR                  U5        S[        0n[        R                  " USS9S   n[        X5      R                  U5      n	[        X5      R                  U5      n
U	R                  5       U
R                  5       :X  d   e S S S 5        g ! , (       d  f       g = f)Nr7   r8   r&   Tr'   )r   rz   r   r   r*   r   rr   r_   )r-   r9   r7   r8   r   
file_path1
file_path2r/   r&   	tagger1_d	tagger2_ds              r1   $test_serialize_tagger_roundtrip_diskr   @  s    G	1]
]

#
#,-  t4W=8+55jA	8+55jA	!!#y'9'9';;;; 
s   B%B>>
Cc                    SnX0R                   ;  d   eX1R                   ;  d   eUS   nX4R                  R                   ;  d   e[        5        nUR                  U5        X4R                  R                   ;   d   eUS-  nUR	                  U5        S[
        0n[        R                  " USS9S   n[        X5      R                  U5      n	X9R                  R                   ;   d   e S S S 5        g ! , (       d  f       g = f)NSomeWeirdLabelr   r7   r&   Tr'   )
r   rZ   r   r+   rz   r   r   r*   r   rr   )
r-   de_vocabr9   rI   r;   r   r   r/   r&   r8   s
             r1   test_serialize_tagger_stringsr   N  s    E((((((((((QZF,,,,,	1,,,,,	M	y!,-  t4W=)33I>----- 
s   BC11
C?iQ  c                 x    S[         0n[        R                  " USS9S   n[        XSS9nUR	                  S/S9  g )Nr&   Tr'   g      ?)	thresholdrZ   r   )r   r   r*   r   r_   )r-   r/   r&   textcats       r1   test_serialize_textcat_emptyr   a  sE     0
1CS409Eh=GgY'r2   c                   ^ ^^ S[         0n[        R                  " USS9S   mUU U4S jnT" T T5      nSUR                  S'   U" 5       R	                  UR                  S/S95      nSUR                  ;   d   eU" 5       R	                  UR                  S/S9S	/S9nSUR                  ;  d   eU" 5       R	                  UR                  S	/S9S/S9nSUR                  ;  d   eg )
Nr&   Tr'   c                     > T" TT5      n U $ N )r   r   r-   r&   s    r1   get_new_parser3test_serialize_pipe_exclude.<locals>.get_new_parsero  s    He,
r2   barrP   rZ   r   r/   )r   r   r*   r/   rc   r_   )r-   r   r/   r   r0   r   r&   s   ``    @r1   test_serialize_pipe_excluder   j  s    (
)CS409E He$FFJJu!,,V__gY_-OPJJNN"""!,,	*UG - J 
&&&!,,(7) - J 
&&&r2   c                     S[         0n[        R                  " USS9S   n[        X5      nUR	                  5       n[        X5      R                  U5      nUR	                  5       UR	                  5       :X  d   eg r6   )r   r   r*   r   r_   rc   )r-   r/   r&   srsr_bsr_ds         r1   !test_serialize_sentencerecognizerr     sh    (
)CS409E	H	,B;;=Dh.99$?D;;=DMMO+++r2   c                     [        5       n U R                  S5        U R                  S5        U R                  S5        U R                  S   S   S/:X  d   eU R                  R	                  5       n[         R
                  " U5      nUR                  S/:X  d   eUR                  SS/:X  d   eUR                  S/:X  d   eUR                  S   S   S/:X  d   e[        5        nUR                  U5        [        R                  " U5      nS S S 5        WR                  S/:X  d   eUR                  SS/:X  d   e[        5        nUR                  U5        [        R                  " US/S9nS S S 5        WR                  / :X  d   eUR                  SS/:X  d   eUR                  SS/:X  d   e[        5        nU R                  U5        [        R                  " US/S9nS S S 5        WR                  S/:X  d   eUR                  S/:X  d   eUR                  / :X  d   eg ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       Nj= f)Nr   r;   rC   disabled)disabler   )r   r?   disable_piper.   copyfrom_config
pipe_namescomponent_namesr   r   rz   spacyr   )rC   r.   r|   r   nlp3nlp4nlp5s          r1   &test_serialize_pipeline_disable_enabler     s   
)CLLLLX::eZ(XJ666ZZ__Fv&D??ug%%%E8#4444==XJ&&&;;uj)hZ777	1Qzz!} 
 ??ug%%%E8#4444	1Qzz!eW- 
 ??b   E8#4444==UH----	1Azz!hZ0 
 ??ug%%%E7***==B! 

 
 
s$   $(H"(H49(I"
H14
I
Ic                      " S S[         5      n  " S S[         5      n " S S[         5      nU " [        5       5      n[        R                  " [        5         UR                  5         S S S 5        [        5        n[        R                  " [        5         UR                  U5        S S S 5        S S S 5        U" [        5       5      n[        R                  " [        5         UR                  5         S S S 5        [        5        n[        R                  " [        5         UR                  U5        S S S 5        S S S 5        U" [        5       [        5       5      nUR                  5       nU" [        5       [        5       5      R                  U5      nUR                  5       U:X  d   e[        5        nUR                  U5        U" [        5       [        5       5      R                  U5      nS S S 5        UR                  5       U:X  d   eg ! , (       d  f       GN= f! , (       d  f       GN}= f! , (       d  f       GN= f! , (       d  f       GNV= f! , (       d  f       GN)= f! , (       d  f       GN3= f! , (       d  f       N= f)Nc                       \ rS rSrS rSrg)<test_serialize_custom_trainable_pipe.<locals>.BadCustomPipe1i  c                     g r   r   selfrZ   s     r1   __init__Etest_serialize_custom_trainable_pipe.<locals>.BadCustomPipe1.__init__  s    r2   r   N__name__
__module____qualname____firstlineno__r   __static_attributes__r   r2   r1   BadCustomPipe1r     s    	r2   r   c                       \ rS rSrS rSrg)<test_serialize_custom_trainable_pipe.<locals>.BadCustomPipe2i  c                     Xl         S U l        g r   rZ   r&   r   s     r1   r   Etest_serialize_custom_trainable_pipe.<locals>.BadCustomPipe2.__init__  s    JDJr2   r&   rZ   Nr   r   r2   r1   BadCustomPipe2r     s    	r2   r  c                       \ rS rSrS rSrg)8test_serialize_custom_trainable_pipe.<locals>.CustomPipei  c                     Xl         X l        g r   r  )r   rZ   r&   s      r1   r   Atest_serialize_custom_trainable_pipe.<locals>.CustomPipe.__init__  s    JJr2   r  Nr   r   r2   r1   
CustomPiper    s    	r2   r
  )r   r   pytestraises
ValueErrorr_   r   rz   r   rc   rr   )r   r  r
  rB   r   
pipe_bytesnew_pipes          r1   $test_serialize_custom_trainable_piper    s     
] 
 %'"D	z	" 
#	1]]:&LLO ' 
 %'"D	z	" 
#	1]]:&LLO ' 
 egvx(DJ%'68,77
CH*,,,	1Qegvx0::1= 
 *,,,% 
#	" '& 
 
#	" '& 
 
sl   H;H0H(H0"II& I2I&3:I8
H
H-	(H00
H?
I
I#	I&&
I58
Jc                     [         R                  " S5      n [        U R                  R                  5      nSnU R                  R                  R                  U5        [        U R                  R                  5      US-   :X  d   e[        5        nU R                  U5        [        U5      n[        U R                  R                  5      [        UR                  R                  5      :X  d   eX$R                  R                  ;   d   e[        US/S9nU[        UR                  R                  5      :X  d   eX$R                  R                  ;  d   e S S S 5        g ! , (       d  f       g = f)Nen  unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_r   r   r   )	r   blankr`   rZ   r   addr   rz   r   )rC   orig_strings_lengthwordr   reloaded_nlps        r1   test_load_without_stringsr    s   
++d
Ccii//0 DII$syy  !%81%<<<<	1AAw399$$%\-?-?-G-G)HHHH))11111A	{3"c,*<*<*D*D&EEEE--55555 
s   CE
E,)Ar   r  rj   	thinc.apir   r   r   r   r   spacy.lang.enr   spacy.languager   spacy.pipeliner	   r
   r   r   r   r   r   spacy.pipeline.dep_parserr   spacy.pipeline.senterr   spacy.pipeline.taggerr   spacy.pipeline.textcatr   spacy.tokensr   
spacy.utilr   r   utilr   test_parsersfixturer0   r4   r9   markissuerD   rh   rm   ru   r}   r   r   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r   r2   r1   <module>r*     s        ' ' ! #   ; 6 6 ?  .  "23        4  44 4, 46 6& 4: :( 4+ +" 4/ /8 4% %D 4B B$ <0	 1	 <0* 1*  <0. 1.	>
&<.& 4( ( <0' 1',,>"-J6r2   