
    h                        S SK r S SKrS SKJr  S SKrS SKJr  S SKJr  S SK	J
r
  S SKJrJr  S SKJr  S SKJr  S	S
KJr  / / 4/ SQ/ SQ4/r/ SQS4/r\R,                  R/                  S5      S 5       r\R,                  R/                  S5      S 5       r\R,                  R/                  S5      S 5       r\R,                  R7                  SS/5      S 5       r\R,                  R7                  S\5      S 5       r\R,                  R7                  S\5      S 5       r\R,                  R7                  S\5      S 5       r\R,                  R7                  S\5      S 5       r \R,                  R7                  S\5      S 5       r!\R,                  R7                  S\5      S 5       r"\R,                  R7                  S\5      S 5       r#\R,                  R7                  S\5      S  5       r$g)!    N)get_current_ops)English)StringStore)Doc)ensure_path
load_model)Vectors)Vocab   )make_tempdir)ratsarecute)iliker   HelloiW  c                     [        U 5      n[        UR                  5      nUR                  UR                  5       5        UR	                  S5      (       d   eg )NDEP)r   vocab
from_bytesto_byteshas_annotation)en_vocabdocdoc2s      l/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/serialize/test_serialize_vocab_strings.pytest_issue599r      sA    
h-Csyy>DOOCLLN#u%%%%    i  c                     [        5       nUR                  n[        5        n[        US-  5      nUR	                  5       (       d  UR                  5         UR                  U5        [        5       R                  U5      n[        R                  " SUS9n[        US-  5      nUR	                  5       (       d  UR                  5         UR                  U5        [        U5      nUR                  S:X  d   e SSS5        g! , (       d  f       g= f)zTest that a new blank model can be made with a vocab from file,
and that serialization does not drop the language at any point.r   en)r   nlpN)r   r   r   r   existsmkdirto_diskr
   	from_diskspacyblankr   lang)	r   nlp1vocab1d	vocab_dirvocab2nlp2nlp_dirnlp3s	            r   test_issue4054r1      s     9DZZF	1G,	!!OOy!""9-{{4v.a%i(~~MMOW'"yyD    
s   CC??
Di%  c                    [        5       nUR                  R                  5       n/ SQn/ SQn[        XS9n[	        U5       H  u  pgXF   Ul        M     UR                  5       n[        5       n	U	R                  U5      n	[        U	5      R                  U5      n/ n
U H  nU
R                  UR
                  5        M      X:X  d   eg )N)Appleislookingatbuyingastartup)NOUNVERBADPr;   PROPNr:   r<   )words)	r   r   r   r   	enumeratepos_r
   r   append)r   r!   vocab_bytesr>   posr   r   token	doc_bytesr   actuals              r   test_issue4133rG   1   s    
)C))$$&KFE
AC
h
$CcNV
 # IGE[)E
e*

	
*CFejj! ==r   textratc                     U R                   R                  U5      nU R                  S/S9n[        5       R	                  U5      nUR                   U   U:X  d   eUR                  S/S9U:X  d   eg )Nlookups)exclude)stringsaddr   r
   r   )r   rH   	text_hashrB   	new_vocabs        r   test_serialize_vocabrQ   E   sv      $$T*I##YK#8K"";/IY'4///yk2kAAAr   zstrings1,strings2c                    [        U S9n[        US9nUR                  5       nUR                  5       nX:X  a  XE:X  d   eOXE:w  d   eUR                  U5      nUR                  5       U:X  d   e[        5       R                  U5      nUR                  5       U:X  d   e[        UR                  5      [        U 5      :X  d   e[        UR                   Vs/ s H  owPM     sn5      [        U 5      :X  d   eg s  snf NrM   )r
   r   r   lenrM   sorted)strings1strings2r*   r-   vocab1_bvocab2_b
new_vocab1ss           r   $test_serialize_vocab_roundtrip_bytesr]   N   s    8$F8$F H H######x(F??(((##H-J H,,,z!!"c(m333j00101012fX6FFFF1s   C6c                 Z   [        U S9n[        US9n[        5        nUS-  nUS-  nUR                  U5        UR                  U5        [        5       R                  U5      n[        5       R                  U5      n[	        U 5      [	        UR
                   V	s/ s H  oPM     sn	5      :X  d   e[	        U5      [	        UR
                   V	s/ s H  oPM     sn	5      :X  d   e[	        U 5      [	        U5      :X  a=  UR
                   V	s/ s H  oPM     sn	UR
                   V	s/ s H  oPM     sn	:X  d   eO<UR
                   V	s/ s H  oPM     sn	UR
                   V	s/ s H  oPM     sn	:w  d   eS S S 5        g s  sn	f s  sn	f s  sn	f s  sn	f s  sn	f s  sn	f ! , (       d  f       g = f)NrT   r*   r-   )r
   r   r$   r%   setrM   )
rW   rX   r*   r-   r+   
file_path1
file_path2vocab1_dvocab2_dr\   s
             r   #test_serialize_vocab_roundtrip_diskrd   `   ss   8$F8$F	1\
\
z"z"7$$Z07$$Z08}0@0@$A0@1Q0@$A BBBB8}0@0@$A0@1Q0@$A BBBBx=CM)'//0/!A/0@P@P4Q@P1Q@P4QQQQ'//0/!A/0@P@P4Q@P1Q@P4QQQQ 
 %B$A04Q04Q 
sZ   A<FE>$)FF2F
FF%F0FFF"F-F>F
F*zstrings,lex_attrc                 
   [        U S9n[        5       nXU S      l        X S      R                  U:X  d   eX0S      R                  U:w  d   eUR                  UR                  5       5      nX0S      R                  U:X  d   eg )NrT   r   )r
   norm_r   r   )rM   lex_attrr*   r-   s       r   $test_serialize_vocab_lex_attrs_bytesrh   t   s    7#FWF'71:!*##x///!*##x///v01F!*##x///r   c                     [        U S9nUR                  UR                  5       5        [        UR                  5      [        U 5      :X  d   eg rS   )r
   r   r   rU   rM   )rM   rg   r   s      r   #test_deserialize_vocab_seen_entriesrj      s>     '"E	U^^%&u}}W---r   c                 b   [        U S9n[        5       nXU S      l        X S      R                  U:X  d   eX0S      R                  U:w  d   e[        5        nUS-  nUR                  U5        UR	                  U5      nS S S 5        X0S      R                  U:X  d   eg ! , (       d  f       N&= f)NrT   r   r   )r
   rf   r   r$   r%   )rM   rg   r*   r-   r+   	file_paths         r   #test_serialize_vocab_lex_attrs_diskrm      s    7#FWF'71:!*##x///!*##x///	1K	y!!!), 
 !*##x///	 
s   (B  
B.c                    [        U S9n[        US9nUR                  5       nUR                  5       n[        U 5      [        U5      :X  a  XE:X  d   eOXE:w  d   eUR                  U5      nUR                  5       U:X  d   e[        5       R                  U5      nUR                  5       U:X  d   e[        U5      [        U 5      :X  d   eg rS   )r   r   r_   r   )rW   rX   sstore1sstore2	sstore1_b	sstore2_bnew_sstore1s          r   *test_serialize_stringstore_roundtrip_bytesrt      s    (+G(+G  "I  "I
8}H%%%%%%%  +G***-**95K!Y...{s8},,,r   c                 .   [        U S9n[        US9n[        5        nUS-  nUS-  nUR                  U5        UR                  U5        [        5       R                  U5      n[        5       R                  U5      n[	        U5      [	        U5      :X  d   e[	        U5      [	        U5      :X  d   e[	        U 5      [	        U5      :X  a  [	        U5      [	        U5      :X  d   eO[	        U5      [	        U5      :w  d   eS S S 5        g ! , (       d  f       g = f)NrT   rW   rX   )r   r   r$   r%   r_   )	rW   rX   ro   rp   r+   r`   ra   	sstore1_d	sstore2_ds	            r   )test_serialize_stringstore_roundtrip_diskrx      s    (+G(+G	1^
^

#
#M++J7	M++J7	9~W---9~W---x=CM)y>S^333y>S^333 
s   C D
Dc                 n   [        U S9n[        5       n[        UR                  R	                  S5      SSS9nXBl        XU S      l        [        R                  " U5      n[        R                  " U5      nUR                  5       UR                  5       :X  d   eUR
                  R                  S:X  d   eg )NrT   )
   rz   floret   )datamode
hash_countr   )r
   r   r	   xpzerosvectorsrf   pickledumpsloadsr   r~   )rM   rg   r   opsr   vocab_pickledvocab_unpickleds          r   test_pickle_vocabr      s    '"E

C366<<1QOGM&'!*LL'Mll=1O>>779999""''8333r   )%r   pytest	thinc.apir   r&   spacy.lang.enr   spacy.stringsr   spacy.tokensr   
spacy.utilr   r   spacy.vectorsr	   spacy.vocabr
   utilr   test_stringstest_strings_attrsmarkissuer   r1   rG   parametrizerQ   r]   rd   rh   rj   rm   rt   rx   r    r   r   <module>r      s     %  ! %  . !  R24IJK.89  3& & 4! !( 4 & %)B *B ,l;G <G" ,l;R <R& +-?@0 A0 +-?@. A. +-?@
0 A
0 ,l;- <-  ,l;4 <4$ +-?@	4 A	4r   