import pickle
import re

import pytest

from spacy.lang.en import English
from spacy.lang.it import Italian
from spacy.language import Language
from spacy.tokenizer import Tokenizer
from spacy.training import Example
from spacy.util import load_config_from_str

from ..util import make_tempdir


@pytest.fixture
def meta_data():
    return {
        "name": "name-in-fixture",
        "version": "version-in-fixture",
        "description": "description-in-fixture",
        "author": "author-in-fixture",
        "email": "email-in-fixture",
        "url": "url-in-fixture",
        "license": "license-in-fixture",
        "vectors": {"width": 0, "vectors": 0, "keys": 0, "name": None},
    }


@pytest.mark.issue(2482)
def test_issue2482():
    """Test we can serialize and deserialize a blank NER or parser model."""
    nlp = Italian()
    nlp.add_pipe("ner")
    b = nlp.to_bytes()
    Italian().from_bytes(b)


CONFIG_ISSUE_6950 = """
[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = ${components.tok2vec.model.encode:width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
include_static_vectors = false

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3

[components.ner]
factory = "ner"

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
upstream = "*"
"""


@pytest.mark.issue(6950)
def test_issue6950():
    """Test that the nlp object with initialized tok2vec with listeners pickles
    correctly (and doesn't have lambdas).
    """
    nlp = English.from_config(load_config_from_str(CONFIG_ISSUE_6950))
    nlp.initialize(lambda: [Example.from_dict(nlp.make_doc("hello"), {"tags": ["V"]})])
    pickle.dumps(nlp)
    nlp("hello")
    pickle.dumps(nlp)
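

# Illustrative sketch, not part of the original suite: the property guarded
# above implies that a full pickle round-trip yields a usable pipeline.
# Hypothetical usage, reusing an `nlp` object built as in test_issue6950:
#
#     nlp2 = pickle.loads(pickle.dumps(nlp))
#     doc = nlp2("hello")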


def test_serialize_language_meta_disk(meta_data):
    language = Language(meta=meta_data)
    with make_tempdir() as d:
        language.to_disk(d)
        new_language = Language().from_disk(d)
    assert new_language.meta == language.meta


def test_serialize_with_custom_tokenizer():
    """Test that serialization with custom tokenizer works without token_match.
    See: https://support.prodi.gy/t/how-to-save-a-custom-tokenizer/661/2
    """
    prefix_re = re.compile(r"""1/|2/|:[0-9][0-9][A-K]:|:[0-9][0-9]:""")
    suffix_re = re.compile(r"""""")
    infix_re = re.compile(r"""[~]""")

    def custom_tokenizer(nlp):
        return Tokenizer(
            nlp.vocab,
            {},
            prefix_search=prefix_re.search,
            suffix_search=suffix_re.search,
            infix_finditer=infix_re.finditer,
        )

    nlp = Language()
    nlp.tokenizer = custom_tokenizer(nlp)
    with make_tempdir() as d:
        nlp.to_disk(d)


def test_serialize_language_exclude(meta_data):
    name = "name-in-fixture"
    nlp = Language(meta=meta_data)
    assert nlp.meta["name"] == name
    new_nlp = Language().from_bytes(nlp.to_bytes())
    assert new_nlp.meta["name"] == name
    # Excluding "meta" on deserialization drops the fixture metadata.
    new_nlp = Language().from_bytes(nlp.to_bytes(), exclude=["meta"])
    assert not new_nlp.meta["name"] == name
    # Excluding "meta" on serialization has the same effect.
    new_nlp = Language().from_bytes(nlp.to_bytes(exclude=["meta"]))
    assert not new_nlp.meta["name"] == name