
    hF                     b   S SK r S SKJr  S SKJrJr  S SKrS SKJr  S SK	J
r
  S SKJrJrJr  S SKJrJrJrJr  S SKJrJr  S S	KJr  S S
KJrJrJrJr  SSKJr  Sr Sr!Sr"Sr#\RH                  " S5      S 5       r%\ RL                  RO                  S5      S 5       r(S r)S r*S r+S r,S r-\ RL                  R]                  S\"\#/5      S 5       r/S r0S r1S r2S r3S  r4\ RL                  Rk                  S!5      S" 5       r6S# r7S$ r8S% r9\ RL                  R]                  S\"\#/5      S& 5       r:S' r;S( r<g))    N)RegistryError)ConfigConfigValidationError)German)English)DEFAULT_CONFIGDEFAULT_CONFIG_PRETRAIN_PATHLanguage)MaxoutWindowEncoderMultiHashEmbedbuild_tb_parser_modelbuild_Tok2Vec_model)ConfigSchemaConfigSchemaPretrain)Example)load_configload_config_from_strload_model_from_configregistry   )make_tempdira  
[paths]
train = null
dev = null

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}

[training]

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
size = 666

[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}
a  
[paths]
train = null
dev = null

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}

[training]

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
size = 666

[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}

[pretraining]
aX  
[model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 66
maxout_pieces = 2
use_upper = true

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 333
depth = 4
embed_size = 5555
window_size = 1
maxout_pieces = 7
subword_features = false
aY  
[model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 66
maxout_pieces = 2
use_upper = false

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 333
depth = 4
embed_size = 5555
window_size = 1
maxout_pieces = 7
subword_features = false
my_test_parserc            
      j    [        [        SSS/SS/SS9[        SSSS	S
95      n [        U SSSSSS9nU$ )NiA  LOWERSHAPEi8  F)widthattrsrowsinclude_static_vectors      r   )r   window_sizemaxout_piecesdepthparserTA      )tok2vec
state_typeextra_state_tokenshidden_widthr#   	use_upper)r   r   r   r   )r(   r%   s     e/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/serialize/test_serialize_config.py	my_parserr.      s^    !G$#(		
 	#1AQOG #F M    i  c                     SS0SS0S.n [         R                  " U 5      n[        5        nUS-  nUR                  U5        [        R
                  " USSS00S	9nUR                  S   S   S:X  d   e S
S
S
5        g
! , (       d  f       g
= f)z?Test that config overrides are not lost after load is complete.langenkeyvalue)nlpcustom
test_modelr6   updated_valueconfigN)r   from_configr   to_diskspacyloadr:   )
source_cfg
source_nlpdir_pathsource_pathr5   s        r-   test_issue8190rC      s    
 D
 '"	J $$Z0J	8-;'jjh8P-QRzz(#E*o=== 
s   AA<<
B
c                     [        5       R                  [        5      n [        R                  " [
        5         [        U SS9  S S S 5        [        U SS9nUR                  S   S   S   S:X  d   e[        UR                  S   5      S:  d   eUR                  S	S
/:X  d   e[        UR                  S   5      S:X  d   e[        UR                  S   S   5      S:X  d   eUR                  S
5        [        UR                  S   5      S:X  d   e[        UR                  S   S   5      S:X  d   e[        R                  " [        5         S0 0n[        [        U5      SS9  S S S 5        [        R                  " [        5         SSS00n[        [        U5      SS9  S S S 5        g ! , (       d  f       GNl= f! , (       d  f       N]= f! , (       d  f       g = f)NF	auto_fillTtrainingbatchersizei     r(   tagger
componentsr   r5   pipelineyolofoobar)r   from_strnlp_config_stringpytestraisesr   r   r:   len
pipe_namesremove_pipe
ValueError)r:   r5   bad_cfgs      r-   test_create_nlp_from_configrZ      s   X01F	,	-v7 
.
 4
8C::j!),V4;;;szz*%&***>>i2222szz,'(A---szz% ,-222OOHszz,'(A---szz% ,-222	z	"2,vg$? 
# 
z	"u~.vg$? 
#	" 
.	- 
#	" 
#	"s#   F(F:G(
F7:
G
Gc                      [        5       R                  [        5      n [        [        5      nU R                  U5      n[        R                  " US   [        S9  g)z;Test that the default pretraining config validates properlypretraining)schemaN)	r   rQ   pretrain_config_stringr   r	   merger   resolver   )r:   pretrain_configfilleds      r-   'test_create_nlp_from_pretraining_configrc      sD    X56F!">?O\\/*FVM*3GHr/   c                  p   [        5       R                  [        5      n U S   S   U S   S   U S   S   S.U S'   [        U S   R	                  5       5      U S   S'   [        U SS9nUR                  / SQ:X  d   eUR                  S	5      R                  S:X  d   eUR                  S
5      R                  S:X  d   eUR                  S5      R                  S:X  d   eUR                  S   n[        U5      S:X  d   e[        UR	                  5       5      / SQ:X  d   eUR                  S   S   / SQ:X  d   eg)ziTest that the nlp object is created correctly for a config with multiple
instances of the same component.rL   r(   rK   )t2vtagger1tagger2r5   rM   TrE   re   rf   rg   r    N)r   rQ   rR   listkeysr   rV   get_pipe_metafactoryr:   rU   )r:   r5   pipeline_configs      r-   .test_create_nlp_from_config_multiple_instancesrm      sD    X01Fl#I.,'1,'1F<
 !%VL%9%>%>%@ AF5M*
 4
8C>>::::U#++y888Y'//8;;;Y'//8;;;jj.O1$$$$$&'+HHHH::eZ(,IIIIr/   c                     [        5       R                  [        5      n [        U SS9nUR	                  S5      R                  S5        UR                  5         SUR                  ;   d   eSUR                  ;   d   eSUR                  ;  d   eUR	                  S5      R                  R                  S5      R                  S5      S:X  d   e[        5        nUR                  U5        [        R                  " U5      nSUR                  ;   d   eSUR                  ;   d   eSUR                  ;  d   eUR	                  S5      R                  R                  S5      R                  S5      S:X  d   e S	S	S	5        g	! , (       d  f       g	= f)
zNCreate a custom nlp pipeline from config and ensure it serializes it correctlyTrE   rK   Ar(   r%   nOV  N)r   rQ   rR   r   get_pipe	add_label
initializerV   modelget_refget_dimr   r<   r=   r>   )
nlp_configr5   dnlp2s       r-   test_serialize_nlpr{     s>   ""#45J
 t
<CLL$$S)NN&&&s~~%%%3>>)))<<!''//	:BB4HCOOO	1Azz!}DOO+++4??***t...}}X&,,44Y?GGMQTTTT 
s   BE>>
Fc                     [        5       n [        5       nSS0US'   U R                  SUS9  U R                  5         [	        5        nU R                  U5        [        R                  " U5      nUR                  S5      R                  nUR                  S5        UR                  S5      R                  S5      S	:X  d   eUR                  S
5      R                  S5      S	:X  d   e SSS5        g! , (       d  f       g= f)zBCreate a custom nlp pipeline and ensure it serializes it correctlyz@architecturesr   ru   r%   r9   r(   uppernIr&   lowerN)r   dictadd_pipert   r   r<   r=   r>   rr   ru   rv   rw   )r5   
parser_cfgry   rz   ru   s        r-   test_serialize_custom_nlpr     s    
)CJ+-=>JwLL*L-NN	1Azz!}h'--i }}W%--d3r999}}W%--d3r999 
s   B C00
C>parser_config_stringc                 \   [        5       n[        5       R                  U 5      nUR                  SUS9nUR	                  S5        UR                  5         [        5        nUR                  U5        [        R                  " U5      nUR                  S5      R                  nUR                  S5        UR                  S   (       a&  UR                  S5      R                  S5      S:X  d   eUR                  S	5      R                  S5      S:X  d   e S
S
S
5        g
! , (       d  f       g
= f)zGCreate a non-default parser config to check nlp serializes it correctlyr%   r9   nsubjr(   	has_upperr}   r~   B   r   N)r   r   rQ   r   rs   rt   r   r<   r=   r>   rr   ru   rv   r   rw   )r   r5   model_configr%   ry   rz   ru   s          r-   test_serialize_parserr   .  s    
 )C8$$%9:L\\(<\8F
WNN	1Azz!}h'--i ;;{#==)11$72===}}W%--d3r999 
s   B4D
D+c                     [        5       n U R                  S5        U R                  S5        [        U R                  SS9nUR                  U R                  :X  d   eUR                  U R                  :X  d   eUR
                  U R
                  :X  d   eUR                  U R                  :X  d   eUR                  U R                  :X  d   eg)zPTest that a config produced by the nlp object passes training config
validation.entity_rulernerFrE   N)r   r   r   r:   rV   _pipe_configs
_pipe_meta_factory_meta)r5   new_nlps     r-   test_config_nlp_roundtripr   D  s     )CLL LL$SZZ5AG>>SZZ'''///  C$5$5555///  C$5$5555r/   c                     [        5       n U R                  5       n[        5       R                  U5      nUR                  U R                  :X  d   e[        5       n [	        5        nU R                  U5        [        R                  " U5      nSSS5        UR                  U R                  :X  d   eg! , (       d  f       N+= f)zMTest that the config is serialized correctly and not interpolated
by mistake.N)r   to_bytes
from_bytesr:   r   r<   r=   r>   )r5   	nlp_bytesr   ry   s       r-   $test_config_nlp_roundtrip_bytes_diskr   R  s     )CIi""9-G>>SZZ'''
)C	1A**Q- 
 >>SZZ''' 
s   $(B11
B?c                     Sn [         R                  " U SS0S9S[        S[        S[        4S j5       n[        5       nUR                  U 5      (       a   e[        5       nUR                  U 5      (       d   eUR                  U SS0S	S
9  UR                  S   S	   nUS   S:X  d   eUS   U :X  d   e[        5        nUR                  U5        [        R                  " U5      nSSS5        WR                  U 5      (       d   eUR                  S	/:X  d   eUR                  S	5      R                  U :X  d   eUR                  S   S	   nUS   S:X  d   eUS   U :X  d   e[        5       R                  UR                  R!                  5       5      nSUS   S'   ["        R$                  " [&        5         [)        U5        SSS5        g! , (       d  f       N= f! , (       d  f       g= f)zRTest that config serialization works as expected with language-specific
factories.'test_serialize_config_language_specificrO      )default_configr5   namec                     S $ )Nc                     U $ N )docs    r-   <lambda>Qtest_serialize_config_language_specific.<locals>.custom_factory.<locals>.<lambda>g  s    3r/   r   )r5   r   rO   s      r-   custom_factory?test_serialize_config_language_specific.<locals>.custom_factorye  s    r/   d   rP   )r:   r   rL   rk   Nder1   )r   rk   r
   strinthas_factoryr   r:   r   r<   r=   r>   rV   rj   r   rQ   to_strrS   rT   rX   r   )r   r   r5   pipe_configry   rz   r:   s          r-   r   r   `  s    5D__T5"+6H C c  7 *Ct$$$$
)C??4    LLuclL7**\*51Ku$$$y!T)))	1Azz!} 
 D!!!!??ug%%%e$,,444++l+E2Ku$$$y!T)))Xt{{1134F F5M&	z	"v& 
#	" 
 
#	"s   =(G	4G	
G
G(c                     [        5       R                  [        5      n U S   R                  S5        SU S   S   ;   d   eSU S   ;  d   e[        R
                  " [        5         [        U SS9  S S S 5        g ! , (       d  f       g = f)NrL   r(   r5   rM   TrE   )r   rQ   rR   poprS   rT   rX   r   r9   s    r-   #test_serialize_config_missing_pipesr     st    X01F
<Y'uj1111F<0000	z	"v6 
#	"	"s   %A99
Bc                     SSS/S.0n SS/S.n[        5       R                  [        US9n[        USS9n[	        U[
        5      (       d   eUR                  S/:X  d   e[        5       R                  [        5      n[        USS9n[	        U[        5      (       d   eUR                  S	S/:X  d   e[        5        nUR                  U5        [        R                  " X`S
9nS S S 5        [	        U[
        5      (       d   eUR                  S/:X  d   e[        5        nUR                  U5        [        R                  " XaS
9nS S S 5        [	        U[
        5      (       d   eUR                  S/:X  d   e[        5        nUR                  U5        [        R                  " U5      nS S S 5        [	        U[        5      (       d   eUR                  S	S/:X  d   eg ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       N]= f)Nr5   r   rK   )r1   rM   )znlp.langznlp.pipeline)	overridesTrE   r(   r9   )r   rQ   rR   r   
isinstancer   rV   r   r   r<   r=   r>   )overrides_nestedoverrides_dotr:   r5   base_configbase_nlpry   s          r-   test_config_overridesr     s   8*EF!%zBMX0MJF
 4
8Cc6"""">>hZ'''(##$56K%kTBHh((((9h"7777	1jj4 
 c6"""">>hZ'''	1jj1 
 c6"""">>hZ'''	1jjm 
 c7####>>i2222 

 

 
s$   ;&G&G-?(G>
G*-
G;>
Hzignore:\[W036c            
      l   [         R                  " S5      n U R                  S5        [        5        nU R	                  U5        [         R
                  " USSSSS0000S9nUR                  S   S   S   S   S:X  d   e[        R                  " S5      S	 5       n[         R
                  " USSSS
SS0S.000S9nUR                  S   S   S   S   SS0:X  d   e[        R                  " UR                  S5      0 5      nUR                  U/5      nSU;   d   e S S S 5        g ! , (       d  f       g = f)Nr2   attribute_rulerrL   scorer@scorerszspacy.tagger_scorer.v1r9   test_some_other_keyc                      g)Nsome_other_keyr   r   r/   r-   misc_some_other_keyGtest_config_overrides_registered_functions.<locals>.misc_some_other_key  s    #r/   z)spacy.overlapping_labeled_spans_scorer.v1@misc)r   	spans_keyr   za b cspans_some_other_key_f)r=   blankr   r   r<   r>   r:   r   miscr   	from_dictmake_docevaluate)r5   ry   nlp_re1r   nlp_re2examplescoress          r-   *test_config_overrides_registered_functionsr     sm   
++d
CLL"#	1A**% :/G"H(	
 NN<():;HEjQ'(	
( 
,	-	$ 
.	$ **% (S*13H)I#(	
 ~~l+,=>xH
,-. 	. . ##G$4$4W$=rB!!7),'6111U 
s   C)D%%
D3c                     [        5       R                  [        SS9n U S   S   S   S:X  d   eU R                  5       nUS   S   S   b   e[        R
                  " U 5      nUR                  S   S   S   S:X  d   eSnU S   S	   S
   S   S   U:X  d   eUR                  S   S	   S
   S   S   U:X  d   eUR                  R                  5       nUS   S   S   b   eUS   S	   S
   S   S   S:X  d   e[        R
                  " U5      nUR                  S   S   S   b   eUR                  S   S	   S
   S   S   S:X  d   eg )NF)interpolatecorporatrainpathz${paths.train}z!${components.tok2vec.model.width}rL   rK   ru   r(   r   rq   )r   rQ   rR   r   r   r;   r:   )r:   interpolatedr5   r   interpolated2rz   s         r-   test_config_interpolationr     s   X0eDF)W%f-1AAAA%%'L	"7+F3;;;


f
%C::i )&15EEEE/E,)'29=gF%OOO::l#H-g6yA'JeSSSJJ**,M#G,V4<<<&x09)DWMQTTTT|,D;;y!'*62:::;;|$X.w7	B7KsRRRr/   c                     [        5       R                  [        5      n [        R                  " U 5      n SU ;  d   e[
        R                  " U [        SS9n[        5       R                  UR                  5       5      nUS   0 :X  d   eg )Nr\   F)r]   validate)	r   rQ   rR   r   r_   r   fillr   r   )r:   rb   
new_configs      r-   test_config_optional_sectionsr     sr    X01F!!&)F&&&]]6,GF
 ""6==?3Jm$***r/   c                      [        SS00 S.5      n [        U SS9(       d   e[        SS0SS0S.5      n [        U SSS	9nSUR                  S
   ;  d   e[        UR                  5        g )Nr1   r2   )r5   rG   TrE   extrahelloF)rF   r   rG   )r   r   r:   )r:   r5   s     r-   "test_config_auto_fill_extra_fieldsr     sn    VTN;<F!&D999VTN'8JKLF
 4%
HC#**Z00003::&r/   c                    [        5       n[        5       R                  U 5      nSUS   S'   [        R                  " [
        5         UR                  SUS9  S S S 5        SUS   S'   UR                  SUS9  g ! , (       d  f       N'= f)Nnonsenseru   r)   r%   r9   r   )r   r   rQ   rS   rT   r   r   )r   r5   r:   s      r-   test_config_validate_literalr     su     )CX34F$.F7OL!	,	-Xf- 
.$)F7OL!LL&L) 
.	-s   A88
Bc                  8   [        5       n U R                  nSS0US   S'   SS0US   S'   [        USS9n [        R                  " [
        5         U R                  5         S	S	S	5        S	U R                  S   S'   U R                  5         g	! , (       d  f       N1= f)
zTest that only the relevant blocks are resolved in the different methods
and that invalid blocks are ignored if needed. For instance, the [initialize]
shouldn't be resolved at runtime.
r   nonexistentrG   before_to_diskrt   lookupsTrE   N)r   r:   r   rS   rT   r   rt   )r5   r:   s     r-   (test_config_only_resolve_relevant_blocksr     s    
 )CZZF,3]+CF:'('.&>F<#
 4
8C	}	% 
&*.CJJ|Y'NN 
&	%s   B
Bc                      Sn [         R                  R                  S5       " S S[        5      5       n[        R
                  " [        U 5      5      nUR                  S5      R                  SS/:X  d   eg )Nz
    [nlp]
    lang = "en"
    pipeline = ["my_punctual_component"]

    [components]

    [components.my_punctual_component]
    factory = "my_punctual_component"
    punctuation = ["?","-"]
    my_punctual_componentc                       \ rS rSrSrS rSrg)2test_hyphen_in_config.<locals>.MyPunctualComponenti1  r   c                     X0l         g r   punctuation)selfr5   r   r   s       r-   __init__;test_hyphen_in_config.<locals>.MyPunctualComponent.__init__5  s
      +r/   r   N)__name__
__module____qualname____firstlineno__r   r   __static_attributes__r   r/   r-   MyPunctualComponentr   1  s    &	+r/   r   ?-)	r=   r
   rk   objectr   r;   r   rr   r   )hyphen_config_strr   r5   s      r-   test_hyphen_in_configr   $  sp    
 ^^34	+f 	+ 5	+ 

23DE
FC<</0<<c
JJJr/   )=rS   	cataloguer   	thinc.apir   r   r=   spacy.lang.der   spacy.lang.enr   spacy.languager   r	   r
   spacy.ml.modelsr   r   r   r   spacy.schemasr   r   spacy.trainingr   
spacy.utilr   r   r   r   utilr   rR   r^   parser_config_string_upperparser_config_string_no_upperarchitecturesr.   markissuerC   rZ   rc   rm   r{   r   parametrizer   r   r   r   r   r   filterwarningsr   r   r   r   r   r   r   r   r/   r-   <module>r     sx    # 3    ! Q Q  = "   1 f3 l *! * 
() ** 4> >$@*IJ*U(:$ 79VW::&6( 'F73< ,--2 .-2`S&
+' 79VW**$Kr/   