
    h             	          S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	J
r
  S SKJr  S SKJr  S SKJrJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  S SK'r'S SK(r(S S	K)J*r*  S S
K+J,r,J-r-J.r.J/r/  S SK0J1r1  SSK2J3r3J4r4J5r5  SSK6J7r7  SSK8J9r9J:r:  SSK;J<r<  SSK=J>r>J?r?J@r@  SSKAJBrBJCrC  SSKDJErE  SSKFJGrGJHrHJIrI  SSKJJKrKJLrLJMrMJNrNJOrO  SSKPJQrQ  SSKRJSrS  SSKTJUrU  SSKVJWrW  SSKXJYrYJZrZ  SSK[J\r\J]r]  SSK5J^r^J_r_J`r`JaraJbrbJcrcJdrdJereJfrf  SSKgJhrh  SSKiJjrjJkrk  \\U/\U4   rl\" \m5      R                  S-  ro\5R                  " \o5      rq\" \m5      R                  S -  rr\#" S!5      rs " S" S#5      rtS$\S%/\S4   4S& jruS' rv " S( S%5      rwS)\xS$\x4S* jry\ " S+ S,5      5       rz " S- S.\{5      r|S/\\Y   S$\\Y   4S0 jr}S1\\$\~\U\4   \s/\U4   S2\\S3\\U   4      S4\"\x\x\x4   S$S4S5 jr " S6 S75      r " S8 S95      r\" 5       rg):    N)	ExitStackcontextmanager)deepcopy)	dataclass)chaincycle)Path)default_timer)AnyCallableDictIterableIteratorListNoReturnOptionalPatternSequenceSetTupleTypeVarUnioncastoverload)Pool)ConfigCupyOps	Optimizerget_current_ops)convert_recursive   )abouttyutil)Literal)ErrorsWarnings)GIT_VERSION)TOKENIZER_INFIXESTOKENIZER_PREFIXESTOKENIZER_SUFFIXES)BASE_EXCEPTIONS	URL_MATCH)load_lookups)analyze_pipesprint_pipe_analysisvalidate_attrs)ConfigSchemaConfigSchemaInitConfigSchemaNlpConfigSchemaPretrainvalidate_init_settings)Scorer	Tokenizer)Doc)
Underscore)Examplevalidate_examples)init_tok2vec
init_vocab)	_DEFAULT_EMPTY_PIPESCONFIG_SECTION_ORDERSimpleFrozenDictSimpleFrozenList_pipecombine_score_weightsraise_errorregistrywarn_if_jupyter_cupy)BaseVectors)Vocabcreate_vocabzdefault_config.cfgzdefault_config_pretraining.cfg_AnyContextc                   H   \ rS rSr% Sr\" \S9r\\S'   \	r
\\\\   4   \S'   \r\\\\\4         \S'   \r\\\\\4         \S'   \r\\\\\4         \S'   S	r\\   \S
'   \r\\   \S'   0 r\\\4   \S'   0 r\\\\/\ 4   4   \S'   \!" 5       r"\#\   \S'   SSSS.r$Sr%g	)BaseDefaultsX   zLanguage data defaults, available via Language.Defaults. Can be
overwritten by language subclasses by defining their own subclasses of
Language.Defaults.
section_orderconfigtokenizer_exceptionsprefixessuffixesinfixesNtoken_match	url_matchsyntax_iteratorslex_attr_getters
stop_wordsltrT)	directionhas_casehas_letters )&__name__
__module____qualname____firstlineno____doc__r   rA   rR   __annotations__r,   rS   r   strr   dictr*   rT   r   r   r   r   r+   rU   r)   rV   rW   r   r-   rX   rY   rZ   intr   setr[   r   writing_system__static_attributes__r`       H/home/james-whalen/.local/lib/python3.13/site-packages/spacy/language.pyrN   rN   X   s    
 *>?FF?2A$sDJ/A8JHhxc7l 345J8JHhxc7l 345J7HGXhuS'\234H&*K(#*$-Ix!-,.d3=).8:d3# 445:5JC #(d4PNrm   rN   returnLanguagec                       SSS[         4S jn U $ )zRegistered function to create a tokenizer. Returns a factory that takes
the nlp object and returns a Tokenizer instance using the language detaults.
nlprp   ro   c           
      &   U R                   R                  nU R                   R                  nU R                   R                  nU(       a   [        R
                  " U5      R                  OS nU(       a   [        R                  " U5      R                  OS nU(       a   [        R                  " U5      R                  OS n[        U R                  U R                   R                  UUUU R                   R                  U R                   R                  S9$ )N)rulesprefix_searchsuffix_searchinfix_finditerrW   rX   )DefaultsrT   rU   rV   r$   compile_prefix_regexsearchcompile_suffix_regexcompile_infix_regexfinditerr9   vocabrS   rW   rX   )rr   rT   rU   rV   ru   rv   rw   s          rn   tokenizer_factory+create_tokenizer.<locals>.tokenizer_factoryp   s    <<((<<((,,&&FN11(;BBTXFN11(;BBTXGN11':CCTXII,,33'')00ll,,
 	
rm   r8   )r   s    rn   create_tokenizerr   k   s    

z 
i 
" rm   c                 X    [         R                  R                  SU5        [        XS9nU$ )Nz+Loading lookups from spacy-lookups-data: %s)langtables)r$   loggerdebugr.   )r   r   lookupss      rn   load_lookups_datar      s&    KKCVL4GNrm   c                     ^  \ rS rSr% Sr\rSr\\	   \
S'   \r\" \R                  S9r0 r\\	S4   \
S'    SS	0 SSS
S.S\\\4   S\S\\	\4   S\\S /\\	/\4   4      S\\S/\4      S\SS4S jjjrU 4S jr\S 5       r\S\\	\4   4S j5       r\R@                  S\\	\4   SS4S j5       r\S\!4S j5       r"\"R@                  S\!SS4S j5       r"\S\#\	   4S j5       r$\S\#\	   4S j5       r%\S\#\&\	\'4      4S j5       r(\S\#\	   4S j5       r)\S\#\&\	\'4      4S  j5       r*\S\#\	   4S! j5       r+\S\\	\	4   4S" j5       r,\S\\	\#\	   4   4S# j5       r-\.S$\	S\4S% j5       r/\.S$\	S\	4S& j5       r0\.S$\	SS4S' j5       r1\.S$\	SSSS4S( j5       r2S$\	SS4S) jr3S$\	S\!4S* jr4\.\" 5       \5" 5       \5" 5       S+\" 5       SS,.S$\	S-\\	\4   S.\6\	   S/\6\	   S0\S1\\	\\7   4   S2\\   S\4S3 jj5       r8\.\5" 5       \5" 5       S+SS4.S$\	S.\6\	   S/\6\	   S0\S2\\'   S\S5\4   4S6 jj5       r9/ S7QS+S8.S9\#\	   S:\S\\\	\4      4S; jjr:S$\	S\'4S< jr; S\" 5       SSS=.S>\	S$\\	   S?\\	\4   S@\\!   SA\S\'4SB jjjr<SC\	SDS S$\	S\&\'\	4   4SE jr= SSSSSS\" 5       SSSF.S>\	S$\\	   SG\\\	\4      SH\\\	\4      SI\\   SJ\\   SD\S    S?\\	\4   S@\\!   SA\S\'4SK jjjr>    SSG\\\	\4      SH\\\	\4      SI\\   SJ\\   S\4
SL jjr?S$\	S\4SM jr@\" 5       SSN.S$\	S>\	S?\\	\4   SA\S\'4
SO jjrASP\	SQ\	SS4SR jrBS$\	S\&\	\'4   4SS jrCS$\	SS4ST jrDS$\	SS4SU jrE\5" 5       SSV.SW\\	\4   SX\6\	   SY\\\	\\	\4   4      S\4SZ jjrFSS\ jrGSSS].SX\\\	\6\	   4      S^\\\	\6\	   4      SS[4S_ jjrHSW\	S\4S` jrISa\\	\\J4   S\4Sb jrKSa\\	\\J4   Sc\LS\4Sd jrM SSeSSS\5" 5       \5" 5       Sf.Sg\6\N   Sh\\   Si\7Sj\\O   Sk\\\	\74      SY\\\	\\	\4   4      Sl\6\	   Sm\6\	   4Sn jjjrPSSS\5" 5       So.Sg\6\N   Sj\\O   Sk\\\	\74      SY\\\	\\	\4   4      Sl\6\	   S\\	\74   4Sp jjrQ SSSq.Sr\\/ \6\N   4      Sj\\O   S\O4Ss jjjrR SSSq.Sr\\/ \6\N   4      Sj\\O   S\O4St jjjrSSSq.Sj\\O   S\O4Su jjrTSv\\	\'\#\   \U/\V4   4Sw jrWSSSSS+Sx.Sg\6\N   S\\   Sy\\X   SY\\\	\\	\4   4      Sz\\\	\4      S{\S\\	\4   4S| jjrYS} rZ\[S~\\\   4S j5       r]\^S5S5S5S5S5S.S\6\\	\4      S\_S+   S\\   SX\6\	   SY\\\	\\	\4   4      S\S\`\   4S jj5       ra\^S5S5S5S5S5S.S\6\&\\	\4   \L4      S\_S   S\\   SX\6\	   SY\\\	\\	\4   4      S\S\`\&\\L4      4S jj5       raS+S\5" 5       SSS.S\\6\\	\4      \6\&\\	\4   \L4      4   S\S\\   SX\6\	   SY\\\	\\	\4   4      S\S\\`\   \`\&\\L4      4   4S jjraSX\6\	   4S jrbS\6\\	\4      S\6\S5\`\   4      S\S\S\`\   4
S jrcSS jrd\.0 4S\e\e\e\" 5       SSS.S?\\\	\4   \!4   S\\\4   SX\\	\6\	   4   S^\\	\6\	   4   Sl\\	\6\	   4   S\\	\4   S\SA\SS 4S jjj5       rfS\	S\	S\6\	   SS4S jrg\[SS\\h   S\`\h   4S jj5       ri\5" 5       S.S\\	\j4   Sl\6\	   SS4S jjrk\lSX\\	\6\	   4   S^\\	\6\	   4   S\6\	   S\&\	S54   4S j5       rm\5" 5       \" 5       S.S\\	\j4   Sl\6\	   S\\	\4   SS 4S jjrn\5" 5       S.Sl\6\	   S\J4S jjro\5" 5       S.S\JSl\6\	   SS 4S jjrpSrqU =rr$ )rp      a:  A text-processing pipeline. Usually you'll load this once per process,
and pass the instance around your application.

Defaults (class): Settings, data and factory methods for creating the `nlp`
    object and processing pipeline.
lang (str): IETF language code, such as 'en'.

DOCS: https://spacy.io/api/language
Nr   errorFactoryMeta_factory_metaTi@B i  )
max_lengthmetar   create_vectors
batch_sizer~   r   r   r   r   rJ   r   ro   c                   SSK Jn  U" 5         [        R                  R                  R                  5         [        R                  U R                  5      U l	        [        U5      U l        SU l        SU l        0 U l        0 U l        [!        U["        5      (       d9  USLa4  [%        [&        R(                  R+                  U[-        ["        5      S95      eUSL a  UR/                  S0 5      R/                  S5      n	[1        U R2                  U R4                  U	S9nU(       d-  SU R                  S	   S   0n
[        R6                  " U
5      S   nU" U5      Ul        OwU R2                  (       af  UR2                  (       aU  U R2                  UR2                  :w  a;  [%        [&        R:                  R+                  U R2                  UR2                  S
95      eXl        U R2                  c  U R<                  R2                  U l        / U l        [A        5       U l!        X l"        U(       d-  SU R                  S	   S   0n[        R6                  " U5      S   nU" U 5      U l#        X`l$        [J        U l&        g)a  Initialise a Language object.

vocab (Vocab): A `Vocab` object. If `True`, a vocab is created.
meta (dict): Custom meta data for the Language class. Is written to by
    models to add model meta data.
max_length (int): Maximum number of characters in a single text. The
    current models may run out memory on extremely long texts, due to
    large internal allocations. You should segment these texts into
    meaningful units, e.g. paragraphs, subsections etc, before passing
    them to spaCy. Default maximum length is 1,000,000 charas (1mb). As
    a rule of thumb, if all pipeline components are enabled, spaCy's
    default models currently requires roughly 1GB of temporary memory per
    100,000 characters in one text.
create_tokenizer (Callable): Function that takes the nlp object and
    returns a tokenizer.
batch_size (int): Default batch size for pipe and evaluate.

DOCS: https://spacy.io/api/language#init
r!   )register_factoriesNT)r~   
vocab_typevectorsname)vectors_namerr   )rr   r~   	tokenizer)'pipeline.factoriesr   r$   rG   _entry_point_factoriesget_allDEFAULT_CONFIGmergedefault_config_configrh   _meta_path
_optimizer
_pipe_meta_pipe_configs
isinstancerJ   
ValueErrorr&   E918formattypegetrK   r   rx   resolver   E150r~   _componentsrj   	_disabledr   r   r   rF   default_error_handler)selfr~   r   r   r   r   r   kwargsr   r   vectors_cfgtokenizer_cfgs               rn   __init__Language.__init__   s   < 	; 	,,446%++D,?,?@$Z

/34602%''E,=V[[//eU/TUUD=88Ir266v>L DMMUE!($,,u*=i*HI!)!1!1+!>y!I*51EM		ejjtyyEJJ/F !3!3		!3!TUU!
99

DI;=#&5$($,,u*=k*JKM'//>{K)$/$%0"rm   c                    > [         TU ]  " S0 UD6  [        R                  U R                  R
                  5      U l        U R                  U R                  S   S'   g )Nrr   r   r`   )super__init_subclass__r   r   rx   rR   r   r   )clsr   	__class__s     rn   r   Language.__init_subclass__   sI    !+F++11#,,2E2EF,/HH5!&)rm   c                     U R                   $ N)r   r   s    rn   pathLanguage.path   s    zzrm   c                    [         R                  " [        R                  5      nU R                  R
                  (       a1  U R                  R                  SU R                  R
                  5        O&U R                  R                  SU R
                  5        U R                  R                  SS5        U R                  R                  SS5        U R                  R                  SU5        U R                  R                  SS5        U R                  R                  S	S5        U R                  R                  S
S5        U R                  R                  SS5        U R                  R                  SS5        U R                  R                  S[        5        U R                  R                  [        U R                  R                  5      U R                  R                  R                  U R                  R                  R                  U R                  R                  R                  S.U R                  S'   [        U R                   5      U R                  S'   [#        U R$                  5      U R                  S'   [#        U R&                  5      U R                  S'   [#        U R(                  5      U R                  S'   U R                  $ )zCustom meta data of the language class. If a model is loaded, this
includes details from the model's meta.json.

RETURNS (Dict[str, Any]): The meta.

DOCS: https://spacy.io/api/language#meta
r   r   pipelineversionz0.0.0spacy_versiondescription authoremailurllicensespacy_git_version)widthr   keysr   moder   labels
componentsdisabled)r$   get_minor_version_ranger"   __version__r~   r   r   
setdefaultr(   vectors_lengthlenr   n_keysr   r   rh   pipe_labelslist
pipe_namescomponent_namesr   )r   r   s     rn   r   Language.meta   s    44U5F5FG::??JJ!!&$**//:JJ!!&$))4

fj1

i1

o}=

mR0

h+

gr*

eR(

i,

1;?ZZ..4::--.JJ&&--JJ&&++JJ&&++!


9  $D$4$45

8 "&doo!6

:#'(<(<#=

< !%dmm!4

:zzrm   valuec                     Xl         g r   )r   r   r   s     rn   r   r     s    
rm   c                    U R                   R                  S0 5        U R                   R                  S0 5        U R                  U R                   S   S'   0 n/ nU R                   He  nU R	                  U5      nU R                  U5      nSUR                  0UEX'   UR                  (       d  MJ  UR                  UR                  5        Mg     [        U R                  5      U R                   S   S'   [        U R                  5      U R                   S   S'   XR                   S'   U R                   S   R                  S0 5      n[        X&5      nXpR                   S   S'   [        R                  " U R                   5      (       d0  [        [         R"                  R%                  U R                   S	95      eU R                   $ )
zTrainable config for the current language instance. Includes the
current pipeline components, as well as default training config.

RETURNS (thinc.api.Config): The config.

DOCS: https://spacy.io/api/language#config
rr   trainingr   factoryr   r   r   score_weightsrR   )r   r   r   r   get_pipe_metaget_pipe_configr   default_score_weightsappendr   r   r   rE   srslyis_json_serializabler   r&   E961r   )r   r   r   	pipe_name	pipe_metapipe_configprev_weightscombined_score_weightss           rn   rR   Language.config  sx    	r*
B/&*iiUF# --I**95I..y9K#,i.?.?"O;"OH...$$Y%D%DE . +/t/C/C*DUJ'*.t}}*=UJ'%-\" ||J/33ORH!6}!S4JZ 1))$,,77V[[//t||/DEE||rm   c                     Xl         g r   )r   r   s     rn   rR   r   :  s    rm   c                     U R                    VVs/ s H  u  pXR                  ;   d  M  UPM     nnn[        U[        R                  R                  SS9S9$ s  snnf )zYGet the names of all disabled components.

RETURNS (List[str]): The disabled components.
r   attrr   r   r   rC   r&   E926r   )r   r   _namess       rn   r   Language.disabled>  sP     &*%5%5P%5'$9O%5PV[[-?-?Z-?-PQQ Qs
   AAc                 ^    [        U R                  R                  5       5      n[        U5      $ )zOGet names of all available factories.

RETURNS (List[str]): The factory names.
)r   	factoriesr   rC   r   r   s     rn   factory_namesLanguage.factory_namesI  s&     T^^((*+&&rm   c                 `    [        U R                  [        R                  R	                  SS9S9$ )z_Get all (name, component) tuples in the pipeline, including the
currently disabled components.
r   r   r   )rC   r   r&   r   r   r   s    rn   r   Language.componentsR  s.    
  FKK$6$6L$6$I
 	
rm   c                     U R                    VVs/ s H  u  pUPM	     nnn[        U[        R                  R	                  SS9S9$ s  snnf )zGet the names of the available pipeline components. Includes all
active and inactive pipeline components.

RETURNS (List[str]): List of component name strings, in order.
r   r   r   )r   rC   r&   r   r   r   r   r   r   s       rn   r   Language.component_names[  sH     04/?/?@/?|y/?@V[[-?-?EV-?-WXX A   Ac                     U R                    VVs/ s H  u  pXR                  ;  d  M  X4PM     nnn[        U[        R                  R                  SS9S9$ s  snnf )zThe processing pipeline consisting of (name, component) tuples. The
components are called on the Doc in order as it passes through the
pipeline.

RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline.
r   r   r   r   )r   nppipess       rn   r   Language.pipelinee  sT     %)$4$4P$4DA8O!$4PV[[-?-?Z-?-PQQ Qs
   AAc                     U R                    VVs/ s H  u  pUPM	     nnn[        U[        R                  R	                  SS9S9$ s  snnf )zsGet names of available active pipeline components.

RETURNS (List[str]): List of component name strings, in order.
r   r   r   )r   rC   r&   r   r   r  s       rn   r   Language.pipe_namesp  sB     04}}=}|y}=V[[-?-?\-?-RSS >r  c                     0 nU R                    H"  u  p#U R                  U5      R                  X'   M$     [        U5      $ )zGet the component factories for the available pipeline components.

RETURNS (Dict[str, str]): Factory names, keyed by component names.
)r   r   r   rB   )r   r   r   pipes       rn   pipe_factoriesLanguage.pipe_factoriesy  s?     	#//OI#'#5#5i#@#H#HI   0	**rm   c                     0 nU R                    HQ  u  p#[        US5      (       a  UR                  SL a  M'  [        US5      (       d  M:  [        UR                  5      X'   MS     [        U5      $ )zGet the labels set by the pipeline components, if available (if
the component exposes a labels property and the labels are not
hidden).

RETURNS (Dict[str, List[str]]): Labels keyed by component name.
hide_labelsTr   )r   hasattrr  r   r   rB   )r   r   r   r  s       rn   r   Language.pipe_labels  sb     **JDt]++0@0@D0HtX&&#DKK0	 +
  ''rm   r   c                 ~    U R                  U5      nU[        R                  ;   =(       d    U[        R                  ;   $ )z=RETURNS (bool): Whether a factory of that name is registered.)get_factory_namerG   r   r   r   internal_names      rn   has_factoryLanguage.has_factory  s5     ,,T2x)))P]h>P>P-PPrm   c                 B    U R                   c  U$ U R                    SU 3$ )zGet the internal factory name based on the language subclass.

name (str): The factory name.
RETURNS (str): The internal factory name.
.)r   )r   r   s     rn   r  Language.get_factory_name  s(     88K((1TF##rm   c                     U R                  U5      nX R                  ;   a  U R                  U   $ XR                  ;   a  U R                  U   $ [        [        R                  R                  SUS95      e)zGet the meta information for a given factory name.

name (str): The component factory name.
RETURNS (FactoryMeta): The meta for the given factory name.
r   r   r   )r  r   r   r&   E967r   r  s      rn   get_factory_metaLanguage.get_factory_meta  sm     ,,T2---$$]33$$$$$T**+++FGGrm   c                 >    X R                   U R                  U5      '   g)zSet the meta information for a given factory name.

name (str): The component factory name.
value (FactoryMeta): The meta to set.
N)r   r  )r   r   r   s      rn   set_factory_metaLanguage.set_factory_meta  s     9>#..t45rm   c                     XR                   ;  a'  [        [        R                  R	                  SUS95      eU R                   U   $ )zGet the meta information for a given component name.

name (str): The component name.
RETURNS (FactoryMeta): The meta for the given component name.
	componentr   )r   r   r&   r!  r   r   r   s     rn   r   Language.get_pipe_meta  s<     &V[[//[t/LMMt$$rm   c                     XR                   ;  a&  [        [        R                  R	                  US95      eU R                   U   nU$ )zGet the config used to create a pipeline component.

name (str): The component name.
RETURNS (Config): The config used to create the pipeline component.
r   )r   r   r&   E960r   )r   r   r   s      rn   r   Language.get_pipe_config  sC     )))V[[//T/:;;((.rm   F)r   assignsrequiresretokenizesr   funcr   r/  r0  r1  r   r2  c                  ^ ^^^^^^ [        T[        5      (       d&  [        [        R                  R                  SS95      eST;   a&  [        [        R                  R                  TS95      e[        T[        5      (       d3  [        R                  R                  ST[        T5      S9n[        U5      eS[        S[        4UU UUUUU4S	 jjn	Ub  U	" U5      $ U	$ )
a  Register a new pipeline component factory. Can be used as a decorator
on a function or classmethod, or called as a function with the factory
provided as the func keyword argument. To create a component and add
it to the pipeline, you can use nlp.add_pipe(name).

name (str): The name of the component factory.
default_config (Dict[str, Any]): Default configuration, describing the
    default values of the factory arguments.
assigns (Iterable[str]): Doc/Token attributes assigned by this component,
    e.g. "token.ent_id". Used for pipeline analysis.
requires (Iterable[str]): Doc/Token attributes required by this component,
    e.g. "token.ent_id". Used for pipeline analysis.
retokenizes (bool): Whether the component changes the tokenization.
    Used for pipeline analysis.
default_score_weights (Dict[str, Optional[float]]): The scores to report during
    training, and their default weight towards the final score used to
    select the best model. Weights should sum to 1.0 per component and
    will be combined and normalized for the whole pipeline. If None,
    the score won't be shown in the logs or be weighted.
func (Optional[Callable]): Factory function if not used as a decorator.

DOCS: https://spacy.io/api/language#factory
r   	decoratorr  r,  zdefault configstyler   cfg_typefactory_funcro   c           
        > TR                  T
5      nU[        R                  ;   ac  [        R                  R                  U5      n[        R
                  " X5      (       d)  [        R                  R                  T
X S9n[        U5      e[        R                  " U 5      nSU;  d  SU;  a&  [        [        R                  R                  T
S95      e[        R                  R                  XS9  [        T
T[        T5      [        T5      [        T	R!                  5       5      T	TS9nTR#                  T
U5        [%        [        R                  R'                  5       [        R(                  S9Tl        U $ )N)r   r2  new_funcrr   r   r,  )r2  )r   r   r/  r0  scoresr   r1  r   )r  rG   r   r   r$   is_same_funcr&   E004r   r   get_arg_namesE964registerr   r1   r   r   r%  rB   r   E957)r9  r  existing_funcerr	arg_namesfactory_metar/  r   r   r   r   r0  r1  s         rn   add_factory%Language.factory.<locals>.add_factory  sA   006M 2 22
 !) 2 2 6 6} E((EE ++,,! - C %S/)**<8II%y)@ !3!3!3!>??
 '''I&-&w/'116689&;'L   |4
 -""**,FKKCM  rm   )r   rg   r   r&   E963r   E853rh   E962r   r   )
r   r   r   r/  r0  r1  r   r2  rD  rG  s
   ```````   rn   r   Language.factory  s    F $$$V[[//)/DEE$;V[[//T/:;;.$//++$$&TD<P % C S/!'	 h '	 8 '	  '	 R t$$rm   r/  r0  r1  r2  .c                h  ^ ^^^^^^ Tbg  [        T[        5      (       d&  [        [        R                  R                  SS95      eST;   a&  [        [        R                  R                  TS95      eTb  TO[        R                  " T5      mS[        S[        4UU UUUUU4S jjnTb  U" T5      $ U$ )ah  Register a new pipeline component. Can be used for stateless function
components that don't require a separate factory. Can be used as a
decorator on a function or classmethod, or called as a function with the
factory provided as the func keyword argument. To create a component and
add it to the pipeline, you can use nlp.add_pipe(name).

name (str): The name of the component factory.
assigns (Iterable[str]): Doc/Token attributes assigned by this component,
    e.g. "token.ent_id". Used for pipeline analysis.
requires (Iterable[str]): Doc/Token attributes required by this component,
    e.g. "token.ent_id". Used for pipeline analysis.
retokenizes (bool): Whether the component changes the tokenization.
    Used for pipeline analysis.
func (Optional[Callable[[Doc], Doc]): Factory function if not used as a decorator.

DOCS: https://spacy.io/api/language#component
r(  r4  r  r,  component_funcro   c                   >^  [        T
[        5      (       a&  [        [        R                  R                  T	S95      eS[        S[        4U 4S jjnTR                  T5      nU[        R                  ;   aq  [        R                  R                  U5      nUR                  nU(       a  U Vs/ s H  oUR                  PM     snS   OS n[        R                  " UT 5      (       a  UnTR!                  T	TTTUS9  T $ s  snf )Nr,  r   ro   c                    > T$ r   r`   )rr   r   rO  s     rn   r9  ?Language.component.<locals>.add_component.<locals>.factory_funcQ  s	    %%rm   r   rM  )r   r   r   r&   E965r   rg   PipeCallabler  rG   r   r   __closure__cell_contentsr$   r=  r   )rO  r9  r  rC  closurecwrappedr/  r   component_namer2  r   r0  r1  s   `      rn   add_component)Language.component.<locals>.add_componentM  s    $%% !3!3!3!HII& & &  006M 2 22 !) 2 2 6 6} E'33CJG<Gq??G<Q?PT$$Wn==#0LKK!'!   "! =s   -C=)r   rg   r   r&   rI  r   rJ  r$   get_object_namerT  r   )r   r   r/  r0  r1  r2  r[  rZ  s   `````` @rn   r(  Language.component+  s    6 dC(( !3!3k!3!JKKd{ !3!3!3!>??!%!1t7K7KD7Q	", 	"8 	" 	">  &&rm   )r/  r0  r<  r1  )r   prettyr   r_  c                8    [        XS9nU(       a	  [        X1S9  U$ )aR  Analyze the current pipeline components, print a summary of what
they assign or require and check that all requirements are met.

keys (List[str]): The meta values to display in the table. Corresponds
    to values in FactoryMeta, defined by @Language.factory decorator.
pretty (bool): Pretty-print the results.
RETURNS (dict): The data.
)r   )r/   r0   )r   r   r_  analysiss       rn   r/   Language.analyze_pipesp  s     !14rm   c                     U R                    H  u  p#X!:X  d  M  Us  $    [        [        R                  R	                  XR
                  S95      e)zGet a pipeline component for a given component name.

name (str): Name of pipeline component to get.
RETURNS (callable): The pipeline component.

DOCS: https://spacy.io/api/language#get_pipe
r   opts)r   KeyErrorr&   E001r   r   )r   r   r   r(  s       rn   get_pipeLanguage.get_pipe  sH     %)$4$4 I    %5 v{{))t:N:N)OPPrm   )rR   
raw_configvalidatefactory_namerR   rj  rk  c                F   Ub  UOUn[        U[        5      (       d3  [        R                  R	                  SU[        U5      S9n[        U5      e[        R                  " U5      (       d&  [        [        R                  R	                  US95      eU R                  U5      (       dc  [        R                  R	                  USR                  U R                  5      S[        R                  " U 5      U R                   S9n[        U5      eU R#                  U5      nUR$                  (       a$  ['        UR$                  5      R)                  U5      nU R+                  U5      nU[,        R.                  ;  a  UnXS.UES	U0EnX0n	[,        R0                  " XS
9n
[,        R2                  " SX   0US
9S   n['        U5      nXS'   UR5                  S	S5        UR5                  SS5        UR5                  SS5        U(       a  UR)                  U5      nXR6                  U'   X   $ )a  Create a pipeline component. Mostly used internally. To create and
add a component to the pipeline, you can use nlp.add_pipe.

factory_name (str): Name of component factory.
name (Optional[str]): Optional name to assign to component instance.
    Defaults to factory name if not set.
config (Dict[str, Any]): Config parameters to use for this component.
    Will be merged with default config, if available.
raw_config (Optional[Config]): Internals: the non-interpolated config.
validate (bool): Whether to validate the component config against the
    arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.

DOCS: https://spacy.io/api/language#create_pipe
NrR   r6  r   , create_pipe)r   re  methodr   	lang_code)rr   r   z
@factories)rk  cfgr   rr   r   )r   rh   r&   rK  r   r   r   r   r   r   r  E002joinr   r$   r]  r   r"  r   r   r   r  rG   r   r   fillpopr   )r   rl  r   rR   rj  rk  rD  r   r  rr  resolvedfilleds               rn   ro  Language.create_pipe  s   0 't\&$''++$$8$f$VCS/!))&11V[[//v/>??--++$$!YYt112$))$/)) % C S/!)),7	 ##I445;;FCF--l;  2 22(M SvS|]S $ ##C;s'89HMeT(y

<& 	

5$

64  \\*-F#)4 %%rm   source_namesourcec          	         [        U[        5      (       d0  [        [        R                  R                  U[        U5      S95      eU R                  R                  UR                  R                  :w  a1  [        R                  " [        R                  R                  US95        XR                  ;  a`  [        [        R                  R                  UUR                   S    SUR                   S    3SR#                  UR                  5      S95      eUR%                  U5      n['        US5      (       a  X4l        UR*                  R-                  5       n[.        R0                  " US   U   5      nX`R2                  U'   U R                  R4                  UR                  R4                  :w  aB  UR                  R4                   H(  nU R                  R4                  R7                  U5        M*     XFS	   4$ )
aM  Create a pipeline component by copying it from an existing model.

source_name (str): Name of the component in the source pipeline.
source (Language): The source nlp object to copy from.
name (str): Optional alternative name to use in current pipeline.
RETURNS (Tuple[Callable[[Doc], Doc], str]): The component and its factory name.
)r   r{  r,  r   r   r   rn  )r   modelre  r   r   )r   rp   r   r&   E945r   r   r~   r   warningswarnr'   W113r   rf  E944r   rt  rh  r  r   rR   interpolater$   copy_configr   stringsadd)r   rz  r{  r   r  source_configr   ss           rn   create_pipe_from_source Language.create_pipe_from_source  s    &(++V[[//[f/VWW::!5!55MM(--..K.@A444""$#[[016;;v3F2GH6#9#9: #   {+ 4  I 113&&}\'B;'OP#.4 ::!5!55\\))

""&&q) *+++rm   )beforeafterfirstlastr{  rR   rj  rk  r  r  r  r  c                   [        U[        5      (       d3  [        U5      n[        R                  R                  XS9n[        U5      eUb  UOUnX R                  ;   a0  [        [        R                  R                  X R                  S95      eSU;   a@  [        R                  " [        R                  R                  UR                  S5      S95        Ub  U R                  XUS9u  pOU R                  UUUU	U
S9nU R!                  X4XV5      nU R#                  U5      U R$                  U'   U R&                  R)                  XU45        U R+                  5         U$ )aC  Add a component to the processing pipeline. Valid components are
callables that take a `Doc` object, modify it and return it. Only one
of before/after/first/last can be set. Default behaviour is "last".

factory_name (str): Name of the component factory.
name (str): Name of pipeline component. Overwrites existing
    component.name attribute if available. If no name is set and
    the component exposes no name attribute, component.__name__ is
    used. An error is raised if a name already exists in the pipeline.
before (Union[str, int]): Name or index of the component to insert new
    component directly before.
after (Union[str, int]): Name or index of the component to insert new
    component directly after.
first (bool): If True, insert component first in the pipeline.
last (bool): If True, insert component last in the pipeline.
source (Language): Optional loaded nlp object to copy the pipeline
    component from.
config (Dict[str, Any]): Config parameters to use for this component.
    Will be merged with default config, if available.
raw_config (Optional[Config]): Internals: the non-interpolated config.
validate (bool): Whether to validate the component config against the
    arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.

DOCS: https://spacy.io/api/language#add_pipe
r(  r   rd  r   )name_in_configr,  )r   rR   rj  rk  )r   rg   reprr&   E966r   r   r   E007r  r  r'   W119rv  r  ro  _get_pipe_indexr"  r   r   insert_link_components)r   rl  r   r  r  r  r  r{  rR   rj  rk  bad_valrD  pipe_component
pipe_indexs                  rn   add_pipeLanguage.add_pipe   sM   P ,,,<(G++$$w$BCS/!'t\'''V[[//T@T@T/UVVVMM(--..fjj>P.QR ,0+G+G4 ,H ,(NL "--%! . N ))&E
 $ 5 5l C
>,BCrm   c                    XX4S.n[        S XX44 5       5      S:  a0  [        [        R                  R	                  XPR
                  S95      eU(       d  [        S X1U4 5       5      (       d  [        U R                  5      $ U(       a  g[        U[        5      (       aZ  XR
                  ;  a0  [        [        R                  R	                  XR
                  S95      eU R
                  R                  U5      $ [        U[        5      (       a]  X R
                  ;  a0  [        [        R                  R	                  X R
                  S95      eU R
                  R                  U5      S-   $ [        U5      [        :X  aT  U[        U R                  5      :  d  US:  a3  [        R                  R	                  S	XR
                  S
9n[        U5      eU$ [        U5      [        :X  aW  U[        U R                  5      :  d  US:  a3  [        R                  R	                  SX R
                  S
9n[        U5      eUS-   $ [        [        R                  R	                  XPR
                  S95      e)a  Determine where to insert a pipeline component based on the before/
after/first/last values.

before (str): Name or index of the component to insert directly before.
after (str): Name or index of component to insert directly after.
first (bool): If True, insert component first in the pipeline.
last (bool): If True, insert component last in the pipeline.
RETURNS (int): The index of the new pipeline component.
)r  r  r  r  c              3   (   #    U  H  oS Lv   M
     g 7fr   r`   ).0args     rn   	<genexpr>+Language._get_pipe_index.<locals>.<genexpr>W  s     G*F3$*F      )argsre  c              3   (   #    U  H  oS Lv   M
     g 7fr   r`   )r  r   s     rn   r  r  [  s     Q:P,:Pr  r   rd  r!   r  )diridxre  r  )sumr   r&   E006r   r   anyr   r   r   rg   rg  indexr   ri   E959)r   r  r  r  r  all_argsrD  s          rn   r  Language._get_pipe_indexF  s     %uSG6%*FGG1L""7K7K"L  sQ5%:PQQQt''(($$111 KK&&F9M9M&N  ''--f55s##000 KK&&E8L8L&M  ''--e4q88 &\S T--..&1*kk(( f3G3G )  !o%M%[CD,,--kk((U1E1E )  !o%19++@T@T+UVVrm   c                     XR                   ;   $ )zCheck if a component name is present in the pipeline. Equivalent to
`name in nlp.pipe_names`.

name (str): Name of the component.
RETURNS (bool): Whether a component of the name exists in the pipeline.

DOCS: https://spacy.io/api/language#has_pipe
)r   r)  s     rn   has_pipeLanguage.has_pipe}  s     &&rm   )rR   rk  c                
   XR                   ;  a0  [        [        R                  R	                  XR
                  S95      e[        US5      (       a2  [        R                  R	                  [        U5      US9n[        U5      eU R                   R                  U5      nU R                  U5        [        U R                  5      (       a  U[        U R                  5      :X  a  U R                  X!X4S9$ U R                  UUUUUS9$ )a  Replace a component in the pipeline.

name (str): Name of the component to replace.
factory_name (str): Factory name of replacement component.
config (Optional[Dict[str, Any]]): Config parameters to use for this
    component. Will be merged with default config, if available.
validate (bool): Whether to validate the component config against the
    arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The new pipeline component.

DOCS: https://spacy.io/api/language#replace_pipe
rd  __call__r  )r   rR   rk  )r   r  rR   rk  )r   r   r&   rg  r   r   r  E968r  r  remove_piper   r   r  )r   r   rl  rR   rk  rD  r  s          rn   replace_pipeLanguage.replace_pipe  s    ( +++V[[//T/PQQ<,,++$$tL/A$MCS/! ))//5
4##$$
c$:J:J6K(K== !   ==!! !  rm   old_namenew_namec                    XR                   ;  a0  [        [        R                  R	                  XR                   S95      eX R                   ;   a0  [        [        R
                  R	                  X R                   S95      eU R                   R                  U5      nX R                  U   S   4U R                  U'   U R                  R                  U5      U R                  U'   U R                  R                  U5      U R                  U'   XR                  S   S   ;   a5  U R                  S   S   R                  U5      nX@R                  S   S   U'   U R                  5         g)zRename a pipeline component.

old_name (str): Name of the component to rename.
new_name (str): New name of the component.

DOCS: https://spacy.io/api/language#rename_pipe
rd  r!   
initializer   N)r   r   r&   rg  r   r  r  r   r   rv  r   r   r  )r   r  r  iinit_cfgs        rn   rename_pipeLanguage.rename_pipe  s;    ///""7K7K"L  +++""7K7K"L    &&x0')9)9!)<Q)?@$(OO$7$7$A!'+'9'9'='=h'G8$||L1,??||L1,?CCHMHAILL&|4X>rm   c                    XR                   ;  a0  [        [        R                  R	                  XR                   S95      eU R
                  R                  U R                   R                  U5      5      nU R                  R                  U5        U R                  R                  U5        U R                  R                  S0 5      R                  US5        XR                  S   S   ;   a!  U R                  S   S   R                  U5        XR                  ;   a  U R                  R                  U5        U R!                  5         U$ )zRemove a component from the pipeline.

name (str): Name of the component to remove.
RETURNS (Tuple[str, Callable[[Doc], Doc]]): A `(name, component)` tuple of the removed component.

DOCS: https://spacy.io/api/language#remove_pipe
rd  _sourced_vectors_hashesNr  r   )r   r   r&   rg  r   r   rv  r  r   r   r   r   r   r   r   remover  )r   r   removeds      rn   r  Language.remove_pipe  s     +++V[[//T@T@T/UVV""&&t';';'A'A$'GH 	D!t$		/488tD<<-l;;LL&|488>== NN!!$'rm   c                     XR                   ;  a0  [        [        R                  R	                  XR                   S95      eU R
                  R                  U5        g)zDisable a pipeline component. The component will still exist on
the nlp object, but it won't be run as part of the pipeline. Does
nothing if the component is already disabled.

name (str): The name of the component to disable.
rd  N)r   r   r&   rg  r   r   r  r)  s     rn   disable_pipeLanguage.disable_pipe  sE     +++V[[//T@T@T/UVV4 rm   c                     XR                   ;  a0  [        [        R                  R	                  XR                   S95      eXR
                  ;   a  U R                  R                  U5        gg)zEnable a previously disabled pipeline component so it's run as part
of the pipeline. Does nothing if the component is already enabled.

name (str): The name of the component to enable.
rd  N)r   r   r&   rg  r   r   r   r  r)  s     rn   enable_pipeLanguage.enable_pipe  sT     +++V[[//T@T@T/UVV== NN!!$' !rm   )disablecomponent_cfgtextr  r  c          	         U R                  U5      nUc  0 nU R                   H  u  pVXR;   a  M  [        US5      (       d0  [        [        R
                  R                  [        U5      US95      eU R                  n[        US5      (       a  UR                  5       n U" U40 UR                  U0 5      D6n[        U[        5      (       a  M  [        [        R                   R                  U[        U5      S95      e   U$ ! [         a,  n[        [        R                  R                  US95      UeSnAf[         a  nU" XVU/U5         SnANSnAff = f)aT  Apply the pipeline to some text. The text can span multiple sentences,
and can contain arbitrary whitespace. Alignment into the original string
is preserved.

text (Union[str, Doc]): If `str`, the text to be processed. If `Doc`,
    the doc will be passed directly to the pipeline, skipping
    `Language.make_doc`.
disable (List[str]): Names of the pipeline components to disable.
component_cfg (Dict[str, dict]): An optional dictionary with extra
    keyword arguments for specific components.
RETURNS (Doc): A container for accessing the annotations.

DOCS: https://spacy.io/api/language#call
Nr  r  get_error_handlerr,  )r   returned_type)_ensure_docr   r  r   r&   E003r   r   r   r  r   rf  E109	Exceptionr   r:   E005)	r   r  r  r  docr   procerror_handleres	            rn   r  Language.__call__  s5   * t$ M--JD4,, !3!3d4jt!3!TUU 66Mt011 $ 6 6 843>-"3"3D""=> c3'' !3!3TRUY!3!WXX! (" 
  G !3!3!3!>?QF 4d3%334s$   D
E'D33E EEDisabledPipesc                     [         R                  " [        R                  [        5        [        U5      S:X  a#  [        US   [        [        45      (       a  US   nU R                  US9$ )a.  Disable one or more pipeline components. If used as a context
manager, the pipeline will be restored to the initial state at the end
of the block. Otherwise, a DisabledPipes object is returned, that has
a `.restore()` method you can use to undo your changes.

This method has been deprecated since 3.0
r!   r   )r  )
r  r  r'   W096DeprecationWarningr   r   r   tupleselect_pipesr   s     rn   disable_pipesLanguage.disable_pipes'  sS     	hmm%78u:?z%(T5MBB!HE   //rm   )r  enabler  c                   Uc  Uc  [        [        R                  5      e[        U[        5      (       a  U/nUbv  [        U[        5      (       a  U/nU R
                   Vs/ s H  o3U;  d  M
  UPM     nnUb6  X:w  a1  [        [        R                  R                  X!U R
                  S95      eUnUc   eU Vs/ s H  oUU R                  ;  d  M  UPM     nn[        X5      $ s  snf s  snf )a  Disable one or more pipeline components. If used as a context
manager, the pipeline will be restored to the initial state at the end
of the block. Otherwise, a DisabledPipes object is returned, that has
a `.restore()` method you can use to undo your changes.

disable (str or iterable): The name(s) of the pipes to disable
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled

DOCS: https://spacy.io/api/language#select_pipes
)r  r  r   )
r   r&   E991r   rg   r   E992r   r   r  )r   r  r  r  
to_disableds         rn   r  Language.select_pipes4  s      >goV[[))gs##iG&#&& +/??Q?4&>P$?JQ"w'< KK&&%doo '  
 !G""" &Ag$..)@1gAT++ R Bs   !	C$.C$:C)C)c                     [        U5      U R                  :  a:  [        [        R                  R                  [        U5      U R                  S95      eU R                  U5      $ )zcTurn a text into a Doc object.

text (str): The text to process.
RETURNS (Doc): The processed doc.
)lengthr   )r   r   r   r&   E088r   r   )r   r  s     rn   make_docLanguage.make_docZ  sQ     t9t&""#d)"P  ~~d##rm   doc_likec                 L   [        U[        5      (       a  U$ [        U[        5      (       a  U R                  U5      $ [        U[        5      (       a$  [        U R
                  5      R                  U5      $ [        [        R                  R                  [        U5      S95      e)z|Create a Doc if need be, or raise an error if the input is not
a Doc, string, or a byte array (generated by Doc.to_bytes()).)r   )r   r:   rg   r  bytesr~   
from_bytesr   r&   E1041r   r   )r   r  s     rn   r  Language._ensure_docf  sy     h$$Oh$$==**h&&tzz?--h77,,$x.,ABBrm   contextc                 4    U R                  U5      nX#l        U$ )z>Call _ensure_doc to generate a Doc and set its context object.)r  _context)r   r  r  r  s       rn   _ensure_doc_with_context!Language._ensure_doc_with_contextq  s     x(
rm   g        )dropsgdlossesr  exclude	annotatesexamplesr   r  r  r  r  r   c                   Ub  [        [        R                  5      eUc  0 n[        U[        5      (       a  [        U5      S:X  a  U$ [        US5        [        U5      nUc.  U R                  c  U R                  5       U l        U R                  nUc  0 n0 n	[        U R                  5       H[  u  n
u  pUR                  U0 5        [        Xk   5      X'   Xk   R                  SU5        X   R                  SU R                  5        M]     U R                   H  u  pX;  a)  [        US5      (       a  UR                   " U4SUS.Xk   D6  US;  aV  X;  aQ  [        U["        R$                  5      (       a2  UR&                  (       a!  UR(                  S	;  a  UR+                  U5        X;   d  M  [-        [/        S
 U 5       UUU R0                  X   S9U5       H  u  pXl        M     M     [5        U5      $ )a  Update the models in the pipeline.

examples (Iterable[Example]): A batch of examples
_: Should not be set - serves to catch backwards-incompatible scripts.
drop (float): The dropout rate.
sgd (Optimizer): An optimizer.
losses (Dict[str, float]): Dictionary to update with the loss, keyed by
    component.
component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
    components, keyed by component name.
exclude (Iterable[str]): Names of components that shouldn't be updated.
annotates (Iterable[str]): Names of components that should set
    annotations on the predicted examples after updating.
RETURNS (Dict[str, float]): The updated losses dictionary

DOCS: https://spacy.io/api/language#update
Nr   zLanguage.updater  r   updater  r  )NF)TFNc              3   8   #    U  H  oR                   v   M     g 7fr   	predictedr  egs     rn   r  "Language.update.<locals>.<genexpr>  s     9"   )r  r   r   r   )r   r&   E989r   r   r   r=   _copy_examplesr   create_optimizer	enumerater   r   r   r   r  r  r#   TrainableComponentis_trainabler}  finish_updateziprD   r   r  _replace_numpy_floats)r   r  r   r  r  r  r  r  r   pipe_kwargsr  r   r  r  r	  s                  rn   r  Language.updatey  s   : =V[[))>Fh%%#h-1*<M($56!(+;&"&"7"7"9//C M(7OA|$$T2. ()< =K**648((tG	  8
 --JD"wtX'>'>HU$vUATU-''"4)>)>??))

*==&&s+ "99!!.2.H.H*0 	 GC $'L	  (0 %V,,rm   )r  r  r  r  c          	        ^ Uc  0 n[        U[        5      (       a  [        U5      S:X  a  U$ [        US5        Uc.  U R                  c  U R                  5       U l        U R                  n[        U R                  5      n[        R                  " U5        Uc  0 n0 mU4S jnUR                  Ul	        UR                  Ul
        UR                  Ul        U HD  u  pX;   d  [        U	S5      (       d  M  0 mU	R                  " U4XsS.UR                  U0 5      D6  MF     TR                  5        H  u  n
u  pU" XU5        M     U$ )a  Make a "rehearsal" update to the models in the pipeline, to prevent
forgetting. Rehearsal updates run an initial copy of the model over some
data, and update the model so its current predictions are more like the
initial ones. This is useful for keeping a pretrained model on-track,
even if you're updating it with a smaller set of examples.

examples (Iterable[Example]): A batch of `Example` objects.
sgd (Optional[Optimizer]): An optimizer.
component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
    components, keyed by component name.
exclude (Iterable[str]): Names of components that shouldn't be updated.
RETURNS (dict): Results from the update.

EXAMPLE:
    >>> raw_text_batches = minibatch(raw_texts)
    >>> for labelled_batch in minibatch(examples):
    >>>     nlp.update(labelled_batch)
    >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
    >>>     nlp.rehearse(raw_batch)

DOCS: https://spacy.io/api/language#rehearse
r   zLanguage.rehearsec                    > X4TU '   X4$ r   r`   )keyWdWgradss      rn   	get_grads$Language.rehearse.<locals>.get_grads  s    E#J5Lrm   rehearser  )r   r   r   r=   r   r  r   randomshuffle
learn_rateb1b2r  r  r   items)r   r  r  r  r  r  r
  r  r   r  r  r  r  r  s                @rn   r  Language.rehearse  s*   > >Fh%%#h-1*<M($78;&"&"7"7"9//CT]]#u M	  #~~	vv	vv	JDgdJ&?&?EMM':G:K:KDRT:U	   "KKMLC!O *rm   r  get_examplesc                r    [         R                  " [        R                  [        5        U R                  XS9$ )Nr'  )r  r  r'   W089r  r  )r   r(  r  s      rn   begin_trainingLanguage.begin_training  s(     	hmm%78|55rm   c                  ^ Uc;  [         R                  R                  S5        [        U R                  / SQS9mU4S jn[        US5      (       d2  [        R                  R                  S[        U5      S9n[        U5      eU R                  R                  5       n[        R                  " US   [        S	9nUS
   nUb  U" U 5         [!        XS   US   US   S9  U R                  R&                  R(                  S   S:  a/  [+        5       nU R                  R&                  R-                  U5        [        U R.                  S5      (       aB  [1        U R.                  R2                  US   SSS9nU R.                  R2                  " U4SU 0UD6  U R4                   Hf  u  p[7        U
[8        R:                  5      (       d  M&  US   R=                  U	0 5      n[1        U
R2                  USU	S9nU
R2                  " U4SU 0UD6  Mh     UR=                  S5      nU(       a%  [        R                  " U[>        S	9n[A        XU5        U RC                  5         X l"        Ub  X l"        O"U RD                  c  U RG                  5       U l"        US   nUb  U" U 5        U RD                  $ ! ["         a*    [#        [        R$                  R                  US   S95      ef = f)a  Initialize the pipe for training, using data examples if available.

get_examples (Callable[[], Iterable[Example]]): Optional function that
    returns gold-standard Example objects.
sgd (Optional[Optimizer]): An optimizer to use for updates. If not
    provided, will be created using the .create_optimizer() method.
RETURNS (thinc.api.Optimizer): The optimizer.

DOCS: https://spacy.io/api/language#initialize
zUNo 'get_examples' callback provided to 'Language.initialize', creating dummy examples)xyz)wordsc                  4   > [         R                  " T 0 5      /$ r   )r<   	from_dict)r  s   rn   r(  )Language.initialize.<locals>.get_examples#  s    ))#r233rm   r  zLanguage.initialize)rp  objr  )schemabefore_init
vocab_datar   r   )datar   r   )r   r!   r   )sectionr   rr   r   pretraining
after_init)$r$   r   r   r:   r~   r  r&   E930r   r   	TypeErrorrR   r  rG   r   r3   r?   IOErrorE884r   shaper   to_opsr   r6   r  r   r   r#   InitializableComponentr   r5   r>   r  r   r  )r   r(  r  rD  rR   Ir7  opstok_settingsr   r  
p_settingspretrain_cfgPr<  r  s                  @rn   r  Language.initialize  s     KKg djj8C4 |Z00++$$,$|2D % C C. ((*VL1:JK&"	D\?AiL!I,
 ::##A&!+!#CJJ%%c*4>><001))+# 	L NN%%lMMM--JD$ 9 9::|_00r:
3OOZD
 E$E*E ( zz-0  6JKA!$?!O__$"335DO|_
!tC  	D&++,,Qy\,BCC	Ds   
J" "4Kc                   [        5       nU R                  R                  R                  S   S:  a%  U R                  R                  R	                  U5        U R
                   H2  u  p4[        US5      (       d  M  [        UR                  5      Ul	        M4     Ub  Xl
        U R                  $ U R                  c  U R                  5       U l
        U R                  $ )a  Continue training a pretrained model.

Create and return an optimizer, and initialize "rehearsal" for any pipeline
component that has a .rehearse() method. Rehearsal is used to prevent
models from "forgetting" their initialized "knowledge". To perform
rehearsal, collect samples of text you want the models to retain performance
on, and call nlp.rehearse() with a batch of Example objects.

RETURNS (Optimizer): The optimizer.

DOCS: https://spacy.io/api/language#resume_training
r!   _rehearsal_model)r   r~   r   rA  rB  r   r  r   r}  rL  r   r  )r   r  rE  r   r  s        rn   resume_trainingLanguage.resume_trainingY  s     ::##A&!+JJ%%c*--JDt/00(0(<% ( ?!O  __$"335DOrm   r  c                     Xl         U R                   H)  u  p#[        US5      (       d  M  UR                  U5        M+     g)a  Set an error handler object for all the components in the pipeline
that implement a set_error_handler function.

error_handler (Callable[[str, Callable[[Doc], Doc], List[Doc], Exception], NoReturn]):
    Function that deals with a failing batch of documents. This callable
    function should take in the component's name, the component itself,
    the offending batch of documents, and the exception that was thrown.
DOCS: https://spacy.io/api/language#set_error_handler
set_error_handlerN)r   r   r  rP  )r   r  r   r  s       rn   rP  Language.set_error_handlerr  s6     &3"--JDt011&&}5 (rm   )r   scorerr  
scorer_cfgper_componentrR  rS  rT  c                "   [        U5      n[        US5        [        U5      nUc  U R                  nUc  0 nUc  0 nUc(  [	        U5      nUR                  SU 5        [        S0 UD6n[        5       nU H(  n	U R                  U	R                  R                  5        M*     U R                  S U 5       UUS9n
[        X5       H  u  pXl        M     [        5       nUR                  XS9n[        S U 5       5      nXU-
  -  US'   [!        U5      $ )	ax  Evaluate a model's pipeline components.

examples (Iterable[Example]): `Example` objects.
batch_size (Optional[int]): Batch size to use.
scorer (Optional[Scorer]): Scorer to use. If not passed in, a new one
    will be created.
component_cfg (dict): An optional dictionary with extra keyword
    arguments for specific components.
scorer_cfg (dict): An optional dictionary with extra keyword arguments
    for the scorer.
per_component (bool): Whether to return the scores keyed by component
    name. Defaults to False.

RETURNS (Scorer): The scorer containing the evaluation results.

DOCS: https://spacy.io/api/language#evaluate
zLanguage.evaluaterr   c              3   8   #    U  H  oR                   v   M     g 7fr   r  r  s     rn   r  $Language.evaluate.<locals>.<genexpr>  s     -Hb\\Hr  )r   r  )rT  c              3   L   #    U  H  n[        UR                  5      v   M     g 7fr   )r   r  r  s     rn   r  rW    s     ;(Bc",,''(s   "$speedr`   )r   r=   r  r   rh   r   r7   timerr  	referencer  r  r  r  scorer  r  )r   r  r   rR  r  rS  rT  r   
start_timer	  docsr  end_timeresultsn_wordss                  rn   evaluateLanguage.evaluate  s   6 >($78!(+J MJ>*%FeT*%f%FW
BMM",,++,  yy-H-!'  

 8*GBL +7,,x,E;(;;"&;<$W--rm   c                 \    SU R                   S   S   0n[        R                  " U5      S   $ )zCCreate an optimizer, usually using the [training.optimizer] config.	optimizerr   )rR   rG   r   )r   	subconfigs     rn   r  Language.create_optimizer  s0     $++j"9+"FG		*;77rm   paramsc              #   z  #    U(       d  Sv   gU R                    VVs/ s H<  u  p#[        US5      (       d  M  [        US5      (       d  M+  UR                  U5      PM>     nnnU H  n [        U5        M     Sv   U H  n [        U5        M     gs  snnf ! [         a     M@  f = f! [         a     M8  f = f7f)a  Replace weights of models in the pipeline with those provided in the
params dictionary. Can be used as a contextmanager, in which case,
models go back to their original weights after the block.

params (dict): A dictionary of parameters keyed by model ID.

EXAMPLE:
    >>> with nlp.use_params(optimizer.averages):
    >>>     nlp.to_disk("/tmp/checkpoint")

DOCS: https://spacy.io/api/language#use_params
N
use_paramsr}  )r   r  rj  nextStopIteration)r   rh  r   r  contextsr  s         rn   rj  Language.use_params  s       #'--"/JD4. (3:43I ('"/   $M $
 #M $ %  % sc   B;BBB!B;*B5B;B*B;
B'#B;&B''B;*
B84B;7B88B;)	as_tuplesr   r  r  	n_processtextsro  rp  c                    g r   r`   r   rq  ro  r   r  r  rp  s          rn   r  Language.pipe       	rm   c                    g r   r`   rs  s          rn   r  rt    ru  rm   r!   c          	   #     ^ #    U(       ar  [        [        [        [        [        [
        4   [        4      U5      nU 4S jU 5       nT R                  UUUUUS9nU H  n	U	R                  n
SU	l        X4v   M     g[        [        [        [        [
        4      U5      nUS:X  a  [        R                  " 5       nUc  0 nUc  T R                  n/ nT R                   Hg  u  pX;   a  M  UR                  U0 5      nUR                  SU5        [        R                   " ["        UUUT R$                  S9nUR'                  U5        Mi     US:w  aM  T R)                  U5      (       a$  [*        R,                  " [.        R0                  5        T R3                  XXc5      nOU 4S jU 5       nU H  nU" U5      nM     U H  n	U	v   M	     g7f)	a  Process texts as a stream, and yield `Doc` objects in order.

texts (Iterable[Union[str, Doc]]): A sequence of texts or docs to
    process.
as_tuples (bool): If set to True, inputs should be a sequence of
    (text, context) tuples. Output will then be a sequence of
    (doc, context) tuples. Defaults to False.
batch_size (Optional[int]): The number of texts to buffer.
disable (List[str]): Names of the pipeline components to disable.
component_cfg (Dict[str, Dict]): An optional dictionary with extra keyword
    arguments for specific components.
n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
YIELDS (Doc): Documents in the order of the original text.

DOCS: https://spacy.io/api/language#pipe
c              3   J   >#    U  H  u  pTR                  X5      v   M     g 7fr   )r  )r  r  r  r   s      rn   r   Language.pipe.<locals>.<genexpr>   s%      "RW--d<<RWs    #)r   r  rp  r  Nr   )r  r   r   r   r!   c              3   F   >#    U  H  nTR                  U5      v   M     g 7fr   )r  )r  r  r   s     rn   r  ry  S  s     =utD$$T**us   !)r   r   r   r   rg   r:   rL   r  r  mp	cpu_countr   r   r   r   	functoolspartialrD   r   r   _has_gpu_modelr  r  r'   W114_multiprocessing_pipe)r   rq  ro  r   r  r  rp  docs_with_contextsr^  r  r  r
  r   r  r   fr  s   `                rn   r  rt    s    8 %c3h(D"EFNE"RW" 99"%#+  D ,,#n$  XeCHo.6 ?I MJ  	 --JD"&&tR0FlJ7!!&*&@&@A LLO ( >""7++hmm,--eIRD >u=DDz CI s   GGc                 >   U R                    H  u  p#[        US5      =(       a    UR                  nX!;   d  U(       d  M2  [        US5      (       d  ME  [        UR                  S5      (       d  Mb  [	        UR                  R
                  [        5      (       d  M    g   g)Nr  r}  rE  TF)r   r  r  r}  r   rE  r   )r   r  r   r  r  s        rn   r  Language._has_gpu_modelY  sr    --JD"48NT=N=NLltW%%'$**e*D*DTXT^T^TbTbdkIlIl ( rm   r
  c              #     #    S[         [        [        [        4      S[         [        [        [        [
        4   [        4      4S jnU" U5      n[        R                  " U5      u  p[        U5       Vs/ s H  n[        R                  " 5       PM     n	n[        [        U5       Vs/ s H  n[        R                  " S5      PM     sn6 u  p[        R                  " X5      n[!        XUS9nUR#                  5         UR#                  5         [        X5       VVs/ s H@  u  p[        R$                  " [&        U R(                  UUU[*        R,                  " 5       4S9PMB     nnnU H  nUR/                  5         M     U H  nUR1                  5         M     [2        R4                  " S [7        U
5       5       5      n [9        [        UU5      S5       H  u  nu  nu  nnnUb0  [        U R:                  5      R=                  U5      nUUl        Uv   OQUbN  [@        RB                  " U5      nU RE                  S S S [G        [H        RJ                  RM                  US	95      5        UU-  S
:X  d  M  URO                  5         M     U	 H(  nURQ                  [R        5        UR1                  5         M*     U
 H  nUR1                  5         M     U H  nURU                  5         M     [W        S U 5       5      (       d%  [X        RZ                  " [\        R^                  5        g g s  snf s  snf s  snnf ! U	 H(  nURQ                  [R        5        UR1                  5         M*     U
 H  nUR1                  5         M     U H  nURU                  5         M     [W        S U 5       5      (       d%  [X        RZ                  " [\        R^                  5        f f = f7f)Nrq  ro   c              3      #    U  HZ  n[        U[        5      (       a.  UR                  5       [        [        UR
                  5      4v   MF  U[        [        S 5      4v   M\     g 7fr   )r   r:   to_bytesr   rL   r  )rq  r  s     rn   prepare_input5Language._multiprocessing_pipe.<locals>.prepare_inputk  sQ      "h,,#,,.[(BSBS0TUU#T+t%<==	 "s   A"A$F)
chunk_size)targetr  c              3   @   #    U  H  oR                  5       v   M     g 7fr   )recv)r  r  s     rn   r  1Language._multiprocessing_pipe.<locals>.<genexpr>  s      *
$;DIIKK$;s   r!   r   r   c              3   >   #    U  H  oR                   S :H  v   M     g7f)r   N)exitcode)r  r  s     rn   r  r    s     <ed}})es   )0r   r   rg   r:   r   r  rL   	itertoolsteeranger|  Queuer  Piper$   	minibatch_SendersendProcess_apply_pipesr  r;   	get_statestartcloser   from_iterabler   r  r~   r  r  r   msgpack_loadsr   r   r&   E871r   stepput_WORK_DONE_SENTINELrt  allr  r  r'   W127)r   rq  r
  rp  r   r  serialized_texts_with_ctx	raw_textsr   texts_qbytedocs_recv_chbytedocs_send_chbatch_textssenderrchschprocsr  txbyte_tuplesr  byte_docr  
byte_errorr  r   qrs                               rn   r  Language._multiprocessing_piped  s<    
	>E#s(O,
	>eE#u*-{:;<
	> %2%$8!$==)BC7<Y7G"H7G!288:7G"H-0&+I&67&6bggen&67.
* nnU7 )D  :
 ; JJ#11((*	 ; 	 
 DJJL  #BHHJ #
 )) *
$)*:$;*
 
%	-;DI{+Q<77A6': 'djj/44X>C#*CLI+!//
;E..dD*V[[5G5Ge5G5T*U z>Q&KKM<& )*	  &	 & 		  <e<<<hmm, =] #I 8
^ )*	  &	 & 		  <e<<<hmm, =sN   A4O6L#O) L(	AOAL-$AO B.L3 2L3 B-O3BOOc                 J   U R                    H  u  p[        US5      (       d  M  Xl        M      [        U R                   5       H\  u  nu  pE[	        U[
        R                  5      (       d  M)  0 Ul        U R                   US-   S  H  u  pgUR                  U5        M     M^     g)z]Register 'listeners' within pipeline components, to allow them to
effectively share weights.
r   r!   N)	r   r  r   r  r   r#   ListenedToComponentlistener_mapfind_listeners)r   r   r  r  name1proc1name2proc2s           rn   r  Language._link_components  s     --JDtV$$ 	 ( "+4==!9A~%!7!788%'"$(MM!a%'$:LE((/ %; ":rm   )r~   r  r  r  r   	auto_fillrk  r  c          	        ^ U(       a'  [        U R                  [        S9R                  U5      nSU;  a&  [	        [
        R                  R                  US95      eSUS   ;  a
  SS0US   S'   US   R                  S5      n	U	b[  XR                  :w  aL  [	        [
        R                  R                  US   S   U R                  [        R                  " U 5      S	95      eU R                  US   S'   [        R                  " U5      nUR                  S
0 5      n
UR                  SS5      n0 US
'   U(       a  [        R                   " X["        S9nOUnXS
'   XS
'   Ub  XS'   XS'   [        R$                  " US   U[&        S9nUS   nUS   nUS   nUS   nUS   nU nUbb  U" U 5      n[)        U[*        5      (       a  [-        UU 5      (       a  UU La/  [	        [
        R.                  R                  [+        U5      S95      e[1        5         U" UUUUS9nUbI  U" U5      n[)        UU 5      (       d0  [	        [
        R2                  R                  S[+        U5      S95      eUR4                  (       d  UR7                  5       OUnUR                  S
0 5      n0 n0 nSnUS   S    GHm  nUU;  aF  SR9                  UR;                  5       5      n[	        [
        R<                  R                  UUS95      e[        R                  " UU   5      n[        US
   U   5      nUT;  d  M  SU;  a/  SU;  a)  [
        R>                  R                  UUS9n[	        U5      eSU;   a&  UR                  S5      nURA                  UUUUUS9  M  SU;   d   eUc  URB                  RE                  SS/S9nUS   n U U;  a$  [        RF                  " U URB                  S/S9UU '   UR                  S U5      n!S!n"S"U;   aZ  UU    RI                  5         UU    RJ                   H4  u  n#n$U![M        U$S#/ 5      ;   d  M  UU    RO                  U#U!US"   5        S$n"M6     [P        RR                  " 5          [P        RT                  " S%S&S'9  URA                  U!UU    US(9  SSS5        U U;  a3  [W        UU    RB                  RX                  RE                  S/S95      UU '   S)URZ                  ;  a  0 URZ                  S)'   UU    URZ                  S)   U'   U"(       d  GMj  UU 	 GMp     Ub  URB                  R]                  U5        [)        U[^        5      (       a  U/n[)        U[^        5      (       a  U/n[)        T[^        5      (       a  T/m[a        U5      [a        [b        5      :w  av  US   R                  S*/ 5      n%[e        U%5      (       aQ  [g        U%5      Ri                  U5      (       d2  [P        Rj                  " [l        Rn                  R                  UU%S+95        U Rq                  [s        1 UkUS   R                  S,/ 5      k5      UUS   S   5      n&[g        U4S- jU& 5       5      Ul:        US   S.   Ul;        U(       a  UOUUl<        UbI  U" U5      n[)        UU 5      (       d0  [	        [
        R2                  R                  S/[+        U5      S95      eU$ ! , (       d  f       GND= f)0a  Create the nlp object from a loaded config. Will set up the tokenizer
and language data, add pipeline components etc. If no config is provided,
the default config of the given language is used.

config (Dict[str, Any] / Config): The loaded config.
vocab (Vocab): A Vocab object. If True, a vocab is created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
    Disabled pipes will be loaded but they won't be run unless you
    explicitly enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
    pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
    Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based
    on defaults and function argument annotations.
validate (bool): Validate the component config and arguments against
    the types expected by the factory.
RETURNS (Language): The initialized Language class.

DOCS: https://spacy.io/api/language#from_config
rP   rr   r   r   z@vectorszspacy.Vectors.v1r   N)bad_lang_coderq  r   r   r;  )rk  r6  r   before_creationafter_creationafter_pipeline_creation)r   )r~   r   r   r   creation)r   r   r   rn  rd  r   r{  )r   rR   )r   rR   rk  rj  r   r  r  )r~   r  r(  Freplace_listenerslistening_componentsTignorez\[W113\])message)r{  r   r  enabled)r  r  r   c              3   6   >#    U  H  oT;  d  M
  Uv   M     g 7fr   r`   )r  r	  r  s     rn   r  'Language.from_config.<locals>.<genexpr>  s     J~!'9IAA~s   		r   pipeline_creation)=r   r   rA   r   r   r&   E985r   r   r   E958r$   r]  r  rv  rG   ru  r2   r   r4   r   r   
issubclassE943rH   E942is_interpolatedr  rt  r   E956E984r  r~   r  
load_modelr  r   getattrr  r  catch_warningsfilterwarningshashr   r   r  rg   idr@   r   rj   issubsetr  r'   W123_resolve_component_statusr   r   r   rR   )'r   rR   r~   r  r  r  r   r  rk  config_langorig_pipelineorig_pretrainingrx  resolved_nlpr   r   r  r  r  lang_clsrr   interpolatedr   source_nlpssource_nlp_vectors_hashesvocab_br   re  pipe_cfgrj  rD  r   r}  rz  listeners_replacedr   r  r  disabled_pipess'        `                                 rn   from_configLanguage.from_config  s   F ""2FeFm  V[[//v/>?? F5M)(24F'GF5M)$Um''/"{hh'>"""(-"7!hh--c2 #   !$uf
 !!&)

<4!::mT:!|]]6\RFF,|,|'$4=!$4=!''5MH_
 (4%i0&'89%&67"./H"I&&s+Hx..!(C003& !3!3$x.!3!IJJ 	 -)	
 % %Cc3'' !3!34PS9!3!UVV 4:3I3Iv))+v##L"5 $&!z2I(yy1 !3!3!3!NOO''(;<H| 4Y ?@J'H,1I ++,,)H,MC$S/)(&ll95G LL&'!)#- !  $x/// "%))"4"4i=S"4"T$X.EK/-1__!YK.E* #+,,{I"FK).&*h6 $E*;;=*5e*<*E*EJD$*gd<RTV.WW +E 2 D D$(+x@S7T!" 6: 2 +F "002 //,O'E0B %  3 $==;?'.44<<EE)2 F <1%8
 1@>@!:; 2%8 HH67! *)'.Q 3T II  ) gs##iGfc""XFgs##iG f:011Um''	26G7||CL$9$9&$A$AMM((% ' )  66?7?VE]..z2>?@5M*%

 J~JJ|4(Vf
".)#.Cc3'' KK&&,?tCy&Q  
{ 32s   (*[
[	tok2vec_namer   	listenersc                    XR                   ;  aD  [        R                  R                  UUUSR	                  U R                   5      S9n[        U5      eX R                   ;  aD  [        R                  R                  UUUSR	                  U R                   5      S9n[        U5      eU R                  U5      nU R                  U5      n[        U[        R                  5      (       d0  [        [        R                  R                  U[        U5      S95      eUR                  nUR                  R                  U/ 5      nU R                  U5      n	U R                   U   n
U(       Ga  ["        R$                  R'                  SU5        [)        [+        U5      5      [)        U5      :w  a4  [        R,                  R                  UUU[)        U5      S9n[        U5      eU Hg  n ["        R.                  " X5        US   nSUR4                  ;   a!  UR4                  S   nU" US   U
S   S	   5      n["        R6                  " XU5        Mi     U H  nUR9                  5       nUR4                  R                  S
5      nUbn  [)        [:        R<                  " U5      R>                  5      nUS:X  a	  U" U5      nO6US:X  a
  U" XU5      nO&[        [        R@                  R                  US95      e["        RB                  " U	R                  X5        URE                  X5        M     gg! [0         a*    [        R2                  R                  X!US9n[        U5      ef = f)a%  Find listener layers (connecting to a token-to-vector embedding
component) of a given pipeline component model and replace
them with a standalone copy of the token-to-vector layer. This can be
useful when training a pipeline with components sourced from an existing
pipeline: if multiple components (e.g. tagger, parser, NER) listen to
the same tok2vec component, but some of them are frozen and not updated,
their performance may degrade significantly as the tok2vec component is
updated with new data. To prevent this, listeners can be replaced with
a standalone tok2vec layer that is owned by the component and doesn't
change if the component isn't updated.

tok2vec_name (str): Name of the token-to-vector component, typically
    "tok2vec" or "transformer".
pipe_name (str): Name of pipeline component to replace listeners for.
listeners (Iterable[str]): The paths to the listeners, relative to the
    component config, e.g. ["model.tok2vec"]. Typically, implementations
    will only connect to one tok2vec component, [model.tok2vec], but in
    theory, custom models can use multiple listeners. The value here can
    either be an empty list to not replace any listeners, or a complete
    (!) list of the paths to all listener layers used by the model.

DOCS: https://spacy.io/api/language#replace_listeners
rn  )tok2vecr   unknownre  )r   r  z%Replacing listeners of component '%s')r   r  pathsn_listeners)r   r  r   r}  replace_listener_cfgr  replace_listenerNr!      )
num_params)#r   r&   E889r   rt  r   rh  r   r   r#   r  E888r   r}  r  r   r   r$   r   r   r   r   E887dot_to_objectrf  E886attrsset_dot_to_objectcopyinspect	signature
parametersE1055replace_model_noderemove_listener)r   r  r   r  rD  r  tok2vec_cfgtok2vec_modelpipe_listenersr  r  listener_path
new_configreplace_funclistener	new_modelreplace_listener_funcr  s                     rn   r  Language.replace_listeners  s   : .++$$$$YYt/	 % C S/!OO+++$$$!YYt/	 % C S/!---**<8'2#9#9::V[[//\W/VWW --11)R@}}Y'%%i0KKEyQ4	?#s>':: kk(("(# #N 3	 )  !o% "+*&&x? )1
)]-@-@@#0#6#67M#NL!-#G,hw.?	.J"J &&x
K "+" +)..0	(5(;(;(?(?@R(S%(4 "%))*?@KK"J "Q$9)$D	#q$9)w$W	()<)<
)<)STT''

HH''<# +A &   * ++,,&= - C %S/)	*s   #L##4Mmemc           	   #   "  #    Uc
  [        5       n[        5        nUR                  U R                  R	                  U5      5      /n[        U R                  S5      (       a9  UR                  UR                  U R                  R	                  U5      5      5        U R                   HG  u  pE[        US5      (       d  M  UR                  UR                  UR	                  U5      5      5        MI     Uv   SSS5        g! , (       d  f       g= f7f)a:  Begin a block where all resources allocated during the block will
be freed at the end of it. If a resources was created within the
memory zone block, accessing it outside the block is invalid.
Behaviour of this invalid access is undefined. Memory zones should
not be nested.

The memory zone is helpful for services that need to process large
volumes of text with a defined memory budget.

Example
-------
>>> with nlp.memory_zone():
...     for doc in nlp.pipe(texts):
...        process_my_doc(doc)
>>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone
Nmemory_zone)	r   r   enter_contextr~   r  r  r   r   r   )r   r  stackrm  r   r  s         rn   r  Language.memory_zone2  s     $ ;&C [E++DJJ,B,B3,GHIHt~~}55 3 3DNN4N4Ns4S TU==4//OOE$7$78H8H8M$NO ) I [[s#   DB C>>7C>5	D>
DDr  r   c                   ^ ^ [         R                  " U5      n0 nU 4S jUS'   U 4S jUS'   U 4S jUS'   T R                   H(  u  pEUT;   a  M  [        US5      (       d  M   U4S jX4'   M*     UU 4S	 jUS
'   [         R                  " XT5        g)a1  Save the current state to a directory.  If a model is loaded, this
will include the model.

path (str / Path): Path to a directory, which will be created if
    it doesn't exist.
exclude (Iterable[str]): Names of components or serialization fields to exclude.

DOCS: https://spacy.io/api/language#to_disk
c                 :   > TR                   R                  U S/S9$ Nr~   r  )r   to_diskr	  r   s    rn   <lambda>"Language.to_disk.<locals>.<lambda>`  s!    T^^-C-Cy .D .
rm   r   c                 X   > [         R                  " U [        TR                  5      5      $ r   )r   
write_jsonr  r   r  s    rn   r  r   c  s    U-=-=$TYY/.
rm   	meta.jsonc                 :   > TR                   R                  U 5      $ r   )rR   r  r  s    rn   r  r   f  s    dkk.A.A!.Drm   
config.cfgr  c                 $    UR                  U S/S9$ r  )r  r	  r  s     rn   r  r   l  s    T\\!gY\5Wrm   c                 8   > TR                   R                  U TS9$ Nr  )r~   r  )r	  r  r   s    rn   r  r   m  s    ););Aw);)Orm   r~   N)r$   ensure_pathr   r  r  )r   r   r  serializersr   r  s   ` `   rn   r  Language.to_diskR  s     %$
K $
K  %EL!**JDw4++/3 WK +  PGT0rm   r   c                 T   [        U [        5      (       a  U /n U nU(       ax  [        U[        5      (       a  U/n1 U Vs/ s H  oDU;  d  M
  UPM     snkU kn[        [        U5      U-  5      (       a&  [	        [
        R                  R                  XS95      e[        U5      $ s  snf )a&  Derives whether (1) `disable` and `enable` values are consistent and (2)
resolves those to a single set of disabled components. Raises an error in
case of inconsistency.

disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
pipe_names (Iterable[str]): Names of all pipeline components.

RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
                           specified includes and excludes.
)r  r  )	r   rg   r   rj   r   r&   E1042r   r  )r  r  r   r  r   s        rn   r  "Language._resolve_component_statusp  s    $ gs##iG
&#&& -7SZ	F;R)ZSJ
 3v;+,, !4!4F!4!TUUZ   Ts   	B%B%)r  	overridesr0  c                  ^ ^^ S[         SS4U 4S jjnS[         SS4UU 4S jjn[        R                  " U5      n0 n[        US-  5      R                  5       (       a
  UU 4S jUS'   XFS'   XVS	'   U 4S
 jUS'   T R                   H(  u  pxUT;   a  M  [        US5      (       d  M   U4S jXg'   M*     US	-  R                  5       (       d  S	T;  a  [        T5      S	/-   m[        R                  " XT5        UT l        T R                  5         T $ )au  Loads state from a directory. Modifies the object in place and
returns it. If the saved `Language` object contains a model, the
model will be loaded.

path (str / Path): A path to a directory.
exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (Language): The modified `Language` object.

DOCS: https://spacy.io/api/language#from_disk
r   ro   Nc                   > U R                  5       (       al  [        R                  " U 5      nTR                  R	                  U5        UR                  S0 5      R                  S5      TR                  R                  l        g g Nr   r   )	existsr   	read_jsonr   r  r   r~   r   r   )r   r9  r   s     rn   deserialize_meta,Language.from_disk.<locals>.deserialize_meta  s]    {{}}t,		  & +/((9b*A*E*Ef*M

""' rm   c                 f   > U R                  5       (       a  TR                  R                  U TS9  g g r)  )r4  r~   	from_disk)r   r  r   s    rn   deserialize_vocab-Language.from_disk.<locals>.deserialize_vocab  s*    {{}}

$$T7$; rm   r%  c                 :   > TR                   R                  U STS9$ )NF)r  r0  )rR   r9  )r	  r0  r   s    rn   r  $Language.from_disk.<locals>.<lambda>  s!    DKK4I4Iu	 5J 5rm   r#  r~   c                 :   > TR                   R                  U S/S9$ r  )r   r9  r  s    rn   r  r=    s!    t~~/G/Gy 0H 0
rm   r   r9  c                 $    UR                  U S/S9$ r  )r9  r'  s     rn   r  r=    s    t~~G9 8F 8rm   )
r	   r$   r*  r4  r   r  r   r9  r   r  )	r   r   r  r0  r6  r:  deserializersr   r  s	   ` ``     rn   r9  Language.from_disk  s   $	N4 	ND 	N	<D 	<T 	< 	< %|#$++--+M,' &6k"!2g&
k" **JDw4--15 #M + w&&((WG-C7mwi/GtG4
rm   c                   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   U 4S jUS'   U 4S jUS'   T R                    H(  u  p4UT;   a  M  [        US	5      (       d  M   U4S
 jX#'   M*     [        R                  " UT5      $ )zSerialize the current state to a binary string.

exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Language` object.

DOCS: https://spacy.io/api/language#to_bytes
c                  6   > TR                   R                  T S9$ r)  )r~   r  )r  r   s   rn   r  #Language.to_bytes.<locals>.<lambda>  s    tzz':':7':'Krm   r~   c                  8   > T R                   R                  S/S9$ r  )r   r  r   s   rn   r  rD    s    4>>+B+BG9+B+Urm   r   c                  V   > [         R                  " [        T R                  5      5      $ r   )r   
json_dumpsr  r   r   s   rn   r  rD    s    5+;+;!$)),,
rm   r#  c                  8   > T R                   R                  5       $ r   )rR   r  r   s   rn   r  rD    s    DKK,@,@,Brm   r%  r  c                 "    U R                  S/S9$ r  )r  )r  s    rn   r  rD    s    $--	-2Rrm   )r   r  r$   r  )r   r  r+  r   r  s   ``   rn   r  Language.to_bytes  s     79KG#UK $
K  %CL!**JDw4,,,0 RK + }}['22rm   
bytes_datac                  ^ ^ U 4S jn0 nU 4S jUS'   X4S'   UU 4S jUS'   U 4S jUS'   T R                    H(  u  pVUT;   a  M  [        US	5      (       d  M   U4S
 jXE'   M*     [        R                  " XT5        T R	                  5         T $ )zLoad state from a binary string.

bytes_data (bytes): The data to load from.
exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (Language): The `Language` object.

DOCS: https://spacy.io/api/language#from_bytes
c                    > [         R                  " U 5      nTR                  R                  U5        UR	                  S0 5      R	                  S5      TR
                  R                  l        g r3  )r   
json_loadsr   r  r   r~   r   r   )br9  r   s     rn   r6  -Language.from_bytes.<locals>.deserialize_meta  sO    ##A&DIIT" '+hhy"&=&A&A&&IDJJ#rm   c                 8   > TR                   R                  U SS9$ )NF)r  )rR   r  rO  r   s    rn   r  %Language.from_bytes.<locals>.<lambda>  s    0F0F5 1G 1
rm   r%  r#  c                 8   > TR                   R                  U TS9$ r)  )r~   r  )rO  r  r   s    rn   r  rS    s    4::+@+@G+@+Trm   r~   c                 :   > TR                   R                  U S/S9$ r  )r   r  rR  s    rn   r  rS    s!    t~~/H/Hy 0I 0
rm   r   r  c                 $    UR                  U S/S9$ r  )r  )rO  r  s     rn   r  rS  	  s    tG9 8G 8rm   )r   r  r$   r  r  )r   rK  r  r6  r@  r   r  s   ` `    rn   r  Language.from_bytes  s    	J <>'
l# &6k"!Tg&
k" **JDw4..15 #M + 	
7;rm   )r   r   r   r   r   r   r   r   r   r   r   r   r   r~   )Tr   )NNNN)ro   r  ro   N)sra   rb   rc   rd   re   rN   rx   r   r   rg   rf   r   r   rB   r&   rB  r   r   r   r   rJ   boolri   r   r   r:   rI   r   r   propertyr   r   setterr   rR   r   r   r   r   rT  r   r   r   r   r  r   classmethodr  r  r"  r%  r   r   rC   r   floatr   r(  r/   rh  ro  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rL   r  r<   r   r  r  r+  r  rM  r  r   rP  r7   rb  r  r   rh   rj  r   r%   r   r  r  r  r  r@   r  r  r   r  r	   r  staticmethodr  r9  r  r  rl   __classcell__)r   s   @rn   rp   rp      s    HD(3-#N v{{3I.0M4]*+0 %)F1  !SWEIF1UD[!F1 	F1
 38nF1 #8ZL(C5#::N,N#OPF1 !7)[*@!ABF1 F1 
F1P5
   #d38n # #J 
[[$sCx. T        D ]]F t   R$s) R R 'tCy ' ' 
DsL'8!9: 
 
 Yc Y Y R$uS,%678 R R TDI T T +S#X + + (T#tCy.1 ( ( Qs Qt Q Q
 $C $C $ $ HC HM H H >C > >$ > >%# %- %	C 	F 	 
 *:);!1!3"2"4!<L<N#'WW S#X	W
 #W 3-W W  $C%$89W x W 
W Wr 
 "2!3"2"4!'+BB #	B
 3-B B |$B 
#s(	B BN K	 3i 	
 
$sCx.	!&QS Q\ Q  #G&
 "2!3'+G&G& smG&
 S#XG& V$G& G& 
G&R%,%,(2%,=@%,	|S 	!%,T #D
 -1+/ $#'+!1!3'+DD smD
 sCx)D c3h(D ~D tnD $D S#XD V$D D 
DP -1+/ $#5WsCx)5W c3h(5W ~	5W
 tn5W 
5Wn	'S 	'T 	'  "2!3)) )
 S#X) ) 
)V C  3  4  4 c<.?(@ 2	! 	! 	!	( 	( 	( "2!3=A)CHo) #	)
  S$sCx.%8 9:) 
)V0  8<6:	$, %Xc] 234$, sHSM123	$,
 
$,L
$S 
$S 
$	CE#sE/$: 	Cs 	Cc3o.9D	  I-
 #'-1=A!1!3#3#5I-7#I- C=I-
 I- i I- c5j)*I-  S$sCx.%8 9:I- #I- C=I-^ $(-1=A!1!3>7#> i 	>
 c5j)*>  S$sCx.%8 9:> #> 
c5j	>D CG6 $(	6xHW,=(=>?6 i 	6
 
6 CGJ $(	JxHW,=(=>?J i 	J
 
JX =A hy&9 Y 26lDIy I8 ST6, %)#'=A/3#9.7#9. SM	9.
  9.  S$sCx.%8 9:9. T#s(^,9. 9. 
c3h9.v8
 !$ ! !F 
 %($'!$=@
c3h(
 5>	

 SM
 #
  S$sCx.%8 9:
 
 
#
 
 
 $'$'!$=@
eCHo{:;<
 4=	

 SM
 #
  S$sCx.%8 9:
 
 
%[()	*
 
$  $(!1!3=AUU38_%xeCHo{6R0S'TT
U U SMU #U  S$sCx.%8 9:U U 
x}huS+-='>??	@Un	hsm 	f-c3h(f- hsm!345f- 	f-
 f- 
#f-P0,  13e %)-A,@-A/1ed38nf,-e UD[!	e
 sHSM)*e c8C=()e sHSM)*e 38ne e e 
e eNf=f= f= C=	f=
 
f=P x~ $  @ CSBT1#t)$12:3-1	1<  !sHSM)* !c8C=() ! SM ! 
sCx	 !  !L "2!3$4$67CI7 #	7
 S>7 
7r 4D3E 38C= 3% 30 >N=O&&-5c]&	& &rm   	meta_dictc                 2    [        S S [        U 5      5      $ )Nc                 6    [        U [        R                  5      $ r   )r   numpyfloatingvs    rn   r  '_replace_numpy_floats.<locals>.<lambda>	  s    *Q/rm   c                     [        U 5      $ r   )r]  re  s    rn   r  rg  	  s    58rm   )r    rh   )r`  s    rn   r  r  	  s    /1CT)_ rm   c                       \ rS rSr% Sr\\S'   Sr\\	\\
4      \S'   \" 5       r\\   \S'   \" 5       r\\   \S'   Sr\\S	'   \" 5       r\\   \S
'   Sr\\	\\\   4      \S'   Srg)r   i	  a  Dataclass containing information about a component and its defaults
provided by the @Language.component or @Language.factory decorator. It's
created whenever a component is defined and stored on the Language class for
each component instance and factory instance.
r   Nr   r/  r0  Fr1  r<  r   r`   )ra   rb   rc   rd   re   rg   rf   r   r   r   r   r  r/  r   r0  r1  rY  r<  r   r]  rl   r`   rm   rn   r   r   	  s}     L/3NHT#s(^,3"WGXc]$#gHhsm%K!GFHSM#BF8Dhuo)=$>?Frm   r   c                   J    \ rS rSrSrS\S\\   SS4S jrS r	S	 r
SS
 jrSrg)r  i#	  z)Manager for temporary pipeline disabling.rr   r   ro   Nc                     Xl         X l        U R                   H  nU R                   R                  U5        M      [        R	                  U 5        U R                  U R                  5        g r   )rr   r   r  r   r   extend)r   rr   r   r   s       rn   r   DisabledPipes.__init__&	  sH    
JJDHH!!$' dDJJrm   c                     U $ r   r`   r   s    rn   	__enter__DisabledPipes.__enter__.	  s    rm   c                 $    U R                  5         g r   )restore)r   r  s     rn   __exit__DisabledPipes.__exit__1	  s    rm   c                     U R                    H]  nXR                  R                  ;  a&  [        [        R
                  R                  US95      eU R                  R                  U5        M_     / U SS& g)zARestore the pipeline to its state when DisabledPipes was created.r,  N)r   rr   r   r   r&   E008r   r  r)  s     rn   rr  DisabledPipes.restore4	  sZ    JJD88333 !3!3!3!>??HH  &  Qrm   )r   rr   rX  )ra   rb   rc   rd   re   rp   r   rg   r   ro  rs  rr  rl   r`   rm   rn   r  r  #	  s2    3 H  T#Y  4  rm   r  r  c                     U  Vs/ s H1  n[        UR                  R                  5       UR                  5      PM3     sn$ s  snf )zMake a copy of a batch of examples, copying the predicted Doc as well.
This is used in contexts where we need to take ownership of the examples
so that they can be mutated, for instance during Language.evaluate and
Language.update.
)r<   r.  r  r/  )r  r	  s     rn   r  r  =	  s1     2::2GBDDIIK&:::s   8A 
ensure_docr
  .underscore_statec                   ^  [         R                  " U5          UR                  5       n[        U[        5      (       a!  UR                  5         UR                  5         gU 4S jU 5       nU H  nU" U5      nM     U Vs/ s H  oR                  5       UR                  S4PM!     n	nS/[        U5      [        U	5      -
  -  n
X-   n UR                  U5        M  s  snf ! [         aF    SS[        R                  " [        R                  " 5       5      4/nS/[        W5      S-
  -  n
X-   n Nhf = f! [         a#    UR                  5         UR                  5          gf = f)a  Worker for Language.pipe

ensure_doc (Callable[[Union[str, Doc]], Doc]): Function to create Doc from text
    or raise an error if the input is neither a Doc nor a string.
pipes (Iterable[Pipe]): The components to apply.
receiver (multiprocessing.Connection): Pipe to receive text. Usually
    created by `multiprocessing.Pipe()`
sender (multiprocessing.Connection): Pipe to send doc. Usually created by
    `multiprocessing.Pipe()`
underscore_state (Tuple[dict, dict, dict]): The data in the Underscore class
    of the parent.
Nc              3   8   >#    U  H  u  pT" X5      v   M     g 7fr   r`   )r  r  r  ry  s      rn   r  _apply_pipes.<locals>.<genexpr>d	  s      GU2C(
8--~s   )NNNr!   )r;   
load_stater   r   _WorkDoneSentinelr  r  r  r   r  r   msgpack_dumps	traceback
format_excr  BrokenPipeError)ry  r
  receiverr  rz  texts_with_ctxr^  r  r  	byte_docspaddingr9  	error_msgs   `            rn   r  r  F	  sU   & *+
	'%\\^N .*;<< GUD Dz  JNN#,,.#,,=IN)*c..AC	N.RSG# 	KK7   O
  	'e&9&9):N:N:P&QRSI)*c..AA.EFG&D	'  	 LLNNN	s=   AC!  !C! &C'!C! 	D4 C! !AD10D14*E! E!c                   f    \ rS rSrSrS\\   S\\R                     S\
SS4S jrSS	 jrSS
 jrSrg)r  i	  zAUtil for sending data to multiprocessing workers in Language.piper9  queuesr  ro   Nc                 p    [        U5      U l        [        [        U5      5      U l        X0l        SU l        g )Nr   )iterr9  r   r  r  count)r   r9  r  r  s       rn   r   _Sender.__init__	  s+     J	5=)$
rm   c                     [         R                  " [        U R                  [	        U R
                  5      5      U R                  5       H  u  pUR                  U5        M     g)z1Send chunk_size items from self.data to channels.N)r  islicer  r9  r   r  r  r  )r   itemr  s      rn   r  _Sender.send	  sC     ''		5-.
GD EE$K	
rm   c                     U =R                   S-  sl         U R                   U R                  :  a  SU l         U R                  5         gg)z^Tell sender that comsumed one item. Data is sent to the workers after
every chunk_size calls.
r!   r   N)r  r  r  r   s    rn   r  _Sender.step	  s6     	

a
::(DJIIK )rm   )r  r  r9  r  rX  )ra   rb   rc   rd   re   r   r   r   r|  r  ri   r   r  r  rl   r`   rm   rn   r  r  	  s@    KSM+/>GJ	rm   r  c                       \ rS rSrSrg)r  i	  r`   N)ra   rb   rc   rd   rl   r`   rm   rn   r  r  	  s    rm   r  )r~  r  r  multiprocessingr|  r   r  r  
contextlibr   r   r  r   dataclassesr   r   r   pathlibr	   timeitr
   rZ  typingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rc  r   cymem.cymemr   	thinc.apir   r   r   r   
thinc.utilr    r   r"   r#   r$   compatr%   errorsr&   r'   git_infor(   lang.punctuationr)   r*   r+   lang.tokenizer_exceptionsr,   r-   r   r.   pipe_analysisr/   r0   r1   schemasr2   r3   r4   r5   r6   rR  r7   r   r9   tokensr:   tokens.underscorer;   r   r<   r=   training.initializer>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   r   rI   r~   rJ   rK   rT  __file__parentDEFAULT_CONFIG_PATHload_configr   DEFAULT_CONFIG_PRETRAIN_PATHrL   rN   r   r   rp   rh   r  r   r   r  r  rg   r  r  r  r  r  r`   rm   rn   <module>r     s          0  ! "  )    &    A A (    $ ! W W A ! M M      ) 0 9
 
 
 ! &s
# 8n++.BB !!"56  $H~447WW  m$Q Q&(J<#:; 2! !DDT d  G G G D 4;Xg. ;4= ;6%S%0+>CD6HS(3-/016
 D$,-6 
6r :	 	 () rm   