
    h	                    4   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  S SK+r+S SK,r,S SK-r-S SK.r.S SK/r/S SK+J0r0J1r1  S SK2J3r3  S S	K4J5r5J6r6  S S
K7J8r8J9r9  S SK:J;r;J<r<J=r=J>r>J?r?J@r@JArA   S SKBrCS SK:JErEJFrFJGrG  SSKHJIrI  SSKJJKrKJCrCJLrLJMrM  SSKNJOrOJPrPJQrQ  SSKRJSrS  \(       a  SSKTJUrUJVrV  SSKWJXrXJYrY  SSKZJ[r[  \-R                  " \-R                  5      R                  r_Sr`/ SQra/ SQrb\R                  " S5      rd\R                  " 5       rf\fR                  \R                  " S5      5        \dR                  \f5         " S S5      rj " S S\/R                  5      rk " S S\l5      rm " S  S!\n5      roS"\pS#\q4S$ jrrS"\pS#\$\p   4S% jrsS"\pS#\(S&   4S' jrtS(\pS)\(S&   S#S4S* jruS+\S#\4S, jrvS+\)\p\4   S#\)\l\n4   4S- jrwS.\S#\4S/ jrx\o" 5       ryS0\y\y\y\m" 5       S1.S(\)\p\4   S2\)S3\q4   S4\)\p\\p   4   S5\)\p\\p   4   S6\)\p\\p   4   S7\)\\p\4   \<4   S#S&4S8 jjrzS0\y\y\y\m" 5       S1.S(\pS2\)S3\q4   S4\)\p\\p   4   S5\)\p\\p   4   S6\)\p\\p   4   S7\)\\p\4   \<4   S#S&4S9 jjr{SS0\y\y\y\m" 5       S:.S;\S<\$\\p\4      S2\)S3\q4   S4\)\p\\p   4   S5\)\p\\p   4   S6\)\p\\p   4   S7\)\\p\4   \<4   S#S&4S= jjr|\m" 5       S0\y\y\yS>S0S?.S7\)\\p\4   \<4   S<\\p\4   S2\)S3\q4   S4\)\p\\p   4   S5\)\p\\p   4   S6\)\p\\p   4   S@\qSA\qS#S&4SB jjr}S7\)\\p\4   \<4   S#\\p\\p\4   4   4SC jr~S7\<SD\!\$\p      S#\'\SE4   4SF jrS0\y\y\y\m" 5       S1.SG\)\\p4   S2\)S3\q4   S4\)\p\\p   4   S5\)\p\\p   4   S6\)\p\\p   4   S7\)\\p\4   \<4   S#S&4SH jjr\m" 5       S>4S+\)\p\4   SI\\p\4   SJ\qS#\<4SK jjr\m" 5       S>4SL\pSI\\p\4   SJ\q4SM jjrS#\!\p   4SN jrS(\pS#\$\p   4SO jr SSP\pSQ\pSR\qS#\$\q   4SS jjr SSQ\pSR\qS#\$\q   4ST jjrSU\pS#\'\p\p4   4SV jrSP\pS#\p4SW jrSQ\pS#\$\p   4SX jrSP\pS#\q4SY jrSP\pS#\p4SZ jrSP\pS#\$\p   4S[ jrS\\pS]\pS#\q4S^ jrS+\)\p\4   S#\\p\4   4S_ jrS+\)\p\4   S#\\p\4   4S` jrS(\pS#\q4Sa jrS(\pS#\4Sb jrSc\>Sd\>Se\>S#S4Sf jrSg\pS#\!\p   4Sh jrSS>Si.Sg\)\p\!\p   4   Sj\$\   Sk\qS#\GR(                  4Sl jjr\S+\)\p\4   S#\ \   4Sm j5       r\S#\\SS4   4Sn j5       rS#\q4So jrS#\q4Sp jrSq\S#\p4Sr jrSs\St\S#\q4Su jr SSv\qSw\qS#\$\K   4Sx jjrSy rS+\)\p\4   S#\%4Sz jrS{\\)\p\%4      S#\%4S| jrS{\\)\p\%4      S#\%4S} jrS{\\)\p\%4      S#\%4S~ jrS\\p/\4   S#\\p/\4   4S jrS\\p/\4   S\\p\4   S\pS#\4S jrS\\p\!\l   4   S#\\p\!\l   4   4S jrS\\p\!\l   4   S\pS\pS#\\p\!\l   4   4S jr SS\S\S\S\$\   S#\'\\4   4
S jjrS\S   S#\!S   4S jrS\S   S#\!S   4S jrS rS\\p\/ \4   4   S6\\p   S#\4S jrS\S\\p\\/\4   4   S6\\p   S#S4S jrS\\p\/ \4   4   S6\\p   S#\\p\4   4S jrS\\p\4   S\\p\\/\4   4   S6\\p   S#\\p\4   4S jrS+\)\p\4   S\\p\\/S4   4   S6\\p   S#\4S jrS+\)\p\4   S\\p\\/S4   4   S6\\p   S#\4S jrS(\pS\)\p\4   S#\4S jrS\pS#\p4S jrSL\pS#\p4S jrS\\p   SL\pS#\'\!\p   \!\q   4   4S jrS7\)\\p\4   \<4   S#\<4S jrS\\p\4   S#\\p\l4   4S jrS>S.Sq\\p\l4   S\qS#\\p\4   4S jjrS7\<S\p4S jrS7\<S\pS\S#S4S jr/ 4S>S.S\\p\4   S\!\p   S\qS#\ \'\!\p   \4      4S jjjrS\S#\!\p   4S jr\m" 5       4S\!\\p\$\   4      SI\\p\$\   4   S#\\p\$\   4   4S jjr " S S5      rS#\@4S jrS rS\S#\q4S jrS\pS#\q4S jrS\S   SSS(\pS\\pS\!S   \/\#4   S\"\p\4   S#\ S   4S jrS rS rS rS rS#\4S jrS#\\p\!\p   4   4S jrS rSS\S\pS#\q4S jjrSS\S\pS\qS#\4S jjrg! \D a    SrC GNf = f)    N)defaultdict)contextmanager)Path)
ModuleType)TYPE_CHECKINGAnyCallableDict	GeneratorIterableIteratorListMappingNoReturnOptionalPatternSetTupleTypeUnioncast)RegistryRegistryError)Requirement)InvalidSpecifierSpecifierSet)InvalidVersionVersion)AdamConfigConfigValidationErrorModelNumpyOps	Optimizerget_current_ops)compoundingdecayingfix_random_seed   )about)
CudaStreamcupyimportlib_metadata
is_windows)OLD_MODEL_SHORTCUTSErrorsWarningsORTH)LanguagePipeCallable)DocSpan)Vocabi)csdadeelengrcidlbmkptrusrtath)	paths	variablessystemnlp
componentscorporatrainingpretraining
initializespacyz)[%(asctime)s] [%(levelname)s] %(message)sc                       \ rS rSrSrSrg)ENV_VARSf   SPACY_CONFIG_OVERRIDES N)__name__
__module____qualname____firstlineno__CONFIG_OVERRIDES__static_attributes__rU       D/home/james-whalen/.local/lib/python3.13/site-packages/spacy/util.pyrR   rR   f   s    /r\   rR   c                   J   \ rS rSr\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r	\R
                  " SS	SS9r
\R
                  " SS
SS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SSSS9r\R
                  " SS5      r\R
                  " SSSS9r\R
                  " SSSS9r\S!S j5       r\S\\   4S j5       r\S\S\S\4S j5       r\S\S\S\\\ \!\\"4      4   4S j5       r#\S\S\S\$4S j5       r%S r&g)"registryj   rP   	languagesT)entry_pointsarchitectures
tokenizerslemmatizerslookupsdisplacy_colorsmisc	callbacksbatchersreaders
augmentersloggersscorersvectors	factoriesinternal_factoriesmodelsclireturnNc                 2    SSK JnJn  U(       d  U" 5         gg)z?Ensure the registry is populated with all necessary components.r)   )REGISTRY_POPULATEDpopulate_registryN)registrationsrv   rw   )clsrv   rw   s      r]   ensure_populatedregistry.ensure_populated   s     	I! "r\   c                     U R                  5         / n[        R                  " U 5       HE  u  p#UR                  S5      (       a  M  [	        U[
        5      (       d  M4  UR                  U5        MG     [        U5      $ )zList all available registries._)rz   inspect
getmembers
startswith
isinstancer   appendsorted)ry   namesnamevalues       r]   get_registry_namesregistry.get_registry_names   s_     	"--c2KD??3''Juh,G,GT" 3 e}r\   registry_name	func_namec           	         U R                  5         [        X5      (       dN  SR                  U R                  5       5      =(       d    Sn[	        [
        R                  R                  XS95      e[        X5      n UR                  U5      nU$ ! [         a    UR                  S5      (       a@  UR                  SS5      n UR                  U5      s $ ! [        R                   a     Of = fSR                  [        UR                  5       R                  5       5      5      =(       d    Sn[	        [
        R                   R                  X!US95      Sef = f)z,Get a registered function from the registry., noner   	availablespacy.spacy-legacy.r   reg_namer   N)rz   hasattrjoinr   r   r0   E892formatgetattrgetr   replace	cataloguer   get_allkeysE893)ry   r   r   r   regfunclegacy_namer   s           r]   r   registry.get   s/    	 s**IIc4467A6E 2 2 2 WXXc)	779%D   	##H--'///J77;// .. 		&););)=">?I6I"""i #  		1   ;B 2ECEC+(E*C++A+Ec           	         U R                  5         [        X5      (       dN  SR                  U R                  5       5      =(       d    Sn[	        [
        R                  R                  XS95      e[        X5      n UR                  U5      nU$ ! [         a    UR                  S5      (       a@  UR                  SS5      n UR                  U5      s $ ! [        R                   a     Of = fSR                  [        UR                  5       R                  5       5      5      =(       d    Sn[	        [
        R                   R                  X!US95      Sef = f)aE  Find information about a registered function, including the
module and path to the file it's defined in, the line number and the
docstring, if available.

registry_name (str): Name of the catalogue registry.
func_name (str): Name of the registered function.
RETURNS (Dict[str, Optional[Union[str, int]]]): The function info.
r   r   r   r   r   r   N)rz   r   r   r   r   r0   r   r   r   findr   r   r   r   r   r   r   )ry   r   r   r   r   	func_infor   r   s           r]   r   registry.find   s0    	 s**IIc4467A6E 2 2 2 WXXc)	+I   	##H--'///J88K00 .. 		&););)=">?I6I"""i #  		r   c                     U R                  5         [        X5      (       d  g[        X5      nUR                  S5      (       a!  UR	                  SS5      nX#;   =(       d    XC;   $ X#;   $ )z4Check whether a function is available in a registry.Fr   r   )rz   r   r   r   r   )ry   r   r   r   r   s        r]   hasregistry.has   se     	s**c)))#++HoFK#9{'99r\   rU   )rt   N)'rV   rW   rX   rY   r   createra   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   _entry_point_factoriesrp   rr   rs   classmethodrz   r   strr   r	   r   r
   r   r   intr   boolr   r[   rU   r\   r]   r_   r_   j   s8     +DII$$WoDQM!!'<dKJ""7MMKw	EG&&w0APTUOGV$?D  +DII$GHw	EG!!'<dKJw	EGw	EGw	EG '--g{QUV  *>?I gxdCF


7E
=C    49       4 !!,/!	c8E#s(O,,	-! !F 	  	  	  	  	 r\   r_   c                   j   ^  \ rS rSrSr\R                  S.S\SS4U 4S jjjrS r	SS	 jr
S
 rSrU =r$ )SimpleFrozenDict   zSimplified implementation of a frozen dict, mainly used as default
function or method argument (for arguments that should default to empty
dictionary). Will raise an error if user or spaCy attempts to add to dict.
errorr   rt   Nc                2   > [         TU ]  " U0 UD6  Xl        g)zInitialize the frozen dict. Can be initialized with pre-defined
values.

error (str): The error message when user tries to assign to dict.
N)super__init__r   )selfr   argskwargs	__class__s       r]   r   SimpleFrozenDict.__init__   s     	$)&)
r\   c                 ,    [        U R                  5      eNNotImplementedErrorr   )r   keyr   s      r]   __setitem__SimpleFrozenDict.__setitem__       !$**--r\   c                 ,    [        U R                  5      er   r   )r   r   defaults      r]   popSimpleFrozenDict.pop   r   r\   c                 ,    [        U R                  5      er   r   )r   others     r]   updateSimpleFrozenDict.update   r   r\   r   )rV   rW   rX   rY   __doc__r0   E095r   r   r   r   r   r[   __classcell__r   s   @r]   r   r      s=    
 ,2;; S T  ... .r\   r   c                      ^  \ rS rSrSr\R                  S.S\SS4U 4S jjjrS r	S	 r
S
 rS rS rS rS rS rSrU =r$ )SimpleFrozenList   an  Wrapper class around a list that lets us raise custom errors if certain
attributes/methods are accessed. Mostly used for properties like
Language.pipeline that return an immutable list (and that we don't want to
convert to a tuple to not break too much backwards compatibility). If a user
accidentally calls nlp.pipeline.append(), we can raise a more helpful error.
r   r   rt   Nc                ,   > Xl         [        TU ]  " U6   g)z`Initialize the frozen list.

error (str): The error message when user tries to mutate the list.
N)r   r   r   )r   r   r   r   s      r]   r   SimpleFrozenList.__init__  s    
 
$r\   c                 ,    [        U R                  5      er   r   r   r   r   s      r]   r   SimpleFrozenList.append  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   clearSimpleFrozenList.clear  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   extendSimpleFrozenList.extend  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   insertSimpleFrozenList.insert  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   r   SimpleFrozenList.pop  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   removeSimpleFrozenList.remove  r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   reverseSimpleFrozenList.reverse   r   r\   c                 ,    [        U R                  5      er   r   r   s      r]   sortSimpleFrozenList.sort#  r   r\   )rV   rW   rX   rY   r   r0   E927r   r   r   r   r   r   r   r   r   r   r[   r   r   s   @r]   r   r      sV     ,2;;  S  4    ........ .r\   r   langrt   c                 (    U [         R                  ;   $ )a  Check whether a Language class is already loaded. Language classes are
loaded lazily, to avoid expensive setup code associated with the language
data.

lang (str): Two-letter language code, e.g. 'en'.
RETURNS (bool): Whether a Language class has been loaded.
)r_   ra   )r   s    r]   lang_class_is_loadedr   '  s     8%%%%r\   c                 `   SSK nU S:X  a  g/ n[        R                  " UR                  R                  5       HV  nUR
                  nUS:X  a  UR                  S5        M(  [        R                  " U5      (       d  ME  UR                  U5        MX     [        R                  " XSS9nUS:X  a  gU$ )a&  
Given an IETF language code, find a supported spaCy language that is a
close match for it (according to Unicode CLDR language-matching rules).
This allows for language aliases, ISO 639-2 codes, more detailed language
tags, and close matches.

Returns the language code if a matching language is available, or None
if there is no matching language.

>>> find_matching_language('en')
'en'
>>> find_matching_language('pt-BR')  # Brazilian Portuguese
'pt'
>>> find_matching_language('fra')  # an ISO 639-2 code for French
'fr'
>>> find_matching_language('iw')  # obsolete alias for Hebrew
'he'
>>> find_matching_language('no')  # Norwegian
'nb'
>>> find_matching_language('mo')  # old code for ro-MD
'ro'
>>> find_matching_language('zh-Hans')  # Simplified Chinese
'zh'
>>> find_matching_language('zxx')
None
r   Nxxmul	   )max_distance)

spacy.langpkgutiliter_modulesr   __path__r   r   	langcodestag_is_validclosest_supported_match)r   rP   possible_languagesmodinfocodematchs         r]   find_matching_languager  2  s    6 t| ''

(;(;<||4<%%e,##D))%%d+ = --dUVWE~r\   r4   c                 H   U [         R                  ;   a  [         R                  R                  U 5      $  [        R                  " SU  3S5      n[        U [        XR                  S   5      5        [         R                  R                  U 5      $ ! [
         a  n [        U 5      nO&! [        R                  R                   a    Sn Of = fU(       a!  Un [        R                  " SU  3S5      n SnAN[        [        R                  R                  XS95      UeSnAff = f)zuImport and load a Language class.

lang (str): IETF language code, such as 'en'.
RETURNS (Language): Language class.
z.lang.rP   N)r   errr   )r_   ra   r   	importlibimport_moduleImportErrorr  r   
tag_parserLanguageTagErrorr0   E048r   set_lang_classr   __all__)r   moduler  r  s       r]   get_lang_classr  i  s     x!!!!!%%d++	S,,vdV_gFF 	tWV^^A->?@!!$''  	S.t4''88  "006$'J!&++"4"4$"4"HIsR	Ss;   B 
D!B'&D' C
D	C

&D5'DD!r   ry   c                 >    [         R                  R                  XS9  g)zSet a custom Language class name that can be loaded via get_lang_class.

name (str): Name of Language class.
cls (Language): Language class.
)r   N)r_   ra   register)r   ry   s     r]   r  r    s     /r\   pathc                 F    [        U [        5      (       a  [        U 5      $ U $ )zEnsure string is converted to a Path.

path (Any): Anything. If string, it's converted to Path.
RETURNS: Path or original argument.
)r   r   r   r  s    r]   ensure_pathr    s      $Dzr\   c                 L   [        U 5      n U R                  5       (       a  [        R                  " U 5      $ U R	                  U R
                  S-   5      n U R                  5       (       a  [        R                  " U 5      $ [        [        R                  R                  U S95      e)zLoad JSON language data using the given path as a base. If the provided
path isn't present, will attempt to load a gzipped version before giving up.

path (str / Path): The data to load.
RETURNS: The loaded data.
z.gzr  )r  existssrsly	read_jsonwith_suffixsuffixread_gzip_json
ValueErrorr0   E160r   r  s    r]   load_language_datar    sz     tD{{}}t$$DKK%/0D{{}}##D))
V[[''T'2
33r\   r  c                 *   [        U S5      (       d/  [        [        R                  R	                  [        U 5      S95      e[        [        [        R                  [        R                  U R                     R                  5      5      nUR                  $ )zdGet the path of a Python module.

module (ModuleType): The Python module.
RETURNS (Path): The path.
rW   )r  )r   r  r0   E169r   reprr   r   osPathLikesysmodulesrW   __file__parent)r  	file_paths     r]   get_module_pathr)    sh     6<((++4<+@AAT"++s{{63D3D'E'N'NOPIr\   Tvocabdisableenableexcludeconfigr+  r8   r,  r-  r.  r/  c                >   UUUUUS.n[        U [        5      (       a  U R                  S5      (       a   [        U R	                  SS5      5      " 5       $ [        U 5      (       a  [        U 40 UD6$ [        U 5      R                  5       (       a  [        [        U 5      40 UD6$ O[        U S5      (       a  [        U 40 UD6$ U [        ;   a.  [        [        R                  R                  U [        U    S95      e[        [        R                   R                  U S95      e)a{  Load a model from a package or data path.

name (str): Package name or model path.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
    a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
exclude (Union[str, Iterable[str]]):  Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
    keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r*  zblank: r  )r   fullr   )r   r   r   r  r   
is_packageload_model_from_packager   r  load_model_from_pathr   r/   IOErrorr0   E941r   E050)r   r+  r,  r-  r.  r/  r   s          r]   
load_modelr:    s    , F $??8$$!$,,x"<=??d*4:6:::'T
=f== 	x	 	 #D3F33""fkk((d9LT9R(STT
&++$$$$/
00r\   c                P    [         R                  " U 5      nUR                  XX4US9$ )aG  Load a model from an installed package.

name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
    a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
    pipes will be loaded but they won't be run unless you explicitly
    enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
    pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
    components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
    keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r*  )r  r  load)r   r+  r,  r-  r.  r/  ry   s          r]   r5  r5    s*    2 
!
!$
'C88%Y_8``r\   )metar+  r,  r-  r.  r/  
model_pathr=  c          	         U R                  5       (       d&  [        [        R                  R	                  U S95      eU(       d  [        U 5      nU S-  n[        USS9n[        XxS9n[        UUUUUUS9n	U	R                  XUS9$ )a  Load a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path.

model_path (Path): Model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
    a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
    pipes will be loaded but they won't be run unless you explicitly
    enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
    pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
    components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
    keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r  z
config.cfgTfor_overrides)	overrides)r+  r,  r-  r.  r=  )r.  rB  )
r  r7  r0   E052r   get_model_metadict_to_dotload_configload_model_from_config	from_disk)
r>  r=  r+  r,  r-  r.  r/  config_pathrB  rJ   s
             r]   r6  r6    s    8 fkk((j(9::j)|+KF$7I:F
 C ==	=JJr\   F)r=  r+  r,  r-  r.  	auto_fillvalidaterJ  rK  c                   SU ;  a&  [        [        R                  R                  U S95      eU S   nSU;  d  US   c&  [        [        R                  R                  US95      e[        US   5      n	U	R                  U UUUUUUUS9n
U
$ )a  Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object.

name (str): Package name or model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
    a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
    pipes will be loaded but they won't be run unless you explicitly
    enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
    pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
    components won't be loaded.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
RETURNS (Language): The loaded nlp object.
rJ   r/  r   )r+  r,  r-  r.  rJ  rK  r=  )r  r0   E985r   E993r  from_config)r/  r=  r+  r,  r-  r.  rJ  rK  
nlp_configlang_clsrJ   s              r]   rG  rG  2  s    : F++6+:;;JZ:f#5#=++:+>?? j01H


  	C Jr\   c                     U R                  S0 5      R                  5        VVs0 s H  u  pSU;  d  M  SU;   d  M  X_M     snn$ s  snnf )zRETURNS (List[str]): All sourced components in the original config,
e.g. {"source": "en_core_web_sm"}. If the config contains a key
"factory", we assume it refers to a component factory.
rK   factorysource)r   items)r/  r   cfgs      r]   get_sourced_componentsrX  d  sW      L"5;;==IDC 	$,O 		=  s   AAA	dot_names.c                    0 n/ n/ nU H  nUc  UR                  U5        M  UR                  S5      S   nXb;  aT  [        R                  X   5      (       a  [        R	                  SX   05      S   nO[        R	                  X   5      nXrU'    UR                  [        X%5      5        M     U(       a	  [        XS9e[        U5      $ ! [         a,    SU 3nUR                  UR                  S5      US.5         M  f = f)a&  Resolve one or more "dot notation" names, e.g. corpora.train.
The paths could point anywhere into the config, so we don't know which
top-level section we'll be looking within.

We resolve the whole top-level section, although we could resolve less --
we could find the lowest part of the tree.
.r   r/  znot a valid section reference: )locmsg)r/  errors)	r   splitr_   
is_promiseresolvedot_to_objectKeyErrorr!   tuple)	r/  rY  resolvedoutputr^  r   sectionresultr]  s	            r]   resolve_dot_namesri  r  s     HFF<MM$jjoa(G&&&v77%--x.IJ8TF%--fo>F$*!DmH;< $ #6AA=  D7v>djjocBCDs   C		2C?>C?	init_filec          
         [        U 5      R                  n[        U5      nUS    SUS    SUS    3nXh-  n	UR                  5       (       d&  [	        [
        R                  R                  U	S95      e[        U	UUUUUUS9$ )aQ  Helper function to use in the `load()` method of a model package's
__init__.py.

vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
    a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
    pipes will be loaded but they won't be run unless you explicitly
    enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
    pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
    components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
    keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r   r}   r   -versionr  )r+  r=  r,  r-  r.  r/  )	r   r'  rD  r  r7  r0   rC  r   r6  )
rj  r+  r,  r-  r.  r/  r>  r=  data_dir	data_paths
             r]   load_model_from_init_pyrp    s    2 i''J*%Dv,qfaY/@AH%Ifkk((i(899 r\   rB  interpolatec                 N   [        U 5      n[        [        S9n[        U5      S:X  a,  UR	                  [
        R                  R                  5       XS9$ U(       a  UR                  5       (       d'  [        [        R                  R                  USS95      eUR                  X1US9$ )ak  Load a config file. Takes care of path validation and section order.

path (Union[str, Path]): Path to the config file or "-" to read from stdin.
overrides: (Dict[str, Any]): Config overrides as nested dict or
    dict keyed by section values in dot notation.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
section_orderrl  rB  rq  zconfig filer  r   )r  r    CONFIG_SECTION_ORDERr   from_strr$  stdinreadis_filer7  r0   E053r   rH  )r  rB  rq  rI  r/  s        r]   rF  rF    s     d#K"67F
;3IINN	  
 	
 +"5"5"7"7&++,,+M,RSS+   
 	
r\   textc                 8    [        [        S9R                  XUS9$ )zLoad a full config from a string. Wrapper around Thinc's Config.from_str.

text (str): The string config to load.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
rs  ru  )r    rw  rx  )r}  rB  rq  s      r]   load_config_from_strr    s'      45>>{ ?  r\   c                  l    [        [        R                  R                  5       R	                  5       5      $ )zvList all model packages currently installed in the environment.

RETURNS (List[str]): The string names of the models.
)listr_   rr   r   r   rU   r\   r]   get_installed_modelsr    s%    
 '')..011r\   c                 d     [         R                  " U 5      $ ! [         R                   a     gf = f)zGet the version of an installed package. Typically used to get model
package versions.

name (str): The name of the installed Python package.
RETURNS (str / None): The version or None if package not installed.
N)r-   rm  PackageNotFoundErrorr3  s    r]   get_package_versionr    s0    !))$//22 s    //rm  
constraintprereleasesc                     US   R                  5       (       a  SU 3n [        U5      n[        U 5      n X#l        X;   $ ! [        [        4 a     gf = f)a  Check if a version (e.g. "2.0.0") is compatible given a version
constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
it's interpreted as =={version}.

version (str): The version to check.
constraint (str): The constraint string.
prereleases (bool): Whether to allow prereleases. If set to False,
    prerelease versions will be considered incompatible.
RETURNS (bool / None): Whether the version is compatible, or None if the
    version or constraint are invalid.
r   ==N)isdigitr   r   r   r   r  )rm  r  r  specs       r]   is_compatible_versionr    se     !}*&
J''" #? n- s   ? AAc                    U S   R                  5       (       a  g [        U 5      nXl        U Vs/ s H  o3PM     nn[	        U5      S:X  a  US   R
                  S;   a  g[        S U 5       5      (       a  g[        S U 5       5      n[        S U 5       5      nU(       a  U(       a  gg! [         a     g f = fs  snf )	Nr   Fr)   >>=Tc              3   >   #    U  H  oR                   S ;   v   M     g7f)r  Noperator.0sps     r]   	<genexpr>+is_unconstrained_version.<locals>.<genexpr>)  s     
15R;;4 5   c              3   >   #    U  H  oR                   S ;   v   M     g7f))<z<=Nr  r  s     r]   r  r  +       ?2KK;.r  c              3   >   #    U  H  oR                   S ;   v   M     g7f)r  Nr  r  s     r]   r  r  ,  r  r  )r  r   r   r  lenr  any)r  r  r  r  specs	has_upper	has_lowers          r]   is_unconstrained_versionr    s     !}J' #$BR$E
5zQ58,,;

15
111???I???IY!    s   B, B<,
B98B9requirementc                 Z    [        U 5      nUR                  [        UR                  5      4$ )z@Split a requirement like spacy>=1.2.3 into ("spacy", ">=1.2.3").)r   r   r   	specifier)r  reqs     r]   split_requirementr  4  s$    
k
"CHHc#--())r\   c                 V    [        U 5      R                  nSU  SUS    SUS   S-    S3$ )zWGenerate a version range like >=1.2.3,<1.3.0 based on a given version
(e.g. of spaCy).
r  z,<r   r[  r)   z.0)r   release)rm  r  s     r]   get_minor_version_ranger  :  s<     g&&Gy71:,a
Q'7r::r\   c                      [        U 5      nU H!  nUR                  S;   d  M  UR                  s  $    g! [         a     gf = f)z>From a version range like >=1.2.3,<1.3.0 return the lower pin.)r  r  z~=N)r   r  rm  	Exception)r  specsetr  s      r]   get_model_lower_versionr  B  sN    z*D}} 22||# 
   s   5 5 5 
AAc                 ,    [        U 5      R                  $ )zCheck whether a version is a prerelease version.

version (str): The version, e.g. "3.0.0.dev1".
RETURNS (bool): Whether the version is a prerelease version.
)r   is_prereleaserm  s    r]   is_prerelease_versionr  N  s     7)))r\   c                 ,    [        U 5      R                  $ )zGenerate the base version without any prerelease identifiers.

version (str): The version, e.g. "3.0.0.dev1".
RETURNS (str): The base version, e.g. "3.0.0".
)r   base_versionr  s    r]   get_base_versionr  W  s     7(((r\   c                 |     [        U 5      nUR                   SUR                   3$ ! [        [        4 a     gf = f)zGet the major + minor version (without patch or prerelease identifiers).

version (str): The version.
RETURNS (str): The major + minor version or None if version is invalid.
Nr[  )r   	TypeErrorr   majorminor)rm  vs     r]   get_minor_versionr  `  sE    G ggYay!! ~& s   ( ;;	version_a	version_bc                 b    [        U 5      n[        U5      nUSL=(       a    USL=(       a    X#:H  $ )a6  Compare two versions and check if they match in major and minor, without
patch or prerelease identifiers. Used internally for compatibility checks
that should be insensitive to patch releases.

version_a (str): The first version
version_b (str): The second version.
RETURNS (bool): Whether the versions match.
N)r  )r  r  abs       r]   is_minor_version_matchr  m  s2     	)$A)$AD=5Qd]5qv5r\   c                 T   [        U 5      n U R                  R                  5       (       d0  [        [        R
                  R                  U R                  S95      eU R                  5       (       a  U R                  5       (       d1  [        [        R                  R                  U R                  SS95      e[        R                  " U 5      nS H7  nX!;  d  X   (       a  M  [        [        R                  R                  US95      e   SU;   Ga  [        [        R                  US   5      (       d  [!        US   5      n[#        U5      nUb  SU-   nOSU;   a	  S	US   -   nOS
n[$        R&                  R                  US    SUS    3US   U[        R                  S9n[(        R*                  " U5        [-        US   5      (       a^  [$        R.                  R                  US    SUS    3US   US   [1        [        R                  5      S9n[(        R*                  " U5        U$ )zLoad a model meta.json from a path and validate its contents.

path (Union[str, Path]): Path to meta.json.
RETURNS (Dict[str, Any]): The loaded meta.
r  	meta.jsonrv  )r   r   rm  )settingspacy_versionr  spacy_git_versionzgit commit zversion unknownr   r}   r   rm  )modelmodel_versionrm  current)r  r  rm  example)r  r'  r  r7  r0   rC  r   r{  r|  r  r  r  E054r  r*   __version__r  r  r1   W095warningswarnr  W094r  )r  r=  r  lower_versionwarn_msgs        r]   	load_metar  {  s    tD;;fkk((dkk(:;;;;==fkk((dkk(LMM??4 D.dmmV[[///@AA / $$U%6%6_8MNN3D4IJM,];M( #m 3$, -5H0I I 1}}++faV~6"9o%))	 , H MM(##D$9::}}++faV~6"9o_-/0A0AB	 , H MM(#Kr\   c                 4    [        U 5      n[        US-  5      $ )zGet model meta.json from a directory path and validate its contents.

path (str / Path): Path to model directory.
RETURNS (Dict[str, Any]): The model's meta data.
r  )r  r  )r  r>  s     r]   rD  rD    s     T"JZ+-..r\   c                 @     [         R                  " U 5        g!    g= f)zCheck if string maps to a package installed via pip.

name (str): Name of package.
RETURNS (bool): True if installed package, False if not.
TF)r-   distributionr3  s    r]   r4  r4    s"    ''-s    c                     [         R                  " U 5      n[        [        [        [
        [        R                  4   UR                  5      5      R                  $ )zlGet the path to an installed package.

name (str): Package name.
RETURNS (Path): Path to installed package.
)
r  r  r   r   r   r   r"  r#  r&  r'  )r   pkgs     r]   get_package_pathr    s>     
!
!$
'CU3+,cll;<CCCr\   r  targetreplacementc                 D   U R                  5        H;  nXR                  ;   d  M  X#R                  UR                  R                  U5      '   M=     U R                  5        H=  nUR                   H*  nUR	                  U5      UL d  M  UR                  XB5        M,     M?     g)zReplace a node within a model with a new one, updating refs.

model (Model): The parent model.
target (Model): The target node.
replacement (Model): The node to replace the target with.
N)walklayersindex	ref_namesmaybe_get_refset_ref)r  r  r  noderef_names        r]   replace_model_noder    sx     

[[ 5@KK))&12  

H!!(+v5X3 ' r\   commandc                 >    [         R                  " U [        (       + S9$ )zSplit a string command using shlex. Handles platform compatibility.
command (str) : The command to split
RETURNS (List[str]): The split command.
)posix)shlexr_  r.   )r  s    r]   split_commandr    s    
 ;;w*n55r\   )ry  capturery  r  c          
         [        U [        5      (       a  [        U 5      nU nOU nSR                  U 5      n [        R
                  " U[        R                  R                  5       USSU(       a  [        R                  OSU(       a  [        R                  OSS9nUR                  S:w  ac  U(       a\  SU S	3nUS
UR                   3-  nUR                   b  US-  nXeR                   -  n[        R"                  " U5      nXWl        XGl        UeUR                  S:w  a   [(        R*                  " UR                  5        U$ ! [         a+    [        [        R                  R                  XCS   S95      Sef = f)a  Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
command (str / List[str]): The command. If provided as a string, the
    string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False,
    the stdout and stderr will not be redirected, and if there's an error,
    sys.exit will be called with the return code. You should use capture=False
    when you want to turn over execution to the command, and capture=True
    when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object.
 utf8FN)envinputencodingcheckstdoutstderrr   )str_commandtoolzError running command:

z

zSubprocess exited with status z$

Process log (stdout and stderr):

)r   r   r  r   
subprocessrunr"  environcopyPIPESTDOUTFileNotFoundErrorr0   E970r   
returncoder  SubprocessErrorretr  r$  exit)r  ry  r  cmd_listcmd_strr  messager   s           r]   run_commandr    sJ   $ '3 )((7#nn

!&-:??4(/:$$T
 ~~w.wit<3CNN3CDD::!ABGzz!G**73		1	 J%    KK7!E
	s   A$E 5E6c              #   6  #    [         R                  " 5       n[        U 5      R                  5       n[        R                  " [        U5      5         Uv   [        R                  " [        U5      5        g! [        R                  " [        U5      5        f = f7f)aA  Change current working directory and returns to previous on exit.
path (str / Path): The directory to navigate to.
YIELDS (Path): The absolute path to the current working directory. This
    should be used if the block needs to perform actions within the working
    directory, to prevent mismatches with relative paths.
N)r   cwdra  r"  chdirr   )r  prev_cwdr  s      r]   working_dirr    s^      xxzH4j  "GHHS\ 
XXs   ABA5  B5!BBc               #     #    [        [        R                  " 5       5      n U v   S n [        R                  S:  a  [
        R                  " [        U 5      US9  g[
        R                  " [        U 5      US9  g! [         a;  n[        R                  " [        R                  R                  XS95         SnAgSnAff = f7f)zExecute a block in a temporary directory and remove the directory and
its contents at the end of the with block.
YIELDS (Path): The path of the temp directory.
c                 ^    [         R                  " U[        R                  5        U " U5        g r   )r"  chmodstatS_IWRITE)rmfuncr  exs      r]   force_remove"make_tempdir.<locals>.force_remove6  s    
t}}%tr\   )      )onexc)onerror)dirr]  N)r   tempfilemkdtempr$  version_infoshutilrmtreer   PermissionErrorr  r  r1   W091r   )dr  es      r]   make_tempdirr$  +  s      	X A
G:w&MM#a&5MM#a&,7 :hmm**q*899:s:   &C2A; CA; :C;
C 1B;6C;C  Cc                       [        5       R                  R                  S:X  a  g[        5       R                  R                  S:X  a  g  SSKn g! [         a     Nf = f! [         a     gf = f)zCheck if user is running spaCy from a Jupyter or Colab notebook by
detecting the IPython kernel. Mainly used for the displaCy visualizer.
RETURNS (bool): True if in Jupyter/Colab, False if not.
ZMQInteractiveShellTzgoogle.colab._shellr   NF)get_ipythonr   rV   rW   	NameErrorgoogle.colabr  )googles    r]   is_in_jupyterr+  C  sv    =""++/DD=""--1FF G
    s(   "A "A 
A 
AA
A,+A,c                  P    [        [        S5      =(       d    [        [        S5      $ )zCheck if user is running spaCy from an interactive Python
shell. Will return True in Jupyter notebooks too.
RETURNS (bool): True if in interactive mode, False if not.
ps1ps2)r   r$  rU   r\   r]   is_in_interactiver/  [  s     35'#u"55r\   objc                 *   [        U S5      (       a  U R                  b  U R                  $ [        U S5      (       a  U R                  $ [        U S5      (       a1  [        U R                  S5      (       a  U R                  R                  $ [	        U 5      $ )zGet a human-readable name of a Python object, e.g. a pipeline component.

obj (Any): The Python object, typically a function or class.
RETURNS (str): A human-readable name.
r   rV   r   )r   r   rV   r   r!  )r0  s    r]   get_object_namer2  d  sq     sF 4xxsJ||sK  WS]]J%G%G}}%%%9r\   func1func2c                    [        U 5      (       a  [        U5      (       d  g[        U S5      (       a  [        US5      (       d  gU R                  UR                  :H  n[        R                  " U 5      [        R                  " U5      :H  n[        R
                  " U 5      [        R
                  " U5      :H  nU=(       a    U=(       a    U$ )a  Approximately decide whether two functions are the same, even if their
identity is different (e.g. after they have been live reloaded). Mostly
used in the @Language.component and @Language.factory decorators to decide
whether to raise if a factory already exists. Allows decorator to run
multiple times with the same function.

func1 (Callable): The first function.
func2 (Callable): The second function.
RETURNS (bool): Whether it's the same function (most likely).
FrX   )callabler   rX   r~   getfilegetsourcelines)r3  r4  	same_name	same_file	same_codes        r]   is_same_funcr<  s  s     E??(5//5.))1O1O""e&8&88I&'//%*@@I&&u-1G1G1NNI00y0r\   requirenon_blockingc                 d    [        5       n[        c  g [        U[        5      (       a  g [        US9$ )N)r>  )r%   r+   r   r#   )r=  r>  opss      r]   get_cuda_streamrA    s0     
C	C	"	"|44r\   c                     [         c  U$ [         R                  " UR                  SUR                  S9nUR	                  XS9  U$ )NC)orderdtype)stream)r,   ndarrayshaperE  set)rF  numpy_arrayarrays      r]   	get_asyncrL    s>    |[..cARARS		+	-r\   c           	      |   [        U 5      n U R                  SS9 nUR                  5       R                  S5      nS S S 5        SR	                  W Vs/ s H2  o3R                  5       (       d  M  S[        R                  " U5      -   PM4     sn5      n[        R                  " U5      $ ! , (       d  f       Ns= fs  snf )Nr  )r  
|^)	r  openrz  r_  r   stripreescapecompile)r  file_entriespiece
expressions        r]   
read_regexrZ    s    tD	F	#u**,$$T* 
$-4FWE	ryy	WFJ ::j!! 
$	# 	Gs    B(B9.B9(
B6rW  c                     SR                  U  Vs/ s H  oR                  5       (       d  M  SU-   PM      sn5      n[        R                  " U5      $ s  snf )zCompile a sequence of prefix rules into a regex object.

entries (Iterable[Union[str, Pattern]]): The prefix rules, e.g.
    spacy.lang.punctuation.TOKENIZER_PREFIXES.
RETURNS (Pattern): The regex object. to be used for Tokenizer.prefix_search.
rO  rP  r   rR  rS  rU  rW  rX  rY  s      r]   compile_prefix_regexr^    sB     GMG5{{};3;GMNJ::j!! N
   A	Ac                     SR                  U  Vs/ s H  oR                  5       (       d  M  US-   PM      sn5      n[        R                  " U5      $ s  snf )zCompile a sequence of suffix rules into a regex object.

entries (Iterable[Union[str, Pattern]]): The suffix rules, e.g.
    spacy.lang.punctuation.TOKENIZER_SUFFIXES.
RETURNS (Pattern): The regex object. to be used for Tokenizer.suffix_search.
rO  $r\  r]  s      r]   compile_suffix_regexrb    sB     GMG5{{};53;GMNJ::j!! Nr_  c                     SR                  U  Vs/ s H  oR                  5       (       d  M  UPM     sn5      n[        R                  " U5      $ s  snf )zCompile a sequence of infix rules into a regex object.

entries (Iterable[Union[str, Pattern]]): The infix rules, e.g.
    spacy.lang.punctuation.TOKENIZER_INFIXES.
RETURNS (regex object): The regex object. to be used for Tokenizer.infix_finditer.
rO  r\  r]  s      r]   compile_infix_regexrd    s<     gGgU5gGHJ::j!! Hs
   AAdefault_funcc                 8    [         R                  " [        X5      $ )a=  Extend an attribute function with special cases. If a word is in the
lookups, the value is returned. Otherwise the previous function is used.

default_func (callable): The default function to execute.
*lookups (dict): Lookup dictionary mapping string to attribute value.
RETURNS (callable): Lexical attribute getter.
)	functoolspartial_get_attr_unless_lookup)re  rf   s     r]   add_lookupsrj    s     4lLLr\   rf   stringc                 :    U H  nX#;   d  M
  X2   s  $    U " U5      $ r   rU   )re  rf   rk  lookups       r]   ri  ri    s)     >!  r\   base_exceptionsc           	         [        U 5      nU H  nUR                  5        H  u  pE[        S U 5       5      (       d&  [        [        R
                  R                  XES95      eSR                  S U 5       5      nXF:w  d  Ma  [        [        R                  R                  XFS95      e   UR                  U5        M     [        USS5      nU$ )a  Update and validate tokenizer exceptions. Will overwrite exceptions.

base_exceptions (Dict[str, List[dict]]): Base exceptions.
*addition_dicts (Dict[str, List[dict]]): Exceptions to add to the base dict, in order.
RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
c              3   P   #    U  H  n[        U[           [        5      v   M     g 7fr   )r   r3   r   r  attrs     r]   r  update_exc.<locals>.<genexpr>  s     K{tz$t*c22{s   $&)r   orthsr1  c              3   2   #    U  H  o[            v   M     g 7fr   r2   rq  s     r]   r  rs    s     $HKD$ZK   'u   ’)dictrV  allr  r0   E055r   r   E056r   
expand_exc)rn  addition_dictsexc	additionsorthtoken_attrsdescribed_orths          r]   
update_excr    s     
C#	!*!2DK{KKK !3!3!3!PQQWW$HK$HHN% !3!3!3!STT "3 	

9 $ S#u
%CJr\   excssearchr   c           	          S n[        U 5      nU R                  5        H9  u  pVX;   d  M  UR                  X5      nU Vs/ s H  o" XU5      PM     n	nXU'   M;     U$ s  snf )aJ  Find string in tokenizer exceptions, duplicate entry and replace string.
For example, to add additional versions with typographic apostrophes.

excs (Dict[str, List[dict]]): Tokenizer exceptions.
search (str): String to find and replace.
replace (str): Replacement.
RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
c                 Z    [        U 5      nU[           R                  X5      U[        '   U$ r   )rx  r3   r   )tokenr  r   fixeds       r]   
_fix_tokenexpand_exc.<locals>._fix_token   s(    UDk))&:dr\   )rx  rV  r   )
r  r  r   r  new_excstoken_stringtokensnew_keyt	new_values
             r]   r|  r|    sk    
 DzH $

!"**6;GAGHAAw7IH )W	 !-
 O Is    Alengthstartstopstepc                     Ub  US:X  d  [        [        R                  5      eUc  SnO
US:  a  X-  n[        U [	        SU5      5      nUc  U nO
US:  a  X -  n[        U [	        X5      5      nX4$ )Nr)   r   )r  r0   E057minmax)r  r  r  r  s       r]   normalize_slicer    sy     LDAI%%}	Au&E|	vs5'(D;r\   spansr7   c                 .   S n[        XSS9n/ n[        5       nU Hj  nUR                  U;  d  M  UR                  S-
  U;  d  M*  UR	                  U5        UR                  [        UR                  UR                  5      5        Ml     [        US S9nU$ )ah  Filter a sequence of spans and remove duplicates or overlaps. Useful for
creating named entities (where one token can only be part of one entity) or
when merging spans with `Retokenizer.merge`. When spans overlap, the (first)
longest span is preferred over shorter spans.

spans (Iterable[Span]): The spans to filter.
RETURNS (List[Span]): The filtered spans.
c                 N    U R                   U R                  -
  U R                  * 4$ r   )endr  spans    r]   <lambda>filter_spans.<locals>.<lambda>)  s    DJJ!6 Dr\   T)r   r   r)   c                     U R                   $ r   )r  r  s    r]   r  r  2  s    TZZr\   )r   )r   rI  r  r  r   r   range)r  get_sort_keysorted_spansrh  seen_tokensr  s         r]   filter_spansr     s     EL%4@LFEK::[(TXX\-LMM$uTZZ:;	 
 F 78FMr\   c                  :    [        [        R                  " U 6 5      $ r   )r  	itertoolschain)r  s    r]   filter_chain_spansr  6  s    	/00r\   c                      [         $ r   )r  rU   r\   r]   make_first_longest_spans_filterr  :  s    r\   gettersc                 @    [         R                  " [        X5      5      $ r   )r  msgpack_dumpsto_dict)r  r.  s     r]   to_bytesr  >  s    ww899r\   
bytes_datasettersc                 B    [        [        R                  " U 5      X5      $ r   )	from_dictr  msgpack_loads)r  r  r.  s      r]   
from_bytesr  B  s    
 U((4gGGr\   c                     0 nU R                  5        H(  u  p4UR                  S5      S   U;  d  M  U" 5       X#'   M*     U$ Nr[  r   rV  r_  )r  r.  
serializedr   getters        r]   r  r  J  sA     J}}99S>!G+$hJO ' r\   r]  c                     UR                  5        H0  u  p4UR                  S5      S   U;  d  M  X0;   d  M&  U" X   5        M2     U $ r  r  )r]  r  r.  r   setters        r]   r  r  U  s@    
 }}99S>!G+
38 ' Jr\   writersc                     [        U 5      n U R                  5       (       d  U R                  5         UR                  5        H)  u  p4UR	                  S5      S   U;  d  M  U" X-  5        M+     U $ r  )r  r  mkdirrV  r_  )r  r  r.  r   writers        r]   to_diskr  a  sZ    
 tD;;==

}}99S>!G+4: ' Kr\   rk   c                     [        U 5      n UR                  5        H)  u  p4UR                  S5      S   U;  d  M  U" X-  5        M+     U $ r  )r  rV  r_  )r  rk   r.  r   readers        r]   rH  rH  p  sF    
 tD}}99S>!G+4: ' Kr\   r\  c                     [         R                  R                  U [        U5      5      n[         R                  R	                  U5      nUR
                  R                  U5        U$ )zImport module from a file. Used to load models from a directory.

name (str): Name of module to load.
loc (str / Path): Path to the file.
RETURNS: The loaded module.
)r  utilspec_from_file_locationr   module_from_specloaderexec_module)r   r\  r  r  s       r]   import_filer  }  sI     >>11$CAD^^,,T2FKKF#Mr\   htmlc                 b    U R                  5       R                  SS5      R                  SS5      $ )zPerform a template-specific, rudimentary HTML minification for displaCy.
Disclaimer: NOT a general-purpose solution, only removes indentation and
newlines.

html (str): Markup to minify.
RETURNS (str): "Minified" HTML.
z    r1  rN  )rR  r   )r  s    r]   minify_htmlr    s*     ::<+33D"==r\   c                     U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U $ )	zReplace <, >, &, " with their HTML encoded representation. Intended to
prevent HTML errors in rendered displaCy markup.

text (str): The original text.
RETURNS (str): Equivalent text to be safely used within HTML.
&z&amp;r  z&lt;r  z&gt;"z&quot;)r   )r}  s    r]   escape_htmlr    sI     <<W%D<<V$D<<V$D<<X&DKr\   wordsc           	      l   SR                  SR                  U 5      R                  5       5      SR                  UR                  5       5      :w  a&  [        [        R                  R                  XS95      e/ n/ nSnU  Vs/ s H  oUR                  5       (       a  M  UPM     nnU H  n XS R                  U5      nUS:  a*  UR                  XXG-    5        UR                  S5        XG-  nUR                  U5        UR                  S5        U[        U5      -  nU[        U5      :  d  M  X   S:X  d  M  SUS'   US	-  nM     U[        U5      :  a$  UR                  XS 5        UR                  S5        X#4$ s  snf ! [         a(    [        [        R                  R                  XS95      Sef = f)
aj  Given a list of words and a text, reconstruct the original tokens and
return a list of words and spaces that can be used to create a Doc. This
can help recover destructive tokenization that didn't preserve any
whitespace information.

words (Iterable[str]): The words.
text (str): The original text.
RETURNS (Tuple[List[str], List[bool]]): The words and spaces.
r1  )r}  r  r   NFr  Tr)   )
r   r_  r  r0   E194r   isspacer  r   r  )r  r}  
text_wordstext_spacestext_posword
norm_words
word_starts           r]   get_words_and_spacesr    s    
wwrwwu~##%&"''$**,*??+++CDDJKH#(?54$5J?	Si..t4J >dh.CDEu%"H$5!CIc$iDNc$9"KOMH  #d)$y/*5!$$) @
  	SV[[//T/GHdR	Ss    E<E<*F2F3c                      [        U 5      R                  5       $ ! [         a(    [        [        R                  R                  U S95      Sef = f)zDeep copy a Config. Will raise an error if the config contents are not
JSON-serializable.

config (Config): The config to copy.
RETURNS (Config): The copied config.
rM  N)r    r  r  r0   E961r   rM  s    r]   copy_configr    sK    Ff~""$$ F++6+:;EFs	    2Avaluesc                     0 nU R                  5        He  u  p#UnUR                  5       R                  S5      n[        U5       H0  u  pgU[	        U5      S-
  :H  nUR                  Xx(       a  UO0 5      nM2     Mg     U$ )zConvert dot notation to a dict. For example: {"token.pos": True,
"token._.xyz": True} becomes {"token": {"pos": True, "_": {"xyz": True }}}.

values (Dict[str, Any]): The key/value pairs to convert.
RETURNS (Dict[str, dict]): The converted values.
r[  r)   )rV  lowerr_  	enumerater  
setdefault)	r  rh  r   r   r  partsiitemis_lasts	            r]   dot_to_dictr    st     !Flln
		!!#& 'GA3u:>)G??4'rBD ( % Mr\   r@  rA  c                j    [        XS9 VVs0 s H  u  p#SR                  U5      U_M     snn$ s  snnf )aM  Convert dot notation to a dict. For example: {"token": {"pos": True,
"_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.

obj (Dict[str, dict]): The dict to convert.
for_overrides (bool): Whether to enable special handling for registered
    functions in overrides.
RETURNS (Dict[str, Any]): The key/value pairs.
r@  r[  )	walk_dictr   )r0  rA  r   r   s       r]   rE  rE    s=     $CEEJC 	uE  s   /rg  c                     U nUR                  S5      nU H  n X$   nM
     U$ ! [        [        4 a(    [        [        R                  R                  US95      Sef = f)aH  Convert dot notation of a "section" to a specific part of the Config.
e.g. "training.optimizer" would return the Optimizer object.
Throws an error if the section is not defined in this config.

config (Config): The config.
section (str): The dot notation of the section in the config.
RETURNS: The object denoted by the section
r[  r3  N)r_  rc  r  r0   E952r   )r/  rg  	componentr  r  s        r]   rb  rb    sm     IMM#E	G!I 
  )$ 	G6;;--7-;<$F	Gs	   $8Ar   c                    U nUR                  S5      n[        U5       H"  u  pV U[        U5      S-
  :X  a  X#U'   M  X6   nM$     g! [        [        4 a(    [        [
        R                  R                  US95      Sef = f)zUpdate a config at a given position from a dot notation.

config (Config): The config.
section (str): The dot notation of the section in the config.
value (Any): The value to set in the config.
r[  r)   r3  N)r_  r  r  rc  r  r0   r  r   )r/  rg  r   r  r  r  r  s          r]   set_dot_to_objectr    s     IMM#EU#	GCJN""'$%O	 $ )$ 	G6;;--7-;<$F	Gs   AA8A>r  r'  c             #      #    U R                  5        HW  u  p4/ UQUPn[        U[        5      (       a2  U(       a  [        S U 5       5      (       d  [	        XEUS9 Sh  vN   MR  XT4v   MY     g N7f)zWalk a dict and yield the path and values of the leaves.

for_overrides (bool): Whether to treat registered functions that start with
    @ as final values rather than dicts to traverse.
c              3   B   #    U  H  oR                  S 5      v   M     g7f)@N)r   )r  	value_keys     r]   r  walk_dict.<locals>.<genexpr>+  s     H%Y++C00%s   r@  N)rV  r   rx  r  r  )r  r'  rA  r   r   
key_parents         r]   r  r    sf      jjl
#v^s^
eT""H%HHH -PPP%% # Qs   AA1A/A1r   c                     [         R                  " U 5      n[        [        R	                  / UR
                  QUR                  Q5      5      $ )zGet a list of all named arguments of a function (regular,
keyword-only).

func (Callable): The function
RETURNS (List[str]): The argument names.
)r~   getfullargspecr  rx  fromkeysr   
kwonlyargs)r   argspecs     r]   get_arg_namesr  2  s=     $$T*GBBw/A/ABCDDr\   weightsc                 v   U  VVVs0 s H  o"R                  5         H  u  p4X4_M	     M      nnnnUR                  U5        [        UR                  5        Vs/ s H  of(       a  UOSPM     sn5      nUR                  5        H&  u  p4U(       d  M  US:  d  M  [	        XG-  S5      XS'   M(     U$ s  snnnf s  snf )an  Combine and normalize score weights defined by components, e.g.
{"ents_r": 0.2, "ents_p": 0.3, "ents_f": 0.5} and {"some_other_score": 1.0}.

weights (List[dict]): The weights defined by the components.
overrides (Dict[str, Optional[Union[float, int]]]): Existing scores that
    should be preserved.
RETURNS (Dict[str, float]): The combined and normalized weights.
g        r      )rV  r   sumr  round)r  rB  w_dictr   r   rh  r  
weight_sums           r]   combine_score_weightsr	  =  s      ")*!(v\\^\c
^  * MM)v}}?!1a#o?@Jlln
5Z!^ 2A6FK % M* @s   %B/B6c                   p    \ rS rSrS rS rS rS\SS 4S jrS\	\
\4   SS	4S
 jrS\	\
\4   SS 4S jrSrg	)DummyTokenizeriW  c                     [         er   )r   )r   r}  s     r]   __call__DummyTokenizer.__call__X  s    !!r\   c              +   2   #    U H  nU " U5      v   M     g 7fr   rU   )r   textsr   r}  s       r]   pipeDummyTokenizer.pipe[  s     Dt* rv  c                     g)Nr\   rU   )r   r   s     r]   r  DummyTokenizer.to_bytesa  s    r\   datart   c                     U $ r   rU   )r   r  r   s      r]   r  DummyTokenizer.from_bytesd      r\   r  Nc                     g r   rU   r   r  r   s      r]   r  DummyTokenizer.to_diskg  s    r\   c                     U $ r   rU   r  s      r]   rH  DummyTokenizer.from_diskj  r  r\   rU   )rV   rW   rX   rY   r  r  r  bytesr  r   r   r   r  rH  r[   rU   r\   r]   r  r  W  s^    "u 3C E#t), 4 eCI. =M r\   r  c                      [        5       $ r   )r   rU   r\   r]   create_default_optimizerr   n  s	    6Mr\   c              #   $  #    [        U[        5      (       a  [        R                  " U5      nOUn[	        U 5      n  [        U5      n[        [        R                  " U [        U5      5      5      n[        U5      S:X  a  g[        U5      v   MS  7f)zdIterate over batches of items. `size` may be an iterator,
so that batch-size can vary on each step.
r   N)	r   r   r  repeatiternextr  islicer  )rV  sizesize_
batch_sizebatchs        r]   	minibatchr*  r  sv      $  &KE
%[
Y%%eS_=>u:?5k s   BBc                 J   Sn[        X5      (       a  g[        U S5      (       a  [        U S5      (       an  U R                  [        R                  ;   aP  [	        [        R                  U R                     5      U R
                  R                  S5      S      n[        X!5      $ g)a  Slightly hacky check for whether a callable is implemented in Cython.
Can be used to implement slightly different behaviors, especially around
inspecting and parameter annotations. Note that this will only return True
for actual cdef functions and methods, not regular Python functions defined
in Python modules.

func (Callable): The callable to check.
RETURNS (bool): Whether the callable is Cython (probably).
__pyx_vtable__TrX   rW   r[  r   F)r   rW   r$  r%  varsrX   r_  )r   rr  cls_funcs      r]   is_cython_funcr/    s     Dt 	n%%D,''OOs{{*DOO45d6G6G6M6Mc6RST6UVx&&r\   env_varc                 f    [         R                  R                  U S5      nUS:X  a  g[        U5      $ )zConvert the value of an environment variable to a boolean. Add special
check for "0" (falsy) and consider everything else truthy, except unset.

env_var (str): The name of the environment variable to check.
RETURNS (bool): Its boolean value.
F0)r"  r  r   r   )r0  r   s     r]   check_bool_env_varr3    s,     JJNN7E*E|;r\   docsr6   procr5   default_error_handlerr   c              #   |  #    [        US5      (       a  UR                  " U 40 UD6 S h  vN   g [        U5      nUn[        US5      (       a  UR                  5       nS H  nXd;   d  M
  UR	                  U5        M     U  H  n U" U40 UD6nUv   M     g  Nl! [
         a  nU" X!U/U5         S nAM3  S nAff = f7f)Nr  get_error_handler)r(  )r   r  rx  r8  r   r  )	r4  r5  r   r6  r   error_handlerargdocr#  s	            r]   _piper<    s      tV99T,V,,, f-4,-- 224M!C}

3 " C43)&)	  	-  4d3%334s?   (B<B;B<*B<BB<
B9#B4.B<4B99B<c                     Uer   rU   	proc_namer5  r4  r#  s       r]   raise_errorr@    s    
Gr\   c                     g r   rU   r>  s       r]   ignore_errorrB    s    r\   c                      [        5       (       aL  SSKJn   U R                  b8  SSKJn  U" 5       (       d%  [        R                  " [        R                  5        gggg)ziWarn about require_gpu if a jupyter notebook + cupy + mismatched
contextvars vs. thread ops are detected
r   )CupyOpsN)contextvars_eq_thread_ops)
r+  thinc.backends.cupy_opsrD  xpthinc.backendsrE  r  r  r1   W111)rD  rE  s     r]   warn_if_jupyter_cupyrJ    sC     3::!@,..hmm, / " r\   c                    U R                   R                  S0 5      n[        U5      S:X  a[  U R                  [        ;   aF  SR                  [        5      n[        R                  [        R                  R                  XS95        g g g )Nlexeme_normr   r   )r  langs)rf   	get_tabler  r   LEXEME_NORM_LANGSr   loggerdebugr1   W033r   )r+  component_namelexeme_normsrM  s       r]   check_lexeme_normsrU    sg    ==**="=L
<A%**0A"A		+,X]])))LM #Br\   c                 @    U SL a  gU c  gU SL a  gU S:X  a  gU S:X  a  gg)zConvert a value to the ternary 1/0/-1 int used for True/None/False in
attributes such as SENT_START: True/1/1.0 is 1 (True), None/0/0.0 is 0
(None), any other values are -1 (False).
Tr)   r   Fr  rU   )vals    r]   to_ternary_intrX    s7    
 d{				r\   c                     [        [        5      n [        R                  " 5        HR  nUR	                  S5      =(       d    SR                  5        H#  nX   R                  UR                  S   5        M%     MT     [        U 5      $ )zReturn a mapping of top-level packages to their distributions. We're
inlining this helper from the importlib_metadata "backport" here, since
it's not available in the builtin importlib.metadata.
ztop_level.txtr1  Name)	r   r  r-   distributions	read_textr_  r   metadatarx  )pkg_to_distdistr  s      r]   packages_distributionsr`    sj    
 d#K"002NN?39r@@BC##DMM&$9: C 3 r\   c                 v    [         R                  " U 5      n[        US5      =(       a    [        US5      (       + $ )zpReturn True if all the elements are equal to each other
(or if the input is an empty sequence), False otherwise.TF)r  groupbyr$  )iterablegs     r]   	all_equalre    s.     	(#A4=/a//r\   porthostc                 0   [         R                   " [         R                  [         R                  5      n UR                  X45         UR	                  5         g! [         R
                   a     UR	                  5         gf = f! UR	                  5         f = f)zCheck if 'host:port' is in use. Return True if it is, False otherwise.

port (int): the port to check
host (str): the host to check (default "localhost")
RETURNS (bool): Whether 'host:port' is in use.
FT)socketAF_INETSOCK_STREAMbindcloser   )rf  rg  ss      r]   _is_port_in_usero    sl     	fnnf&8&89A	| 	
	 << 		 	
	s#   A B -B ?B  B Bauto_selectc                    [        X5      (       d  U $ U nU(       d&  [        [        R                  R	                  US95      e[        X15      (       a#  US:  a  US-  n[        X15      (       a  US:  a  M  US:X  a6  [        X15      (       a&  [        [        R
                  R	                  US95      e[        R                  " [        R                  R	                  XUS95        U$ )a  Given a starting port and a host, handle finding a port.

If `auto_select` is False, a busy port will raise an error.

If `auto_select` is True, the next free higher port will be used.

start (int): the port to start looking from
host (str): the host to find a port on
auto_select (bool): whether to automatically select a new port if the given port is busy (default False)
RETURNS (int): The port to use.
)rf  i  r)   )rg  )rg  rf  
serve_port)
ro  r  r0   E1050r   E1049r  r  r1   W124)r  rg  rp  rf  s       r]   find_available_portrv    s     5''D,,$,788
$
%
%$,	 $
%
%$, u}44,,$,788 MM(--&&D&NOKr\   )T)FTr   )	localhost)F)rg  r  importlib.utilr~   r  loggingr"  r   rS  r  r  ri  r  r  r$  r  r  collectionsr   
contextlibr   pathlibr   typesr   typingr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   numpyr  thincr   r   packaging.requirementsr   packaging.specifiersr   r   packaging.versionr   r   	thinc.apir   r    r!   r"   r#   r$   r%   cupy.randomr,   r  r&   r'   r(   r1  r*   compatr+   r-   r.   r^  r/   r0   r1   symbolsr3   languager4   r5   r  r6   r7   r+  r8   iinfouint64r  OOV_RANKDEFAULT_OOV_PROBrO  rw  	getLoggerrP  StreamHandlerlogger_stream_handlersetFormatter	Formatter
addHandlerrR   r_   rx  r   r  r   r   r   r   r  r  r  r  r  r)  _DEFAULT_EMPTY_PIPESr:  r5  r6  rG  rX  ri  rp  rF  r  r  r  r  r  r  r  r  r  r  r  r  r  rD  r4  r  r  r  CompletedProcessr  r  r$  r+  r/  r2  r<  rA  rL  rZ  r^  rb  rd  rj  ri  r  r|  r   r  r  r  r  r  r  r  r  r  r  rH  r  r  r  r  r  r  rE  rb  r  r  r  floatr	  r  r   r*  r/  r3  r  r<  r@  rB  rJ  rU  rX  r`  re  ro  rv  rU   r\   r]   <module>r     s         	  	      
   # %      (      - . ? 5   = <  D D 9 9 0! ;;u||$(( i  A  
		7	#--/   " "AB   ' (0 0x u~~ x v.t .2&.t &.R&s &t &4 4# 4n( (j!1 (>0 04
#3 0 0	c 	c 	4U39- 4%d
2C 4 	J 	4 	 ()  #')=(<)=,<,>'1
T	
'1 $'1 3%&	'1
 #x}$%'1 3%&'1 $sCx.&()'1 '1Z #')=(<)=,<,>a
a $a 3%&	a
 #x}$%a 3%&a $sCx.&()a a@ &*"&)=(<)=,<,>+K+K 4S>
"+K $	+K
 3%&+K #x}$%+K 3%&+K $sCx.&()+K +Kb ,-"&)=(<)=/$sCx.&()/ sCx./ $	/
 3%&/ #x}$%/ 3%&/ / / /d$sCx.&()	#tCH~
""#HSM2"
38_"P #')=(<)=,<,>'T3Y' $' 3%&	'
 #x}$%' 3%&' $sCx.&()' 'X !1 2

T	

CH~
 
 	
8 ,<+=SX
sCx.LP2d3i 2
c 
hsm 
 8<!04d^6 *."&d^6*3 *5c? *;S ;S ;	 	 	*3 *4 *)c )c )
"s 
"x} 
"6c 6c 6d 6(E#t)$ (c3h (V/sDy) /d38n /
S 
T 
	D3 	D4 	D4e 4U 4 44 4$63 649 6  	43S	>"4 C=4 	4
   4n  eCI&  8D>      :idD 01 : :.t 064 6  1 1 1d 1, 15	5	5)-	5j	5"U39% "' ""(5g+>"? "G ""(5g+>"? "G ""%W*=!> "7 "
Mhucz2 
M3%QT*AU 
M C5#:& 15c3h IL  #tDz/*	#tDz/,
sDJ
),7:	#tDz/6 ?C#&.6sm
38_$( T&\ ,1x/ 1DL 1:d3U 334 :x} :QV :HH#x--.H c]H 
	H#xC(()4<SM	#s(^		c3h	#xs
++,	 c]	 
#s(^		
T	
#x--. c] 
	

T	

#x--.
 c]
 
	

c 
c4i 0 
Z 
>c >c >c c &%C=&% #&%
49d4j !&%R
Fd38nf45 
F& 
FS#X 4T	? " @E T#t)_  cSVh & 3 &Gf Gs G3 G4 G( /1&KP&
sCx.&"&s)&DH&eDIsN#$&&E ET#Y E -=,>$sHUO+,-C%() 
#x
4 .) " d 0
 
 
4
5/4
4 4 $	nd5k95x?	4 CH4 e_48	-N3 .	S$s)^ 4 	0# S 4 "s # D S {6  Ds   b bb