
    h~.                     ,   S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	J
r
JrJrJr  S SKJr  SSKJr  SSKJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJr  SSKJ r   SSKJ!r!J"r"J#r#  SSK$J%r%  SSK&J'r'  S\	\    S\\(\4   4S jr)S r* " S S\'5      r+S r,g)    N)Path)AnyCallableDictIterableListOptionalTupleUnion)Model   )util)ErrorsWarnings)Language)Lookupsload_lookups)Scorer)DocToken)Example)SimpleFrozenListloggerregistry)Vocab   )Pipeexamplesreturnc                 2    [         R                  " U S40 UD6$ )Nlemma)r   score_token_attr)r   kwargss     S/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/lemmatizer.pylemmatizer_scorer%      s    ""8W???    c                      [         $ N)r%    r&   r$   make_lemmatizer_scorerr*      s    r&   c                   *   \ rS rSrSr\S\S\\\   \\   4   4S j5       r	 S'SS\
S.S	\S
\\   S\S\S\S\\   SS4S jjjr\S 5       rS\S\4S jr S(SSS.S\\/ \\   4      S\\   S\\   4S jjjr\R6                  4S\SS4S jjrS\S\\   4S jrS\S\\   4S jrS\S\4S jr \!" 5       S.S\"\\#4   S \\   4S! jjr$\!" 5       S.S\"\\#4   S \\   SS 4S" jjr%\!" 5       S.S \\   S\&4S# jjr'\!" 5       S.S$\&S \\   SS 4S% jjr(S&r)g))
Lemmatizer   z
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
lookup tables.

DOCS: https://spacy.io/api/lemmatizer
moder   c                 :    US:X  a  S// 4$ US:X  a  S/SS/4$ / / 4$ )zReturns the lookups configuration settings for a given mode for use
in Lemmatizer.load_lookups.

mode (str): The lemmatizer mode.
RETURNS (Tuple[List[str], List[str]]): The required and optional
    lookup tables for this mode.
lookuplemma_lookuprulelemma_rules	lemma_exclemma_indexr)   )clsr.   s     r$   get_lookups_configLemmatizer.get_lookups_config%   s<     8#$b))V^"Ok=%ABBBxr&   r0   F)r.   	overwritescorervocabmodelnamer9   r:   Nc                   Xl         X l        X0l        X@l        [	        5       U l        XPl        SU l        U R                  S:X  a  U R                  U l
        OwU R                  S:X  a  U R                  U l
        OUU R                   S3n[        X5      (       d&  [        [        R                  R!                  US95      e[#        X5      U l
        0 U l        X`l        g)a  Initialize a Lemmatizer.

vocab (Vocab): The vocab.
model (Model): A model (not yet implemented).
name (str): The component name. Defaults to "lemmatizer".
mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
    `False`.
scorer (Optional[Callable]): The scoring method. Defaults to
    Scorer.score_token_attr for the attribute "lemma".

DOCS: https://spacy.io/api/lemmatizer#init
Fr0   r2   
_lemmatize)r.   N)r;   r<   r=   _moder   lookupsr9   
_validatedr.   lookup_lemmatize	lemmatizerule_lemmatizehasattr
ValueErrorr   E1003formatgetattrcacher:   )selfr;   r<   r=   r.   r9   r:   	mode_attrs           r$   __init__Lemmatizer.__init__4   s    . 

	
y"99 !22DNYY& !00DN99+Z0I4++ !4!4$!4!?@@$T5DN
r&   c                     U R                   $ r(   )r@   )rL   s    r$   r.   Lemmatizer.mode^   s    zzr&   docc                 j   U R                   (       d  U R                  [        R                  5        U R	                  5       n U H?  nU R
                  (       d  UR                  S:X  d  M&  U R                  U5      S   Ul        MA     U$ ! [         a  nU" U R                  X/U5         SnAgSnAff = f)zApply the lemmatizer to one document.

doc (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.

DOCS: https://spacy.io/api/lemmatizer#call
r   N)rB   _validate_tablesr   E1004get_error_handlerr9   r!   rD   lemma_	Exceptionr=   )rL   rR   error_handlertokenes        r$   __call__Lemmatizer.__call__b   s     !!&,,/..0	5>>U[[A%5#'>>%#8#;EL  J 	5$))T5!44	5s   $B	 *B	 	
B2B--B2)nlprA   get_examplesr^   rA   c                   U R                  U R                  5      u  pEUc  [        R                  " S5        [	        U R
                  R                  US9n[	        U R
                  R                  USS9nUR                   H#  nUR                  XvR                  U5      5        M%     X0l
        U R                  [        R                  5        g)a  Initialize the lemmatizer and load in data.

get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
lookups (Lookups): The lookups object containing the (optional) tables
    such as "lemma_rules", "lemma_index", "lemma_exc" and
    "lemma_lookup". Defaults to None.
Nz2Lemmatizer: loading tables from spacy-lookups-data)langtablesF)ra   rb   strict)r7   r.   r   debugr   r;   ra   rb   	set_table	get_tablerA   rT   r   rU   )rL   r_   r^   rA   required_tablesoptional_tablesoptional_lookupstables           r$   
initializeLemmatizer.initializeu   s      ,0+B+B499+M(?LLMN"

PG+ZZ___U  *00!!%)C)CE)JK 1fll+r&   error_messagec           	          U R                  U R                  5      u  p#U HJ  nX@R                  ;  d  M  [        UR	                  U R                  UU R                  R
                  S95      e   SU l        g)z8Check that the lookups are correct for the current mode.)r.   rb   foundTN)r7   r.   rA   rG   rI   rb   rB   )rL   rm   rg   rh   rj   s        r$   rT   Lemmatizer._validate_tables   sm    +/+B+B499+M($ELL( !((!YY."ll11 )   % r&   rZ   c                     U R                   R                  S0 5      nUR                  UR                  UR                  5      n[	        U[
        5      (       a  U/nU$ )zLemmatize using a lookup-based approach.

token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.

DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
r1   )rA   rf   gettext
isinstancestr)rL   rZ   lookup_tableresults       r$   rC   Lemmatizer.lookup_lemmatize   sM     ||--nbA!!%**ejj9fc""XFr&   c                    UR                   UR                  UR                  R                  4nX R                  ;   a  U R                  U   $ UR
                  nUR                  R                  5       nUS;   a;  US:X  a$  [        R                  " [        R                  5        UR                  5       /$ U R                  U5      (       a  UR                  5       /$ U R                  R                  S0 5      nU R                  R                  S0 5      nU R                  R                  S0 5      n[        UR!                  U5      UR!                  U5      UR!                  U5      45      (       d  US:X  a  U/$ UR                  5       /$ UR!                  U0 5      nUR!                  U0 5      n	UR!                  U0 5      n
UnUR                  5       n/ n/ nU
 H  u  pUR#                  U5      (       d  M  US[%        U5      [%        U5      -
   U-   nU(       d  MC  UU;   d  UR'                  5       (       d  UR)                  U5        Mq  UR)                  U5        M     [+        [,        R/                  U5      5      nU	R!                  U/ 5       H  nUU;  d  M  UR1                  SU5        M     U(       d  UR3                  U5        U(       d  UR)                  U5        XR                  U'   U$ )	zLemmatize using a rule-based approach.

token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.

DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
) eolspacerz   r5   r4   r3   propnNr   )orthposmorphkeyrK   rs   pos_lowerwarningswarnr   W108is_base_formrA   rf   anyrr   endswithlenisalphaappendlistdictfromkeysinsertextend)rL   rZ   	cache_keystringuniv_posindex_table	exc_tablerules_tableindex
exceptionsrulesorigforms	oov_formsoldnewforms                    r$   rE   Lemmatizer.rule_lemmatize   sc    ZZEKKOO<	

"::i((::##%++2~hmm,LLN##U##LLN##ll,,]B?LL**;;	ll,,]B?)h')
 
 7"x''"-]]8R0
"-	HCs##6Fc#h 67#=U]$,,..LL&$$T*  T]]5)*
 NN62.D5 Q% / LL#LL %

9r&   c                     g)zCheck whether the token is a base form that does not need further
analysis for lemmatization.

token (Token): The token.
RETURNS (bool): Whether the token is a base form.

DOCS: https://spacy.io/api/lemmatizer#is_base_form
Fr)   )rL   rZ   s     r$   r   Lemmatizer.is_base_form   s     r&   excludepathr   c                `   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   [         R                  " XT5        g)zSerialize the pipe to disk.

path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.

DOCS: https://spacy.io/api/lemmatizer#to_disk
c                 8   > TR                   R                  U TS9$ Nr   )r;   to_diskpr   rL   s    r$   <lambda>$Lemmatizer.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'Mr&   r;   c                 :   > TR                   R                  U 5      $ r(   )rA   r   r   rL   s    r$   r   r     s    )=)=a)@r&   rA   N)r   r   )rL   r   r   	serializes   ` ` r$   r   Lemmatizer.to_disk   s,     	M	'@	)Tg.r&   c                   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   [         R                  " XT5        T R                  5         T $ )a   Load the pipe from disk. Modifies the object in place and returns it.

path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The modified Lemmatizer object.

DOCS: https://spacy.io/api/lemmatizer#from_disk
c                 8   > TR                   R                  U TS9$ r   )r;   	from_diskr   s    r$   r   &Lemmatizer.from_disk.<locals>.<lambda>  s    )=)=a)=)Qr&   r;   c                 :   > TR                   R                  U 5      $ r(   )rA   r   r   s    r$   r   r     s    4<<+A+A!+Dr&   rA   )r   r   rT   )rL   r   r   deserializes   ` ` r$   r   Lemmatizer.from_disk
  s=     8:QG!DIt'2r&   c                ~   ^ ^ 0 nUU 4S jUS'   T R                   R                  US'   [        R                  " UT5      $ )zSerialize the pipe to a bytestring.

exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.

DOCS: https://spacy.io/api/lemmatizer#to_bytes
c                  6   > TR                   R                  T S9$ r   )r;   to_bytes)r   rL   s   r$   r   %Lemmatizer.to_bytes.<locals>.<lambda>%  s    TZZ%8%8%8%Ir&   r;   rA   )rA   r   r   )rL   r   r   s   `` r$   r   Lemmatizer.to_bytes  s9     	I	'#||44	)}}Y00r&   
bytes_datac                   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   [         R                  " XT5        T R                  5         T $ )zLoad the pipe from a bytestring.

bytes_data (bytes): The serialized pipe.
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The loaded Lemmatizer.

DOCS: https://spacy.io/api/lemmatizer#from_bytes
c                 8   > TR                   R                  U TS9$ r   )r;   
from_bytes)br   rL   s    r$   r   'Lemmatizer.from_bytes.<locals>.<lambda>5  s    )>)>q')>)Rr&   r;   c                 :   > TR                   R                  U 5      $ r(   )rA   r   )r   rL   s    r$   r   r   6  s    4<<+B+B1+Er&   rA   )r   r   rT   )rL   r   r   r   s   ` ` r$   r   Lemmatizer.from_bytes)  s=     8:RG!EI
9r&   )
r@   rB   rK   rD   rA   r<   r=   r9   r:   r;   )
lemmatizerr(   )*__name__
__module____qualname____firstlineno____doc__classmethodru   r
   r   r7   r%   r   r	   r   boolr   rN   propertyr.   r   r\   r   r   r   r   rk   r   E912rT   r   rC   rE   r   r   r   r   r   r   bytesr   r   __static_attributes__r)   r&   r$   r,   r,      s-    c eDItCy4H.I  $ !	( %5(( ( 	( ( ( "( 
(T  5C 5C 5* CG, #'%),xHW,=(=>?, h	,
 '",8 5;KK c D e S	 AE Ad3i AF	% 	D 	 CSBT/#t)$/2:3-/  CSBT#t)$2:3-	$ 4D3E 18C= 1% 1 >N=O-5c]	 r&   r,   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_lemmatizerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )r=   modules     r$   __getattr__r   =  sA      (()CD%%%
78*,>tfE
FFr&   )-r   sysr   pathlibr   typingr   r   r   r   r   r	   r
   r   	thinc.apir   rz   r   errorsr   r   languager   rA   r   r   r:   r   tokensr   r   trainingr   r   r   r   r;   r   piper   ru   r%   r*   r,   r   r)   r&   r$   <module>r      s{     
   N N N   %  +    5 5  @x0 @tCH~ @\ \@	Gr&   