
    h                     B    S SK JrJr  SSKJr  SSKJr   " S S\5      rg)    )ListTuple   )
Lemmatizer)Tokenc                   p   ^  \ rS rSrSr\S\S\\\   \\   4   4U 4S jj5       r	S\
S\\   4S jrSrU =r$ )	CatalanLemmatizer   aV  
Copied from French Lemmatizer
Catalan language lemmatizer applies the default rule based lemmatization
procedure with some modifications for better Catalan language support.

The parts of speech 'ADV', 'PRON', 'DET', 'ADP' and 'AUX' are added to use
the rule-based lemmatization. As a last resort, the lemmatizer checks in
the lookup table.
modereturnc                 >   > US:X  a  / SQnU/ 4$ [         TU ]  U5      $ )Nrule)lemma_lookuplemma_rules	lemma_exclemma_index)superget_lookups_config)clsr   required	__class__s      R/home/james-whalen/.local/lib/python3.13/site-packages/spacy/lang/ca/lemmatizer.pyr   $CatalanLemmatizer.get_lookups_config   s)    6>RHb>!7-d33    tokenc                 8   UR                   UR                  4nX R                  ;   a  U R                  U   $ UR                  nUR                  R                  5       nUS;   a  UR                  5       /$ SU R                  ;  d  US;  a  U R                  U5      $ U R                  R                  S0 5      nU R                  R                  S0 5      nU R                  R                  S0 5      nU R                  R                  S0 5      nUR                  U0 5      n	UR                  U0 5      n
UR                  U/ 5      nUR                  5       n/ nX9;   a!  UR                  U5        XR                  U'   U$ UR                  U
R                  U/ 5      5        / nU(       d  U H  u  pUR                  U5      (       d  M  US [        U5      [        U5      -
   U-   nU(       d  MC  UU	;   d  UR                  5       (       d  UR                  U5        Mq  UR                  U5        M     U(       d  UR                  U5        U(       d$  UR                  UR                  X3/5      S   5        [        [         R#                  U5      5      nXR                  U'   U$ )N) eolspacer   )nounverbadjadpadvauxcconjdetpronpunctsconjr   r   r   r   )orthposcachetextpos_lowerlookupslookup_lemmatize	get_tablegetappendextendendswithlenisalphalistdictfromkeys)selfr   	cache_keystringuniv_posindex_table	exc_tablerules_tablelookup_tableindex
exceptionsrulesforms	oov_formsoldnewforms                    r   rule_lemmatize CatalanLemmatizer.rule_lemmatize   s(   ZZ+	

"::i((::##%++LLN##$,,.( C
 3
 ((//ll,,]B?LL**;;	ll,,]B?||--nbA"-]]8R0
"-?LL $)JJy!LZ^^FB/0	!??3''!":CK#c($:;cADdllnnT*!((. " LL# LL))&(;A>?T]]5)* %

9r    )__name__
__module____qualname____firstlineno____doc__classmethodstrr   r   r   r   rM   __static_attributes____classcell__)r   s   @r   r	   r	      sX     4c 4eDItCy4H.I 4 47E 7d3i 7 7r   r	   N)typingr   r   pipeliner   tokensr   r	   rO   r   r   <module>r\      s     " J
 Jr   