
    h>                     N    S SK r S SKJrJrJr  SSKJr  SSKJr   " S S\5      r	g)    N)ListOptionalTuple   )
Lemmatizer)Tokenc                   $  ^  \ rS rSrSr\S\S\\\   \\   4   4U 4S jj5       r	S\
S\\   4S jrS\S	\\   S\\   4S
 jrS\S	\\   S\S\\   S\\   4
S jrS\S	\\   S\S\\   S\\   4
S jrS\S	\\   S\S\\   S\\   4
S jrS\S	\\   S\S\\   S\\   4
S jrS\S	\\   S\S\\   S\\   4
S jrS\S	\\   S\\   S\\   S\\   4
S jrS\S	\\   S\\   S\\   S\\   4
S jrS\S	\\   S\\   S\\   S\\   4
S jrSrU =r$ )SpanishLemmatizer   z@
Spanish rule-based lemmatizer with morph-based rule selection.
modereturnc                 >   > US:X  a  / SQnU/ 4$ [         TU ]  U5      $ )Nrule)lemma_ruleslemma_rules_groupslemma_index	lemma_exc)superget_lookups_config)clsr   required	__class__s      R/home/james-whalen/.local/lib/python3.13/site-packages/spacy/lang/es/lemmatizer.pyr   $SpanishLemmatizer.get_lookups_config   s)    6>XHb>!7-d33    tokenc                 x   UR                   UR                  [        UR                  5      4nX R                  ;   a  U R                  U   $ UR
                  nUR                  R                  5       n[        UR                  5      nUS;   a  UR                  5       /$ US;   a+  UR                  (       a  US:w  a  UR                  5       /$ U/$ UR                  5       nU R                  R                  S5      R                  U0 5      R                  U5      nUb  [        U5      nOUS:X  a  SnOUnU R                  U[        U5      5      n	U R                  R                  S5      R                  U/ 5      n
[        U SU-   5      " X5X5      n[        [         R#                  U5      5      nXpR                  U'   U$ )	N) eolspace)	adpcconjintjpartpropnpunctsconjsymxr%   r   auxverbr   
lemmatize_)orthposstrmorphcachetextpos_lowersetis_sent_startlookups	get_tablegetlistselect_rulegetattrdictfromkeys)selfr   	cache_keystringr.   featuresexclemmasrule_posr   indexs              r   rule_lemmatize SpanishLemmatizer.rule_lemmatize   s{   ZZC,<=	

"::i((jj u{{#&&LLN## 

 

 ""sg~''xll$$[155c2>BB6J?#YFe|!##Hd8n=DLL**=9==hKET<(#:;$F $--/0F &

9r   r.   rB   c                     U R                   R                  S5      nX;   a4  X1    H,  n[        US   5      R                  U5      (       d  M'  US   s  $    g )Nr      r   )r7   r8   r5   issubset)r?   r.   rB   groupsgroups        r   r;   SpanishLemmatizer.select_ruleB   sO    ''(<==uQx=))(33 8O % r   wordr   rF   c           	         / n/ nU R                   R                  S5      R                  U/ 5       H7  u  px[        R                  " US-   X5      n	X:w  d  M&  UR                  U	5        M9     / n
SU;   a  U H  n	U	R                  S5      (       d  U	R                  S5      (       d  M1  U R                   R                  S5      R                  S/ 5       H+  u  pxU
R                  [        R                  " XxU	5      5        M-     M     UR                  U
5        U H  nX;   d  M
  UR                  U5        M     [        U5      S:  a  U$ [        U5      S:  a  U$ U/$ )z
Lemmatize an adjective.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   $Number=Plurnsaccentsr   	r7   r8   r9   resubappendendswithextendlenr?   rO   rB   r   rF   possible_lemmasselected_lemmasoldnewpossible_lemmaadditional_lemmaslemmas               r   lemmatize_adjSpanishLemmatizer.lemmatize_adjJ   ?     ..}=AA$KHCVVC#Is9N%&&~6 L H$"1!**3//>3J3J33O3O$(LL$:$:=$I$M$M!2% *00.1QR% #2 	01$E~&&u- %
 !#""!A%""6Mr   c                     U R                   R                  S5      R                  S/ 5       H  u  pVX:X  d  M  U/s  $    U/$ )z
Lemmatize an adverb.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   adverbs)r7   r8   r9   )r?   rO   rB   r   rF   r`   ra   s          r   lemmatize_advSpanishLemmatizer.lemmatize_advz   sB     ..}=AA)RPHC{u Q
 vr   c                    / n/ nU R                   R                  S5      R                  S/ 5       H  u  pxX:X  d  M  U/s  $    U R                   R                  S5      R                  S/ 5       H  u  pxX:X  d  M  U/s  $    U R                   R                  S5      R                  S/ 5       H0  u  px[        R                  " US-   X5      n	UR                  U	5        M2     UR                  U5        [        U5      S:X  a  U$ [        U5      S:  a4  U H  n
X;   d  M
  UR                  U
5        M     [        U5      S:  a  U$ U$ / $ )z
Lemmatize a determiner.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   detdet_and_pron_fixeddet_and_pron_generalrQ   rJ   r7   r8   r9   rW   rX   rY   r\   r?   rO   rB   r   rF   r^   r_   r`   ra   rb   rd   s              r   lemmatize_detSpanishLemmatizer.lemmatize_det   s>     ..}=AA%LHC{u M ..}=AA "
HC {u	
 ..}=AA"B
HC  VVC#Is9N"">2	

 	t$1$""!A%(>#**51 ) ?#q(&&&&Ir   c           	         / n/ nU R                   R                  S5      R                  U/ 5       H7  u  px[        R                  " US-   X5      n	X:w  d  M&  UR                  U	5        M9     / n
SU;   a  U H  n	U	R                  S5      (       d  U	R                  S5      (       d  M1  U R                   R                  S5      R                  S/ 5       H+  u  pxU
R                  [        R                  " XxU	5      5        M-     M     UR                  U
5        U H  nX;   d  M
  UR                  U5        M     [        U5      S:  a  U$ [        U5      S:  a  U$ U/$ )z
Lemmatize a noun.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   rQ   rR   rS   rT   rU   r   rV   r]   s               r   lemmatize_noun SpanishLemmatizer.lemmatize_noun   rg   r   c                 D   U R                   R                  S5      R                  S/ 5       H  u  pVX:X  d  M  U/s  $    UR                  S5      n[        R
                  " SUS   5      (       a  [        R                  " SSU5      n[        R                  " SSU5      nU/$ )	z
Lemmatize a numeral.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   num,z(\.)([0-9]{3})$r   z\.r   .)r7   r8   r9   splitrW   searchrX   )r?   rO   rB   r   rF   r`   ra   splitted_words           r   lemmatize_numSpanishLemmatizer.lemmatize_num   s      ..}=AA%LHC{u M
 

399'q)9::66%d+DvvdD$'vr   c                    / n/ nU R                   R                  S5      R                  S/ 5       H  u  pxX:X  d  M  U/s  $    U R                   R                  S5      R                  S/ 5       H  u  pxX:X  d  M  U/s  $    U R                   R                  S5      R                  S/ 5       H7  u  px[        R                  " US-   X5      n	X:w  d  M&  UR                  U	5        M9     UR                  U5        [        U5      S:X  a  U$ [        U5      S:  a4  U H  n
X;   d  M
  UR                  U
5        M     [        U5      S:  a  U$ U$ / $ )z
Lemmatize a pronoun.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
r   pronrn   ro   rQ   rJ   rp   rq   s              r   lemmatize_pron SpanishLemmatizer.lemmatize_pron  sD     ..}=AA&"MHC{u N ..}=AA "
HC {u	
 ..}=AA"B
HC  VVC#Is9N%&&~6
 	t$1$""!A%(>#**51 ) ?#q(&&&&Ir   c           	         SU;   a  U R                  XX45      $ / n/ n[        U=(       d    S5      nU R                  R                  S5      R	                  U/ 5       H7  u  px[
        R                  " US-   X5      n	X:w  d  M&  UR                  U	5        M9     U H  n
X;   d  M
  UR                  U
5        M     [        U5      S:X  a  U H  n
U R                  R                  S5      R	                  S/ 5       HO  u  pxXz;   d  M  [        U
5       H4  u  pX:X  d  M  U
SU U-   XS-   S -   nX;   d  M#  UR                  U5        M6     MQ     U R                  R                  S5      R	                  S	/ 5       H6  u  pxXz;   d  M  U
R                  XxS5      nX;   d  M%  UR                  U5        M8     M     / nU H]  n	U R                  R                  S5      R	                  S
/ 5       H+  u  pxUR                  [
        R                  " XxU	5      5        M-     M_     UR                  U5        [        U5      S:  a  U$ [        U5      S:  a  U$ U/$ )z
Lemmatize a verb.

word (str): The word to lemmatize.
features (List[str]): The morphological features as a list of Feat=Val
    pairs.
index (List[str]): The POS-specific lookup list.

RETURNS (List[str]): The list of lemmas.
PronType=Prsr   r   rQ   r   	voc_alt_1NrJ   	voc_alt_2rU   )lemmatize_verb_pronr/   r7   r8   r9   rW   rX   rY   r\   	enumeratereplacer[   )r?   rO   rB   r   rF   r^   r_   r`   ra   rb   rd   icharvoc_alt_lemmarc   s                  r   lemmatize_verb SpanishLemmatizer.lemmatize_verbG  s*    X%++DDHH  4:2..}=AA$KHCVVC#Is9N%&&~6 L
 %E~&&u- % 1$ ) $ 6 6} E I I!HC |'0'7GA#{05bq	C%A.0P#0#9$3$:$:=$I	 (8	! !% 6 6} E I I!HC |(-c(B(1+22=A! )& -N LL22=AEEiQST!((.)IJ U . 	01 !#""!A%""6Mr   c           	         Sn/ nUn[         R                  " XW5      nUbx  [        U5      S::  ai  [         R                  " UR	                  S5      S-   SU5      nUR	                  S5      /U-   n[         R                  " XW5      nUb  [        U5      S::  a  Mi  U R
                  R                  S5      R                  S/ 5       H  u  p[         R                  " XU5      nM     U R
                  R                  S5      R                  S	0 5      R                  U5      nUb  US
   nO+U R                  S	U5      nU R                  XrS1-
  X45      S
   n/ nU H  nU R
                  R                  S5      R                  S0 5      R                  U5      nUb  UR                  US
   5        MV  U R                  SU5      nUR                  U R                  XX45      S
   5        M     US-   SR                  U5      -   /$ )Nz^(.*?)([mts]e|l[aeo]s?|n?os)$r      rQ   r   r   rU   r   r+   r   r   r    )rW   r|   r\   rX   rM   r7   r8   r9   r;   r   rY   r   join)r?   rO   rB   r   rF   	pron_pattpronsr+   mr`   ra   rC   
verb_lemmapron_lemmasr   s                  r   r   %SpanishLemmatizer.lemmatize_verb_pron  s    4	IIi&mE
a66!''!*s*B5DWWQZL5(E		)*A mE
a
 ..}=AA)RPHC66#D)D Q ll$$[155fbAEEdK?QJ##FH5D,,.!114J D,,((599&"EII$OC""3q6*''9""4#6#6tt#STU#VW  S 388K#8899r    )__name__
__module____qualname____firstlineno____doc__classmethodr/   r   r   r   r   rG   r   r;   re   rj   rr   ru   r~   r   r   r   __static_attributes____classcell__)r   s   @r   r
   r
      sZ    4c 4eDItCy4H.I 4 4+E +d3i +Zs d3i HSM ..#'9.47.@DS	.	c.`#'947@DS		c,33#'93473@DS	3	c3j..#'9.47.@DS	.	c.`#'947@DS		c655#'954<SM5JNs)5	c5nCC#'9C4<SMCJNs)C	cCJ : :#'9 :4<SM :JNs) :	c :  :r   r
   )
rW   typingr   r   r   pipeliner   tokensr   r
   r   r   r   <module>r      s"    	 ( ( " d:
 d:r   