
    h!5              	          S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	J
r
JrJr  S SKrSSKJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJrJr  SSK J!r!  SSKJ"r"J#r#  SSK$J%r%  SSK&J'r'  \	\\\(\)4   \4      r*\\)\\*\\(4   4   r+\\)\\\(\)4   \\(\)4   4   4   r,\\)\\)\\\(\)4   \\(\)4   4   4   4   r-S\\!   S\\)\4   4S jr.S r/ " S S\'5      r0S\1S\\1\14   4S jr2S r3g)    N)Path)AnyCallableDictIterableListOptionalTupleUnion   )util)Errors)Language)Matcher)Scorer)IDS)DocSpan)normalize_token_attrsset_token_attrs)Example)SimpleFrozenListregistry)Vocab   )Pipeexamplesreturnc                    S n0 nUR                  [        R                  " U S40 UD65        UR                  [        R                  " U S40 UD65        UR                  [        R                  " U S4SU0UD65        UR                  [        R                  " U S4SU0UD65        UR                  [        R                  " U S40 UD65        U$ )Nc                 ,    [        X5      R                  $ N)getattrkey)tokenattrs     W/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/attributeruler.pymorph_key_getter/attribute_ruler_score.<locals>.morph_key_getter   s    u#'''    tagposmorphgetterlemma)updater   score_token_attrscore_token_attr_per_feat)r   kwargsr'   resultss       r&   attribute_ruler_scorer4      s    ( GNN6**8UEfEFNN6**8UEfEFNN'U:JUfU NN((g	
&6	
:@	

 NN6**8WGGHNr)   c                      [         $ r!   )r4    r)   r&   make_attribute_ruler_scorerr7   .   s      r)   c                   z   \ rS rSrSr S'S\S.S\S\S\S\	\
   S	S
4
S jjjrS(S jrS
S
S
S
S.S\	\
/ \\   4      S\	\   S\	\\      S\	\   S\	\   S	S
4S jjrS\S	\4S jrS\4S jrS rS\\\\\\4   \\\4   4   4   S	S
4S jrS\\\\\\\\4   \\\4   4   4   4   S	S
4S jr S)S\\   S\S\S	S
4S jjrS\\   S	S
4S jr\ S	\!\   4S j5       r"\#" 5       4S\\   S	\$4S  jjr%\#" 5       4S!\$S\\   S	S 4S" jjr&\#" 5       4S#\\'\4   S\\   S	S
4S$ jjr(\#" 5       4S#\\'\4   S\\   S	S 4S% jjr)S&r*g
)*AttributeRuler2   zSet token-level attributes for tokens matched by Matcher patterns.
Additionally supports importing patterns from tag maps and morph rules.

DOCS: https://spacy.io/api/attributeruler
F)validatescorervocabnamer;   r<   r   Nc                    X l         Xl        [        U R                  US9U l        X0l        / U l        / U l        / U l        X@l        g)a  Create the AttributeRuler. After creation, you can add patterns
with the `.initialize()` or `.add_patterns()` methods, or load patterns
with `.from_bytes()` or `.from_disk()`. Loading patterns will remove
any patterns you've added previously.

vocab (Vocab): The vocab.
name (str): The pipe name. Defaults to "attribute_ruler".
scorer (Optional[Callable]): The scoring method. Defaults to
    Scorer.score_token_attr for the attributes "tag", "pos", "morph" and
    "lemma" and Scorer.score_token_attr_per_feat for the attribute
    "morph".

RETURNS (AttributeRuler): The AttributeRuler component.

DOCS: https://spacy.io/api/attributeruler#init
r;   N)	r>   r=   r   matcherr;   attrs_attrs_unnormedindicesr<   )selfr=   r>   r;   r<   s        r&   __init__AttributeRuler.__init__9   s@    0 	
tzzH= !#
+-"$r)   c                 t    [        U R                  U R                  S9U l        / U l        / U l        / U l        g)zReset all patterns.r@   N)r   r=   r;   rA   rB   rC   rD   rE   s    r&   clearAttributeRuler.clearZ   s.    tzzDMMB
!r)   )nlppatternstag_mapmorph_rulesget_examplesrL   rM   rN   rO   c                    U R                  5         U(       a  U R                  U5        U(       a  U R                  U5        U(       a  U R                  U5        gg)zInitialize the attribute ruler by adding zero or more patterns.

Rules can be specified as a sequence of dicts using the `patterns`
keyword argument. You can also provide rules using the "tag map" or
"morph rules" formats supported by spaCy prior to v3.
N)rJ   add_patternsload_from_tag_mapload_from_morph_rules)rE   rP   rL   rM   rN   rO   s         r&   
initializeAttributeRuler.initializea   sE     	

h'""7+&&{3 r)   docc                     U R                  5       n U R                  U5      nU R                  X5        U$ ! [         a  nU" U R                  X/U5      s SnA$ SnAff = f)zApply the AttributeRuler to a Doc and set all attribute exceptions.

doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.

DOCS: https://spacy.io/api/attributeruler#call
N)get_error_handlermatchset_annotations	Exceptionr>   )rE   rW   error_handlermatcheses        r&   __call__AttributeRuler.__call__x   s\     ..0	<jjoG  .J 	< D%;;	<s   #6 
A AAAc           	          U R                  USSS9nU VVVs/ s H+  u  p4n[        U R                  R                  U   5      X4U4PM-     nnnnUR	                  5         U$ s  snnnf )NTF)allow_missingas_spans)rA   intr=   stringssort)rE   rW   r^   m_idsr_   s         r&   rZ   AttributeRuler.match   sj    ,,s$,G KR
JQJDQS##D)*DQ7' 	 
 		
s   2A c                    U HA  u  p4pV[        XXdS9nU R                  U   nU R                  U   n	 Xy   n
[        Xy   U5        MC     g! [         am    [	        [
        R                  R                  U R                  R                  UR                  5      U Vs/ s H  oR                  PM     Os  snf snU	S95      Sef = f)zModify the document in place)label)rM   spanindexN)r   rB   rD   
IndexError
ValueErrorr   E1001formatrA   getrl   textr   )rE   rW   r^   attr_idmatch_idstartendrm   rB   rn   r$   ts               r&   r[   AttributeRuler.set_annotations   s    -4)GuC8DJJw'ELL)E  DK/% .5  	 !LL''!%!1!1$**!=.23dffd3# (  	s   A		AC B/.C c                    UR                  5        H  u  p#SU0/n[        U5      u  p5SU;  aB  U R                  R                  R	                  U5      nU R                  R
                  U   US'   ODU R                  R                  R	                  US   5      nU R                  R
                  U   US'   U R	                  U/U5        M     g)zLoad attribute ruler patterns from a tag map.

tag_map (dict): The tag map that maps fine-grained tags to
    coarse-grained tags and morphological features.

DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
TAGMORPHN)items_split_morph_attrsr=   
morphologyaddrf   )rE   rN   r*   rB   patternmorph_attrsr,   s          r&   rS    AttributeRuler.load_from_tag_map   s     "--/JCs|nG!3E!:Ee#

--11+>!%!3!3E!:g

--11%.A!%!3!3E!:gHHgY& *r)   c                    U H  nX    H  nX2S./nX   U   n[        U5      u  pVSU;   aE  U R                  R                  R                  US   5      nU R                  R                  U   US'   OHU(       aA  U R                  R                  R                  U5      nU R                  R                  U   US'   U R                  U/U5        M     M     g)a  Load attribute ruler patterns from morph rules.

morph_rules (dict): The morph rules that map token text and
    fine-grained tags to coarse-grained tags, lemmas and morphological
    features.

DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
)ORTHr|   r}   N)r   r=   r   r   rf   )rE   rO   r*   wordr   rB   r   r,   s           r&   rT   $AttributeRuler.load_from_morph_rules   s     C#($(56#(.%7%>"e# JJ1155eGnEE%)ZZ%7%7%>E'N  JJ1155kBE%)ZZ%7%7%>E'N'E* ) r)   rB   rn   c                    [        [        U R                  5      5      nU R                  R	                  U R
                  R                  R	                  U5      U5        U R                  R                  U5        [        U R
                  U5      nU R                  R                  U5        U R                  R                  U5        g)a  Add Matcher patterns for tokens that should be modified with the
provided attributes. The token at the specified index within the
matched span will be assigned the attributes.

patterns (Iterable[List[Dict]]): A list of Matcher patterns.
attrs (Dict): The attributes to assign to the target token in the
    matched span.
index (int): The index of the token in the matched span to modify. May
    be negative to index from the end of the span. Defaults to 0.

DOCS: https://spacy.io/api/attributeruler#add
N)strlenrB   rA   r   r=   rf   rC   appendr   rD   )rE   rM   rB   rn   r#   s        r&   r   AttributeRuler.add   s    " #djj/"++//4h?##E*%djj%8

% E"r)   c                 :    U H  nU R                   " S0 UD6  M     g)a3  Add patterns from a list of pattern dicts with the keys as the
arguments to AttributeRuler.add.
patterns (Iterable[dict]): A list of pattern dicts with the keys
    as the arguments to AttributeRuler.add (patterns/attrs/index) to
    add as patterns.

DOCS: https://spacy.io/api/attributeruler#add_patterns
Nr6   )r   )rE   rM   ps      r&   rR   AttributeRuler.add_patterns   s     AHHMqM r)   c                    / n[        [        U R                  5      5       Hd  n0 nU R                  R	                  [        U5      5      S   US'   U R                  U   US'   U R                  U   US'   UR                  U5        Mf     U$ )zAll the added patterns.r   rM   rB   rn   )	ranger   rB   rA   rs   r   rC   rD   r   )rE   all_patternsir   s       r&   rM   AttributeRuler.patterns   s     s4::'AA LL,,SV4Q7AjM--a0AgJaAgJ" ( r)   excludec                 ^   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   [         R                  " UT5      $ )zSerialize the AttributeRuler to a bytestring.

exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.

DOCS: https://spacy.io/api/attributeruler#to_bytes
c                  6   > TR                   R                  T S9$ N)r   )r=   to_bytes)r   rE   s   r&   <lambda>)AttributeRuler.to_bytes.<locals>.<lambda>  s    TZZ%8%8%8%Ir)   r=   c                  D   > [         R                  " T R                  5      $ r!   )srslymsgpack_dumpsrM   rI   s   r&   r   r     s    (;(;DMM(Jr)   rM   )r   r   )rE   r   	serializes   `` r&   r   AttributeRuler.to_bytes  s/     	I	' J	*}}Y00r)   
bytes_datac                 X   ^ ^ U 4S jnUU 4S jUS.n[         R                  " XT5        T $ )zLoad the AttributeRuler from a bytestring.

bytes_data (bytes): The data to load.
exclude (Iterable[str]): String names of serialization fields to exclude.
returns (AttributeRuler): The loaded object.

DOCS: https://spacy.io/api/attributeruler#from_bytes
c                 P   > TR                  [        R                  " U 5      5        g r!   )rR   r   msgpack_loads)brE   s    r&   load_patterns0AttributeRuler.from_bytes.<locals>.load_patterns  s    e11!45r)   c                 8   > TR                   R                  U TS9$ r   )r=   
from_bytes)r   r   rE   s    r&   r   +AttributeRuler.from_bytes.<locals>.<lambda>!  s    tzz44Q4Hr)   r=   rM   )r   r   )rE   r   r   r   deserializes   ` `  r&   r   AttributeRuler.from_bytes  s-    	6 I%
 	
9r)   pathc                 R   ^ ^ UU 4S jU 4S jS.n[         R                  " XT5        g)zSerialize the AttributeRuler to disk.

path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.

DOCS: https://spacy.io/api/attributeruler#to_disk
c                 8   > TR                   R                  U TS9$ r   )r=   to_diskr   r   rE   s    r&   r   (AttributeRuler.to_disk.<locals>.<lambda>2  s    tzz11!W1Er)   c                 F   > [         R                  " U TR                  5      $ r!   )r   write_msgpackrM   r   rE   s    r&   r   r   3  s    %"5"5a"Gr)   r   N)r   r   )rE   r   r   r   s   ` ` r&   r   AttributeRuler.to_disk'  s#     FG
	 	Tg.r)   c                 X   ^ ^ U 4S jnUU 4S jUS.n[         R                  " XT5        T $ )a   Load the AttributeRuler from disk.

path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object.

DOCS: https://spacy.io/api/attributeruler#from_disk
c                 P   > TR                  [        R                  " U 5      5        g r!   )rR   r   read_msgpackr   s    r&   r   /AttributeRuler.from_disk.<locals>.load_patternsC  s    e0034r)   c                 8   > TR                   R                  U TS9$ r   )r=   	from_diskr   s    r&   r   *AttributeRuler.from_disk.<locals>.<lambda>G  s    tzz33Aw3Gr)   r   )r   r   )rE   r   r   r   r   s   ` `  r&   r   AttributeRuler.from_disk7  s-    	5 H%
 	t'2r)   )rC   rB   rD   rA   r>   r<   r;   r=   )attribute_ruler)r   N)r   )+__name__
__module____qualname____firstlineno____doc__r4   r   r   boolr	   r   rF   rJ   r   r   r   AttributeRulerPatternType
TagMapTypeMorphRulesTyperU   r   r`   rZ   r[   r   r   re   rS   rT   MatcherPatternTyper   rR   propertyr   rM   r   bytesr   r   r   r   r   __static_attributes__r6   r)   r&   r9   r9   2   s    &
 %: 
  " 
B #'BF(,044xHW,=(=>?4 h	4
 8$=>?4 *%4 n-4 
4.<C <C <  0,'CeCHouS#X&F!GGH'	'*+T#tE#s(OU3PS8_4T/U*U%V VW+	+2 QR# !34#=A#JM#	#0
X.G%H 
T 
 	$89 	 	 1A0B 1 1u 1 ;K:L*23-	. @P?Q/$)$//7}/	/" @P?Q$)$/7}	 r)   r9   rB   c                     0 n0 nU R                  5        HG  u  p4US;   d2  U[        R                  " 5       ;   d  U[        R                  " 5       ;   a  XAU'   MC  XBU'   MI     X4$ )zSplit entries from a tag map or morph rules dict into to two dicts, one
with the token-level features (POS, LEMMA) and one with the remaining
features, which are presumed to be individual MORPH features._)r~   r   keysvalues)rB   other_attrsr   kvs        r&   r   r   N  sY     KK8qCHHJ!szz|*;NN	 
 ##r)   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_attribute_rulerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )r>   modules     r&   __getattr__r   ]  sA    %%(()CD***
78*,>tfE
FFr)   )4r   syspathlibr   typingr   r   r   r   r   r	   r
   r   r    r   errorsr   languager   rA   r   r<   r   symbolsr   tokensr   r   tokens._retokenizer   r   trainingr   r   r   r=   r   piper   re   r   r   r   r   r   r4   r7   r9   dictr   r   r6   r)   r&   <module>r      s*    
  N N N         G  -  $uS#X345  e,>c,I&J!JK #tE#s(OU38_<==>
c4T%S/5c?*J%K KLLMHW$5 DcN &!YT Yx$d $uT4Z'8 $Gr)   