
    h1J                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
JrJrJrJrJrJrJrJr  S SKrSSKJr  SSKJrJr  SSKJr  SS	KJrJr  SS
KJr  SSK J!r!  SSK"J#r#J$r$  SSK%J&r&  SSKJ'r'J(r(J)r)  SSK*J+r+  \
\,\\,\\
\,\4      4   4   r-Sr.S\\$   S\\$   S\\$   4S jr/S r0S\\$   S\\$   S\\$   4S jr1S r2\.S.S\\&   S\
\,\4   4S jjr3\.4S\,4S jjr4 " S S\+5      r5S  r6g)!    N)partial)Path)AnyCallableDictIterableListOptionalSequenceSetTupleUnioncast   )util)ErrorsWarnings)Language)MatcherPhraseMatcher)levenshtein_compare)Scorer)DocSpan)Example)SimpleFrozenListensure_pathregistry   )Piperulerentitiesspansreturnc                   ^ S n[        XSS9n[        U 5      n / n[        5       mU H  nUR                  nUR                  n[        U4S jU 5       5      (       d  M7  UR                  U5        U  Vs/ s H&  owR                  U:  a  UR                  U:  a  M$  UPM(     n nTR                  [        XV5      5        M     X-   $ s  snf )a  Merge entities and spans into one list without overlaps by allowing
spans to overwrite any entities that they overlap with. Intended to
replicate the overwrite_ents=True behavior from the EntityRuler.

entities (Iterable[Span]): The entities, already filtered for overlaps.
spans (Iterable[Span]): The spans to merge, may contain overlaps.
RETURNS (List[Span]): Filtered list of non-overlapping spans.
c                 N    U R                   U R                  -
  U R                  * 4$ Nendstartspans    S/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/span_ruler.py<lambda>,prioritize_new_ents_filter.<locals>.<lambda>0       DJJ!6 D    Tkeyreversec              3   @   >#    U  H  oR                   T;  v   M     g 7fr'   i.0tokenseen_tokenss     r-   	<genexpr>-prioritize_new_ents_filter.<locals>.<genexpr>8        <tewwk)t   )	sortedlistsetr*   r)   allappendupdaterange)	r"   r#   get_sort_keynew_entitiesr,   r*   r)   er;   s	           @r-   prioritize_new_ents_filterrJ   %   s     EL5D9EH~HLEK

hh<t<<<%#+U8aGGcMaeeem8HUuU01  "" Vs   4#CCc                      [         $ r'   )rJ    r1   r-   make_prioritize_new_ents_filterrM   ?   s    %%r1   c                 P  ^ S n[        XSS9n[        U 5      n / n[        5       mTR                  " S U  5       6   U Hb  nUR                  nUR
                  n[        U4S jU 5       5      (       d  M7  UR                  U5        TR                  [        XV5      5        Md     X-   $ )ar  Merge entities and spans into one list without overlaps by prioritizing
existing entities. Intended to replicate the overwrite_ents=False behavior
from the EntityRuler.

entities (Iterable[Span]): The entities, already filtered for overlaps.
spans (Iterable[Span]): The spans to merge, may contain overlaps.
RETURNS (List[Span]): Filtered list of non-overlapping spans.
c                 N    U R                   U R                  -
  U R                  * 4$ r'   r(   r+   s    r-   r.   1prioritize_existing_ents_filter.<locals>.<lambda>N   r0   r1   Tr2   c              3   b   #    U  H%  n[        UR                  UR                  5      v   M'     g 7fr'   )rF   r*   r)   )r9   ents     r-   r<   2prioritize_existing_ents_filter.<locals>.<genexpr>S   s"     Ghssyy#''22hs   -/c              3   @   >#    U  H  oR                   T;  v   M     g 7fr'   r6   r8   s     r-   r<   rS   W   r>   r?   )	r@   rA   rB   rE   r*   r)   rC   rD   rF   )r"   r#   rG   rH   r,   r*   r)   r;   s          @r-   prioritize_existing_ents_filterrU   C   s     EL5D9EH~HLEKGhGH

hh<t<<<%uU01  ""r1   c                      [         $ r'   )rU   rL   r1   r-   "make_preserve_existing_ents_filterrW   ]   s    **r1   	spans_keyexamplesc                  ^^ [        U5      nSmUR                  ST T 35        UR                  SS5        UR                  SS5        UR                  SU4S j5        UR                  SU4S	 j5        [        R                  " U 40 UD6$ )
Nspans_attrallow_overlapTlabeledgetterc                 T   > U R                   R                  U[        T5      S  / 5      $ r'   )r#   getlen)docr3   attr_prefixs     r-   r.   1overlapping_labeled_spans_score.<locals>.<lambda>j   s!    399==S5E5G1H"#Mr1   has_annotationc                 "   > TU R                   ;   $ r'   )r#   )rd   rY   s    r-   r.   rf   l   s    I4Jr1   )dict
setdefaultr   score_spans)rZ   rY   kwargsre   s    ` @r-   overlapping_labeled_spans_scorerm   a   s     &\FK
fi[9:
ot,
i&
M &(JKh1&11r1   rY   c                     [        [        U S9$ )NrX   )r   rm   rX   s    r-   %make_overlapping_labeled_spans_scorerro   p   s    2iHHr1   c                      \ rS rSrSr S3\SS\R                  S\SS\	" \
\S9S.	S\S\S	\\   S
\\\\   \\   /\\   4      S\S\\\   \\   /\\   4   S\\\\4      S\S\S\S\\   SS4S jjjrS\4S jrS\S\4S jr\S\\   4S j5       rS\S\4S jrS\4S jrS r\S\\S4   4S j5       r\S\\S4   4S j5       rSSS.S \/ \\    4   S\\   S!\\!\"      4S" jjr#\S\$\"   4S# j5       r%S!\$\"   SS4S$ jr&S4S% jr'S\SS4S& jr(S'\SS4S( jr)S4S) jr*\+" 5       S*.S+\,S,\\   SS 4S- jjr-\+" 5       S*.S,\\   S\,4S. jjr.\+" 5       S*.S/\\\/4   S,\\   SS 4S0 jjr0\+" 5       S*.S/\\\/4   S,\\   SS4S1 jjr1S2r2g)5	SpanRulert   zThe SpanRuler lets you add spans to the `Doc.spans` using token-based
rules or exact phrase matches.

DOCS: https://spacy.io/api/spanruler
USAGE: https://spacy.io/usage/rule-based-matching#spanruler
NFrX   )	rY   spans_filterannotate_entsents_filterphrase_matcher_attrmatcher_fuzzy_comparevalidate	overwritescorernlpnamerY   rs   rt   ru   rv   rw   rx   ry   rz   r$   c       	             Xl         X l        X0l        XPl        Xpl        Xl        Xl        X@l        X`l        Xl	        Xl
        0 U l        U R                  5         g)a  Initialize the span ruler. If patterns are supplied here, they
need to be a list of dictionaries with a `"label"` and `"pattern"`
key. A pattern can either be a token pattern (list) or a phrase pattern
(string). For example: `{'label': 'ORG', 'pattern': 'Apple'}`.

nlp (Language): The shared nlp object to pass the vocab to the matchers
    and process phrase patterns.
name (str): Instance name of the current pipeline component. Typically
    passed in automatically from the factory when the component is
    added. Used to disable the current span ruler while creating
    phrase patterns with the nlp object.
spans_key (Optional[str]): The spans key to save the spans under. If
    `None`, no spans are saved. Defaults to "ruler".
spans_filter (Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]):
    The optional method to filter spans before they are assigned to
    doc.spans. Defaults to `None`.
annotate_ents (bool): Whether to save spans to doc.ents. Defaults to
    `False`.
ents_filter (Callable[[Iterable[Span], Iterable[Span]], List[Span]]):
    The method to filter spans before they are assigned to doc.ents.
    Defaults to `util.filter_chain_spans`.
phrase_matcher_attr (Optional[Union[int, str]]): Token attribute to
    match on, passed to the internal PhraseMatcher as `attr`. Defaults
    to `None`.
matcher_fuzzy_compare (Callable): The fuzzy comparison method for the
    internal Matcher. Defaults to
    spacy.matcher.levenshtein.levenshtein_compare.
validate (bool): Whether patterns should be validated, passed to
    Matcher and PhraseMatcher as `validate`.
overwrite (bool): Whether to remove any existing spans under this spans
    key if `spans_key` is set, and/or to remove any ents under `doc.ents` if
    `annotate_ents` is set. Defaults to `True`.
scorer (Optional[Callable]): The scoring method. Defaults to
    spacy.pipeline.span_ruler.overlapping_labeled_spans_score.

DOCS: https://spacy.io/api/spanruler#init
N)r{   r|   rY   rt   rv   rx   ry   rs   ru   rz   rw   _match_label_id_mapclear)selfr{   r|   rY   rs   rt   ru   rv   rw   rx   ry   rz   s               r-   __init__SpanRuler.__init__|   sR    t 	"*#6  "(&%:">@ 

r1   c                 ,    [        U R                  5      $ )z1The number of all labels added to the span ruler.)rc   	_patternsr   s    r-   __len__SpanRuler.__len__   s    4>>""r1   labelc                 \    U R                   R                  5        H  nUS   U:X  d  M    g   g)z+Whether a label is present in the patterns.r   TF)r~   values)r   r   label_ids      r-   __contains__SpanRuler.__contains__   s0    00779H E) : r1   c                     U R                   $ )z2Key of the doc.spans dict to save the spans under.rX   r   s    r-   r3   SpanRuler.key   s     ~~r1   rd   c                     U R                  5       n U R                  U5      nU R                  X5        U$ ! [         a  nU" U R                  X/U5      s SnA$ SnAff = f)zFind matches in document and add them as entities.

doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available.

DOCS: https://spacy.io/api/spanruler#call
N)get_error_handlermatchset_annotations	Exceptionr|   )r   rd   error_handlermatchesrI   s        r-   __call__SpanRuler.__call__   s\     ..0	<jjoG  .J 	< D%;;	<s   #6 
A AAAc           
        ^ ^ T R                  5         [        R                  " 5          [        R                  " SSS9  [	        [
        [        [        [        [        4      [        T R                  T5      5      [        T R                  T5      5      -   5      nS S S 5        [        UU 4S jW 5       5      n[        [        U5      5      $ ! , (       d  f       N8= f)Nignorez\[W036)messagec           	   3      >#    U  H<  u  pnX#:w  d  M  [        TUUTR                  U   S    TR                  U   S   S9v   M>     g7f)r   id)r   span_idN)r   r~   )r9   m_idr*   r)   rd   r   s       r-   r<   "SpanRuler.match.<locals>.<genexpr>   s\      
#
 %, S|D..t4W=006t< %,s
   A4A)_require_patternswarningscatch_warningsfilterwarningsr   r	   r   intrA   matcherphrase_matcherrB   r@   )r   rd   r   deduplicated_matchess   ``  r-   r   SpanRuler.match   s     $$&##Hi@U3S=)*T\\#&'$t/B/B3/G*HHG '  # 
#
 %,
#
 
 
 d/011# '&s   A2C
Cc                 N   U R                   (       a  / nU R                   UR                  ;   a*  U R                  (       d  UR                  U R                      nUR                  U R                  (       a  U R	                  X25      OU5        X1R                  U R                   '   U R
                  (       aK  / nU R                  (       d  [        UR                  5      nU R                  X25      n [        U5      Ul        gg! [         a    [        [        R                  5      ef = f)zModify the document in placeN)r3   r#   ry   extendrs   rt   rA   entsru   r@   
ValueErrorr   E854)r   rd   r   r#   s       r-   r   SpanRuler.set_annotations   s     88Exx399$T^^		$((+LL595F5F!!%1G #(IIdhhE>>SXX$$U4E.!%=   . --.s   .D   $D$.c                     [        [        [        U R                   Vs/ s H  n[	        [
        US   5      PM     sn5      5      5      $ s  snf )zzAll labels present in the match patterns.

RETURNS (set): The string labels.

DOCS: https://spacy.io/api/spanruler#labels
r   )tupler@   rB   r   r   strr   ps     r-   labelsSpanRuler.labels  s:     VC O1c1W:!6 OPQRR Os   A
c                     [        [        [        U R                   Vs/ s H"  n[	        [
        UR                  S5      5      PM$     sn5      [        S/5      -
  5      5      $ s  snf )zqAll IDs present in the match patterns.

RETURNS (set): The string IDs.

DOCS: https://spacy.io/api/spanruler#ids
r   N)r   r@   rB   r   r   r   rb   r   s     r-   idsSpanRuler.ids  sO     3G1S!%%+.GH3PTv;VW
 	
Gs   )A#)r{   patternsget_examplesr   c                V    U R                  5         U(       a  U R                  U5        gg)a[  Initialize the pipe for training.

get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
patterns (Optional[Iterable[PatternType]]): The list of patterns.

DOCS: https://spacy.io/api/spanruler#initialize
N)r   add_patterns)r   r   r{   r   s       r-   
initializeSpanRuler.initialize%  s#      	

h' r1   c                     U R                   $ )zGet all patterns that were added to the span ruler.

RETURNS (list): The original patterns, one dictionary per pattern.

DOCS: https://spacy.io/api/spanruler#patterns
)r   r   s    r-   r   SpanRuler.patterns9  s     ~~r1   c           	      z    Sn[        U R                  R                  5       H  u  nu  pEX:X  d  M  Un  O   U R                  R                  US  Vs/ s H  oUPM     nnU R                  R                  US9   / n/ nU GH<  n	[        [        U	S   5      n
[        [        U	R                  SS5      5      n[        X45      nU
US.U R                  U R                  R                  R                  R                  U5      '   [        U	S   [        5      (       a&  UR                  U5        UR                  U	S   5        Oa[        U	S   [         5      (       a   U R"                  R%                  XS   /5        O)[	        [&        R(                  R+                  U	S   S	95      eU R,                  R                  U	5        GM?     [/        UU R                  R1                  U5      5       H!  u  pU R2                  R%                  X/5        M#     SSS5        gs  snf ! [         a    / n GNf = f! , (       d  f       g= f)
a~  Add patterns to the span ruler. A pattern can either be a token
pattern (list of dicts) or a phrase pattern (string). For example:
{'label': 'ORG', 'pattern': 'Apple'}
{'label': 'ORG', 'pattern': 'Apple', 'id': 'apple'}
{'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}

patterns (list): The patterns to add.

DOCS: https://spacy.io/api/spanruler#add_patterns
N)disabler   r    )r   r   pattern)r   )	enumerater{   pipeline
pipe_namesr   select_pipesr   r   rb   reprr~   vocabstringsas_int
isinstancerD   rA   r   addr   E097formatr   zippiper   )r   r   current_indexr7   r|   r   subsequent_pipesphrase_pattern_labelsphrase_pattern_textsentryp_labelp_idr   r   s                 r-   r   SpanRuler.add_patternsC  s   	"M#,TXX->->#?<D<$%M $@ 261D1D]^1TU1T1TU XX""+;"<$&!#% !sE'N3C4!45g_-$R(()?)?)F)Fu)MN eI.44)007(//i0@Ai 0$77LL$$U9-=,>?$V[[%7%7i@P%7%QRR%%e, "  #&%23# ##''y9	#' =<  V 	"!	"<<s5   ,H !H HH 9FH,H H)(H),
H:c                     / U l         [        U R                  R                  U R                  U R
                  S9U l        [        U R                  R                  U R                  U R                  S9U l	        g)zNReset all patterns.

RETURNS: None
DOCS: https://spacy.io/api/spanruler#clear
)rx   fuzzy_compare)r]   rx   N)
r   r   r{   r   rx   rw   r   r   rv   r   r   s    r-   r   SpanRuler.clears  s\     -/ 'HHNN]]44!

 .;HHNN))]].
r1   c                 2   X;  a1  [        [        R                  R                  SXR                  S95      eU R
                   Vs/ s H  o"S   U:w  d  M  UPM     snU l        U R                   H  nU R                  U   S   U:X  d  M  U R                  R                  R                  R                  U5      nX@R                  ;   a  U R                  R                  U5        X@R                  ;   d  M  U R                  R                  U5        M     gs  snf )zRemove a pattern by its label.

label (str): Label of the pattern to be removed.
RETURNS: None
DOCS: https://spacy.io/api/spanruler#remove
r   	attr_typer   	componentN)r   r   E1024r   r|   r   r~   r{   r   r   	as_stringr   remover   )r   r   r   m_labelm_label_strs        r-   r   SpanRuler.remove  s     ##gUii#X  &*^^K^zU7J!^K//G''09UB"hhnn44>>wG"5"55''..{;,,.LL''4 0 Ls   DD
pattern_idc                 t   [        U 5      nU R                   Vs/ s H  o3R                  S5      U:w  d  M  UPM     snU l        U[        U 5      :X  a1  [        [        R
                  R                  SXR                  S95      eU R                   H  nU R                  U   S   U:X  d  M  U R                  R                  R                  R                  U5      nXPR                  ;   a  U R                  R                  U5        XPR                  ;   d  M  U R                  R                  U5        M     gs  snf )zRemove a pattern by its pattern ID.

pattern_id (str): ID of the pattern to be removed.
RETURNS: None
DOCS: https://spacy.io/api/spanruler#remove_by_id
r   IDr   N)rc   r   rb   r   r   r   r   r|   r~   r{   r   r   r   r   r   r   )r   r   orig_lenr   r   r   s         r-   remove_by_idSpanRuler.remove_by_id  s     t9%)^^Q^uuT{j7P!^Qs4y ##"*		 $  
 //G''06*D"hhnn44>>wG"5"55''..{;,,.LL''4 0 Rs
   D5D5c                     [        U 5      S:X  a<  [        R                  " [        R                  R                  U R                  S95        gg)z:Raise a warning if this component has no patterns defined.r   )r|   N)rc   r   warnr   W036r   r|   r   s    r-   r   SpanRuler._require_patterns  s4    t9>MM(--..DII.>? r1   )exclude
bytes_datar   c                f   ^  T R                  5         SU 4S j0n[        R                  " XU5        T $ )zLoad the span ruler from a bytestring.

bytes_data (bytes): The bytestring to load.
RETURNS (SpanRuler): The loaded span ruler.

DOCS: https://spacy.io/api/spanruler#from_bytes
r   c                 N   > TR                  [        R                  " U 5      5      $ r'   )r   srsly
json_loads)br   s    r-   r.   &SpanRuler.from_bytes.<locals>.<lambda>      $"3"3E4D4DQ4G"Hr1   )r   r   
from_bytes)r   r   r   deserializerss   `   r-   r   SpanRuler.from_bytes  s0     	

H
 	
7;r1   c                @   ^  SU 4S j0n[         R                  " X!5      $ )zSerialize the span ruler to a bytestring.

RETURNS (bytes): The serialized patterns.

DOCS: https://spacy.io/api/spanruler#to_bytes
r   c                  D   > [         R                  " T R                  5      $ r'   )r   
json_dumpsr   r   s   r-   r.   $SpanRuler.to_bytes.<locals>.<lambda>  s     0 0 ?r1   )r   to_bytes)r   r   serializerss   `  r-   r  SpanRuler.to_bytes  s#     ?
 }}[22r1   pathc                |   ^  T R                  5         [        U5      nSU 4S j0n[        R                  " X0 5        T $ )zLoad the span ruler from a directory.

path (Union[str, Path]): A path to a directory.
RETURNS (SpanRuler): The loaded span ruler.

DOCS: https://spacy.io/api/spanruler#from_disk
r   c                 N   > TR                  [        R                  " U 5      5      $ r'   )r   r   
read_jsonlr   r   s    r-   r.   %SpanRuler.from_disk.<locals>.<lambda>  r   r1   )r   r   r   	from_disk)r   r  r   r   s   `   r-   r  SpanRuler.from_disk  s:     	

4 H
 	tB/r1   c                Z   ^  [        U5      nSU 4S j0n[        R                  " X0 5        g)zSave the span ruler patterns to a directory.

path (Union[str, Path]): A path to a directory.

DOCS: https://spacy.io/api/spanruler#to_disk
r   c                 F   > [         R                  " U TR                  5      $ r'   )r   write_jsonlr   r  s    r-   r.   #SpanRuler.to_disk.<locals>.<lambda>  s    %"3"3At}}"Er1   N)r   r   to_disk)r   r  r   r  s   `   r-   r  SpanRuler.to_disk  s,     4 E
 	T+r1   )r~   r   rt   ru   r   rw   r|   r{   ry   r   rv   rz   rs   rY   rx   )
span_ruler)r$   N)3__name__
__module____qualname____firstlineno____doc__DEFAULT_SPANS_KEYr   filter_chain_spansr   r   rm   r   r   r
   r   r   r   boolr   r   r   r   r   propertyr3   r   r   r   r   r   r   r   r   r   PatternTyper   r	   r   r   r   r   r   r   r   bytesr   r  r   r  r  __static_attributes__rL   r1   r-   rq   rq   t   s7    !F
 $5 # ##9=*=%,+7H&
#FF F
 C=F htnhtn5x~EF
F F d^Xd^,htn<
F &eCHo6F  (F F  !F" "#F( 
)FP# ## $  Xc]  <C <C < 2 2*., Sc3h S S 	
U38_ 	
 	
 #'48(r8G#445( h	(
 8K01(( ${+  .:T+%6 .:4 .:`
$5C 5D 5(5s 5t 5.@ >N=O-5c]	" 4D3E 
38C= 
3% 
3 CSBT#t)$2:3-	& CSBT,#t)$,2:3-,	, ,r1   rq   c                     U S:X  a"  [         R                  " S5      nUR                  $ U S:X  a"  [         R                  " S5      nUR                  $ [	        S[
         SU  35      e)Nmake_span_rulerzspacy.pipeline.factoriesmake_entity_rulerzmodule z has no attribute )	importlibimport_moduler#  make_future_entity_rulerAttributeErrorr  )r|   modules     r-   __getattr__r*    sg      (()CD%%%	$	$(()CD...
78*,>tfE
FFr1   )7r%  sysr   	functoolsr   pathlibr   typingr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   errorsr   r   languager   r   r   r   matcher.levenshteinr   rz   r   tokensr   r   trainingr   r   r   r   r   r    r   r  r  rJ   rM   rU   rW   rm   ro   rq   r*  rL   r1   r-   <module>r4     s'    
         %  , 5    : : 3c4S#X#77889 #tn#%-d^#	$Z#4&#tn#%-d^#	$Z#4+
 /@2w2	#s(^2 <M IS I}, },BGr1   