
    h9                     b   S SK r S SKrS SKJr  S SKJr  S SKJrJrJ	r	J
r
JrJrJrJr  S SKrS SKrS SKJrJrJrJr  S SKJrJr  SSKJr  SS	KJr  SS
KJr  SSK J!r!  SSK"J#r#J$r$J%r%  SSK&J'r'  SSK(J)r)  SSK*J+r+  SSK,J-r-  SSK.J/r/  Sr0Sr1\" 5       Re                  \15      S   r3 " S S\/5      r4S r5g)    N)Counter)islice)AnyCallableDictIterableListOptionalTuplecast)ConfigModelNumpyOpsSequenceCategoricalCrossentropy)Floats2dInts2d   )util)Errors)Language)Doc)Examplevalidate_examplesvalidate_get_examples)Vocab   )	EditTrees)validate_edit_tree)lemmatizer_score)TrainablePipe   z
[model]
@architectures = "spacy.Tagger.v2"

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
modelc                      \ rS rSrSr S.SSSS\S.S\S	\S
\S\	\   S\
S\S\
S\	\   4S jjjrS\\   S\\   S\\\\   4   4S jrS\\   S\\   4S jrS rS rS rS\\   4S jr\S\\
S4   4S j5       r\S\4S j5       r\S\4S j5       r SSS .S!\/ \\   4   S"\	\!   S#\	\   4S$ jjr"\#" 5       S%.S& jr$\#" 5       S%.S' jr%\#" 5       4S( jr&\#" 5       4S) jr'S#\4S* jr(S!\/ \\   4   4S+ jr)S/S, jr*S-r+g)0EditTreeLemmatizer,   zC
Lemmatizer that lemmatizes each word using a predicted edit tree.
orth   Fr   )backoffmin_tree_freq	overwritetop_kscorervocabr"   namer(   r)   r*   r+   r,   c                    Xl         X l        X0l        X@l        XPl        X`l        Xpl        [        U R                   R                  5      U l	        0 U l
        S/ 0U l        Xl        [        5       U l        g)a  
Construct an edit tree lemmatizer.

backoff (Optional[str]): backoff to use when the predicted edit trees
    are not applicable. Must be an attribute of Token or None (leave the
    lemma unset).
min_tree_freq (int): prune trees that are applied less than this
    frequency in the training data.
overwrite (bool): overwrite existing lemma annotations.
top_k (int): try to apply at most the k most probable edit trees.
labelsN)r-   r"   r.   r(   r)   r*   r+   r   stringstrees
tree2labelcfgr,   r   	numpy_ops)	selfr-   r"   r.   r(   r)   r*   r+   r,   s	            ]/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/edit_tree_lemmatizer.py__init__EditTreeLemmatizer.__init__1   s^    . 

	*"
tzz112
*,$,b>!    examplesscoresreturnc           	      x   [        US5        [        SSS9n/ nU H  n/ n[        UR                  UR	                  SSS95       Hd  u  pxUb  US:X  a  Sn	OBU R
                  R                  UR                  U5      n
U R                  R                  U
S	5      n	UR                  U	5        Mf     UR                  U5        M     U" X$5      u  pU R                  R                  R                  R                  U5      (       a0  [        [         R"                  R%                  U R&                  S
95      e[)        U5      U4$ )NzEditTreeLemmatizer.get_lossF)	normalizemissing_valueLEMMAT)	as_string r   r.   )r   r   zip	predictedget_alignedr2   addtextr3   getappendr"   opsxpisnan
ValueErrorr   E910formatr.   float)r6   r;   r<   	loss_functruthseg	eg_truthsrG   
gold_lemmalabeltree_idd_scoreslosss                r7   get_lossEditTreeLemmatizer.get_lossW   s    	($AB3eSUV	BI+.bnnWnE,' %r)9E"jjnnY^^ZHG OO//;E  ', MM)$  #62::>>""4((V[[//TYY/?@@T{H$$r:   docsc                 D   U R                   S:X  a  U R                  nO-U R                   [        ::  a  U R                  nOU R                  n[        [        U5      5      n[        S U 5       5      (       d`  [        U R                  S   5      nU Vs/ s H(  oPR                  R                  R                  SU5      PM*     nn[        U5      U:X  d   eU$ U R                  R                  U5      n[        U5      U:X  d   eU" X5      n[        U5      U:X  d   eU$ s  snf )Nr   c              3   8   #    U  H  n[        U5      v   M     g 7fN)len).0docs     r7   	<genexpr>-EditTreeLemmatizer.predict.<locals>.<genexpr>   s     ,t3s88ts   r0   r   )r+   _scores2guesses_top_k_equals_1TOP_K_GUARDRAIL_scores2guesses_top_k_greater_1_scores2guesses_top_k_guardrailrc   listanyr4   r"   rM   alloc2ipredict)r6   r_   scores2guessesn_docsn_labels_guessesr<   s           r7   ro   EditTreeLemmatizer.predictr   s    ::?!@@NZZ?*!AAN!AAN T$Z,t,,,488H-.HRV$WRVQZZ^^%;%;Ax%HRVG$Ww<6)))N##D)6{f$$$ .7|v%%% %Xs   /Dc                    / n[        X5       H  u  pEUR                  SS9nU R                  R                  U5      n/ n[	        U5       Hc  u  pU R
                  S   Xh      n
U R                  R                  XR                  5      b  UR                  U
5        MR  UR                  S5        Me     UR                  [        R                  " U5      5        M     U$ )Nr   )axisr0   r?   )rF   argmaxr5   asarray	enumerater4   r2   applyrJ   rL   nparray)r6   r_   r<   rt   re   
doc_scoresdoc_guessesdoc_compat_guessesitokenrZ   s              r7   rh   1EditTreeLemmatizer._scores2guesses_top_k_equals_1   s    "40OC$+++3K..00=K!#%cN((8,[^<::##GZZ8D&--g6&--b1 + NN288$678  1 r:   c                    / n[        U R                  [        U R                  5      5      n[	        X5       GH  u  pVU R
                  R                  U5      n/ n[        U5       H  u  p[        U5       H  n
[        Xh   R                  5       5      nU R                  S   U   nU R                  R                  XR                  5      b  UR                  U5          Mz  [         R"                  " [         R$                  5      R                   XhU4'   M     UR                  S5        M     UR                  [         R&                  " U5      5        GM     U$ )Nr0   r?   )minr+   rc   r0   rF   r5   ry   rz   rangeintrx   r4   r2   r{   rJ   rL   r|   finfofloat32r}   )r6   r_   r<   rt   r+   re   r~   r   r   r   rs   	candidatecandidate_tree_ids                r7   rj   2EditTreeLemmatizer._scores2guesses_top_k_greater_1   s   DJJDKK 01"40OC//
;J!#%cNuA #JM$8$8$: ;I(,(:9(E%zz''(9::FR*112CD/1xx

/C/G/GJ)|, & '--b1 + NN288$678  1 r:   c                    / n[        X5       H  u  pE[        R                  " U5      SS U R                  * S-
  S24   nU R                  R                  U5      n/ n[        XF5       H]  u  pSn
U	 H?  nU R                  S   U   nU R                  R                  XR                  5      c  M=  Un
  O   UR                  U
5        M_     UR                  [        R                  " U5      5        M     U$ )N.r   r?   r0   )rF   r|   argsortr+   r5   ry   r4   r2   r{   rJ   rL   r}   )r6   r_   r<   rt   re   r~   r   r   r   
candidatesrZ   r   r   s                r7   rk   2EditTreeLemmatizer._scores2guesses_top_k_guardrail   s    "40OC**Z06La"6L1LMK..00=K!#%(%:!!+I(,(:9(E%zz''(9::FR"3 ", #))'2 &; NN288$678  1" r:   c                    [        U5       H  u  p4X#   n[        US5      (       a  UR                  5       n[        U5       H  u  pgU R                  (       d  XF   R                  S:X  d  M*  US:X  a0  U R
                  b!  [        XF   U R
                  5      XF   l        M^  M`  U R                  R                  XtU   R                  5      nXU   l
        M     M     g )NrK   r   r?   )rz   hasattrrK   r*   lemmar(   getattrr2   r{   rJ   lemma_)	r6   r_   batch_tree_idsr   re   doc_tree_idsjrZ   r   s	            r7   set_annotations"EditTreeLemmatizer.set_annotations   s    oFA),L|U+++//1'5
>>SV\\Q%6 "}<<3+2364<<+HCFL 4 !%

 0 0a&++ F(-A 6	 &r:   .c                 2    [        U R                  S   5      $ )z4Returns the labels currently added to the component.r0   )tupler4   r6   s    r7   r0   EditTreeLemmatizer.labels   s     TXXh'((r:   c                     g)NT r   s    r7   hide_labelsEditTreeLemmatizer.hide_labels   s    r:   c                 d   / n[        [        U R                  5      5       Hm  nU R                  U   nSU;   a  U R                  R                  US      US'   SU;   a  U R                  R                  US      US'   UR                  U5        Mo     [        U[        U R                  S   5      S9$ )Norigsubstr0   )r2   r0   )	r   rc   r2   r-   r1   rL   dictr   r4   )r6   r2   rZ   trees       r7   
label_dataEditTreeLemmatizer.label_data   s    S_-G::g&D~#zz11$v,?V$ $

 2 24= AWLL . %dhhx.@(ABBr:   N)nlpr0   get_examplesr   r0   c          	         [        US5        Uc  U R                  U5        OU R                  U5        / n/ n[        U" 5       S5       H  nUR	                  UR
                  5        / nUR                   Hr  nUR                  S:X  a  S n	O&U R                  UR                  UR                  5      n	UR	                  U R                  S    V
s/ s H  n
X:X  a  SOSPM     sn
5        Mt     [        [        U5      nUR	                  U R                  R                  R!                  USS95        M     U R#                  5         [%        U5      S:  d,   [&        R(                  R+                  U R,                  S	95       e[%        U5      S:  d,   [&        R(                  R+                  U R,                  S	95       eU R                  R/                  XES
9  g s  sn
f )NzEditTreeLemmatizer.initialize
   r   r0   g      ?g        r   )dtyperE   )XY)r   _labels_from_data_add_labelsr   rL   x	referencer   _pair2labelrJ   r   r4   r   r   r"   rM   ry   _require_labelsrc   r   E923rR   r.   
initialize)r6   r   r   r0   
doc_samplelabel_sampleexamplegold_labelsr   
gold_labelrY   s              r7   r   EditTreeLemmatizer.initialize   s    	l,KL>""<0V$ 
lnb1Ggii(-/K **;;!#!%J!%!1!1%**ell!KJ"" &*XXh%7%7E  %2;%7 + x5K

 6 6{) 6 TU# 2& 	:"FFKK$6$6DII$6$FF"< 1$Hfkk&8&8dii&8&HH$


;s   Gexcludec                h   ^ ^ U 4S jU 4S jUU 4S jU 4S jS.n[         R                  " XT5        T $ )Nc                 b   > TR                   R                  [        R                  " U 5      5      $ rb   )r4   updatesrsly
json_loadsbr6   s    r7   <lambda>/EditTreeLemmatizer.from_bytes.<locals>.<lambda>  s    TXX__U-=-=a-@Ar:   c                 :   > TR                   R                  U 5      $ rb   )r"   
from_bytesr   s    r7   r   r         tzz44Q7r:   c                 8   > TR                   R                  U TS9$ Nr   )r-   r   )r   r   r6   s    r7   r   r     s    tzz44Q4Hr:   c                 :   > TR                   R                  U 5      $ rb   )r2   r   r   s    r7   r   r     r   r:   r4   r"   r-   r2   )r   r   )r6   
bytes_datar   deserializerss   ` ` r7   r   EditTreeLemmatizer.from_bytes  s+    A7H7	
 	
7;r:   c                d   ^ ^ U 4S jU 4S jUU 4S jU 4S jS.n[         R                  " UT5      $ )Nc                  D   > [         R                  " T R                  5      $ rb   )r   
json_dumpsr4   r   s   r7   r   -EditTreeLemmatizer.to_bytes.<locals>.<lambda>$  s    5++DHH5r:   c                  8   > T R                   R                  5       $ rb   )r"   to_bytesr   s   r7   r   r   %      TZZ002r:   c                  6   > TR                   R                  T S9$ r   )r-   r   )r   r6   s   r7   r   r   &  s    TZZ000Ar:   c                  8   > T R                   R                  5       $ rb   )r2   r   r   s   r7   r   r   '  r   r:   r   )r   r   )r6   r   serializerss   `` r7   r   EditTreeLemmatizer.to_bytes"  s)    52A2	
 }}['22r:   c                    ^ ^ [         R                  " U5      nU 4S jU 4S jUU 4S jU 4S jS.n[         R                  " XT5        g )Nc                 F   > [         R                  " U TR                  5      $ rb   )r   
write_jsonr4   pr6   s    r7   r   ,EditTreeLemmatizer.to_disk.<locals>.<lambda>/  s    U--a:r:   c                 :   > TR                   R                  U 5      $ rb   )r"   to_diskr   s    r7   r   r   0      tzz11!4r:   c                 8   > TR                   R                  U TS9$ r   )r-   r   r   r   r6   s    r7   r   r   1  s    tzz11!W1Er:   c                 :   > TR                   R                  U 5      $ rb   )r2   r   r   s    r7   r   r   2  r   r:   r   )r   ensure_pathr   )r6   pathr   r   s   ` ` r7   r   EditTreeLemmatizer.to_disk,  s6    %:4E4	
 	T0r:   c                 l   ^ ^ U 4S jnU 4S jUUU 4S jU 4S jS.n[         R                  " XT5        T $ )Nc                    >  [        U S5       nTR                  R                  UR                  5       5        S S S 5        g ! , (       d  f       g = f! [         a    [        [        R                  5      S ef = f)Nrb)openr"   r   readAttributeErrorrP   r   E149)r   mfiler6   s     r7   
load_model0EditTreeLemmatizer.from_disk.<locals>.load_model7  sT    8!T]eJJ))%**,7 #]]! 8 -478s'   A *AA 
AA A %A8c                 b   > TR                   R                  [        R                  " U 5      5      $ rb   )r4   r   r   	read_jsonr   s    r7   r   .EditTreeLemmatizer.from_disk.<locals>.<lambda>?  s    TXX__U__Q-?@r:   c                 8   > TR                   R                  U TS9$ r   )r-   	from_diskr   s    r7   r   r   A  s    tzz33Aw3Gr:   c                 :   > TR                   R                  U 5      $ rb   )r2   r   r   s    r7   r   r   B  s    tzz33A6r:   r   )r   r   )r6   r   r   r   r   s   ` `  r7   r   EditTreeLemmatizer.from_disk6  s3    	8 AG6	
 	tG4r:   c                    SU;  a&  [        [        R                  R                  SS95      eSU;  a&  [        [        R                  R                  SS95      e[	        US   5      U R
                  S'   / nUS    H  n[        U5      nU(       a5  [        [        R                  R                  SR                  U5      S95      e[        U5      nSU;   a+  U R                  R                  R                  US   5      US'   SU;   a+  U R                  R                  R                  US   5      US'   UR                  U5        M     U R                  R                  U5        [!        U R"                  5       H  u  pSXPR$                  U'   M     g )Nr0   rE   r2   
)errorsr   r   )rP   r   E857rR   rl   r4   r   E1026joinr   r-   r1   rI   rL   r2   	from_jsonrz   r0   r3   )r6   r0   r2   r   r  rY   s         r7   r   EditTreeLemmatizer._add_labelsH  sF   6!V[[//X/>??& V[[//W/=>>!&"237OD'-F !4!4DIIf<M!4!NOO:D~#zz1155d6lCV~ $

 2 2 6 6tG} EWLL $ 	

U#$T[[1KE$)OOD! 2r:   c                    [        5       n[        UR                  5      n[        5       n0 nU" 5        Ht  nUR                   Ha  nUR
                  S:w  d  M  UR                  UR                  UR                  5      nXH==   S-  ss'   UR                  UR                  4XX'   Mc     Mv     UR                  5        H,  u  pXR                  :  d  M  XX   u  pU R                  XSS9  M.     g )Nr   r   T)	add_label)r   r   r1   r   r   r   rI   rJ   r   itemsr)   r   )r6   r   r-   r2   
tree_freqs
repr_pairsr   r   rZ   freqformr   s               r7   r   $EditTreeLemmatizer._labels_from_datab  s     %--(%i

#~G **;;!##ii

ELLAG'1,'+0::u||*DJ'	 + & (--/MG)))(1   = 0r:   c                 
   U R                   R                  X5      nX@R                  ;  aK  U(       d  g[        U R                  S   5      U R                  U'   U R                  S   R                  U5        U R                  U   $ )z
Look up the edit tree identifier for a form/label pair. If the edit
tree is unknown and "add_label" is set, the edit tree will be added to
the labels.
Nr0   )r2   rI   r3   rc   r4   rL   )r6   r  r   r  rZ   s        r7   r   EditTreeLemmatizer._pair2labelw  sh     **..-//)'*488H+='>DOOG$HHX%%g.w''r:   )r(   r4   r)   r"   r.   r5   r*   r,   r+   r3   r2   r-   )trainable_lemmatizer)F),__name__
__module____qualname____firstlineno____doc__r   r   r   strr
   r   boolr   r8   r   r   r	   r   r   rS   r]   r   r   ro   rh   rj   rk   r   propertyr0   r   r   r   r   r   r   r   r   r   r   r   r   r   __static_attributes__r   r:   r7   r$   r$   ,   s    +	$$ "(%5$$$$ $$ 	$$ #$$ $$ $$ $$ "$$L% )%37>%	ud8n$	%%6HSM d6l 4"(*.HSM ." )c3h ) ) T   	CD 	C 	C #'!%(<r8G#445(< h	(<
 (<T 16 
 #(' 3 %*G 1 ',g $*$ *4>hr8G;L7L.M >*(r:   r$   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_edit_tree_lemmatizerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler  r   r  )r.   modules     r7   __getattr__r     sA    **(()CD///
78*,>tfE
FFr:   )6r  syscollectionsr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   numpyr|   r   	thinc.apir   r   r   r   thinc.typesr   r   rD   r   r  r   languager   tokensr   trainingr   r   r   r-   r   _edit_tree_internals.edit_treesr   _edit_tree_internals.schemasr   
lemmatizerr   trainable_piper    ri   default_model_configfrom_str"DEFAULT_EDIT_TREE_LEMMATIZER_MODELr$   r   r   r:   r7   <module>r2     s     
   M M M   N N (     H H  6 < ( )   &,X%6%67K%LW%U "X( X(x
Gr:   