
    h4                     ,   S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	J
r
Jr  S SKJrJrJrJr  SSKJr  SSKJr  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  Sr\" 5       RA                  \5      S   r! " S S\5      r" " S S\5      r#S\#S\$4S jr%S r&S r'g)    N)islice)AnyCallableDictIterableListOptionalSequence)ConfigModel	Optimizerset_dropout_rate   )Errors)Language)Doc)Examplevalidate_examplesvalidate_get_examples)Vocab   )TrainablePipez
[model]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
modelc                   V   \ rS rSrSrS"S\S\S\SS4S jjr\	S\
S	   4S
 j5       r\	S\
\   4S j5       rSS	S\SS4S jrSS	S\S\4S jrS#S jrS\\   4S jrS\\   SS4S jrSSSS.S\\   S\S\\   S\\\\4      4S jjrS#S jrSS.S\/ \\   4   S\\   4S jjrS  r S!r!g)$Tok2Vec   a  Apply a "token-to-vector" model and set its outputs in the doc.tensor
attribute. This is mostly useful to share a single subnetwork between multiple
components, e.g. to have one embedding and CNN network shared between a
parser, tagger and NER.

In order to use the `Tok2Vec` predictions, subsequent components should use
the `Tok2VecListener` layer as the tok2vec subnetwork of their model. This
layer will read data from the `doc.tensor` attribute during prediction.
During training, the `Tok2Vec` component will save its prediction and backprop
callback for each batch, so that the subsequent components can backpropagate
to the shared weights. This implementation is used because it allows us to
avoid relying on object identity within the models to achieve the parameter
sharing.
vocabr   namereturnNc                 D    Xl         X l        X0l        0 U l        0 U l        g)a[  Initialize a tok2vec component.

vocab (Vocab): The shared vocabulary.
model (thinc.api.Model[List[Doc], List[Floats2d]]):
    The Thinc Model powering the pipeline component. It should take
    a list of Doc objects as input, and output a list of 2d float arrays.
name (str): The component instance name.

DOCS: https://spacy.io/api/tok2vec#init
N)r   r   r   listener_mapcfg)selfr   r   r   s       P/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/tok2vec.py__init__Tok2Vec.__init__-   s"     

	@B#%    Tok2VecListenerc                 v    U R                    VVs/ s H  oR                  U     H  o"PM     M     snn$ s  snnf )zeRETURNS (List[Tok2VecListener]): The listener models listening to this
component. Usually internals.
)listening_componentsr!   )r#   cms      r$   	listenersTok2Vec.listeners>   s7    
  44S4a>O>OPQ>R>R4SSSs   !5c                 H    [        U R                  R                  5       5      $ )z_RETURNS (List[str]): The downstream components listening to this
component. Usually internals.
)listr!   keys)r#   s    r$   r*   Tok2Vec.listening_componentsE   s    
 D%%**,--r'   listenercomponent_namec                     U R                   R                  U/ 5        XR                   U   ;  a  U R                   U   R                  U5        gg)z=Add a listener for a downstream component. Usually internals.N)r!   
setdefaultappendr#   r3   r4   s      r$   add_listenerTok2Vec.add_listenerL   sG    $$^R8,,^<<n-44X> =r'   c                     X R                   ;   aR  XR                   U   ;   a@  U R                   U   R                  U5        U R                   U   (       d  U R                   U	 gg)z@Remove a listener for a downstream component. Usually internals.TF)r!   remover8   s      r$   remove_listenerTok2Vec.remove_listenerR   sZ    ...,,^<<!!.188B((8)).9r'   c                 ,   SU R                   4n[        [        USS5      [        5      (       af  UR                  R                  5        HG  n[        U[        5      (       d  M  UR                  U;   d  M,  U R                  X1R                   5        MI     gg)a|  Walk over a model of a processing component, looking for layers that
are Tok2vecListener subclasses that have an upstream_name that matches
this component. Listeners can also set their upstream_name attribute to
the wildcard string '*' to match any `Tok2Vec`.

You're unlikely to ever need multiple `Tok2Vec` components, so it's
fine to leave your listeners upstream_name on '*'.
*r   N)	r   
isinstancegetattrr   r   walkr(   upstream_namer9   )r#   	componentnamesnodes       r$   find_listenersTok2Vec.find_listeners]   sq     dii gi$7??!,,.dO449K9Ku9T%%dNN; / @r'   docsc                    [        S U 5       5      (       dR  U R                  R                  S5      nU Vs/ s H)  o0R                  R                  R	                  SU45      PM+     sn$ U R                  R                  U5      nU$ s  snf )a  Apply the pipeline's model to a batch of docs, without modifying them.
Returns a single tensor for a batch of documents.

docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations for each token in the documents.

DOCS: https://spacy.io/api/tok2vec#predict
c              3   8   #    U  H  n[        U5      v   M     g 7fN)len.0docs     r$   	<genexpr>"Tok2Vec.predict.<locals>.<genexpr>u   s     ,t3s88t   nOr   )anyr   get_dimopsallocpredict)r#   rJ   widthrQ   tokvecss        r$   rZ   Tok2Vec.predictl   sr     ,t,,,JJ&&t,E@DEJJNN((!U4EE**$$T* Fs   0Bc                 t    [        X5       H)  u  p4UR                  S   [        U5      :X  d   eXCl        M+     g)zModify a batch of documents, using pre-computed scores.

docs (Iterable[Doc]): The documents to modify.
tokvecses: The tensors to set, produced by Tok2Vec.predict.

DOCS: https://spacy.io/api/tok2vec#set_annotations
r   N)zipshaperN   tensor)r#   rJ   	tokvecsesrQ   r\   s        r$   set_annotationsTok2Vec.set_annotations|   s5      0LC==#s3x/// J 1r'           )dropsgdlossesexamplesrf   rg   rh   c                  ^ ^^^^^^ Tc  0 m[        US5        U Vs/ s H  oUR                  PM     nn[        T R                  U5        T R                  R	                  U5      u  mmT Vs/ s H0  nT R                  R
                  R                  " UR                  6 PM2     snmTR                  T R                  S5        UUU U4S jmUUUU U4S jn[        R                  U5      n	T R                  SS  H  n
U
R                  U	TT5        M     T R                  (       a   T R                  S   R                  U	TU5        T$ s  snf s  snf )a  Learn from a batch of documents and gold-standard information,
updating the pipe's model.

examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
sgd (thinc.api.Optimizer): The optimizer.
losses (Dict[str, float]): Optional record of the loss during training.
    Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.

DOCS: https://spacy.io/api/tok2vec#update
NzTok2Vec.updatere   c                 D  > [        [        U 5      5       HE  nTU==   X   -  ss'   TTR                  ==   [        X   S-  R	                  5       5      -  ss'   MG     T Vs/ s H0  nTR
                  R                  R                  " UR                  6 PM2     sn$ s  snf )zAccumulate tok2vec loss and gradient. This is passed as a callback
to all but the last listener. Only the last one does the backprop.
r   )	rangerN   r   floatsumr   rX   alloc2fr`   )one_d_tokvecsit2v	d_tokvecsrh   r#   r\   s      r$   accumulate_gradient+Tok2Vec.update.<locals>.accumulate_gradient   s    
 3}-.! 00tyy!UM,<,A+F+F+H%II! / CJJ'3DJJNN**CII6'JJJs   #7Bc                 P   > T" U 5        T" T5      nTb  TR                  T5        U$ )z>Callback to actually do the backprop. Passed to last listener.)finish_update)rp   d_docsrt   
bp_tokvecsrs   r#   rg   s     r$   backprop Tok2Vec.update.<locals>.backprop   s-    .	*F""3'Mr'   )r   	predictedr   r   begin_updaterX   ro   r`   r6   r   r(   get_batch_idr-   receive)r#   ri   rf   rg   rh   egrJ   rr   rz   batch_idr3   rt   ry   rs   r\   s   `  ``      @@@@r$   updateTok2Vec.update   s   ( >F($45'/0xx0T*"jj55d;CJK7CTZZ^^++SYY77K	$))S)	K 	K	 	 #//5s+HXw0CD ,>>NN2&&x(C; 1 Ls   E,7Ec                     g rM    )r#   ri   scoress      r$   get_lossTok2Vec.get_loss   s    r'   )nlpget_examplesr   c                   [        US5        / n[        U" 5       S5       H  nUR                  UR                  5        M      U(       d,   [        R
                  R                  U R                  S95       eU R                  R                  US9  g)aD  Initialize the pipe for training, using a representative set
of data examples.

get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.

DOCS: https://spacy.io/api/tok2vec#initialize
zTok2Vec.initialize
   r   )XN)
r   r   r7   xr   E923formatr   r   
initialize)r#   r   r   
doc_sampleexamples        r$   r   Tok2Vec.initialize   sq     	l,@A
lnb1Ggii( 2=6;;--499-==z


+r'   c                     [         erM   )NotImplementedError)r#   labels     r$   	add_labelTok2Vec.add_label   s    !!r'   )r"   r!   r   r   r   )tok2vec)r   N)"__name__
__module____qualname____firstlineno____doc__r   r   strr%   propertyr   r-   r*   r9   boolr=   rH   r   r   rZ   r
   rc   r   rm   r	   r   r   r   r   r   r   r   r   __static_attributes__r   r'   r$   r   r      sX   &e &E & &T &" T4 12 T T .d3i . .?%6 ? ?PT ?	(9 	3 	SW 	<HSM  
!HSM 
! 
!  #'-147#4 	4
 i 4 c5j)*4l #'	,r8G#445, h	,,"r'   r   c                   t    \ rS rSrSrSrS\S\SS4S jr\	S	\
\   S\4S
 j5       rS\SS4S jrS\4S jrSrg)r(      a  A layer that gets fed its answers from an upstream connection,
for instance from a component earlier in the pipeline.

The Tok2VecListener layer is used as a sublayer within a component such
as a parser, NER or text categorizer. Usually you'll have multiple listeners
connecting to a single upstream Tok2Vec component, that's earlier in the
pipeline. The Tok2VecListener layers act as proxies, passing the predictions
from the Tok2Vec component into downstream components, and communicating
gradients back upstream.
ztok2vec-listenerrD   r[   r   Nc                     [         R                  " X R                  [        SU0S9  Xl        SU l        SU l        SU l        g)a  
upstream_name (str): A string to identify the 'upstream' Tok2Vec component
    to communicate with. The upstream name should either be the wildcard
    string '*', or the name of the `Tok2Vec` component. You'll almost
    never have multiple upstream Tok2Vec components, so the wildcard
    string will almost always be fine.
width (int):
    The width of the vectors produced by the upstream tok2vec component.
rU   )r   forwarddimsN)r   r%   r   r   rD   	_batch_id_outputs	_backprop)r#   rD   r[   s      r$   r%   Tok2VecListener.__init__   s7     	t))WD%=Q*(,r'   inputsc                 &    [        S U 5       5      $ )zzCalculate a content-sensitive hash of the batch of documents, to check
whether the next batch of documents is unexpected.
c              3   F   #    U  H  n[        S  U 5       5      v   M     g7f)c              3   8   #    U  H  oR                   v   M     g 7frM   )orth)rP   tokens     r$   rR   9Tok2VecListener.get_batch_id.<locals>.<genexpr>.<genexpr>   s     3sezzsrT   Nrn   rO   s     r$   rR   /Tok2VecListener.get_batch_id.<locals>.<genexpr>   s     Fv33s333vs   !r   )clsr   s     r$   r   Tok2VecListener.get_batch_id   s    
 FvFFFr'   r   c                 (    Xl         X l        X0l        g)zStore a batch of training predictions and a backprop callback. The
predictions and callback are produced by the upstream Tok2Vec component,
and later will be used when the listener's component's model is called.
N)r   r   r   )r#   r   outputsrz   s       r$   r   Tok2VecListener.receive   s    
 "!r'   c                 
   U R                   c&  U R                  c  [        [        R                  5      eU R                  U5      nX R                   :w  a0  [        [        R                  R                  X R                   S95      eg)zOCheck that the batch of Doc objects matches the ones we have a
prediction for.
)id1id2T)r   r   
ValueErrorr   E954r   E953r   )r#   r   r   s      r$   verify_inputsTok2VecListener.verify_inputs	  sd     >>!dmm&;V[[))((0H>>) !3!3nn!3!UVVr'   )r   r   r   rD   )r   r   r   r   r   r   r   intr%   classmethodr   r   r   r   r   r   r   r   r'   r$   r(   r(      sq    	 Dc # $   G(3- GC G G" "4 "t r'   r(   is_trainc                 v   U(       a  U R                   cn  / nU H^  nUR                  R                  S:X  a&  [        [        R
                  R                  SS95      eUR                  UR                  5        M`     U[        4$ U R                  U5        U R                  U R                  4$ / nU R                  S5      nU Hn  nUR                  R                  S:X  a6  UR                  U R                  R                  [        U5      U5      5        MS  UR                  UR                  5        Mp     U[        4$ )z7Supply the outputs from the upstream Tok2Vec component.r   r   r   rU   )r   ra   sizer   r   E203r   r7   _empty_backpropr   r   r   rW   rX   ro   rN   )r   r   r   r   rQ   r[   s         r$   r   r     s     ??"G::??a'$V[[%7%7Y%7%GHHNN3::.	 
 O++'>>5??22 d#Czz!# uyy00S5ABszz*  ''r'   c                     / $ rM   r   )dXs    r$   r   r   <  s    Ir'   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_tok2veczspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )r   modules     r$   __getattr__r   A  s@    ~(()CD"""
78*,>tfE
FFr'   )(r   sys	itertoolsr   typingr   r   r   r   r   r	   r
   	thinc.apir   r   r   r   errorsr   languager   tokensr   trainingr   r   r   r   r   trainable_piper   default_model_configfrom_strDEFAULT_TOK2VEC_MODELr   r(   r   r   r   r   r   r'   r$   <module>r      s     
  J J J @ @    H H  )
  ))*>?H {"m {"|9e 9x"(? "(d "(J
Gr'   