
    h                         S SK JrJrJr  S SKJrJrJrJrJ	r	J
r
  S SKJr  SSKJr  SSKJr  SSKJr  SSKJr  S	S
KJr  S	SKJr   SS\\\   \\   4   S\S   S\S\S\S\S\\   S\4S jjrS rS rS rS rS r g)    )ListOptionalcast)LinearModelchain
list2arrayuse_ops	zero_init)Floats2d   )Literal)Errors)Doc)registry   PrecomputableAffine)TransitionModelNtok2vec
state_type)parsernerextra_state_tokenshidden_widthmaxout_pieces	use_uppernOreturnc                 "   US:X  a  U(       a  SOSnO8US:X  a  U(       a  SOSnO&[        [        R                  R                  US95      eU R	                  S5      (       a  U R                  S5      OS	n[        U [        5       [        X85      5      n U R                  SU5        [        U(       a  UOUUU R                  S5      US
9n	S	n
U(       a  [        S5         [        US	S9n
S	S	S	5        [        X	U
[        5      $ ! , (       d  f       N= f)a  
Build a transition-based parser model. Can apply to NER or dependency-parsing.

Transition-based parsing is an approach to structured prediction where the
task of predicting the structure is mapped to a series of state transitions.
You might find this tutorial helpful as background:
https://explosion.ai/blog/parsing-english-in-python

The neural network state prediction model consists of either two or three
subnetworks:

* tok2vec: Map each token into a vector representations. This subnetwork
    is run once for each batch.
* lower: Construct a feature-specific vector for each (token, feature) pair.
    This is also run once for each batch. Constructing the state
    representation is then simply a matter of summing the component features
    and applying the non-linearity.
* upper (optional): A feed-forward network that predicts scores from the
    state representation. If not present, the output from the lower model is
    used as action scores directly.

tok2vec (Model[List[Doc], List[Floats2d]]):
    Subnetwork to map tokens into vector representations.
state_type (str):
    String value denoting the type of parser model: "parser" or "ner"
extra_state_tokens (bool): Whether or not to use additional tokens in the context
    to construct the state vector. Defaults to `False`, which means 3 and 8
    for the NER and parser respectively. When set to `True`, this would become 6
    feature sets (for the NER) or 13 (for the parser).
hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer.
    Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
    is replaced with a ReLu non-linearity if use_upper=True, and no
    non-linearity if use_upper=False.
use_upper (bool): Whether to use an additional hidden layer after the state
    vector in order to predict the action scores. It is recommended to set
    this to False for large pretrained models such as transformers, and True
    for smaller networks. The upper layer is computed on CPU, which becomes
    a bottleneck on larger GPU-based models, where it's also less necessary.
nO (int or None): The number of actions the model will predict between.
    Usually inferred from data at the beginning of training, or loaded from
    disk.
r         r      r   )valuer   Nr   nFnInPcpur   r'   )
ValueErrorr   E917formathas_dimget_dimr   r	   r   set_dim_define_lowerr
   _define_upperr   resize_output)r   r   r   r   r   r   r   nr_feature_tokens	t2v_widthloweruppers              P/home/james-whalen/.local/lib/python3.13/site-packages/spacy/ml/models/parser.pybuild_tb_parser_modelr9      s    h X"4B!	u	!3A++*+=>>)0)>)>%DI|'G
 OOD,'$<"??4 	E EU^!RD1E  75-@@ ^s   D  
Dc                     [        X[        S9$ )N)r   r'   init_W)r   r   r*   s     r8   r2   r2   ]   s    Ry11    c                     [        XX#S9$ )Nr%   r   r%   s       r8   r1   r1   a   s    "::r<   c                 V    U R                   S   (       a  [        X5      $ [        X5      $ )N	has_upper)attrs_resize_upper_resize_lower)modelnew_nOs     r8   r3   r3   e   s$    {{;U++''r<   c                 H   U R                  S5      nUR                  S5      c  UR                  SU5        U $ XR                  S5      :X  a  U $ UnUR	                  S5      n[        S5         [        XS9nS S S 5        UR                  S5      (       a  WR                  R                  X5      nUR                  R                  U5      nUR                  S5      nUR                  S5      n	UR                  S5      (       aI  UR                  S5      n
XS U
& XS U
& [        X5       H!  nU R                  S   R                  U5        M#     UR                  SU5        UR                  SU5        WU R                   S	'   U R#                  SU5        U $ ! , (       d  f       GN#= f)
Nr7   r   r'   r)   r*   Wbunseen_classes)get_refr.   r0   r/   maybe_get_dimr
   r2   	has_paramopsalloc2falloc1f	get_paramranger@   add	set_param_layersset_ref)rC   rD   r7   smallerr'   largerlarger_Wlarger_b	smaller_W	smaller_bold_nOis               r8   rA   rA   k   sq   MM'"E}}T"dF#	==&	&G			t	$B	&0 
 ::%%f1::%%f-%%c*	%%c*	 ??4  __T*F )Wf )Wf6*,-11!4 + 	h'h'EMM"	MM'6"L+ 
s   ,
F
F!c                    U R                  S5      nUR                  S5      c  UR                  SU5        U $ UnUR                  S5      nUR                  S5      nUR                  S5      n[	        XXVS9nUR                  S5      (       Ga>  UR                  R                  XQXd5      nUR                  R                  X5      n	UR                  R                  SXQU5      n
UR                  S5      nUR                  S	5      nUR                  S
5      nUR                  S5      (       ak  UR                  S5      nXS S 2SU2S S 2S S 24'   XS S 2S S 2SU2S S 24'   XSU2S S 24'   [        X5       H!  nU R                  S   R                  U5        M#     UR                  SU5        UR                  S	U	5        UR                  S
U
5        XpR                  S'   U R!                  SU5        U $ )Nr6   r   r'   r&   r(   )r   r'   r&   r(   rF      rG   padr   rH   )rJ   r.   r0   rK   r1   rL   rM   alloc4frN   rP   r/   rQ   r@   rR   rS   rT   rU   )rC   rD   r6   rV   r'   r&   r(   rW   rX   rY   
larger_padrZ   r[   smaller_padr\   r]   s                   r8   rB   rB      s   MM'"E}}T"dF#G			t	$B			t	$B			t	$Bf:F::%%b"9::%%f1ZZ''2r:
%%c*	%%c*	''.??4  __T*F*3Q&!Q&',7q!QvXq()$-QvXq[!6*,-11!4 + 	h'h'
+MM!	MM'6"Lr<   )N)!typingr   r   r   	thinc.apir   r   r   r	   r
   r   thinc.typesr   compatr   errorsr   tokensr   utilr   _precomputable_affiner   tb_frameworkr   boolintr9   r2   r1   r3   rA   rB    r<   r8   <module>rp      s    ' ' J J       7 * LA49d8n,-LA(LA LA 	LA
 LA LA 	LA LA^2;(D!r<   