
    h#                     T   S SK Jr  S SKJrJrJrJrJrJrJ	r	J
r
  S SKrS SKJrJrJrJrJrJrJrJrJrJrJrJr  S SKJr  S SKJrJr  SSKJrJ r   SS	K!J"r"  SS
K#J$r$J%r%  SSK&J'r(  \(       a  SSK)J*r*  SSK+J,r,  S\-S\-S\.S\S\/\4   4S jr/S\-S\-S\-S\S\/\4   4S jr0S r1S r2 S,S\S\-S\-S\\-   S\4
S jjr3SSS\S\-S\-S\4
S jr4SSS\S\-S\-S\-S\4S jr5 S-SSS \S!\6S\4S" jjr7 " S# S$5      r8 S-S%\S&   S'\8S!\6S\	\Rr                  \S&   4   4S( jjr:S.S)\.S'\8S*\.S\.4S+ jjr;g)/    )partial)TYPE_CHECKINGAnyCallableIterableListOptionalTuplecastN)CosineDistance
L2Distance	LayerNormLinearMaxoutModelMultiSoftmaxSoftmaxchain
list2arrayto_categorical	zero_init)Loss)Floats2dInts1d   )IDORTH)Errors)OOV_RANKregistry)ModeDoc)Vocabmaxout_pieceshidden_sizelossreturnr$   c                 Z   ^ ^^^ SSS[         S[         4UUU 4S jjnS[        4U4S jjmU$ )Nvocabr$   tok2vecr(   c                    > U R                   R                  S   S:X  a  [        [        R                  5      e[        XTTS9nT" 5       UR                  S'   U$ )N   r   )r&   r%   r'   )vectorsshape
ValueErrorr   E875build_cloze_multi_task_modelattrs)r*   r+   modelcreate_vectors_lossr&   r%   s      T/home/james-whalen/.local/lib/python3.13/site-packages/spacy/ml/models/multi_task.pycreate_vectors_objective9create_pretrain_vectors.<locals>.create_vectors_objective$   sR    ==q!Q&V[[)),=
 23F    c                     > TS:X  a  [        SSS9n [        [        U S9$ TS:X  a  [        SS9n [        [        U S9$ [	        [
        R                  R                  TSS95      e)	NcosineT)	normalizeignore_zeros)distanceL2)r<   z'cosine', 'L2')found	supported)r   r   get_vectors_lossr   r0   r   E906format)r>   r'   s    r6   r5   4create_pretrain_vectors.<locals>.create_vectors_loss-   sd    8%4HH+h??T\!D1H+h??V[[//dFV/WXXr9   )r   r   )r%   r&   r'   r7   r5   s   ``` @r6   create_pretrain_vectorsrF   !   s8     % E  	Y 	Y $#r9   n_charactersc                 <   ^ ^^ SSS[         S[         4UU U4S jjnU$ )Nr*   r$   r+   r(   c                 X   > [        U UTTTS9n[        [        TS9UR                  S'   U$ )N)r&   r%   nr_charrJ   r'   )'build_cloze_characters_multi_task_modelr   get_characters_lossr3   )r*   r+   r4   r&   r%   rG   s      r6   create_characters_objective?create_pretrain_characters.<locals>.create_characters_objective>   s9    7#' 
 &&9<PFr9   )r   )r%   r&   rG   rN   s   ``` r6   create_pretrain_charactersrP   ;   s*    	7 	U 	u 	 	 '&r9   c                 @   US   R                   nUR                  R                  [        R                  :X  a  U R                  U Vs/ s H%  oUR                  [        5      R                  5       PM'     sn5      nUS   R                   R                  R                  U   nSXv[        :H  '   U" X'5      u  pX4$ UR                  R                  [        R                  :X  a|  U R                  U Vs/ s H&  n[        [        UR                  [        5      5      PM(     sn5      n
UR                  R                  U
5      nU R!                  U5      nU" X'5      u  pX4$ [#        [$        R&                  R)                  UR                  R                  S95      es  snf s  snf )zVCompute a loss based on a distance between the documents' vectors and
the prediction.
r   )mode)r*   r.   rR   VectorsModedefaultflattento_arrayr   raveldatar   floretr   r   r   	get_batch	as_contigr0   r   E850rD   )opsdocs
predictionr>   r*   docidstargetd_targetr'   keyss              r6   rB   rB   L   sL    GMME}}[000
 kktDt<<+113tDEa&&++C0"#h!*5 > 
		{11	1{{MDd);<MN((.v&!*5 > ++1C1C+DEE E
 Ns   ,F/-Fc           	      Z   [         R                  " U Vs/ s H  oDR                  US9PM     sn5      nUR                  S5      nU R	                  [        USS9SS9nUR                  SSU-  45      nX&-
  nUS-  R                  5       nU[        UR                  S	   5      -  n	X4$ s  snf )
zGCompute a loss based on a number of characters predicted from the docs.rK   )   )	n_classesfdtyperf      r   )	numpyvstackto_utf8_arrayreshapeasarrayr   sumfloatr/   )
r]   r^   r_   rJ   r`   
target_idsrb   diffr'   rc   s
             r6   rM   rM   d   s    TRTc000ATRSJ##E*J[[
cB#[NF^^Rw/0FD!G==?DeJ,,Q/00H> Ss   B(r+   token_vector_widthnOc           
          [        X2S-  S9n[        U [        US-  UUSS9[        US-  5      U5      nUR	                  SU 5        UR	                  SU5        U$ )Nrl   )rw   nI        )rw   ry   nPdropoutr+   output_layer)r   r   r   r   set_ref)r+   r%   rv   rw   softmaxr4   s         r6   build_multi_task_modelr   p   so     Q 67G!A%!		
 	$q()
E 
MM)W%	MM.'*Lr9   r*   c                 ^   U R                   R                  S   n[        [        [        [
        S   [        4   [        5       5      [        UUR                  S5      USSS9[        XC[        S95      n[        X5      n[        X5      nUR                  SU5        UR                  S	U5        U$ )
Nr-   r   rw   Trz   )rw   ry   r{   r<   r|   )rw   ry   init_Wr+   r}   )r.   r/   r   r   r   r   r   r   r   get_dimr   r   build_masked_language_modelr~   )r*   r+   r%   r&   rw   r}   r4   s          r6   r2   r2      s     
		Q	BU4
#X-.
=t$	
 	"Y7
L '(E'5E	MM)W%	MM.,/Lr9   rJ   c           
         [        [        [        [        S   [        4   [        5       5      [        X2S9[        US9[        S/U-  US95      n[        U [        X5      5      nUR                  SU5        UR                  SU5        U$ )Nr   )rw   r{   )ry   rg   r+   r}   )r   r   r   r   r   r   r   r   r   r   r~   )r*   r+   r%   r&   rJ   r}   r4   s          r6   rL   rL      s}     U4
#X-.
=+0[!cUW_5	L (uW/KLE	MM)W%	MM.,/Lr9   wrapped_model	mask_probc                    ^^ [        U 5      mUU4S jnSS[        4S jjn[        SUU/USU0UR                   Vs0 s H  oUS_M     snS9nUR                  SU5        U$ s  snf )	z7Convert a model into a BERT-style masked language modelc                    >^^ [        UTTS9u  mnU R                  R                  T5      R                  TR                  S   S45      mU R
                  S   " X5      u  nmUU4S jnX44$ )N)r   r   r-   c                 $   > U ST-
  -  n T" U 5      $ )Nr-    )d_outputbackpropmasks    r6   mlm_backwardFbuild_masked_language_model.<locals>.mlm_forward.<locals>.mlm_backward   s    D HH%%r9   )_apply_maskr]   rq   rp   r/   layers)	r4   r^   is_trainoutputr   r   r   r   random_wordss	        @@r6   mlm_forward0build_masked_language_model.<locals>.mlm_forward   sh     |yI
dyy  &..

1q/AB <<?4:	& ##r9   Nr4   c                     U R                   S   nUR                  XS9  UR                   H;  nUR                  U5      (       d  M  U R	                  XCR                  U5      5        M=     g )Nr   )XY)r   
initialize	dim_nameshas_dimset_dimr   )r4   r   r   wrappeddims        r6   mlm_initialize3build_masked_language_model.<locals>.mlm_initialize   sT    ,,q/Q$$$Cs##c??3#78 %r9   zmasked-language-modelr   )r   initrefsdims)NN)_RandomWordsr   r   r~   )r*   r   r   r   r   r   	mlm_modelr   s     `    @r6   r   r      s{      &L	$9e 9 '#0#:#:;#:C4i#:;I i/ <s   A"c                   ,    \ rS rSrSS jrS\4S jrSrg)r      r(   Nc                    U Vs/ s H   o"R                   S:w  d  M  UR                  PM"     snU l        U R                  S S U l        U Vs/ s H   o"R                   S:w  d  M  UR                   PM"     nnUS S n[        R                  " [        R
                  " USS95      nX3R                  5       -  nX0l        / U l        g s  snf s  snf )Nrz   '  ri   rj   )	probtextwordsrm   exparrayrr   probs_cache)selfr*   lexr   s       r6   __init___RandomWords.__init__   s    */C%388s?hchh%C
ZZ'
 &+>Uchh#oU>fu$yyU#)FG
 "$ D ?s   CCC#Cc                     U R                   (       dU  U R                   R                  [        R                  R	                  [        U R                  5      SU R                  S95        U R                   R                  5       nU R                  U   $ )Nr   )p)	r   extendrm   randomchoicelenr   r   pop)r   indexs     r6   next_RandomWords.next   sa    {{KK##C

OUdjj#I !zz%  r9   )r   r   r   )r*   r$   r(   N)__name__
__module____qualname____firstlineno__r   strr   __static_attributes__r   r9   r6   r   r      s    $!c !r9   r   r^   r#   r   c           	         SSK Jn  [        S U  5       5      n[        R                  R                  SSU45      nXR:  nSn/ nU  H  n/ n	U HE  n
XV   (       d  [        U
R                  U5      nOU
R                  nU	R                  U5        US-  nMG     U Vs/ s H  n[        UR                  5      PM     nnUR                  U" UR                  XS95        M     XW4$ s  snf )	Nr   r"   c              3   8   #    U  H  n[        U5      v   M     g 7fN)r   ).0r`   s     r6   	<genexpr>_apply_mask.<locals>.<genexpr>   s     %CHHs   rz   g      ?r   r-   )r   spaces)
tokens.docr#   rr   rm   r   uniform_replace_wordr   appendboolwhitespace_r*   )r^   r   r   r#   Nr   imasked_docsr`   r   tokenwordwr   s                 r6   r   r      s     "%%%A<<S1$/DD	AKE7$UZZ>zzLLFA  033s!$q}}%s3 	3syyEF    4s   C!r   r   c                 ~    [         R                  R                  5       nUS:  a  U$ US:  a  UR                  5       $ U $ )Ng?g?)rm   r   r   )r   r   r   rolls       r6   r   r     s;    << Dcz	  ""r9   r   )g333333?)z[MASK])<	functoolsr   typingr   r   r   r   r   r	   r
   r   rm   	thinc.apir   r   r   r   r   r   r   r   r   r   r   r   
thinc.lossr   thinc.typesr   r   r3   r   r   errorsr   utilr   r    r.   r!   rS   r   r#   r*   r$   intr   rF   rP   rB   rM   r   r2   rL   rs   r   r   ndarrayr   r   r   r9   r6   <module>r      s    V V V      (   & *!$$%($03$w%&$4''%('8;'w%&'"0	  	  		
 ."36EH
,"36EHSV
  >B!!#(!5:!
!H! !4 KO
5/)5BG
5==$u+%&< < s RU r9   