
    h!                     B   S SK Jr  S SKJr  S SKrS SKJr  S SKJrJ	r	J
r
  S SKJr  \R                  S 5       r\R                  S 5       r\R                  S	 5       rS
 rS rS rS rS rS rS rS rS r\R4                  R7                  S5      S\4S j5       rS rg)    )Random)ListN)Matcher)DocSpan	SpanGroup)filter_spansc                    U " S5      n[        U R                  SS9nUR                  S0 0 0 0 //5        UR                  S0 0 //5        UR                  S0 //5        U" U5      n/ nU H?  nUR                  [	        XS   US   U R                  R
                  US	      5      5        MA     [        S
5      R                  U5        [        USSS0US9UR                  S'   U$ Nz0 1 2 3 4 5 6T)validate421      r   *   SPANSkeyvaluenameattrsspans
r   vocabaddappendr   stringsr   shuffler   r   en_tokenizerdocmatchermatchesr   matchs         Y/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/doc/test_span_group.pyr"   r"      s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfXclGEAha,*<*<*D*DU1X*NO	
  2Ju"'%!1CIIg J    c                    U " S5      n[        U R                  SS9nUR                  S0 0 0 0 //5        UR                  S0 0 //5        UR                  S0 //5        U" U5      n/ nU H?  nUR                  [	        XS   US   U R                  R
                  US	      5      5        MA     [        S
5      R                  U5        [        USSS0US9UR                  S'   U$ r   r   r    s         r&   	other_docr)   "   s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfX clGEAha,*<*<*D*DU1X*NO	
  2Ju"'%!1CIIg Jr'   c                    U " S5      n[        U R                  SS9nUR                  S0 0 0 0 //5        UR                  S0 0 //5        UR                  S0 //5        U" U5      n/ nU H?  nUR                  [	        XS   US   U R                  R
                  US	      5      5        MA     [        S
5      R                  U5        [        USSS0US9UR                  S'   g r   r   r    s         r&   
span_groupr+   :   s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfX clGEAha,*<*<*D*DU1X*NO	
  2Ju"'%!1CIIgr'   c                    U R                   S   nUR                  5       nX!:w  d   eUR                  UR                  :X  d   eUR                  UR                  :X  d   e[	        U5      [	        U5      :X  d   e[        U5      [        U5      :X  d   eSUl        SUR                  S'   UR                  [        U SSS5      5        UR                  UR                  :w  d   eUR                  UR                  :w  d   eUR                  S   S:X  d   e[        U5      [        U5      :w  d   e[        U R                  U  Vs/ s H  o3R                  S	-   PM     snS
9n[        R                  " [        5         UR                  US9  S S S 5        U R                  5       nUR                  5        nUR                  USS 5        UR                  USS 5        S S S 5        [!        X SS U SS /S9n[#        XR                  US95       H=  u  pxUR$                  UR$                  :X  d   eUR&                  UR&                  :X  a  M=   e   g s  snf ! , (       d  f       N= f! , (       d  f       N= f)Nr   new_name	new_valuer   r      LABELr   x)words)r"   r      r   )r   copyr   r   lenlistr   r   r   r   textpytestraises
ValueError
retokenizemerger   zip
start_charend_char)	r"   r+   clonetdoc2doc3retokenizerspan1span2s	            r&   test_span_group_copyrH   Q   s   7#JOOE::(((;;******u:Z(((
tE{***EJ$EKK	LLc1a)*::(((;;******E"g---
tE{*** syys ;s!#s ;<D	z	"D! 
# 88:D		k$q)$$q)$ 
 31QxQq&:;JJD(AB5#3#3333~~/// C !<	"	"
 
	s   I
8I0)I"
I"
I0c                 f   U R                   S   nSnX#   nSUl        U R                  R                  S   Ul        X#   R
                  UR
                  :w  d   eX#   R                  UR                  :w  d   eXBU'   X#   R                  UR                  :X  d   eX#   R                  UR                  :X  d   eX#   R
                  UR
                  :X  d   eX#   R                  UR                  :X  d   eX#   U:X  d   e[        R                  " [        5         XBS'   S S S 5        [        R                  " [        5         XBS'   S S S 5        [        USS5      n[        R                  " [        5         XBU'   S S S 5        g ! , (       d  f       Nj= f! , (       d  f       NT= f! , (       d  f       g = f)	Nr      z	NEW LABELKB_IDd   r   r   )r   label_r   r   kb_idlabelstartendr9   r:   
IndexErrorr   r;   )r"   r)   r+   indexspans        r&   test_span_group_set_itemrV   q   sk   7#JEDDK""7+DJ""djj000""djj000u""djj000  DHH,,,""djj000""djj000$$$	z	"4 
#	z	"3 
# 	1a D	z	" 5 
#	" 
#	"	"	" 
#	"s$   F >F2F" 
F
F"
F0c                 H    U R                   S   nUR                  (       d   eg )Nr   )r   has_overlap)r"   r+   s     r&   test_span_group_has_overlaprY      s    7#J!!!!r'   c                    U R                   S   nU SS U SS /n[        U SSSS.US9nUR                  U5      nUR                  UR                  :X  d   eUR                  S	SS.:X  d   e[        U5      [        U5      -   n[        U5      [        U5      :X  d   e[        U5      [        U5      -   nUR                  US
S9nXR:X  d   eUR                  UR                  :X  d   eUR                  S	SS.:X  d   e[        U5      [        U5      :X  d   eUR                   S   n[        R                  " [        5         UR                  U5        S S S 5        g ! , (       d  f       g = f)Nr   r   rJ   r/   
MORE_SPANSr.   r   new_keyr   r   T)inplace)	r   r   _concatr   r   r7   r9   r:   r;   )r"   r)   span_group_1r   span_group_2span_group_3span_list_expecteds          r&   test_span_group_concatrd      sl   99W%L1Xs1Qx E!k:	L  ''5L 1 1111[!IIIIl+d<.@@&8!9999 l+d<.@@''d'CL''' 1 1111[!IIII&8!9999??7+L	z	"\* 
#	"	"s   >E
E'c                    U R                   S   n[        U5      nSnX   nXS-      nX	 [        U5      US-
  :X  d   eX   U:w  d   eX   U:X  d   e[        R                  " [        5         US	 S S S 5        [        R                  " [        5         US	 S S S 5        g ! , (       d  f       N5= f! , (       d  f       g = f)Nr   rJ   r   rL   rM   )r   r6   r9   r:   rS   )r"   r+   lengthrT   rU   	next_spans         r&   test_span_doc_delitemrh      s    7#J_FED19%Iz?fqj((($$$	)))	z	"t 
#	z	"sO 
#	" 
#	"	"	"s   ,BB0
B-0
B>c                    U R                   S   nU SS U SS /n[        U SSSS.US9nUR                  U5      nX-   n[        U5      [        U5      :X  d   eUR                  S	SS.:X  d   e[        U5      [        U5      :X  d   eg )
Nr   r   rJ   r/   r[   r.   r\   r   r   )r   r   r_   r6   r   r7   )r"   r`   r   ra   span_group_3_expectedrb   s         r&   test_span_group_addrk      s    99W%L1Xs1Qx E!k:	L )00>.L|$9 ::::[!IIII&;!<<<<r'   c                    U R                   S   R                  5       nU SS U SS /n[        U SSSS.US9nUR                  U5      nX-  n[	        U5      [	        U5      :X  d   eUR
                  S	SS.:X  d   e[        U5      [        U5      :X  d   eU R                   S   R                  5       nX-  n[	        U5      [	        U5      :X  d   eUR
                  S
S	0:X  d   e[        U5      [        U5      :X  d   eg Nr   r   rJ   r/   r[   r.   r\   r   r   r   )r   r5   r   r_   r6   r   r7   r"   r`   r   ra   span_group_1_expecteds        r&   test_span_group_iaddrp      s%   99W%**,L1Xs1Qx E!k:	L )00> L|$9 ::::[!IIII&;!<<<<99W%**,LL|$9 ::::w"    &;!<<<<r'   c                    U R                   S   R                  5       nU SS U SS /n[        U SSSS.US9nUR                  U5      nUR	                  U5        [        U5      [        U5      :X  d   eUR                  S	SS.:X  d   e[        U5      [        U5      :X  d   eU R                   S   nUR	                  U5        [        U5      [        U5      :X  d   eUR                  S
S	0:X  d   e[        U5      [        U5      :X  d   eg rm   )r   r5   r   r_   extendr6   r   r7   rn   s        r&   test_span_group_extendrs      s   99W%**,L1Xs1Qx E!k:	L )00>%|$9 ::::[!IIII&;!<<<<99W%L|$9 ::::%!1111&;!<<<<r'   c                     [         R                  " [        5         [        U R                  5        S S S 5        g ! , (       d  f       g = f)N)r9   r:   AttributeErrorprintr"   )r+   s    r&   test_span_group_deallocrw     s&    	~	&jnn 
'	&	&s	   :
Ai.  r"   c                     U R                   S   n[        U5      n[        U5       H  u  p4XAU   s=:X  a  X#   :X  a  M   e   e   [        U5        g)zXTests whether typing of `SpanGroup` as `Iterable[Span]`-like object is accepted by mypy.r   N)r   r7   	enumerater	   )r"   r+   r   irU   s        r&   test_span_group_typingr{     sR      IIg.JZ(EZ(!}000000 )r'   c                     U " S5      nU " S5      n[        XSS USS /S9n[        R                  " [        5         [        XSS USS /S9nSSS5        g! , (       d  f       g= f)z5Test that all spans must come from the specified doc.za b cr   r   r   r4   N)r   r9   r:   r;   )r!   doc1rC   r+   s       r&   test_span_group_init_docr~     sd     D D4Qq	4!9'=>J	z	"t1ItAay+AB
 
#	"	"s   A
A%)randomr   typingr   r9   spacy.matcherr   spacy.tokensr   r   r   
spacy.utilr	   fixturer"   r)   r+   rH   rV   rY   rd   rh   rk   rp   rs   rw   markissuer{   r~    r'   r&   <module>r      s       ! - - #  ,  .  ,0@!8"
+8"=$=4=0
 5  Cr'   