
    h,                        S SK r S SKrS SKJrJrJrJrJrJr  S SK	J
r
  S SKJr  S SKJr  S SKJr  \R"                  S 5       rS rS	 r\R*                  R-                  S
S/5      S 5       r\R*                  R-                  S
S/5      S 5       rS rS rS rS rS rS rS rS r S r!S r"S r#S r$S r%S r&S r'S r(g)    N)IS_ALPHAIS_DIGITIS_LOWERIS_PUNCTIS_STOPIS_TITLE)VERBDoc)Example)Vocabc                 .    / SQn/ SQn/ SQn[        XX#S9$ )N)Thisisasentence.r   r   anotherr   r   Andr   thirdr   )   r      r   r      r      r   r   
      r   r   )nsubjROOTdetattrpunctr   r   r   r    r!   r   r   npadvmodr!   wordsheadsdepsr
   )en_vocabr$   r%   r&   s       X/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/doc/test_token_api.pydocr)      s&     uE:EAD xE==    c                    / SQn/ SQn/ SQn/ SQn[        XX#US9nUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  S	:X  d   eUS   R
                  S
:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eg )N)Giveitback!Hepleadedr   )r	   PRONPARTPUNCTr2   r	   r4   )r   r   r   r      r5   r5   )r   dobjprtr!   r   r   r!   )r$   posr%   r&   r   r,   zGive giveXxxxGiver	   r   )
r   orth_texttext_with_wslower_shape_prefix_suffix_pos_dep_)r'   r$   r8   r%   r&   r)   s         r(   test_doc_token_api_stringsrF      s    =E
DC!EED
h
ECq6<<6!!!q6;;&   q6')))q6==F"""q6==F"""q6>>S   q6>>U"""q6;;&   q6;;&   r*   c                    SnU " U5      nUS   R                  [        5      (       d   eUS   R                  [        5      (       a   eUS   R                  [        5      (       d   eUS   R                  [        5      (       d   eUS   R                  [
        5      (       d   eUS   R                  [        5      (       d   eUS   R                  [        5      (       a   eg )NGive it back! He pleaded.r   r   r      r5   )
check_flagr   r   r   r   r   r   en_tokenizerr>   tokenss      r(   test_doc_token_api_flagsrN   '   s    &D$F!9))))ay##H----!9))))!9))))!9))))!9((((ay##G,,,,,r*   r>   rH   c                     UR                  5       S   nSU R                  U   l        U " U5      nUS   R                  S:w  d   eg )Nr   )splitvocabprob)rL   r>   wordrM   s       r(   ,test_doc_token_api_prob_inherited_from_vocabrU   4   sD    ::<?D$&Lt!$F!9>>Qr*   zone twoc                     U " U5      n[        US   5      UR                  S5      S   :X  d   e[        US   5      UR                  S5      S   :X  d   eg )Nr    r   )strrQ   rK   s      r(   test_doc_token_api_str_builtinrY   <   sR    $Fvay>TZZ_Q////vay>TZZ_Q////r*   c                 |   [        U / SQS9nUS   R                  (       d   eUS   R                  (       d   eUS   R                  (       a   eUS   R                  (       d   eUS   R
                  (       d   eUS   R                  (       a   eUS   R                  (       d   eUS   R                  (       d   eg )N)Hi,myemailr   ztest@me.comr$   r   r   r      r5   )	r   is_titleis_alphais_digitis_punctis_asciilike_urlis_lower
like_email)r'   r)   s     r(    test_doc_token_api_is_propertiesri   C   s    
hM
NCq6???q6???1vq6???q6???1vq6???q6r*   c                     [        5       n U R                  SS9  U R                  S[        R                  " SS/SS9S9  U R                  S	[        R                  " SS
/SS9S9  [        U / SQS9nUR                  (       d   eUS   R                  (       d   eUS   R                  (       d   eUS   R                  (       a   eSnS
nSX#-  -  nUS   R                  US   5      U:X  d   eg )NrI   )widthapplesg        g       @f)dtype)vectororangesg      ?)rl   rp   oovr_   r   r   )r   reset_vectors
set_vectornumpyasarrayr   
has_vector
similarity)rR   r)   apples_normoranges_normcosines        r(   test_doc_token_api_vectorsr{   O   s    GE	a 	XemmS#Jc&JK	Yu}}c3Zs'KL
e7
8C>>>q6q61v    (K)LK$>?Fq6SV$...r*   c                    / SQn/ SQnS/[        U5      -  n[        XX#S9nUS   R                   Vs/ s H  oUR                  PM     snSS/:X  d   eUS   R                   Vs/ s H  oUR                  PM     snS/:X  d   eUS	   R                   Vs/ s H  oUR                  PM     sn/ :X  d   eUS	   R	                  US
   5      (       d   eUS   R	                  US	   5      (       a   eg s  snf s  snf s  snf )N		YesterdayIsawr   dogthatbarkedloudlyr   	rI   rI   rI   r`   rI   r   r`   r   rI   depr#   r   r   r   r   rI      )lenr   	ancestorsr>   is_ancestor)r'   r$   r%   r&   r)   ts         r(   test_doc_token_api_ancestorsr   _   s    RE'E7SZD
h5
<CF,,-,qFF,-%???F,,-,qFF,-%888F,,-,qFF,-333q6c!f%%%%1v!!#a&)))))	 .--s   C/$C4C9c           	         / SQn/ SQnS/[        U5      -  n[        XX#S9nUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS	   R                  S:X  d   eUS	   R                  S:X  d   eUS	   R                  R
                  S
:X  d   eUS	   R                  R
                  S:X  d   eUS
   R                  S:X  d   eUS
   R                  S:X  d   eUS
   R                  R
                  S
:X  d   eUS
   R                  R
                  S
:X  d   eUS   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS
   US   l        US   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS
   R                  S:X  d   eUS
   R                  S:X  d   eUS
   R                  R
                  S
:X  d   eUS
   R                  R
                  S:X  d   eUS	   R                  S:X  d   eUS	   R                  S:X  d   eUS	   R                  R
                  S
:X  d   eUS	   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS   US   l        US   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   eUS
   R                  R
                  S:X  d   eUS	   R                  R
                  S:X  d   eUS   R                  R
                  S:X  d   e[        XUS/[        U5      -  S9n[        R                  " [        5         US   US   l        S S S 5        / SQn/ SQn[        XUS/[        U5      -  S9nUS   R                  (       d   eUS   R                  (       d   eUS   R                  US   :X  d   eUS   R                  US	   :X  d   eUS   R                  US   :X  d   eUS   R                  US   :X  d   eUS
   US   l        US   R                  (       d   eUS   R                  (       d   eUS   R                  US   :X  d   eUS   R                  US	   :X  d   eUS   R                  US   :X  d   eUS   R                  US   :X  d   eUS   US   l        US   R                  (       d   eUS   R                  (       a   eUS   R                  US   :X  d   eUS   R                  US   :X  d   eg ! , (       d  f       GN= f)Nr}   r   r   r#   r   r   r5   r   r`   r   r   rI   r   )
r   r   oner   r   r   r   r   r   r   )
r   r   r   r   r   r5   r5   r5   r5   r5   	   )r   r   n_leftsn_rights	left_edgei
right_edgeheadpytestraises
ValueErroris_sent_start)r'   r$   r%   r&   r)   doc2s         r(   test_doc_token_api_head_setterr   l   s   RE'E7SZD
h5
<Cq6>>Qq6??aq6"""q6!###q6>>Qq6??aq6"""q6!###q6>>Qq6??aq6"""q6!###q6"""q6!###a&CFKq6>>Qq6??aq6"""q6!###q6>>Qq6??aq6"""q6!###q6>>Qq6??aq6"""q6!###q6"""q6!###a&CFKq6"""q6"""q6"""q6"""q6"""xE#e*8LMD	z	"1gA 
# ]E*E
h5wU7K
LCq6q6q6s1v%%%q6A&&&q6s1v%%%q6A&&&a&CFKq6q6q6s1v%%%q6A&&&q6s1v%%%q6A&&&a&CFKq61v####q6s1v%%%q6A&&&9 
#	"s   5V::
W	c                     U " S5      nUS   R                   b   eSUS   l         US   R                   SL d   e[        [        UR                  5      5      S:X  d   eg )N$This is a sentence. This is another.r5   TrI   )r   r   listsentsrL   r)   s     r(   test_is_sent_startr      sb    
=
>Cq6'''CFq64'''tCII1$$$r*   c                     U " S5      nUS   R                   b   eSUS   l        US   R                   SL d   e[        [        UR                  5      5      S:X  d   eg )Nr   r`   Tr5   rI   )is_sent_endr   r   r   r   r   s     r(   test_is_sent_endr      sb    
=
>Cq6%%%CFq6%%%tCII1$$$r*   c                      [        [        5       SS/S9n SU S   l        U S   R                  S:X  d   e[        U S   l        U S   R                  S:X  d   eg )Nhelloworldr_   NOUNr   r   r	   )r   r   rD   r	   r8   r)   s    r(   test_set_posr      sW    
eggw/
0CCFKq6;;&   CFJq6;;&   r*   c                      [        [        5       SS/S9n [        R                  " [        5         SU S   l        S S S 5        g ! , (       d  f       g = f)Nr   r   r_   blahr   )r   r   r   r   r   rD   r   s    r(   test_set_invalid_posr      s8    
eggw/
0C	z	"A 
#	"	"s   A
Ac                    [        [        U R                  5      5      S:X  d   eU S   R                  R                  S:X  d   eU S   R                  R                  S:X  d   eU S   R                  R
                  R                  R                  S:X  d   eU S   R                  R
                  R                  R                  S:X  d   eg)zTest token.sent propertyr   r   zThis is a sentence .r   zThis is another sentence .r   N)r   r   r   sentr>   rootr   r   s    r(   test_tokens_sentr      s    tCII1$$$q6;;5555q6;;;;;;q6;;%%**f444q6;;%%**f444r*   c                      [        [        5       SS/S9n U S   R                  SL d   eU S   R                  b   eU R                  S5      (       a   eg )Nr   r   r_   r   Tr   
SENT_START)r   r   r   has_annotationr   s    r(   test_token0_has_sent_start_truer      s]    
eggw/
0Cq64'''q6'''!!,/////r*   c                      [        [        5       SS/S9n U S   R                  b   eU S   R                  SL d   eU R                  S5      (       a   eg )Nr   r   r_   r   r   Tr   )r   r   r   r   r   s    r(    test_tokenlast_has_sent_end_truer      s]    
eggw/
0Cq6%%%q6%%%!!,/////r*   c                 t   / SQn/ SQn/ SQn[        XX#S9nUS   R                   Vs/ s H  oUR                  PM     snSS/:X  d   eUS   R                   Vs/ s H  oUR                  PM     snS	S/:X  d   eUS
   R                   Vs/ s H  oUR                  PM     snS	S/:X  d   eg s  snf s  snf s  snf )N)
Theboyandthegirlr   r   manwentr   )
r   r   r   r`   r   r`   r   r`   r   r   )
r   r   ccr   conjr   r   r   r   r!   r#   r   r   r   r`   r   r   r   	conjunctsr>   r'   r$   r%   r&   r)   ws         r(   test_token_api_conjuncts_chainr      s    RE*EVD
h5
<CF,,-,qFF,-&%@@@F,,-,qFF,-%???F,,-,qFF,-%@@@ .--s   B+B0
B5c                    / SQn/ SQn/ SQn[        XX#S9nUS   R                   Vs/ s H  oUR                  PM     snS/:X  d   eUS   R                   Vs/ s H  oUR                  PM     snS/:X  d   eg s  snf s  snf )	N)Theycamer   r   r   )r   r   r   r   r   )r   r   r   r   r   r#   r   r   r   r   r   r   s         r(   test_token_api_conjuncts_simpler      s    0EE1D
h5
<CF,,-,qFF,-&999F,,-,qFF,-&999 .-s   A8A=c                     / SQn/ SQn/ SQn[        XX#S9nUS   R                   Vs/ s H  oUR                  PM     sn/ :X  d   eUS   R                   Vs/ s H  oUR                  PM     sn/ :X  d   eg s  snf s  snf )N)r   r   r   )r   r   r   )r   r   r!   r#   r   r   r   r   s         r(   test_token_api_non_conjunctsr      s{    !EE%D
h5
<CF,,-,qFF,-333F,,-,qFF,-333 .-s   A6A;c                    / SQn/ SQn/ SQn[        XXS9nU Vs/ s H  oUR                  5       PM     nnU Vs/ s H  oUR                  5       PM     nnU Vs/ s H  oUR                  R                  PM     nnU Vs/ s H  oUR
                  PM     n	nU Vs/ s H  oUR                  PM     n
nU/ SQ:X  d   eU/ SQ:X  d   eUSS / SQ:X  d   eU	SS / S	Q:X  d   eU
/ S
Q:X  d   e[        R                  " XAUS.5      nUR                   Vs/ s H  oUR                  5       PM     nnUR                   Vs/ s H  oUR                  5       PM     nnUR                   Vs/ s H  oUR                  R                  PM     nnUR                   Vs/ s H  oUR
                  PM     nnUR                   Vs/ s H  oUR                  PM     nnX:X  d   eX:X  d   eX:X  d   eX:X  d   eUU
:X  d   eUR                  SS9u  nnUS   US   :X  d   eUS   US   :X  d   eUS   US   :X  d   eUS   US   :X  d   egs  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf )zWCheck that the Doc constructor and Example.from_dict parse missing information the same)r   r   r   r   rI   N) r   r6   r   r   N)r   likeLondonr   Berlinr   r#   )FTTTTFr   r5   )r   r   r   rI   )r   r6   r   r   )TFFFFF)r%   r&   T)projectivizer   N)r   has_headhas_depr   r   rE   r   r   	from_dict	referenceget_aligned_parse)r'   r%   r&   r$   r)   r   pred_has_headspred_has_deps
pred_heads	pred_depspred_sent_startsexampleref_has_headsref_has_deps	ref_headsref_depsref_sent_startsaligned_headsaligned_depss                      r(   test_missing_head_depr     sp   !E3D9E
h5
<C,/0CqjjlCN0*-.#QYY[#M.$'(Cq&&((CJ(!$%AI%145A5CCCCBBBBa?l***Qq>;;;;HHHHd%CDG+2+<+<=+<aZZ\+<M=)0):):;):AIIK):L;#*#4#45#4a#4I5 ' 1 12 11 1H2070A0AB0A10AOB***)))"""   ....")";";";"NM<?hqk)))y|+++?hqk)))y|+++5 1.(%5 >;52Bs:   H;I I>I
II8I!II#5I(c                     U " S5      nU " S5      nUS   USS :X  a   eUS   USS :X  a   eUS   US   :X  a   eUS   U:X  a   eg )Nza bzb cr   r   rI    )rL   doc1r   s      r(   test_token_api_richcmp_otherr   +  so    DDAw$q)###Aw$q)###Aw$q'!!!Aw$r*   ))rt   r   spacy.attrsr   r   r   r   r   r   spacy.symbolsr	   spacy.tokensr   spacy.trainingr   spacy.vocabr   fixturer)   rF   rN   markparametrizerU   rY   ri   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r(   <module>r      s      Q Q   "  > >!"	- "=!>? @ )-0 .0	/ 
*H'V%%!500A:4 ,Fr*   