
    hnE                     T   S SK r S SKrS SKJr  S SKJrJrJr  S SKJ	r	  S SK
Jr  S SKJrJr  \R                  R!                  S5      S 5       r\R                  R!                  S	5      S
 5       r\R                  R!                  S5      S 5       r\R                  R!                  S5      S 5       r\R                  R!                  S5      S 5       r\R                  R!                  S5      S 5       r\R                  R!                  S5      S 5       rS rS rS rS rS rS rS rS rS r S r!S r"S  r#S! r$S" r%S# r&S$ r'\R                  R!                  S%5      S& 5       r(\R                  R!                  S'5      S+S( j5       r)\R                  R!                  S)5      S* 5       r*g),    N)displacy)DependencyRendererEntityRendererSpanRenderer)English)Persian)DocSpani9	  c                     Sn/ SQn[        XS/[        U5      -  S9n[        R                  " U5      nU H
  nXT;   a  M
   e   g)z#Test if < is escaped when rendering)z&lt;z&gt;z&amp;z&quot;)<>&"depwordsdepsN)r	   lenr   render)de_vocabcharsr   dochtmlchars         S/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/test_displacy.pytest_issue2361r      sG     0E E
h5'CJ*>
?C??3D||     i
  c                     [        U / SQS9n[        USSSS9/Ul        [        R                  " USS9nS	U;   d   e[        USS
SS9/Ul        [        R                  " USS9nS	U;   d   eg)z9Test that displaCy ENT visualizer escapes HTML correctly.)testz	<RELEASE>r   r   r      TESTlabelentstylez&lt;RELEASE&gt;   N)r	   r
   entsr   r   en_vocabr   r   s      r   test_issue2728r,      sw     h;
<CS!Qf-.CH??3e,D$$$S!Qf-.CH??3e,D$$$r   i  c                     / SQn/ SQn/ SQn[        XX#S9n[        R                  " [        U5      S4SS9Ul        [
        R                  " U5        g)	zTest that retokenization works correctly via displaCy when punctuation
is merged onto the preceeding token and tensor is resized.)HelloWorld!Whenisthisbreaking?)r!   r!   r!      r6      r6   r6   )intjROOTpunctadvmodr9   detnsubjr:   )r   headsr   `   float32)dtypeN)r	   numpyzerosr   tensorr   r   )r+   r   r>   r   r   s        r   test_issue3288rE   "   sG     KE$EOD
h5
<Cc%j"-Y?CJOOCr   i  c            
      
   SSS.SSS.SSS.SSS.S	S
S.SSS./SSSSS.SSSSS.SSSSS.SSSSS.SSSSS./S.n SSSSS./S .n[         R                  " U S!S"S#9nU(       d   e[         R                  " US$S"S#9nU(       d   eg%)&z;Test that displaCy renderer doesn't require "settings" key.ButCCONJtexttagGooglePROPNr2   VERBstartingfromADPzbehind.ADVr      ccleftstartendr$   dirr!   r=   r(   auxr6   prepright   pcompr   arcs#But Google is starting from behind.
   ORGrW   rX   r$   rJ   r)   r   Tr'   manualr%   Nr   r   )example_depexample_entdep_htmlent_htmls       r   test_issue3531rm   .   s    
 7+g.&)/E*u-
 D@GFCE&AF7CGGD
K$ 6R%89K {%EHO8{%EHO8r   i*  c                 |    [        U SS/SS/S9n[        5       UR                  S'   [        R                  " U5        g)zVTest that displaCy doesn't serialize the doc.user_data when making a
copy of the Doc.
r.   worldr   r   r   N)r	   set	user_datar   
parse_deps)r+   r   s     r   test_issue3882rs   L   s9    
 hw0u~
FCECMM&r   iG  c            	          [        5       n SSS.SSS.SSS.SSS./nS	S
SSS.SSSSS.SSSSS.SSSSS.S
SSSS./nU R                  XS./5        U R                  S:X  d   eg)zITest that overlapping arcs get separate levels, unless they're identical.ThisDTrI   r2   VBZaz	sentence.NNr   r!   r=   rU   rV   r(   rS   r<   overlap)rX   r$   rW   rY   attrr_   N)r   r   highest_levelrendererr   r`   s      r   test_issue5447r   V   s     "#H%e$T"T*	E A?Af=A	&AI&AAv>D OOu345!!Q&&&r   i  c                      Sn [        5       nU" U 5      n[        USSSS9/Ul        [        R                  " USS9nUR                  S5      nUS	:X  d   eg )
Nz8First line
Second line, with ent
Third line
Fourth line
      r   r#   r%   r&   z<br>r6   )r   r
   r)   r   r   count)sample_textnlpr   r   founds        r   test_issue5838r   k   sZ     QK
)C
k
CS!Qf-.CH??3e,DJJvEA::r   c           
         [        U / SQS9n[        USSS5      [        USSS5      /UR                  S'   [        R                  " U5      n[        U[        5      (       d   eUS	   S
:X  d   eUS   SSSSSSSS.SSSSSSSS./:X  d   eg)z>Test that spans on a Doc are converted into displaCy's format.WelcometotheBankofChinar    rS   r7   rc   r]   GPEscrJ   Welcome to the Bank of China spans       #rW   rX   start_token	end_tokenr$   kb_idkb_url   Nr	   r
   r   r   parse_spans
isinstancedictr+   r   r   s      r   test_displacy_parse_spansr   x   s    
hM
NCCAu-tCAu/EFCIIdO  %EeT""""=;;;;>	
 	
   r   c           
         [        U / SQS9n[        USSSSS9[        USSS	S
S9/UR                  S'   [        R                  " USS05      n[        U[        5      (       d   eUS   S:X  d   eUS   SSSSSSSS.SSSSS	S
SS./:X  d   eg)zHTest that spans with kb_id on a Doc are converted into displaCy's formatr   r    rS   r7   rc   Q790068)r   r]   r   Q148r   kb_url_templatezhttps://wikidata.org/wiki/{}rJ   r   r   r   r   z!https://wikidata.org/wiki/Q790068r   r   zhttps://wikidata.org/wiki/Q148Nr   r   s      r   ,test_displacy_parse_spans_with_kb_id_optionsr      s    
hM
NCS!QY/S!QV,CIIdO
   !?@E eT""""=;;;;>9	
 6	
   r   c           	      @   [        U / SQS9n[        USSS5      [        USSS5      /UR                  S'   [        USSS	5      /UR                  S
'   [        R                  " USS
0S9n[        U[        5      (       d   eUS   S:X  d   eUS   SSSSS	SSS./:X  d   eg)z7Test that spans in a different spans key will be parsedr   r    rS   r7   rc   r]   r   r   BANKcustom	spans_key)optionsrJ   r   r   r   r   r   r   r   Nr   r   s      r   -test_displacy_parse_spans_different_spans_keyr      s    
hM
NCCAu-tCAu/EFCIIdOQ623CIIh  {H.EFEeT""""=;;;;>	

 
 
 
r   c                    [        U / SQS9n[        USSS5      /UR                  S'   [        R                  " [
        SS9   [        R                  " U5      nS	S	S	5        [        W[        5      (       d   eg	! , (       d  f       N&= f)
z:Test that having an unset spans key doesn't raise an errorr   r    rS   r7   r   r   W117)matchN)
r	   r
   r   pytestwarnsUserWarningr   r   r   r   r   s      r   #test_displacy_parse_empty_spans_keyr      sl    
hM
NCQ623CIIh	k	0$$S) 
1 eT"""" 
1	0s   A99
Bc                    [        U / SQS9n[        USSUR                  R                  S   S9/Ul        [
        R                  " U5      n[        U[        5      (       d   eUS   S:X  d   eUS	   S
SSSSS./:X  d   e[        USSUR                  R                  S   SS9/Ul        [
        R                  " U5      n[        U[        5      (       d   eUS   S:X  d   eUS	   S
SSSSS./:X  d   eg)zGTest that named entities on a Doc are converted into displaCy's format.rG   rL   r2   rO   rP   behindr    r!   r(   rc   r#   rJ   #But Google is starting from behind r)   r6   rb   r   r   rW   rX   r$   r   r   Q95r$   r   N	r	   r
   vocabstringsr)   r   
parse_entsr   r   r+   r   r)   s      r   test_displacy_parse_entsr      s   
hS
TCS!Qcii&7&7&>?@CHs#DdD!!!!<@@@@<BsK    S!Qcii&7&7&>eLMCHs#DdD!!!!<@@@@<B#N   r   c                 
   [        U / SQS9n[        USSUR                  R                  S   SS9/Ul        [
        R                  " USS	05      n[        U[        5      (       d   eUS
   S:X  d   eUS   SSSSSS./:X  d   eg)zRTest that named entities with kb_id on a Doc are converted into displaCy's format.r   r    r!   r(   rc   r   r   r   z https://www.wikidata.org/wiki/{}rJ   r   r)   r6   rb   z!https://www.wikidata.org/wiki/Q95r   Nr   r   s      r   +test_displacy_parse_ents_with_kb_id_optionsr      s    
hS
TCS!Qcii&7&7&>eLMCH!CDD dD!!!!<@@@@<9	
   r   c           	      L   / SQn/ SQn/ SQn/ SQn/ SQn[        XX#XES9n[        R                  " U5      n[        U[        5      (       d   eUS   SUS	   US	   S
.SUS   US   S
.SUS   US   S
.SUS   US   S
./:X  d   eUS   S	SSSS.SSSSS.SSSSS./:X  d   e[        R                  " USS 5      n[        U[        5      (       d   eUS   SUS	   US	   S
.SUS   US   S
.SUS   US   S
.SUS   US   S
./:X  d   eUS   S	SSSS.SSSSS.SSSSS./:X  d   eg)zFTest that deps and tags on a Doc are converted into displaCy's format.)ru   r2   rx   sentence)r!   r!   rS   r!   )DETrN   r   NOUN)rv   rw   rv   ry   )r=   r9   r<   r{   )r   r>   postagsr   r   Nr   )lemmarJ   rK   r!   r(   rS   r`   r=   rU   rV   r<   r{   r\   )r	   r   rr   r   r   )r+   r   r>   r   r   r   r   s          r   test_displacy_parse_depsr     s   +EE
(C$D+D
h5
PCs#DdD!!!!=aQ8aQ8aQ8aQ8	    <A?Af=Aw?    s1v&DdD!!!!=aQ8aQ8aQ8aQ8	    <A?Af=Aw?   r   c                      [        5       n SSS.SSS./nSSSS	S
.SSSS	S
./n[        R                  " [        5         U R	                  XS./5        S S S 5        g ! , (       d  f       g = f)Nru   r   rI   r2   rN   r   r!   r=   rU   rV   r(   r<   r_   )r   r   raises
ValueErrorr   r}   s      r   test_displacy_invalid_arcsr   &  si    !#HU+d6-JKEA?Qv>D 
z	"5789 
#	"	"s   A
A)c                     [        U / SQS9n[        USSUR                  R                  S   S9/Ul        [
        R                  " USS SS	9nUR                  S
5      (       d   eg)z$Test that displaCy can render Spans.r   r    r!   r(   rc   r#   r6   r%   r&   z<divN)r	   r
   r   r   r)   r   r   
startswithr*   s      r   test_displacy_spansr   1  s^    
hS
TCS!Qcii&7&7&>?@CH??3q851D??6""""r   c                     [         R                  " [        5         [        R                  " S5        S S S 5        g ! , (       d  f       g = f)Nzhello world)r   r   r   r   r   )r+   s    r   #test_displacy_raises_for_wrong_typer   9  s&    	z	"& 
#	"	"s	   ;
A	c                  f   / SQn / SQn/ SQn/ SQn[        5       n[        UR                  XX2S9n[        USSSS	9/Ul        [
        R                  " US
SS9nSU;   d   eSU;   d   eSUR                   S3U;   d   e[
        R                  " US
SS9nSU;   d   eSUR                   S3U;   d   eg )N)u   ماu
   بسیارu   کتابu   می‌خوانیم)PROrR   N_PLV_SUB)foobarr   baz)r!   r   rS   r!   )r   r   r>   r   r!   rS   r"   r#   Tr   )pager'   zdirection: rtlzdirection="rtl"zlang="r   r%   )r   r	   r   r
   r)   r   r   lang)r   r   r   r>   r   r   r   s          r   test_displacy_rtlr   >  s    HE
)C'DE
)C
ciiue
GCS!Qf-.CH??3T7Dt###$$$CHH:Q4'''??3T7Dt###CHH:Q4'''r   c                 X   S n[         R                  " U5        [        U / SQS9n[        USSUR                  R
                  S   S9/Ul        [         R                  " USS	9nUR                  S
5      (       d   eUR                  S5      (       d   e[         R                  " S 5        g)z4Test that displaCy accepts custom rendering wrapper.c                     SU -   S-   $ )Nr"    r   s    r   wrapper-test_displacy_render_wrapper.<locals>.wrapperT  s    }v%%r   r   r    r!   r(   rc   r#   r%   r&   zTEST<divz	/div>TESTc                     U $ )Nr   r   s    r   <lambda>.test_displacy_render_wrapper.<locals>.<lambda>^  s    Tr   N)
r   set_render_wrapperr	   r
   r   r   r)   r   r   endswith)r+   r   r   r   s       r   test_displacy_render_wrapperr   Q  s    & (
hS
TCS!Qcii&7&7&>?@CH??3e,D??:&&&&==%%%% 12r   c                      SSS.SSS.SSS.SSS./S	S
SSS.SSSSS.S
SSSS./SS.n [         R                  " U /SSS9nU S    H  nUS   U;   d   eUS   U;   a  M   e   g)z3Test displacy.render with manual data for dep styleru   rv   rI   r2   rw   rx   r   ry   r   r!   r=   rU   rV   r(   rS   r<   r{   r\   Title)r   r`   titler   Trf   r   rJ   rK   Nrh   )
parsed_depr   words      r   test_displacy_render_manual_depr   a  s     D)%(&-	
 GFCE&AF7C

 J ??J<uTBD7#F|t###E{d""" $r   c                      SSSSS./S.SSSS	S./S
S./n [         R                  " U SSS9nU  H'  nUS   S   S   U;   d   eSU;   d  M  US   U;   a  M'   e   g)z3Test displacy.render with manual data for ent stylera   r6   rb   rc   rd   re   id   COMPANYr   rJ   r)   r   r%   Trf   r)   r   r$   r   Nrh   )parsed_entsr   
parsed_ents      r   test_displacy_render_manual_entr   w  s     : e<=	

 :#C)DE	

K ??;eDAD!
&!!$W-555j g&$... "r   c                      SSSSS.SSSS.// SQS	.SSSSS.SSSS.// SQS
S./n [         R                  " U SSS9nU  H'  nUS   S   S   U;   d   eSU;   d  M  US   U;   a  M'   e   g)z4Test displacy.render with manual data for span stylezWelcome to the Bank of China.rS   r7   rc   r   r   r$   r]   r   r   r   r   r   r   r   .)rJ   r   tokensr   )rJ   r   r   r   spanTrf   r   r   r$   r   Nrh   )parsed_spansr   parsed_spans      r    test_displacy_render_manual_spanr    s     4 !EB !EB K	
 4 !EB !EB K	
L( ??<vdCD#7#A&w/4777k!w'4/// $r   c                     SS/n SSS.n[        XS.5      nSn/ SQn[        [        U5      5       Vs/ s H  oUUS	-   XE   S
.PM     nnUR                  SUS 5      R	                  S5      nSUS   ;   a	  SUS   ;   d   eSUS	   ;   a	  SUS	   ;   d   eSUS   ;   a	  SUS   ;   d   eSUS   ;   a	  SUS   ;   d   eg s  snf )Nr   BARredgreen)FOOr   )r)   colorsabcd)r   r   r  r  r!   rd   abcdez

r   r   r(   r  rS   )r   ranger   render_entssplit)r)   r  r~   rJ   labelsir   results           r   test_displacy_options_caser    s    5>D7+Ft>?HD)FEJ3t9EUVEUQ;EUEV!!'5$7==fEFF1I%6!9"444fQiEVAY$666F1I%6!9"444fQiEVAY$666$6 Ws   B<i)  c                      SSSSS.SSSS./S S	.n [         R                  " U S
SS9nUR                  S5      UR                  S5      :  d   eg )Nra         SECONDrd   r6   rb   FIRSTr   r%   Trf   )r   r   find)r   r   s     r   $test_displacy_manual_sorted_entitiesr    s`     6h7W5
 C ??3eD9D99W		( 3333r   i2  c           	         [        U SS/S9n[        USSSS9/UR                  S'   [        R                  " USS	9nS
U;   d   eUR                  S   R                  [        USSSS95        [        R                  " USS	9nS
U;   d   eg)zKTest that displaCy's span visualizer escapes annotated HTML tags correctly.r   z<TEST>r    r   r!   r#   r   r   r&   z&lt;TEST&gt;r(   N)r	   r
   r   r   r   appendr*   s      r   test_issue12816r    s     hvx0
1CCAV45CIIdO ??3f-DT!!! IIdO4Q89 ??3f-DT!!!r   i 3  c            	         SSSS.SSSS.SSSS./n / S	Qn[         R                  " XS
9n[        U5      [        U5      :X  d   e[        S Vs/ s H  n[        X#   S   5      S:H  PM     sn5      (       d   e[        S Vs/ s H  n[        X#   S   5      S:H  PM     sn5      (       d   eUS   S   S   S   S:X  d   eUS   S   S   S   S:X  d   eUS   S   S   S   S:X  d   eUS   S   S   S   S:X  d   egs  snf s  snf )zITest whether span stacking works properly for multiple overlapping spans.r(   r]   SkillNCr   r   Skillr!   rS   r   )r   r   )r   rS   r6   entities)r!   r(   render_slotN)r   _assemble_per_token_infor   all)r   r   per_token_infor  s       r   test_displacy_span_stackingr$    sM    I>G<G<E
 BF!::VN~#f+---KAN%j12a7KLLLLHAN%j12a7HIIII!Z(+M:a???!Z(+M:a???!Z(+M:a???!Z(+M:a??? LHs   C=D)returnN)+rB   r   spacyr   spacy.displacy.renderr   r   r   spacy.lang.enr   spacy.lang.far   spacy.tokensr	   r
   markissuer   r,   rE   rm   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r$  r   r   r   <module>r-     s      R R ! ! " 4  4% % 4  4 : 4  4' '( 4	 	: F,#(*"J:#'
(&3 #,/*0:7 54 4 5" "$ 5@ @r   