
    h\5                     J   S SK r S SKrS SKrS SKJr  S SKJrJrJr  SSKJ	r	  \ R                  " 5       S 5       r\ R                  " 5       S 5       r\ R                  " 5       S 5       rS	 rS
 rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S r!g)    N)schemas)DocSpanToken   )clean_underscorec                 d    / SQn/ SQn/ SQn/ SQn/ SQn/ SQn/ SQn/ SQn[        U UUUUUUUUS	9	$ )
Ncde)TTTVERBNOUNr   VBPNNr   )r   r   r   )ROOTdobjr   OzB-ORGr   Feat1=AFeat1=BFeat1=A|Feat2=D)wordsspacespostagsheadsdepsentsmorphsr   )	en_vocabr   r   r   r   r    r!   r"   r#   s	            b/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/doc/test_json_doc_conversion.pydocr'      sR    EF
"CDE#DD6F
 
    c           
      L    / SQn/ SQn/ SQn/ SQn/ SQn[        U UUUUU/ SQS9$ )Nr
   r   r   r   r   )TFT)r   r   r   r"   r#   sent_startsr$   )r%   r   r   r   r"   r#   s         r&   doc_without_depsr+   #   s=    E
"CDD6F' r(   c                  ^    SSSSS./SSS./SSS	S
SSSSS.S	SSSSSSSS.SSSSSSSS	S./S.$ )Nc d e       ORG)startendlabelr      )r1   r2   r   r   r   r   r   )idr1   r2   tagr   morphdepheadr   r   r   r      r   )textr"   sentstokens r>   r(   r&   doc_jsonr?   6   s     Q78a() "	 "	 *	+
	$ $r(   c                    U R                  5       nUS   S:X  d   e[        US   5      S:X  d   eUS   S   S   S:X  d   eUS   S   S   S	:X  d   eUS   S   S
   S:X  d   e[        US   5      S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   e[        [        R                  " [        R                  U5      5      S:X  d   e[
        R                  " [
        R                  " U5      5      U:X  d   eg )Nr;   r-   r=   r/   r   r   r   r6   r   r8   r   r"   r   r1   r.   r2   r3   r0   )to_jsonlenr   validateDocJSONSchemasrsly
json_loads
json_dumpsr'   json_docs     r&   test_doc_to_jsonrJ   _   sH   {{}HFx'''x!"a'''Ha '6111Ha '5000Ha '6111x A%%%FAw'1,,,FAu%***FAw'5000w 5 5x@AQFFFE,,X678CCCr(   c                    [         R                  " SSS9  [         R                  " SSS9  SU R                  l        / SQU R                  l        U R                  SS/S9nSU;   d   eUS   S   S:X  d   eUS   S   / SQ:X  d   e[        [        R                  " [        R                  U5      5      S	:X  d   e[        R                  " [        R                  " U5      5      U:X  d   eg )
N
json_test1Fdefault
json_test2hello worldr   r.   r/   
underscore_r   )r   set_extensionrT   rL   rO   rA   rB   r   rC   rD   rE   rF   rG   rH   s     r&   test_doc_to_json_underscorerV   n   s    lE2lE2$CEE CEE{{|\&B{CH(??C=&-777C=&)333w 5 5x@AQFFFE,,X678CCCr(   c                    [         R                  " SSS9  [         R                  " SSS9  [        R                  " SSS9  [        R                  " SSS9  SU R                  l        / SQU R                  l        S	U S
S R                  l        SU S
S R                  l        SU S
   R                  l        SU S   R                  l        U S
S /U R                  S'   U R                  / SQS9nSU;   d   eUS   S   S:X  d   eUS   S   / SQ:X  d   eSU;   d   eSU;   d   eUS   S   S
   S   S:X  d   eUS   S   S   S   S:X  d   eUS   S   S
   S   S	:X  d   eUS   S   S   S   S:X  d   e[        [        R                  " [        R                  U5      5      S
:X  d   e[        R                   " [        R"                  " U5      5      U:X  d   eg )NrL   FrM   rO   
token_test	span_testrP   rQ   span_attributer   r   span_attribute_2r.   u   v   
span_grouprL   rO   rX   rY   rR   rT   underscore_tokenunderscore_spanvalue)r   rU   r   r   rT   rL   rO   rY   rX   spansrA   rB   r   rC   rD   rE   rF   rG   rH   s     r&   +test_doc_to_json_with_token_span_attributesrd   |   s   lE2lE2	e4{E2$CEE CEE+C!HJJ-C!HJJCFHHCFHH"1QxjCIIl{{J  H (??C=&-777C=&)333)))(((&'5a8ASHHH&'5a8ASHHH%&{3A6w?CSSSS%&{3A6w?CUUUUw 5 5x@AQFFFE,,X678CCCr(   c                    [         R                  " SSS9  [        R                  " SSS9  [        R                  " SSS9  SU R                  l        SU SS	 R                  l        S
U S   R                  l        U R                  / SQS9nSU R                  S'   SU R                  S'   SU;   d   eUS   S   S:X  d   eSU;   d   eSU;   d   eUS   S   S   S   S
:X  d   eUS   S   S   S   S:X  d   e[        [        R                  " [        R                  U5      5      S:X  d   e[        R                  " [        R                   " U5      5      U:X  d   eg )N	json_testFrM   rX   rY   rP   rZ   r   r   r\   )rf   rX   rY   rR   
   user_data_test)user_data_test2TrT   r`   ra   rb   )r   rU   r   r   rT   rf   rY   rX   rA   	user_datarB   r   rC   rD   rE   rF   rG   rH   s     r&   &test_doc_to_json_with_custom_user_datark      s^   k51	e4{E2#CEEO+C!HJJCFHH{{&N{OH&(CMM"#/1CMM+,(??C=%666)))(((&'5a8ASHHH%&{3A6w?CSSSSw 5 5x@AQFFFE,,X678CCCr(   c                 x   [         R                  " SSS9  [        R                  " SSS9  [        R                  " SSS9  SU R                  l        SU SS R                  l        SU S   R                  l        U R                  S/S	9nS
U;   d   eUS
   S   S:X  d   eSU;   d   eSU;   d   eUS   S   S   S   S:X  d   eUS   S   S   S   S:X  d   e[        [        R                  " [        R                  U5      5      S:X  d   e[        R                  " [        R                  " U5      5      U:X  d   eg )Nmy_extFrM   rP   rZ   r   r   r\   rR   rT   r`   ra   rb   )r   rU   r   r   rT   rm   rA   rB   r   rC   rD   rE   rF   rG   rH   s     r&   0test_doc_to_json_with_token_span_same_identifierrn      s@   h.	%0x/ CEEL(C!HJJCFHHO{{xj{1H(??C="m333)))(((&'1!4W=DDD%&x03G<@PPPPw 5 5x@AQFFFE,,X678CCCr(   c                 x   [         R                  " SSS9  [        R                  " SSS9  SU SS R                  l        SU S   R                  l        U R                  S/S	9nS
U;   d   eUS
   S   S   S   S:X  d   eSU;  d   e[        [        R                  " [        R                  U5      5      S:X  d   eg )NrX   FrM   rY   rZ   r   r   r\   rR   ra   rb   r`   )r   rU   r   rT   rY   rX   rA   rB   r   rC   rD   rH   s     r&   .test_doc_to_json_with_token_attributes_missingrp      s    	e4{E2+C!HJJCFHH{{{m{4H(((%&{3A6w?CSSSSX---w 5 5x@AQFFFr(   c                     [         R                  " [        5         U R                  S/S9  SSS5        g! , (       d  f       g= f)z\Test that Doc.to_json() raises an error if a custom attribute doesn't
exist in the ._ space.
json_test3rR   N)pytestraises
ValueErrorrA   r'   s    r&   &test_doc_to_json_underscore_error_attrrw      s,     
z	"~. 
#	"	"s	   5
Ac                     [         R                  " SS S9  [        R                  " [        5         U R                  S/S9  SSS5        g! , (       d  f       g= f)z\Test that Doc.to_json() raises an error if a custom attribute value
isn't JSON-serializable.
json_test4c                     U R                   $ N)r;   rv   s    r&   <lambda>=test_doc_to_json_underscore_error_serialize.<locals>.<lambda>   s    sxxr(   )methodrR   N)r   rU   rs   rt   ru   rA   rv   s    r&   +test_doc_to_json_underscore_error_serializer      s=     l+?@	z	"~. 
#	"	"s   A
Ac                 l   [        U SSS5      [        U SSS5      /U R                  S'   U R                  5       nSU;   d   e[        US   5      S:X  d   e[        US   S   5      S:X  d   eUS   S   S   S   S:X  d   e[        [        R
                  " [        R                  U5      5      S:X  d   eg)z&Test that Doc.to_json() includes spansr   r.   testr   rc   r1   N)r   rc   rA   rB   r   rC   rD   rH   s     r&   test_doc_to_json_spanr      s    c1a0$sAq&2IJCIIf{{}Hhx !Q&&&x ()Q...GV$Q'0A555w 5 5x@AQFFFr(   c                    U R                  5       n[        R                  " [        R                  " U5      5      n[	        U R
                  5      R                  USS9nUR                  U R                  s=:X  a  S:X  d   e   e[        U5      [        U 5      s=:X  a  S:X  d   e   eUS   R                  U S   R                  :X  d   eUS   R                  U S   R                  :X  d   eUS   R                  U S   R                  :X  d   eUS   R                  R                  U S   R                  R                  :X  d   eUS   R                  U S   R                  :X  d   e[        UR                  5      S:X  d   eUR                  S   R                   S:X  d   eUR                  S   R"                  S:X  d   eUR                  S   R$                  S:X  d   eU R'                  5       UR'                  5       :X  d   eg 	NTrC   r-   r/   r   r   r.   r0   )rA   rE   rF   rG   r   vocab	from_jsonr;   rB   r   r6   r8   r9   idxlemmar"   r1   r2   label_to_bytesr'   rI   new_docs      r&   test_json_to_docr      s   {{}H 0 0 :;H#))n&&x$&?G<<388/x/////w<3s8(q(((((1:>>SVZZ'''1:>>SVZZ'''1:>>SVZZ'''1:??#a&++//1111:s1v||+++w||!!!<<?  A%%%<<?!###<<?!!U***<<>W--////r(   c                    [        U R                  5      R                  USS9nU Vs/ s H  o3PM     nnUR                  U R                  s=:X  a  S:X  d   e   e[	        U5      [	        U  Vs/ s H  o3PM     sn5      s=:X  a  S:X  d   e   eUS   R
                  U S   R
                  :X  d   eUS   R                  U S   R                  :X  d   eUS   R                  U S   R                  :X  d   eUS   R                  R                  U S   R                  R                  :X  d   eUS   R                  U S   R                  :X  d   e[	        UR                  5      S:X  d   eUR                  S   R                  S:X  d   eUR                  S   R                  S:X  d   eUR                  S   R                  S:X  d   eg s  snf s  snf r   )r   r   r   r;   rB   r   r6   r8   r9   r   r   r"   r1   r2   r   )r'   r?   r   token
new_tokenss        r&   test_json_to_doc_compatr      s   #))n&&x$&?G%,-WE%WJ-<<388/x/////z?cc":cU5c":;@q@@@@@a=A

***a=A

***a=A

***a=!!SV[[__444a=#a&,,...w||!!!<<?  A%%%<<?!###<<?!!U*** .":s   G.G
c           	      J   [         R                  " SSS9  [         R                  " SSS9  SU R                  l        / SQU R                  l        U R                  SS/S9n[        U R                  5      R                  USS	9n[        [        S
S5       Vs/ s H  o2R                  SU 35      PM     sn5      (       d   eUR                  R                  S:X  d   eUR                  R                  / SQ:X  d   eU R                  5       UR                  5       :X  d   eg s  snf )NrL   FrM   rO   rP   rQ   rR   Tr   r   r/   rf   )r   rU   rT   rL   rO   rA   r   r   allrangehas_extensionr   r'   rI   r   is       r&   test_json_to_doc_underscorer     s    lE2lE2$CEE CEE{{|\&B{CH#))n&&x$&?GaL1%%	!o6LMMMM99=000999,,,<<>W--//// Ms   D c           	         [         R                  " SSS9  [         R                  " SSS9  [        R                  " SSS9  [        R                  " SSS9  SU R                  l        / SQU R                  l        S	U S
S R                  l        SU S
S R                  l        SU S
   R                  l        SU S   R                  l        U R                  / SQS9n[        R                  " [        R                  " U5      5      n[        U R                  5      R                  USS9n[        [!        SS5       Vs/ s H  o2R#                  SU 35      PM     sn5      (       d   eUR                  R
                  S:X  d   eUR                  R                  / SQ:X  d   eUS
   R                  R                  S:X  d   eUS   R                  R                  S:X  d   eUS
S R                  R                  S	:X  d   eUS
S R                  R                  S:X  d   eUR$                  U R$                  :X  d   eUR'                  S/S9U R'                  S/S9:X  d   eg s  snf )NrL   FrM   rO   rX   rY   rP   rQ   rZ   r   r   r[   r.   r\   r]   r_   rR   Tr   r/   rf   rj   )exclude)r   rU   r   r   rT   rL   rO   rY   rX   rA   rE   rF   rG   r   r   r   r   r   rj   r   r   s       r&   +test_json_to_doc_with_token_span_attributesr     s   lE2lE2	e4{E2$CEE CEE+C!HJJ-C!HJJCFHHCFHH{{J  H  0 0 :;H#))n&&x$&?GaL1%%	!o6LMMMM99=000999,,,1:<<""c)))1:<<""c)))1Q<>>##'77771Q<>>##'9999---[M2cll 7C 7    Ms   :Ic           	      ,   [        U SSSS9[        U SSSSS9/U R                  S'   U R                  5       n[        U R                  5      R                  USS	9n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   e[        S5       H  nUR                  S   U   R                  U R                  S   U   R                  :X  d   eUR                  S   U   R                  U R                  S   U   R                  :X  d   eUR                  S   U   R                  U R                  S   U   R                  :X  d   eUR                  S   U   R                  U R                  S   U   R                  :X  a  M   e   g
)z1Test that Doc.from_json() includes correct.spans.r   r.   r   )r3   r      )r3   kb_idTr   N)r   rc   rA   r   r   r   rB   r   r1   r2   r3   r   r   s       r&   test_json_to_doc_spansr   :  su    	S!Qf%S!QfA.CIIf {{}H#))n&&x$&?Gw}}"""w}}V$%***1X}}V$Q'--61B11E1K1KKKK}}V$Q'++syy/@/C/G/GGGG}}V$Q'--61B11E1K1KKKK}}V$Q'--61B11E1K1KKKK	 r(   c                    X4 H  nUR                  5       n[        U R                  5      R                  USS9nUR                   Vs/ s H  oUR
                  PM     snUR                   Vs/ s H  oUR
                  PM     sn:X  d   eU Vs/ s H  ofR                  PM     snU Vs/ s H  ofR                  PM     sn:X  a  M   e   gs  snf s  snf s  snf s  snf )z1Test that Doc.from_json() includes correct.sents.Tr   N)rA   r   r   r   r<   r;   is_sent_start)r'   r+   test_docrI   r   sentr   s          r&   test_json_to_doc_sentsr   K  s    +##%cii.**8d*C&.nn5nd		n5")--:
"/$II-:
 
 	
 
 2::##:-4?
-4EW?
 
 	
 
 , 6 :
 ; ?
s   C-CC)Cc                     SSS.nXl         U R                  5       n[        U R                  5      R	                  USS9nUR                   U:X  d   eg)z1Test that Doc.from_json() includes correct .cats.g333333?gffffff?)ABTr   N)catsrA   r   r   r   )r'   r   rI   r   s       r&   test_json_to_doc_catsr   X  sK    3DH{{}H#))n&&x$&?G<<4r(   c                      [         R                  " S5      " S5      n U R                  5       n[        U R                  5      R                  USS9nU R                  UR                  :X  d   eg)z5Test that Doc.from_json() preserves spaces correctly.enzThis is just brilliant.Tr   N)spacyblankrA   r   r   r   r;   r   s      r&   test_json_to_doc_spacesr   a  sT    
++d
5
6C{{}H#))n&&x$&?G88w||###r(   c                    U R                  5       nUS   S   R                  S5        [        R                  " [        5         [        U R                  5      R                  U5        SSS5        g! , (       d  f       g= f)zbTest that Doc.from_json() raises an exception if tokens don't all have the same set of properties.r=   r   r7   NrA   poprs   rt   ru   r   r   r   r'   r?   s     r&   &test_json_to_doc_attribute_consistencyr   i  sS    {{}HXqg&	z	"CII  * 
#	"	"s   %A00
A>c                     U R                  5       nUR                  S5        [        R                  " [        5         [        U R                  5      R                  USS9  SSS5        g! , (       d  f       g= f)zLTest that Doc.from_json() raises an exception when validating invalid input.r=   Tr   Nr   r   s     r&   !test_json_to_doc_validation_errorr   q  sL    {{}HLL	z	"CII  D 9 
#	"	"s   $A))
A7c                 |    S n[         R                  " SUS9  U R                  S/S9nUS   S   U" U 5      :X  d   eg )Nc                 ,    [        U R                  5      $ r{   )rB   r;   rv   s    r&   get_text_length<test_to_json_underscore_doc_getters.<locals>.get_text_lengthz  s    388}r(   text_length)getterrR   rT   )r   rU   rA   )r'   r   r?   s      r&   #test_to_json_underscore_doc_gettersr   y  sG     mO<{{}o{6HC='?3+????r(   )"rs   rE   r   r   spacy.tokensr   r   r   test_underscorer   fixturer'   r+   r?   rJ   rV   rd   rk   rn   rp   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r(   r&   <module>r      s        ) ) -  .  $ % %PDDD<D,D(G//G0$+ 
0>L"

 $+:@r(   