
    h                     V   S SK r S SKrS SKJr  S SKJrJr  S SKJr  \ R                  R                  S5      S 5       r\ R                  R                  S5      S 5       r\ R                  R                  S	5      S
 5       rS rS r\ R                  R!                  S/ SQ5      S 5       rg)    N)English)DocDocBin)
Underscorei  c                  B    [        5         [        S/S9  [        / SQS9  g)zTest that docbin init goes wellLEMMAattrs)r   ENT_IOBENT_TYPEN)r        e/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/serialize/test_serialize_docbin.pytest_issue4367r   	   s     H
'
12r   i  c                 j   [        U SS/S9nSUR                  S'   SUR                  S'   [        SS9nUR                  U5        UR	                  5       n[        SS9R                  U5      n[        UR                  U 5      5      S	   nUR                  S   S:X  d   eUR                  S   S:X  d   eg
)z6Test that user_data is correctly serialized in DocBin.helloworldwordsbarfoo)z._.r   NNTstore_user_datar   N)r   	user_datar   addto_bytes
from_byteslistget_docs)en_vocabdocdoc_bindoc_bin_bytesnew_doc_binnew_docs         r   test_issue4528r&      s     hw0
1C CMM%05CMM,-T*GKK$$&M.99-HK;''1215GU#u,,,78EAAAr   i  c                     [        SS/S9n[        UR                  U 5      5      / :X  d   eUR                  5       n[        5       R	                  U5      n[        UR                  U 5      5      / :X  d   eg)z6Ensure an empty DocBin does not crash on serializationDEPHEADr	   N)r   r   r   r   r   )r    r"   r#   	doc_bin_2s       r   test_issue5141r+   !   sn     E6?+G  *+r111$$&M##M2I	""8,-333r   c                     [        / SQSS9n / SQnSS0n[        5       nUR                  U5       HX  nX$l        USS nS	Ul        S
Ul        SUl        U/UR                  S'   SUS   l        SUS   l	        U R                  U5        MZ     U R                  5       n[        R                  " S5      n[        5       R                  U5      n [        U R!                  UR"                  5      5      n[%        U5       H  u  pUR&                  X   :X  d   eUR                  U:X  d   e[)        UR                  5      S:X  d   eUR                  S   S   R                  S	:X  d   eUR                  S   S   R
                  S
:X  d   eUR                  S   S   R                  S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  a  M   e   g )N)r   r   r   NORMENT_IDT)r
   r   )z	Some textzLots of texts...z...Ag      ?r      UNUSUAL_SPAN_LABELUNUSUAL_SPAN_IDUNUSUAL_SPAN_KB_IDstartUNUSUAL_TOKEN_NORMUNUSUAL_TOKEN_ENT_IDen   )r   r   pipecatslabel_id_kb_id_spansnorm_ent_id_r   r   spacyblankr   r   r   vocab	enumeratetextlen)	r"   textsr:   nlpr!   span
bytes_datareloaded_docsis	            r   test_serialize_doc_binrM   +   s   @RVG 5E:D
)Cxx1Qx*$*"V		'+A/AC  !!#J ++d
Ch!!*-G))#))45MM*xx58###xx4399~"""yy!!$++/CCCCyy!!$((,====yy!!$++/CCCC1v||33331v~~!7777 +r   c                    [        U SS/S9nUR                  (       d   eUR                  S:X  d   e[        U SS/SS/S9nUR                  (       a   eUR                  S:X  d   e[        5       R	                  [        X/S9R                  5       5      nUR                  U 5      u  pEUR                  (       d   eUR                  S:X  d   eUR                  (       a   eUR                  S:X  d   eg )	Nthatz'sr   zthat 's F)r   spaceszthat's)docs)r   has_unknown_spacesrE   r   r   r   r   )r    doc1doc2r"   re_doc1re_doc2s         r   %test_serialize_doc_bin_unknown_spacesrW   M   s    x~.D""""99
"""x~uenED&&&&99   h!!&tl";"D"D"FGG''1G%%%%<<:%%%))))<<8###r   z$writer_flag,reader_flag,reader_value))TTr   )TFr   )FTnothing)FFrX   c                 t   [         R                  " SSS9  [        U SS/S9nSUR                  l        [	        US9nUR                  U5        UR                  5       n[	        US9R                  U5      n[        UR                  U 5      5      S	   nUR                  R                  U:X  d   e0 [        l        g
)z?Test that custom extensions are correctly serialized in DocBin.r   rX   )defaultr   r   r   r   r   r   N)r   set_extension_r   r   r   r   r   r   r   r   doc_extensions)	r    writer_flagreader_flagreader_valuer!   	doc_bin_1r#   r*   doc_2s	            r   test_serialize_custom_extensionrc   ]   s     eY/
hw0
1CCEEI{3IMM#&&(M{3>>}MI##H-.q1E77;;,&&& "Jr   )pytestrA   spacy.lang.enr   spacy.tokensr   r   spacy.tokens.underscorer   markissuer   r&   r+   rM   rW   parametrizerc   r   r   r   <module>rk      s      ! $ . 43 3 4B B 44 48D$  *##r   