
    hu                         S SK r S SKJr  S SKJr  S SKJrJr  SSKJ	r	  \ R                  S 5       r\ R                  S 5       rS	 rS
 rS rS r\ R"                  R%                  S5      S 5       rg)    N)Language)merge_subtokens)DocSpan   )clean_underscorec                 .    / SQn/ SQn/ SQn[        XX#S9$ )N)Thisisasentence.r
   r   anotherr   r   Andr   thirdr   )   r      r   r      r      r   r            r   )nsubjROOTsubtokattrpunctr   r   r   r   r   r   r   r   r   )wordsheadsdeps)r   )en_vocabr   r   r    s       ]/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/pipeline/test_functions.pydocr#   
   s&     uE:EMD xE==    c           	      h    / SQn/ SQn/ SQn/ SQn/ SQn[        XX#XES9n[        USSS	S
9/Ul        U$ )N)IlikeNewYorkinAutumnr   )r   r   r   r   r      r   )PRPINNNPr/   r.   r/   r   )PRONVERBPROPNr2   ADPr2   PUNCT)r   prepcompoundpobjr5   r7   r   )r   r   tagsposr    r   r,   GPE)label)r   r   ents)r!   r   r   r8   r9   r    r#   s          r"   doc2r=      sC    =E!E8D
ECHD
h5
PCS!Qe,-CHJr$   c                 l    [        U 5      n U  Vs/ s H  oR                  PM     sn/ SQ:X  d   eg s  snf )N)	r
   r   z
a sentencer   r
   r   zanother sentencer   zAnd a third .)r   text)r#   ts     r"   test_merge_subtokensrA   !   s1    
#
C   CqFFC $}}}} s   1c                     [        U 5      S:X  d   e[        5       nUR                  S5      nU" U 5        [        U 5      S:X  d   eU S   R                  S:X  d   eg )N   merge_noun_chunksr   r   New York)lenr   create_piper?   )r=   nlprD   s      r"    test_factories_merge_noun_chunksrI   )   sW    t9>>
*C(;<dt9>>7<<:%%%r$   c                 H   [        U 5      S:X  d   e[        [        U R                  5      5      S:X  d   e[        5       nUR	                  S5      nU" U 5        [        U 5      S:X  d   e[        [        U R                  5      5      S:X  d   eU S   R
                  S:X  d   eg )NrC   r   merge_entitiesr   r   rE   )rF   listr<   r   rG   r?   )r=   rH   rK   s      r"   test_factories_merge_entsrM   2   s    t9>>tDII1$$$
*C__%56N4t9>>tDII1$$$7<<:%%%r$   c                  @  ^ [        5       n SSS.nU R                  SUS9mU " S5      nU Vs/ s H  o3R                  PM     sn/ SQ:X  d   eU " S5      nU Vs/ s H  o3R                  PM     sn/ S	Q:X  d   e[        U4S
 jU 5       5      (       d   eg s  snf s  snf )N      )
min_lengthsplit_lengthtoken_splitterconfigzaaaaabbbbbcccccdddd e f g)aaaaabbbbbcccccddddefgz!aaaaabbbbbcccccdddddeeeeeff g h i)	aaaaabbbbbcccccdddddeeeeeffrY   hic              3   h   >#    U  H'  n[        UR                  5      TR                  :*  v   M)     g 7f)N)rF   r?   rR   ).0r@   rS   s     r"   	<genexpr>&test_token_splitter.<locals>.<genexpr>O   s$     G3as166{n9993s   /2)r   add_piper?   all)rH   rU   r#   r@   rS   s       @r"   test_token_splitterrh   =   s    
*C2F\\"26\BN
)
*C CqFFC $JJJJ
1
2C CqFFC  
% 
 
 
 G3GGGGG ! s   BBr   c                  >   [        5       n U R                  S5        U R                  S5      n/ SQUl        U " U5      nUR                  b   e[        5       n U R                  SSS0S9  [        R
                  " [        5         U " S5      nS S S 5        [        R                  " SSS	9  [        5       n U R                  SS
SS00S9  U R                  S5      nSUR                  l
        U " U5      nUR                  R                  S:X  d   eg ! , (       d  f       N= f)Ndoc_cleanerr?   )r   r   r   silentFrT   	test_attr)defaultattrsz_.test_attrr   d   )r   rf   make_doctensorpytestwarnsUserWarningr   set_extension_rl   )rH   r#   s     r"   test_factories_doc_cleanerrx   R   s    
*CLL
,,v
CCJ
c(C::
*CLL%'8L9	k	"&k 
# k2.
*CLL-1C'DLE
,,v
CCEEO
c(C55??a 
#	"s   	D
D)rs   spacy.languager   spacy.pipeline.functionsr   spacy.tokensr   r   doc.test_underscorer   fixturer#   r=   rA   rI   rM   rh   markusefixturesrx    r$   r"   <module>r      s~     # 4 " 2 > >  ~&&H* +,  - r$   