
    hF                     <   S SK r S SKJr  S SKJr  / SQr/ SQr/ SQr\ R                  R                  S/ SQ5      S	 5       r
\ R                  R                  S
\5      \ R                  R                  SS/5      S 5       5       r\ R                  R                  S
\5      \ R                  R                  SS/5      S 5       5       r\ R                  R                  S
\5      \ R                  R                  SS/5      \ R                  R                  SS/5      S 5       5       5       r\ R                  R                  S
\5      \ R                  R                  SS/5      \ R                  R                  SS/5      S 5       5       5       r\ R                  R                  S
\5      \ R                  R                  SS/5      S 5       5       r\ R                  R                  S
\5      \ R                  R                  SS/5      S 5       5       r\ R                  R                  SS/5      S 5       r\ R                  R                  SS/5      S 5       r\ R                  R                  S\5      \ R                  R                  SS/5      S 5       5       r\ R                  R                  S\5      \ R                  R                  SS/5      \ R                  R                  SS/5      S 5       5       5       r\ R                  R                  SS/5      S  5       rS! rg)"    N)TOKENIZER_PREFIXES)compile_prefix_regex)([{*))]}r   ))r   r	   )r   r
   )r   r   )r   r   text)r   z((<c                 H    U " U5      n[        U5      [        U5      :X  d   eg Nlenen_tokenizerr   tokenss      X/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/lang/en/test_punct.py$test_en_tokenizer_handles_only_punctr      s"    $Fv;#d)###    punctHelloc                     U " X-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg N   r      r   r   r   r   r   r   s       r   #test_en_tokenizer_splits_open_punctr       sN     %,'Fv;!!9>>U"""!9>>T!!!r   c                     U " X!-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg r   r   r   s       r   $test_en_tokenizer_splits_close_punctr"      sN     $,'Fv;!!9>>T!!!!9>>U"""r   	punct_add`c                     U " X-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg N   r   r   r   r   r   r   r#   r   r   s        r   ,test_en_tokenizer_splits_two_diff_open_punctr)   #   sj     %+d23Fv;!!9>>U"""!9>>Y&&&!9>>T!!!r   'c                     U " X1-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg r&   r   r(   s        r   -test_en_tokenizer_splits_two_diff_close_punctr,   .   si     $,23Fv;!!9>>T!!!!9>>U"""!9>>Y&&&r   c                     U " X-   U-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg )N   r   r'   r   r   s       r   (test_en_tokenizer_splits_same_open_punctr/   9   sX     %-%/$67Fv;!!9>>U"""!9>>T!!!r   c                     U " X!-   U-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg )Nr.   r   r   r   r   s       r   )test_en_tokenizer_splits_same_close_punctr1   B   sX     $,.67Fv;!!9>>T!!!!9>>U"""r   z'Thec                 `    U " U5      n[        U5      S:X  d   eUS   R                  S:X  d   eg )Nr   r   r*   r   r   s      r   )test_en_tokenizer_splits_open_appostropher3   K   s4    $Fv;!!9>>S   r   zHello''c                 h    U " U5      n[        U5      S:X  d   eU " S5      n[        U5      S:X  d   eg )Nr   z''r   r   )r   r   r   tokens_puncts       r   )test_en_tokenizer_splits_double_end_quoter6   R   s;    $Fv;!%L|!!!r   zpunct_open,punct_closec                     U " X-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg r&   r   )r   
punct_openpunct_closer   r   s        r   )test_en_tokenizer_splits_open_close_punctr:   Z   sj    
 *+k9:Fv;!!9>>Z'''!9>>T!!!!9>>[(((r   zpunct_open2,punct_close2)r$   r*   c                    U " X1-   U-   U-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg )N   r   r   r   r'   r.   r   )r   r8   r9   punct_open2punct_close2r   r   s          r    test_en_tokenizer_two_diff_punctr?   f   s     +2T9KG,VWFv;!!9>>[(((!9>>Z'''!9>>T!!!!9>>[(((!9>>\)))r   z
text,punct)z(can'tr   c                 r    [        [        5      R                  nU" U 5      nUR                  5       U:X  d   eg r   )r   r   searchgroup)r   r   en_search_prefixesmatchs       r   (test_en_tokenizer_splits_pre_punct_regexrE   u   s2    -.@AHHt$E;;=E!!!r   c                 Z    SnU " U5      nU[        U5      S-
     R                  S:X  d   eg )Nz*(And a 6a.m. run through Washington Park).r   .r   r   s      r   'test_en_tokenizer_splits_bracket_periodrH   |   s2    7D$F#f+/"''3...r   )pytestspacy.lang.punctuationr   
spacy.utilr   
PUNCT_OPENPUNCT_CLOSEPUNCT_PAIREDmarkparametrizer   r    r"   r)   r,   r/   r1   r3   r6   r:   r?   rE   rH    r   r   <module>rR      s    5 +!
"? !12$ 3$
 *-'+" , ." +.'+# , /# *-se,'+" , - ." +.se,'+' , - /' *-'+" , ." +.'+# , /# &*! +! )-" ." 1<@'+) , A) 1<@3j\B'+	* , C A	* '89" :"/r   