
    hA              	          S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  / \R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9PS	P\R                  " S
\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9PSPSP\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9PSP\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9PSP\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S\R                  R                  5       S9P\R                  " S \R                  R                  5       S9P\R                  " S!\R                  R                  5       S9PS"P\R                  " S#\R                  R                  5       S9P\R                  " S$\R                  R                  5       S9P\R                  " S%\R                  R                  5       S9P\R                  " S&\R                  R                  5       S9P\R                  " S'\R                  R                  5       S9P\R                  " S(\R                  R                  5       S9P\R                  " S)\R                  R                  5       S9P\R                  " S*\R                  R                  5       S9P\R                  " S+\R                  R                  5       S9P\R                  " S,\R                  R                  5       S9P\R                  " S-\R                  R                  5       S9P\R                  " S.\R                  R                  5       S9P\R                  " S/\R                  R                  5       S9P\R                  " S0\R                  R                  5       S9P\R                  " S1\R                  R                  5       S9Pr\R                  R                  S2\5      S3 5       rS4 rS5 r\R$                  R&                  S=S6\R$                  R(                  S7\S8\4S9 jj5       r\R                  R0                  \R                  R                  S2\5      \R2                  " \" 5       S:9S2\S;\S8S4S< j5       5       5       rg)>    N)	Tokenizer)get_lang_classfr)marksafarbgbnbocacsdadeelenesetfafigahehihrhuiditknlbltlvnbnlplptrosiskslsqsrsvtatetltrtturkmrlangc                 b   [        U 5      " 5       R                  n[        R                  " SU  S35      nUR                   Ha  nU" U5       Vs/ s H!  oDR
                  (       a  M  UR                  PM#     nnUR                  U5       Vs/ s H  oDS   PM	     nnXV:X  a  Ma   e   g s  snf s  snf )Nzspacy.lang.z	.examples   )r   	tokenizerpytestimportorskip	sentencesis_spacetextexplain)r3   r6   examplessentencettokensdebug_tokenss          \/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/tokenizer/test_explain.pytest_tokenizer_explainrC   A   s    t$&00I""[i#@AH&&"+H"5H"5QZZ&!&&"5H&/&7&7&AB&A!&AB%%% 'HBs   B'&B'B,c                 T   [         R                  " S5      n[         R                  " S5      nSSS0/0n[        U UUR                  UR                  S9nU" S5       Vs/ s H  oUR
                  PM     nnUR                  S5       Vs/ s H  oUS   PM	     nnXg:X  d   eg s  snf s  snf )Nz[\.]$z[/]za.ORTH)rulessuffix_searchinfix_finditerza/a.r5   )recompiler   searchfinditerr;   r<   )en_vocab	suffix_reinfix_rerF   r6   r?   r@   explain_tokenss           rB   &test_tokenizer_explain_special_matcherrQ   K   s    

8$Izz&!HVTN#$E&&((	I (/0/ff/F0$-$5$5f$=>$=qd$=N>### 1>s   B B%c                     SSS0/0n[        U US9nSnU" U5       Vs/ s H  oDR                  PM     nnUR                  U5       Vs/ s H  oDS   PM	     nnXV:X  d   eg s  snf s  snf )Nz:]rE   )rF   z: ]r5   )r   r;   r<   )rM   rF   r6   r;   r?   r@   rP   s          rB   1test_tokenizer_explain_special_matcher_whitespacerS   Z   s{    VTN#$EI D'o.offoF.$-$5$5d$;<$;qd$;N<### /<s   A A%drawmax_n_wordsreturnc           	         SR                  / [        R                   Vs/ s H  n[        R                  " U5      PM     snQSP5      n[        U " [        R                  R                  SUS95      5       Vs/ s HK  nU " [        R                  R                  SS95      U " [        R                  R                  U5      5      /PMM     nnSR                  U VVs/ s H  of  H  owPM     M     snn5      $ s  snf s  snf s  snnf )a[  
Composite strategy for fuzzily generating sentence with varying interpunctation.

draw (hypothesis.strategies.DrawFn): Protocol for drawing function allowing to fuzzily pick from hypothesis'
                                     strategies.
max_n_words (int): Max. number of words in generated sentence.
RETURNS (str): Fuzzily generated sentence.
|z\s   )	min_value	max_valuer5   )min_size )joinstringpunctuationrI   escaperange
hypothesis
strategiesintegersr;   
from_regex)rT   rU   ppunctuation_and_space_regex_r>   
token_pairtokens           rB   sentence_strategyrl   f   s     #&((<!3!3
4!3A299Q<!3
4<e<# &&//!{/ST


A &&++Q+78&&112MNO	

   88XNXz:%U:UXNOO 5 Os    C8
8AC=D
)r>   r>   c                 T   [         R                  " U 5      R                  n[        R                  " SSU5      R                  5       nU" U5       Vs/ s H  o3R                  PM     nnUR                  U5       Vs/ s H  o3S   PM	     nnXE:X  d   U SU SU 35       egs  snf s  snf )z
Tests whether output of tokenizer.explain() matches tokenizer output. Input generated by hypothesis.
lang (str): Language to test.
text (str): Fuzzily generated sentence to tokenize.
z\s+r]   r5   z, N)spacyblankr6   rI   substripr;   r<   )r3   r>   r6   r?   r@   rA   s         rB   test_tokenizer_explain_fuzzyrr      s     !;;t,66I vvfc8,224H'121ff1F2"+"3"3H"=>"=QaD"=L>!JfXR~Rz#JJ! 3>s   B ;B%)   )rI   r_   rc   hypothesis.strategiesr7   rn   spacy.tokenizerr   
spacy.utilr   parammarkslow	LANGUAGESparametrizerC   rQ   rS   rd   	compositeDrawFnintstrrl   xfailgivenrr        rB   <module>r      s   	      % %
/
LLV[[--/0/
LLV[[--/0/ LLV[[--/0/ LLV[[--/0	/
 	/ LLV[[--/0/ LLV[[--/0/ LLV[[--/0/ LLV[[--/0/ LLV[[--/0/ 	/ 	/ LLV[[--/0/ LLV[[--/0/ LLV[[--/0/  LLV[[--/0!/" 	#/$ LLV[[--/0%/& LLV[[--/0'/( LLV[[--/0)/* LLV[[--/0+/, 	-/. LLV[[--/0//0 LLV[[--/01/2 LLV[[--/03/4 LLV[[--/05/6 LLV[[--/07/8 LLV[[--/09/: LLV[[--/0;/< LLV[[--/0=/> 	?/@ LLV[[--/0A/B LLV[[--/0C/D LLV[[--/0E/F LLV[[--/0G/H LLV[[--/0I/J LLV[[--/0K/L LLV[[--/0M/N LLV[[--/0O/P LLV[[--/0Q/R LLV[[--/0S/T LLV[[--/0U/V LLV[[--/0W/X LLV[[--/0Y/Z LLV[[--/0[/\ LLfkk..01]/	d +& ,&$	$   PJ1188 Ps PSV P !P4 +,./Ks Kc Kd K 0 , Kr   