
    h>                        S SK r S SKJr  / SQr\/ SQ-   r/ SPSPSPSPS	PS
PSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPSPS PS!PS"P\ R
                  " S#\ R                  R                  5       S$9P\ R
                  " S%\ R                  R                  5       S$9PS&PS'PS(PS)PS*PS+PS,PS-PS.PS/PS0PS1PS2Pr/ S3PS4PS5PS6PS7PS8PS9PS:PS;PS<PS=PS>PS?PS@PSAPSBPSCPSDPSEPSFPSGPSHPSIPSJPSKPSLPSMPSNPSOPSPPSQPSRPSSPSTP\ R
                  " SU\ R                  R                  5       S$9PSVPSWPr	/ SXQr
/ SYQr\ R                  R                  SZ\5      S[ 5       r\ R                  R                  SZ\	5      S\ 5       r\ R                  R                  SZ\5      S] 5       r\ R                  R                  SZ\5      S^ 5       r\ R                  R"                  \ R                  R                  S_\
5      \ R                  R                  SZ\5      S` 5       5       5       r\ R                  R"                  \ R                  R                  Sa\5      \ R                  R                  SZ\5      Sb 5       5       5       r\ R                  R"                  \ R                  R                  S_\
5      \ R                  R                  Sa\5      \ R                  R                  SZ\5      Sc 5       5       5       5       r\ R                  R"                  \ R                  R                  Sd\
5      \ R                  R                  Se\
5      \ R                  R                  SZ\5      Sf 5       5       5       5       r\ R                  R"                  \ R                  R                  Sg\5      \ R                  R                  Sh\5      \ R                  R                  SZ\5      Si 5       5       5       5       rg)j    N)BASE_EXCEPTIONS)zhttp://www.nytimes.com/2016/04/20/us/politics/new-york-primary-preview.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=a-lede-package-region&region=top-news&WT.nav=top-news&_r=0zwww.red-stars.comzmailto:foo.bar@baz.com)zmailto:foo-bar@baz-co.comz$mailto:foo-bar@baz-co.com?subject=hizwww.google.com?q=google&http://foo.com/blah_(wikipedia)#cite-1zhttp://foo.com/blah_blahzhttp://BlahBlah.com/Blah_Blahzhttp://foo.com/blah_blah/z%http://www.example.com/wpstyle/?p=364z1https://www.example.com/foo/?bar=baz&inga=42&quuxz'http://userid:password@example.com:8080z(http://userid:password@example.com:8080/zhttp://userid@example.comzhttp://userid@example.com/zhttp://userid@example.com:8080zhttp://userid@example.com:8080/z"http://userid:password@example.comz#http://userid:password@example.com/zhttp://142.42.1.1/zhttp://142.42.1.1:8080/r   z+http://foo.com/blah_(wikipedia)_blah#cite-1u&   http://foo.com/unicode_(✪)_in_parensz'http://foo.com/(something)?after=parensz/http://code.google.com/events/#&product=browserzhttp://j.mpzftp://foo.bar/bazz,http://foo.bar/?q=Test%20URL-encoded%20stuffz2http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.comzhttp://1337.netzhttp://a.b-c.dezhttp://223.255.255.254zhttp://a.b--c.de/z+ssh://login@server.com:12345/repository.gitz&svn+ssh://user@ssh.yourdomain.com/pathz8chrome://extensions/?id=mhjfbmdgcfjbbpaeojofohoefgiehjai)marksz3chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjaiz$http://foo.com/blah_blah_(wikipedia)z,http://foo.com/blah_blah_(wikipedia)_(again)zhttp://www.foo.co.ukzhttp://www.foo.co.uk/zhttp://www.foo.co.uk/blah/blahu   http://⌘.wsu   http://⌘.ws/u   http://☺.damowmow.com/u   http://✪df.ws/123u   http://➡.ws/䨹u   http://مثال.إختبارu   http://例子.测试u/   http://उदाहरण.परीक्षाzhttp://zhttp://.z	http://..z
http://../zhttp://?z	http://??z
http://??/zhttp://#z	http://##z
http://##/z)http://foo.bar?q=Spaces should be encodedz//z//az///az///z	http:///azrdar://1234zh://testzhttp:// shouldfail.comz:// should failzhttp://foo.bar/foo(bar)baz quuxzhttp://-error-.invalid/zhttp://a.b-.cozhttp://0.0.0.0zhttp://10.1.1.0zhttp://10.1.1.255zhttp://224.1.1.1zhttp://123.123.123zhttp://3628126748zhttp://.www.foo.bar/zhttp://.www.foo.bar./zhttp://10.1.1.1zNASDAQ:GOOGzhttp://-a.b.cozfoo.comzhttp://1.1.1.1.1zhttp://www.foo.bar./)(">)r   :r   urlc                 ,    U R                  U5      c   eg N	url_matchen_tokenizerr
   s     Y/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/tokenizer/test_urls.pytest_should_matchr   y   s    !!#&222    c                 ,    U R                  U5      b   eg r   r   r   s     r   test_should_not_matchr   ~   s    !!#&...r   c                 `    U " U5      n[        U5      S:X  d   eUS   R                  U:X  d   eg )N   r   lentext	tokenizerr
   tokenss      r   !test_tokenizer_handles_simple_urlr      s3    s^Fv;!!9>>S   r   c                     U " SU-   S-   5      n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  U:X  d   eUS   R                  S:X  d   eg )Nr   )   r   r      r   r   s      r   *test_tokenizer_handles_simple_surround_urlr#      sh    sSy3'Fv;!!9>>S   !9>>S   !9>>S   r   prefixc                     U " X-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg Nr"   r   r   r   )r   r$   r
   r   s       r   #test_tokenizer_handles_prefixed_urlr'      sN     v|$Fv;!!9>>V###!9>>S   r   suffixc                     U " X-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg r&   r   )r   r
   r(   r   s       r   #test_tokenizer_handles_suffixed_urlr*      sN     s|$Fv;!!9>>S   !9>>V###r   c                     U " X-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg Nr!   r   r   r"   r   )r   r$   r(   r
   r   s        r   #test_tokenizer_handles_surround_urlr-      si    
 v|f,-Fv;!!9>>V###!9>>S   !9>>V###r   prefix1prefix2c                     U " X-   U-   5      n[        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg r,   r   )r   r.   r/   r
   r   s        r   %test_tokenizer_handles_two_prefix_urlr1      sj    
 w(3./Fv;!!9>>W$$$!9>>W$$$!9>>S   r   suffix1suffix2c                 R   U " X1-   U-   5      nX-   [         ;   a>  [        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  X-   :X  d   eg [        U5      S:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eUS   R                  U:X  d   eg )Nr"   r   r   r!   )r   r   r   )r   r2   r3   r
   r   s        r   %test_tokenizer_handles_two_suffix_urlr5      s    
 s}w./FO+6{aay~~$$$ay~~!22226{aay~~$$$ay~~(((ay~~(((r   )pytestspacy.lang.tokenizer_exceptionsr   
URLS_BASIC	URLS_FULLparammarkxfailURLS_SHOULD_MATCHURLS_SHOULD_NOT_MATCHPREFIXESSUFFIXESparametrizer   r   r   r#   slowr'   r*   r-   r1   r5    r   r   <module>rD      sW    ;
   	33#3  3 ,	3
 83 .3 /3  3 !3 %3 &3 )3 *3 3 3  -!3" 2#3$ -%3& .'3( 6)3* +3, -3. 3/30 9132 334 536 738 93: 2;3< -=3> LLBkk!?3F LL=V[[EVEVEXG3L +M3N 3O3P Q3R S3T %U3V W3X Y3Z [3\ ]3^ _3` #a3b c3d 6e3 j&&& & 	&
 & & & & & & 0& 	& 
& & 
&  !&" #&$ %&& '&( )&* &+&, -&. /&0 1&2 3&4 5&6 7&8 9&: ;&< =&> ?&@ A&B C&D E&F LL&++"3"3"56G&H I&J K& T    123 33  56/ 7/ 
+! ,! 
+! ,! 8,	*! + - ! 8,	*$ + - $ 8,8,	*$ + - - $ H-H-	*! + . . ! H-H-	*
) + . . 
)r   