ó
    ^i"	  ã                   ó0   • S r SSKrSSKJr   " S S5      rg)zSimple filter ranker.é    N)ÚFinalc            	       óÈ   • \ rS rSr% Sr\R                  " S5      r\\	S'   SS\
SS4S jjrS	\S\\   4S
 jrS\S\\   S\4S jrS\\\\4      S	\S\\\\4      4S jrSrg)ÚSimpleFilterRankeré   a!  Simple filter ranker.

1) Pull any doc with 'wikipedia.org' in its href to the top.
2) Bucket the rest according to where query tokens appear:
   - both title & body/description
   - title only
   - body only
   - neither
3) Return wikipedia-top + both + title-only + body-only + neither.
z\W+Ú	_splitterÚmin_token_lengthÚreturnNc                 ó   • Xl         g ©N©r   )Úselfr   s     ÚI/home/james-whalen/.local/lib/python3.13/site-packages/ddgs/similarity.pyÚ__init__ÚSimpleFilterRanker.__init__   s   € Ø 0Õó    Úqueryc                 ó¶   • U R                   R                  UR                  5       5       Vs1 s H   n[        U5      U R                  :¼  d  M  UiM"     sn$ s  snf )z7Split on non-word characters & filter out short tokens.)r   ÚsplitÚlowerÚlenr   )r   r   Útokens      r   Ú_extract_tokensÚ"SimpleFilterRanker._extract_tokens   sC   € à#'§>¡>×#7Ñ#7¸¿¹»Ô#FÓnÒ#F˜%Ì#ÈeË*ÐX\×XmÑXmÑJm—Ñ#FÑnÐnùÒns   ¬AÁAÚtextÚtokensc                 óN   ^• UR                  5       m[        U4S jU 5       5      $ )z:Check if any token is a substring of the lower-cased text.c              3   ó,   >#   • U  H	  oT;   v •  M     g 7fr   © )Ú.0ÚtokÚ
lower_texts     €r   Ú	<genexpr>Ú4SimpleFilterRanker._has_any_token.<locals>.<genexpr>   s   øé € Ð7²¨˜*Ö$²ùó   ƒ)r   Úany)r   r   r   r!   s      @r   Ú_has_any_tokenÚ!SimpleFilterRanker._has_any_token   s   ø€ à—Z‘Z“\ˆ
ÜÔ7±Ó7Ó7Ð7r   Údocsc                 ór  ^• U R                  U5      n/ n/ n/ n/ n/ nU GH  n	U	R                  SS5      n
U	R                  SS5      mU	R                  SU	R                  SS5      5      n[        U4S jS 5       5      (       a  Mf  SU
;   a  UR                  U	5        M  U R	                  TU5      nU R	                  X³5      nU(       a  U(       a  UR                  U	5        MÃ  U(       a  UR                  U	5        MÝ  U(       a  UR                  U	5        M÷  UR                  U	5        GM     XE-   U-   U-   U-   $ )	z,Rank a list of docs based on a query string.ÚhrefÚ ÚtitleÚbodyÚdescriptionc              3   ó,   >#   • U  H	  oT;   v •  M     g 7fr   r   )r   Úxr,   s     €r   r"   Ú*SimpleFilterRanker.rank.<locals>.<genexpr>2   s   øé € ÐBÒ'A !˜–:Ò'Aùr$   )z	Category:Ú	Wikimediazwikipedia.org)r   ÚgetÚallÚappendr&   )r   r(   r   r   Ú	wiki_hitsÚbothÚ
title_onlyÚ	body_onlyÚneitherÚdocr*   r-   Ú	hit_titleÚhit_bodyr,   s                 @r   ÚrankÚSimpleFilterRanker.rank!   s   ø€ à×%Ñ% eÓ,ˆàˆ	ØˆØˆ
Øˆ	ØˆäˆCØ—7‘7˜6 2Ó&ˆDØ—G‘G˜G RÓ(ˆEà—7‘7˜6 3§7¡7¨=¸"Ó#=Ó>ˆDô ÔBÑ'AÓB×BÑBÙð  $Ó&Ø× Ñ  Ô%Ùð ×+Ñ+¨E°6Ó:ˆIØ×*Ñ*¨4Ó8ˆHæžXØ—‘˜CÖ ÞØ×!Ñ! #Ö&ÞØ× Ñ  Ö%à—‘˜s×#ñ5 ð: Ñ *Ñ,¨yÑ8¸7ÑBÐBr   r   )é   )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__ÚreÚcompiler   r   Ú__annotations__Úintr   ÚstrÚsetr   Úboolr&   ÚlistÚdictr>   Ú__static_attributes__r   r   r   r   r      sŸ   ‡ ñ	ð —z’z &Ó)€IˆuÓ)ñ1¨ð 1°Tõ 1ðo Sð o¨S°©Xô oð8 3ð 8°°C±ð 8¸Tô 8ð
'C˜˜d 3¨ 8™nÑ-ð 'C°cð 'C¸dÀ4ÈÈSÈÁ>Ñ>R÷ 'Cr   r   )rE   rF   Útypingr   r   r   r   r   Ú<module>rQ      s   ðÙ ã 	Ý ÷ACò ACr   