
    h#                    p    S SK Jr  S SKJrJr  S SKJr  S SKJrJ	r	  / SQr
 " S S\5      r " S S	\5      rg
)    )annotations)ABCabstractmethod)Iterable)AutoTokenizerPreTrainedTokenizerBase(  !"z''z``#$%&'()*+,-./:;<=>?@[\]^_`{|}~aaboutaboveacrossafter
afterwardsagainagainstainallalmostalonealongalreadyalsoalthoughalwaysamamongamongstamoungstamountanandanotheranyanyhowanyoneanythinganywayanywherearearenaroundasatbackbebecamebecausebecomebecomesbecomingbeenbefore
beforehandbehindbeingbelowbesidebesidesbetweenbeyondbillbothbottombutbycallcancannotcantcoconcouldcouldncouldntcryddedescribedetaildiddidndodoesdoesndoingdondonedowndueduringeachegeighteitherelevenelse	elsewhereemptyenoughetcevenevereveryeveryone
everything
everywhereexceptfewfifteenfiftyfillfindfirefirstfiveforformerformerlyfortyfoundfourfromfrontfullfurthergetgivegohadhadnhashasnhasnthavehavenhavinghehenceherhere	hereafterherebyhereinhereuponhersherselfhimhimselfhishowhoweverhundrediieifinincindeedinterestintoisisnititsitselfjustkeeplastlatterlatterlyleastlessllltdmmamademanymayme	meanwhilemightmightnmillminemoremoreovermostmostlymovemuchmustmustnmymyselfnamenamelyneednneitherneverneverthelessnextninenonobodynonenoonenornotnothingnownowhereoofoffoftenononceoneonlyontoorotherothers	otherwiseourours	ourselvesoutoverownpartperperhapspleaseputratherressameseeseemseemedseemingseemsseriousseveralshansheshouldshouldnshowsidesincesinceresixsixtysosomesomehowsomeone	somethingsometime	sometimes	somewherestillsuchsystemttaketenthanthatthetheirtheirsthem
themselvesthenthencethere
thereafterthereby	thereforetherein	thereuponthesetheythickthinthirdthisthosethoughthreethrough
throughoutthruthustotogethertootoptowardtowardstwelvetwentytwoununderuntilupuponusveveryviawaswasnwewellwerewerenwhatwhateverwhenwhencewheneverwhere
whereafterwhereaswherebywherein	whereuponwhereverwhetherwhichwhilewhitherwhowhoeverwholewhomwhosewhywillwithwithinwithoutwonwouldwouldnyyetyouyouryoursyourself
yourselvesc                      \ rS rSr\S	S j5       r\S	S j5       r\S
S j5       r\SS j5       r\	\SS j5       5       r
Srg)WordTokenizeri  c                    g N selfvocabs     n/home/james-whalen/.local/lib/python3.13/site-packages/sentence_transformers/models/tokenizer/WordTokenizer.py	set_vocabWordTokenizer.set_vocab          c                    g r  r  r  s     r  	get_vocabWordTokenizer.get_vocab  r  r  c                    g r  r  )r  textkwargss      r  tokenizeWordTokenizer.tokenize  r  r  c                    g r  r  r  output_paths     r  saveWordTokenizer.save  r  r  c                    g r  r  
input_paths    r  loadWordTokenizer.load  s     	r  r  Nr  zIterable[str])r  strreturnz	list[int]r  r  r  r  )__name__
__module____qualname____firstlineno__r   r  r  r  r  staticmethodr  __static_attributes__r  r  r  r  r    sl              r  r  c                  h   ^  \ rS rSrS	U 4S jjrS
S jrSS jrSS jrSS jr\	SS j5       r
SrU =r$ )TransformersTokenizerWrapperi  c                .   > [         TU ]  5         Xl        g r  )super__init__	tokenizer)r  r  	__class__s     r  r  %TransformersTokenizerWrapper.__init__  s    "r  c                4    U R                  U5      nUS   S   $ )N	input_idsr   r  )r  sentenceencodeds      r  r  %TransformersTokenizerWrapper.tokenize  s     ..*{#A&&r  c                    g r  r  r  s     r  r  &TransformersTokenizerWrapper.set_vocab  s    r  c                6    U R                   R                  5       $ r  )r  r  r  s     r  r  &TransformersTokenizerWrapper.get_vocab  s    ~~''))r  c                :    U R                   R                  U5        g r  )r  save_pretrainedr  s     r  r  !TransformersTokenizerWrapper.save  s    &&{3r  c                >    [        [        R                  " U SS95      $ )NT)use_fast)r  r   from_pretrainedr  s    r  r  !TransformersTokenizerWrapper.load  s    +M,I,I*_c,deer  r  )r  r   )r  r  r  r  r  )r  r  r  r  r  r  r  r  r  r  r  r  __classcell__)r  s   @r  r  r    s2    #'*4 f fr  r  N)
__future__r   abcr   r   collections.abcr   transformersr   r   ENGLISH_STOP_WORDSr  r  r  r  r  <module>r     s5    " # $ ?C LC .f= fr  