
    h,                         S SK JrJr  S SKJr  S SKJrJrJr  S SK	J
r
  SSKJr  SSKJr  SrS	 r " S
 S\5      r " S S\5      r " S S\5      rS/rg)   )BaseDefaultsLanguage)Doc)DummyTokenizerload_config_from_strregistry)Vocab   )	LEX_ATTRS)
STOP_WORDSz?
[nlp]

[nlp.tokenizer]
@tokenizers = "spacy.th.ThaiTokenizer"
c                      S n U $ )Nc                 ,    [        U R                  5      $ )N)ThaiTokenizervocab)nlps    P/home/james-whalen/.local/lib/python3.13/site-packages/spacy/lang/th/__init__.pythai_tokenizer_factory5create_thai_tokenizer.<locals>.thai_tokenizer_factory   s    SYY''     )r   s    r   create_thai_tokenizerr      s    ( "!r   c                   8    \ rS rSrS\SS4S jrS\S\4S jrSr	g)	r      r   returnNc                 ^     SSK Jn  X l        Xl        g ! [         a    [        S5      S ef = f)N    )word_tokenizezYThe Thai tokenizer requires the PyThaiNLP library: https://github.com/PyThaiNLP/pythainlp)pythainlp.tokenizer   ImportErrorr   )selfr   r   s      r   __init__ThaiTokenizer.__init__   s?    	8 +
  	9 	s    ,textc                 |    [        U R                  U5      5      nS/[        U5      -  n[        U R                  X#S9$ )NF)wordsspaces)listr   lenr   r   )r    r#   r%   r&   s       r   __call__ThaiTokenizer.__call__#   s8    T''-.3u:%4::U::r   )r   r   )
__name__
__module____qualname____firstlineno__r	   r!   strr   r)   __static_attributes__r   r   r   r   r      s(    	e 	 	;S ;S ;r   r   c                   ,    \ rS rSr\" \5      r\r\	r
Srg)ThaiDefaults)   r   N)r+   r,   r-   r.   r   DEFAULT_CONFIGconfigr   lex_attr_gettersr   
stop_wordsr0   r   r   r   r2   r2   )   s    !.1F Jr   r2   c                       \ rS rSrSr\rSrg)Thai/   thr   N)r+   r,   r-   r.   langr2   Defaultsr0   r   r   r   r9   r9   /   s    DHr   r9   N)languager   r   tokensr   utilr   r   r   r   r	   	lex_attrsr   r7   r   r4   r   r   r2   r9   __all__r   r   r   <module>rC      sS    .  B B    "";N ;$< 8 
 (r   