
    h              	          S SK Jr  \(       d  S\;   a  SSKJr  OS SKr S SKrS r
S rS rS	 r " S
 S\5      r " S S\5      r\R$                  " \5         " S S\5      r\R(                  " \5         " S S\5      r\R,                  " \5         " S S\5      r\R0                  " \5        S rS r " S S\5      r\R8                  " \5         " S S\5      r\R<                  " \5        S rS SK r S SK!r!S SK r S SK"r"S SK#r$S SK%J&r&  S SK%J'r'  S r(S r)\RT                  r+\RT                  r,\-" \S\R\                  5        \-" \S\R\                  5        \R^                  \l0        \Rb                  \l2        S  H  r3\)" \\35        M     \(" \5        \(" \5        \(" \5        \r4\r5SS!K6J7r7  \" \"Rp                  Rs                  \:" \$Rv                  Ry                  S"5      5      S#5      5         " S$ S%\5      r=g! \	 a    S SKr GNf = f)&    )version_info.   )_sentencepieceNc                      SU R                   R                  5       -   nSU R                  R
                  < SU R                  R                  < SU< S3$ ! [        R                   a    Sn NOf = f)Nz	proxy of  <r   z; z >)this__repr____builtin__	Exception	__class__
__module____name__)selfstrthiss     P/home/james-whalen/.local/lib/python3.13/site-packages/sentencepiece/__init__.py
_swig_reprr      s_    		 2 2 44 "^^668O8OQXZZ    s   A A/.A/c                    ^  U 4S jnU$ )Nc                   > US:X  a
  T" XU5        g US:X  a  U R                   R                  U5        g [        X5      (       a2  [        [	        [        U 5      U5      [        5      (       a
  T" XU5        g [        SU -  5      e)Nr
   thisownz(You cannot add instance attributes to %s)r
   ownhasattr
isinstancegetattrtypepropertyAttributeError)r   namevaluesets      r   set_instance_attrE_swig_setattr_nondynamic_instance_variable.<locals>.set_instance_attr   si    6>E"YIIMM% T  ZT
D0I8%T%TE" !Kd!RSS     )r!   r"   s   ` r   *_swig_setattr_nondynamic_instance_variabler&      s    T r$   c                    ^  U 4S jnU$ )Nc                    > [        X5      (       a(  [        [        X5      [        5      (       d
  T" XU5        g [	        SU -  5      e)Nz%You cannot add class attributes to %s)r   r   r   r   r   )clsr   r    r!   s      r   set_class_attr?_swig_setattr_nondynamic_class_variable.<locals>.set_class_attr)   s:    3j1CX&N&N5! !H3!NOOr$   r%   )r!   r*   s   ` r   '_swig_setattr_nondynamic_class_variabler,   (   s    P
 r$   c                    ^  U 4S jnU$ )zlClass decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclassc                 p   > T" U R                   U R                  U R                  R                  5       5      $ N)r   	__bases____dict__copy)r)   	metaclasss    r   wrapper$_swig_add_metaclass.<locals>.wrapper3   s'    s}}cll6G6G6IJJr$   r%   )r3   r4   s   ` r   _swig_add_metaclassr6   1   s    KNr$   c                   <    \ rS rSrSr\" \R                  5      rSrg)_SwigNonDynamicMeta8   zKMeta class to enforce nondynamic attributes (no new attributes) for a classr%   N)	r   r   __qualname____firstlineno____doc__r,   r   __setattr____static_attributes__r%   r$   r   r8   r8   8   s    U9$:J:JKKr$   r8   c                       \ rS rSr\" S S SS9r\rS r\	R                  rS rS rS	 rS
 rS rS rS r\" \5      r\" \5      r\" \5      r\" \5      r\" \5      r\" \5      r\" \5      rS rS rS r\rSrg)1ImmutableSentencePieceText_ImmutableSentencePiece=   c                 6    U R                   R                  5       $ r/   r
   r   xs    r   <lambda>:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>>       r$   c                 8    U R                   R                  U5      $ r/   rC   rE   vs     r   rF   rG   >       AFFJJqMr$   The membership flagdocc                 X    [         R                  " U [         R                  " 5       5        g r/   )r   :ImmutableSentencePieceText_ImmutableSentencePiece_swiginit5new_ImmutableSentencePieceText_ImmutableSentencePiecer   s    r   __init__:ImmutableSentencePieceText_ImmutableSentencePiece.__init__A   s,    QQRVXf  Y]  Y]  Y_  	`r$   c                 .    [         R                  " U 5      $ r/   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__piecerS   s    r   _piece8ImmutableSentencePieceText_ImmutableSentencePiece._pieceE       VVW[\\r$   c                 .    [         R                  " U 5      $ r/   )r   :ImmutableSentencePieceText_ImmutableSentencePiece__surfacerS   s    r   _surface:ImmutableSentencePieceText_ImmutableSentencePiece._surfaceH   s    XXY]^^r$   c                 .    [         R                  " U 5      $ r/   )r   5ImmutableSentencePieceText_ImmutableSentencePiece__idrS   s    r   _id5ImmutableSentencePieceText_ImmutableSentencePiece._idK   s    SSTXYYr$   c                 .    [         R                  " U 5      $ r/   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__beginrS   s    r   _begin8ImmutableSentencePieceText_ImmutableSentencePiece._beginN   rZ   r$   c                 .    [         R                  " U 5      $ r/   )r   6ImmutableSentencePieceText_ImmutableSentencePiece__endrS   s    r   _end6ImmutableSentencePieceText_ImmutableSentencePiece._endQ   s    TTUYZZr$   c                 .    [         R                  " U 5      $ r/   )r   CImmutableSentencePieceText_ImmutableSentencePiece__surface_as_bytesrS   s    r   _surface_as_bytesCImmutableSentencePieceText_ImmutableSentencePiece._surface_as_bytesT   s    aabfggr$   c                 .    [         R                  " U 5      $ r/   )r   AImmutableSentencePieceText_ImmutableSentencePiece__piece_as_bytesrS   s    r   _piece_as_bytesAImmutableSentencePieceText_ImmutableSentencePiece._piece_as_bytesW   s    __`deer$   c                     SR                  U R                  U R                  U R                  U R                  U R
                  5      $ )Nz3piece: "{}"
id: {}
surface: "{}"
begin: {}
end: {}
)formatpieceidsurfacebeginendrS   s    r   __str__9ImmutableSentencePieceText_ImmutableSentencePiece.__str__b   s7     "6$**dggt||"&**dhh8	8r$   c                 4   U R                   UR                   :H  =(       ay    U R                  UR                  :H  =(       aY    U R                  UR                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r/   )ru   rv   rw   rx   ry   r   others     r   __eq__8ImmutableSentencePieceText_ImmutableSentencePiece.__eq__j   s    ZZ5;;&  Y477ehh+>  Y4<<SXS`S`C`  Yeieoeosxs~s~e~  Y  DH  DL  DL  PU  PY  PY  DY  Yr$   c                 *    [        [        U 5      5      $ r/   )hashstrrS   s    r   __hash__:ImmutableSentencePieceText_ImmutableSentencePiece.__hash__m   s    #d)_r$   r%   N)r   r   r:   r;   r   r   r   r   rT   r   8delete_ImmutableSentencePieceText_ImmutableSentencePiece__swig_destroy__rX   r]   ra   re   ri   rm   rq   ru   piece_as_bytesrw   surface_as_bytesrv   rx   ry   rz   r   r   r>   r%   r$   r   r@   r@   =   s    -/IOdeGH`%^^]_Z][hf VEo.Nx G 12	#BVE
4.C8Y Hr$   r@   c                       \ rS rSr\" S S SS9r\rS r\	R                  rS rS rS	 rS
 rS rS r\" \5      r\" \5      r\" \5      r " S S5      r\S 5       rS rS rS r\rSrg)ImmutableSentencePieceTextu   c                 6    U R                   R                  5       $ r/   rC   rD   s    r   rF   #ImmutableSentencePieceText.<lambda>v   rH   r$   c                 8    U R                   R                  U5      $ r/   rC   rJ   s     r   rF   r   v   rL   r$   rM   rN   c                 X    [         R                  " U [         R                  " 5       5        g r/   )r   #ImmutableSentencePieceText_swiginitnew_ImmutableSentencePieceTextrS   s    r   rT   #ImmutableSentencePieceText.__init__y   s    ::4AnAnApqr$   c                 .    [         R                  " U 5      $ r/   )r   'ImmutableSentencePieceText__pieces_sizerS   s    r   _pieces_size'ImmutableSentencePieceText._pieces_size}   s    EEdKKr$   c                 .    [         R                  " X5      $ r/   )r   "ImmutableSentencePieceText__piecesr   indexs     r   _pieces"ImmutableSentencePieceText._pieces   s    @@MMr$   c                 .    [         R                  " U 5      $ r/   )r    ImmutableSentencePieceText__textrS   s    r   _text ImmutableSentencePieceText._text   s    >>tDDr$   c                 .    [         R                  " U 5      $ r/   )r   !ImmutableSentencePieceText__scorerS   s    r   _score!ImmutableSentencePieceText._score   s    ??EEr$   c                 .    [         R                  " U 5      $ r/   )r   ,ImmutableSentencePieceText_SerializeAsStringrS   s    r   SerializeAsString,ImmutableSentencePieceText.SerializeAsString       JJ4PPr$   c                 .    [         R                  " U 5      $ r/   )r   )ImmutableSentencePieceText__text_as_bytesrS   s    r   _text_as_bytes)ImmutableSentencePieceText._text_as_bytes   s    GGMMr$   c                   0    \ rS rSrS rS rS rS r\rSr	g)9ImmutableSentencePieceText.ImmutableSentencePieceIterator   c                 N    Xl         U R                   R                  5       U l        g r/   )protor   lenr   r   s     r   rT   BImmutableSentencePieceText.ImmutableSentencePieceIterator.__init__       
::**,r$   c                     U R                   $ r/   r   rS   s    r   __len__AImmutableSentencePieceText.ImmutableSentencePieceIterator.__len__       xxr$   c                    [        U[        5      (       ab  [        U R                  5       Vs/ s H  o R                  R                  U5      PM     snUR                  UR                  UR                  2   $ US:  a  XR                  -   nUS:  d  XR                  :  a  [        S5      eU R                  R                  U5      $ s  snf )Nr   zpiece index is out of range)
r   sliceranger   r   r   startstopstep
IndexErrorr   r   is      r   __getitem__EImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__   s    eU##16txxAA**$$Q'A%++ejjY^YcYcBcd
d19(("%19)89
9zz!!%(( B   $Cc           
          SR                  U  Vs/ s H  nSR                  [        U5      5      PM     sn5      $ s  snf )N
pieces {{
{}}}joinrt   r   r   rE   s     r   rz   AImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__   3    yyDIDq+223q6:DIJJI   $;r   r   N
r   r   r:   r;   rT   r   r   rz   r   r>   r%   r$   r   ImmutableSentencePieceIteratorr          -)K hr$   r   c                 ,    [         R                  U 5      $ r/   )r   r   rS   s    r   pieces!ImmutableSentencePieceText.pieces   s    'FFtLLr$   c                 D    U R                  5       UR                  5       :H  $ r/   r   r}   s     r   r   !ImmutableSentencePieceText.__eq__       ##%)@)@)BBBr$   c                 4    [        U R                  5       5      $ r/   r   r   rS   s    r   r   #ImmutableSentencePieceText.__hash__       $((*++r$   c                     SR                  U R                  U R                  SR                  U R                   Vs/ s H  nSR                  [        U5      5      PM     sn5      5      $ s  snf )Nztext: "{}"
score: {}
{}r   r   )rt   textscorer   r   r   r   s     r   rz   "ImmutableSentencePieceText.__str__   s^    F499djj99PTP[P[%\P[1&7&>&>s1v&FP[%\]__ &]s   $A*r%   N)r   r   r:   r;   r   r   r   r   rT   r   !delete_ImmutableSentencePieceTextr   r   r   r   r   r   r   r   text_as_bytesr   r   r   r   r   rz   r>   r%   r$   r   r   r   u   s    -/IOdeGHr%GGLNEFQN E?D^,MVE , M MC,_ Hr$   r   c                       \ rS rSr\" S S SS9r\rS r\	R                  rS rS rS	 r " S
 S5      r\S 5       rS rS rS r\rSrg)ImmutableNBestSentencePieceText   c                 6    U R                   R                  5       $ r/   rC   rD   s    r   rF   (ImmutableNBestSentencePieceText.<lambda>   rH   r$   c                 8    U R                   R                  U5      $ r/   rC   rJ   s     r   rF   r      rL   r$   rM   rN   c                 X    [         R                  " U [         R                  " 5       5        g r/   )r   (ImmutableNBestSentencePieceText_swiginit#new_ImmutableNBestSentencePieceTextrS   s    r   rT   (ImmutableNBestSentencePieceText.__init__   s    ??nFxFxFz{r$   c                 .    [         R                  " U 5      $ r/   )r   ,ImmutableNBestSentencePieceText__nbests_sizerS   s    r   _nbests_size,ImmutableNBestSentencePieceText._nbests_size   r   r$   c                 .    [         R                  " X5      $ r/   )r   'ImmutableNBestSentencePieceText__nbestsr   s     r   _nbests'ImmutableNBestSentencePieceText._nbests   s    EEdRRr$   c                 .    [         R                  " U 5      $ r/   )r   1ImmutableNBestSentencePieceText_SerializeAsStringrS   s    r   r   1ImmutableNBestSentencePieceText.SerializeAsString   s    OOPTUUr$   c                   0    \ rS rSrS rS rS rS r\rSr	g)BImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator   c                 N    Xl         U R                   R                  5       U l        g r/   )r   r   r   r   s     r   rT   KImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__init__   r   r$   c                     U R                   $ r/   r   rS   s    r   r   JImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__len__   r   r$   c                    [        U[        5      (       ab  [        U R                  5       Vs/ s H  o R                  R                  U5      PM     snUR                  UR                  UR                  2   $ US:  a  XR                  -   nUS:  d  XR                  :  a  [        S5      eU R                  R                  U5      $ s  snf )Nr   znbests index is out of range)
r   r   r   r   r   r   r   r   r   r   r   s      r   r   NImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__   s    eU##16txxAA**$$Q'A%++ejjY^YcYcBcd
d19(("%19)9:
:zz!!%(( Br   c           
          SR                  U  Vs/ s H  nSR                  [        U5      5      PM     sn5      $ s  snf Nr   znbests {{
{}}}r   r   s     r   rz   JImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__   r   r   r   Nr   r%   r$   r   "ImmutableSentencePieceTextIteratorr      r   r$   r  c                 ,    [         R                  U 5      $ r/   )r   r  rS   s    r   nbests&ImmutableNBestSentencePieceText.nbests   s    ,OOPTUUr$   c                 D    U R                  5       UR                  5       :H  $ r/   r   r}   s     r   r   &ImmutableNBestSentencePieceText.__eq__   r   r$   c                 4    [        U R                  5       5      $ r/   r   rS   s    r   r   (ImmutableNBestSentencePieceText.__hash__   r   r$   c           
          SR                  U R                   Vs/ s H  nSR                  [        U5      5      PM     sn5      $ s  snf r  )r   r
  rt   r   r   s     r   rz   'ImmutableNBestSentencePieceText.__str__   s7    YY$++N+Q)00Q8+NOONs   $Ar%   N)r   r   r:   r;   r   r   r   r   rT   r   &delete_ImmutableNBestSentencePieceTextr   r   r   r   r  r
  r   r   rz   r>   r%   r$   r   r   r      st    -/IOdeGH|%LLQSV , V VC,P Hr$   r   c                   .   \ rS rSr\" S S SS9r\rS r\	R                  rS rS rS	 rS
 rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S r!S r"S r#S r$S  r%S! r&S" r'S# r(S$ r)S% r*S& r+S' r,S( r-S) r.S* r/S+ r0S, r1S- r2S. r3S/ r4S0 r5S1 r6S2 r7S3 r8S4 r9S5 r:S6 r;S7 r<S8 r=S9 r>S: r?S; r@S< rAS= rBS> rCS?S?\DS@S@S@S@S@SASBSA4SC jrE         SiSD jrFSE rGSF rHSG rISH rJSjSI jrKSjSJ jrLSjSK jrMSjSL jrN      SkSM jrOSlSN jrPSlSO jrQSlSP jrRSlSQ jrS         SiSR jrTSjSS jrUSjST jrVSjSU jrWSjSV jrX\YS?4SW jrZ\Y4SX jr[\Y4SY jr\SmSZ jr]SmS[ jr^SnS\ jr_SnS] jr`SlS^ jrSlS_ jraS` rbSa rcSb rdSc reSd rfSe rgSf rhSjSg jriShrjg?)oSentencePieceProcessor   c                 6    U R                   R                  5       $ r/   rC   rD   s    r   rF   SentencePieceProcessor.<lambda>   rH   r$   c                 8    U R                   R                  U5      $ r/   rC   rJ   s     r   rF   r     rL   r$   rM   rN   c                 X    [         R                  " U [         R                  " 5       5        g r/   )r   SentencePieceProcessor_swiginitnew_SentencePieceProcessorrS   s    r   rT   SentencePieceProcessor.__init__   s    66t^=f=f=hir$   c                 .    [         R                  " X5      $ r/   )r   .SentencePieceProcessor_LoadFromSerializedProtor   
serializeds     r   LoadFromSerializedProto.SentencePieceProcessor.LoadFromSerializedProto   s    LLT^^r$   c                 .    [         R                  " X5      $ r/   )r   ,SentencePieceProcessor_SetEncodeExtraOptionsr   extra_options     r   SetEncodeExtraOptions,SentencePieceProcessor.SetEncodeExtraOptions      JJ4^^r$   c                 .    [         R                  " X5      $ r/   )r   ,SentencePieceProcessor_SetDecodeExtraOptionsr%  s     r   SetDecodeExtraOptions,SentencePieceProcessor.SetDecodeExtraOptions  r)  r$   c                 .    [         R                  " X5      $ r/   )r   $SentencePieceProcessor_SetVocabulary)r   valid_vocabs     r   SetVocabulary$SentencePieceProcessor.SetVocabulary  s    BB4UUr$   c                 .    [         R                  " U 5      $ r/   )r   &SentencePieceProcessor_ResetVocabularyrS   s    r   ResetVocabulary&SentencePieceProcessor.ResetVocabulary  s    DDTJJr$   c                 0    [         R                  " XU5      $ r/   )r   %SentencePieceProcessor_LoadVocabulary)r   filename	thresholds      r   LoadVocabulary%SentencePieceProcessor.LoadVocabulary  s    CCDT]^^r$   c                 0    [         R                  " U /UQ76 $ r/   )r   'SentencePieceProcessor_CalculateEntropyr   argss     r   CalculateEntropy'SentencePieceProcessor.CalculateEntropy  s    EEdRTRRr$   c                 .    [         R                  " U 5      $ r/   )r   #SentencePieceProcessor_GetPieceSizerS   s    r   GetPieceSize#SentencePieceProcessor.GetPieceSize  s    AA$GGr$   c                 .    [         R                  " X5      $ r/   )r    SentencePieceProcessor_PieceToIdr   ru   s     r   	PieceToId SentencePieceProcessor.PieceToId  s    >>tKKr$   c                 .    [         R                  " X5      $ r/   )r    SentencePieceProcessor_IdToPiecer   rv   s     r   	IdToPiece SentencePieceProcessor.IdToPiece      >>tHHr$   c                 .    [         R                  " X5      $ r/   )r   SentencePieceProcessor_GetScorerN  s     r   GetScoreSentencePieceProcessor.GetScore      ==dGGr$   c                 .    [         R                  " X5      $ r/   )r    SentencePieceProcessor_IsUnknownrN  s     r   	IsUnknown SentencePieceProcessor.IsUnknown   rQ  r$   c                 .    [         R                  " X5      $ r/   )r    SentencePieceProcessor_IsControlrN  s     r   	IsControl SentencePieceProcessor.IsControl#  rQ  r$   c                 .    [         R                  " X5      $ r/   )r   SentencePieceProcessor_IsUnusedrN  s     r   IsUnusedSentencePieceProcessor.IsUnused&  rV  r$   c                 .    [         R                  " X5      $ r/   )r   SentencePieceProcessor_IsByterN  s     r   IsByteSentencePieceProcessor.IsByte)  s    ;;DEEr$   c                 .    [         R                  " U 5      $ r/   )r   SentencePieceProcessor_unk_idrS   s    r   unk_idSentencePieceProcessor.unk_id,      ;;DAAr$   c                 .    [         R                  " U 5      $ r/   )r   SentencePieceProcessor_bos_idrS   s    r   bos_idSentencePieceProcessor.bos_id/  rk  r$   c                 .    [         R                  " U 5      $ r/   )r   SentencePieceProcessor_eos_idrS   s    r   eos_idSentencePieceProcessor.eos_id2  rk  r$   c                 .    [         R                  " U 5      $ r/   )r   SentencePieceProcessor_pad_idrS   s    r   pad_idSentencePieceProcessor.pad_id5  rk  r$   c                 .    [         R                  " U 5      $ r/   )r   -SentencePieceProcessor_serialized_model_protorS   s    r   serialized_model_proto-SentencePieceProcessor.serialized_model_proto8  s    KKDQQr$   c                 .    [         R                  " X5      $ r/   )r   #SentencePieceProcessor_LoadFromFiler   args     r   LoadFromFile#SentencePieceProcessor.LoadFromFile;  s    AA$LLr$   c	                 6    [         R                  " XX#XEXgU5	      $ r/   )r   #SentencePieceProcessor__EncodeAsIds	r   r   enable_sampling
nbest_sizealphaadd_bosadd_eosreverseemit_unk_pieces	            r   _EncodeAsIds#SentencePieceProcessor._EncodeAsIds>  s,    AA$okp  |C  N\  ]  	]r$   c	                 6    [         R                  " XX#XEXgU5	      $ r/   )r   &SentencePieceProcessor__EncodeAsPiecesr  s	            r   _EncodeAsPieces&SentencePieceProcessor._EncodeAsPiecesA  s-    DDTQ`ns  F  Q_  `  	`r$   c	                 6    [         R                  " XX#XEXgU5	      $ r/   )r   /SentencePieceProcessor__EncodeAsSerializedProtor  s	            r   _EncodeAsSerializedProto/SentencePieceProcessor._EncodeAsSerializedProtoD  s-    MMdZiw|  HO  Zh  i  	ir$   c	                 6    [         R                  " XX#XEXgU5	      $ r/   )r   .SentencePieceProcessor__EncodeAsImmutableProtor  s	            r   _EncodeAsImmutableProto.SentencePieceProcessor._EncodeAsImmutableProtoG  s-    LLTYhv{  GN  Yg  h  	hr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   (SentencePieceProcessor__EncodeAsIdsBatch
r   insnum_threadsr  r  r  r  r  r  r  s
             r   _EncodeAsIdsBatch(SentencePieceProcessor._EncodeAsIdsBatchJ  s-    FFtR]pz  DK  V]  n  	nr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   +SentencePieceProcessor__EncodeAsPiecesBatchr  s
             r   _EncodeAsPiecesBatch+SentencePieceProcessor._EncodeAsPiecesBatchM  s-    II$U`s}  GN  Y`  q  	qr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   4SentencePieceProcessor__EncodeAsSerializedProtoBatchr  s
             r   _EncodeAsSerializedProtoBatch4SentencePieceProcessor._EncodeAsSerializedProtoBatchP  s2    RRSW^i  }G  PW  bi  z  	zr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   3SentencePieceProcessor__EncodeAsImmutableProtoBatchr  s
             r   _EncodeAsImmutableProtoBatch3SentencePieceProcessor._EncodeAsImmutableProtoBatchS  s2    QQRV]h  |F  OV  ah  y  	yr$   c                 .    [         R                  " X5      $ r/   )r   !SentencePieceProcessor__DecodeIdsr   idss     r   
_DecodeIds!SentencePieceProcessor._DecodeIdsV  s    ??JJr$   c                 .    [         R                  " X5      $ r/   )r   (SentencePieceProcessor__DecodeIdsAsBytesr  s     r   _DecodeIdsAsBytes(SentencePieceProcessor._DecodeIdsAsBytesY  s    FFtQQr$   c                 .    [         R                  " X5      $ r/   )r   $SentencePieceProcessor__DecodePiecesr   r   s     r   _DecodePieces$SentencePieceProcessor._DecodePieces\  s    BB4PPr$   c                 .    [         R                  " X5      $ r/   )r   2SentencePieceProcessor__DecodeIdsAsSerializedProtor  s     r   _DecodeIdsAsSerializedProto2SentencePieceProcessor._DecodeIdsAsSerializedProto_  s    PPQU[[r$   c                 .    [         R                  " X5      $ r/   )r   5SentencePieceProcessor__DecodePiecesAsSerializedProtor  s     r   _DecodePiecesAsSerializedProto5SentencePieceProcessor._DecodePiecesAsSerializedProtob  s    SSTXaar$   c                 .    [         R                  " X5      $ r/   )r   1SentencePieceProcessor__DecodeIdsAsImmutableProtor  s     r   _DecodeIdsAsImmutableProto1SentencePieceProcessor._DecodeIdsAsImmutableProtoe  s    OOPTZZr$   c                 .    [         R                  " X5      $ r/   )r   4SentencePieceProcessor__DecodePiecesAsImmutableProtor  s     r   _DecodePiecesAsImmutableProto4SentencePieceProcessor._DecodePiecesAsImmutableProtoh  s    RRSW``r$   c                 0    [         R                  " XU5      $ r/   )r   &SentencePieceProcessor__DecodeIdsBatchr   r  r  s      r   _DecodeIdsBatch&SentencePieceProcessor._DecodeIdsBatchk  s    DDTP[\\r$   c                 0    [         R                  " XU5      $ r/   )r   -SentencePieceProcessor__DecodeIdsAsBytesBatchr  s      r   _DecodeIdsAsBytesBatch-SentencePieceProcessor._DecodeIdsAsBytesBatchn  s    KKDWbccr$   c                 0    [         R                  " XU5      $ r/   )r   7SentencePieceProcessor__DecodeIdsAsSerializedProtoBatchr  s      r    _DecodeIdsAsSerializedProtoBatch7SentencePieceProcessor._DecodeIdsAsSerializedProtoBatchq  s    UUVZalmmr$   c                 0    [         R                  " XU5      $ r/   )r   6SentencePieceProcessor__DecodeIdsAsImmutableProtoBatchr  s      r   _DecodeIdsAsImmutableProtoBatch6SentencePieceProcessor._DecodeIdsAsImmutableProtoBatcht  s    TTUY`kllr$   c                 0    [         R                  " XU5      $ r/   )r   )SentencePieceProcessor__DecodePiecesBatchr  s      r   _DecodePiecesBatch)SentencePieceProcessor._DecodePiecesBatchw  s    GGS^__r$   c                 0    [         R                  " XU5      $ r/   )r   :SentencePieceProcessor__DecodePiecesAsSerializedProtoBatchr  s      r   #_DecodePiecesAsSerializedProtoBatch:SentencePieceProcessor._DecodePiecesAsSerializedProtoBatchz  s    XXY]doppr$   c                 0    [         R                  " XU5      $ r/   )r   9SentencePieceProcessor__DecodePiecesAsImmutableProtoBatchr  s      r   "_DecodePiecesAsImmutableProtoBatch9SentencePieceProcessor._DecodePiecesAsImmutableProtoBatch}  s    WWX\cnoor$   c           	      4    [         R                  " XX#XEU5      $ r/   )r   (SentencePieceProcessor__NBestEncodeAsIdsr   r   r  r  r  r  r  s          r   _NBestEncodeAsIds(SentencePieceProcessor._NBestEncodeAsIds  s&    FFtS]ho  {I  J  	Jr$   c           	      4    [         R                  " XX#XEU5      $ r/   )r   +SentencePieceProcessor__NBestEncodeAsPiecesr  s          r   _NBestEncodeAsPieces+SentencePieceProcessor._NBestEncodeAsPieces  s&    II$V`kr  ~L  M  	Mr$   c           	      4    [         R                  " XX#XEU5      $ r/   )r   4SentencePieceProcessor__NBestEncodeAsSerializedProtor  s          r   _NBestEncodeAsSerializedProto4SentencePieceProcessor._NBestEncodeAsSerializedProto  s'    RRSW_it{  GU  V  	Vr$   c           	      4    [         R                  " XX#XEU5      $ r/   )r   3SentencePieceProcessor__NBestEncodeAsImmutableProtor  s          r   _NBestEncodeAsImmutableProto3SentencePieceProcessor._NBestEncodeAsImmutableProto  s'    QQRV^hsz  FT  U  	Ur$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   1SentencePieceProcessor__SampleEncodeAndScoreAsIds
r   r   num_samplesr  worinclude_bestr  r  r  r  s
             r   _SampleEncodeAndScoreAsIds1SentencePieceProcessor._SampleEncodeAndScoreAsIds  s.    OOPT\gps  DK  V]  n  	nr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   4SentencePieceProcessor__SampleEncodeAndScoreAsPiecesr  s
             r   _SampleEncodeAndScoreAsPieces4SentencePieceProcessor._SampleEncodeAndScoreAsPieces  s.    RRSW_jsv  GN  Y`  q  	qr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   =SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProtor  s
             r   &_SampleEncodeAndScoreAsSerializedProto=SentencePieceProcessor._SampleEncodeAndScoreAsSerializedProto  s.    [[\`hs|  PW  bi  z  	zr$   c
                 6    [         R                  " XX#XEXgX5
      $ r/   )r   <SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProtor  s
             r   %_SampleEncodeAndScoreAsImmutableProto<SentencePieceProcessor._SampleEncodeAndScoreAsImmutableProto  s.    ZZ[_gr{~  OV  ah  y  	yr$   c                 .    [         R                  " X5      $ r/   )r   !SentencePieceProcessor__Normalizer   r   s     r   
_Normalize!SentencePieceProcessor._Normalize  s    ??KKr$   c                 .    [         R                  " X5      $ r/   )r   ,SentencePieceProcessor__NormalizeWithOffsetsr  s     r   _NormalizeWithOffsets,SentencePieceProcessor._NormalizeWithOffsets  s    JJ4VVr$   c                 0    [         R                  " XU5      $ r/   )r   (SentencePieceProcessor__CalculateEntropy)r   r   r  s      r   _CalculateEntropy(SentencePieceProcessor._CalculateEntropy  s    FFtSXYYr$   c                 0    [         R                  " XX#5      $ r/   )r   -SentencePieceProcessor__CalculateEntropyBatch)r   r  r  r  s       r   _CalculateEntropyBatch-SentencePieceProcessor._CalculateEntropyBatch  s    KKDW\jjr$   c                 .    [         R                  " X5      $ r/   )r   .SentencePieceProcessor__OverrideNormalizerSpecr?  s     r   _OverrideNormalizerSpec.SentencePieceProcessor._OverrideNormalizerSpec  s    LLTXXr$   NFg?c                     [        U 5        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        Xl        Xl	        U(       d  U(       a  U R                  XS9  gg)a-  Initialzie sentencepieceProcessor.

Args:
  model_file: The sentencepiece model file path.
  model_proto: The sentencepiece model serialized proto.
  out_type: output type. int or str.
  add_bos: Add <s> to the result (Default = false)
  add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
    reversing (if enabled).
  reverse: Reverses the tokenized sequence (Default = false)
  emit_unk_piece: Emits the unk literal string (Default = false)
  nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
              nbest_size = {0,1}: No sampling is performed.
              nbest_size > 1: samples from the nbest_size results.
              nbest_size < 0: assuming that nbest_size is infinite and samples
                from the all hypothesis (lattice) using
                forward-filtering-and-backward-sampling algorithm.
  alpha: Soothing parameter for unigram sampling, and dropout probability of
         merge operations for BPE-dropout.
  num_threads: number of threads in batch processing (Default = -1, auto-detected)
)
model_filemodel_protoN)$_sentencepiece_processor_init_native	_out_type_add_bos_add_eos_reverse_emit_unk_piece_enable_sampling_nbest_size_alpha_num_threadsLoad)r   r%  r&  out_typer  r  r  r  r  r  r  r  s               r   InitSentencePieceProcessor.Init  sV    D +40nmmm+-#k%	{		Z	A #r$   c                    Uc  U R                   nUc  U R                  nUc  U R                  nUc  U R                  nUc  U R                  nUc  U R
                  nUc  U R                  nU	c  U R                  n	U
c  U R                  n
US:X  a  Ub  US:X  d	  US:X  d  U	c  [        S5      eU
b  [        U
5      [        La  [        S5      e[        U5      [        L ax  U[        L a  U R                  XXxXXEU5	      $ U[        L a  U R                  XXxXXEU5	      $ US:X  d  US:X  a  U R!                  XXxXXEU5	      $ US:X  a  U R#                  XXxXXEU5	      $ U[        L a  U R%                  XUXXEU5      $ U[        L a  U R'                  XUXXEU5      $ US:X  d  US:X  a  U R)                  XUXXEU5      $ US:X  a  U R+                  XUXXEU5      $ [        S	R-                  U5      5      e)
a  Encode text input to segmented ids or tokens.

Args:
input: input string. accepsts list of string.
out_type: output type. int or str.
add_bos: Add <s> to the result (Default = false)
add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
         reversing (if enabled).
reverse: Reverses the tokenized sequence (Default = false)
emit_unk_piece: Emits the unk literal string (Default = false)
nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
            nbest_size = {0,1}: No sampling is performed.
            nbest_size > 1: samples from the nbest_size results.
            nbest_size < 0: assuming that nbest_size is infinite and samples
            from the all hypothesis (lattice) using
            forward-filtering-and-backward-sampling algorithm.
alpha: Soothing parameter for unigram sampling, and merge probability for
       BPE-dropout (probablity 'p' in BPE-dropout paper).
num_threads: the number of threads used in the batch processing (Default = -1).
Tr   r   a  When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. when "nbest_size = -1" , this method samples from all candidates on the lattice instead of nbest segmentations.num_threads must be intserialized_protor   immutable_protozunknown out_type={})r(  r)  r*  r+  r,  r-  r.  r/  r0  RuntimeErrorr   intlistr  r   r  r  r  r  r  r  r  rt   )r   inputr2  r  r  r  r  r  r  r  r  s              r   EncodeSentencePieceProcessor.Encode  sW   @ 
	>>	--	--	--		--		 //		%%
			''	D	 j&8J!O&0Ao.
 	
 
	[ 1 <455	e	s?''O(-.Z Zs?**5+07^] ]))X-@33E49GVdf f((225387Uce e 
S  !&>S 	S	S##EJ$)GnV 	V	'	'8w+>,,UZ-2W~_ 	_	&	&++EJ,1Gn^ 	^ .55h?@@r$   c                 4    U R                   " SU[        S.UD6$ Nr<  r2  r%   r=  r   r   r<  kwargss      r   EncodeAsPieces%SentencePieceProcessor.EncodeAsPieces5      [[=us=f==r$   c                 4    U R                   " SU[        S.UD6$ r@  r=  r:  rC  s      r   EncodeAsIds"SentencePieceProcessor.EncodeAsIds9  rG  r$   c                 ,    U R                   " SUSS.UD6$ )Nr7  rA  r%   r=  rC  s      r   EncodeAsSerializedProto.SentencePieceProcessor.EncodeAsSerializedProto=  s    [[Lu/ALVLLr$   c                 ,    U R                   " SUSS.UD6$ )Nr8  rA  r%   rM  rC  s      r   EncodeAsImmutableProto-SentencePieceProcessor.EncodeAsImmutableProtoA  s    [[Ku/@KFKKr$   c           	      8    U R                   " SXU[        SS.UD6$ NTr<  r  r  r2  r  r%   rB  r   r<  r  r  rD  s        r   SampleEncodeAsPieces+SentencePieceProcessor.SampleEncodeAsPiecesE  -    [[ Gu5"%tG?EG Gr$   c           	      8    U R                   " SXU[        SS.UD6$ rT  rI  rV  s        r   SampleEncodeAsIds(SentencePieceProcessor.SampleEncodeAsIdsJ  rY  r$   c           	      0    U R                   " SXUSSS.UD6$ )Nr7  TrU  r%   rM  rV  s        r   SampleEncodeAsSerializedProto4SentencePieceProcessor.SampleEncodeAsSerializedProtoO  s-    [[ Vu5"4dVNTV Vr$   c           	      0    U R                   " SXUSSS.UD6$ )Nr8  TrU  r%   rM  rV  s        r   SampleEncodeAsImmutableProto3SentencePieceProcessor.SampleEncodeAsImmutableProtoT  s-    [[ Uu5"3TUMSU Ur$   c                 Z  ^ ^^^^^^ Tc  T R                   mTc  T R                  mTc  T R                  mTc  T R                  mTc  T R                  mTc  T R
                  mTS::  a  SmUUUUUUU 4S jn[        U5      [        L a  U V	s/ s H
  o" U	5      PM     sn	$ U" U5      $ s  sn	f )a  NBestEncode text input to segmented ids or tokens.

Args:
input: input string. accepsts list of string.
out_type: output type. int or str.
add_bos: Add <s> to the result (Default = false)
add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
reverse: Reverses the tokenized sequence (Default = false)
emit_unk_piece: Emits the unk literal string (Default = false)
nbest_size: nbest size
r   r   c                   > T[         L a  TR                  U TTTTT5      $ T[        L a  TR                  U TTTTT5      $ TS:X  d  TS:X  a  TR	                  U TTTTT5      $ TS:X  a  TR                  U TTTTT5      $ [        S5      e)Nr7  r   r8  zunknown out_type)r:  r  r   r  r  r  r9  )r   r  r  r  r  r2  r  r   s    r   _encode3SentencePieceProcessor.NBestEncode.<locals>._encode}  s    s?''j(/'>S Ss?**4+2GWnV V))X-@33D*4;Wg~_ _((2243:GWn^ ^ -..r$   )r(  r)  r*  r+  r,  r.  r   r;  )
r   r<  r2  r  r  r  r  r  re  ns
   ` ``````  r   NBestEncode"SentencePieceProcessor.NBestEncodeY  s    ( 
	>>	--	--	--		--		%%
	q
/ /  
e	$)*Eq
E**U^ +s   B(c                 4    U R                   " SX[        S.UD6$ Nr<  r  r2  r%   )rh  r   r   r<  r  rD  s       r   NBestEncodeAsPieces*SentencePieceProcessor.NBestEncodeAsPieces  '     6E'*6.46 6r$   c                 4    U R                   " SX[        S.UD6$ rk  )rh  r:  rm  s       r   NBestEncodeAsIds'SentencePieceProcessor.NBestEncodeAsIds  rp  r$   c                 ,    U R                   " SXSS.UD6$ )Nr7  rl  r%   rh  rm  s       r   NBestEncodeAsSerializedProto3SentencePieceProcessor.NBestEncodeAsSerializedProto  s+     EE'9E=CE Er$   c                 ,    U R                   " SXSS.UD6$ )Nr8  rl  r%   ru  rm  s       r   NBestEncodeAsImmutableProto2SentencePieceProcessor.NBestEncodeAsImmutableProto  s+     DE'8D<BD Dr$   c           
        ^ ^^^^^^^^	^
 Tc  T R                   mTc  T R                  mTc  T R                  mTc  T R                  mTc  T R                  mTc  SmTc  SmT	c  Sm	T
c  Sm
TS::  a  [        S5      eT
(       a  T	(       d  [        S5      eUUUUU
UUUU U	4
S jn[        U5      [        L a  U Vs/ s H
  o" U5      PM     sn$ U" U5      $ s  snf )a  SampleEncodeAndScore text input to segmented ids or tokens.

Args:
input: input string. accepsts list of string.
out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
add_bos: Add <s> to the result (Default = false)
add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
reverse: Reverses the tokenized sequence (Default = false)
emit_unk_piece: Emits the unk literal string (Default = false)
num_samples: How many samples to return (Default = 1)
alpha: inverse temperature for sampling
wor: whether to sample without replacement (Default = false)
include_best: whether to include the best tokenization, requires wor=True (Default = false)
r   g      ?Fr   znum_examples must be positivez8When include_best is True, We must specify "wor = True".c                 *  >
 T[         L a  T	R                  U TTT
TTTTT5	      $ T[        L a  T	R                  U TTT
TTTTT5	      $ TS:X  d  TS:X  a  T	R	                  U TTT
TTTTT5	      $ TS:X  a  T	R                  U TTT
TTTTT5	      $ [        S5      e)Nr7  r   r8  zunknown output type)r:  r   r   r  r  r  r9  )r   r  r  r  r  r  r  r2  r  r   r  s    r   re  <SentencePieceProcessor.SampleEncodeAndScore.<locals>._encode  s    s?00{E3P\18'7N\ \s?33D+ucS_4;Wg~_ _ ))X-@<<T;PUWZ\h=DgwXfh h ((;;D+uVY[g<CWgWeg g 011r$   )r(  r)  r*  r+  r,  r9  r   r;  )r   r<  r2  r  r  r  r  r  r  r  r  re  rg  s   ` `````````  r   SampleEncodeAndScore+SentencePieceProcessor.SampleEncodeAndScore  s    4 
	>>	--	--	--		--								:;;	cUVV2 2& 
e	$)*Eq
E**U^ +s   9Cc                 6    U R                   " SXU[        S.UD6$ Nr<  r  r  r2  r%   )r~  r   r   r<  r  r  rD  s        r   SampleEncodeAndScoreAsPieces3SentencePieceProcessor.SampleEncodeAndScoreAsPieces  *    && ?USX03?7=? ?r$   c                 6    U R                   " SXU[        S.UD6$ r  )r~  r:  r  s        r   SampleEncodeAndScoreAsIds0SentencePieceProcessor.SampleEncodeAndScoreAsIds  r  r$   c                 .    U R                   " SXUSS.UD6$ )Nr7  r  r%   r~  r  s        r   %SampleEncodeAndScoreAsSerializedProto<SentencePieceProcessor.SampleEncodeAndScoreAsSerializedProto  s.    && NUSX0BNFLN Nr$   c                 .    U R                   " SXUSS.UD6$ )Nr8  r  r%   r  r  s        r   $SampleEncodeAndScoreAsImmutableProto;SentencePieceProcessor.SampleEncodeAndScoreAsImmutableProto  s.    && MUSX0AMEKM Mr$   c                 :
   Uc  U R                   nUb  [        U5      [        La  [        S5      eU(       d  gU[        L Ga.  [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  [        U5      S:X  d  [        US   5      [        L a  U R                  U5      $ [        US   5      [        L a  U R                  U5      $ [        US   5      [        L ad  [        US   5      S:X  d  [        US   S   5      [        L a  U R                  X5      $ [        US   S   5      [        L a  U R                  X5      $ U[        L Ga.  [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  [        U5      S:X  d  [        US   5      [        L a  U R                  U5      $ [        US   5      [        L a  U R                  U5      $ [        US   5      [        L ad  [        US   5      S:X  d  [        US   S   5      [        L a  U R                  X5      $ [        US   S   5      [        L a  U R                  X5      $ US:X  Ga.  [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  U R                  U/5      $ [        U5      [        L a  [        U5      S:X  d  [        US   5      [        L a  U R                  U5      $ [        US   5      [        L a  U R                  U5      $ [        US   5      [        L ad  [        US   5      S:X  d  [        US   S   5      [        L a  U R!                  X5      $ [        US   S   5      [        L a  U R#                  X5      $ US:X  Ga.  [        U5      [        L a  U R%                  U/5      $ [        U5      [        L a  U R'                  U/5      $ [        U5      [        L a  [        U5      S:X  d  [        US   5      [        L a  U R%                  U5      $ [        US   5      [        L a  U R'                  U5      $ [        US   5      [        L ad  [        US   5      S:X  d  [        US   S   5      [        L a  U R)                  X5      $ [        US   S   5      [        L a  U R+                  X5      $ [        S5      e)zDecode processed id or token sequences.

Args:
  out_type: output type. str, bytes or 'serialized_proto' or 'immutable_proto' (Default = str)
  num_threads: the number of threads used in the batch processing (Default = -1).
r6  r   r   r7  r8  zunknown output or input type)r0  r   r:  r9  r   r  r  r;  r   r  r  bytesr  r  r  r  r  r  r  r  r  r  )r   r<  r2  r  s       r   DecodeSentencePieceProcessor.Decode  sj    
	''		[ 1 <455	S;#%)
);###UG,
,;$Z1_U1X# 5??5))%(^s"%%e,,%(^t#58}!T%(1+%6#%=((<<E!HQK C'++E??	U	;#''0
0;###UG,
,;$Z1_U1X# 5))%00%(^s"%%e,,%(^t#58}!T%(1+%6#%=//CCE!HQK C'++E??	'	';#115':
:;#44eW=
=;$Z1_U1X# 533E::%(^s"66u==%(^t#58}!T%(1+%6#%=99%MME!HQK C'<<UPP 
&	&;#00%9
9;#33UG<
<;$Z1_U1X# 522599%(^s"55e<<%(^t#58}!T%(1+%6#%=88LLE!HQK C';;EOO 788r$   c                 *    U R                   " SXS.UD6$ r@  r  r   r<  r2  rD  s       r   DecodePieces#SentencePieceProcessor.DecodePiecesg      [[BuB6BBr$   c                 *    U R                   " SXS.UD6$ r@  r  r  s       r   	DecodeIds SentencePieceProcessor.DecodeIdsk  r  r$   c                 *    U R                   " SXS.UD6$ r@  r  r  s       r   DecodePiecesAsSerializedProto4SentencePieceProcessor.DecodePiecesAsSerializedProtoo  r  r$   c                 *    U R                   " SXS.UD6$ r@  r  r  s       r   DecodeIdsAsSerializedProto1SentencePieceProcessor.DecodeIdsAsSerializedProtos  r  r$   c                 *    U R                   " SXS.UD6$ r@  r  r  s       r   DecodePiecesAsImmutableProto3SentencePieceProcessor.DecodePiecesAsImmutableProtow  r  r$   c                 *    U R                   " SXS.UD6$ r@  r  r  s       r   DecodeIdsAsImmutableProto0SentencePieceProcessor.DecodeIdsAsImmutableProto{  r  r$   c                     [        U5      [        L aA  Uc  U R                  nUb  [        U5      [        La  [	        S5      eU R                  XU5      $ U R                  X5      $ )zCalculate sentence entropyr6  )r   r;  r0  r:  r9  r  r  )r   r<  r  r  s       r   rA  rB    sa    	e	))+${"33">67
7**5EE##E11r$   c                    ^ ^ U U4S jn[        U5      [        L a  U Vs/ s H
  oC" U5      PM     sn$ U" U5      $ s  snf )Nc                 V   > T(       a  TR                  U 5      $ TR                  U 5      $ r/   r  r  r   r   with_offsetss    r   
_normalize4SentencePieceProcessor.Normalize.<locals>._normalize  &    ++D1
1t$$r$   r   r;  r   r<  r  r  rE   s   ` `  r   	Normalize SentencePieceProcessor.Normalize  ?    %
 
e	',-u!
1u-- .   <c                 t    0 nUR                  5        H  u  p4[        U5      X#'   M     U R                  U5      $ r/   )itemsr   r!  )r   rD  
new_kwargskeyr    s        r   OverrideNormalizerSpec-SentencePieceProcessor.OverrideNormalizerSpec  s5    j*#e*
 '))*55r$   c                 "    U R                  5       $ r/   rE  rS   s    r   
piece_size!SentencePieceProcessor.piece_size        r$   c                 "    U R                  5       $ r/   r  rS   s    r   
vocab_size!SentencePieceProcessor.vocab_size  r  r$   c                 "    U R                  5       $ r/   rz  rS   s    r   __getstate__#SentencePieceProcessor.__getstate__      ((**r$   c                 F    U R                  5         U R                  U5        g r/   rT   r!  r   rz  s     r   __setstate__#SentencePieceProcessor.__setstate__      
mmo
""#9:r$   c                 "    U R                  5       $ r/   r  rS   s    r   r   SentencePieceProcessor.__len__  r  r$   c                 $    U R                  U5      $ r/   )rJ  rI  s     r   r   "SentencePieceProcessor.__getitem__  s    ^^E""r$   c                     U(       a  U(       a  [        S5      eU(       a  U R                  U5      $ U R                  U5      $ )zOverwride SentencePieceProcessor.Load to support both model_file and model_proto.

Args:
  model_file: The sentencepiece model file path.
  model_proto: The sentencepiece model serialized proto. Either `model_file`
    or `model_proto` must be set.
z-model_file and model_proto must be exclusive.)r9  r!  r  )r   r%  r&  s      r   r1  SentencePieceProcessor.Load  s:     
JKK	++K88z**r$   )	r)  r*  r/  r,  r-  r.  r0  r(  r+  )	NNNNNNNNNNN)NNNNNNr/   )r7  )r8  )kr   r   r:   r;   r   r   r   r   rT   r   delete_SentencePieceProcessorr   r!  r'  r,  r1  r5  r;  rA  rE  rJ  rO  rT  rY  r]  ra  re  ri  rn  rr  rv  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r!  r:  r3  r=  rE  rJ  rN  rQ  rW  r[  r^  ra  rh  rn  rr  rv  ry  r~  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r1  r>   r%   r$   r   r  r     s   -/IOdeGHj%CC___VK_SHLIHIIHFBBBBRM]`ihnqzyKRQ\b[a]dnm`qpJMVUnqzyLWZkY !"-Bd "#[|>>MLG
G
V
U "   #'#7t6
6
E
D '+%)%)%),0)-#'!%*.JZ?
?
N
M
 &)d \~ ,/ C ), CCCCC	26!!+;
!#+r$   r  c                 .    [         R                  " U 5      $ r/   )r   SetRandomGeneratorSeed)seeds    r   r  r    s    0066r$   c                 .    [         R                  " U 5      $ r/   )r   SetMinLogLevel)rK   s    r   r  r    s    ((++r$   c                       \ rS rSr\" S S SS9rS r\r\	S 5       r
\	S 5       r\	S	 5       r\	S
 5       r\	S 5       r\	SS j5       r\	SS j5       rSrg)SentencePieceTraineri  c                 6    U R                   R                  5       $ r/   rC   rD   s    r   rF   SentencePieceTrainer.<lambda>  rH   r$   c                 8    U R                   R                  U5      $ r/   rC   rJ   s     r   rF   r    rL   r$   rM   rN   c                     [        S5      e)NzNo constructor defined)r   )r   r@  rD  s      r   rT   SentencePieceTrainer.__init__  s    566r$   c                 .    [         R                  " U 5      $ r/   )r   %SentencePieceTrainer__TrainFromString)r  s    r   _TrainFromString%SentencePieceTrainer._TrainFromString  s    CCCHHr$   c                 .    [         R                  " U 5      $ r/   )r   "SentencePieceTrainer__TrainFromMapr@  s    r   _TrainFromMap"SentencePieceTrainer._TrainFromMap  s    @@FFr$   c                 .    [         R                  " X5      $ r/   )r   #SentencePieceTrainer__TrainFromMap2r@  iters     r   _TrainFromMap2#SentencePieceTrainer._TrainFromMap2      AA$MMr$   c                 .    [         R                  " U 5      $ r/   )r   #SentencePieceTrainer__TrainFromMap3r  s    r   _TrainFromMap3#SentencePieceTrainer._TrainFromMap3  s    AA$GGr$   c                 .    [         R                  " X5      $ r/   )r   #SentencePieceTrainer__TrainFromMap4r  s     r   _TrainFromMap4#SentencePieceTrainer._TrainFromMap4  r  r$   Nc                    U b'  [        U 5      [        L a  [        R                  U 5      $ S nSnSn0 nUR	                  5        H#  u  pgUS;   a  UnM  US;   a  UnM  U" U5      XV'   M%     U(       aE  U(       a  [        R                  UU5      nO[        R                  U5      nUR                  U5        gU(       a  [        R                  XS5      $ [        R                  U5      $ )zDTrain Sentencepiece model. Accept both kwargs and legacy string arg.Nc                 >   [        U 5      [        L a|  [        R                  S   S:X  a  [	        5       nO
[        5       n[        R                  " USS9nUR                  U  Vs/ s H  n[        U5      PM     sn5        UR                  5       $ [        U 5      $ s  snf )zEncode value to CSV..r      r   )lineterminator)r   r;  sysr   StringIOBytesIOcsvwriterwriterowr   getvalue)r    fr  rK   s       r   re  ,SentencePieceTrainer._Train.<locals>._encode  sy    ;$a A%
A	A::a3&
//515a3q651
2
U
 2s   #B)sentence_iteratorsentence_reader)model_writer)
r   r   r  r  r  r  r  writer  r  )	r  rD  re  r  r  r  r  r    r&  s	            r   _TrainSentencePieceTrainer._Train  s     
T#Y#-#44S99 lj*#::#
$$,#EN*/ ' 
,;;J;LN+ -;;JG+;'  %44ZS
S%33J?
?r$   c                 z    [        US9   [        R                  " SSU 0UD6  S S S 5        g ! , (       d  f       g = f)N)ostreamr  r%   )
_LogStreamr  r  )r  	logstreamrD  s      r   TrainSentencePieceTrainer.Train  s,    i(##66v6 )((s   ,
:r%   r/   r  )r   r   r:   r;   r   r   rT   r   r   staticmethodr  r  r  r  r  r  r  r>   r%   r$   r   r  r    s    -/IOdeG7HI I G G N N H H N N * *X 7 7r$   r  c                       \ rS rSr\" S S SS9r\rS r\	R                  rS rS rS	 rS
 rS rS rS rS r       SS jrSS jrS rS rSrg)SentencePieceNormalizeri  c                 6    U R                   R                  5       $ r/   rC   rD   s    r   rF    SentencePieceNormalizer.<lambda>  rH   r$   c                 8    U R                   R                  U5      $ r/   rC   rJ   s     r   rF   r    rL   r$   rM   rN   c                 X    [         R                  " U [         R                  " 5       5        g r/   )r    SentencePieceNormalizer_swiginitnew_SentencePieceNormalizerrS   s    r   rT    SentencePieceNormalizer.__init__   s    77n>h>h>jkr$   c                 .    [         R                  " X5      $ r/   )r   /SentencePieceNormalizer_LoadFromSerializedProtor  s     r   r!  /SentencePieceNormalizer.LoadFromSerializedProto$  s    MMd__r$   c                 .    [         R                  " X5      $ r/   )r   'SentencePieceNormalizer_LoadFromRuleTSV)r   r9  s     r   LoadFromRuleTSV'SentencePieceNormalizer.LoadFromRuleTSV'  s    EEdUUr$   c                 .    [         R                  " X5      $ r/   )r   (SentencePieceNormalizer_LoadFromRuleName)r   r   s     r   LoadFromRuleName(SentencePieceNormalizer.LoadFromRuleName*  s    FFtRRr$   c                 .    [         R                  " U 5      $ r/   )r   .SentencePieceNormalizer_serialized_model_protorS   s    r   rz  .SentencePieceNormalizer.serialized_model_proto-  s    LLTRRr$   c                 .    [         R                  " X5      $ r/   )r   $SentencePieceNormalizer_LoadFromFiler~  s     r   r  $SentencePieceNormalizer.LoadFromFile0  s    BB4MMr$   c                 .    [         R                  " X5      $ r/   )r   "SentencePieceNormalizer__Normalizer  s     r   r  "SentencePieceNormalizer._Normalize3  s    @@LLr$   c                 .    [         R                  " X5      $ r/   )r   -SentencePieceNormalizer__NormalizeWithOffsetsr  s     r   r  -SentencePieceNormalizer._NormalizeWithOffsets6  s    KKDWWr$   c                 0    [         R                  " XU5      $ r/   )r   &SentencePieceNormalizer__SetProtoField)r   r   r    s      r   _SetProtoField&SentencePieceNormalizer._SetProtoField9  s    DDTQVWWr$   Nc                 t   [        U 5        U(       a  U R                  U5      nOVU(       a  U R                  U5      nO=U(       a  U R                  U5      nO$U(       a  U R	                  U5      nO[        S5      eU(       a7  U R                  SU5        U R                  SU5        U R                  SU5        gg)a  Initialzie sentencePieceNormalizer.

Args:
  model_file: The sentencepiece model file path.
  model_proto: The sentencepiece model serialized proto.
  rule_tsv: The normalization rule file in TSV format.
  rule_name: Pre-defined normalization name.
  add_dummy_prefix: add dummy prefix.
  escape_whitespaces: escape whitespaces.
  remove_extra_whitespaces: remove extra whitespaces.
zno model is specifiedadd_dummy_prefixescape_whitespacesremove_extra_whitespacesN)%_sentencepiece_normalizer_init_nativer  r!  r&  r*  r9  r:  )	r   r%  r&  rule_tsv	rule_namer=  r>  r?  statuss	            r   r3  SentencePieceNormalizer.Init<  s    ( ,D1	"":.--k:%%h/&&y1233	.0@A02DE68PQ 
r$   c                    ^ ^ U U4S jn[        U5      [        L a  U Vs/ s H
  oC" U5      PM     sn$ U" U5      $ s  snf )Nc                 V   > T(       a  TR                  U 5      $ TR                  U 5      $ r/   r  r  s    r   r  5SentencePieceNormalizer.Normalize.<locals>._normalizec  r  r$   r  r  s   ` `  r   r  !SentencePieceNormalizer.Normalizeb  r  r  c                 "    U R                  5       $ r/   r  rS   s    r   r  $SentencePieceNormalizer.__getstate__m  r  r$   c                 F    U R                  5         U R                  U5        g r/   r  r  s     r   r  $SentencePieceNormalizer.__setstate__q  r  r$   r%   )NNNNFFFr/   )r   r   r:   r;   r   r   r   r   rT   r   delete_SentencePieceNormalizerr   r!  r&  r*  rz  r  r  r  r:  r3  r  r  r  r>   r%   r$   r   r  r    s    -/IOdeGHl%DD`VSSNMXX # %&+$RL+;r$   r  c                 .    [         R                  " U 5      $ r/   )r   
SetDataDir)data_dirs    r   rO  rO  y  s    $$X..r$   )r  )r  c                 H   0 nU R                   R                  5        H]  u  p#[        R                  " SU5      (       d  M#  [        R                  " SSU5      R                  5       R                  SS5      nX1U'   M_     UR                  5        H  u  p#[        XU5        M     g)z1Added snake_cased method from CammelCased method.z^[A-Z]+z(?<!^)(?=[A-Z])_n_bestnbestN)r1   r  rematchsublowerreplacesetattr)	classname	snake_mapkrK   snakes        r   _add_snake_caser_    s     )  &&(da	xx
Aff'' : 	 )
 oodaI!  r$   c                 P   ^^ [        XS5      mU4S jmU4S jn[        XU5        g)z4Enables batch request for the method classname.name.Nc                    > [        U5      [        L a$  US:  d  XR                  5       :  a  [        S5      eT" X5      $ )Nr   zpiece id is out of range.)r   r:  r  r   )rK   rg  funcs     r   _func_batchnize.<locals>._func  s5    Aw#~1q5A$7233:r$   c                 t   > [        U5      [        L a  U Vs/ s H  nT" X5      PM     sn$ T" X5      $ s  snf r/   r  )r   r  rg  rc  s      r   _batched_func!_batchnize.<locals>._batched_func  s8    CyD&)*ceDnc**4 +s   5)r   rZ  )r[  r   rf  rc  rb  s      @@r   
_batchnizerh    s%    	$	'$
 
)=)r$   rT   )rJ  rO  rT  rY  r]  ra  re  )__version__sentencepiecepackage_datac                   *    \ rS rSrSS jrS rS rSrg)r  i  Nc                 r    Xl         U R                   b$  [        R                  R                  5       U l        g g r/   )r  r  stderrfilenoorig_stream_fileno)r   r  s     r   rT   _LogStream.__init__  s+    L|| #

 1 1 3d  r$   c                     U R                   b_  [        R                  " U R                  5      U l        [        R
                  " U R                   R                  5       U R                  5        g g r/   )r  osduprp  orig_stream_dupdup2ro  rS   s    r   	__enter___LogStream.__enter__  sI    ||VVD$;$;<dggdll!!#T%<%<=  r$   c                 *   U R                   b  [        R                  " U R                  5        [        R                  " U R
                  U R                  5        [        R                  " U R
                  5        U R                   R                  5         g g r/   )r  rs  closerp  rv  ru  )r   r   r    	tracebacks       r   __exit___LogStream.__exit__  s_    ||hht&&'ggd""D$;$;<hht##$
ll	  r$   )ru  rp  r  r/   )r   r   r:   r;   rT   rw  r|  r>   r%   r$   r   r  r    s    4
>
r$   r  )>r  r   _swig_python_version_info__package__r   r   r   builtinsr   ImportErrorr   r&   r,   r6   r   r8   objectr@   >ImmutableSentencePieceText_ImmutableSentencePiece_swigregisterr   'ImmutableSentencePieceText_swigregisterr   ,ImmutableNBestSentencePieceText_swigregisterr  #SentencePieceProcessor_swigregisterr  r  r  !SentencePieceTrainer_swigregisterr  $SentencePieceNormalizer_swigregisterrO  rU  r  rs  importlib.resources	importlibior  r  r_  rh  rT   r'  r@  rZ  r3  r=  Tokenizer  
Detokenizemset_random_generator_seedset_min_log_level_versionri  pathr   r   	resourcesfilesr  r%   r$   r   <module>r     s&   :#/ "[
L$ L
3 3n  M MN  AD DP  6 67Q R4f 4p  ; ;<[ \J+V J+\  2 23I J7,K76 K7^  0 01E FW;f W;v  3 34K L/ 
 
 
 	   
*" (>'F'F $(?(H(H % 
,B,G,G H -D-I-I J"8"?"?  $:$A$A  !
A #Q'	
 & ' $ % ' (2 "  ! 
277<<I//55oFGX Y Y%  s   G; ;
H	H	