
    11i                         % S r SSKrSSKrSSKJr  SSKJrJr  SSKJ	r	J
r
Jr  SSKJr  \
S   r\	\S'   S	\S
\SS4S jr " S S\R"                  5      r " S S\5      r " S S\5      r " S S\5      rg)z5Manages words count mismatches for the espeak backend    N)Logger)ListTuple)	TypeAliasLiteralUnion)	Separator)warnignoreWordMismatchmodeloggerreturnBaseWordsMismatchc           
          [         [        [        S.n X    " U5      $ ! [         a0    [	        SU  SSR                  UR                  5       5       35      Sef = f)a7  Returns a word count mismatch processor according to `mode`

The `mode` can be one of the following:
- `ignore` to ignore words mismatches
- `warn` to display a warning on each mismatched utterance
- `remove` to remove any utterance containing a words mismatch

Raises a RuntimeError if the `mode` is unknown.

)r   r
   removezmode z invalid, must be in z, N)IgnoreWarnRemoveKeyErrorRuntimeErrorjoinkeys)r   r   
processorss      b/home/james-whalen/.local/lib/python3.13/site-packages/phonemizer/backend/espeak/words_mismatch.pyget_words_mismatch_processorr      sh     J
'' D6.tyy9J/K.LM
	s	   	 :Ac            	       4   \ rS rSrSr\R                  " S5      rS\4S jr	\
\4S\\   S\\\R                  4   S\\   4S	 jj5       rS\\\\\4      4S
 jrS\S\4S jrS\\   4S jrS\\   S\4S jr\R0                  S\\   S\\   4S j5       rSrg)r   6   z4The base class of all word count mismatch processorsz\s+r   c                 ,    Xl         / U l        / U l        g N)_logger
_count_txt
_count_phn)selfr   s     r   __init__BaseWordsMismatch.__init__:   s        textwordsepr   c                 4   [        U[        R                  5      (       d  [        R                  " U5      nU VVs/ s HI  n[	        [        R
                  " X#R                  5       5       Vs/ s H  oD(       d  M  UPM     sn5      PMK     snn$ s  snf s  snnf )z;Return the number of words contained in each line of `text`)
isinstancerePatternescapelensplitstrip)clsr(   r)   linews        r   _count_wordsBaseWordsMismatch._count_words?   st     '2::..ii(G  BHHWjjl;A;qq;AB 	As   1B,
B:B BBc                 n   [        U R                  5      [        U R                  5      :w  a7  [        S[        U R                  5       S[        U R                  5       35      e[	        [        U R                  U R                  5      5       VVVs/ s H  u  nu  p#X#:w  d  M  XU4PM     snnn$ s  snnnf )zReturns a list of (num_line, nwords_input, nwords_output)

Consider only the lines where nwords_input != nwords_output. Raises a
RuntimeError if input and output do not have the same number of lines.

zBnumber of lines in input and output must be equal, we have: input=z	, output=)r/   r"   r#   r   	enumeratezip)r$   ntps       r   _mismatched_lines#BaseWordsMismatch._mismatched_linesL   s     t3t#77""%doo"6!7 8doo./12 2 c$//4??;<< $!Vav Q1I< 	 s   B0#B0	nmismatchnlinesc                 l    U(       a-  U R                   R                  S[        X-  S5      S-  X5        gg)z$Logs a high level undetailed warningz1words count mismatch on %s%% of the lines (%s/%s)   d   N)r!   warninground)r$   r?   r@   s      r   _resumeBaseWordsMismatch._resume^   s4    LL  Ci(!,s2IG r'   c                 0    U R                  U5      U l        g)z-Stores the number of words in each input lineN)r5   r"   r$   r(   s     r   
count_textBaseWordsMismatch.count_texte   s    ++D1r'   	separatorc                 D    U R                  XR                  5      U l        g)z.Stores the number of words in each output lineN)r5   wordr#   )r$   r(   rL   s      r   count_phonemized"BaseWordsMismatch.count_phonemizedi   s    ++D..Ar'   c                     g)zDetects and process word count misatches according to the mode

This method is called at the very end of phonemization, during
post-processing.

N rI   s     r   processBaseWordsMismatch.processm   s    r'   )r#   r"   r!   N)__name__
__module____qualname____firstlineno____doc__r,   compile
_RE_SPACESr   r%   classmethodr   strr   r-   intr5   r   r=   rF   rJ   r	   rO   abcabstractmethodrS   __static_attributes__rR   r'   r   r   r   6   s    >F#Jv 
  /9
s)
 3

?+
 >B#Y
 
4c3m(<#= $G Gc G2tCy 2BT#Y B9 B 	DI $s)  r'   c                   6    \ rS rSrSrS\\   S\\   4S jrSrg)r   w   zIgnores word count mismatchesr(   r   c                 j    U R                  [        U R                  5       5      [        U5      5        U$ r    )rF   r/   r=   rI   s     r   rS   Ignore.processz   s'    S//12CI>r'   rR   N	rU   rV   rW   rX   rY   r   r]   rS   ra   rR   r'   r   r   r   w   s     'DI $s) r'   r   c                   6    \ rS rSrSrS\\   S\\   4S jrSrg)r      z Warns on every mismatch detectedr(   r   c                     U R                  5       nU H&  u  p4nU R                  R                  SUS-   XE5        M(     U R                  [	        U5      [	        U5      5        U$ )Nz>words count mismatch on line %s (expected %s words but get %s)   )r=   r!   rD   rF   r/   )r$   r(   mismatchnumntxtnphns         r   rS   Warn.process   s[    ))+'OCtLL  1a%  ( 	S]CI.r'   rR   Nrf   rR   r'   r   r   r      s     *	DI 	$s) 	r'   r   c                   6    \ rS rSrSrS\\   S\\   4S jrSrg)r      z6Removes any utterance containing a word count mismatchr(   r   c                     U R                  5        Vs/ s H  o"S   PM	     nnU R                  [        U5      [        U5      5        U R                  R	                  S5        U H  nSX'   M	     U$ s  snf )Nr   zremoving the mismatched lines )r=   rF   r/   r!   rD   )r$   r(   r3   rk   indexs        r   rS   Remove.process   sh    (,(>(>(@A(@G(@AS]CI.<=EDK  Bs   A1rR   Nrf   rR   r'   r   r   r      s     @DI $s) r'   r   )rY   r_   r,   loggingr   typingr   r   typing_extensionsr   r   r   phonemizer.separatorr	   r   __annotations__r   ABCr   r   r   r   rR   r'   r   <module>r|      s    < 
 	   7 7 * ""23i 3| V H[ 0> >B  
 
r'   