
    11i#                         S r SSKrSSKrSSKrSSKJr  SSKJrJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJr  SSKJrJr  SSKJr   " S	 S
\R0                  5      rg)z.Abstract base class for phonemization backends    N)Logger)OptionalListAnyDictTupleUnionPattern)
get_logger)Punctuation)	Separatordefault_separator)chunksc                   X   \ rS rSrSr   SS\S\\\\4      S\	S\\
   4S jjr\S	 5       r\S
 5       r\S 5       r\\R&                  S 5       5       r\\R&                  S 5       5       r\\R&                  S 5       5       r\\R&                  S\\\4   4S j5       5       r\S\4S j5       r   SS\\   S\\   S\	S\S\\   4
S jjr\S\\\      4S j5       r\R&                  S\\   S\S\S\	S\\   4
S j5       r S\\   S\!\\\\   4   \4   4S jr"S\\   S\S\	4S jr#Sr$g) BaseBackend   a  Abstract base class of all the phonemization backends

Provides a common interface to all backends. The central method is
`phonemize()`

Parameters
----------
language: str
    The language code of the input text, must be supported by
    the backend. If ``backend`` is 'segments', the language can be a file with
    a grapheme to phoneme mapping.

preserve_punctuation: bool
    When True, will keep the punctuation in the
    phonemized output. Not supported by the 'espeak-mbrola' backend. Default
    to False and remove all the punctuation.

punctuation_marks: str
    The punctuation marks to consider when dealing with punctuation, either for removal or preservation.
    Can be defined as a string or regular expression. Default to Punctuation.default_marks().

logger: logging.Logger
    the logging instance where to send
    messages. If not specified, use the default system logger.

Raises
------
RuntimeError
    if the backend is not available of if the `language` cannot be initialized.

Nlanguagepunctuation_markspreserve_punctuationloggerc           	         Uc  [         R                  " 5       nUc
  [        5       nU R                  5       (       d(  [	        SR                  U R                  5       5      5      eX@l        U R                  R                  SU R                  5       SR                  S U R                  5        5       5      5        U R                  U5      U l        X0l        [        U5      U l        g )Nz{} not installed on your systemzinitializing backend %s-%s.c              3   8   #    U  H  n[        U5      v   M     g 7f)N)str).0vs     Q/home/james-whalen/.local/lib/python3.13/site-packages/phonemizer/backend/base.py	<genexpr>'BaseBackend.__init__.<locals>.<genexpr>S   s     !A.Q#a&&.s   )r   default_marksr   is_availableRuntimeErrorformatname_loggerinfojoinversion_init_language	_language_preserve_punctuation_punctuator)selfr   r   r   r   s        r   __init__BaseBackend.__init__@   s    
 $ + 9 9 ;>\F   ""188EG G (IIK!A$,,.!AA	C
 ,,X6 &:"&'89    c                 r    U R                  U5      (       d   [        SU SU R                  5        S35      eU$ )z`Language initialization

This method may be overloaded in child classes (see Segments backend)

z
language "z" is not supported by the z backend)is_supported_languager"   r$   clsr   s     r   r)   BaseBackend._init_language\   sE     ((22XJ&@88:,h() ) r0   c                     U R                   $ )z0A logging.Logger instance where to send messages)r%   r-   s    r   r   BaseBackend.loggeri   s     ||r0   c                     U R                   $ )z9The language code configured to be used for phonemization)r*   r7   s    r   r   BaseBackend.languagen   s     ~~r0   c                      g)zThe name of the backendN r<   r0   r   r$   BaseBackend.names       r0   c                     g)z9Returns True if the backend is installed, False otherwiseNr<   r4   s    r   r!   BaseBackend.is_availablex   r>   r0   c                     g)z;Return the backend version as a tuple (major, minor, patch)Nr<   r@   s    r   r(   BaseBackend.version}   r>   r0   returnc                      g)z@Return a dict of language codes -> name supported by the backendNr<   r<   r0   r   supported_languagesBaseBackend.supported_languages   r>   r0   c                 &    XR                  5       ;   $ )z6Returns True if `language` is supported by the backend)rF   r3   s     r   r2   !BaseBackend.is_supported_language   s     22444r0   text	separatorstripnjobsc           	        ^ ^^ [        U[        5      (       a  [        S5      eTc  [        mT R	                  U5      u  pUS:X  a  T R                  USTT5      nOqT R                  R                  ST R                  5       U5        [        R                  " US9" U UU4S j[        [        X5      6  5       5      nT R                  U5      nT R                  XeTT5      $ )a  Returns the `text` phonemized for the given language

Parameters
----------
text: list of str
    The text to be phonemized. Each string in the list
    is considered as a separated line. Each line is considered as a text
    utterance. Any empty utterance will be ignored.

separator: Separator
    string separators between phonemes, syllables
    and words, default to separator.default_separator. Syllable separator
    is considered only for the festival backend. Word separator is
    ignored by the 'espeak-mbrola' backend.

strip: bool
    If True, don't output the last word and phone separators
    of a token, default to False.

njobs : int
    The number of parallel jobs to launch. The input text is
    split in ``njobs`` parts, phonemized on parallel instances of the
    backend and the outputs are finally collapsed.

Returns
-------
phonemized text: list of str
    The input ``text`` phonemized for the given ``language`` and ``backend``.

Raises
------
RuntimeError
    if something went wrong during the phonemization

z;input text to phonemize() is str but it must be list of str   r   zrunning %s on %s jobs)n_jobsc              3      >#    U  H4  n[         R                  " TR                  5      " US    US   TT5      v   M6     g7f)r   rO   N)joblibdelayed_phonemize_aux)r   chunkr-   rK   rL   s     r   r   (BaseBackend.phonemize.<locals>.<genexpr>   sF      78 7E t223!HeAh	5: : 7s   <?)
isinstancer   r"   r   _phonemize_preprocessrT   r   r&   r$   rR   Parallelzipr   _flatten_phonemize_postprocess)r-   rJ   rK   rL   rM   r   
phonemizeds   ` ``   r   	phonemizeBaseBackend.phonemize   s    N dC  MO O )I"&"<"<T"BA:,,T1iGJ KK4diik5I  6 78 !&"56	78 8J z2J**:)UZ[[r0   r]   c                 :    [        [        R                  " U 6 5      $ )zFlatten a list of lists into a single one

From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. This method is used to
format the output as obtained using multiple jobs.

)list	itertoolschain)r]   s    r   r[   BaseBackend._flatten   s     IOOZ011r0   offsetc                     g)a  The "concrete" phonemization method

Must be implemented in child classes. `separator` and `strip`
parameters are as given to the phonemize() method. `text` is as
returned by _phonemize_preprocess(). `offset` is line number of the
first line in `text` with respect to the original text (this is only
usefull with running on chunks in multiple jobs. When using a single
jobs the offset is 0).

Nr<   )r-   rJ   re   rK   rL   s        r   rT   BaseBackend._phonemize_aux   r>   r0   c                     U R                   (       a  U R                  R                  U5      $ U R                  R                  U5      / 4$ )zPreprocess the text before phonemization

Removes the punctuation (keep trace of punctuation marks for further
restoration if required by the `preserve_punctuation` option).

)r+   r,   preserveremove)r-   rJ   s     r   rX   !BaseBackend._phonemize_preprocess   s@     %%##,,T22&&t,b00r0   c                 `    U R                   (       a  U R                  R                  XX45      $ U$ )zLPostprocess the raw phonemized output

Restores the punctuation as needed.

)r+   r,   restore)r-   r]   r   rK   rL   s        r   r\   "BaseBackend._phonemize_postprocess   s,     %%##++J9\\r0   )r*   r%   r+   r,   )NFN)NFrO   )%__name__
__module____qualname____firstlineno____doc__r   r   r	   r
   boolr   r.   classmethodr)   propertyr   r   staticmethodabcabstractmethodr$   r!   r(   r   rF   r2   r   r   intr^   r   r[   rT   r   rX   r\   __static_attributes__r<   r0   r   r   r      sU   B EI.3,0: :$,U3<-@$A:'+: "&):8 
 
     &  & H  H J  J Oc3h O  O 5S 5 5
 48 % C\d3i C\%i0C\C\ C\ &*#YC\J 2T$s)_ 2 2 	
49 
c 
i 
X\ 
aefiaj 
 

1$s) 
1eCcN>SUY>Y8Z 
1c *3 '+r0   r   )rs   rx   rb   reloggingr   typingr   r   r   r   r   r	   r
   rR   phonemizer.loggerr   phonemizer.punctuationr   phonemizer.separatorr   r   phonemizer.utilsr   ABCr   r<   r0   r   <module>r      s@    5 
  	  C C C  ( . = #`#'' `r0   