
    11i=                         S r SSKrSSKrSSKrSSKrSSKJrJrJrJ	r	J
r
  SSKJr   " S S5      rS rS rS	 rS
 r\S 5       r\S:X  a  \" 5         gg)zBCommand-line phonemizer tool, have a 'phonemizer --help' to get in    N)	phonemize	separatorversionloggerpunctuation)BACKENDSc                   4    \ rS rSrSrS rS r\S 5       rSr	g)CatchExceptions   a\  Decorator wrapping a function in a try/except block

When an exception occurs, display a user friendly message on
standard output before exiting with error code 1.

The detected exceptions are ValueError, OSError, RuntimeError,
AssertionError and KeyboardInterrupt.

Parameters
----------
function :
    The function to wrap in a try/except block

c                     Xl         g )Nfunction)selfr   s     I/home/james-whalen/.local/lib/python3.13/site-packages/phonemizer/main.py__init__CatchExceptions.__init__*   s         c                      U R                  5         g! [        [        [        [        [
        4 a*  nU R                  SR                  U5      5         SnAgSnAf[         a    U R                  S5         gf = f)z9Executes the wrapped function and catch common exceptionszfatal error: {}Nzkeyboard interruption, exiting)	r   IOError
ValueErrorOSErrorRuntimeErrorAssertionErrorexitformatKeyboardInterrupt)r   errs     r   __call__CatchExceptions.__call__-   s^    	8MMOWn. 	5II'..s344  	8II67	8s    A8 AA87A8c                     [         R                  R                  U R                  5       S-   5        [         R                  " S5        g)z0Write `msg` on stderr and exit with error code 1
   N)sysstderrwritestripr   )msgs    r   r   CatchExceptions.exit9   s,     	

t+,r   r   N)
__name__
__module____qualname____firstlineno____doc__r   r   staticmethodr   __static_attributes__ r   r   r
   r
      s%    !
8  r   r
   c            	         [         R                  " [         R                  SSS9n U R                  SSSSS9  U R	                  5       nUR                  S	S
SSS9  UR                  SSSSS9  U R                  SS[
        SSSS9  U R                  S5      nUR                  S[        R                  SSSS9  UR                  SS[        R                  SSS9  UR                  SS S!SS"S#S$9  UR                  S%SS&S9  U R                  S'5      nUR                  S(S)S"S*/ S+QS,S-9  UR                  S.S/SS0S9  U R                  S15      nUR                  S2S3S4S5S6S79  U R                  S85      nUR                  S9S:S"[        R                  R                  S;S79  UR                  S<S=S"[        R                  R                  S>S79  UR                  S?S@S"[        R                  R                  SAS79  UR                  SBSSCS9  U R                  SD5      n [        SE   R!                  5       nUR                  SFS*[$        SGSHU SI3SJ9  UR                  SKSS S!SLSMSN9  UR                  SOSSPS9  UR                  SQSR/ SSQSTSU9  UR                  SVSW/ SXQSYSU9  U R                  SZ5      n [        S[   R'                  5       nUR                  S\S*[$        S]S^U S_3SJ9  U R                  S`SaSb9nUR                  ScSSdS9  UR                  Se[$        S"[(        R*                  R-                  5       SfS9  UR                  SgSShS9  U R/                  5       $ ! ["         a    S*n GN0f = f! ["         a    S*n Nf = f)iz,Argument parser for the phonemization scripta  Multilingual text to phonemes converter

The 'phonemize' program allows simple phonemization of words and texts
in many language using four backends: espeak, espeak-mbrola, festival
and segments.

- espeak is a text-to-speech software supporting multiple languages
  and IPA (International Phonetic Alphabet) output. See
  http://espeak.sourceforge.net or
  https://github.com/espeak-ng/espeak-ng

- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
  installed as well as additional mbrola voices. It does not support word or
  syllable tokenization. See
  https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md

- festival is also a text-to-speech software. Currently only American
  English is supported and festival uses a custom phoneset
  (http://www.festvox.org/bsv/c4711.html), but festival is the only
  backend supporting tokenization at the syllable
  level. See http://www.cstr.ed.ac.uk/projects/festival

- segments is a Unicode tokenizer that build a phonemization from a
  grapheme to phoneme mapping provided as a file by the user. See
  https://github.com/cldf/segments.

See the '--list-languages' option below for details on the languages
supported by each backend.

un  
Examples:

* Phonemize a US English text with espeak

   $ echo 'hello world' | phonemize -l en-us -b espeak
   həloʊ wɜːld

* Phonemize a US English text with festival

   $ echo 'hello world' | phonemize -l en-us -b festival
   hhaxlow werld

* Phonemize a Japanese text with segments

  $ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
  konnitʃiwa t͡sekai

* Add a separator between phones

  $ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
  hh-ax-l-ow w-er-l-d

* Phonemize some French text file using espeak

  $ phonemize -l fr-fr -b espeak text.txt -o phones.txt
        )formatter_classdescriptionepilogz-Vz	--version
store_truez"show version information and exit.)actionhelpz-vz	--verbosezEwrite all log messages to stderr (displays only warnings by default).z-qz--quietz.do not display any log message, even warnings.z-jz--njobsz<int>r"   z0number of parallel jobs, default is %(default)s.)typemetavardefaultr7   zinput/outputinput?z<file>z?input text file to phonemize, if not specified read from stdin.)r:   nargsr9   r7   z-oz--outputz<output text file to write, if not specified write to stdout.)r:   r9   r7   z--prepend-textFTz<str>a:  prepend each line of the phonemized output text with its
        matching input text. If a string is specified as option value, use it
        as field separator, else use one of "|", "||", "|||", "||||" by
        selecting the first one that is not configured as a token separator
        (see -p/-s/-w options).)r:   constr=   r9   r7   z--preserve-empty-lineszUpreserve the empty lines in the phonemized output, default is
        to remove them.backendsz-bz	--backendN)espeakespeak-mbrolafestivalsegmentsztthe phonemization backend, must be 'espeak', 'espeak-mbrola',
        'festival' or 'segments'. Default is 'espeak'.)r9   r:   choicesr7   z-Lz--list-languageszllist available languages (and exit) for the specified backend,
        or for all backends if none selected.languagez-lz
--languagez
<str|file>zen-usz~the language code of the input text, use '--list-languages'
        for a list of supported languages. Default is %(default)s.)r9   r:   r7   ztoken separatorsz-pz--phone-separatorz*phone separator, default is "%(default)s".z-wz--word-separatorzVword separator, not valid for espeak-mbrola backend,
        default is "%(default)s".z-sz--syllable-separatorzsyllable separator, only valid for festival backend,
        this option has no effect if another backend is used.
        Default is "%(default)s".z--stripz0removes the end separators in phonemized tokens.zspecific to espeak backendr@   z--espeak-libraryz	<library>zthe path to the espeak shared library to use (*.so on Linux,
        *.dylib on Mac and *.dll on Windows, useful to overload the default
        espeak version installed on the system). Default to
        zc. This path can also be specified
        using the PHONEMIZER_ESPEAK_LIBRARY environment variable.)r:   r8   r9   r7   z--tiez<chr>u   when the option is set, use a tie character within multi-letter
        phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
        only compatible with espeak>1.48 and incompatible with the
        -p/--phone-separator option)r=   r:   r>   r9   r7   z--with-stressu   when the option is set, the stresses on phonemes are present
        (stresses characters are ˈ'ˌ). By default stresses are removed.z--language-switch
keep-flags)rF   zremove-flagszremove-utterancea)  espeak can pronounce some words in another language (typically
        English) when phonemizing a text. This option setups the policy to use
        when such a language switch occurs. Three values are available:
        'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
        'keep-flags' policy keeps the language switching flags, for example
        (en) or (jp), in the output. The 'remove-flags' policy removes them and
        the 'remove-utterance' policy removes the whole line of text including
        a language switch.)r:   rD   r7   z--words-mismatchignore)rG   warnremovea  espeak can join two consecutive words or drop some words,
        yielding a word count mismatch between orthographic and phonemized
        text. This option setups the policy to use when such a words count
        mismatch occurs. Three values are available: 'ignore' (the default)
        which do nothing, 'warn' which issue a warning for each mismatched
        line, and 'remove' which remove the mismatched lines from the
        output.zspecific to festival backendrB   z--festival-executablez<executable>zthe path to the festival executable to use (useful to
        overload the default festival installed on the system). Default to
        zh. This path can also be specified using the
        PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.zpunctuation processingz'not available for espeak-mbrola backend)r3   z--preserve-punctuationz[preserve the punctuation marks in the phonemized output,
        default is to remove them.z--punctuation-markszythe marks to consider during punctuation processing (either
        for removal or preservation). Default is %(default)s.z--punctuation-marks-is-regexzfinterpret the '--punctuation-marks' parameter as a regex.
        Default is to interpret as a string.)argparseArgumentParserRawDescriptionHelpFormatteradd_argumentadd_mutually_exclusive_groupintadd_argument_groupr#   stdinstdoutr   default_separatorphonewordsyllabler   libraryr   str
executabler   Punctuationdefault_marks
parse_args)parsergroupespeak_libraryfestival_executables       r   r\   r\   @   s   $$ <<<A:Fz k1  3
 //1E	k/  0
 
i=  ?
 i'1?  A %%n5E			hN  P
 
j

HK  M
 
Tg#  $ 
    %%j1E	kC:	  ; 
 1  2 %%j1E	lgF  G %%&89E	!!<!<!B!B9  ;
 
 !<!<!A!A%  & 
$!<!<!E!E%  & 
?  A
 %%&BCE!(+335 
3	 
 BE  F 
5g'  ( 
M  N
 
B	   
">  	 %%&DEE#&z2==? 
3		 =@  A %% = & ?E 
 &  '
 
'''557A	  B 
&0  1 a  b  #"#s$   M M MMM'&M'c                     U (       d  [         R                  " 5       OU / HW  n[        SU S3SR                  S [	        [         U   R                  5       R                  5       5       5       5      -   5        MY     g)z@Returns the available languages for the given `backend` as a strzsupported languages for z are:
r!   c              3   6   #    U  H  u  pS U SU 3v   M     g7f)	z	->	Nr0   ).0kvs      r   	<genexpr>!list_languages.<locals>.<genexpr>7  s*      B 5ADA1#VA3' 5As   N)r   keysprintjoinsortedsupported_languagesitems)args_backendbackends     r   list_languagesrq   2  sr    *68==?\NJ&wiw7II BF!557==?5A B BB	C Kr   c                 T    SnU (       a  SnO	U(       a  Sn[         R                  " US9$ )zReturns a configured loggernormalverbosequiet)	verbosity)r   
get_logger)rt   ru   rv   s      r   rw   rw   ;  s(    I			y11r   c                 D    [        U [        5      (       a
  [        XSS9$ U $ )z,If `stream` is a filename, open it as a fileutf8)encoding)
isinstancerX   open)streammodes     r   setup_streamr   E  s!    &#F622Mr   c                  2  ^ [        5       n U R                  (       a"  [        S   R                  U R                  5        U R                  (       a"  [        S   R                  U R                  5        U R                  (       a  [        [        R                  " 5       5        gU R                  (       a  [        [        U R                  5      5        gU R                  =(       d    SU l	        [        U R                  U R                  5      n[        U R                  S5      nUR                  SUR                   5        [        U R"                  S5      nUR                  SUR                   5        U R                  S:X  a2  UR                  S	5        [$        R&                  " U R(                  SSS
9nO4[$        R&                  " U R(                  U R*                  U R,                  S
9nUR                  SU5        U R.                  (       a.  UR1                  U R.                  5      mUR                  ST5        OSmU R2                  (       aB   UR                  SU R4                  5        [6        R8                  " U R4                  5      U l        [A        URC                  5       U RD                  U R                  UU RF                  U R.                  U RH                  U RJ                  U R4                  U RL                  U RN                  U RP                  U RR                  U RT                  US9nU(       aQ  T(       aJ  URW                  [X        RZ                  R]                  U4S jU 5       5      [X        RZ                  -   5        gU(       a@  URW                  [X        RZ                  R]                  U5      [X        RZ                  -   5        gg! [6        R:                   a9    UR=                  5         UR=                  5         [?        SU R4                   35      ef = f)z,Phonemize a text from command-line argumentsr@   rB   Nrzreading from %swzwriting to %srA   z4using espeak-mbrola backend: ignoring word separator)rT   rV   rU   zseparator is %sz/prepend input text to output, separator is "%s"Fzpunctuation marks is regex %sz!can't compile regex pattern from )rE   rp   r   r&   prepend_textpreserve_empty_linespreserve_punctuationpunctuation_markswith_stresstielanguage_switchwords_mismatchnjobsr   c              3   D   >#    U  H  nUS     ST SUS    3v   M     g7f)r    r"   Nr0   )rd   lineinput_output_separators     r   rg   main.<locals>.<genexpr>  s3      !D 7)134Ad1gY?s    )/r\   r_   r   set_libraryr`   set_executabler   rj   rq   rp   rw   rt   ru   r   r;   debugnameoutputr   	Separatorphone_separatorsyllable_separatorword_separatorr   r   punctuation_marks_is_regexr   recompileerrorcloser   r   	readlinesrE   r&   r   r   r   r   r   r   r   r%   oslineseprk   )argslogstreamin	streamoutsepoutr   s         @r   mainr   M  s    <D &&t':':;++D,D,DE ||goo  nT\\*+ <<+8DL T\\4::
.C DJJ,HII/T[[#.IIIoy~~. ||&		HI!!&&
 !!&&,,$$& II%!$!;!;D<M<M!N		="	$ "'&&	[II5t7M7MN%'ZZ0F0F%GD" jj&&!66!6600$$HH,,**jjC" %JJOO !! ! jj	
 


,rzz9: 
= xx 	[NNOO@AWAW@XYZZ		[s   AO	 	AP__main__)r-   rJ   r   r#   r   
phonemizerr   r   r   r   r   phonemizer.backendr   r
   r\   rq   rw   r   r   r)   r0   r   r   <module>r      sm     I  	 
 	 I I '" "JodC2 Z; Z;z zF r   