
    11i                     ~    S r SSKrSSKJr  SSKJrJrJrJrJ	r	  SSK
r
SSKJr  SSKJr  SSKJrJr   " S S	\5      rg)
z#Segments backend for the phonemizer    N)Logger)OptionalDictListUnionPattern)BaseBackend)	Separator)get_package_resourceversion_as_tuplec                     ^  \ rS rSrSr   SS\S\\\\4      S\	S\\
   4U 4S jjjrS r\S	 5       r\S
 5       r\S 5       r\S 5       r\S\S\	4S j5       r\S\S\R*                  4S j5       rS\\   S\S\S\	S\\   4
S jrSrU =r$ )SegmentsBackend   zvSegments backends for the phonemizer

The phonemize method will raise a ValueError when parsing an
unknown morpheme.

languagepunctuation_markspreserve_punctuationloggerc                 4   > S U l         [        TU ]	  UUUUS9  g )N)r   r   r   )
_tokenizersuper__init__)selfr   r   r   r   	__class__s        U/home/james-whalen/.local/lib/python3.13/site-packages/phonemizer/backend/segments.pyr   SegmentsBackend.__init__$   s*    
 9=/!5	 	 	    c                     U R                  U5      n[        R                  " US9U l        [        R
                  " U5      R                  $ )N)profile)_load_g2p_profilesegments	Tokenizerr   pathlibPathstem)r   r   r   s      r   _init_languageSegmentsBackend._init_language0   s;    ((2",,W= ||H%***r   c                      g)Nr     r(   r   r   nameSegmentsBackend.name8   s    r   c                 4    [        [        R                  5      $ )N)r   r    __version__clss    r   versionSegmentsBackend.version<   s     4 455r   c                     g)NTr(   r-   s    r   is_availableSegmentsBackend.is_available@   s    r   c                      [        S5      n U R                  5        Vs0 s H!  oR                  S:X  d  M  UR                  U_M#     sn$ s  snf )zReturns a dict of language: file supported by the segments backend

The supported languages have a grapheme to phoneme conversion file
bundled with phonemizer. Users can also use their own file as
parameter of the phonemize() function.

r    z.g2p)r   iterdirsuffixr$   )	directoryg2ps     r   supported_languages#SegmentsBackend.supported_languagesD   sU     )4	 %,,.H.C**2F #.H 	H Hs
   A	A	returnc                     [         R                  " U5      R                  5       (       a   U R                  U5        gXR                  5       ;   $ ! [         a     gf = f)NTF)r"   r#   is_filer   RuntimeErrorr9   )r.   r   s     r   is_supported_language%SegmentsBackend.is_supported_languageT   sX    <<!))++%%h/ 22444   s   A 
AAc           
      f   [         R                  " U5      R                  5       (       d   U R                  5       U   n0 n[        USSS9 n[        U5       Hd  u  pEUR                  5       R                  5       n[        U5      S:X  d(  [        SR                  US-   [        U5      U5      5      eUS   X&S	   '   Mf     SSS5        [        R                  " UR                  5        VVs/ s H	  u  pxXxS
.PM     snn6 $ ! [         a    [        SU 35      Sef = f! , (       d  f       Nc= fs  snnf )z,Returns a segments profile from a `language`z$grapheme to phoneme file not found: Nrutf8)encoding   zBgrapheme to phoneme file, line {} must have 2 rows but have {}: {}   r   )Graphememapping)r"   r#   r=   r9   KeyErrorr>   open	enumeratestripsplitlenformatr    Profileitems)	r.   r   r8   flangnumlineeltskvs	            r   r   !SegmentsBackend._load_g2p_profile^   s)    ||H%--//-224X> !(C&1U&u-	zz|))+4yA~&**0&q#d)X*NP P  $AwG . 2 8;		D1+DF 	F%  -":j"#(,-- 21 Es   C? A4D*D-
?D
D*textoffset	separatorrL   c                    ^ ^ U 4S jU 5       nU(       d  S U 5       nS U 5       nS U 5       nU4S jU 5       nU4S jU 5       n[        U5      $ )Nc              3   F   >#    U  H  nTR                  US SS9v   M     g7f)rH   strict)columnerrorsNr   ).0rT   r   s     r   	<genexpr>1SegmentsBackend._phonemize_aux.<locals>.<genexpr>}   s'       OOD8ODs   !c              3   *   #    U  H	  oS -   v   M     g7f) # Nr(   rb   ps     r   rc   rd      s     8Ze)Zs   c              3   D   #    U  H  oR                  S S5      v   M     g7f)rf   z  # Nreplacerg   s     r   rc   rd      s     GJq))E622J    c              3   D   #    U  H  oR                  S S5      v   M     g7f)rf   #Nrj   rg   s     r   rc   rd      s     @Ziis++Zrl   c              3   Z   >#    U  H   oR                  S TR                  5      v   M"     g7f) N)rk   phonerb   rh   r[   s     r   rc   rd      s!     Jz!iiY__55z   (+c              3   Z   >#    U  H   oR                  S TR                  5      v   M"     g7f)rn   N)rk   wordrr   s     r   rc   rd      s!     IjiiY^^44jrs   )list)r   rY   rZ   r[   rL   
phonemizeds   `  `  r   _phonemize_auxSegmentsBackend._phonemize_aux{   sY    
 8Z8JGJGJ AZ@
JzJ
IjI
 Jr   ra   )NFN)__name__
__module____qualname____firstlineno____doc__strr   r   r   boolr   r   r%   staticmethodr)   classmethodr/   r2   r9   r?   r    rP   r   r   intr
   rx   __static_attributes____classcell__)r   s   @r   r   r      s1    EI.3,0
 
$,U3<-@$A
'+
 "&)
 
+   6 6   H H 5S 5T 5 5 F F1A1A F F8 49  c  i  X\  aefiaj    r   r   )r~   r"   loggingr   typingr   r   r   r   r   r    phonemizer.backend.baser	   phonemizer.separatorr
   phonemizer.utilsr   r   r   r(   r   r   <module>r      s0    *   7 7  / * Cs k s r   