
    11i<"                         S r SSKrSSKrSSKJrJrJrJr  SSKJ	r	  SSK
Jr  Sr\R                  " S/ SQ5      r " S	 S
5      rg)z(Implementation of punctuation processing    N)ListUnionTuplePattern)str2list)	Separatoru!   ;:,.!?¡¿—…"«»“”(){}[]_mark_index)indexmarkpositionc                   l   \ rS rSrSr\4S\\\4   4S jjr	\
S 5       r\S 5       r\R                  S\\\4   4S j5       rS	\\\\   4   S
\\\\   4   4S jrS	\\\   \4   S
\\\\      \\   4   4S jrS\S\S
\\\   \\   4   4S jr\S	\\\\   4   S\\   S\S\S
\\   4
S j5       rSrg)Punctuation   a2  Preserve or remove the punctuation during phonemization

Backends behave differently with punctuation: festival and espeak ignore it
and remove it silently whereas segments will raise an error. The
Punctuation class solves that issue by "hiding" the punctuation to the
phonemization backend and restoring it afterwards.

Parameters
----------
marks (str or re.Pattern) : The punctuation marks to consider for processing
    (either removal or preservation). If a string, each mark must be made of
    a single character. Default to Punctuation.default_marks().

marksc                 ,    S U l         S U l        Xl        g N_marks	_marks_rer   )selfr   s     P/home/james-whalen/.local/lib/python3.13/site-packages/phonemizer/punctuation.py__init__Punctuation.__init__/   s    '+
    c                      [         $ )z1Returns the default punctuation marks as a string)_DEFAULT_MARKS r   r   default_marksPunctuation.default_marks4   s
     r   c                 R    U R                   (       a  U R                   $ [        S5      e)z!The punctuation marks as a stringzCpunctuation initialized from regex, cannot access marks as a string)r   
ValueError)r   s    r   r   Punctuation.marks9   s!     ;;;;^__r   valuec                    [        U[        5      (       a3  [        R                  " SUR                  -   S-   5      U l        S U l        g [        U[        5      (       a]  SR                  [        U5      5      U l        [        R                  " S[        R                  " U R                  5       S35      U l        g [        S5      e)Nz((z)|\s)+ z(\s*[z]+\s*)+z;punctuation marks must be defined as a string or re.Pattern)
isinstancer   recompilepatternr   r   strjoinsetescaper!   )r   r#   s     r   r   r"   @   s    eW%%ZZ(=	(IJDNDKs##''#e*-DK  ZZ54;;1G0H(PQDNZ[[r   textreturnc                    ^  S[         S[         4U 4S jjn[        U[         5      (       a  U" U5      $ U Vs/ s H
  o2" U5      PM     sn$ s  snf )zReturns the `text` with all punctuation marks replaced by spaces

The input `text` can be a string or a list and is returned with the
same type and punctuation removed.

r.   r/   c                 d   > [         R                  " TR                  SU 5      R                  5       $ )N )r'   subr   strip)r.   r   s    r   auxPunctuation.remove.<locals>.auxW   s#    66$..#t4::<<r   )r*   r&   )r   r.   r5   lines   `   r   removePunctuation.removeO   sK    	=c 	=c 	= dC  t9&*+ddD	d+++s   Ac                     [        U5      n/ n/ n[        U5       H   u  pEU R                  XT5      u  pVX%-  nX6-  nM"     U Vs/ s H  oU(       d  M  UPM     snU4$ s  snf )zRemoves punctuation from `text`, allowing for furter restoration

This method returns the text as a list of punctuated chunks, along with
a list of punctuation marks for furter restoration:

    'hello, my world!' -> ['hello', 'my world'], [',', '!']

)r   	enumerate_preserve_line)r   r.   preserved_textpreserved_marksnumr7   r   s          r   preservePunctuation.preserve^   sj     #4."4IC--d8KD"N$O ) "0848/II8s   
AAr7   r?   c                    [        [        R                  " U R                  U5      5      nU(       d  U// 4$ [	        U5      S:X  a&  US   R                  5       U:X  a  / [        X!S5      /4$ / nU H  nSnXSS   :X  a'  UR                  UR                  5       5      (       a  SnO.XSS   :X  a&  UR                  UR                  5       5      (       a  SnUR                  [        X%R                  5       U5      5        M     / nU HS  nUR                  UR                  5      n	U	S   UR                  R                  U	SS 5      pUR                  U
5        UnMU     Xq/-   U4$ )	z+Auxiliary method for Punctuation.preserve()   r   AIBEN)listr'   finditerr   lengroup
_MarkIndex
startswithendswithappendsplitr   r+   )r   r7   r?   matchesr   matchr   preserved_liner   rQ   prefixsuffixs               r   r<   Punctuation._preserve_lineq   s>   r{{4>>48962: w<1!1!1!3t!;
3c2333 E H
"tu{{}'E'E"+%$--*F*FLLCAB  DJJtyy)E"1Xtyy~~eABi'@F!!&)D	  &--r   sepr4   c           
         [        U5      n/ nSnU(       d  U(       Ga  U(       dd  U HZ  nU(       d?  UR                  (       a.  UR                  UR                  5      (       d  XsR                  -   nUR                  U5        M\     / nGO"U(       dK  UR                  [        R
                  " SUR                  SR                  S U 5       5      5      5        / nGOUS   nUR                  U:X  Ga  US   n	USS n[        R
                  " SUR                  U	R                  5      n	UR                  (       aB  US   R                  UR                  5      (       a  US   S[        UR                  5      *  US'   UR                  S:X  a  XS   -   US'   GOUR                  S:X  aX  UR                  US   U	-   U(       d   U	R                  UR                  5      (       a  SOUR                  -   5        USS nUS-   nOUR                  S	:X  aL  UR                  X(       d   U	R                  UR                  5      (       a  SOUR                  -   5        US-   nOQ[        U5      S:X  a  US   U	-   US'   O6US   n
USS nX-   US   -   US'   OUR                  US   5        USS nUS-   nU(       a  GM  U(       a  GM  U$ )
aJ  Restore punctuation in a text.

This is the reverse operation of Punctuation.preserve(). It takes a
list of punctuated chunks and a list of punctuation marks, as well as
the separator and strip parameters used by phonemize. It returns the
punctuated text as a list:

    ['hello', 'my world'], [',', '!'] -> ['hello, my world!']

r   r2   r%   c              3   8   #    U  H  oR                   v   M     g 7fr   )r   ).0ms     r   	<genexpr>&Punctuation.restore.<locals>.<genexpr>   s     D[UZPQVVUZs   rC   NrF   rH   rD   )r   wordrO   rP   r'   r3   r+   r
   r   rK   r   )clsr.   r   rX   r4   punctuated_textposr7   current_markr   
first_words              r   restorePunctuation.restore   sr    ~e D SXXdmmCHH6M6M#hh#**40	 !
   &&rvvc388RWWD[UZD[=['\]  %Qx%%, !8D!!"IE66#sxx;D xxDG$4$4SXX$>$>"&q'/CM>":Q#,,3"&a.Q%..#5'..tAw~uPTP]P]^a^f^fPgPgmpmumu/vw#ABx!Ag%..#5'..tUdmmTWT\T\F]F]rcfckck/lm!Ag t9> '+1gnDG)-aJ#'8D&0&7$q'&ADG $**4738D'Ci deen r   r   N)__name__
__module____qualname____firstlineno____doc__r   r   r*   r   r   staticmethodr   propertyr   setterr   r8   r   rM   r@   intr<   classmethodr   boolre   __static_attributes__r   r   r   r   r      si    5C eCL1 
   ` ` \\\5g. \ \,5d3i0 ,U3S	>5J ,JU49c>2 JuT$s)_dS]N^=^7_ J&.3 .S .U49d:FV;V5W .B I5d3i0 IJ'II I !%S	I Ir   r   )rk   collectionsr'   typingr   r   r   r   phonemizer.utilsr   phonemizer.separatorr   r   
namedtuplerM   r   r   r   r   <module>rx      sE    /  	 . . % * 5##02
} }r   