
    11i%                         S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
  S SKrS SKrS SKJr  SSKJrJrJrJr  SSKJr  SSKJr  SSKJr   " S	 S
5      rg)    N)AsyncGenerator)NDArray   )MAX_PHONEME_LENGTHSAMPLE_RATEEspeakConfigKoKoroConfig)log)	Tokenizer)trimc                   ^   \ rS rSr  SS\S\S\S-  S\\-  S-  4S jjr\  SS\	R                  S\S\S-  S\\-  S-  4S	 jj5       rS\\-  S-  S
\4S jrS\S\\R                     S\S
\\\R                     \4   4S jrS\S
\\R                     4S jrS\S
\\   4S jr    SS\S\\\R                     -  S\S\S\S\S
\\\R                     \4   4S jjr    SS\S\\\R                     -  S\S\S\S\S
\\\\R                     \4   S4   4S jjrS
\\   4S jrSrg)Kokoro   N
model_pathvoices_pathespeak_configvocab_configc           	         [         R                  " S[        R                  R	                  S5       S[
        R
                  " 5        S[
        R                  " 5        35        [        XU5      U l        U R                  R                  5         S/n[        R                  R                  S5      nU(       a  [        R                  " 5       n[        R                  " S5      nU(       a  U/n[         R                  " SU 35        [        R                  " XS	9U l        ["        R$                  " U5      U l        U R)                  U5      n[+        X8S
9U l        g )Nzkoko-onnx version zkokoro-onnxz on  CPUExecutionProviderzonnxruntime-gpuONNX_PROVIDERzProviders: )	providersvocab)r
   debug	importlibmetadataversionplatformr	   configvalidateutil	find_specrtget_available_providersosgetenvInferenceSessionsessnploadvoices_load_vocabr   	tokenizer)	selfr   r   r   r   r   gpu_enabledenv_providerr   s	            N/home/james-whalen/.local/lib/python3.13/site-packages/kokoro_onnx/__init__.py__init__Kokoro.__init__   s    			 !3!3!;!;M!J K4PXPaPaPcOddefnfvfvfxeyz	
 #:MJ ,,	  nn../@A#%#=#=#?I yy1%I		K	{+,''
H	"$''+"6  ."=>    sessionc                    U R                  U 5      nXl        [        UR                  X#5      Ul        UR                  R                  5         [        R                  " U5      Ul        UR                  U5      n[        X6S9Ul        U$ )Nr   )__new__r)   r	   _model_pathr    r!   r*   r+   r,   r-   r   r.   )clsr6   r   r   r   instancer   s          r2   from_sessionKokoro.from_session9   sm     ;;s#&w':':KW  "''+.$$\2&}Br5   returnc                     [        U[        5      (       a/  [        USS9 n[        R                  " U5      nUS   sSSS5        $ [        U[
        5      (       a  US   $ 0 $ ! , (       d  f       N*= f)zLoad vocabulary from config file or dictionary.

Args:
    vocab_config: Path to vocab config file or dictionary containing vocab.

Returns:
    Loaded vocabulary dictionary or empty dictionary if no config provided.
zutf-8)encodingr   N)
isinstancestropenjsonr+   dict)r/   r   fpr    s       r2   r-   Kokoro._load_vocabK   sa     lC((lW52g 65 lD))((	 65s   A!!
A/phonemesvoicespeedc                 "   [         R                  " SU 35        [        U5      [        :  a  [         R                  " S[         S35        US [         n[
        R
                  " 5       n[        R                  " U R                  R                  U5      [        R                  S9n[        U5      [        ::  d   S[         S35       eU[        U5         nS/UQSP/nSU R                  R                  5        Vs/ s H  ofR                  PM     sn;   aJ  U[        R                  " U[        R                  S9[        R                  " U/[        R                  S9S	.nO*UU[        R                   " S
[        R                  S9U-  S.nU R                  R#                  S U5      S   n[        U5      [$        -  n	[
        R
                  " 5       U-
  n
X-  n[         R                  " SU	S S[        U5       SU
S SUS 35        U[$        4$ s  snf )Nz
Phonemes: z%Phonemes are too long, truncating to 	 phonemes)dtypezContext length is z7, but leave room for the pad token 0 at the start & endr   	input_ids)rN   stylerJ   r   )tokensrO   rJ   zCreated audio in length of .2fzs for z phonemes in zs (RTF: )r
   r   lenr   warningtimer*   arrayr.   tokenizeint64r)   
get_inputsnamefloat32int32onesrunr   )r/   rH   rI   rJ   start_trP   iinputsaudioaudio_durationcreate_durationrtfs               r2   _create_audioKokoro._create_audio]   s    			Jxj)*x=--KK78J7K9U //0))+$..11(;288L6{00 	
 !3 44kl	
0 c&k"/v/q/"499+?+?+AB+Aa66+ABB $%rzz:5':F !"**5=F 		dF+A.Uk1))+/.		).)=VCM?R_`ops_tt|  ~A  BE  }F  G	
 k!!+ Cs   :HrY   c                      U R                   U   $ N)r,   )r/   rY   s     r2   get_voice_styleKokoro.get_voice_style   s    {{4  r5   c                    [         R                  " SU5      n/ nSnU H}  nUR                  5       nU(       d  M  [        U5      [        U5      -   S-   [        :  a#  UR                  UR                  5       5        UnMa  US;   a  XE-  nMm  U(       a  US-  nXE-  nM     U(       a  UR                  UR                  5       5        U$ )zZ
Split phonemes into batches of MAX_PHONEME_LENGTH
Prefer splitting at punctuation marks.
z	([.,!?;]) r   z.,!?;r   )resplitstriprR   r   append)r/   rH   wordsbatched_phoenemescurrent_batchparts         r2   _split_phonemesKokoro._split_phonemes   s     x0')D::<Dt }%D	1A59KK%,,]-@-@-BC$(Mw%-()S0M%-! & $$]%8%8%:;  r5   textlangis_phonemesr   c                    US:  a  US::  d   S5       e[        U[        5      (       a+  X R                  ;   d   SU S35       eU R                  U5      n[        R                  " 5       nU(       a  UnOU R
                  R                  X5      nU R                  U5      n	/ n
[        R                  " S[        U	5       S[        U5       S35        U	 H<  nU R                  XU5      u  pU(       a  [        U5      u  pU
R                  U5        M>     [        R                  " U
5      n
[        R                  " S	[        R                  " 5       U-
  S
 S35        U
[         4$ )z=
Create audio from text using the specified voice and speed.
      ?       @#Speed should be between 0.5 and 2.0Voice  not found in available voiceszCreating audio for z batches for rL   zCreated audio in rQ   s)rA   rB   r,   ri   rT   r.   	phonemizeru   r
   r   rR   re   
trim_audiorp   r*   concatenater   )r/   rw   rI   rJ   rx   ry   r   r^   rH   rr   ra   
audio_part_s                r2   createKokoro.create   s<    |S.SS,eS!!KK'W6%8V)WW'((/E))+H~~//;H 00:		!#&7"8!9s8}oU^_	
 *H ..xFMJ !+: 6
LL$ * u%		%diikG&;C%@BCk!!r5   c                  ^ ^^^^
^#    TS:  a  TS::  d   S5       e[        T[        5      (       a,  TT R                  ;   d   ST S35       eT R                  T5      mU(       a  UnOT R                  R                  X5      nT R                  U5      m
[        R                  " 5       mU
UU UUU4S jn[        R                  " U" 5       5         TR                  5       I Sh  vN n	U	c  gU	7v   M#   N7f)z`
Stream audio creation asynchronously in the background, yielding chunks as they are processed.
r{   r|   r}   r~   r   c                    >#    [        T5       H  u  p[        R                  " 5       nUR                  STR                  UTT	5      I Sh  vN u  p4T
(       a  [        U5      u  p5[        R                  " SU  S35        TR                  X445      I Sh  vN   M     TR                  S5      I Sh  vN   g Nl N$ N	7f)z*Process phoneme batches in the background.NzProcessed chunk z
 of stream)		enumerateasyncioget_event_looprun_in_executorre   r   r
   r   put)r_   rH   loopr   sample_rater   batched_phonemesqueuer/   rJ   r   rI   s         r2   process_batches-Kokoro.create_stream.<locals>.process_batches   s     ()9:--/040D0D$,,hu1 +'
  %/z$:MJ		,QCz:;ii 9:::  ; ))D/!!+ ;!s7   AB>B8A	B>B:B>2B<3B>:B><B>N)rA   rB   r,   ri   r.   r   ru   r   Queuecreate_taskget)r/   rw   rI   rJ   rx   ry   r   rH   r   chunkr   r   s   ` ``  `   @@r2   create_streamKokoro.create_stream   s      |S.SS,eS!!DKK'W6%8V)WW'((/EH~~//;H//9GN}}	" 	"" 	O-.))+%E}K	 %s   CC4"C2#C4c                 Z    [        [        U R                  R                  5       5      5      $ rh   )listsortedr,   keys)r/   s    r2   
get_voicesKokoro.get_voices  s    F4;;++-.//r5   )r    r)   r.   r,   )NN)g      ?zen-usFT)__name__
__module____qualname____firstlineno__rB   r   rE   r3   classmethodr$   r(   r<   r-   r   r*   rZ   floattupleintre   ri   r   ru   boolr   r   r   r   __static_attributes__ r5   r2   r   r      s8   
 .2*. ? ?  ? $d*	 ?
 Sj4' ?D 
 .2*.$$  $d*	
 Sj4' "s
T(9 d $&"&"$+BJJ$7&"@E&"	wrzz"C'	(&"P!C !GBJJ,? ! !  !S	  !L !'"'" WRZZ(('" 	'"
 '" '" '" 
wrzz"C'	('"Z !11 WRZZ((1 	1
 1 1 1 
gbjj1367=	>1f0DI 0r5   r   )r   r   importlib.metadataimportlib.utilrD   r&   r   rm   rT   collections.abcr   numpyr*   onnxruntimer$   numpy.typingr   r    r   r   r   r	   r
   r.   r   r   r   r   r   r5   r2   <module>r      sG         	  	  *     O O    $q0 q0r5   