
    љiPB                         S SK r S SKrS SKrS SKrS SKrS SKrS SKJrJ	r	J
r
JrJr  SSKJr  SSKJr  \R"                  " \5      r " S S\5      r " S S	\5      rg)
    N)AsyncGeneratorCallableOptionalDict	Awaitable   )SpeechDetector)RecordingSessionBasec                   D   ^  \ rS rSrSS\S\4U 4S jjjrU 4S jrSrU =r	$ )RecordingSession   
session_idpreroll_buffer_countc                 j   > [         TU ]  X5        S U l        [        5       U l        X0l        SU l        g NF)super__init__amplitude_threshold	bytearray
vad_buffervad_iteratoris_speech_active)selfr   r   r   	__class__s       Q/home/james-whalen/.local/lib/python3.13/site-packages/aiavatar/sts/vad/silero.pyr   RecordingSession.__init__   s/    :48 %.[(&+    c                    > [         TU ]  5         SU l        S U l        U R                  (       a  U R                  R                  5         g g r   )r   resetr   r   r   reset_states)r   r   s    r   r   RecordingSession.reset   s<     %#' **, r   )r   r   r   r   )   N)
__name__
__module____qualname____firstlineno__strintr   r   __static_attributes____classcell__r   s   @r   r   r      s%    ,3 ,c , ,- -r   r   c            !       n  ^  \ rS rSrSSSSSSSSS	SSS
SSSS	S.S\\   S\S\S\S\S\S\S\\\/\4      S\	S\\
   S\S\S\S\\\
/\S   4      S\S\	4 U 4S jjjrS\4S jrS \S\4S! jr\S\4S" j5       r\R$                  S#\\   4S$ j5       rS@S\\
   4S% jjrS&\S\R*                  4S' jrS(\
4S) jrS&\S*\S\	4S+ jrS,\S-\S(\
4S. jrS/\S(\
S\	4S0 jrS1\\S4   S(\
4S2 jrS3 rS(\
4S4 jrS(\
4S5 jr S(\
4S6 jr!S(\
S7\
4S8 jr"SAS(\
S7\
S#\#S9\	4S: jjr$S;\4S< jr%S@S(\
4S= jjr&S(\
S#\4S> jr'S?r(U =r)$ )BSileroSpeechDetector    Ng      ?g      $@g?i>  r   r"   Fi   g      ?)volume_db_thresholdsilence_duration_thresholdmax_durationmin_durationsample_ratechannelsr   to_linear16debug
model_pathspeech_probability_threshold
chunk_sizemodel_pool_sizeon_recording_started!on_recording_started_min_durationuse_vad_iteratorr/   r0   r1   r2   r3   r4   r   r5   r6   r7   r8   r9   r:   r;   r<   r=   c                r  > [         TU ]  UUS9  Xl        Ub  SSU R                  S-  -  -  U l        OS U l        X l        X0l        X@l        X`l        Xl	        Xpl
        Xl        0 U l        U(       a  U R                  R                  U5        Xl        Xl        Xl        UU l        U R'                  U
5        g )N)r3   r<     
         4@)r   r   _volume_db_thresholdr/   r   r0   r1   r2   r4   r6   r   r5   recording_sessions_on_recording_startedappendr8   r9   r:   r=   _init_silero_model)r   r/   r0   r1   r2   r3   r4   r   r5   r6   r7   r8   r9   r:   r;   r<   r=   r   s                    r   r   SileroSpeechDetector.__init__!   s    ( 	#.O 	 	
 %8!*',t7O7ORV7V0W'XD$'+D$*D'(( 
$8!&?A&&--.BC -I)$. 0 	
+r   returnc                     U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.$ )Nr/   r0   r1   r2   r3   r4   r   r6   r8   r9   r=   rJ   r   s    r   
get_configSileroSpeechDetector.get_configR   si    #'#;#;*.*I*I -- --++$($=$=ZZ,0,M,M// $ 5 5
 	
r   configc                    U R                  5       R                  5       n0 nUR                  5        H?  u  pEUc  M
  XB;  a  M  US:X  a  U R                  U5        XSU'   M.   [	        XU5        XSU'   MA     U$ ! [
         a     MR  f = f)Nr8   )rL   keysitems set_speech_probability_thresholdsetattr	Exception)r   rN   allowed_keysupdatedkvs         r   
set_configSileroSpeechDetector.set_configa   s    (--/LLNDAy$2255a8
DQ'!"AJ #  ! s    A66
BBc                     U R                   $ N)rB   rK   s    r   r/   (SileroSpeechDetector.volume_db_thresholdt   s    (((r   valuec                     Xl         Ub6  SSUS-  -  -  U l        [        R                  SU SU R                   35        g S U l        [        R                  S5        g )Nr?   r@   rA   zUpdated volume_db_threshold to z dB, amplitude_threshold=z'Volume threshold disabled (set to None))rB   r   loggerr6   )r   r^   s     r   r/   r]   x   s\    $)!',ut|0D'ED$LL:5'AZ[_[s[sZtuv'+D$LLBCr   c                     / U l         / U l        [        U R                  5       GH  nU(       aB  [        R
                  R                  U5      n[        R                  R                  SSSSS9u  pEO"[        R                  R                  SSSSS9u  p5U R                   R                  U5        U R                  R                  [        R                  " 5       5        US:X  d  M  US   U l        US   U l        US   U l        US   U l        US	   U l        U R                  U l        GM     ["        R%                  S
U R                   S35        g! [&         a  n["        R)                  SU 35        e SnAff = f)z Initialize Silero VAD model poolzsnakers4/silero-vad
silero_vadF)repo_or_dirmodelforce_reloadonnxr   r            z4Silero VAD model pool initialized successfully with z modelsz,Failed to initialize Silero VAD model pool: N)
model_poolmodel_locksranger:   torchjitloadhubrE   	threadingLockget_speech_timestamps
save_audio
read_audioVADIteratorcollect_chunksVADIteratorClassr`   inforT   error)r   r7   ird   _utilsexs          r   rF   'SileroSpeechDetector._init_silero_model   sV   )	 DO!D4//0!IINN:6E$yy~~$9*%*"	  .  HAu $)99>>$9*%*"	 $2 $LE &&u-  ''	(89 616qD.&+AhDO&+AhDO',QxD$*/(D',0,<,<D)= 1@ KKNtOcOcNddklm 	LLGtLM	s   CE %A+E 
E9E44E9audio_bytesc                     [         R                  " U[         R                  S9nUR                  [         R                  5      S-  nU$ )N)dtypeg      @)np
frombufferint16astypefloat32)r   r   audio_int16audio_float32s       r   _bytes_to_numpy$SileroSpeechDetector._bytes_to_numpy   s7    mmKrxx@ $**2::6@r   r   c                     U R                   S:X  a  U R                  S   U R                  S   4$ [        U5      U R                   -  nU R                  U   U R                  U   4$ )z9Get model and lock for a session using consistent hashingr   r   )r:   rj   rk   hash)r   r   	model_idxs      r   _get_model_and_lock(SileroSpeechDetector._get_model_and_lock   se    1$??1%t'7'7'::: $t';';;	y)4+;+;I+FFFr   sessionc                     U R                  U5      n[        R                  " U5      nU R                  UR                  5      u  pVU   U R
                  (       aG  UR                  USS9nU(       a  SU;   a  SUl        OSU;   a  SUl        UR                  sS S S 5        $ [        R                  " 5          U" X@R                  5      R                  5       nS S S 5        WU R                  :  sS S S 5        $ ! , (       d  f       N&= f! , (       d  f       g = f! [         a"  n	[        R                  SU	 35         S n	A	gS n	A	ff = f)NF)return_secondsstartTendzError in Silero VAD detection: )r   rm   
from_numpyr   r   r=   r   r   no_gradr3   itemr8   rT   r`   rz   )
r   r   r   audio_npaudio_tensorrd   
model_lockspeech_dictspeech_probes
             r   _detect_speech_silero*SileroSpeechDetector._detect_speech_silero   s    	++K8H !++H5L !% 8 89K9K LE((")"6"6|TY"6"ZK #"k17;G4"k17<G4 #33   &+L:J:J&K&P&P&R )&)J)JJ%   )! (  	LL:1#>?	s[   AD AD	D !D6!C7D-	D 7
D	D
DD D 
E#E  Erecorded_datarecorded_durationc                 H   #    U R                  US S X#5      I S h  vN   g  N7fr\   )_execute_on_speech_detected)r   r   r   r   s       r   execute_on_speech_detected/SileroSpeechDetector.execute_on_speech_detected   s      ..}dDJ[hhhs   " "samplesc           
      `
  #    U R                   (       a  U R                  U5      nU R                  U5      nU R                  5       (       aZ  UR                  5         UR                  R                  5         UR                  R                  5         [        R                  S5        gUR                  R                  U5        UR                  R                  U5        [        U5      S-  U R                  U R                  -  -  nSn[        UR                  5      U R                  S-  :  a  [        UR                  U R                  * S-  S  5      nU R!                  Xc5      nU(       aN  UR"                  bA  [%        ['        S [(        R*                  " SU5       5       5      5      nXsR"                  ::  a  Sn[        UR                  5      U R                  S-  :  a"  UR                  U R                  * S-  S  Ul        U R                  (       aA  [        R                  SU SUR,                  S	 S
UR.                  S	 SUR0                   35        U(       a  U R3                  U5      I S h  vN   UR4                  (       d  U(       au  UR                  5         SUl        UR                   H  nUR6                  R                  U5        M      UR6                  R                  U5        U=R,                  U-  sl        UR4                  $ UR6                  R                  U5        U=R,                  U-  sl        U(       a  SUl        OU=R.                  U-  sl        U R9                  U5      I S h  vN   UR.                  U R:                  :  a  UR,                  UR.                  -
  n	XR<                  :  a+  U R                  (       a  [        R?                  SU	 S35        OoU R                  (       a  [        R?                  SU	 S35        [        UR6                  5      n
[@        RB                  " U RE                  XUR0                  5      5        UR                  5         UR4                  $ UR,                  U RF                  :  a  U R                  (       a#  [        R?                  SUR,                   S35        [        UR6                  5      n
[@        RB                  " U RE                  XR,                  UR0                  5      5        UR                  5         UR4                  $  GN GN7f)NzSileroSpeechDetector is muted.Frg   c              3   <   #    U  H  u  n[        U5      v   M     g 7fr\   )abs).0samples     r   	<genexpr>7SileroSpeechDetector.process_samples.<locals>.<genexpr>  s     )gEf'&#f++Efs   z<hri   zSpeech detected: z, duration: z.2fz, silence: z, session: Tr   zRecording too short: z seczRecording finished: z Recording max duration reached: )$r5   get_sessionshould_muter   preroll_bufferclearr   r`   r6   rE   extendlenr3   r4   r9   bytesr   r   floatmaxstructiter_unpackrecord_durationsilence_durationr   _execute_on_voicedis_recordingbuffer$_check_and_trigger_recording_startedr0   r2   ry   asynciocreate_taskr   r1   )r   r   r   r   sample_durationspeech_detected	vad_chunkmax_amplitudefr   r   s              r   process_samples$SileroSpeechDetector.process_samples   sJ    &&w/G"":.MMO""((*$$&LL9:%%g.!!'* w<!+0@0@4==0PQ  w!!"doo&99g00$//1AA1E1FGHI"88LO7#>#>#J %c)gVEWEWX\^eEf)g&g h $?$??&+O 7%%&1)<<%,%7%78H18L8M%N"::LL,_,=\'JaJabeIffqry  sK  sK  LO  rP  P[  \c  \n  \n  [o  p  q))*555##'+$ //ANN))!, 0 %%g.''?:'F ###? NN!!'*##6#+,(((O;( ;;GDDD''4+J+JJ$+$;$;g>V>V$V!$'8'88zz&;<M;Nd$STzz&:;L:MT$RS$)'..$9M''(G(Gjqj|j|(}~ ### ((D,=,==::KK"B7CZCZB[[_ `a %gnn 5##D$C$CMSjSjlsl~l~$  A###_ 64 Es&   I*T.,T(-DT.2T+3F6T.+T.input_streamc                 ,  #    [         R                  S5        U  S h  vN nU(       d    O9U R                  X25      I S h  vN   [        R                  " S5      I S h  vN   MK  U R                  U5        [         R                  S5        g  Nm NM N1
 N/7f)Nz-SileroSpeechDetector start processing stream.g-C6?z.SileroSpeechDetector finish processing stream.)r`   ry   r   r   sleepdelete_session)r   r   r   datas       r   process_stream#SileroSpeechDetector.process_stream?  su     CD& 	($&&t888--'''J'DE	( 9'	 'sI   BBBBB BBB-BBBBBc                 .   #    U R                  U5        g 7fr\   )r   r   r   s     r   finalize_session%SileroSpeechDetector.finalize_sessionL  s     J's   c                 6   U R                   R                  U5      nUc\  U R                  U5      u  p4U R                  UU R                  U R
                  S9n[        XR                  U5      nX R                   U'   UR                  c  U R                  Ul        U$ )N	thresholdsampling_rate)	rC   getr   rx   r8   r3   r   r   r   )r   r   r   rd   r|   r   s         r   r    SileroSpeechDetector.get_sessionO  s    ))--j9?//
;HE00;;".. 1 L
 'z3L3Ll[G29##J/&&.*.*B*BG'r   c                 j    U R                   R                  U5      =n(       a  UR                  5         g g r\   )rC   r   r   r   r   r   s      r   reset_session"SileroSpeechDetector.reset_session_  s,    --11*==7=MMO >r   c                 x    XR                   ;   a+  U R                   U   R                  5         U R                   U	 g g r\   )rC   r   r   s     r   r   #SileroSpeechDetector.delete_sessionc  s8    000##J/557''
3 1r   keyc                 ~    U R                   R                  U5      nU(       a  UR                  R                  U5      $ g r\   )rC   r   r   )r   r   r   r   s       r   get_session_data%SileroSpeechDetector.get_session_datah  s5    ))--j9<<##C(( r   create_sessionc                     U(       a  U R                  U5      nOU R                  R                  U5      nU(       a  X5R                  U'   g g r\   )r   rC   r   r   )r   r   r   r^   r   r   s         r   set_session_data%SileroSpeechDetector.set_session_datam  s?    &&z2G--11*=G %LL r   r   c                    Xl         U R                  R                  5        HJ  nU R                  UR                  5      u  p4U R                  UU R                   U R                  S9Ul        ML     [        R                  SU 35        g)z+Set Silero VAD speech probability thresholdr   z3Updated Silero VAD speech probability threshold to N)
r8   rC   valuesr   r   rx   r3   r   r`   r6   )r   r   r   rd   r|   s        r   rR   5SileroSpeechDetector.set_speech_probability_thresholdv  s|    ,5)..557G//0B0BCHE#'#8#8;;".. $9 $G  8 	J9+VWr   c                    U(       ah  U R                   R                  U5      nU(       aE  UR                  (       a3  UR                  R                  5         [        R                  SU 35        gggU R                   R                  5        H0  nUR                  (       d  M  UR                  R                  5         M2     [        R                  S5        g)z=Reset VAD iterator state for specific session or all sessionsz#Silero VAD state reset for session z'Silero VAD state reset for all sessionsN)rC   r   r   r    r`   r6   r   r   s      r   reset_vad_state$SileroSpeechDetector.reset_vad_state  s    --11*=G7//$$113B:,OP 0w
  2299;'''((557 < LLBCr   c                 F    U R                  U5      nSSUS-  -  -  Ul        g )Nr?   r@   rA   )r   r   )r   r   r^   r   s       r   set_volume_db_threshold,SileroSpeechDetector.set_volume_db_threshold  s(    "":.&+redl/C&D#r   )rv   rx   rB   r   r4   r9   rw   r6   rs   r1   r2   rk   rj   r:   r   ru   rC   rt   r0   r8   r5   r=   r\   )F)*r#   r$   r%   r&   r   r   r(   r   r   boolr'   r   r   dictrL   rY   propertyr/   setterrF   r   ndarrayr   r   r   r   r   r   r   r   r   r   r   r   r   anyr   rR   r   r   r)   r*   r+   s   @r   r-   r-       s    04,/"! $%:>$(.1 KO36!&%/, &e_/, %*	/,
 /, /, /, /, "/, hw~67/, /, SM/, ',/, /, /,  'xy0F'GH!/," ,1#/,$ %/, /,b
D 
 $ & )U ) ) D% D  D+Xc] +Z5 RZZ Gc G! !AQ !VZ !Fie iX] ikn iV$U V$ V$ V$pFt1L FZ] F(c   4 4
)3 )S )
&3 &S & &VZ &X% XD# DE# Ee E Er   r-   )r   loggingnumpyr   r   rq   rm   typingr   r   r   r   r    r	   baser
   	getLoggerr#   r`   r   r-    r   r   <module>r      sP          F F  &			8	$-+ -$sE> sEr   