
    :i                         S SK r S SKrS SKJr  S SKrS SKJr  S SKJ	r	J
r
Jr  \ R                  " \5      r\ " S S5      5       r\ " S S5      5       r\ " S	 S
5      5       r\ " S S5      5       r " S S5      rg)    N)	dataclass)Audio)
AudioChunkAudioURLChunkAudioURLTypec                   B    \ rS rSr% Sr\\S'   \\S'   \\S'   S	S jrSrg)
AudioSpectrogramConfig   aF  Configuration for generating an audio spectrogram.

Attributes:
    num_mel_bins: Number of mel bins, typically 80 or 128.
    hop_length: Length of the overlapping windows for
        the STFT used to obtain the Mel Frequency coefficients, typically 160.
    window_size: Window size of the Fourier transform, typically 400.
num_mel_bins
hop_lengthwindow_sizeNc                     U R                   S:  d   U R                   5       eU R                  S:  d   U R                  5       eU R                  S:  d   U R                  5       eg )Nr   )r   r   r   selfs    `/home/james-whalen/.local/lib/python3.13/site-packages/mistral_common/tokens/tokenizers/audio.py__post_init__$AudioSpectrogramConfig.__post_init__   s[      1$7d&7&77$"3DOO3"!#5T%5%55#     returnN)	__name__
__module____qualname____firstlineno____doc__int__annotations__r   __static_attributes__r   r   r   r	   r	      s     O6r   r	   c                       \ rS rSr% Sr\\S'   \\S'   \\S'   Sr	\S-  \S'   SS	 jr
\S\4S
 j5       r\S\4S j5       rSrg)AudioConfig%   aH  Configuration for audio processing.

Attributes:
    sampling_rate: Sampling rate of the audio.
    frame_rate: Number of frames per second accepted by the tokenizer model.
    encoding_config: Configuration for audio spectrogram.
    chunk_length_s: Whether to pad an audio into multiples of chunk_length_s seconds (optional).
sampling_rate
frame_rateencoding_configNchunk_length_sr   c                 H   U R                   S:  d   U R                   5       eU R                  S:  d   U R                  5       eU R                  bS  U R                  S:  d   U R                  5       eU R                  S:  d!   SU R                   SU R                   35       eg g )Nr   z7chunk_length_s and sampling_rate must both be > 0, got z and )r$   r#   r&   chunk_framesr   s    r   r   AudioConfig.__post_init__7   s    "3DOO3"!!A%9t'9'99%*&&*?D,?,??*$$q( I$J]J]I^^cdhdvdvcwx( +r   c                     U R                   c   SU R                   < S35       e[        U R                   U R                  -  5      $ )z)Calculate the number of frames per chunk.z/Can't call chunk_frames if self.chunk_length_s=.)r&   r   r#   r   s    r   r(   AudioConfig.chunk_framesA   sI     "".e2bdNaNaMccd0ee.4&&););;<<r   c                 z    U R                   U R                  -  nXR                  R                  -  n[	        U5      $ )z(Calculate the length of audio per token.)r#   r$   r%   r   r   )r   downsample_factors     r   audio_length_per_tok AudioConfig.audio_length_per_tokG   s:     !..$//A11<<<$%%r   r   r   )r   r   r   r   r   r   r   floatr	   r&   r   propertyr(   r/   r   r   r   r   r!   r!   %   sd     ++#'NEDL' =c = =
 &c & &r   r!   c                   4    \ rS rSr% Sr\\   \S'   \\S'   Sr	g)AudioEncodingO   zEncapsulates the tokens and audio data for an audio chunk.

Attributes:
    tokens: Text tokens corresponding to this audio chunk.
    audio: Original audio waveform data.
tokensaudior   N)
r   r   r   r   r   listr   r   r   r   r   r   r   r4   r4   O   s     ILr   r4   c                   .    \ rS rSr% Sr\\S'   \\S'   Srg)SpecialAudioIDs^   zSpecial text tokens corresponding to audio token sequence.

Attributes:
    audio: Token representing audio.
    begin_audio: Token representing the beginning of audio.
r7   begin_audior   N)r   r   r   r   r   r   r   r   r   r   r   r:   r:   ^   s     Jr   r:   c                       \ rS rSrSrS\S\SS4S jrS\R                  S	\
S\R                  4S
 jrS\
S	\
S\
4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\\-  S\4S jr\S\
4S j5       r\S\
4S j5       rSrg)AudioEncoderk   zEncodes audio chunks into a format suitable for further processing.

Attributes:
    audio_config: Configuration for audio processing.
    encoding_config: Configuration for audio spectrogram.
    special_ids: Special tokens for audio encoding.
audio_configspecial_idsr   Nc                 >    Xl         UR                  U l        X l        g Nr@   r%   rA   )r   r@   rA   s      r   __init__AudioEncoder.__init__t   s    (+;;&r   audio_arrayr#   c                    U R                   R                  (       aI  U R                  UR                  S   U5      n[        R
                  " USX1R                  S   -
  45      nU$ UR                  S   U R                  R                  :  a=  [        R
                  " USU R                  R                  UR                  S   -
  45      nU$ )zPad the audio array to the desired length.

Args:
    audio_array: Audio data as a numpy array.
    sampling_rate: Sampling rate of the audio.

Returns:
    Padded audio array.
r   )r@   r&   next_multiple_of_chunk_framesshapenppadr%   r   )r   rG   r#   rJ   s       r   rM   AudioEncoder.pady   s     ++,0,N,N{O`O`acOdfs,t)&&q2ORcRcdfRg2g.hiK
 	 r"T%9%9%E%EE&&q$2F2F2R2RU`UfUfgiUj2j.klKr   audio_array_lenc                 n   X R                   R                  :X  d#   SU< SU R                   R                  < 35       eU R                   R                  c    SU R                   R                  < S35       e[        R                  " XR                   R
                  -  5      U R                   R
                  -  $ )zCalculate the next multiple of chunk frames.

Args:
    audio_array_len: Length of the audio array.
    sampling_rate: Sampling rate of the audio.

Returns:
    The next multiple of chunk frames.
zExpected sampling_rate=z' to be self.audio_config.sampling_rate=zMCan't call next_multiple_of_chunk_frames if self.audio_config.chunk_length_s=r+   )r@   r#   r&   mathceilr(   )r   rO   r#   s      r   rJ   *AudioEncoder.next_multiple_of_chunk_frames   s      1 1 ? ?? 	
&''Ot/@/@/N/N.PQ	
?   //; 	
\4;L;L;[;[:]]^_	
; yy+<+<+I+IIJTM^M^MkMkkkr   r7   c                 X   UR                  U R                  R                  5        U R                  UR                  U R                  R                  5      Ul        UR                  R
                  S   nX R                  R                  -  S:w  a0  [        R                  " X R                  R                  -  S-
  5      nOX R                  R                  -  n[        R                  " X R                  R                  -  5      nU R                  /U R                  /U-  -   n[        UUS9$ )Nr      )r6   r7   )resampler@   r#   rM   rG   rK   r%   r   rQ   rR   r/   begin_audio_tokenaudio_tokenr4   )r   r7   signal_lengthnum_audio_tokensaudio_tokenss        r   _encode_audioAudioEncoder._encode_audio   s    t((667 HHU%6%68I8I8W8WX))//2 //:::a? IIm6J6J6U6U&UXY&YZM)-A-A-L-LLM99]5F5F5[5[%[\../43C3C2DGW2WW
 	
r   contentc                 d    [         R                  " UR                  5      nU R                  U5      $ rC   )r   from_raw_audioinput_audior\   )r   r^   r7   s      r   _encode_audio_chunk AudioEncoder._encode_audio_chunk   s)    $$W%8%89!!%((r   c                 x   UR                  5       nU[        R                  [        R                  1;   a!  [        R
                  " UR                  5      nOUU[        R                  :X  a!  [        R                  " UR                  5      nO [        R                  " UR                  5      nU R                  U5      $ rC   )
get_url_typer   filefile_urir   	from_fileurlfrom_urlfrom_base64r\   )r   r^   url_typer7   s       r   _encode_audio_url_chunk$AudioEncoder._encode_audio_url_chunk   s    '')))<+@+@AAOOGKK0E)))NN7;;/E%%gkk2E!!%((r   c                     [        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U R	                  U5      $ [        S[        U5       35      e)zCall the encoder on an audio chunk or URL chunk.

Args:
    content: Audio or URL chunk to encode.

Returns:
    Encoded audio data and tokens.
zUnsupported content type: )
isinstancer   rm   r   rb   
ValueErrortype)r   r^   s     r   __call__AudioEncoder.__call__   sW     g}--//88,,++G449$w-IJJr   c                 .    U R                   R                  $ )zGet the audio token.)rA   r7   r   s    r   rX   AudioEncoder.audio_token   s     %%%r   c                 .    U R                   R                  $ )zGet the begin audio token.)rA   r<   r   s    r   rW   AudioEncoder.begin_audio_token   s     +++r   rD   )r   r   r   r   r   r!   r:   rE   rL   ndarrayr   rM   rJ   r   r4   r\   r   rb   r   rm   rs   r2   rX   rW   r   r   r   r   r>   r>   k   s    '[ ' 'SW '
rzz # "** &lS lQT lY\ l&
5 
] 
(): )- )
)} 
) 
)K
] : K} K  &S & & ,3 , ,r   r>   )loggingrQ   dataclassesr   numpyrL   mistral_common.audior   &mistral_common.protocol.instruct.chunkr   r   r   	getLoggerr   loggerr	   r!   r4   r:   r>   r   r   r   <module>r      s      !  & Z Z			8	$ 6 6 6. && && &&R    	 	 	p, p,r   