
    bCi{                         S r SSKJrJr  SSKrSSKJr  SSKJ	r	  SSK
JrJrJr  \R                  " \5      r " S S	\5      rS	/rg)
zFeature extractor class for DAC    )OptionalUnionN   )SequenceFeatureExtractor)BatchFeature)PaddingStrategy
TensorTypeloggingc                   
  ^  \ rS rSrSrSS/r    SS\S\S\S\4U 4S	 jjjr     SS
\	\
R                  \\   \\
R                     \\\      4   S\\	\\\4      S\\   S\\   S\\	\\4      S\\   S\4S jjrSrU =r$ )DacFeatureExtractor   a
  
Constructs an Dac feature extractor.

This feature extractor inherits from [`~feature_extraction_sequence_utils.SequenceFeatureExtractor`] which contains
most of the main methods. Users should refer to this superclass for more information regarding those methods.

Args:
    feature_size (`int`, *optional*, defaults to 1):
        The feature dimension of the extracted features. Use 1 for mono, 2 for stereo.
    sampling_rate (`int`, *optional*, defaults to 16000):
        The sampling rate at which the audio waveform should be digitalized, expressed in hertz (Hz).
    padding_value (`float`, *optional*, defaults to 0.0):
        The value that is used for padding.
    hop_length (`int`, *optional*, defaults to 512):
        Overlap length between successive windows.
input_valuesn_quantizersfeature_sizesampling_ratepadding_value
hop_lengthc                 8   > [         TU ]  " SXUS.UD6  X@l        g )N)r   r   r    )super__init__r   )selfr   r   r   r   kwargs	__class__s         h/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/dac/feature_extraction_dac.pyr   DacFeatureExtractor.__init__1   s#     	wl_lwpvw$    	raw_audiopadding
truncation
max_lengthreturn_tensorsreturnc                    Ub<  X`R                   :w  a,  [        SU  SU R                    SU R                    SU S3	5      eO-[        R                  SU R                  R
                   S35        U(       a  U(       a  [        S	5      eUc  S
n[        [        U[        [        45      =(       a(    [        US   [        R                  [        [        45      5      nU(       a>  U Vs/ s H0  n[        R                  " U[        R                  S9R                  PM2     nnOU(       dC  [        U[        R                  5      (       d$  [        R                  " U[        R                  S9nOo[        U[        R                  5      (       aP  UR                  [        R                  " [        R                   5      L a  UR#                  [        R                  5      nU(       d!  [        R                  " U5      R                  /n[%        U5       H  u  pU
R&                  S:  a  [        SU
R(                   35      eU R*                  S:X  a,  U
R&                  S:w  a  [        SU
R(                  S    S35      eU R*                  S:X  d  M{  [        S5      e   [-        SU05      nU R/                  UUUUUU R0                  S9nU(       a  UR3                  S5      US'   U(       a)  UR4                  SS2[        R6                  SS24   Ul        / nUR3                  S5       H3  n
U R*                  S:X  a  U
S   n
UR9                  U
R                  5        M5     XS'   Ub  UR;                  U5      nU$ s  snf )a  
Main method to featurize and prepare for the model one or several sequence(s).

Args:
    raw_audio (`np.ndarray`, `list[float]`, `list[np.ndarray]`, `list[list[float]]`):
        The sequence or batch of sequences to be processed. Each sequence can be a numpy array, a list of float
        values, a list of numpy arrays or a list of list of float values. The numpy array must be of shape
        `(num_samples,)` for mono audio (`feature_size = 1`), or `(2, num_samples)` for stereo audio
        (`feature_size = 2`).
    padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
        Select a strategy to pad the returned sequences (according to the model's padding side and padding
        index) among:

        - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
          sequence if provided).
        - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
          acceptable input length for the model if that argument is not provided.
        - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
          lengths).
    truncation (`bool`, *optional*, defaults to `False`):
        Activates truncation to cut input sequences longer than `max_length` to `max_length`.
    max_length (`int`, *optional*):
        Maximum length of the returned list and optionally padding length (see above).
    return_tensors (`str` or [`~utils.TensorType`], *optional*, default to 'pt'):
        If set, will return tensors instead of list of python integers. Acceptable values are:

        - `'tf'`: Return TensorFlow `tf.constant` objects.
        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return Numpy `np.ndarray` objects.
    sampling_rate (`int`, *optional*):
        The sampling rate at which the `audio` input was sampled. It is strongly recommended to pass
        `sampling_rate` at the forward call to prevent silent errors.
Nz3The model corresponding to this feature extractor: z& was trained using a sampling rate of zB. Please make sure that the provided audio input was sampled with z	 and not .zDIt is strongly recommended to pass the `sampling_rate` argument to `zN()`. Failing to do so can result in silent errors that might be hard to debug.zABoth padding and truncation were set. Make sure you only set one.Tr   )dtype   z6Expected input shape (channels, length) but got shape    z$Expected mono audio but example has z	 channelsz$Stereo audio isn't supported for nowr   )r!   r    r   return_attention_maskpad_to_multiple_ofattention_maskpadding_mask).N)r   
ValueErrorloggerwarningr   __name__bool
isinstancelisttuplenpndarrayasarrayfloat32Tr&   float64astype	enumeratendimshaper   r   padr   popr   newaxisappendconvert_to_tensors)r   r   r   r    r!   r"   r   
is_batchedaudioidxexampler   padded_inputss                r   __call__DacFeatureExtractor.__call__<   s   T $ 2 22 I$ P**+ ,**+9]O1F  3 NNVW[WeWeWnWnVo p\ \
 z`aa_Gy4-0jj1PRPZPZ\acgOh6i

 LUVI5E<>>IIVIJy"**$E$E

9BJJ?I	2::..9??bhhrzzFZ3Z!((4I I.001I &i0LC||a #YZaZgZgYh!ijj  A%',,!*; #GVXHYGZZc!dee  A% !GHH 1 $^Y$?@ !!")# ! 
 ,9,=,=>N,OM.))6)C)CArzzSTDT)UM&$((8G  A%!),		* 9
 )5n%%)<<^LM[ Ws   $7M,)r   )r(   i>  g        i   )NFNNN)r1   
__module____qualname____firstlineno____doc__model_input_namesintfloatr   r   r6   r7   r4   r   r2   strr   r	   r   rJ   __static_attributes____classcell__)r   s   @r   r   r      s   " (8 ""	%	% 	% 		%
 	% 	% @D%*$(;?'+oT%[$rzz2BDeDUUVo %c? :;<o TN	o
 SMo !sJ!78o  }o 
o or   r   )rO   typingr   r   numpyr6   !feature_extraction_sequence_utilsr   feature_extraction_utilsr   utilsr   r	   r
   
get_loggerr1   r/   r   __all__r   r   r   <module>r]      sH    & "  I 4 9 9 
		H	%N2 Nb !
!r   