
    cCi                         S r SSKrSSKJrJr  SSKrSSKJrJ	r	J
r
Jr  SSKJr  SSKJr  \R                   " \5      r " S S	\5      rS	/rg)
zXcodec model configuration    N)OptionalUnion)
AutoConfig	DacConfigHubertConfigWavLMConfig   )PretrainedConfig)loggingc                   ^  ^  \ rS rSrSrSr\\S.rSSSSS/SS/SS/SS	SS
SS4S\	\
\      S\S\S\
\   S\
\   S\
\   S\S\S\	\   S\S\\\4   S\\\4   4U 4S jjjr\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       rSrU =r$ ) XcodecConfig   a	  
This is the configuration class to store the configuration of an [`XcodecModel`]. It is used to instantiate a
Xcodec model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the
[Manel/X-Codec](https://huggingface.co/Manel/X-Codec) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    target_bandwidths (`List[float]`, *optional*, defaults to `[0.5, 1, 1.5, 2, 4]`):
        The range of different bandwidths (in kbps) the model can encode audio with.
    sample_rate (`int`, *optional*, defaults to 16000):
        The sampling rate at which the audio waveform should be digitalized, in hertz (Hz).
    kernel_size (`int`, *optional*, defaults to 3):
        Kernel size for the initial semantic convolution.
    channel_ratios (`List[float]`, *optional*, defaults to `[1, 1]`):
        Expansion factors for the number of output channels in each semantic block.
    strides (`List[int]`, *optional*, defaults to `[1, 1]`):
        Strides for each semantic encoder block.
    block_dilations (`List[int]`, *optional*, defaults to `[1, 1]`):
        Dilation factors for the residual units in semantic blocks.
    unit_kernel_size (`int`, *optional*, defaults to 3):
        Kernel size inside each ResidualUnit in semantic blocks.
    codebook_size (`int`, *optional*, defaults to 1024):
        Number of entries in each residual quantizer's codebook.
    codebook_dim (`int`, *optional*):
        Dimensionality of each codebook vector. Defaults to sum of hidden size of acoustic and semantic models.
    initializer_range (`float`, *optional*, defaults to 0.02):
        Standard deviation of the truncated normal initializer for all weight matrices.
    acoustic_model_config (`Union[Dict, DacConfig]`, *optional*):
        An instance of the configuration for the acoustic (DAC) model.
    semantic_model_config (`Union[Dict, HubertConfig, WavLMConfig]`, *optional*):
        An instance of the configuration object for the semantic (HuBERT) model.

Example:

```python
>>> from transformers import XcodecModel, XcodecConfig

>>> # Initializing configuration
>>> configuration = XcodecConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = XcodecModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```xcodec)acoustic_model_configsemantic_model_configNi>  r	         g{Gz?target_bandwidthssample_ratekernel_sizechannel_ratiosstridesblock_dilationsunit_kernel_sizecodebook_sizecodebook_diminitializer_ranger   r   c                 h  > [         TU ]  " S0 UD6  Uc  [        S/ SQS/ SQSS9U l        OY[	        U[
        5      (       a  [        S0 UD6U l        O3[	        U[        5      (       a  Xl        O[        S[        U5       35      eUc  [        5       U l	        O[	        U[
        5      (       aK  SU;   a  [        R                  " US   5      U l	        On[        R                  S5        [        S0 UD6U l	        OH[	        U[        5      (       d  [	        U[        5      (       a  Xl	        O[        S	[        U5       35      eUc  / S
QnXl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        U	c-  U R                  R0                  U R                  R0                  -   n	Xl        g )N@   )            r      )encoder_hidden_sizedownsampling_ratiosdecoder_hidden_sizeupsampling_ratioshidden_sizezDacoustic_model_config must be a dict or DacConfig instance, but got _name_or_pathz_Could not determine semantic model type from config architecture. Defaulting to `HubertConfig`.zUsemantic_model_config must be a dict, HubertConfig, or WavLMConfig instance, but got )g      ?r   g      ?r#   r"    )super__init__r   r   
isinstancedict
ValueErrortyper   r   r   from_pretrainedloggerwarningr   r   r   r   r   r   r   r   r   r   r)   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                 i/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/xcodec/configuration_xcodec.pyr-   XcodecConfig.__init__Y   s     	"6" ()2$& %1$(".*D& -t44)2)K5J)KD&-y99)>&VW[\qWrVst  !()5D&-t44"77-7-G-GH]^mHn-o* u .:-R<Q-R*-{;;zJ_am?n?n)>&ghl  nC  iD  hE  F  $ 3!2&&,. 0*!255AADD^D^DjDjjL(    returnc                 \    [         R                  " U R                  U R                  -  5      $ N)mathceilr   
hop_lengthr5   s    r8   
frame_rateXcodecConfig.frame_rate   s     yy))DOO;<<r:   c                 .    U R                   R                  $ r=   )r   r)   rA   s    r8   semantic_hidden_size!XcodecConfig.semantic_hidden_size   s    ))555r:   c                 h    [        [        R                  " U R                  R                  5      5      $ r=   )intnpprodr   r&   rA   s    r8   r@   XcodecConfig.hop_length   s"    277455IIJKKr:   c                 j    [         R                  " [         R                  " U R                  5      5      $ r=   )r>   r?   log2r   rA   s    r8   codebook_nbitsXcodecConfig.codebook_nbits   s     yy4#5#5677r:   c                 \    U R                   R                  U R                  R                  -   $ r=   )r   r)   r   rA   s    r8   r)   XcodecConfig.hidden_size   s%    ))558R8R8^8^^^r:   c                 l    [        SU R                  S   -  U R                  U R                  -  -  5      $ )Ni  )rH   r   rB   rN   rA   s    r8   num_quantizersXcodecConfig.num_quantizers   s1    4$00444K^K^9^_``r:   )r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__
model_typer   r   sub_configsr   listfloatrH   r   r/   r   r-   propertyrB   rE   r@   rN   r)   rT   __static_attributes____classcell__)r7   s   @r8   r   r      s   0d J "+!+K 48 '(!fV&'V !!&*#'8<;?F)#DK0F) F) 	F)
 UF) cF) cF) F) F) smF) !F)  %T9_5F)  %T<%78F) F)P =C = = 6c 6 6 LC L L 8 8 8 _S _ _ a a ar:   r   )rZ   r>   typingr   r   numpyrI   transformersr   r   r   r   configuration_utilsr
   utilsr   
get_loggerrV   r3   r   __all__r+   r:   r8   <module>ri      sM    !  "  I I 3  
		H	%Xa# Xav 
r:   