
    bCik:                         S r SSKrSSKJr  SSKJr  SSKJr  \R                  " \	5      r
 " S S\5      r " S	 S
\5      r " S S\5      r " S S\5      rg)z#BARK model generation configuration    N)Optional   )GenerationConfig)loggingc                   P   ^  \ rS rSrSr                SU 4S jjrSrU =r$ )BarkSemanticGenerationConfig   semanticc                    > [         TU ]  " SUU	UUUUUUUS.	UD6  Xl        Xl        Xl        Xl        Xl        Xl        Xl        UU l	        g)a
  Class that holds a generation configuration for [`BarkSemanticModel`].

This configuration inherit from [`GenerationConfig`] and can be used to control the model generation. Read the
documentation from [`GenerationConfig`] for more information.

Args:
    eos_token_id (`int`, *optional*, defaults to 10_000):
        The id of the *end-of-sequence* token.
    renormalize_logits (`bool`, *optional*, defaults to `True`):
        Whether to renormalize the logits after applying all the logits processors (including the
        custom ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the
        score logits are normalized but some logit processors break the normalization.
    max_new_tokens (`int`, *optional*, defaults to 768):
        The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
    output_scores (`bool`, *optional*, defaults to `False`):
        Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
    return_dict_in_generate (`bool`, *optional*, defaults to `False`):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
    output_hidden_states (`bool`, *optional*, defaults to `False`):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more details.
    output_attentions (`bool`, *optional*, defaults to `False`):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more details.
    temperature (`float`, *optional*, defaults to 1.0):
        The value used to modulate the next token probabilities.
    do_sample (`bool`, *optional*, defaults to `False`):
        Whether or not to use sampling ; use greedy decoding otherwise.
    text_encoding_offset (`int`, *optional*, defaults to 10_048):
        Text encoding offset.
    text_pad_token (`int`, *optional*, defaults to 129_595):
        Text pad token.
    semantic_infer_token (`int`, *optional*, defaults to 129_599):
        Semantic infer token.
    semantic_vocab_size (`int`, *optional*, defaults to 10_000):
        Semantic vocab size.
    max_input_semantic_length (`int`, *optional*, defaults to 256):
        Max length of semantic input vector.
    semantic_rate_hz (`float`, *optional*, defaults to 49.9):
        Semantic rate in Hertz.
    min_eos_p (`float`, *optional*):
        Minimum threshold of the probability of the EOS token for it to be sampled. This is an early stopping
        strategy to mitigate potential unwanted generations at the end of a prompt. The original implementation
        suggests a default value of 0.2.
)	temperature	do_sampleeos_token_idrenormalize_logitsmax_new_tokensoutput_scoresreturn_dict_in_generateoutput_hidden_statesoutput_attentionsN )
super__init__text_encoding_offsettext_pad_tokensemantic_pad_tokensemantic_infer_tokensemantic_vocab_sizemax_input_semantic_lengthsemantic_rate_hz	min_eos_p)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                     p/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/bark/generation_configuration_bark.pyr   %BarkSemanticGenerationConfig.__init__   sp    B 	 	
#%1)'$;!5/	
 	
 %9!,".$8!#6 )B& 0"    )r   r   r   r   r   r   r   r   )'  Ti   FFFF      ?Fi@'  i; i? r&      g33333H@N)__name__
__module____qualname____firstlineno__
model_typer   __static_attributes____classcell__r"   s   @r#   r   r      sF    J  %"#$""%#U# U#r%   r   c                   X   ^  \ rS rSrSr              SS\S\4U 4S jjjrSrU =r$ )BarkCoarseGenerationConfigv   coarse_acousticsmax_coarse_historysliding_window_lenc                    > [         TU ]  " SUUUUUUUS.UD6  Xl        Xl        Xl        Xl        Xl        Xl        Xl        g)aS	  Class that holds a generation configuration for [`BarkCoarseModel`].

This configuration inherit from [`GenerationConfig`] and can be used to control the model generation. Read the
documentation from [`GenerationConfig`] for more information.

Args:
    renormalize_logits (`bool`, *optional*, defaults to `True`):
        Whether to renormalize the logits after applying all the logits processors (including the
        custom ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the
        score logits are normalized but some logit processors break the normalization.
    output_scores (`bool`, *optional*, defaults to `False`):
        Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
    return_dict_in_generate (`bool`, *optional*, defaults to `False`):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
    output_hidden_states (`bool`, *optional*, defaults to `False`):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more details.
    output_attentions (`bool`, *optional*, defaults to `False`):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more details.
    temperature (`float`, *optional*, defaults to 1.0):
        The value used to modulate the next token probabilities.
    do_sample (`bool`, *optional*, defaults to `False`):
        Whether or not to use sampling ; use greedy decoding otherwise.
    coarse_semantic_pad_token (`int`, *optional*, defaults to 12_048):
        Coarse semantic pad token.
    coarse_rate_hz (`int`, *optional*, defaults to 75):
        Coarse rate in Hertz.
    n_coarse_codebooks (`int`, *optional*, defaults to 2):
        Number of coarse codebooks.
    coarse_infer_token (`int`, *optional*, defaults to 12_050):
        Coarse infer token.
    max_coarse_input_length (`int`, *optional*, defaults to 256):
        Max length of input coarse vector.
    max_coarse_history (`int`, *optional*, defaults to 630):
        Max length of the output of the coarse acoustics model used in the fine generation step.
    sliding_window_len (`int`, *optional*, defaults to 60):
        The coarse generation step uses a sliding window to generate raw audio.
)r   r   r   r   r   r   r   Nr   )	r   r   coarse_semantic_pad_tokencoarse_rate_hzn_coarse_codebookscoarse_infer_tokenmax_coarse_input_lengthr5   r6   )r    r   r   r   r   r   r   r   r8   r9   r:   r;   r<   r5   r6   r!   r"   s                   r#   r   #BarkCoarseGenerationConfig.__init__y   sd    r 	 		
#1'$;!5/		
 		
 *C&,"4"4'>$"4"4r%   )r;   r9   r8   r5   r<   r:   r6   )TFFFFr'   Fi/  K      i/  r(   iv  <   )	r)   r*   r+   r,   r-   intr   r.   r/   r0   s   @r#   r2   r2   v   sV    #J   %""(! #"%"$J5  J5  J5 J5r%   r2   c                   >   ^  \ rS rSrSr    SU 4S jjrS rSrU =r$ )BarkFineGenerationConfig   fine_acousticsc                 D   > [         TU ]  US9  X l        X0l        X@l        g)a:  Class that holds a generation configuration for [`BarkFineModel`].

[`BarkFineModel`] is an autoencoder model, so should not usually be used for generation. However, under the
hood, it uses `temperature` when used by [`BarkModel`]

This configuration inherit from [`GenerationConfig`] and can be used to control the model generation. Read the
documentation from [`GenerationConfig`] for more information.

Args:
    temperature (`float`, *optional*):
        The value used to modulate the next token probabilities.
    max_fine_history_length (`int`, *optional*, defaults to 512):
        Max length of the fine history vector.
    max_fine_input_length (`int`, *optional*, defaults to 1024):
        Max length of fine input vector.
    n_fine_codebooks (`int`, *optional*, defaults to 8):
        Number of codebooks used.
)r   N)r   r   max_fine_history_lengthmax_fine_input_lengthn_fine_codebooks)r    r   rG   rH   rI   r!   r"   s         r#   r   !BarkFineGenerationConfig.__init__   s'    4 	[1'>$%:" 0r%   c                     g)zt
Overrides GenerationConfig.validate because BarkFineGenerationConfig don't use any parameters outside
temperature.
Nr   )r    r!   s     r#   validate!BarkFineGenerationConfig.validate   s    
 	r%   )rG   rH   rI   )r'   i         )	r)   r*   r+   r,   r-   r   rL   r.   r/   r0   s   @r#   rC   rC      s&    !J  #"1@ r%   rC   c                   t    \ rS rSrSr     SS\\   S\\   S\\   4S jjr\S\	S\
S\4S j5       rS	 rS
rg)BarkGenerationConfig   barkNsemantic_configcoarse_acoustics_configfine_acoustics_configc                    Uc  0 n[         R                  S5        Uc  0 n[         R                  S5        Uc  0 n[         R                  S5        [        S0 UD6U l        [	        S0 UD6U l        [        S0 UD6U l        X@l        XPl	        g)a  Class that holds a generation configuration for [`BarkModel`].

The [`BarkModel`] does not have a `generate` method, but uses this class to generate speeches with a nested
[`BarkGenerationConfig`] which uses [`BarkSemanticGenerationConfig`], [`BarkCoarseGenerationConfig`],
[`BarkFineGenerationConfig`].

This configuration inherit from [`GenerationConfig`] and can be used to control the model generation. Read the
documentation from [`GenerationConfig`] for more information.

Args:
    semantic_config (`Dict`, *optional*):
        Semantic generation configuration.
    coarse_acoustics_config (`Dict`, *optional*):
        Coarse generation configuration.
    fine_acoustics_config (`Dict`, *optional*):
        Fine generation configuration.
    sample_rate (`int`, *optional*, defaults to 24_000):
        Sample rate.
    codebook_size (`int`, *optional*, defaults to 1024):
        Vector length for each codebook.
NzMsemantic_config is None. initializing the semantic model with default values.zScoarse_acoustics_config is None. initializing the coarse model with default values.zOfine_acoustics_config is None. initializing the fine model with default values.r   )
loggerinfor   rT   r2   rU   rC   rV   sample_ratecodebook_size)r    rT   rU   rV   rZ   r[   r!   s          r#   r   BarkGenerationConfig.__init__   s    < " OKKgh"*&(#KKmn ($&!KKij;NoN'A'\D['\$%=%V@U%V"&*r%   c                 n    U " SUR                  5       UR                  5       UR                  5       S.UD6$ )z
Instantiate a [`BarkGenerationConfig`] (or a derived class) from bark sub-models generation configuration.

Returns:
    [`BarkGenerationConfig`]: An instance of a configuration object
)rT   rU   rV   r   )to_dict)clsrT   rU   rV   r!   s        r#   from_sub_model_configs+BarkGenerationConfig.from_sub_model_configs'  sD      
+335$;$C$C$E"7"?"?"A
 	
 	
r%   c                 &   [         R                  " U R                  5      nU R                  R	                  5       US'   U R
                  R	                  5       US'   U R                  R	                  5       US'   U R                  R                  US'   U$ )z
Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].

Returns:
    `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
rT   rU   rV   r-   )	copydeepcopy__dict__rT   r^   rU   rV   r"   r-   )r    outputs     r#   r^   BarkGenerationConfig.to_dict<  s}     t}}-$($8$8$@$@$B !,0,H,H,P,P,R()*.*D*D*L*L*N&'#~~88|r%   )rU   r[   rV   rZ   rT   )NNNi]  rN   )r)   r*   r+   r,   r-   r   dictr   classmethodr   r2   rC   r`   r^   r.   r   r%   r#   rQ   rQ      s{    J +/2604/+!$/+ "*$/+  (~	/+b 
5
 "<
  8	
 
(r%   rQ   )__doc__rc   typingr   generation.configuration_utilsr   utilsr   
get_loggerr)   rX   r   r2   rC   rQ   r   r%   r#   <module>ro      se    *   >  
		H	%X##3 X#vM5!1 M5`(/ (VY+ Yr%   