
    +hL                         S SK JrJrJrJr  S SKrSSKJr  SSKJ	r	  SSK
JrJr  SSKJr  SS	KJrJrJr  \" 5       (       a  S SKJs  Jr  S
rOSr\R0                  " \5      r " S S\\5      rg)    )ListOptionalTupleUnionN   )UNet1DModel)SchedulerMixin)is_torch_xla_availablelogging)randn_tensor   )AudioPipelineOutputDeprecatedPipelineMixinDiffusionPipelineTFc                      ^  \ rS rSrSrSrSrS\S\4U 4S jjr	\
R                  " 5            SS\S\S	\\\
R                  \\
R                     4      S
\\   S\S\\\4   4S jj5       rSrU =r$ )DanceDiffusionPipeline%   a  
Pipeline for audio generation.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

Parameters:
    unet ([`UNet1DModel`]):
        A `UNet1DModel` to denoise the encoded audio.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded audio latents. Can be one of
        [`IPNDMScheduler`].
z0.33.1unet	schedulerc                 @   > [         TU ]  5         U R                  XS9  g )N)r   r   )super__init__register_modules)selfr   r   	__class__s      v/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.pyr   DanceDiffusionPipeline.__init__7   s    4=    
batch_sizenum_inference_steps	generatoraudio_length_in_sreturn_dictreturnc                    UcA  U R                   R                  R                  U R                   R                  R                  -  nX@R                   R                  R                  -  nS[	        U R                   R
                  5      -  nUSU-  :  a5  [        U SSU-  U R                   R                  R                  -   S35      e[        U5      nXg-  S:w  a  X@R                   R                  R                  -  U-  S-   U-  n[        R                  U SX`R                   R                  R                  -   S	XR                   R                  R                  -   S
35        [        U5      n[        U R                   R                  5       5      R                  n	XR                   R                  R                  U4n
[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      e[!        XU R"                  U	S9nU R$                  R'                  X+R(                  S9  U R$                  R*                  R-                  U	5      U R$                  l        U R/                  U R$                  R*                  5       Hf  nU R                  X5      R0                  nU R$                  R3                  XU5      R4                  n[6        (       d  MQ  [8        R:                  " 5         Mh     UR=                  SS5      R?                  5       RA                  5       RC                  5       nUSS2SS2SU24   nU(       d  U4$ [E        US9$ )at  
The call function to the pipeline for generation.

Args:
    batch_size (`int`, *optional*, defaults to 1):
        The number of audio samples to generate.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher-quality audio sample at
        the expense of slower inference.
    generator (`torch.Generator`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    audio_length_in_s (`float`, *optional*, defaults to `self.unet.config.sample_size/self.unet.config.sample_rate`):
        The length of the generated audio sample in seconds.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.

Example:

```py
from diffusers import DiffusionPipeline
from scipy.io.wavfile import write

model_id = "harmonai/maestro-150k"
pipe = DiffusionPipeline.from_pretrained(model_id)
pipe = pipe.to("cuda")

audios = pipe(audio_length_in_s=4.0).audios

# To save locally
for i, audio in enumerate(audios):
    write(f"maestro_test_{i}.wav", pipe.unet.sample_rate, audio.transpose())

# To display in google colab
import IPython.display as ipd

for audio in audios:
    display(ipd.Audio(audio, rate=pipe.unet.sample_rate))
```

Returns:
    [`~pipelines.AudioPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.AudioPipelineOutput`] is returned, otherwise a `tuple` is
        returned where the first element is a list with the generated audio.
Nr   r   z1 is too small. Make sure it's bigger or equal to .r      z is increased to z; so that it can be handled by the model. It will be cut to z after the denoising process.z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r!   devicedtype)r(   )audios)#r   configsample_sizesample_ratelen	up_blocks
ValueErrorintloggerinfonext
parametersr)   in_channels
isinstancelistr   _execution_devicer   set_timestepsr(   	timestepstoprogress_barsamplestepprev_sampleXLA_AVAILABLExm	mark_stepclampfloatcpunumpyr   )r   r   r    r!   r"   r#   r-   down_scale_factororiginal_sample_sizer)   shapeaudiotmodel_outputs                 r   __call__DanceDiffusionPipeline.__call__;   s   n $ $		 0 0 < <tyy?O?O?[?[ ['))*:*:*F*FFTYY%8%8!99...$% &))DII,<,<,H,HHIL 
  #;/*a/"YY%5%5%A%AAFWWZ[[!"K KK$%%6{YYEUEUEaEa7a6b c44H99K[K[KgKg4g3h i
 +&TYY))+,22YY--99;Gi&&3y>Z+GA#i.AQ R&<'gi 
 U@V@V^cd 	$$%8$N#'>>#;#;#>#>u#E ""4>>#;#;<A99U.55L NN''?KKE} = B"((*..0668a111128O"%00r    )r'   d   NNT)__name__
__module____qualname____firstlineno____doc___last_supported_versionmodel_cpu_offload_seqr   r	   r   torchno_gradr2   r   r   	Generatorr   rF   boolr   r   rO   __static_attributes____classcell__)r   s   @r   r   r   %   s     '">[ >^ > ]]_ #&MQ-1 m1m1 !m1 E%//43H"HIJ	m1
 $E?m1 m1 
"E)	*m1 m1r   r   )typingr   r   r   r   rZ   modelsr   
schedulersr	   utilsr
   r   utils.torch_utilsr   pipeline_utilsr   r   r   torch_xla.core.xla_modelcore	xla_modelrC   rB   
get_loggerrS   r3   r   rQ   r   r   <module>rj      s_     0 /  ! ( 4 - \ \ ))MM			H	%D146G D1r   