
    +h¥                        S SK r S SKJrJrJrJrJrJr  S SKrS SK	J
r
JrJrJr  SSKJr  SSKJrJrJrJr  SSKJrJrJrJr  SSKJr  SS	KJr  SS
KJrJ r J!r!J"r"J#r#J$r$  SSK%J&r&J'r'J(r(J)r)J*r*J+r+J,r,  SSK-J.r.  SSK/J0r0  SSK1J2r2  SSK3J4r4  SSK5J6r6J7r7  SSK8J9r9  \(" 5       (       a  S SK:J;s  J<r=  Sr>OSr>\)R~                  " \@5      rASrB " S S\6\7\\\\2\4\5
      rCg)    N)AnyCallableDictListOptionalUnion)CLIPImageProcessorCLIPTextModelCLIPTokenizerCLIPVisionModelWithProjection   )PipelineImageInput)FromSingleFileMixinIPAdapterMixinStableDiffusionLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLImageProjectionUNet2DConditionModelUNetMotionModel)adjust_lora_scale_text_encoder)MotionAdapter)DDIMSchedulerDPMSolverMultistepSchedulerEulerAncestralDiscreteSchedulerEulerDiscreteSchedulerLMSDiscreteSchedulerPNDMScheduler)USE_PEFT_BACKEND	deprecateis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor)VideoProcessor   )FreeInitMixin)AnimateDiffFreeNoiseMixin)DiffusionPipelineStableDiffusionMixin   )AnimateDiffPipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler
        >>> from diffusers.utils import export_to_gif

        >>> adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
        >>> pipe = AnimateDiffPipeline.from_pretrained("frankjoshua/toonyou_beta6", motion_adapter=adapter)
        >>> pipe.scheduler = DDIMScheduler(beta_schedule="linear", steps_offset=1, clip_sample=False)
        >>> output = pipe(prompt="A corgi walking in the park")
        >>> frames = output.frames[0]
        >>> export_to_gif(frames, "animation.gif")
        ```
c            /         ^  \ rS rSrSrSr/ SQr/ SQr  S=S\S\	S	\
S
\\\4   S\S\\\\\\\4   S\S\4U 4S jjjr     S>S\\R4                     S\\R4                     S\\   S\\   4S jjrS?S jrS rS@S\4S jjr S r!      SAS jr" S?S jr#\$S 5       r%\$S 5       r&\$S 5       r'\$S  5       r(\$S! 5       r)\$S" 5       r*\RV                  " 5       \," \-5      SSSSS#S$SS%S&SSSSSSS'S(SSSS)/S4S*\\\.\/\.   4      S+\\   S,\\   S-\\   S.\S/\S0\\\.\/\.   4      S1\\   S2\S3\\\R`                  \/\R`                     4      S)\\R4                     S\\R4                     S\\R4                     S4\\1   S5\\/\R4                        S6\\.   S7\2S8\\3\.\44      S\\   S9\\5\\\3/S4      S:\/\.   S\4,S; jj5       5       r6S<r7U =r8$ )BAnimateDiffPipelineN   a  
Pipeline for text-to-video generation.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
    tokenizer (`CLIPTokenizer`):
        A [`~transformers.CLIPTokenizer`] to tokenize text.
    unet ([`UNet2DConditionModel`]):
        A [`UNet2DConditionModel`] used to create a UNetMotionModel to denoise the encoded video latents.
    motion_adapter ([`MotionAdapter`]):
        A [`MotionAdapter`] to be used in combination with `unet` to denoise the encoded video latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
z&text_encoder->image_encoder->unet->vae)feature_extractorimage_encodermotion_adapter)latentsprompt_embedsnegative_prompt_embedsNvaetext_encoder	tokenizerunetr4   	schedulerr2   r3   c	                 f  > [         T	U ]  5         [        U[        5      (       a  [        R
                  " XE5      nU R                  UUUUUUUUS9  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OSU l        [        SU R                  S9U l        g )N)r8   r9   r:   r;   r4   r<   r2   r3   r8   r(   r-      F)	do_resizevae_scale_factor)super__init__
isinstancer   r   from_unet2dregister_modulesgetattrlenr8   configblock_out_channelsr@   r'   video_processor)
selfr8   r9   r:   r;   r4   r<   r2   r3   	__class__s
            n/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/animatediff/pipeline_animatediff.pyrB   AnimateDiffPipeline.__init__x   s    $ 	d011"..tDD%)/' 	 		
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw-PTPePef    r6   r7   
lora_scale	clip_skipc
                 
   UbS  [        U [        5      (       a>  Xl        [        (       d  [	        U R
                  U5        O[        U R
                  U5        Ub  [        U[        5      (       a  Sn
O3Ub!  [        U[        5      (       a  [        U5      n
OUR                  S   n
UGc  [        U [        5      (       a  U R                  XR                  5      nU R                  USU R                  R                  SSS9nUR                  nU R                  USSS	9R                  nUR                  S
   UR                  S
   :  a  [         R"                  " X5      (       dj  U R                  R%                  USS2U R                  R                  S-
  S
24   5      n[&        R)                  SU R                  R                   SU 35        [+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU	c%  U R                  UR3                  U5      US9nUS   nOQU R                  UR3                  U5      USS9nUS
   U	S-   *    nU R
                  R4                  R7                  U5      nU R
                  b  U R
                  R8                  nO0U R:                  b  U R:                  R8                  nOUR8                  nUR3                  UUS9nUR                  u  nnnUR=                  SUS5      nUR?                  UU-  US
5      nU(       Ga  UGc|  Uc  S/U
-  nOUb;  [A        U5      [A        U5      La$  [C        S[A        U5       S[A        U5       S35      e[        U[        5      (       a  U/nO2U
[        U5      :w  a!  [E        SU S[        U5       SU SU
 S3	5      eUn[        U [        5      (       a  U R                  UU R                  5      nUR                  S   nU R                  USUSSS9n[+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU R                  UR                  R3                  U5      US9nUS   nU(       aG  UR                  S   nUR3                  UUS9nUR=                  SUS5      nUR?                  X-  US
5      nU R
                  b6  [        U [        5      (       a!  [        (       a  [G        U R
                  U5        Xg4$ )a,  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    lora_scale (`float`, *optional*):
        A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr-   r   
max_lengthTpt)paddingrS   
truncationreturn_tensorslongest)rU   rW   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: use_attention_mask)attention_mask)r[   output_hidden_states)dtypedevice z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)$rC   r   _lora_scaler   r   r9   r$   strlistrG   shaper   maybe_convert_promptr:   model_max_length	input_idstorchequalbatch_decodeloggerwarninghasattrrH   rZ   r[   to
text_modelfinal_layer_normr]   r;   repeatviewtype	TypeError
ValueErrorr%   )rK   promptr^   num_images_per_promptdo_classifier_free_guidancenegative_promptr6   r7   rP   rQ   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textr[   prompt_embeds_dtypebs_embedseq_len_uncond_tokensrS   uncond_inputs                          rM   encode_prompt!AnimateDiffPipeline.encode_prompt   sQ   V !j7U&V&V) $#.t/@/@*M!$"3"3Z@*VS"9"9JJvt$<$<VJ&,,Q/J $ ;<<226>>J..$>>::# ) K )22N"nnVYW[n\ffO$$R(N,@,@,DDU[[N N  $~~::#At~~'F'F'JR'O$OP  778	,Q
 t((//1EFF4K\K\KcKcKvKv!,!;!;!>!>v!F!%  $ 1 1.2C2CF2K\j 1 k -a 0 $ 1 1"%%f-ncg !2 ! !.b 1IM2B C
 !% 1 1 < < M Mm \("&"3"3"9"9YY""&))//"/"5"5%((/B6(R,22'1%,,Q0EqI%**86K+KWVXY '+A+I&!#z 1#VD<Q(QUVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0 $ ;<< $ 9 9- X&,,Q/J>>$%# * L t((//1EFF4K\K\KcKcKvKv!-!<!<!?!?!G!%%)%6%6&&))&1- &7 &" &<A%>"&,2215G%;%>%>EXag%>%h"%;%B%B1F[]^%_"%;%@%@Acelnp%q"($ >??DTDT#D$5$5zB44rO   c                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	NrT   )rW   )r^   r]   T)r\   r   dim)nextr3   
parametersr]   rC   rh   Tensorr2   pixel_valuesrn   hidden_statesrepeat_interleave
zeros_likeimage_embeds)
rK   imager^   rw   r\   r]   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             rM   encode_image AnimateDiffPipeline.encode_imageS  s?   T''2245;;%..**5*FSSE4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +JJ--e4AAL'99:OUV9WL"'"2"2<"@44rO   c                 
   / nU(       a  / nUGc&  [        U[        5      (       d  U/n[        U5      [        U R                  R                  R
                  5      :w  aB  [        S[        U5       S[        U R                  R                  R
                  5       S35      e[        XR                  R                  R
                  5       Hh  u  p[        U	[        5      (       + n
U R                  XSU
5      u  pUR                  US S S 24   5        U(       d  MP  WR                  US S S 24   5        Mj     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r-   r(   r   r   r^   )rC   rc   rG   r;   encoder_hid_projimage_projection_layersru   zipr   r   appendchunk	enumeraterh   catrn   )rK   ip_adapter_imageip_adapter_image_embedsr^   rw   rx   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 rM   prepare_ip_adapter_image_embeds3AnimateDiffPipeline.prepare_ip_adapter_image_embedsl  s9    &$&!"*.55$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A ))"<"<"T"T>9' +55E*W&W#DHDUDU+Q8KEA# ##$7a$@A..)001MdTUg1VW> (?#.H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&rO      decode_chunk_sizec                 F   SU R                   R                  R                  -  U-  nUR                  u  p4pVnUR	                  SSSSS5      R                  X5-  XFU5      n/ n[        SUR                  S   U5       H?  n	XX-    n
U R                   R                  U
5      R                  n
UR                  U
5        MA     [        R                  " U5      nUS S S 24   R                  X5S4UR                  SS  -   5      R	                  SSSSS5      nUR                  5       nU$ )Nr-   r   r(   r      rY   )r8   rH   scaling_factorrd   permutereshaperangedecodesampler   rh   r   float)rK   r5   r   rz   channels
num_framesheightwidthvideor   batch_latentss              rM   decode_latents"AnimateDiffPipeline.decode_latents  s   dhhoo444w>:A--7
j%//!Q1a0889PRZdijq'--*,=>A#(=>M HHOOM:AAMLL' ?
 		% dAg&&
'CekkRSRTo'UV^^_`bcefhiklmrO   c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Neta	generator)setinspect	signaturer<   stepr   keys)rK   r   r   accepts_etaextra_step_kwargsaccepts_generators         rM   prepare_extra_step_kwargs-AnimateDiffPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rO   c           
        ^  US-  S:w  d	  US-  S:w  a  [        SU SU S35      eUb6  [        U[        5      (       a  US::  a  [        SU S[        U5       S35      eU
bW  [	        U 4S jU
 5       5      (       d=  [        S	T R
                   S
U
 Vs/ s H  oT R
                  ;  d  M  UPM     sn 35      eUb  Ub  [        SU SU S35      eUc  Uc  [        S5      eUb8  [        U[        [        [        45      (       d  [        S[        U5      < 35      eUb  Ub  [        SU SU S35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUb  U	b  [        S5      eU	b\  [        U	[        5      (       d  [        S[        U	5       35      eU	S   R                  S;  a  [        SU	S   R                   S35      eg g s  snf )Nr>   r   z7`height` and `width` have to be divisible by 8 but are z and r`   z5`callback_steps` has to be a positive integer but is z	 of type c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0krK   s     rM   	<genexpr>3AnimateDiffPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.zG`prompt` has to be of type `str`, `list` or `dict` but is type(prompt)=z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zProvide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined.z:`ip_adapter_image_embeds` has to be of type `list` but is )r   r   zF`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is D)ru   rC   intrs   allr   rb   rc   dictrd   ndim)rK   rv   r   r   callback_stepsry   r6   r7   r   r   "callback_on_step_end_tensor_inputsr   s   `           rM   check_inputs AnimateDiffPipeline.check_inputs  s    A:?eai1nVW]V^^cdicjjklmm%z.#/N/NR`deReGGW X(),  .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  
6Ct;L(M(MgZ^_eZfYhijj&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  ',C,O ^  #.5t<< PQUVmQnPop  )+00> \]tuv]w]|]|\}}~  ? /E pHs   G,*G,c
                 |   U R                   (       a  U R                  XX4XVXxU	5	      n	[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eUUUX@R                  -  XPR                  -  4n
U	c  [        XXvS9n	OU	R                  U5      n	XR                  R                  -  n	U	$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   r^   r]   )free_noise_enabled_prepare_latents_free_noiserC   rc   rG   ru   r@   r&   rn   r<   init_noise_sigma)rK   rz   num_channels_latentsr   r   r   r]   r^   r   r5   rd   s              rM   prepare_latents#AnimateDiffPipeline.prepare_latents  s     ""66*eTZgnG i&&3y>Z+GA#i.AQ R&<'gi   +++***
 ?"5fZGjj(G NN;;;rO   c                     U R                   $ r   _guidance_scalerK   s    rM   guidance_scale"AnimateDiffPipeline.guidance_scale"  s    ###rO   c                     U R                   $ r   )
_clip_skipr   s    rM   rQ   AnimateDiffPipeline.clip_skip&      rO   c                      U R                   S:  $ )Nr-   r   r   s    rM   rx   /AnimateDiffPipeline.do_classifier_free_guidance-  s    ##a''rO   c                     U R                   $ r   )_cross_attention_kwargsr   s    rM   cross_attention_kwargs*AnimateDiffPipeline.cross_attention_kwargs1  s    +++rO   c                     U R                   $ r   )_num_timestepsr   s    rM   num_timesteps!AnimateDiffPipeline.num_timesteps5  s    """rO   c                     U R                   $ r   )
_interruptr   s    rM   	interruptAnimateDiffPipeline.interrupt9  r   rO   2   g      @r-   g        pilTr5   rv   r   r   r   num_inference_stepsr   ry   num_videos_per_promptr   r   r   r   output_typereturn_dictr   callback_on_step_endr   c                    UR                  SS5      nUR                  SS5      nUb  [        SSS5        Ub  [        SSS5        U=(       d-    U R                  R                  R                  U R
                  -  nU=(       d-    U R                  R                  R                  U R
                  -  nSnU R                  UUUUUUUUUU5
        X`l        UU l        UU l	        SU l
        Ub  [        U[        [        45      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                   S	   nU R"                  nU R$                  b  U R$                  R'                  S
S5      OSnU R(                  (       a/  U R+                  UUUUU R,                  UUUUU R.                  S9
u  pOeU R1                  UUUU R,                  UUUUU R.                  S9	u  pU R,                  (       a  [2        R4                  " X/5      nUR7                  US	S9nUc  Ub"  U R9                  UUUUU-  U R,                  5      nU R:                  R=                  UUS9  U R:                  R>                  nU R                  R                  R@                  nU RC                  UU-  UUUUURD                  UU
U5	      nU RG                  X5      n Uc  Ub  SW0OSn!U RH                  (       a  U RJ                  OSn"[M        U"5       GHZ  n#U RH                  (       a#  U RO                  UU#UUURD                  U
5      u  nn[        U5      U l(        [        U5      XPR:                  RR                  -  -
  n$U RU                  U RP                  S9 n%[W        U5       GH  u  n&n'U RX                  (       a  M  U R,                  (       a  [2        R4                  " U/S-  5      OUn(U R:                  R[                  U(U'5      n(U R                  U(U'UUU!S9R\                  n)U R,                  (       a  U)R_                  S5      u  n*n+U*UU+U*-
  -  -   n)U R:                  R`                  " U)U'U40 U D6Rb                  nUb\  0 n,U H  n-[e        5       U-   U,U-'   M     U" U U&U'U,5      n.U.R                  SU5      nU.R                  SU5      nU.R                  SU5      nU&[        U5      S-
  :X  d)  U&S-   U$:  aF  U&S-   U R:                  RR                  -  S	:X  a&  U%Rg                  5         Ub  U&U-  S	:X  a
  U" U&U'U5        [h        (       d  GM  [j        Rl                  " 5         GM     SSS5        GM]     US:X  a  Un/O,U Ro                  UU5      n0U Rp                  Rs                  U0US9n/U Ru                  5         U(       d  U/4$ [w        U/S9$ ! , (       d  f       GM  = f)u  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The height in pixels of the generated video.
    width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The width in pixels of the generated video.
    num_frames (`int`, *optional*, defaults to 16):
        The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
        amounts to 2 seconds of video.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality videos at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 7.5):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to not include in image generation. If not defined, you need to
        pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
        applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
        `(batch_size, num_channel, num_frames, height, width)`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, text embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
        not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
    ip_adapter_image: (`PipelineImageInput`, *optional*):
        Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
        IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
        contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
        provided, embeddings are computed from the `ip_adapter_image` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
        of a plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
        [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    decode_chunk_size (`int`, defaults to `16`):
        The number of frames to decode at a time when calling `decode_latents` method.

Examples:

Returns:
    [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
        returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
callbackNr   z1.0.0zjPassing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`zpPassing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`r-   Fr   scale)
rv   r   r^   r   rx   ry   r6   r7   rP   rQ   )r6   r7   rP   rQ   )repeatsr   r   r   )totalr(   )encoder_hidden_statesr   added_cond_kwargsr5   r6   r7   latent)r   r  )frames)<popr    r;   rH   sample_sizer@   r   r   r   r   r   rC   rb   r   rc   rG   rd   _execution_devicer   getr   _encode_prompt_free_noiserx   rQ   r   rh   r   r   r   r<   set_timesteps	timestepsin_channelsr   r]   r   free_init_enabled_free_init_num_itersr   _apply_free_initr   orderprogress_barr   r   scale_model_inputr   r   r   prev_samplelocalsupdateXLA_AVAILABLExm	mark_stepr   rJ   postprocess_videomaybe_free_model_hooksr.   )1rK   rv   r   r   r   r   r   ry   r   r   r   r5   r6   r7   r   r   r  r  r   rQ   r  r   r   kwargsr  r   rz   r^   text_encoder_lora_scaler   r  r   r   r
  num_free_init_itersfree_init_iternum_warmup_stepsr  r   tlatent_model_input
noise_prednoise_pred_uncondnoise_pred_textcallback_kwargsr   callback_outputsr   video_tensors1                                                    rM   __call__AnimateDiffPipeline.__call__=  s   R ::j$/$4d;|
 %  C O499++77$:O:OOM))558M8MM ! 	"#.	
  .#'=$ *Vc4["A"AJJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	  ""484R4R%&;,0,L,L /+'=2.. 5S 51M1 594F4F%00+'=2.. 5G 
51M // %		+A*Q R);;JTU;VM'+B+N?? '2200L 	$$%8$HNN,,	  $yy//;;&&.. 

 !::9J
  +/F/R \* 	 <@;Q;Qd77WX#$78N%%%)%:%:^-@&'--Yb&" #&i.D"9~0CnnFZFZ0ZZ "")<)<"=%i0DAq~~  FJEeEeG9q=)Akr&)-)I)IJ\^_)`& "&*.;/E*; "+ " f  77=G=M=Ma=P:)?%6?]nKn9o%o
 #nn11*a^L]^jjG+7*,!CA17!OA. "D+?aO+\("2"6"6y'"J(8(<(<_m(\1A1E1EF^`v1w. C	NQ..AE=M3MSTWXSX\`\j\j\p\pRptuRu$++-#/A4F!4K$Q73$}U 1 >= 9p ("E..w8IJL((::[f:gE 	##%8O(66u >=s   F.V45V44
W	)r   r   r   r   ra   r   r@   rJ   )NN)NNNNNr   )r   )NNNNNN)9__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r
   r   r   r   r   r   r   r   r   r   r   r   r	   r   rB   r   rh   r   r   r   r   r   r   r   r   r   r   propertyr   rQ   rx   r   r   r   no_gradr#   EXAMPLE_DOC_STRINGrb   r   	Generatorr   boolr   r   r   r0  __static_attributes____classcell__)rL   s   @rM   r0   r0   N   s   8 EST" 157;!!g!g $!g !	!g
 (/9:!g &!g  "+')
!g .!g  5!!g !gT 049=&*#'t5  -t5 !) 6t5 UOt5 C=t5n52+'Z &!. # $+/CL nr@ $ $   ( ( , , # #   ]]_12 37$& $##% #;?/0MQ*.049=9=@D%* ;?#'KO9B!#/j7sDI~./j7 SMj7 	j7
 }j7 !j7 j7 "%T#Y"78j7  (}j7 j7 E%//43H"HIJj7 %,,'j7  -j7 !) 6j7 ##56j7  "*$u||*<!=!j7" c]#j7$ %j7& !)c3h 8'j7( C=)j7* 'xc40@$0F'GH+j7, -1I-j7. /j7 3 j7rO   r0   )Dr   typingr   r   r   r   r   r   rh   transformersr	   r
   r   r   image_processorr   loadersr   r   r   r   modelsr   r   r   r   models.lorar   models.unets.unet_motion_modelr   
schedulersr   r   r   r   r   r   utilsr   r    r!   r"   r#   r$   r%   utils.torch_utilsr&   rJ   r'   free_init_utilsr)   free_noise_utilsr*   pipeline_utilsr+   r,   pipeline_outputr.   torch_xla.core.xla_modelcore	xla_modelr  r  
get_loggerr2  rk   r;  r0    rO   rM   <module>rS     s     = =  h h 1 w w [ [ 9 ;    . - + 8 D 6 ))MM			H	% "[7"[7rO   