
    +h/                        S SK r S SKJrJrJrJrJrJr  S SKrS SK	J
r
JrJrJr  SSKJr  SSKJrJrJr  SSKJrJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJ r J!r!J"r"J#r#J$r$J%r%  SSK&J'r'  SSK(J)r)  SSK*J+r+  SSK,J-r-  SSK.J/r/  SSK0J1r1J2r2  SSK3J4r4  \!" 5       (       a  S SK5J6s  J7r8  Sr9OSr9\"Rt                  " \;5      r<Sr= " S S\1\2\\\\-\/\45
      r>g)    N)AnyCallableDictListOptionalUnion)CLIPImageProcessorCLIPTextModelCLIPTokenizerCLIPVisionModelWithProjection   )PipelineImageInput)IPAdapterMixinStableDiffusionLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLImageProjectionUNet2DConditionModelUNetMotionModel)adjust_lora_scale_text_encoder)MotionAdapter)KarrasDiffusionSchedulers)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor)VideoProcessor   )AnimateDiffPipelineOutput)FreeInitMixin)AnimateDiffFreeNoiseMixin)DiffusionPipelineStableDiffusionMixin   )PAGMixinTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AnimateDiffPAGPipeline, MotionAdapter, DDIMScheduler
        >>> from diffusers.utils import export_to_gif

        >>> model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
        >>> motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-2"
        >>> motion_adapter = MotionAdapter.from_pretrained(motion_adapter_id)
        >>> scheduler = DDIMScheduler.from_pretrained(
        ...     model_id, subfolder="scheduler", beta_schedule="linear", steps_offset=1, clip_sample=False
        ... )
        >>> pipe = AnimateDiffPAGPipeline.from_pretrained(
        ...     model_id,
        ...     motion_adapter=motion_adapter,
        ...     scheduler=scheduler,
        ...     pag_applied_layers=["mid"],
        ...     torch_dtype=torch.float16,
        ... ).to("cuda")

        >>> video = pipe(
        ...     prompt="car, futuristic cityscape with neon lights, street, no human",
        ...     negative_prompt="low quality, bad quality",
        ...     num_inference_steps=25,
        ...     guidance_scale=6.0,
        ...     pag_scale=3.0,
        ...     generator=torch.Generator().manual_seed(42),
        ... ).frames[0]

        >>> export_to_gif(video, "animatediff_pag.gif")
        ```
c            3         ^  \ rS rSrSrSr/ SQr/ SQr   S@S\S\	S	\
S
\\\4   S\S\S\S\S\\\\   4   4U 4S jjjr     SAS\\R.                     S\\R.                     S\\   S\\   4S jjrSBS jrS rSCS\4S jjrS r      SDS jr SBS jr \!S 5       r"\!S 5       r#\!S  5       r$\!S! 5       r%\!S" 5       r&\RN                  " 5       \(" \)5      SSSSS#S$SS%S&SSSSSSS'S(SSSS)/SS*S&4S+\\\\\   4      S,\\   S-\\   S.\\   S/\S0\S1\\\\\   4      S2\\   S3\S4\\\RT                  \\RT                     4      S)\\R.                     S\\R.                     S\\R.                     S5\\+   S6\\\R.                        S7\\   S8\,S9\\-\\.4      S\\   S:\\/\\\-/S4      S;\\   S\S<\S=\40S> jj5       5       r0S?r1U =r2$ )EAnimateDiffPAGPipelineY   a  
Pipeline for text-to-video generation using
[AnimateDiff](https://huggingface.co/docs/diffusers/en/api/pipelines/animatediff) and [Perturbed Attention
Guidance](https://huggingface.co/docs/diffusers/en/using-diffusers/pag).

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
    tokenizer (`CLIPTokenizer`):
        A [`~transformers.CLIPTokenizer`] to tokenize text.
    unet ([`UNet2DConditionModel`]):
        A [`UNet2DConditionModel`] used to create a UNetMotionModel to denoise the encoded video latents.
    motion_adapter ([`MotionAdapter`]):
        A [`MotionAdapter`] to be used in combination with `unet` to denoise the encoded video latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
z&text_encoder->image_encoder->unet->vae)feature_extractorimage_encodermotion_adapter)latentsprompt_embedsnegative_prompt_embedsNvaetext_encoder	tokenizerunetr.   	schedulerr,   r-   pag_applied_layersc
                   > [         T
U ]  5         [        U[        5      (       a  [        R
                  " XE5      nU R                  UUUUUUUUS9  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OSU l        [        SU R                  S9U l        U R                  U	5        g )N)r2   r3   r4   r5   r.   r6   r,   r-   r2   r!   r'      F)	do_resizevae_scale_factor)super__init__
isinstancer   r   from_unet2dregister_modulesgetattrlenr2   configblock_out_channelsr;   r    video_processorset_pag_applied_layers)selfr2   r3   r4   r5   r.   r6   r,   r-   r7   	__class__s             m/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.pyr=   AnimateDiffPAGPipeline.__init__   s     	d011"..tDD%)/' 	 		
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw-PTPePef##$67    r0   r1   
lora_scale	clip_skipc
                 
   UbS  [        U [        5      (       a>  Xl        [        (       d  [	        U R
                  U5        O[        U R
                  U5        Ub  [        U[        5      (       a  Sn
O3Ub!  [        U[        5      (       a  [        U5      n
OUR                  S   n
UGc  [        U [        5      (       a  U R                  XR                  5      nU R                  USU R                  R                  SSS9nUR                  nU R                  USSS	9R                  nUR                  S
   UR                  S
   :  a  [         R"                  " X5      (       dj  U R                  R%                  USS2U R                  R                  S-
  S
24   5      n[&        R)                  SU R                  R                   SU 35        [+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU	c%  U R                  UR3                  U5      US9nUS   nOQU R                  UR3                  U5      USS9nUS
   U	S-   *    nU R
                  R4                  R7                  U5      nU R
                  b  U R
                  R8                  nO0U R:                  b  U R:                  R8                  nOUR8                  nUR3                  UUS9nUR                  u  nnnUR=                  SUS5      nUR?                  UU-  US
5      nU(       Ga  UGc|  Uc  S/U
-  nOUb;  [A        U5      [A        U5      La$  [C        S[A        U5       S[A        U5       S35      e[        U[        5      (       a  U/nO2U
[        U5      :w  a!  [E        SU S[        U5       SU SU
 S3	5      eUn[        U [        5      (       a  U R                  UU R                  5      nUR                  S   nU R                  USUSSS9n[+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU R                  UR                  R3                  U5      US9nUS   nU(       aG  UR                  S   nUR3                  UUS9nUR=                  SUS5      nUR?                  X-  US
5      nU R
                  b6  [        U [        5      (       a!  [        (       a  [G        U R
                  U5        Xg4$ )a,  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    lora_scale (`float`, *optional*):
        A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr'   r   
max_lengthTpt)paddingrO   
truncationreturn_tensorslongest)rQ   rS   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: use_attention_mask)attention_mask)rW   output_hidden_states)dtypedevice z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)$r>   r   _lora_scaler   r   r3   r   strlistrB   shaper   maybe_convert_promptr4   model_max_length	input_idstorchequalbatch_decodeloggerwarninghasattrrC   rV   rW   to
text_modelfinal_layer_normrY   r5   repeatviewtype	TypeError
ValueErrorr   )rG   promptrZ   num_images_per_promptdo_classifier_free_guidancenegative_promptr0   r1   rL   rM   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textrW   prompt_embeds_dtypebs_embedseq_len_uncond_tokensrO   uncond_inputs                          rI   encode_prompt$AnimateDiffPAGPipeline.encode_prompt   sQ   V !j7U&V&V) $#.t/@/@*M!$"3"3Z@*VS"9"9JJvt$<$<VJ&,,Q/J $ ;<<226>>J..$>>::# ) K )22N"nnVYW[n\ffO$$R(N,@,@,DDU[[N N  $~~::#At~~'F'F'JR'O$OP  778	,Q
 t((//1EFF4K\K\KcKcKvKv!,!;!;!>!>v!F!%  $ 1 1.2C2CF2K\j 1 k -a 0 $ 1 1"%%f-ncg !2 ! !.b 1IM2B C
 !% 1 1 < < M Mm \("&"3"3"9"9YY""&))//"/"5"5%((/B6(R,22'1%,,Q0EqI%**86K+KWVXY '+A+I&!#z 1#VD<Q(QUVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0 $ ;<< $ 9 9- X&,,Q/J>>$%# * L t((//1EFF4K\K\KcKcKvKv!-!<!<!?!?!G!%%)%6%6&&))&1- &7 &" &<A%>"&,2215G%;%>%>EXag%>%h"%;%B%B1F[]^%_"%;%@%@Acelnp%q"($ >??DTDT#D$5$5zB44rK   c                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	NrP   )rS   )rZ   rY   T)rX   r   dim)nextr-   
parametersrY   r>   rd   Tensorr,   pixel_valuesrj   hidden_statesrepeat_interleave
zeros_likeimage_embeds)
rG   imagerZ   rs   rX   rY   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             rI   encode_image#AnimateDiffPAGPipeline.encode_image\  s?   T''2245;;%..**5*FSSE4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +JJ--e4AAL'99:OUV9WL"'"2"2<"@44rK   c                 
   / nU(       a  / nUGc&  [        U[        5      (       d  U/n[        U5      [        U R                  R                  R
                  5      :w  aB  [        S[        U5       S[        U R                  R                  R
                  5       S35      e[        XR                  R                  R
                  5       Hh  u  p[        U	[        5      (       + n
U R                  XSU
5      u  pUR                  US S S 24   5        U(       d  MP  WR                  US S S 24   5        Mj     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r'   r!   r   r   rZ   )r>   r_   rB   r5   encoder_hid_projimage_projection_layersrq   zipr   r   appendchunk	enumeraterd   catrj   )rG   ip_adapter_imageip_adapter_image_embedsrZ   rs   rt   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 rI   prepare_ip_adapter_image_embeds6AnimateDiffPAGPipeline.prepare_ip_adapter_image_embedsu  s9    &$&!"*.55$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A ))"<"<"T"T>9' +55E*W&W#DHDUDU+Q8KEA# ##$7a$@A..)001MdTUg1VW> (?#.H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&rK      decode_chunk_sizec                 F   SU R                   R                  R                  -  U-  nUR                  u  p4pVnUR	                  SSSSS5      R                  X5-  XFU5      n/ n[        SUR                  S   U5       H?  n	XX-    n
U R                   R                  U
5      R                  n
UR                  U
5        MA     [        R                  " U5      nUS S S 24   R                  X5S4UR                  SS  -   5      R	                  SSSSS5      nUR                  5       nU$ )Nr'   r   r!   r      rU   )r2   rC   scaling_factorr`   permutereshaperangedecodesampler   rd   r   float)rG   r/   r   rv   channels
num_framesheightwidthvideor   batch_latentss              rI   decode_latents%AnimateDiffPAGPipeline.decode_latents  s   dhhoo444w>:A--7
j%//!Q1a0889PRZdijq'--*,=>A#(=>M HHOOM:AAMLL' ?
 		% dAg&&
'CekkRSRTo'UV^^_`bcefhiklmrK   c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Neta	generator)setinspect	signaturer6   stepr   keys)rG   r   r   accepts_etaextra_step_kwargsaccepts_generators         rI   prepare_extra_step_kwargs0AnimateDiffPAGPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rK   c
           
        ^  US-  S:w  d	  US-  S:w  a  [        SU SU S35      eU	bW  [        U 4S jU	 5       5      (       d=  [        ST R                   SU	 V
s/ s H  oT R                  ;  d  M  U
PM     sn
 35      eUb  Ub  [        S	U S
U S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Ub  [        SU SU S35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUb  Ub  [        S5      eUb\  [        U[
        5      (       d  [        S[        U5       35      eUS   R                  S;  a  [        SUS   R                   S35      eg g s  sn
f )Nr9   r   z7`height` and `width` have to be divisible by 8 but are z and r\   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0krG   s     rI   	<genexpr>6AnimateDiffPAGPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zProvide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined.z:`ip_adapter_image_embeds` has to be of type `list` but is )r   r   zF`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is D)	rq   allr   r>   r^   r_   ro   r`   ndim)rG   rr   r   r   ru   r0   r1   r   r   "callback_on_step_end_tensor_inputsr   s   `          rI   check_inputs#AnimateDiffPAGPipeline.check_inputs  s    A:?eai1nVW]V^^cdicjjklmm-9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  ',C,O ^  #.5t<< PQUVmQnPop  )+00> \]tuv]w]|]|\}}~  ? /E pHs   F<1F<c
                 |   U R                   (       a  U R                  XX4XVXxU	5	      n	[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eUUUX@R                  -  XPR                  -  4n
U	c  [        XXvS9n	OU	R                  U5      n	XR                  R                  -  n	U	$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   rZ   rY   )free_noise_enabled_prepare_latents_free_noiser>   r_   rB   rq   r;   r   rj   r6   init_noise_sigma)rG   rv   num_channels_latentsr   r   r   rY   rZ   r   r/   r`   s              rI   prepare_latents&AnimateDiffPAGPipeline.prepare_latents  s     ""66*eTZgnG i&&3y>Z+GA#i.AQ R&<'gi   +++***
 ?"5fZGjj(G NN;;;rK   c                     U R                   $ r   _guidance_scalerG   s    rI   guidance_scale%AnimateDiffPAGPipeline.guidance_scale(  s    ###rK   c                     U R                   $ r   )
_clip_skipr   s    rI   rM    AnimateDiffPAGPipeline.clip_skip,  s    rK   c                      U R                   S:  $ )Nr'   r   r   s    rI   rt   2AnimateDiffPAGPipeline.do_classifier_free_guidance3  s    ##a''rK   c                     U R                   $ r   )_cross_attention_kwargsr   s    rI   cross_attention_kwargs-AnimateDiffPAGPipeline.cross_attention_kwargs7  s    +++rK   c                     U R                   $ r   )_num_timestepsr   s    rI   num_timesteps$AnimateDiffPAGPipeline.num_timesteps;  s    """rK   2   g      @r'   g        pilTr/   g      @rr   r   r   r   num_inference_stepsr   ru   num_videos_per_promptr   r   r   r   output_typereturn_dictr   callback_on_step_endr   	pag_scalepag_adaptive_scalec                 ^   U=(       d-    U R                   R                  R                  U R                  -  nU=(       d-    U R                   R                  R                  U R                  -  nSnU R	                  UUUUUUUUU5	        X`l        UU l        UU l        UU l        UU l	        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                   b  U R                   R#                  SS5      OSnU R%                  UUUU R&                  UUUUU R(                  S9	u  pU R*                  (       a  U R-                  XU R&                  5      nO(U R&                  (       a  [.        R0                  " X/5      nUR3                  USS9nUc  Ub  U R5                  UUUUU-  U R&                  5      n[7        U5       H  u  nnSnU R&                  (       a  UR9                  S5      u  nnU R*                  (       a  U R-                  UUU R&                  5      nO(U R&                  (       a  [.        R0                  " UU/SS9nUR;                  U5      nUUU'   M     U R<                  R?                  UUS	9  U R<                  R@                  nU R                   R                  RB                  n U RE                  UU-  U UUUURF                  UU
U5	      nU RI                  X5      n!Uc  Ub  S
U0OSn"U R*                  (       a:  U R                   RJ                  n#U RM                  U RN                  U R&                  S9  U RP                  (       a  U RR                  OSn$[U        U$5       GHx  n%U RP                  (       a#  U RW                  UU%UUURF                  U
5      u  nn[        U5      U l,        [        U5      XPR<                  RZ                  -  -
  n&U R]                  U RX                  S9 n'[7        U5       GH  u  nn([.        R0                  " U/UR                  S   U-  UR                  S   -  -  5      n)U R<                  R_                  U)U(5      n)U R                  U)U(UUU"S9R`                  n*U R*                  (       a)  U Rc                  U*U R&                  U Rd                  U(5      n*O0U R&                  (       a  U*R9                  S5      u  n+n,U+UU,U+-
  -  -   n*U R<                  Rf                  " U*U(U40 U!D6Rh                  nUb\  0 n-U H  n.[k        5       U.   U-U.'   M     U" U UU(U-5      n/U/Rm                  SU5      nU/Rm                  SU5      nU/Rm                  SU5      nU[        U5      S-
  :X  d)  US-   U&:  a0  US-   U R<                  RZ                  -  S:X  a  U'Ro                  5         [p        (       d  GM  [r        Rt                  " 5         GM     SSS5        GM{     US:X  a  Un0O,U Rw                  UU5      n1U Rx                  R{                  U1US9n0U R}                  5         U R*                  (       a  U R                   R                  W#5        U(       d  U04$ [        U0S9$ ! , (       d  f       GM  = f)u  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The height in pixels of the generated video.
    width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The width in pixels of the generated video.
    num_frames (`int`, *optional*, defaults to 16):
        The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
        amounts to 2 seconds of video.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality videos at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 7.5):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to not include in image generation. If not defined, you need to
        pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
        applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
        `(batch_size, num_channel, num_frames, height, width)`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, text embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
        not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
    ip_adapter_image: (`PipelineImageInput`, *optional*):
        Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
        IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
        contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
        provided, embeddings are computed from the `ip_adapter_image` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
        of a plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
        [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    pag_scale (`float`, *optional*, defaults to 3.0):
        The scale factor for the perturbed attention guidance. If it is set to 0.0, the perturbed attention
        guidance will not be used.
    pag_adaptive_scale (`float`, *optional*, defaults to 0.0):
        The adaptive scale factor for the perturbed attention guidance. If it is set to 0.0, `pag_scale` is
        used.

Examples:

Returns:
    [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
        returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
r'   Nr   scale)r0   r1   rL   rM   )repeatsr   r!   r   r   r   )r7   rt   )total)encoder_hidden_statesr   added_cond_kwargsr/   r0   r1   latent)r   r   )frames)Ar5   rC   sample_sizer;   r   r   r   r   
_pag_scale_pag_adaptive_scaler>   r^   r_   rB   r`   _execution_devicer   getr   rt   rM   do_perturbed_attention_guidance%_prepare_perturbed_attention_guidancerd   r   r   r   r   r   rj   r6   set_timesteps	timestepsin_channelsr   rY   r   attn_processors_set_pag_attn_processorr7   free_init_enabled_free_init_num_itersr   _apply_free_initr   orderprogress_barscale_model_inputr   #_apply_perturbed_attention_guidancer   r   prev_samplelocalspopupdateXLA_AVAILABLExm	mark_stepr   rE   postprocess_videomaybe_free_model_hooksset_attn_processorr"   )2rG   rr   r   r   r   r   r   ru   r   r   r   r/   r0   r1   r   r   r   r   r   rM   r   r   r   r   r   rv   rZ   text_encoder_lora_scaler   r   r   r
  r   r   r   original_attn_procnum_free_init_itersfree_init_iternum_warmup_stepsr  tlatent_model_input
noise_prednoise_pred_uncondnoise_pred_textcallback_kwargsr   callback_outputsr   video_tensors2                                                     rI   __call__AnimateDiffPAGPipeline.__call__?  s   ^ O499++77$:O:OOM))558M8MM ! 	"#.
	
  .#'=$##5  *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	  150B0B!,,'#9.nn 1C 
1
- // FFt7W7WM --!II'=&MNM%77
PQ7R'+B+N&*&J&J '2200'# $--D#E<(,%33:F:L:LQ:O7)<77#'#M#M$&;T=]=]$L 55#(99.C\-RXY#ZL+v6-9'* $F 	$$%8$HNN,,	  $yy//;;&&.. 

 !::9J
  +/F/R 45 	 //!%!:!:((#'#:#:,0,L,L ) 
 <@;Q;Qd77WX#$78N%%%)%:%:^-@&'--Yb&" #&i.D"9~0CnnFZFZ0ZZ "")<)<"=%i0DAq). 	]%8%8%;z%IW]][\M]%]^*& *.)I)IJ\^_)`& "&*.;/E*; "+ " f  ;;%)%M%M&(H(H$J]J]_`&
 99=G=M=Ma=P:)?%6?]nKn9o%o
 #nn11*a^L]^jjG+7*,!CA17!OA. "D+?aO+\("2"6"6y'"J(8(<(<_m(\1A1E1EF^`v1w. C	NQ..AE=M3MSTWXSX\`\j\j\p\pRptuRu$++-$}W 1 >= 9r ("E..w8IJL((::[f:gE 	##%//II(();<8O(66} >=s    GZ1Z
Z,	)	r   r   r   r]   r   r  r  r;   rE   )NNzmid_block.*attn1)NNNNNr   )r   )NNNNNN)3__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r
   r   r   r   r   r   r   r	   r   r^   r   r=   r   rd   r   r   intr   r   r   r   r   r   r   propertyr   rM   rt   r   r   no_gradr   EXAMPLE_DOC_STRING	Generatorr   boolr   r   r   r,  __static_attributes____classcell__)rH   s   @rI   r*   r*   Y   s   < EST 157;4F88 $8 !	8
 (/9:8 &8 -8 .8 58 "#tCy.18 8L 049=&*#'t5  -t5 !) 6t5 UOt5 C=t5n52+'\ &!. # $+/=B nr@ $ $   ( ( , , # # ]]_12 37$& $##% #;?/0MQ*.049=9=@D%* ;?#'KO9B!#$'3m7sDI~./m7 SMm7 	m7
 }m7 !m7 m7 "%T#Y"78m7  (}m7 m7 E%//43H"HIJm7 %,,'m7  -m7 !) 6m7 ##56m7  "*$u||*<!=!m7" c]#m7$ %m7& !)c3h 8'm7( C=)m7* 'xc40@$0F'GH+m7, -1I-m7. /m70 1m72 "3m7 3 m7rK   r*   )?r   typingr   r   r   r   r   r   rd   transformersr	   r
   r   r   image_processorr   loadersr   r   r   modelsr   r   r   r   models.lorar   models.unets.unet_motion_modelr   
schedulersr   utilsr   r   r   r   r   r   utils.torch_utilsr   rE   r    animatediff.pipeline_outputr"   free_init_utilsr#   free_noise_utilsr$   pipeline_utilsr%   r&   	pag_utilsr(   torch_xla.core.xla_modelcore	xla_modelr  r  
get_loggerr.  rg   r8  r*    rK   rI   <module>rQ     s     = =  h h 1 b b [ [ 9 ; 3  . - C + 8 D  ))MM			H	%  FU7"U7rK   