
    +h                    $   S SK r S SKJrJrJrJrJrJrJr  S SK	r	S SK
JrJrJrJrJr  SSKJr  SSKJrJrJrJr  SSKJrJrJrJrJr  SSKJrJrJ r   SS	K!J"r"  SS
K#J$r$J%r%J&r&J'r'J(r(J)r)  SSK*J+r+J,r,J-r-J.r.J/r/J0r0  SSK1J2r2  SSK3J4r4  SSK5J6r6  SSK7J8r8J9r9  SSK:J;r;  \," 5       (       a  S SK<J=s  J>r?  Sr@OSr@\-R                  " \B5      rCSrDSS jrE    SS\\F   S\\\G\	R                  4      S\\\F      S\\\I      4S jjrJ " S S\8\9\\\\\65	      rKg)     N)AnyCallableDictListOptionalTupleUnion)CLIPImageProcessorCLIPTextModelCLIPTextModelWithProjectionCLIPTokenizerCLIPVisionModelWithProjection   )PipelineImageInput)FromSingleFileMixinIPAdapterMixin StableDiffusionXLLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLImageProjectionMotionAdapterUNet2DConditionModelUNetMotionModel)AttnProcessor2_0FusedAttnProcessor2_0XFormersAttnProcessor)adjust_lora_scale_text_encoder)DDIMSchedulerDPMSolverMultistepSchedulerEulerAncestralDiscreteSchedulerEulerDiscreteSchedulerLMSDiscreteSchedulerPNDMScheduler)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor)VideoProcessor   )FreeInitMixin)DiffusionPipelineStableDiffusionMixin   )AnimateDiffPipelineOutputTFa$  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers.models import MotionAdapter
        >>> from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
        >>> from diffusers.utils import export_to_gif

        >>> adapter = MotionAdapter.from_pretrained(
        ...     "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
        ... )

        >>> model_id = "stabilityai/stable-diffusion-xl-base-1.0"
        >>> scheduler = DDIMScheduler.from_pretrained(
        ...     model_id,
        ...     subfolder="scheduler",
        ...     clip_sample=False,
        ...     timestep_spacing="linspace",
        ...     beta_schedule="linear",
        ...     steps_offset=1,
        ... )
        >>> pipe = AnimateDiffSDXLPipeline.from_pretrained(
        ...     model_id,
        ...     motion_adapter=adapter,
        ...     scheduler=scheduler,
        ...     torch_dtype=torch.float16,
        ...     variant="fp16",
        ... ).to("cuda")

        >>> # enable memory savings
        >>> pipe.enable_vae_slicing()
        >>> pipe.enable_vae_tiling()

        >>> output = pipe(
        ...     prompt="a panda surfing in the ocean, realistic, high quality",
        ...     negative_prompt="low quality, worst quality",
        ...     num_inference_steps=20,
        ...     guidance_scale=8,
        ...     width=1024,
        ...     height=1024,
        ...     num_frames=16,
        ... )

        >>> frames = output.frames[0]
        >>> export_to_gif(frames, "animation.gif")
        ```
c                     UR                  [        [        SUR                  5      5      SS9nU R                  [        [        SU R                  5      5      SS9nXU-  -  nX%-  SU-
  U -  -   n U $ )a  
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
Flawed](https://huggingface.co/papers/2305.08891).

Args:
    noise_cfg (`torch.Tensor`):
        The predicted noise tensor for the guided diffusion process.
    noise_pred_text (`torch.Tensor`):
        The predicted noise tensor for the text-guided diffusion process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        A rescale factor applied to the noise predictions.

Returns:
    noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
r0   T)dimkeepdim)stdlistrangendim)	noise_cfgnoise_pred_textguidance_rescalestd_textstd_cfgnoise_pred_rescaleds         s/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.pyrescale_noise_cfgr@   |   s{    " ""tE!_5I5I,J'KUY"ZHmmU1inn%= >mMG#''9: 6!>N:NR[9[[I    num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesrD   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)rD   rC   rE   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)rE   rC   rC    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__rD   len)	schedulerrB   rC   rD   rE   kwargsaccepts_timestepsaccept_sigmass           r?   retrieve_timestepsrU      s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))rA   c            K         ^  \ rS rSrSrSr/ SQr/ SQr   SZS\S	\	S
\
S\S\S\\\4   S\S\\\\\\\4   S\S\S\4U 4S jjjr            S[S\S\\   S\\R:                     S\S\S\\   S\\   S\\R>                     S\\R>                     S\\R>                     S\\R>                     S \\    S!\\   4S" jjr!S\S# jr"S$ r#S% r$S& r%       S]S' jr& S\S( jr' S\S) jr(S* r)S+\RT                  4S,\R>                  S-\S.\RV                  S/\R>                  4S0 jjr,\-S1 5       r.\-S2 5       r/\-S3 5       r0\-S4 5       r1\-S5 5       r2\-S6 5       r3\-S7 5       r4\-S8 5       r5\Rl                  " 5       \7" \85      SSS9SSS:SSSS;SSSS<SSSSSSSSS=SSS<SS>SSS>SSSS?/4#S\\\9\   4   S\\\\9\   4      S@\SA\\   SB\\   SC\SD\9\   SE\9\    SF\\    SG\ S\\\\9\   4      S\\\\9\   4      S\\   SH\ SI\\\Rt                  \9\Rt                     4      S?\\R>                     S\\R>                     S\\R>                     S\\R>                     S\\R>                     SJ\\;   SK\\9\R>                        SL\\   SM\SN\\<\\=4      SO\ SP\\>\\4      SQ\>\\4   SR\\>\\4      SS\\>\\4      ST\>\\4   SU\\>\\4      S!\\   SV\\?\\\</S4      SW\9\   4FSX jj5       5       r@SYrAU =rB$ )^AnimateDiffSDXLPipeline   aI
  
Pipeline for text-to-video generation using Stable Diffusion XL.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder. Stable Diffusion XL uses the text portion of
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
        the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
    text_encoder_2 ([` CLIPTextModelWithProjection`]):
        Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the
        [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
        variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`CLIPTokenizer`):
        Second Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the encoded image latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    force_zeros_for_empty_prompt (`bool`, *optional*, defaults to `"True"`):
        Whether the negative prompt embeddings shall be forced to always be set to 0. Also see the config of
        `stabilityai/stable-diffusion-xl-base-1-0`.
z6text_encoder->text_encoder_2->image_encoder->unet->vae)	tokenizertokenizer_2text_encodertext_encoder_2image_encoderfeature_extractor)latentsprompt_embedsnegative_prompt_embedsadd_text_embedsadd_time_idsnegative_pooled_prompt_embedsnegative_add_time_idsNTvaer[   r\   rY   rZ   unetmotion_adapterrQ   r]   r^   force_zeros_for_empty_promptc                 f  > [         TU ]  5         [        U[        5      (       a  [        R
                  " Xg5      nU R                  UUUUUUUUU	U
S9
  U R                  US9  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OSU l        [        U R                  S9U l        [!        U S5      (       aX  U R"                  bK  [!        U R"                  R                  S	5      (       a&  U R"                  R                  R$                  U l        g S
U l        g )N)
rf   r[   r\   rY   rZ   rg   rh   rQ   r]   r^   )ri   rf   r,   r0      )vae_scale_factorrg   sample_size   )super__init__
isinstancer   r   from_unet2dregister_modulesregister_to_configgetattrrP   rf   configblock_out_channelsrl   r+   video_processorhasattrrg   rm   default_sample_size)selfrf   r[   r\   rY   rZ   rg   rh   rQ   r]   r^   ri   rO   s               r?   rp    AnimateDiffSDXLPipeline.__init__  s   * 	d011"..tDD%)#)'/ 	 	
 	=YZV]^bdikoVpVpc$((//*L*L&MPQ&Q Rvw-t?T?TU tV$$)>7499K[K[]jCkCk II(( 	   	 rA   r0   promptprompt_2rC   num_videos_per_promptdo_classifier_free_guidancenegative_promptnegative_prompt_2r`   ra   pooled_prompt_embedsrd   
lora_scale	clip_skipc                    U=(       d    U R                   nUb  [        U [        5      (       a  Xl        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        [        U[        5      (       a  U/OUnUb  [        U5      nOUR                  S   nU R                  b  U R                  U R                  /OU R                  /nU R                  b  U R                  U R                  /OU R                  /nUGc  U=(       d    Un[        U[        5      (       a  U/OUn/ nX/n[        UUU5       GHQ  u  nnn[        U [        5      (       a  U R!                  UU5      nU" USUR"                  SSS9nUR$                  nU" USSS9R$                  nUR                  S	   UR                  S	   :  ah  [&        R(                  " UU5      (       dL  UR+                  USS2UR"                  S
-
  S	24   5      n[,        R/                  SUR"                   SU 35        U" UR1                  U5      SS9nU
c  US   R2                  S:X  a  US   n
Uc  UR4                  S   nOUR4                  US-   *    nUR7                  U5        GMT     [&        R8                  " US	S9nUSL =(       a    U R:                  R<                  nU(       a8  U	c5  U(       a.  [&        R>                  " U5      n	[&        R>                  " U
5      nGOU(       Ga  U	Gc  U=(       d    SnU=(       d    Un[        U[        5      (       a  X/-  OUn[        U[        5      (       a  X/-  OUnUb;  [A        U5      [A        U5      La$  [C        S[A        U5       S[A        U5       S35      eU[        U5      :w  a!  [E        SU S[        U5       SU SU S3	5      eXg/n/ n[        UUU5       H  u  nnn[        U [        5      (       a  U R!                  UU5      nUR                  S
   nU" USUSSS9nU" UR$                  R1                  U5      SS9n	Uc  U	S   R2                  S:X  a  U	S   nU	R4                  S   n	UR7                  U	5        M     [&        R8                  " US	S9n	U R                  b%  UR1                  U R                  RF                  US9nO$UR1                  U RH                  RF                  US9nUR                  u  nnn URK                  S
US
5      nURM                  UU-  US	5      nU(       a  U	R                  S
   nU R                  b%  U	R1                  U R                  RF                  US9n	O$U	R1                  U RH                  RF                  US9n	U	RK                  S
US
5      n	U	RM                  X-  US	5      n	U
RK                  S
U5      RM                  UU-  S	5      n
U(       a%  URK                  S
U5      RM                  UU-  S	5      nU R                  b6  [        U [        5      (       a!  [
        (       a  [O        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [O        U R                  U5        XX4$ )a
  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    device: (`torch.device`):
        torch device
    num_videos_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr   
max_lengthTpt)paddingr   
truncationreturn_tensorslongest)r   r   r0   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: output_hidden_statesr,   r3    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)dtyperC   )(_execution_devicerq   r   _lora_scaler[   r$   r   r(   r\   strrP   shaperY   rZ   zipr   maybe_convert_promptmodel_max_length	input_idstorchequalbatch_decodeloggerwarningtor8   hidden_statesappendconcatrv   ri   
zeros_liketype	TypeErrorrH   r   rg   repeatviewr)   )!r{   r}   r~   rC   r   r   r   r   r`   ra   r   rd   r   r   
batch_size
tokenizerstext_encodersprompt_embeds_listpromptsrY   r[   text_inputstext_input_idsuntruncated_idsremoved_textzero_out_negative_promptuncond_tokensnegative_prompt_embeds_listr   uncond_inputbs_embedseq_len_s!                                    r?   encode_prompt%AnimateDiffSDXLPipeline.encode_promptI  s   t 1411 !j7W&X&X)   ,''243D3DjQ%d&7&7D"".''243F3F
S%d&9&9:F'44&&VJ&,,Q/J <@>>;Udnnd&6&67\`\l\l[m
8<8I8I8UT 3 34\`\o\o[p 	  )6H%/#%>%>zHH "$(G36w
M3Z/	<d$?@@!66vyIF'((99##' "-!6!6"+FIVZ"["e"e"((,0D0DR0HHQVQ\Q\"OR R $-#9#9/!YMgMgjkMknpMpJp:q#rLNN%667yP
 !-^->->v-F]a b (/M!4D4I4IQ4N+8+;($$1$?$?$CM %2$?$?)a-@P$QM"))-8I 4[L "LL);DM $3d#:#gt{{?g?g &+A+INf%*%5%5m%D",1,<,<=Q,R)(-C-K-3O 1 D_ AK?\_@`@`j+<<fuO4>?PRU4V4V
00\m 
 !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  "1 D*,'<?z[h<i8Ld$?@@&*&?&?QZ&[O*003
(#()##'  *6 **--f5)-*& 18=STU=V=[=[_`=`4J14M1)?)M)Mb)Q&+223IJ/ =j2 &+\\2MSU%V"*),,43F3F3L3LU[,\M),,499??6,RM,22'1%,,Q0EqI%**86K+KWVXY&,2215G"".)?)B)BI\I\IbIbkq)B)r&)?)B)Bag)B)h&%;%B%B1F[]^%_"%;%@%@Acelnp%q"3::1>STYY,,b 
 ',I,P,PQRTi,j,o,o00"-) ($ @AAFVFV#D$5$5zB*$ @AAFVFV#D$7$7D6JiirA   c                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	Nr   )r   rC   r   Tr   r   r   r   )nextr]   rM   r   rq   r   Tensorr^   pixel_valuesr   r   repeat_interleaver   image_embeds)
r{   imagerC   num_images_per_promptr   r   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             r?   encode_image$AnimateDiffSDXLPipeline.encode_image8  s?   T''2245;;%..**5*FSSE4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +JJ--e4AAL'99:OUV9WL"'"2"2<"@44rA   c                 
   / nU(       a  / nUGc&  [        U[        5      (       d  U/n[        U5      [        U R                  R                  R
                  5      :w  aB  [        S[        U5       S[        U R                  R                  R
                  5       S35      e[        XR                  R                  R
                  5       Hh  u  p[        U	[        5      (       + n
U R                  XSU
5      u  pUR                  US S S 24   5        U(       d  MP  WR                  US S S 24   5        Mj     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r0   r,   r   r   )rC   )rq   r6   rP   rg   encoder_hid_projimage_projection_layersrH   r   r   r   r   chunk	enumerater   catr   )r{   ip_adapter_imageip_adapter_image_embedsrC   r   r   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 r?   prepare_ip_adapter_image_embeds7AnimateDiffSDXLPipeline.prepare_ip_adapter_image_embedsQ  s9    &$&!"*.55$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A ))"<"<"T"T>9' +55E*W&W#DHDUDU+Q8KEA# ##$7a$@A..)001MdTUg1VW> (?#.H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&rA   c                    SU R                   R                  R                  -  U-  nUR                  u  p#pEnUR	                  SSSSS5      R                  X$-  X5U5      nU R                   R                  U5      R                  nUS S S 24   R                  X$S4UR                  SS  -   5      R	                  SSSSS5      nUR                  5       nU$ )Nr0   r   r,   r      r   )	rf   rv   scaling_factorr   permutereshapedecodesamplefloat)	r{   r_   r   channels
num_framesheightwidthr   videos	            r?   decode_latents&AnimateDiffSDXLPipeline.decode_latents  s    dhhoo444w>:A--7
j%//!Q1a0889PRZdij(//dAg&&
'CekkRSRTo'UV^^_`bcefhiklmrA   c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Neta	generator)rI   rJ   rK   rQ   steprM   rN   )r{   r   r   accepts_etaextra_step_kwargsaccepts_generators         r?   prepare_extra_step_kwargs1AnimateDiffSDXLPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rA   c           
        ^  US-  S:w  d	  US-  S:w  a  [        SU SU S35      eUbW  [        U 4S jU 5       5      (       d=  [        ST R                   SU Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eUb  Ub  [        S	U S
U S35      eUb  Ub  [        SU S
U S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Ub  [        SU SU S35      eUb  Ub  [        SU SU S35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUb  U	c  [        S5      eUb  U
c  [        S5      eg g s  snf )Nrk   r   z7`height` and `width` have to be divisible by 8 but are z and r   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr{   s     r?   	<genexpr>7AnimateDiffSDXLPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.)rH   allr   rq   r   r6   r   r   )r{   r}   r~   r   r   r   r   r`   ra   r   rd   "callback_on_step_end_tensor_inputsr   s   `            r?   check_inputs$AnimateDiffSDXLPipeline.check_inputs  s    A:?eai1nVW]V^^cdicjjklmm-9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee&+A+M9/9J K*++]_  */E/Q;<M;N O*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  $)=)E U  "-2O2W y  3X-] pHs   G#1G#c
                 0   UUUX@R                   -  XPR                   -  4n
[        U[        5      (       a*  [        U5      U:w  a  [	        S[        U5       SU S35      eU	c  [        XXvS9n	OU	R                  U5      n	XR                  R                  -  n	U	$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   rC   r   )	rl   rq   r6   rP   rH   r*   r   rQ   init_noise_sigma)r{   r   num_channels_latentsr   r   r   r   rC   r   r_   r   s              r?   prepare_latents'AnimateDiffSDXLPipeline.prepare_latents  s      +++***
 i&&3y>Z+GA#i.AQ R&<'gi 
 ?"5fZGjj(G NN;;;rA   c                 2   [        X-   U-   5      nU R                  R                  R                  [	        U5      -  U-   nU R                  R
                  R                  R                  nX:w  a  [        SU SU S35      e[        R                  " U/US9nU$ )Nz7Model expects an added time embedding vector of length z, but a vector of z was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`.r   )r6   rg   rv   addition_time_embed_dimrP   add_embeddinglinear_1in_featuresrH   r   tensor)	r{   original_sizecrops_coords_top_lefttarget_sizer   text_encoder_projection_dimrc   passed_add_embed_dimexpected_add_embed_dims	            r?   _get_add_time_ids)AnimateDiffSDXLPipeline._get_add_time_ids  s     MAKOP II44s<7HHKff 	 "&!8!8!A!A!M!M!9IJ`Iaas  uI  tJ  JU  V  ||\N%@rA   c                 4   U R                   R                  nU R                   R                  [        R                  S9  [        U R                   R                  R                  R                  S   R                  [        [        [        45      nU(       a  U R                   R                  R                  U5        U R                   R                  R                  R                  U5        U R                   R                  R                  R                  U5        g g )Nr  r   )rf   r   r   r   float32rq   decoder	mid_block
attentions	processorr   r   r   post_quant_convconv_in)r{   r   use_torch_2_0_or_xformerss      r?   
upcast_vae"AnimateDiffSDXLPipeline.upcast_vae  s    %--($.HH&&11!4>> %%%
! %HH$$''.HH$$''.HH&&))%0 %rA   i   wembedding_dimr   returnc                 r   [        UR                  5      S:X  d   eUS-  nUS-  n[        R                  " [        R                  " S5      5      US-
  -  n[        R
                  " [        R                  " XCS9U* -  5      nUR                  U5      SS2S4   USSS24   -  n[        R                  " [        R                  " U5      [        R                  " U5      /SS9nUS-  S:X  a*  [        R                  R                  R                  US5      nUR                  UR                  S	   U4:X  d   eU$ )
a,  
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

Args:
    w (`torch.Tensor`):
        Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
    embedding_dim (`int`, *optional*, defaults to 512):
        Dimension of the embeddings to generate.
    dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
        Data type of the generated embeddings.

Returns:
    `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
r0   g     @@r,   g     @r  Nr   )r   r0   r   )rP   r   r   logr  exparanger   r   sincosnn
functionalpad)r{   r  r   r   half_dimembs         r?   get_guidance_scale_embedding4AnimateDiffSDXLPipeline.get_guidance_scale_embedding$  s    " 177|q   J A%iiW-.(Q,?iiX;sdBCdd5k!T'"Sq\1ii338a@1!((%%))#v6CyyQWWQZ7777
rA   c                     U R                   $ r   )_guidance_scaler{   s    r?   guidance_scale&AnimateDiffSDXLPipeline.guidance_scaleB  s    ###rA   c                     U R                   $ r   )_guidance_rescaler1  s    r?   r;   (AnimateDiffSDXLPipeline.guidance_rescaleF  s    %%%rA   c                     U R                   $ r   )
_clip_skipr1  s    r?   r   !AnimateDiffSDXLPipeline.clip_skipJ      rA   c                 r    U R                   S:  =(       a"    U R                  R                  R                  S L $ )Nr0   )r0  rg   rv   time_cond_proj_dimr1  s    r?   r   3AnimateDiffSDXLPipeline.do_classifier_free_guidanceQ  s.    ##a'WDII,<,<,O,OSW,WWrA   c                     U R                   $ r   )_cross_attention_kwargsr1  s    r?   cross_attention_kwargs.AnimateDiffSDXLPipeline.cross_attention_kwargsU  s    +++rA   c                     U R                   $ r   )_denoising_endr1  s    r?   denoising_end%AnimateDiffSDXLPipeline.denoising_endY      """rA   c                     U R                   $ r   )_num_timestepsr1  s    r?   num_timesteps%AnimateDiffSDXLPipeline.num_timesteps]  rF  rA   c                     U R                   $ r   )
_interruptr1  s    r?   	interrupt!AnimateDiffSDXLPipeline.interrupta  r:  rA      2   g      @        pil)r   r   r_   r   r   r   rB   rD   rE   rD  r2  r   r   r   r   output_typereturn_dictr@  r;   r  r  r  negative_original_sizenegative_crops_coords_top_leftnegative_target_sizecallback_on_step_endr   c$                 6  ^@ U=(       d    U R                   U R                  -  nU=(       d    U R                   U R                  -  nSnU=(       d    XE4nU=(       d    XE4nU R                  UUUUUUUUUUU#5        Xl        UU l        U!U l        UU l        Xl        SU l        Ub  [        U[        5      (       a  Sn$O3Ub!  [        U[        5      (       a  [        U5      n$OUR                  S   n$U R                  n%U R                  b  U R                  R!                  SS5      OSn&U R#                  UUU%UU R$                  UUUUUUU&U R&                  S9u  nnnn[)        U R*                  UU%Xx5      u  pvU R,                  R.                  R0                  n'U R3                  U$U-  U'UUUUR4                  U%UU5	      nU R7                  X5      n(Un)U R8                  c  [;        UR                  S   5      n*O U R8                  R.                  R<                  n*U R?                  UUUUR4                  U*S9n+Ub!  U b  U R?                  UUU UR4                  U*S9n,OU+n,U R$                  (       aE  [@        RB                  " UU/SS	9n[@        RB                  " UU)/SS	9n)[@        RB                  " U,U+/SS	9n+URE                  USS
9nURG                  U%5      nU)RG                  U%5      n)U+RG                  U%5      RI                  U$U-  S5      n+Uc  Ub"  U RK                  UUU%U$U-  U R$                  5      n-U RL                  b  [        U RL                  [N        5      (       a  U RL                  S:  a  U RL                  S:  a  [;        [Q        U R*                  R.                  RR                  U RL                  U R*                  R.                  RR                  -  -
  5      5      m@[        [        [U        U@4S jU5      5      5      nUSU nSn.U R,                  R.                  RV                  b{  [@        RX                  " U RZ                  S-
  5      RI                  U$U-  5      n/U R]                  U/U R,                  R.                  RV                  S9RG                  U%UR4                  S9n.U R^                  (       a  U R`                  OSn0[c        U05       GH  n1U R^                  (       a#  U Re                  UU1UU%UR4                  U5      u  nn[        U5      U l3        U Ri                  U Rf                  S9 n2[k        U5       GH	  u  n3n4U Rl                  (       a  M  U R$                  (       a  [@        RB                  " U/S-  5      OUn5U R*                  Ro                  U5U45      n5U)U+S.n6Uc  U(       a  W-U6S'   U R-                  U5U4UU.U R                  U6SS9S   n7U R$                  (       a)  U7Rq                  S5      u  n8n9U8U RZ                  U9U8-
  -  -   n7U R$                  (       a%  U Rr                  S:  a  [u        U7W9U Rr                  S9n7U R*                  Rv                  " U7U4U40 U(DSS0D6S   nU"b  0 n:U# H  n;[y        5       U;   U:U;'   M     U"" U U3U4U:5      n<U<R{                  SU5      nU<R{                  SU5      nU<R{                  SU5      nU<R{                  SU)5      n)U<R{                  SU5      nU<R{                  SU+5      n+U<R{                  SU,5      n,U2R}                  5         [~        (       d  GM  [        R                  " 5         GM     SSS5        GM     U R                  R4                  [@        R                  :H  =(       a     U R                  R.                  R                  n=U=(       a_  U R                  5         URG                  [        [        U R                  R                  R                  5       5      5      R4                  5      nUS:X  a  Un>O+U R                  U5      n?U R                  R                  U?US9n>U=(       a'  U R                  RG                  [@        R                  S9  U R                  5         U(       d  U>4$ [        U>S 9$ ! , (       d  f       GM  = f)!u,  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide the video generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    num_frames:
        The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
        amounts to 2 seconds of video.
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated video. This is set to 1024 by default for the best results.
        Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated video. This is set to 1024 by default for the best results.
        Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality video at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
        in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
        passed will be used. Must be in descending order.
    sigmas (`List[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    denoising_end (`float`, *optional*):
        When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
        completed before it is intentionally prematurely terminated. As a result, the returned sample will
        still retain a substantial amount of noise as determined by the discrete timesteps selected by the
        scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
        "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
        Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
    guidance_scale (`float`, *optional*, defaults to 5.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower video quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the video generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the video generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    num_videos_per_prompt (`int`, *optional*, defaults to 1):
        The number of videos to generate per prompt.
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
        applies to [`schedulers.DDIMScheduler`], will be ignored for others.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    ip_adapter_image: (`PipelineImageInput`, *optional*):
        Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. If not provided, embeddings are computed from the
        `ip_adapter_image` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated video. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion_xl.AnimateDiffPipelineOutput`] instead of a
        plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
        [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
        using zero terminal SNR.
    original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
        `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
        explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
        `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
        `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        For most cases, `target_size` should be set to the desired height and width of the generated image. If
        not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
        section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a specific image resolution. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a target image resolution. It should be as same
        as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
        returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
r0   FNr   scale)r}   r~   rC   r   r   r   r   r`   ra   r   rd   r   r   r   )r   r  r   )repeatsr3   c                    > U T:  $ r   rG   )tsdiscrete_timestep_cutoffs    r?   <lambda>2AnimateDiffSDXLPipeline.__call__.<locals>.<lambda>  s    RC[=[rA   )r   r   )totalr,   )text_embedstime_idsr   )encoder_hidden_statestimestep_condr@  added_cond_kwargsrT  rQ  )r;   rT  r_   r`   ra   rb   rd   rc   re   latent)r   rS  r  )frames)Orz   rl   r   r0  r5  r8  r?  rC  rL  rq   r   r6   rP   r   r   r@  getr   r   r   rU   rQ   rg   rv   in_channelsr  r   r   r\   intprojection_dimr  r   r   r   r   r   r   rD  r   roundnum_train_timestepsfilterr<  r  r2  r-  free_init_enabled_free_init_num_itersr7   _apply_free_initrH  progress_barr   rM  scale_model_inputr   r;   r@   r   localspopupdateXLA_AVAILABLExm	mark_steprf   float16force_upcastr  r   iterr  rM   r   rx   postprocess_videomaybe_free_model_hooksr1   )Ar{   r}   r~   r   r   r   rB   rD   rE   rD  r2  r   r   r   r   r   r_   r`   ra   r   rd   r   r   rS  rT  r@  r;   r  r  r  rU  rV  rW  r   rX  r   r   rC   r   r  r   rb   r  rc   re   r   re  guidance_scale_tensornum_free_init_itersfree_init_iterrs  r   tlatent_model_inputrf  
noise_prednoise_pred_uncondr:   callback_kwargsr   callback_outputsneeds_upcastingr   video_tensorr^  sA                                                                   @r?   __call__ AnimateDiffSDXLPipeline.__call__e  sn   z K433d6K6KKI11D4I4II !%8&!4f_ 	" ).	
  .!1#'=$+ *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 "7(,(H(H+/'#9!5*G!nn  
	
" )$ *<NN/*
&	
  $yy//;;&&.. 

 !::9J /&*-.B.H.H.L*M'*.*=*=*D*D*S*S'--!%%(C . 
 "-2F2R$($:$:&.$#)),G %; %! %1!++!II'=}&MSTUM#ii)F(X^_`O 99&;\%JPQRL%77
PQ7R%((0),,V4#v.55jCX6XZ[\'+B+N?? '2200L *4--u55""Q&""Q&'*NN))==))DNN,A,A,U,UUW($ #&d62[]f+g&h"i!"6#67I 99..:$)LL1D1Dq1H$I$P$PQ[^sQs$t! ==%TYY5E5E5X5X > bgmmb4  <@;Q;Qd77WX#$78N%%%)%:%:^-@&'--Yb&" #&i.D "")<)<"=%i0DAq~~  FJEeEeG9q=)Akr&)-)I)IJ\^_)`& 9HUa(b%'37N<H).9!%*.;&3/3/J/J*;$) "+ " "J 77=G=M=Ma=P:)?%69L9LP_bsPs9t%t
77D<Q<QTW<W%6&$J_J_&

 #nn11*aqL]qkpqrstG+7*,!CA17!OA. "D+?aO+\("2"6"6y'"J(8(<(<_m(\1A1E1EF^`v1w.*:*>*>?PRa*b8H8L8L;=Z95 (8';';NL'Y0@0D0DE\^s0t- '')$}s 1 >= 9L ((..EMM9Zdhhoo>Z>ZOOjjd488+C+C+N+N+P&Q!R!X!XYG ("E..w7L((::[f:gE HHKKemmK, 	##%8O(66i >=s   (G;b(b
b	)r8  r?  rC  r5  r0  rL  r   rH  rz   rl   rx   )NNT)NNr0   TNNNNNNNNr   )NNNNNNN)C__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r	   r   r   r   r   r#   r"   r!   r    r   r   r
   boolrp   r   r   r   rC   rk  r   r   r   r   r   r   r   r   r  r  r  r  r   r-  propertyr2  r;   r   r   r@  rD  rI  rM  no_gradr'   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   r   r   r  __static_attributes____classcell__)rO   s   @r?   rW   rW      sN   (T U6 8<04-1'.
.
 $.
 4	.

 !.
 #.
 (/9:.
 &.
  "+')
.
" 5#.
$ .%.
& '+'.
 .
h #')-%&,0)-+/049=7;@D&*#'ljlj 3-lj &	lj
  #lj &*lj "#lj $C=lj  -lj !) 6lj 'u||4lj (0'=lj UOlj C=lj^52+'\
!. #!&*+/FT nr4 ei$1( 58emm.1@E	< $ $ & &   X X , , # # # #   ]]_12 )-48 $##%#")- #;?=A/0MQ*.049=7;@D9=@D%* ;?"%371715<@:@:>#'KO9BIv7c49n%v7 5d3i01v7 	v7
 v7 }v7 !v7 9v7 Uv7  v7 v7 "%T#Y"78v7 $E#tCy.$9:v7  (}v7 v7  E%//43H"HIJ!v7" %,,'#v7$  -%v7& !) 6'v7( 'u||4)v7* (0'=+v7, ##56-v7. "*$u||*<!=/v70 c]1v72 3v74 !)c3h 85v76  7v78  c3h09v7:  %S#X;v7< eCHo.=v7> !)sCx 9?v7@ ).c3hAv7B 'uS#X7Cv7D C=Ev7F 'xc40@$0F'GHGv7H -1IIv7 3 v7rA   rW   )rQ  )NNNN)LrJ   typingr   r   r   r   r   r   r	   r   transformersr
   r   r   r   r   image_processorr   loadersr   r   r   r   modelsr   r   r   r   r   models.attention_processorr   r   r   models.lorar   
schedulersr   r   r    r!   r"   r#   utilsr$   r%   r&   r'   r(   r)   utils.torch_utilsr*   rx   r+   free_init_utilsr-   pipeline_utilsr.   r/   pipeline_outputr1   torch_xla.core.xla_modelcore	xla_modelry  rx  
get_loggerr  r   r  r@   rk  r   rC   r   rU   rW   rG   rA   r?   <module>r     s)    D D D   2  k j 
 :   . - + D 6 ))MM			H	%. d: *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vK7$K7rA   