
    +h                        S SK r S SKJrJrJrJrJrJrJr  S SK	r	S SK
Js  Jr  S SKJrJrJrJr  SSKJr  SSKJrJrJrJr  SSKJrJrJrJrJrJ r   SSK!J"r"  SS	K#J$r$  SS
K%J&r&  SSK'J(r(J)r)J*r*J+r+J,r,  SSK-J.r.J/r/  SSK0J1r1  SSK2J3r3  SSK4J5r5  SSK6J7r7J8r8  SSK9J:r:  \)" 5       (       a  S SK;J<s  J=r>  Sr?OSr?\*R                  " \A5      rBSrC " S S\7\8\\\\3\5\5
      rDg)    N)AnyCallableDictListOptionalTupleUnion)CLIPImageProcessorCLIPTextModelCLIPTokenizerCLIPVisionModelWithProjection   )PipelineImageInput)FromSingleFileMixinIPAdapterMixinStableDiffusionLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLControlNetModelImageProjectionMultiControlNetModelUNet2DConditionModelUNetMotionModel)adjust_lora_scale_text_encoder)MotionAdapter)KarrasDiffusionSchedulers)USE_PEFT_BACKENDis_torch_xla_availableloggingscale_lora_layersunscale_lora_layers)is_compiled_modulerandn_tensor)VideoProcessor   )FreeInitMixin)AnimateDiffFreeNoiseMixin)DiffusionPipelineStableDiffusionMixin   )AnimateDiffPipelineOutputTFaT  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import (
        ...     AnimateDiffControlNetPipeline,
        ...     AutoencoderKL,
        ...     ControlNetModel,
        ...     MotionAdapter,
        ...     LCMScheduler,
        ... )
        >>> from diffusers.utils import export_to_gif, load_video

        >>> # Additionally, you will need a preprocess videos before they can be used with the ControlNet
        >>> # HF maintains just the right package for it: `pip install controlnet_aux`
        >>> from controlnet_aux.processor import ZoeDetector

        >>> # Download controlnets from https://huggingface.co/lllyasviel/ControlNet-v1-1 to use .from_single_file
        >>> # Download Diffusers-format controlnets, such as https://huggingface.co/lllyasviel/sd-controlnet-depth, to use .from_pretrained()
        >>> controlnet = ControlNetModel.from_single_file("control_v11f1p_sd15_depth.pth", torch_dtype=torch.float16)

        >>> # We use AnimateLCM for this example but one can use the original motion adapters as well (for example, https://huggingface.co/guoyww/animatediff-motion-adapter-v1-5-3)
        >>> motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")

        >>> vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
        >>> pipe: AnimateDiffControlNetPipeline = AnimateDiffControlNetPipeline.from_pretrained(
        ...     "SG161222/Realistic_Vision_V5.1_noVAE",
        ...     motion_adapter=motion_adapter,
        ...     controlnet=controlnet,
        ...     vae=vae,
        ... ).to(device="cuda", dtype=torch.float16)
        >>> pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
        >>> pipe.load_lora_weights(
        ...     "wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora"
        ... )
        >>> pipe.set_adapters(["lcm-lora"], [0.8])

        >>> depth_detector = ZoeDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
        >>> video = load_video(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif"
        ... )
        >>> conditioning_frames = []

        >>> with pipe.progress_bar(total=len(video)) as progress_bar:
        ...     for frame in video:
        ...         conditioning_frames.append(depth_detector(frame))
        ...         progress_bar.update()

        >>> prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
        >>> negative_prompt = "bad quality, worst quality"

        >>> video = pipe(
        ...     prompt=prompt,
        ...     negative_prompt=negative_prompt,
        ...     num_frames=len(video),
        ...     num_inference_steps=10,
        ...     guidance_scale=2.0,
        ...     conditioning_frames=conditioning_frames,
        ...     generator=torch.Generator().manual_seed(42),
        ... ).frames[0]

        >>> export_to_gif(video, "animatediff_controlnet.gif", fps=8)
        ```
c            8         ^  \ rS rSrSrSrSS/r/ SQr  SES\S	\	S
\
S\\\4   S\S\\\\   \\   \4   S\S\\   S\\   4U 4S jjjr     SFS\\R2                     S\\R2                     S\\   S\\   4S jjrSGS jrS rSHS\4S jjrS r         SIS jr! SGS jr"  SJS  jr#\$S! 5       r%\$S" 5       r&\$S# 5       r'\$S$ 5       r(\$S% 5       r)\$S& 5       r*\RV                  " 5       SSSSS'S(SS)SSSSSSSSS*S+SSSSSSSS,/S4S-\\,\\,   4   S.\\   S/\\   S0\\   S1\S2\S3\\\,\\,   4      S4\\   S5\S6\\\RZ                  \\RZ                     4      S,\\R2                     S\\R2                     S\\R2                     S7\\.   S8\\.   S9\\\.      S:\\,   S;\/S<\\0\,\14      S=\\\\   4   S>\/S?\\\\   4   S@\\\\   4   S\\   SA\\2\\\0/S4      SB\\,   S\46SC jj5       r3SDr4U =r5$ )KAnimateDiffControlNetPipelinex   a3  
Pipeline for text-to-video generation with ControlNet guidance.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
    tokenizer (`CLIPTokenizer`):
        A [`~transformers.CLIPTokenizer`] to tokenize text.
    unet ([`UNet2DConditionModel`]):
        A [`UNet2DConditionModel`] used to create a UNetMotionModel to denoise the encoded video latents.
    motion_adapter ([`MotionAdapter`]):
        A [`MotionAdapter`] to be used in combination with `unet` to denoise the encoded video latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
ztext_encoder->unet->vaefeature_extractorimage_encoder)latentsprompt_embedsnegative_prompt_embedsNvaetext_encoder	tokenizerunetmotion_adapter
controlnet	schedulerc
                   > [         T
U ]  5         [        U[        5      (       a  [        R
                  " XE5      n[        U[        [        45      (       a  [        U5      nU R                  UUUUUUUUU	S9	  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OSU l        [!        U R                  S9U l        [!        U R                  SSS	9U l        g )
N)	r4   r5   r6   r7   r8   r9   r:   r/   r0   r4   r%   r*      )vae_scale_factorTF)r=   do_convert_rgbdo_normalize)super__init__
isinstancer   r   from_unet2dlisttupler   register_modulesgetattrlenr4   configblock_out_channelsr=   r$   video_processorcontrol_video_processor)selfr4   r5   r6   r7   r8   r9   r:   r/   r0   	__class__s             y/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.pyrA   &AnimateDiffControlNetPipeline.__init__   s     	d011"..tDDj4-00-j9J%)!/' 	 
	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw-t?T?TU'5!224V[(
$    r2   r3   
lora_scale	clip_skipc
                 
   UbS  [        U [        5      (       a>  Xl        [        (       d  [	        U R
                  U5        O[        U R
                  U5        Ub  [        U[        5      (       a  Sn
O3Ub!  [        U[        5      (       a  [        U5      n
OUR                  S   n
UGc  [        U [        5      (       a  U R                  XR                  5      nU R                  USU R                  R                  SSS9nUR                  nU R                  USSS	9R                  nUR                  S
   UR                  S
   :  a  [         R"                  " X5      (       dj  U R                  R%                  USS2U R                  R                  S-
  S
24   5      n[&        R)                  SU R                  R                   SU 35        [+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU	c%  U R                  UR3                  U5      US9nUS   nOQU R                  UR3                  U5      USS9nUS
   U	S-   *    nU R
                  R4                  R7                  U5      nU R
                  b  U R
                  R8                  nO0U R:                  b  U R:                  R8                  nOUR8                  nUR3                  UUS9nUR                  u  nnnUR=                  SUS5      nUR?                  UU-  US
5      nU(       Ga  UGc|  Uc  S/U
-  nOUb;  [A        U5      [A        U5      La$  [C        S[A        U5       S[A        U5       S35      e[        U[        5      (       a  U/nO2U
[        U5      :w  a!  [E        SU S[        U5       SU SU
 S3	5      eUn[        U [        5      (       a  U R                  UU R                  5      nUR                  S   nU R                  USUSSS9n[+        U R
                  R,                  S5      (       aA  U R
                  R,                  R.                  (       a  UR0                  R3                  U5      nOSnU R                  UR                  R3                  U5      US9nUS   nU(       aG  UR                  S   nUR3                  UUS9nUR=                  SUS5      nUR?                  X-  US
5      nU R
                  b6  [        U [        5      (       a!  [        (       a  [G        U R
                  U5        Xg4$ )a,  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    lora_scale (`float`, *optional*):
        A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr*   r   
max_lengthTpt)paddingrU   
truncationreturn_tensorslongest)rW   rY   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: use_attention_mask)attention_mask)r]   output_hidden_states)dtypedevice z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)$rB   r   _lora_scaler   r   r5   r    strrD   rH   shaper   maybe_convert_promptr6   model_max_length	input_idstorchequalbatch_decodeloggerwarninghasattrrI   r\   r]   to
text_modelfinal_layer_normr_   r7   repeatviewtype	TypeError
ValueErrorr!   )rM   promptr`   num_images_per_promptdo_classifier_free_guidancenegative_promptr2   r3   rR   rS   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textr]   prompt_embeds_dtypebs_embedseq_len_uncond_tokensrU   uncond_inputs                          rO   encode_prompt+AnimateDiffControlNetPipeline.encode_prompt   sQ   V !j7U&V&V) $#.t/@/@*M!$"3"3Z@*VS"9"9JJvt$<$<VJ&,,Q/J $ ;<<226>>J..$>>::# ) K )22N"nnVYW[n\ffO$$R(N,@,@,DDU[[N N  $~~::#At~~'F'F'JR'O$OP  778	,Q
 t((//1EFF4K\K\KcKcKvKv!,!;!;!>!>v!F!%  $ 1 1.2C2CF2K\j 1 k -a 0 $ 1 1"%%f-ncg !2 ! !.b 1IM2B C
 !% 1 1 < < M Mm \("&"3"3"9"9YY""&))//"/"5"5%((/B6(R,22'1%,,Q0EqI%**86K+KWVXY '+A+I&!#z 1#VD<Q(QUVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0 $ ;<< $ 9 9- X&,,Q/J>>$%# * L t((//1EFF4K\K\KcKcKvKv!-!<!<!?!?!G!%%)%6%6&&))&1- &7 &" &<A%>"&,2215G%;%>%>EXag%>%h"%;%B%B1F[]^%_"%;%@%@Acelnp%q"($ >??DTDT#D$5$5zB44rQ   c                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	NrV   )rY   r`   r_   T)r^   r   dim)nextr0   
parametersr_   rB   ri   Tensorr/   pixel_valuesro   hidden_statesrepeat_interleave
zeros_likeimage_embeds)
rM   imager`   rx   r^   r_   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             rO   encode_image*AnimateDiffControlNetPipeline.encode_image~  s?   T''2245;;%..**5*FSSE4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +JJ--e4AAL'99:OUV9WL"'"2"2<"@44rQ   c                 
   / nU(       a  / nUGc&  [        U[        5      (       d  U/n[        U5      [        U R                  R                  R
                  5      :w  aB  [        S[        U5       S[        U R                  R                  R
                  5       S35      e[        XR                  R                  R
                  5       Hh  u  p[        U	[        5      (       + n
U R                  XSU
5      u  pUR                  US S S 24   5        U(       d  MP  WR                  US S S 24   5        Mj     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r*   r%   r   r   r`   )rB   rD   rH   r7   encoder_hid_projimage_projection_layersrv   zipr   r   appendchunk	enumerateri   catro   )rM   ip_adapter_imageip_adapter_image_embedsr`   rx   ry   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 rO   prepare_ip_adapter_image_embeds=AnimateDiffControlNetPipeline.prepare_ip_adapter_image_embeds  s9    &$&!"*.55$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A ))"<"<"T"T>9' +55E*W&W#DHDUDU+Q8KEA# ##$7a$@A..)001MdTUg1VW> (?#.H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&rQ      decode_chunk_sizec                 F   SU R                   R                  R                  -  U-  nUR                  u  p4pVnUR	                  SSSSS5      R                  X5-  XFU5      n/ n[        SUR                  S   U5       H?  n	XX-    n
U R                   R                  U
5      R                  n
UR                  U
5        MA     [        R                  " U5      nUS S S 24   R                  X5S4UR                  SS  -   5      R	                  SSSSS5      nUR                  5       nU$ )Nr*   r   r%   r      r[   )r4   rI   scaling_factorre   permutereshaperangedecodesampler   ri   r   float)rM   r1   r   r{   channels
num_framesheightwidthvideor   batch_latentss              rO   decode_latents,AnimateDiffControlNetPipeline.decode_latents  s   dhhoo444w>:A--7
j%//!Q1a0889PRZdijq'--*,=>A#(=>M HHOOM:AAMLL' ?
 		% dAg&&
'CekkRSRTo'UV^^_`bcefhiklmrQ   c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Neta	generator)setinspect	signaturer:   stepr   keys)rM   r   r   accepts_etaextra_step_kwargsaccepts_generators         rO   prepare_extra_step_kwargs7AnimateDiffControlNetPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rQ         ?        c                   ^ ^	 US-  S:w  d	  US-  S:w  a  [        SU SU S35      eUbW  [        U 4S jU 5       5      (       d=  [        ST R                   SU Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eUb  Ub  [        S	U S
U S35      eUc  Uc  [        S5      eUb7  [        U[        [
        [        45      (       d  [        S[        U5       35      eUb  Ub  [        SU SU S35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      e[        T R                  [        5      (       aW  [        U[
        5      (       aB  [        R                  S[        T R                  R                  5       S[        U5       S35        [        [         S5      =(       a8    [        T R                  ["        R$                  R&                  R(                  5      n[        T R                  [*        5      (       d0  U(       a  [        T R                  R,                  [*        5      (       aW  [        T	[
        5      (       d  [/        S[        T	5       35      e[        T	5      U:w  a  [        SU S[        T	5      < 35      eO[        T R                  [        5      (       d0  U(       a  [        T R                  R,                  [        5      (       a  [        T	[
        5      (       a  [        T	S   [
        5      (       d  [/        S[        T	5      < 35      e[        T	S   5      U:w  a  [        SU S[        T	S   5      < 35      e[1        U	4S jT	 5       5      (       a  [        S5      eO e[        T R                  [*        5      (       d0  U(       aJ  [        T R                  R,                  [*        5      (       a!  [        U
[2        5      (       d  [/        S5      eO[        T R                  [        5      (       d0  U(       a  [        T R                  R,                  [        5      (       a  [        U
[
        5      (       a#  [1        S U
 5       5      (       a  [        S 5      eOO[        U
[
        5      (       a7  [        U
5      [        T R                  R                  5      :w  a  [        S!5      eO e[        U[4        [
        45      (       d  U/n[        U[4        [
        45      (       d  U/n[        U5      [        U5      :w  a$  [        S"[        U5       S#[        U5       S$35      e[        T R                  [        5      (       a  [        U5      [        T R                  R                  5      :w  a[  [        S%U S&[        U5       S'[        T R                  R                  5       S([        T R                  R                  5       S3	5      e[7        X5       HH  u  nnUU:  a  [        S)U S*U S35      eUS+:  a  [        S)U S,35      eUS-:  d  M;  [        S.U S/35      e   g s  snf )0Nr<   r   z7`height` and `width` have to be divisible by 8 but are z and rb   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0krM   s     rO   	<genexpr>=AnimateDiffControlNetPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z:`prompt` has to be of type `str`, `list` or `dict` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` z	You have z! ControlNets and you have passed z= prompts. The conditionings will be fixed across the prompts.scaled_dot_product_attentionz>For single controlnet, `image` must be of type `list` but got zExcepted image to have length z but got len(video)=zQFor multiple controlnets: `image` must be type list of lists but got type(video)=z$Expected length of image sublist as z but got len(video[0])=c              3   X   >#    U  H  n[        U5      [        TS    5      :g  v   M!     g7f)r   N)rH   )r   imgr   s     rO   r   r   ;  s"     >3s8s58},s   '*zDAll conditioning frame batches for multicontrolnet must be same sizezLFor single controlnet: `controlnet_conditioning_scale` must be type `float`.c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   )rB   rD   )r   r   s     rO   r   r   N  s     R4Qqz!T**4Qs   zEA single batch of multiple conditionings are supported at the moment.zFor multiple controlnets: When `controlnet_conditioning_scale` is specified as `list`, it must have the same length as the number of controlnetsz`control_guidance_start` has z* elements, but `control_guidance_end` has zI elements. Make sure to provide the same number of elements to each list.z`control_guidance_start`: z has z elements but there are z- controlnets available. Make sure to provide zcontrol guidance start: z4 cannot be larger or equal to control guidance end: r   z can't be smaller than 0.r   zcontrol guidance end: z can't be larger than 1.0.)rv   allr   rB   rd   rD   dictrt   re   r9   r   rl   rm   rH   netsrn   Fri   _dynamo
eval_frameOptimizedModuler   	_orig_modru   anyr   rE   r   )rM   rw   r   r   r   rz   r2   r3   "callback_on_step_end_tensor_inputsr   controlnet_conditioning_scalecontrol_guidance_startcontrol_guidance_endr   is_compiledstartends   `        `       rO   check_inputs*AnimateDiffControlNetPipeline.check_inputs  s    A:?eai1nVW]V^^cdicjjklmm-9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  
6Ct;L(M(MYZ^_eZfYghii&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  doo';<<&$''DOO$8$8 9::[\_`f\g[hST a!?@ 
ZOOU]]55EEF
 t884??44oFFeT**"`aefkal`m noo5zZ' #A*MbWZ[`WaVc!dee ( t(<==4??446JKKeT***U1Xt2L2L"thlmrhsgu vww58}
* #G
|Sk]`afghai]j\l!mnn>>>> !ghh ? 5 t884??44oFF;UCC noo D t(<==4??446JKK7>>R4QRRR$%lmm S94@@SIfEgkn$$l F !D 
 505$-@@&<%=".>>$8#9 %&#.B*CC/4J0K/LLvwz  |P  xQ  wR  R[  \  doo';<<)*c$//2F2F.GG 01G0HcRhNiMj  kC  DG  HL  HW  HW  H\  H\  D]  C^  ^K  LO  PT  P_  P_  Pd  Pd  Le  Kf  fg  h  4KJE3| .ug5ijminnop  s{ #;E7B[!\]]Sy #9#>X!YZZ LY pHs   Y2Yc
                 |   U R                   (       a  U R                  XX4XVXxU	5	      n	[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eUUUX@R                  -  XPR                  -  4n
U	c  [        XXvS9n	OU	R                  U5      n	XR                  R                  -  n	U	$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   r`   r_   )free_noise_enabled_prepare_latents_free_noiserB   rD   rH   rv   r=   r#   ro   r:   init_noise_sigma)rM   r{   num_channels_latentsr   r   r   r_   r`   r   r1   re   s              rO   prepare_latents-AnimateDiffControlNetPipeline.prepare_latentsv  s     ""66*eTZgnG i&&3y>Z+GA#i.AQ R&<'gi   +++***
 ?"5fZGjj(G NN;;;rQ   Fc
                 |   U R                   R                  XUS9R                  [        R                  S9nUR                  SSSSS5      R                  SS5      nUR                  S   n
U
S:X  a  UnOUnUR                  USS9nUR                  XgS	9nU(       a!  U	(       d  [        R                  " U/S-  5      nU$ )
N)r   r   )r_   r   r%   r*   r   r   r   r   )
rL   preprocess_videoro   ri   float32r   flattenre   r   r   )rM   r   r   r   r{   num_videos_per_promptr`   r_   ry   
guess_modevideo_batch_size	repeat_bys               rO   prepare_video+AnimateDiffControlNetPipeline.prepare_video  s     ,,==eZ_=`cc-- d 
 aAq!,44Q: ;;q>q "I .I''	q'94&zIIugk*ErQ   c                     U R                   $ r   _guidance_scalerM   s    rO   guidance_scale,AnimateDiffControlNetPipeline.guidance_scale  s    ###rQ   c                     U R                   $ r   )
_clip_skipr  s    rO   rS   'AnimateDiffControlNetPipeline.clip_skip      rQ   c                      U R                   S:  $ )Nr*   r  r  s    rO   ry   9AnimateDiffControlNetPipeline.do_classifier_free_guidance  s    ##a''rQ   c                     U R                   $ r   )_cross_attention_kwargsr  s    rO   cross_attention_kwargs4AnimateDiffControlNetPipeline.cross_attention_kwargs  s    +++rQ   c                     U R                   $ r   )_num_timestepsr  s    rO   num_timesteps+AnimateDiffControlNetPipeline.num_timesteps  s    """rQ   c                     U R                   $ r   )
_interruptr  s    rO   	interrupt'AnimateDiffControlNetPipeline.interrupt  r	  rQ   2   g      @r*   pilTr1   rw   r   r   r   num_inference_stepsr  rz   r   r   r   r   r   conditioning_framesoutput_typereturn_dictr  r   r   r   r   callback_on_step_endr   c                    [        U R                  5      (       a  U R                  R                  OU R                  n[        U[        5      (       d%  [        U[        5      (       a  [        U5      U/-  nO[        U[        5      (       d%  [        U[        5      (       a  [        U5      U/-  nOb[        U[        5      (       dM  [        U[        5      (       d8  [        U[        5      (       a  [        UR                  5      OSnUU/-  UU/-  nnU=(       d-    U R                  R                  R                  U R                  -  nU=(       d-    U R                  R                  R                  U R                  -  nSnU R                  UUUUUUUUUUUUS9  X`l        UU l        UU l        SU l        Ub  [        U["        [$        45      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR&                  S   nU R(                  n[        U[        5      (       a.  [        U[*        5      (       a  U/[        UR                  5      -  n[        U[,        5      (       a  UR                  R.                  O"UR                  S   R                  R.                  n U=(       d    U nUb  UR1                  SS5      OSn!U R2                  (       a/  U R5                  UUUUU R6                  UUUU!U R8                  S9
u  pOeU R;                  UUUU R6                  UUUU!U R8                  S9	u  pU R6                  (       a  [<        R>                  " X/5      nURA                  USS	9nUc  Ub"  U RC                  UUUUU-  U R6                  5      n"[        U[,        5      (       a2  U RE                  UUUUU-  U-  UUURF                  U R6                  US
9	nOg[        U[        5      (       aP  / n#U HE  n$U RE                  U$UUUU-  U-  UUURF                  U R6                  US
9	n%U#RI                  U%5        MG     U#nO eU RJ                  RM                  UUS9  U RJ                  RN                  n&U R                  R                  RP                  n'U RS                  UU-  U'UUUURF                  UU
U5	      nU RU                  X5      n(Uc  Ub  SW"0OSn)/ n*[W        [        U&5      5       H  n+[Y        UU5       V,V-s/ s H>  u  n,n-S[+        U+[        U&5      -  U,:  =(       d    U+S-   [        U&5      -  U-:  5      -
  PM@     n.n,n-U*RI                  [        U[,        5      (       a  U.S   OU.5        M     U RZ                  (       a  U R\                  OSn/[W        U/5       GHo  n0U RZ                  (       a#  U R_                  UU0UUURF                  U
5      u  nn&[        U&5      U l0        [        U&5      XPRJ                  Rb                  -  -
  n1U Re                  U R`                  S9 n2[g        U&5       GH  u  n+n3U Rh                  (       a  M  U R6                  (       a  [<        R>                  " U/S-  5      OUn4U RJ                  Rk                  U4U35      n4U(       aD  U R6                  (       a3  Un5U RJ                  Rk                  U5U35      n5URm                  S5      S   n6OU4n5Un6[        U*U+   [        5      (       a(  [Y        UU*U+   5       V7V,s/ s H  u  n7n,U7U,-  PM     n8n7n,O$Un9[        U9[        5      (       a  U9S   n9U9U*U+   -  n8[<        Rn                  " U5SS5      n5U5Rq                  SU5R&                  S   U5R&                  S   U5R&                  S   45      n5U R                  U5U3U6UU8USS9u  n:n;U R                  U4U3UU Rr                  U)U:U;S9Rt                  n<U R6                  (       a  U<Rm                  S5      u  n=n>U=UU>U=-
  -  -   n<U RJ                  Rv                  " U<U3U40 U(D6Rx                  nUb\  0 n?U H  n@[{        5       U@   U?U@'   M     U" U U+U3U?5      nAUAR}                  SU5      nUAR}                  SU5      nUAR}                  SU5      nU+[        U&5      S-
  :X  d)  U+S-   U1:  a0  U+S-   U RJ                  Rb                  -  S:X  a  U2R                  5         [        (       d  GM  [        R                  " 5         GM     SSS5        GMr     US:X  a  UnBO,U R                  UU5      nCU R                  R                  UCUS9nBU R                  5         U(       d  WB4$ [        WBS9$ s  sn-n,f s  sn,n7f ! , (       d  f       GM  = f)u  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The height in pixels of the generated video.
    width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The width in pixels of the generated video.
    num_frames (`int`, *optional*, defaults to 16):
        The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
        amounts to 2 seconds of video.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality videos at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 7.5):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to not include in image generation. If not defined, you need to
        pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
        applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
        `(batch_size, num_channel, num_frames, height, width)`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, text embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
        not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
    ip_adapter_image (`PipelineImageInput`, *optional*):
        Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
        IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
        contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
        provided, embeddings are computed from the `ip_adapter_image` input argument.
    conditioning_frames (`List[PipelineImageInput]`, *optional*):
        The ControlNet input condition to provide guidance to the `unet` for generation. If multiple
        ControlNets are specified, images must be passed as a list such that each element of the list can be
        correctly batched for input to a single ControlNet.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
        of a plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
        [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
        The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
        to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
        the corresponding scale as a list.
    guess_mode (`bool`, *optional*, defaults to `False`):
        The ControlNet encoder tries to recognize the content of the input image even if you remove all
        prompts. A `guidance_scale` value between 3.0 and 5.0 is recommended.
    control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
        The percentage of total steps at which the ControlNet starts applying.
    control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
        The percentage of total steps at which the ControlNet stops applying.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
        returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
r*   )rw   r   r   r   rz   r   r2   r3   r   r   r   r   FNr   scale)
rw   r   r`   r   ry   rz   r2   r3   rR   rS   )r2   r3   rR   rS   )repeatsr   )	r   r   r   r{   r   r`   r_   ry   r   r   r   r   )totalr%   r[   r   r   )encoder_hidden_statescontrolnet_condconditioning_scaler   r  )r#  r  added_cond_kwargsdown_block_additional_residualsmid_block_additional_residualr1   r2   r3   latent)r   r  )frames)Hr"   r9   r   rB   rD   rH   r   r   r7   rI   sample_sizer=   r   r  r  r  r  rd   r   re   _execution_devicer   r   global_pool_conditionsgetr   _encode_prompt_free_noisery   rS   r   ri   r   r   r   r   r_   r   r:   set_timesteps	timestepsin_channelsr   r   r   r   free_init_enabled_free_init_num_iters_apply_free_initr  orderprogress_barr   r  scale_model_inputr   	transposer   r  r   r   prev_samplelocalspopupdateXLA_AVAILABLExm	mark_stepr   rK   postprocess_videomaybe_free_model_hooksr+   )DrM   rw   r   r   r   r  r  rz   r   r   r   r1   r2   r3   r   r   r  r  r  r  r   r   r   r   rS   r  r   r   r9   multr{   r`   r-  text_encoder_lora_scaler   cond_prepared_videosframe_prepared_videor1  r   r   r&  controlnet_keepr   sekeepsnum_free_init_itersfree_init_iternum_warmup_stepsr7  tlatent_model_inputcontrol_model_inputcontrolnet_prompt_embedsc
cond_scalecontrolnet_cond_scaledown_block_res_samplesmid_block_res_sample
noise_prednoise_pred_uncondnoise_pred_textcallback_kwargsr   callback_outputsr   video_tensorsD                                                                       rO   __call__&AnimateDiffControlNetPipeline.__call__  s	   p 3ET__2U2UT__..[_[j[j
 0$77JG[]a<b<b%()=%>BXAY%Y"0$77JG]_c<d<d#&'=#>BVAW#W 2D99*MacgBhBh+5jBV+W+W3z']^D.//,-- %9" O499++77$:O:OOM))558M8MM ! 	!+/Q'#9%*G#9!5 	 	
  .#'=$ *Vc4["A"AJJvt$<$<VJ&,,Q/J''j"677JGdfk<l<l-J,KcR\RaRaNb,b) *o66 44#**AA 	
  9#9
 :P9["&&w5ae 	  ""484R4R%&;,0,L,L /+'=2.. 5S 51M1 594F4F%00+'=2.. 5G 
51M // %		+A*Q R);;JTU;VM'+B+N?? '2200L j/22"&"4"4)%(==
J&; &&,0,L,L% #5 
# 
$899#% -!%!3!3 !),AAJN*?!$**040P0P) "4 
" %++N; . #75 	$$%8$HNN,,	  $yy//;;&&.. 

 !::9J
  +/F/R \* 	 s9~&A   68LMMDAq eAI.2Rq1uI6NQR6RSSM   ""z*o/V/V58\ab ' <@;Q;Qd77WX#$78N%%%)%:%:^-@&'--Yb&" #&i.D"9~0CnnFZFZ0ZZ "")<)<"=%i0DAq~~  FJEeEeG9q=)Akr&)-)I)IJ\^_)`&!d&F&F.5+.2nn.N.NObde.f+3@3F3Fq3I!3L0.@+3@0!/!"4d;;8;<Y[jkl[m8n%o8n1a!e8n
%o
0M-%&;TBB4I!4L1%:_Q=O%O
*///:MqRS*T'*=*E*E066q9;N;T;TUV;WYlYrYrstYuv+' DH??+.F(;+5#-$) DS D@*,@ "&*.;/3/J/J*;8N6J "+ " f  77=G=M=Ma=P:)?%6?]nKn9o%o
 #nn11*a^L]^jjG+7*,!CA17!OA. "D+?aO+\("2"6"6y'"J(8(<(<_m(\1A1E1EF^`v1w. C	NQ..AE=M3MSTWXSX\`\j\j\p\pRptuRu$++-$}U 1 >= 9p ("E..w8IJL((::[f:gE 	##%8O(66WH &p' >=s-   ?Ae,%C(e8e2G	e8-e82e88
f	)	r  r  r  r  rc   r  rL   r=   rK   )NN)NNNNNr   )r   )NNNNNr   r   r   )FF)6__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r	   r   r   r   r   r   r   r   r   r   r
   r   rA   ri   r   r   intr   r   r   r   r   r   r   r   propertyr  rS   ry   r  r  r  no_gradrd   	Generatorr   boolr   r   r   r^  __static_attributes____classcell__)rN   s   @rO   r-   r-   x   sY   8 6/AT ;?AE"
"
 $"
 !	"

 (/9:"
 &"
 /4+@%BXZnno"
 -"
 $$67"
   =>"
 "
V 049=&*#'t5  -t5 !) 6t5 UOt5 C=t5n52+'\ &!. #+/&)" J[\ nrR %*@ $ $   ( ( , , # #   ]]_ )-$& $##% #;?/0MQ*.049=9=@DBF%* ;?CF <?:=#'KO9B!#9L7c49n%L7 SML7 	L7
 }L7 !L7 L7 "%T#Y"78L7  (}L7 L7 E%//43H"HIJL7 %,,'L7  -L7 !) 6L7 ##56L7  "**<!=!L7" &d+=&>?#L7$ c]%L7& 'L7( !)c3h 8)L7* (-UDK-?'@+L7, -L7. !&eT%[&8 9/L70 $E4;$671L72 C=3L74 'xc40@$0F'GH5L76 -1I7L78 9L7 L7rQ   r-   )Er   typingr   r   r   r   r   r   r	   ri   torch.nn.functionalnn
functionalr   transformersr
   r   r   r   image_processorr   loadersr   r   r   r   modelsr   r   r   r   r   r   models.lorar   models.unets.unet_motion_modelr   
schedulersr   utilsr   r   r   r    r!   utils.torch_utilsr"   r#   rK   r$   free_init_utilsr&   free_noise_utilsr'   pipeline_utilsr(   r)   pipeline_outputr+   torch_xla.core.xla_modelcore	xla_modelr?  r>  
get_loggerr`  rl   EXAMPLE_DOC_STRINGr-    rQ   rO   <module>r     s     D D D    h h 1 w w  : ; 3 n n A - + 8 D 6 ))MM			H	%? Df7"f7rQ   