
    +h                        S SK r S SKJrJrJrJrJr  S SKrS SK	r	S SK
Js  Jr  S SKJr  S SKJrJrJrJr  SSKJrJr  SSKJr  SSKJrJr  SS	KJr  SS
KJ r   SSK!J"r"J#r#J$r$  SSK%J&r&  SSK'J(r(  SSK)J*r*  \"" 5       (       a  S SK+J,s  J-r.  Sr/OSr/\#R`                  " \15      r2Sr3S r4S r5S r6S S jr7S!S jr8S"S jr9    S#S\\:   S\\\;\	Rx                  4      S\\\:      S\\\=      4S jjr> " S S\5      r?g)$    N)CallableDictListOptionalUnion)Image)	BertModelBertTokenizerQwen2TokenizerQwen2VLForConditionalGeneration   )MultiPipelineCallbacksPipelineCallback)VaeImageProcessor)AutoencoderKLMagvitEasyAnimateTransformer3DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableloggingreplace_example_docstring)randn_tensor)VideoProcessor   )EasyAnimatePipelineOutputTFaw  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import EasyAnimateInpaintPipeline
        >>> from diffusers.pipelines.easyanimate.pipeline_easyanimate_inpaint import get_image_to_video_latent
        >>> from diffusers.utils import export_to_video, load_image

        >>> pipe = EasyAnimateInpaintPipeline.from_pretrained(
        ...     "alibaba-pai/EasyAnimateV5.1-12b-zh-InP-diffusers", torch_dtype=torch.bfloat16
        ... )
        >>> pipe.to("cuda")

        >>> prompt = "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
        >>> validation_image_start = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
        ... )

        >>> validation_image_end = None
        >>> sample_size = (448, 576)
        >>> num_frames = 49
        >>> input_video, input_video_mask = get_image_to_video_latent(
        ...     [validation_image_start], validation_image_end, num_frames, sample_size
        ... )

        >>> video = pipe(
        ...     prompt,
        ...     num_frames=num_frames,
        ...     negative_prompt="Twisted body, limb deformities, text subtitles, comics, stillness, ugliness, errors, garbled text.",
        ...     height=sample_size[0],
        ...     width=sample_size[1],
        ...     video=input_video,
        ...     mask_video=input_video_mask,
        ... )
        >>> export_to_video(video.frames[0], "output.mp4", fps=8)
        ```
c                    [        U [        R                  5      (       aI  [        R                  R                  R                  U R                  S5      USSS9R                  S5      n O[        U [        R                  5      (       a0  U R                  US   US   45      n [        R                  " U 5      n On[        U [        R                  5      (       aD  [        R                  " U 5      R                  US   US   45      n [        R                  " U 5      n O[        S5      e[        U [        R                  5      (       d8  [        R                  " U 5      R!                  SSS5      R#                  5       S-  n U $ )	z\
Preprocess a single image (PIL.Image, numpy.ndarray, or torch.Tensor) to a resized tensor.
r   bilinearFsizemodealign_cornersr   zKUnsupported input type. Expected PIL.Image, numpy.ndarray, or torch.Tensor.   g     o@)
isinstancetorchTensornn
functionalinterpolate	unsqueezesqueezer   resizenparrayndarray	fromarray
ValueError
from_numpypermutefloat)imagesample_sizes     v/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.pypreprocess_imager7   Z   s#    %&&##//OOA[zQV 0 

'!* 	 
E5;;	'	'k!nk!n=>	E2::	&	&&--{1~{1~.NOfgg eU\\**  '//1a8>>@5HL    c                 z   SnSnU GbW  [        U [        5      (       a  U  Vs/ s H  n[        Xc5      PM     nnO[        X5      n[        U[        5      (       a  [        R                  " U Vs/ s H"  ofR                  S5      R                  S5      PM$     snSS9n[        R                  " USS2SS2SS24   SSUSS/5      nXSS2SS2S[        U5      24'   O:[        R                  " UR                  S5      R                  S5      SSUSS/5      n[        R                  " USS2SS24   5      n[        U[        5      (       a  SUSS2SS2[        U5      S24'   OSUSS2SS2SS24'   Ub  [        U[        5      (       a  U Vs/ s H  n[        Xc5      PM     n	n[        R                  " U	 Vs/ s H"  ofR                  S5      R                  S5      PM$     snSS9n
XSS2SS2[        U
5      * S24'   SUSS2SS2[        U	5      * S24'   XE4$ [        X5      n	U	R                  S5      R                  S5      USS2SS2SS24'   SUSS2SS2SS24'   XE4$ U cC  [        R                  " SSX#S   US   /5      n[        R                  " SSX#S   US   /5      S-  nXE4$ s  snf s  snf s  snf s  snf )	z
Generate latent representations for video from start and end images. Inputs can be PIL.Image, numpy.ndarray, or
torch.Tensor.
Nr   r   r"   dim   r   )r#   listr7   r$   catr)   tilelen
zeros_likezerosones)validation_image_startvalidation_image_end
num_framesr5   input_videoinput_video_maskimgimage_startstart_video	image_end	end_videos              r6   get_image_to_video_latentrO   u   s   
 K),d33I_`I_#+C=I_K`K*+AOK k4(()):EF+3q!++A.+FK  **[Arr%:Q:qRS<TUK4?10K 0001**%%a(2215Az1a(K !++K2A2,>?k4((9<Q3{#3#556),Q12X&  +.55K_`K_C-c?K_	`!II>GHis]]1%//2iH	 8AAq3y>/"334<= AI'8!89 (( --AO	)2)<)<Q)?)I)I!)LAq"#I&./ Ars+ (( 
 	'kk1a^[QR^"TU ::q!ZQUV&XY\__((_ a G. aHs   J).)J.J37)J8c                    UnUnU u  pVXV-  nXtU-  :  a  Un[        [        XE-  U-  5      5      n	OUn	[        [        X6-  U-  5      5      n[        [        XH-
  S-  5      5      n
[        [        X9-
  S-  5      5      nX4X-   X-   44$ )Ng       @)intround)src	tgt_width
tgt_heighttwthhwrresize_heightresize_widthcrop_top	crop_lefts               r6   get_resize_crop_region_for_gridr_      s    	B	BDA	AG}5!,-E"&1*-.5",345HE2,345I 8#;Y=U"VVVr8   c                     UR                  [        [        SUR                  5      5      SS9nU R                  [        [        SU R                  5      5      SS9nXU-  -  nX%-  SU-
  U -  -   n U $ )a  
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
Flawed](https://huggingface.co/papers/2305.08891).

Args:
    noise_cfg (`torch.Tensor`):
        The predicted noise tensor for the guided diffusion process.
    noise_pred_text (`torch.Tensor`):
        The predicted noise tensor for the text-guided diffusion process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        A rescale factor applied to the noise predictions.

Returns:
    noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
r   T)r;   keepdim)stdr>   rangendim)	noise_cfgnoise_pred_textguidance_rescalestd_textstd_cfgnoise_pred_rescaleds         r6   rescale_noise_cfgrk      s{    " ""tE!_5I5I,J'KUY"ZHmmU1inn%= >mMG#''9: 6!>N:NR[9[[Ir8   c           	         UR                  5       nU(       a  [        USS  5      nSUS'   [        R                  " U S S 2S S 2SS2S S 2S S 24   USSS9n[        USS  5      nUS   S-
  US'   US   S:w  aA  [        R                  " U S S 2S S 2SS 2S S 2S S 24   USSS9n[        R
                  " XV/SS9nU$ Un U$ [        USS  5      n[        R                  " XSSS9nU$ )Nr"   r   r   	trilinearFr   r:   )r   r>   Fr(   r$   r?   )masklatentprocess_first_frame_onlylatent_sizetarget_sizefirst_frame_resizedremaining_frames_resizedresized_masks           r6   resize_maskrw      s   ++-K;qr?+AmmAqsAq!+UZ
 ;qr?+$Q!+Aq>Q'(}}Q12q!^$;[X]($ !99&9%TZ[\L 	 /L  ;qr?+}}T+]bcr8   c                    Ucm  [         R                  " SSU R                  S   4S9R                  U R                  5      n[         R
                  " U5      R                  U R                  5      nOK[         R                  " U R                  S   45      R                  U R                  U R                  5      U-  nUbF  [         R                  " U R                  5       X R                  U R                  S9US S 2S S S S 4   -  nO#[         R                  " U 5      US S 2S S S S 4   -  n[         R                  " U S:H  [         R                  " U 5      U5      nX-   n U $ )Ng            ?r   )meanrb   r   )	generatordtypedevicer=   )r$   normalshapetor}   expr|   rD   randnr   
randn_likewhererB   )r4   ratior{   sigmaimage_noises        r6   add_noise_to_reference_videor      s   }$Cu{{1~6GHKKELLY		% ##EKK0

EKKN,-00u{{KeSKK

	UZUaUabAtT4-./ 	
 &&u-atT46O0PP++erk5+;+;E+BKPKELr8   num_inference_stepsr}   	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   r}   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   r}   r}    )
r0   setinspect	signatureset_timesteps
parameterskeys	__class__r   rA   )	schedulerr   r}   r   r   kwargsaccepts_timestepsaccept_sigmass           r6   retrieve_timestepsr     s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r8   c            9         ^  \ rS rSrSrSr/ SQrS\S\\	\
4   S\\\4   S\S	\4
U 4S
 jjr          SAS\\\\   4   S\S\S\\\\\   4      S\\R,                     S\\R,                     S\\R,                     S\\R,                     S\\R.                     S\\R0                     S\4S jjrS r      SBS jrS rS r      SCS jr\S 5       r \S  5       r!\S! 5       r"\S" 5       r#\S# 5       r$\RJ                  " 5       \&" \'5      SS$SSSS%S%S&S'SSS(SSSSSSS)SSS*/S(S+S,S4S\\\\   4   S-\\   S.\\RP                     S/\\RP                     S0\\RP                     S1\\   S2\\   S3\\   S4\\)   S\\\\\   4      S\\   S5\\)   S6\\\RT                  \\RT                     4      S*\\R,                     S\\R,                     S\\R,                     S\\R,                     S\\R,                     S7\\   S8\S9\\\+\\\,/S4   \-\.4      S:\\   S;\)S<\)S=\)S>\\\      44S? jj5       5       r/S@r0U =r1$ )DEasyAnimateInpaintPipelineiJ  a  
Pipeline for text-to-video generation using EasyAnimate.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

EasyAnimate uses one text encoder [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.

Args:
    vae ([`AutoencoderKLMagvit`]):
        Variational Auto-Encoder (VAE) Model to encode and decode video to and from latent representations.
    text_encoder (Optional[`~transformers.Qwen2VLForConditionalGeneration`, `~transformers.BertModel`]):
        EasyAnimate uses [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.
    tokenizer (Optional[`~transformers.Qwen2Tokenizer`, `~transformers.BertTokenizer`]):
        A `Qwen2Tokenizer` or `BertTokenizer` to tokenize text.
    transformer ([`EasyAnimateTransformer3DModel`]):
        The EasyAnimate model designed by EasyAnimate Team.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with EasyAnimate to denoise the encoded image latents.
ztext_encoder->transformer->vae)latentsprompt_embedsnegative_prompt_embedsvaetext_encoder	tokenizertransformerr   c                   > [         TU ]  5         U R                  UUUUUS9  [        U SS 5      b   U R                  R
                  R                  OSU l        [        U SS 5      b  U R                  R                  OSU l	        [        U SS 5      b  U R                  R                  OSU l        [        U R                  S9U l        [        U R                  SSSS	9U l        [        U R                  S9U l        g )
N)r   r   r   r   r   r   Tr         )vae_scale_factorF)r   do_normalizedo_binarizedo_convert_grayscale)super__init__register_modulesgetattrr   configenable_text_attention_maskr   spatial_compression_ratiovae_spatial_compression_ratiotemporal_compression_ratiovae_temporal_compression_ratior   image_processormask_processorr   video_processor)selfr   r   r   r   r   r   s         r6   r   #EasyAnimateInpaintPipeline.__init__c  s     	%# 	 	
 t]D1= ##>> 	' 3:$t2L2XDHH..^_ 	* 4;43M3YDHH//_` 	+  1$BdBde/!??!%	
  .t?a?abr8   r   TNpromptnum_images_per_promptdo_classifier_free_guidancenegative_promptr   r   prompt_attention_masknegative_prompt_attention_maskr}   r|   max_sequence_lengthc           
         U
=(       d    U R                   R                  n
U	=(       d    U R                   R                  n	Ub  [        U[        5      (       a  SnO3Ub!  [        U[
        5      (       a  [        U5      nOUR                  S   nUGc  [        U[        5      (       a  SSUS./S./nOU Vs/ s H  nSSUS./S.PM     nnU Vs/ s H  oR                  R                  U/SSS	9PM      nnU R                  US
USSSSS9nUR                  U R                   R                  5      nUR                  nUR                  nU R                  (       a  U R                  UUSS9R                  S   nO[        S5      eUR!                  US5      nUR                  XS9nUR                  u  nnnUR!                  SUS5      nUR#                  UU-  US5      nUR                  U	S9nU(       Ga
  UGc  Ub   [        U[        5      (       a  SSUS./S./nOU Vs/ s H  nSSUS./S.PM     nnU Vs/ s H  oR                  R                  U/SSS	9PM      nnU R                  US
USSSSS9nUR                  U R                   R                  5      nUR                  nUR                  nU R                  (       a  U R                  UUSS9R                  S   nO[        S5      eUR!                  US5      nU(       aU  UR                  S   nUR                  XS9nUR!                  SUS5      nUR#                  X-  US5      nUR                  U	S9nXVXx4$ s  snf s  snf s  snf s  snf )as  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`):
        torch device
    dtype (`torch.dtype`):
        torch dtype
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the prompt. Required when `prompt_embeds` is passed directly.
    negative_prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the negative prompt. Required when `negative_prompt_embeds` is passed directly.
    max_sequence_length (`int`, *optional*): maximum sequence length to use for the prompt.
r   r   usertext)typer   )rolecontentFT)tokenizeadd_generation_prompt
max_lengthrightpt)r   paddingr   
truncationreturn_attention_maskpadding_sidereturn_tensors)	input_idsattention_maskoutput_hidden_stateszLLM needs attention_mask)r|   r}   r=   r}   )r   r|   r}   r#   strr>   rA   r   r   apply_chat_templater   r   r   r   hidden_statesr0   repeatview)r   r   r   r   r   r   r   r   r   r}   r|   r   
batch_sizemessages_promptmr   text_inputstext_input_idsbs_embedseq_len__negative_prompts                          r6   encode_prompt(EasyAnimateInpaintPipeline.encode_prompt  s   Z 0**0034,,33*VS"9"9JJvt$<$<VJ&,,Q/J &#&& !'-3V$D#E $*
 $* !'-3W$E#F $*   nvmuhi22A3^b2cmu   ..$.&*$# ) K &..):):)A)ABK(22N$/$>$>!.. $ 1 1,=Rim !2 !-!$ !!;<<$9$@$@AVXY$Z!%((u(D,22'1%,,Q0EqI%**86K+KWVXY 5 8 8 8 G '+A+I*z/3/O/O !'-3_$M#N -<
 -<( !'-3=M$N#O -<   nvmuhi22A3^b2cmu   ..$.&*$# ) K &..):):)A)ABK(22N-8-G-G*..)-):):,#A)- *; *  -	*$& !!;<<-K-R-RShjk-l*&,2215G%;%>%>U%>%Z"%;%B%B1F[]^%_"%;%@%@Acelnp%q"-K-N-NV\-N-]*6KkkAXs   5M%M"M'7%M,c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Netar{   )r   r   r   r   stepr   r   )r   r{   r   accepts_etaextra_step_kwargsaccepts_generators         r6   prepare_extra_step_kwargs4EasyAnimateInpaintPipeline.prepare_extra_step_kwargs-  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  r8   c
           
        ^  US-  S:w  d	  US-  S:w  a  [        SU SU S35      eU	bW  [        U 4S jU	 5       5      (       d=  [        ST R                   SU	 V
s/ s H  oT R                  ;  d  M  U
PM     sn
 35      eUb  Ub  [        S	U S
U S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Uc  [        S5      eUb  Ub  [        SU SU S35      eUb  Uc  [        S5      eUbE  UbA  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eg g g s  sn
f )N   r   z8`height` and `width` have to be divisible by 16 but are z and .c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr   s     r6   	<genexpr>:EasyAnimateInpaintPipeline.check_inputs.<locals>.<genexpr>M  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` )r0   allr   r#   r   r>   r   r   )r   r   heightwidthr   r   r   r   r   "callback_on_step_end_tensor_inputsr   s   `          r6   check_inputs'EasyAnimateInpaintPipeline.check_inputs>  s4    B;!urzQWX^W__dejdkklmnn-9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa$)>)Fdee&+A+M9/9J K*++]_ 
 "-2P2Xvww$)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$7 pHs   E01E0c                 N   [        [        X-  5      U5      n[        X-
  S5      nU R                  R                  XPR                  R
                  -  S  n[        U R                  S5      (       a1  U R                  R                  XPR                  R
                  -  5        XaU-
  4$ )Nr   set_begin_index)minrQ   maxr   r   orderhasattrr   )r   r   strengthr}   init_timestept_startr   s          r6   get_timesteps(EasyAnimateInpaintPipeline.get_timestepsu  s    C 3 >?ATU)91=NN,,W~~7K7K-K-MN	4>>#455NN**7^^5I5I+IJ777r8   c                 `   Ub  UR                  XvS9n/ nSn[        SUR                  S   U5       HH  nXX-    nU R                  R	                  U5      S   nUR                  5       nUR                  U5        MJ     [        R                  " USS9nXR                  R                  R                  -  nUb  UR                  XvS9nU R                  R                  R                  (       a
  [        X*US9n/ nSn[        SUR                  S   U5       HH  nX-X-    nU R                  R	                  U5      S   nUR                  5       nUR                  U5        MJ     [        R                  " USS9nUU R                  R                  R                  -  nUR                  XvS9nUU4$ S nUU4$ )Nr}   r|   r   r   r:   )r   r{   )r   rc   r   r   encoder    appendr$   r?   r   scaling_factorr   add_noise_in_inpaint_modelr   )r   ro   masked_imager   r   r   r|   r}   r{   r   noise_aug_strengthnew_maskbsimask_bsnew_mask_pixel_valuesmask_pixel_values_bsmasked_image_latentss                     r6   prepare_mask_latents/EasyAnimateInpaintPipeline.prepare_mask_latents  s     77&76DHB1djjmR016*((//'215!,,.(	 1
 99X1-D((//888D#'??&?FL&&AA; i  %'!B1l003R8'3'?$'+xx7K'LQ'O$';'@'@'B$%,,-AB	 9
 $)99-B#J #7$((//:X:X#X  $8#:#:&#:#V  ))) $( )))r8   c                 (   UUUS-
  U R                   -  S-   X0R                  -  X@R                  -  4n[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eU(       d
  U	c  U(       d  U
R                  XvS9n
Sn/ n[        SU
R                  S   U5       HJ  nU
UUU-    nU R                  R                  U5      S   nUR                  5       nUR                  U5        ML     [        R                  " USS9n
XR                  R                  R                   -  n
U
R#                  XR                  S   -  SSSS5      nUR                  XvS9nU	c  [%        XXvS9n[        U R&                  [(        5      (       a'  U(       a  UOU R&                  R+                  WUU5      n	O&U(       a  UOU R&                  R-                  WUU5      n	[/        U R&                  S	5      (       a!  U(       a  XR&                  R0                  -  OU	n	OW[/        U R&                  S	5      (       a+  U	R                  U5      nUU R&                  R0                  -  n	OU	R                  U5      n	U	4nU(       a  UW4-  nU(       a  UW4-  nU$ )
Nr   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.r
  r   r:   )r{   r}   r|   init_noise_sigma)r   r   r#   r>   rA   r0   r   rc   r   r   r  sampler  r$   r?   r   r  r   r   r   r   scale_noise	add_noiser  r  )r   r   num_channels_latentsr   r   rG   r|   r}   r{   r   videotimestepis_strength_maxreturn_noisereturn_video_latentsr   r  	new_videor  video_bsvideo_latentsnoiseoutputss                          r6   prepare_latents*EasyAnimateInpaintPipeline.prepare_latents  s_   $  !^ C CCaG888777
 i&&3y>Z+GA#i.AQ R&<'gi 
  GOOHHFH8EBI1ekk!nb1 QV,88??84Q7#??,  *	 2
 IIiQ/EHHOO:::E!LL{{1~)Eq!QPQRM),,F,HM? FXE$..*IJJ#2%8R8RS`bjlq8r#2%8P8PQ^`ego8pt~~'9::GV'NN$C$CC\ct~~'9::

6*$.."A"AA!**V, *xG''Gr8   c                     U R                   $ r   _guidance_scaler   s    r6   guidance_scale)EasyAnimateInpaintPipeline.guidance_scale  s    ###r8   c                     U R                   $ r   )_guidance_rescaler/  s    r6   rg   +EasyAnimateInpaintPipeline.guidance_rescale  s    %%%r8   c                      U R                   S:  $ )Nr   r-  r/  s    r6   r   6EasyAnimateInpaintPipeline.do_classifier_free_guidance  s    ##a''r8   c                     U R                   $ r   )_num_timestepsr/  s    r6   num_timesteps(EasyAnimateInpaintPipeline.num_timesteps  s    """r8   c                     U R                   $ r   )
_interruptr/  s    r6   	interrupt$EasyAnimateInpaintPipeline.interrupt  s    r8   1   i   2   g      @        pilr         ?gޓZӬ?rG   r   
mask_videomasked_video_latentsr   r   r   r0  r   r{   output_typereturn_dictcallback_on_step_endr   rg   r  r  r   c                 J   [        U[        [        45      (       a  UR                  n[	        US-  S-  5      n[	        US-  S-  5      nU R                  UUUU
UUUUU5	        Xl        UU l        SU l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                  b  U R                  R                  nOU R                   R                  nU R#                  UUUUU R$                  U
UUUUS9
u  nnnn[        U R&                  [(        5      (       a  [+        U R&                  UUUSS9u  nnO[+        U R&                  UUU5      u  nnU R-                  UUUS9u  nnUSS R/                  UU-  5      nUS	:H  nUb  UR                  u  nn nn!n"U R0                  R3                  UR5                  SS
SSS5      R7                  UU-  U U!U"5      UUS9n#U#R9                  [:        R<                  S9n#U#R7                  UUU Xg5      R5                  SS
SSS5      n#OSn#U R>                  R@                  RB                  n$U R                   R@                  RD                  n%U%U$:H  n&U RG                  UU-  U$UUUUUUUU#UUSU&S9n'U&(       a  U'u  nn(n)OU'u  nn(UGb  US:H  RI                  5       (       GaI  [:        RJ                  " U5      R9                  UU5      n*U R                   R@                  RL                  (       a0  [:        RJ                  " U5      SS2SS24   R9                  UU5      n+O&[:        RJ                  " U5      R9                  UU5      n+[:        RJ                  " U5      R9                  UU5      nU R$                  (       a  [:        RN                  " U+/S
-  5      OU+n,U R$                  (       a  [:        RN                  " U/S
-  5      OUn-[:        RN                  " U,U-/SS9R9                  U5      n.GOTUR                  u  nn nn!n"U RP                  R3                  UR5                  SS
SSS5      R7                  UU-  U U!U"5      UUS9n/U/R9                  [:        R<                  S9n/U/R7                  UUU Xg5      R5                  SS
SSS5      n/U%U$:w  Ga  [:        RR                  " U// SQ5      n0Uc)  U#U0S:  -  [:        RT                  " U#5      U0S:  -  S-  -   n1OUn1U R                   R@                  RL                  (       a  U RW                  SU1UUUUUUU R$                  US9
u  n2n[Y        SU/-
  XPR>                  R@                  RZ                  5      n+U+R9                  UU5      U R>                  R@                  R\                  -  n+O%U RW                  U0U1UUUUUUU R$                  US9
u  n+nU R$                  (       a  [:        RN                  " U+/S
-  5      OU+n,U R$                  (       a  [:        RN                  " U/S
-  5      OUn-[:        RN                  " U,U-/SS9R9                  U5      n.OSn.[:        RR                  " U/SU$SSS/5      n*[^        R`                  " U*URc                  5       SS SSS9R9                  UU5      n*GOU%U$:w  GaH  [:        RJ                  " U5      R9                  UU5      n*U R                   R@                  RL                  (       a0  [:        RJ                  " U5      SS2SS24   R9                  UU5      n+O&[:        RJ                  " U5      R9                  UU5      n+[:        RJ                  " U5      R9                  UU5      nU R$                  (       a  [:        RN                  " U+/S
-  5      OU+n,U R$                  (       a  [:        RN                  " U/S
-  5      OUn-[:        RN                  " U,U-/SS9R9                  U5      n.Ou[:        RJ                  " U#SS2SS24   5      n*[:        RR                  " U*SU$SSS/5      n*[^        R`                  " U*URc                  5       SS SSS9R9                  UU5      n*Sn.U%U$:w  a  W+R                  S   n3UR                  S   n4U$U3-   U4-   U R                   R@                  RD                  :w  aV  [e        SU R                   R@                   SU R                   R@                  RD                   SU$ SU3 SU4 SU$U4-   U3-    S 35      eU Rg                  X5      n5U R$                  (       a0  [:        RN                  " UU/5      n[:        RN                  " UU/5      nUR9                  US!9nUR9                  US!9n[        U5      XR&                  Rh                  -  -
  n6[        U5      U l5        U Rm                  US"9 n7[o        U5       GH  u  n8n9U Rp                  (       a  M  U R$                  (       a  [:        RN                  " U/S
-  5      OUn:[s        U R&                  S#5      (       a  U R&                  Ru                  U:U95      n:[:        Rv                  " U9/U:R                  S   -  US!9R9                  U:R                  S9n;U R!                  U:U;UU.SS$9S   n<U<Rc                  5       S   U R>                  R@                  RB                  :w  a  U<Ry                  S
SS9u  n<n2U R$                  (       a  U<Ry                  S
5      u  n=n>U=U	U>U=-
  -  -   n<U R$                  (       a  US%:  a  [{        U<W>US&9n<U R&                  R|                  " U<U9U40 U5DS'S0D6S   nU%U$:X  a  W)n?U*n@U8[        U5      S-
  :  a  UU8S-      nA[        U R&                  [(        5      (       a3  U R&                  R                  U?[:        Rv                  " WA/U(5      5      n?O2U R&                  R                  U?U([:        Rv                  " WA/5      5      n?SW@-
  U?-  U@U-  -   nUb\  0 nBU H  nC[        5       UC   WBUC'   M     U" U U8U9WB5      nDUDR                  S(U5      nUDR                  S)U5      nUDR                  S*U5      nU8[        U5      S-
  :X  d)  U8S-   U6:  a0  U8S-   U R&                  Rh                  -  S:X  a  U7R                  5         [        (       d  GM  [        R                  " 5         GM     SSS5        US+:X  d^  SU R>                  R@                  R\                  -  U-  nU R>                  R                  USS,9S   nU R                  R                  UUS-9nOUnU R                  5         U(       d  U4$ [        US.9$ ! , (       d  f       N= f)/aj  
The call function to the pipeline for generation with HunyuanDiT.

Examples:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    num_frames (`int`, *optional*):
        Length of the video to be generated in seconds. This parameter influences the number of frames and
        continuity of generated content.
    video (`torch.FloatTensor`, *optional*):
        A tensor representing an input video, which can be modified depending on the prompts provided.
    mask_video (`torch.FloatTensor`, *optional*):
        A tensor to specify areas of the video to be masked (omitted from generation).
    masked_video_latents (`torch.FloatTensor`, *optional*):
        Latents from masked portions of the video, utilized during image generation.
    height (`int`, *optional*):
        The height in pixels of the generated image or video frames.
    width (`int`, *optional*):
        The width in pixels of the generated image or video frames.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image but slower
        inference time. This parameter is modulated by `strength`.
    guidance_scale (`float`, *optional*, defaults to 5.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is effective when `guidance_scale > 1`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to exclude in image generation. If not defined, you need to provide
        `negative_prompt_embeds`. This parameter is ignored when not using guidance (`guidance_scale < 1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    eta (`float`, *optional*, defaults to 0.0):
        A parameter defined in the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only applies to the
        [`~schedulers.DDIMScheduler`] and is ignored in other schedulers. It adjusts noise level during the
        inference process.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) for setting
        random seeds which helps in making generation deterministic.
    latents (`torch.Tensor`, *optional*):
        A pre-computed latent representation which can be used to guide the generation process.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings, aiding in fine-tuning what should not be represented in the
        outputs. If not provided, embeddings are generated from the `negative_prompt` argument.
    prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask guiding the focus of the model on specific parts of the prompt text. Required when using
        `prompt_embeds`.
    negative_prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the negative prompt, needed when `negative_prompt_embeds` are used.
    output_type (`str`, *optional*, defaults to `"latent"`):
        The output format of the generated image. Choose between `PIL.Image` and `np.array` to define how you
        want the results to be formatted.
    return_dict (`bool`, *optional*, defaults to `True`):
        If set to `True`, a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] will be returned;
        otherwise, a tuple containing the generated images and safety flags will be returned.
    callback_on_step_end (`Callable[[int, int, Dict], None]`, `PipelineCallback`, `MultiPipelineCallbacks`,
    *optional*):
        A callback function (or a list of them) that will be executed at the end of each denoising step,
        allowing for custom processing during generation.
    callback_on_step_end_tensor_inputs (`List[str]`, *optional*):
        Specifies which tensor inputs should be included in the callback function. If not defined, all tensor
        inputs will be passed, facilitating enhanced logging or monitoring of the generation process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        Rescale parameter for adjusting noise configuration based on guidance rescale. Based on findings from
        [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891).
    strength (`float`, *optional*, defaults to 1.0):
        Affects the overall styling or quality of the generated output. Values closer to 1 usually provide
        direct adherence to prompts.

Examples:
    # Example usage of the function for generating images based on prompts.

Returns:
    [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
        Returns either a structured output containing generated images and their metadata when `return_dict` is
        `True`, or a simpler tuple, where the first element is a list of generated images and the second
        element indicates if any of them contain "not-safe-for-work" (NSFW) content.
r   FNr   r   )
r   r}   r|   r   r   r   r   r   r   r   )mu)r   r  r}   rC  r"   r   r   )r   r   )r|   T)r   r!  r"  r#  r$  r<   r:   )r   r   r   r   r   ry   r=   )r  rm   r   zHIncorrect configuration settings! The config of `pipeline.transformer`: z	 expects z& but received `num_channels_latents`: z + `num_channels_mask`: z  + `num_channels_masked_image`: z = z[. Please verify the config of `pipeline.transformer` or your `mask_image` or `image` input.r   )totalscale_model_input)encoder_hidden_statesinpaint_latentsrG  rA  )rg   rG  r   r   r   rp   )rG  )r   rF  )frames)Lr#   r   r   tensor_inputsrQ   r   r.  r3  r<  r   r>   rA   r   _execution_devicer   r|   r   r   r   r   r   r   r  r   r   
preprocessr2   reshaper   r$   float32r   r   latent_channelsin_channelsr*  r   rB   resize_inpaint_mask_directlyr?   r   r@   	ones_liker  rw   cache_mag_vaer  rn   r(   r   r0   r   r  r8  progress_bar	enumerater=  r  rM  tensorchunkrk   r   r  r  localspopupdateXLA_AVAILABLExm	mark_stepdecoder   postprocess_videomaybe_free_model_hooksr   )Er   r   rG   r   rD  rE  r   r   r   r0  r   r   r   r{   r   r   r   r   r   rF  rG  rH  r   rg   r  r  r   r   r}   r|   latent_timestepr"  channelsheight_videowidth_video
init_videor  num_channels_transformerreturn_image_latentslatents_outputsr(  image_latentsro   mask_latents
mask_inputmasked_video_latents_inputrO  mask_conditionmask_condition_tilemasked_videor   num_channels_masknum_channels_masked_imager   num_warmup_stepsr[  r  tlatent_model_inputt_expand
noise_prednoise_pred_uncondrf   init_latents_proper	init_masknoise_timestepcallback_kwargsr   callback_outputssE                                                                        r6   __call__#EasyAnimateInpaintPipeline.__call__  s   d *-=?U,VWW1E1S1S. Vr\B&'ERK"$% 	"!*.
	
  .!1 *VS"9"9JJvt$<$<VJ&,,Q/J''(%%++E$$**E "7(,(H(H+'#9"7+I  
	
"!* dnn&EFF-? 3VY1.*I* .@Pcekmv-w*I*)-);); 3hv *< *
&	&
 $BQ-..z<Q/QR"c/JO++GJ*lK--88aAq!,44Z*5LhXdfqr 9 J
 $U]];J#++J
Hf\ddefhiklnoqrsJJ  $xx>>#'#3#3#:#:#F#F 7;OO .... $+!5 / 
   ,;)GUM,NGU !c!&&((''033FEB##**GG#(#3#3G#<QU#C#F#Fvu#UL#(#3#3G#<#?#?#NL','7'7'@'C'CFE'R$>B>^>^UYY~'9:dp
=A=]=]EII34q89cw + #())Z9S,TZ["\"_"_`e"f OYN^N^K
Hj,!%!4!4!?!?&&q!Q15=="Z/< " "@ " "0!2!2!2!G!/!7!7
JPXZ`!h!p!pq!Q" ,/CC*/**^_*U'+3&*=*CD#ooj9=PSV=VWZ\\] %
 (<''..KK262K2K (&"!!"% <</A 3L 3// (3.0DhhooFcFc( (4vu'EHfHf'f=A=V=V/(&"!!"% <</A >W >:&: CGBbBbL>A+=!>htJ  ;; 		#7"81"<=1 /
 ',ii=W0X^_&`&c&cdi&jO&*Ozz.16JAqRS2TU}}Trs0C+eijmmE (+??''033FEB##**GG#(#3#3G#<QU#C#F#Fvu#UL#(#3#3G#<#?#?#NL','7'7'@'C'CFE'R$>B>^>^UYY~'9:dp
=A=]=]EII34q89cw + #())Z9S,TZ["\"_"_`e"f''
1bqb5(9:zz$,@!Q(JK}}Trs0C+eijmmE #' $';; , 2 21 5(<(B(B1(E%$'88;TT##**667 !^_c_o_o_v_v^w x((//;;<<bcwbx y-->,??_`y_z.1JJM^^_ `UU  !::9J++!II'=}&MNM$)II/MOd.e$f! &(((7 5 8 8 8 G y>,?..BVBV,VV!)n%89\!),1>> BFAaAaUYYy1}%=gn"4>>+>??)-)I)IJ\^_)`& !<<.@.F.Fq.I(IRXY\\,22 ] 
 "--&*7$3 % .  
 ??$Q'488??+J+JJ$.$4$4QA$4$>MJ 339C9I9I!9L6%!2^YjGj5k!kJ338H38N!2:aq!rJ ..--j!WmHYmglmnop+/CC*7' $I3y>A--)21q5)9%dnn6UVV26..2L2L 3U\\>BRTY5Z3/ 37..2J2J 3UELL.IY<Z3/  !9}0CCiRYFYYG'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNG - :L h&$((//8887BGHHOOGO?BE((::T_:`EE 	##%8O(66g :9s   Lv)v
v")
r3  r.  r<  r8  r   r   r   r   r   r   )
r   TNNNNNNN   )NNNNNN)NNNTFF)2__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqr   r   r   r   r	   r   r
   r   r   r   r   r   rQ   boolr   r$   r%   r}   r|   r   r   r   r  r  r*  propertyr0  rg   r   r9  r=  no_gradr   EXAMPLE_DOC_STRINGFloatTensorr3   	Generatorr   r   r   r   r  __static_attributes____classcell__)r   s   @r6   r   r   J  s   * =T$c $c ;YFG$c 67	$c
 3$c 3$cT &',0;?049=8<AE)-'+#&`lc49n%`l  #`l &*	`l
 "%T#Y"78`l  -`l !) 6`l  (5`l )1(>`l &`l $`l !`lF!, #"'++/4n	81*z "HT $ $ & & ( ( # #   ]]_12 )-$&*./39= #"-/*-;?/0"MQ*.049=8<AE%*  9B"%$*)-;}7c49n%}7 SM}7 U&&'	}7
 %++,}7 $E$5$56}7 }7 }}7 &c]}7 !}7 "%T#Y"78}7  (}}7 e_}7 E%//43H"HIJ}7 %,,'}7   -!}7" !) 6#}7$  (5%}7& )1(>'}7( c])}7* +}7, '(Cd+T124DF\\]
-}72 -1I3}74  5}76 7}78 "9}7: DI&;}7 3 }7r8   r   )rA  )T)NN)NNNN)@r   typingr   r   r   r   r   numpyr,   r$   torch.nn.functionalr&   r'   rn   PILr   transformersr	   r
   r   r   	callbacksr   r   r   r   modelsr   r   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   r   utils.torch_utilsr   r   r   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelrc  rb  
get_loggerr  loggerr  r7   rO   r_   rk   rw   r   rQ   r   r}   r3   r   r   r   r8   r6   <module>r     s     8 8       B 0 H 9 9 O O - - 6 ))MM			H	%$ N6:)|W&64, *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vI7!2 I7r8   