
    +h~                     P   S SK JrJrJrJrJr  S SKrS SKJr  S SK	rSSK
JrJr  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJrJrJrJrJrJr  SSKJrJr  SSK J!r!  SSK"J#r#  SSK$J%r%J&r&J'r'  SSK(J)r)  \RT                  " \+5      r, " S S\RZ                  5      r. " S S\!\\5      r/g)    )AnyDictOptionalTupleUnionN   )ConfigMixinregister_to_config)UNet2DConditionLoadersMixin)logging   )get_activation)	AttentionFeedForward)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttentionProcessorAttnAddedKVProcessorAttnProcessorFusedAttnProcessor2_0)TimestepEmbedding	Timesteps)
ModelMixin)TransformerTemporalModel   )UNetMidBlock3DCrossAttnget_down_blockget_up_block)UNet3DConditionOutputc                      ^  \ rS rSr    SS\S\S\S\S\S\\   S\4U 4S	 jjjrS
\	R                  S\	R                  4S jrSrU =r$ )"I2VGenXLTransformerTemporalEncoder0   dimnum_attention_headsattention_head_dimactivation_fnupcast_attentionff_inner_dimdropoutc           
         > [         TU ]  5         [        R                  " USSS9U l        [        UUUUSUSS9U l        [        UUUSUSS9U l        g )NTh㈵>)elementwise_affineepsF)	query_dimheadsdim_headr)   biasr'   out_bias)r)   r&   final_dropout	inner_dimr1   )	super__init__nn	LayerNormnorm1r   attn1r   ff)	selfr#   r$   r%   r&   r'   r(   r)   	__class__s	           _/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/unets/unet_i2vgen_xl.pyr6   +I2VGenXLTransformerTemporalEncoder.__init__1   se     	\\#$DI
%'-

 '"
    hidden_statesreturnc                     U R                  U5      nU R                  US S9nX1-   nUR                  S:X  a  UR                  S5      nU R	                  U5      nXA-   nUR                  S:X  a  UR                  S5      nU$ )N)encoder_hidden_states   r   )r9   r:   ndimsqueezer;   )r<   rA   norm_hidden_statesattn_output	ff_outputs        r>   forward*I2VGenXLTransformerTemporalEncoder.forwardO   s     "ZZ6jj!34jP#3")11!4MGGM*	!1")11!4Mr@   )r:   r;   r9   )gegluFNg        )__name__
__module____qualname____firstlineno__intstrboolr   r6   torchTensorrK   __static_attributes____classcell__r=   s   @r>   r!   r!   0   s     %!&&*

 !
  	

 
 
 sm
 
 
<|| 
 r@   r!   c                   z  ^  \ rS rSrSrSr\           S*S\\   S\S\S\	\
S	4   S
\	\
S	4   S\	\S	4   S\S\\   S\S\\\	\   4   S\\\\	\   4      4U 4S jjj5       r\S\\
\4   4S j5       rS\\\\
\4   4   4S jrS+S\\   S\SS4S jjrS rS rS rS rS rS r     S,S\R4                  S \\R4                  \\4   S!\R4                  S"\R4                  S#\\R4                     S$\\R4                     S%\\R4                     S&\\\
\4      S'\S\\\	\R4                     4   4S( jjrS)r U =r!$ )-I2VGenXLUNeta   a  
I2VGenXL UNet. It is a conditional 3D UNet model that takes a noisy sample, conditional state, and a timestep and
returns a sample-shaped output.

This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
for all models (such as downloading or saving).

Parameters:
    sample_size (`int` or `Tuple[int, int]`, *optional*, defaults to `None`):
        Height and width of input/output sample.
    in_channels (`int`, *optional*, defaults to 4): The number of channels in the input sample.
    out_channels (`int`, *optional*, defaults to 4): The number of channels in the output.
    down_block_types (`Tuple[str]`, *optional*, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`):
        The tuple of downsample blocks to use.
    up_block_types (`Tuple[str]`, *optional*, defaults to `("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D")`):
        The tuple of upsample blocks to use.
    block_out_channels (`Tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
        The tuple of output channels for each block.
    layers_per_block (`int`, *optional*, defaults to 2): The number of layers per block.
    norm_num_groups (`int`, *optional*, defaults to 32): The number of groups to use for the normalization.
        If `None`, normalization and activation layers is skipped in post-processing.
    cross_attention_dim (`int`, *optional*, defaults to 1280): The dimension of the cross attention features.
    attention_head_dim (`int`, *optional*, defaults to 64): Attention head dim.
    num_attention_heads (`int`, *optional*): The number of attention heads.
FNsample_sizein_channelsout_channelsdown_block_types.up_block_typesblock_out_channelslayers_per_blocknorm_num_groupscross_attention_dimr%   r$   c                 $
  > [         TU ]  5         U
n[        U5      [        U5      :w  a  [        SU SU S35      e[        U5      [        U5      :w  a  [        SU SU S35      e[	        U[
        5      (       d*  [        U5      [        U5      :w  a  [        SU SU S35      e[        R                  " X"-   US   SS	S
9U l        [        SUUS   S	US9U l
        [        R                  " [        R                  " SUS-  SS	S9[        R                  " 5       [        R                  " US-  US-  SS	S	S9[        R                  " 5       [        R                  " US-  USS	S	S95      U l        [        USUS-  USS9U l        [        R                  " [        R                  " SUS-  SS	S9[        R                  " 5       [        R                   " S5      [        R                  " US-  US-  SSS	S9[        R                  " 5       [        R                  " US-  U	SSS	S95      U l        US   S-  n[%        US   SS5      U l        US   n[)        XSS9U l        [        R                  " [        R,                  " X5      [        R                  " 5       [        R,                  " XU-  5      5      U l        [        R                  " [        R,                  " X5      [        R                  " 5       [        R,                  " X5      5      U l        [        R2                  " / 5      U l        [        R2                  " / 5      U l        [	        U[
        5      (       a  U4[        U5      -  nUS   n[9        U5       HT  u  nnUnXo   nU[        U5      S	-
  :H  n[;        UUUUUU(       + SSUU	X   S	SS9nU R4                  R=                  U5        MV     [?        US   USSS	U	US   USS9	U l         SU l!        [E        [G        U5      5      n[E        [G        U5      5      nUS   n[9        U5       H  u  nnU[        U5      S	-
  :H  nUnUU   nU[I        US	-   [        U5      S	-
  5         nU(       d  SnU =RB                  S	-  sl!        OSn[K        UUS	-   UUUUUSSUU	UU   SUS9nU R6                  R=                  U5        UnM     [        RL                  " US   USS9U l'        [Q        S5      U l)        [        R                  " US   USS	S
9U l*        g )Nz\Must provide the same number of `down_block_types` as `up_block_types`. `down_block_types`: z. `up_block_types`: .zbMust provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: z. `down_block_types`: zdMust provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: r   r   r   )kernel_sizepadding   )r$   r%   r^   
num_layersrd   rE   )ri   )strideri   r   gelu)r#   r$   r(   r%   r&   )    rn      Tsilu)act_fnr+   F)rk   r^   r_   temb_channelsadd_downsample
resnet_epsresnet_act_fnresnet_groupsre   r$   downsample_paddingdual_cross_attention)	r^   rr   rt   ru   output_scale_factorre   r$   rv   rx   )rk   r^   r_   prev_output_channelrr   add_upsamplert   ru   rv   re   r$   rx   resolution_idx)num_channels
num_groupsr-   )+r5   r6   len
ValueError
isinstancerR   r7   Conv2dconv_inr   transformer_in
SequentialSiLUimage_latents_proj_inr!   image_latents_temporal_encoderAdaptiveAvgPool2dimage_latents_context_embeddingr   	time_projr   time_embeddingLinearcontext_embeddingfps_embedding
ModuleListdown_blocks	up_blocks	enumerater   appendr   	mid_blocknum_upsamplerslistreversedminr   	GroupNormconv_norm_outr   conv_actconv_out)r<   r]   r^   r_   r`   ra   rb   rc   rd   re   r%   r$   time_embed_dimtimestep_input_dimoutput_channelidown_block_typeinput_channelis_final_block
down_blockreversed_block_out_channelsreversed_num_attention_headsup_block_typer{   r|   up_blockr=   s                             r>   r6   I2VGenXLUNet.__init__~   s   2 	 1  C$77no  oA  AU  Vd  Ue  ef  g  !"c*:&;;t  vH  uI  I_  `p  _q  qr  s  -s33<O8PTWXhTi8iv  xK  wL  Lb  cs  bt  tu  v 
 yy!:<Nq<Q_`jkl6 !2*1-+
 &(]]IIaq!Q7GGIIIkAo{Q!QOGGIIIkAo{AaK&
" /Q !$q* /
+ 02}}IIaq!Q7GGI  *IIkAo{R'71aPGGIIIkB&(;QqRST0
, ,A.2"#5a#8$B/2/0B[ab!#II):GGIIInK&GH"

  ]]II(92779biiP^Fo

 ==,r*)3//#6"83?O;P"P ,A."+,<"=A*M/2N#&8"9A"==N'+)+,#11 $-$7$7$:#$%*J ##J/) #>. 1*2.(  ! 3 3B 7)!&

   '+84F+G&H#'+H5H,I'J$4Q7 ). 9A}#&8"9A"==N"08;N7AE3GYCZ]^C^8_`M "###q(#$#+a/)+$7,) $-$7$@$C%* H  NN!!(+"0? !:D  \\7I!7LYhnst&v.		"4Q"7ST^_`r@   rB   c                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  5       X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processor
.processorrg   )hasattrr   named_children)r   r   r   sub_namechildfn_recursive_add_processorss        r>   r   AI2VGenXLUNet.attn_processors.<locals>.fn_recursive_add_processorsH  sZ    v//282F2F2H
V:./#)#8#8#:+tfAhZ,@%T $; r@   )rS   rU   r7   Moduler   r   r   )r<   r   r   r   r   s       @r>   attn_processorsI2VGenXLUNet.attn_processors=  sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r@   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers.

        If `processor` is a dict, the key needs to define the path to the corresponding cross attention
        processor. This is strongly recommended when setting trainable attention processors.

z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r   r   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorr   rg   )r   r   dictr   popr   )r   r   r   r   r   fn_recursive_attn_processors        r>   r   DI2VGenXLUNet.set_attn_processor.<locals>.fn_recursive_attn_processorl  ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r@   N)r   r   keysr   r   r   rS   rU   r7   r   r   )r<   r   countr   r   r   s        @r>   set_attn_processorI2VGenXLUNet.set_attn_processorW  s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r@   
chunk_sizer#   c                    ^ US;  a  [        SU 35      eU=(       d    SnS[        R                  R                  S[        S[        4U4S jjmU R                  5        H  nT" X1U5        M     g)	a  
Sets the attention processor to use [feed forward
chunking](https://huggingface.co/blog/reformer#2-chunked-feed-forward-layers).

Parameters:
    chunk_size (`int`, *optional*):
        The chunk size of the feed-forward layers. If not specified, will run feed-forward layer individually
        over each tensor of dim=`dim`.
    dim (`int`, *optional*, defaults to `0`):
        The dimension over which the feed-forward computation should be chunked. Choose between dim=0 (batch)
        or dim=1 (sequence length).
)r   r   z-Make sure to set `dim` to either 0 or 1, not r   r   r   r#   c                    > [        U S5      (       a  U R                  XS9  U R                  5        H  nT" X1U5        M     g Nset_chunk_feed_forward)r   r#   r   r   childrenr   r   r#   r   fn_recursive_feed_forwards       r>   r   GI2VGenXLUNet.enable_forward_chunking.<locals>.fn_recursive_feed_forward  =    v788---M*)%SA +r@   N)r   rU   r7   r   rR   r   )r<   r   r#   r   r   s       @r>   enable_forward_chunking$I2VGenXLUNet.enable_forward_chunkingz  sn     fLSERSS  _1
	Behhoo 	B3 	BUX 	B mmoF%f#> &r@   c                    ^ S[         R                  R                  S[        S[        4U4S jjmU R	                  5        H  nT" US S5        M     g )Nr   r   r#   c                    > [        U S5      (       a  U R                  XS9  U R                  5        H  nT" X1U5        M     g r   r   r   s       r>   r   HI2VGenXLUNet.disable_forward_chunking.<locals>.fn_recursive_feed_forward  r   r@   r   )rU   r7   r   rR   r   )r<   r   r   s     @r>   disable_forward_chunking%I2VGenXLUNet.disable_forward_chunking  sH    	Behhoo 	B3 	BUX 	B mmoF%fdA6 &r@   c           	      ~   [        S U R                  R                  5        5       5      (       a  [        5       nOr[        S U R                  R                  5        5       5      (       a  [	        5       nO8[        S[        [        U R                  R                  5       5      5       35      eU R                  U5        g)zU
Disables custom attention processors and sets the default attention implementation.
c              3   F   #    U  H  oR                   [        ;   v   M     g 7fN)r=   r   .0procs     r>   	<genexpr>:I2VGenXLUNet.set_default_attn_processor.<locals>.<genexpr>  s     iKh4~~!>>Kh   !c              3   F   #    U  H  oR                   [        ;   v   M     g 7fr   )r=   r   r   s     r>   r   r     s     hJg$#==Jgr   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allr   valuesr   r   r   nextiterr   )r<   r   s     r>   set_default_attn_processor'I2VGenXLUNet.set_default_attn_processor  s     i4K_K_KfKfKhiii,.Ih$J^J^JeJeJghhh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*r@   c                     [        U R                  5       H9  u  pV[        USU5        [        USU5        [        USU5        [        USU5        M;     g)a  Enables the FreeU mechanism from https://huggingface.co/papers/2309.11497.

The suffixes after the scaling factors represent the stage blocks where they are being applied.

Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of values that
are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.

Args:
    s1 (`float`):
        Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    s2 (`float`):
        Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
    b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
s1s2b1b2N)r   r   setattr)r<   r   r   r   r   r   upsample_blocks          r>   enable_freeuI2VGenXLUNet.enable_freeu  sJ    $ "+4>>!:AND"-ND"-ND"-ND"-	 ";r@   c                     1 Skn[        U R                  5       H9  u  p#U H.  n[        X45      (       d  [        X4S5      c  M"  [	        X4S5        M0     M;     g)zDisables the FreeU mechanism.>   r   r   r   r   N)r   r   r   getattrr   )r<   
freeu_keysr   r   ks        r>   disable_freeuI2VGenXLUNet.disable_freeu  sH    -
!*4>>!:A>--D1Q1]Nt4   ";r@   c                    SU l         U R                  R                  5        H3  u  pS[        UR                  R
                  5      ;   d  M*  [        S5      e   U R                  U l         U R                  5        H)  n[        U[        5      (       d  M  UR                  SS9  M+     U R                  [        5       5        g)u  
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
are fused. For cross-attention modules, key and value projection matrices are fused.

<Tip warning={true}>

This API is 🧪 experimental.

</Tip>
NAddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.T)fuse)original_attn_processorsr   itemsrS   r=   rN   r   modulesr   r   fuse_projectionsr   r   )r<   _attn_processorr   s       r>   fuse_qkv_projections!I2VGenXLUNet.fuse_qkv_projections  s     )-%!%!5!5!;!;!=A#n66??@@ !tuu "> )-(<(<%llnF&),,''T'2 % 	 5 78r@   c                 V    U R                   b  U R                  U R                   5        gg)um   Disables the fused QKV projection if enabled.

<Tip warning={true}>

This API is 🧪 experimental.

</Tip>

N)r   r   )r<   s    r>   unfuse_qkv_projections#I2VGenXLUNet.unfuse_qkv_projections  s)     ((4##D$A$AB 5r@   sampletimestepfpsimage_latentsimage_embeddingsrD   timestep_condcross_attention_kwargsreturn_dictc
                 N  ^& UR                   u  ppnSU R                  -  m&SnSn[        U&4S jUR                   SS  5       5      (       a  [        R	                  S5        SnUn[
        R                  " U5      (       d  UR                  R                  S:H  nUR                  R                  S	:H  n[        U[        5      (       a/  U(       d  U(       a  [
        R                  O[
        R                  nO.U(       d  U(       a  [
        R                  O[
        R                  n[
        R                  " U/UUR                  S
9nO7[!        UR                   5      S:X  a  US   R#                  UR                  5      nUR%                  UR                   S   5      nU R'                  U5      nUR#                  U R(                  S9nU R+                  UU5      nUR%                  UR                   S   5      nU R-                  U R'                  U5      R#                  U R(                  S95      nUU-   nUR/                  USUR                   S   U-  S9nUR1                  U
SU R2                  R4                  5      n[
        R6                  " UU/SS9nUSS2SS2SS2SS24   nUR9                  SSSSS5      R;                  UR                   S   UR                   S   -  UR                   S   UR                   S   UR                   S   5      nU R=                  U5      nUR                   u  nnnnUR9                  SSSS5      R;                  UUU-  U5      n[
        R6                  " UU/SS9nU R?                  U5      nURA                  SU R2                  RB                  U R2                  R4                  5      n[
        R6                  " UU/SS9nUR/                  USUR                   S   U-  S9nUR9                  SSSSS5      R;                  UR                   S   UR                   S   -  UR                   S   UR                   S   UR                   S   5      nU RE                  U5      nUSSS24   R;                  XXU5      R9                  SSSSS5      R;                  X-  U-  X5      nU RG                  U5      nUR;                  XXU5      R9                  SSSSS5      n[
        R6                  " X/SS9nUR9                  SSSSS5      R;                  UR                   S   U-  S4UR                   SS -   5      nU RI                  U5      nU RK                  UUUSS9S   nU4n U RL                   HC  n![O        U!S5      (       a  U!RP                  (       a  U!" UUUUUS9u  nn"OU!" UUUS9u  nn"U U"-  n ME     U RR                  b  U RS                  UUUUUS9n[U        U RV                  5       H  u  n#n$U#[!        U RV                  5      S-
  :H  n%U [!        U$RX                  5      * S n"U S[!        U$RX                  5      *  n U%(       d  U(       a  U S   R                   SS n[O        U$S5      (       a  U$RP                  (       a  U$" UUU"UUUUS9nM  U$" UUU"UUS9nM     U R[                  U5      nU R]                  U5      nU R_                  U5      nUSSS24   R;                  SU4UR                   SS -   5      R9                  SSSSS5      nU	(       d  U4$ [a        US9$ )a  
The [`I2VGenXLUNet`] forward method.

Args:
    sample (`torch.Tensor`):
        The noisy input tensor with the following shape `(batch, num_frames, channel, height, width`.
    timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
    fps (`torch.Tensor`): Frames per second for the video being generated. Used as a "micro-condition".
    image_latents (`torch.Tensor`): Image encodings from the VAE.
    image_embeddings (`torch.Tensor`):
        Projection embeddings of the conditioning image computed with a vision encoder.
    encoder_hidden_states (`torch.Tensor`):
        The encoder hidden states with shape `(batch, sequence_length, feature_dim)`.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~models.unets.unet_3d_condition.UNet3DConditionOutput`] instead of a plain
        tuple.

Returns:
    [`~models.unets.unet_3d_condition.UNet3DConditionOutput`] or `tuple`:
        If `return_dict` is True, an [`~models.unets.unet_3d_condition.UNet3DConditionOutput`] is returned,
        otherwise a `tuple` is returned where the first element is the sample tensor.
r   FNc              3   2   >#    U  H  oT-  S :g  v   M     g7f)r   N )r   sdefault_overall_up_factors     r>   r   'I2VGenXLUNet.forward.<locals>.<genexpr>/  s     M;La,,1;Ls   z9Forward upsample size to force interpolation output size.Tmpsnpu)dtypedevicer   )r  )r#   output_sizer   )r#   r   rE   ry   )
num_framesr  r  has_cross_attention)rA   tembrD   r  r  )rA   r  r  )rD   r  r  )rA   r  res_hidden_states_tuplerD   upsample_sizer  r  )rA   r  r  r  r  )r  )1shaper   anyloggerinforU   	is_tensorr  typer   floatfloat32float64int32int64tensorr   toexpandr   r  r   r   repeat_interleave	new_zerosconfigre   catpermutereshaper   r   viewr^   r   r   r   r   r   r   r  r   r   r   resnetsr   r   r   r   )'r<   r  r  r	  r
  r  rD   r  r  r  
batch_sizechannelsr  heightwidthforward_upsample_sizer  	timestepsis_mpsis_npur  t_embfps_embembcontext_embimage_latents_for_context_embdsimage_latents_context_embs_batch_size	_channels_height_width	image_embdown_block_res_samplesdownsample_blockres_samplesr   r   r   r  s'                                         @r>   rK   I2VGenXLUNet.forward  s   L ;A,,7
j% %&t':':$:! !&M6<<;LMMMKKST$(! 	y)) ]]''50F]]''50F)U++*0F(.&u{{i[fmmTI!Q&!$**6==9I $$V\\!_5	y)
 tzz*##E=9 jj1&$$T^^C%8%;%;$**%;%MN go##JA399Q<R\C\#] &&z1dkk6U6UVii.C D!L*71bqb!*D'%D%L%LQPQSTVWYZ%[%c%c+11!47V7\7\]^7__+11!4+11!4+11!4	&
" &*%I%IJd%e"2L2R2R/Y%?%G%G1aQR%S%[%[6)9&
" ii.H IqQ**+;<	NN2t{{'>'>@_@_`	iii 8a@!33JAS^SdSdefSgjtSt3u%--aAq!<DD"]%8%8%;;"""	
 22=A$'"WZXuEWQ1a#WZ(50*G	 	 ;;MJ%--j%U]^ffghjkmnpqstu F2:1aA.66Q*8TVX7Y\b\h\hijik\l7lmf%$$!#9	 % 

  #) $ 0 0')>??DTDhDh&6"(*5)+A'# '7VRUbl&m#"k1" !1 >>%^^&1%'= $ F "+4>>!:A~#dnn"5"99N0#n6L6L2M1M1OPK%;<Zs>CYCY?Z>Z%[" "&; 6r : @ @ D~'<==.BdBd'"(,7*5"/)+A ("(,7"/)- ";> ##F+v&v& a(("j)9FLL<L)LMUUVWYZ\]_`bcd9$F33r@   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )NrE   rE   )CrossAttnDownBlock3DrM  rM  DownBlock3D)	UpBlock3DCrossAttnUpBlock3DrP  rP  )i@  i     rQ  r   rn   i   @   N)Nr   )NNNNT)"rN   rO   rP   rQ   __doc__ _supports_gradient_checkpointingr
   r   rR   r   rS   r   r6   propertyr   r   r   r   r   r   r   r   r   r  r  rU   rV   r&  r   rT   r   rK   rW   rX   rY   s   @r>   r[   r[   a   s|   4 (-$ &*-
+
 /E !)+#'57@D-|ac]|a |a 	|a
  S/|a c3h|a" "#s(O#|a$ %|a& "#'|a( !)|a* "#uSz/2+|a, &eCsO&<=-|a |a| c+=&=!>  0 AE2Dd3PbKbFc2c,d  AF?(3- ?S ?Y] ?<	7+ .2594C& 488<04;? S4S4 eS01S4 \\	S4
 ||S4 #5<<0S4  (5S4  -S4 !)c3h 8S4 S4 
$eELL&99	:S4 S4r@   r[   )0typingr   r   r   r   r   rU   torch.nnr7   torch.utils.checkpointconfiguration_utilsr	   r
   loadersr   utilsr   activationsr   	attentionr   r   attention_processorr   r   r   r   r   r   
embeddingsr   r   modeling_utilsr   !transformers.transformer_temporalr   unet_3d_blocksr   r   r   unet_3d_conditionr   
get_loggerrN   r"  r   r!   r[   r  r@   r>   <module>re     s    5 4    B 2  ( .  6 ' H 
 5 
		H	%. .bo	4:{,G o	4r@   