
    +hU                        S SK JrJrJrJr  S SKrS SKJs  Jr	  S SKJr  SSK
JrJr  SSKJr  SSKJr  SS	KJrJr  SS
KJrJrJr  SSKJr  SSKJr  SSKJrJrJr  \R@                  " \!5      r" " S S\RF                  5      r$ " S S\RF                  5      r% " S S5      r&\ " S S\RF                  5      5       r' " S S\\5      r(g)    )ListOptionalTupleUnionN)nn   )ConfigMixinregister_to_config)logging)maybe_allow_in_graph   )	AttentionFeedForward)TimestepEmbedding	Timestepsget_3d_rotary_pos_embed)Transformer2DModelOutput)
ModelMixin)AdaLayerNormFP32LayerNormRMSNormc                     ^  \ rS rSr    SS\S\S\S\S\S\SS	4U 4S
 jjjrS\	R                  S\	R                  S\	R                  S\\	R                  \	R                  \	R                  \	R                  4   4S jrSrU =r$ )EasyAnimateLayerNormZero#   conditioning_dimembedding_dimelementwise_affineepsbias	norm_typereturnNc                   > [         TU ]  5         [        R                  " 5       U l        [        R
                  " USU-  US9U l        US:X  a  [        R                  " X#US9U l        g US:X  a  [        X#US9U l        g [        SU S35      e)N   )r   
layer_normr   r   fp32_layer_normzUnsupported `norm_type` (z@) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)super__init__r   SiLUsiluLinearlinear	LayerNormnormr   
ValueError)selfr   r   r   r   r   r    	__class__s          o/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/transformers/transformer_easyanimate.pyr(   !EasyAnimateLayerNormZero.__init__$   s     	GGI	ii 0!m2C$O$]_bcDI++%m`cdDI+I;6vw     hidden_statesencoder_hidden_statestembc                 N   U R                  U R                  U5      5      R                  SSS9u  pEpgpU R                  U5      SUR	                  S5      -   -  UR	                  S5      -   nU R                  U5      SUR	                  S5      -   -  UR	                  S5      -   nXXi4$ )Nr#      dim)r,   r*   chunkr.   	unsqueeze)
r0   r5   r6   r7   shiftscalegate	enc_shift	enc_scaleenc_gates
             r2   forward EasyAnimateLayerNormZero.forward;   s     >B[[SW=Y=_=_`agh=_=i:dy		-0A8J4JKeoo^_N`` $		*? @A	H[H[\]H^D^ _bkbubuc
 !
 TCCr4   )r,   r.   r*   )Th㈵>Tr&   )__name__
__module____qualname____firstlineno__intboolfloatstrr(   torchTensorr   rD   __static_attributes____classcell__r1   s   @r2   r   r   #   s    
 $(*  !	
    
 .D"\\DBG,,DV[VbVbD	u||U\\5<<E	FD Dr4   r   c                   ~   ^  \ rS rSrS\S\\   SS4U 4S jjrS rS\R                  S\R                  4S	 jr
S
rU =r$ )EasyAnimateRotaryPosEmbedF   
patch_sizerope_dimr!   Nc                 :   > [         TU ]  5         Xl        X l        g N)r'   r(   rW   rX   )r0   rW   rX   r1   s      r2   r(   "EasyAnimateRotaryPosEmbed.__init__G   s    $ r4   c                    UnUnUu  pgXg-  nXU-  :  a  Un	[        [        XV-  U-  5      5      n
OUn
[        [        XG-  U-  5      5      n	[        [        XY-
  S-  5      5      n[        [        XJ-
  S-  5      5      nX4X-   X-   44$ )Ng       @)rK   round)r0   src	tgt_width
tgt_heighttwthhwrresize_heightresize_widthcrop_top	crop_lefts                r2   get_resize_crop_region_for_grid9EasyAnimateRotaryPosEmbed.get_resize_crop_region_for_gridM   s    ER=MuRVaZ01LLbfqj 12Mub0C789r0C789	$x'?AY&ZZZr4   r5   c                    UR                  5       u  p#pEnXPR                  -  nX`R                  -  nSU R                  -  nSU R                  -  nU R                  XV4Xx5      n	[        U R                  U	XV4UR                  S5      SS9n
U
$ )NZ   <   r   T)	grid_sizetemporal_sizeuse_real)sizerW   rj   r   rX   )r0   r5   bsc
num_framesgrid_height
grid_widthbase_size_widthbase_size_heightgrid_crops_coordsimage_rotary_embs              r2   rD   !EasyAnimateRotaryPosEmbed.forward^   s    5B5G5G5I2z
!__4??2
/0 @@%
 3MM"/',,Q/
  r4   )rW   rX   )rG   rH   rI   rJ   rK   r   r(   rj   rO   rP   rD   rQ   rR   rS   s   @r2   rU   rU   F   sG    !3 !$s) ! ![" U\\  ell    r4   rU   c                       \ rS rSrSrS r  SS\S\R                  S\R                  S\	\R                     S	\	\R                     S
\R                  4S jjr
Srg)EasyAnimateAttnProcessor2_0r   z
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
used in the EasyAnimateTransformer3DModel model.
c                 D    [        [        S5      (       d  [        S5      eg )Nscaled_dot_product_attentionzaEasyAnimateAttnProcessor2_0 requires PyTorch 2.0 or above. To use it, please install PyTorch 2.0.)hasattrFImportError)r0   s    r2   r(   $EasyAnimateAttnProcessor2_0.__init__x   s%    q899s  :r4   Nattnr5   r6   attention_maskr{   r!   c           	      0   UR                   c  Ub  [        R                  " X2/SS9nUR                  U5      nUR	                  U5      nUR                  U5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  b  UR                  U5      nUR                  b  UR                  U5      nUR                   Gb?  UGb;  UR                  U5      n	UR                  U5      n
UR                  U5      nU	R                  SUR                  S45      R                  SS5      n	U
R                  SUR                  S45      R                  SS5      n
UR                  SUR                  S45      R                  SS5      nUR                  b  UR                  U	5      n	UR                  b  UR                  U
5      n
[        R                  " X/SS9n[        R                  " X/SS9n[        R                  " X/SS9nUb  SSKJn  U" US S 2S S 2UR"                  S   S 24   U5      US S 2S S 2UR"                  S   S 24'   UR$                  (       d;  U" US S 2S S 2UR"                  S   S 24   U5      US S 2S S 2UR"                  S   S 24'   [&        R(                  " XgXSSS9nUR                  SS5      R+                  SS	5      nUR-                  UR.                  5      nUb  US S 2S UR"                  S   24   US S 2UR"                  S   S 24   p#[1        US
S 5      b*  UR2                  S   " U5      nUR2                  S   " U5      n[1        USS 5      b  UR5                  U5      nX#4$ [1        US
S 5      b*  UR2                  S   " U5      nUR2                  S   " U5      nX#4$ )Nr9   r:   r   )apply_rotary_emb        F)	attn_mask	dropout_p	is_causalr   to_outr   
to_add_out)
add_q_projrO   catto_qto_kto_v	unflattenheads	transposenorm_qnorm_k
add_k_proj
add_v_projnorm_added_qnorm_added_k
embeddingsr   shapeis_cross_attentionr   r   flattentodtypegetattrr   r   )r0   r   r5   r6   r   r{   querykeyvalueencoder_queryencoder_keyencoder_valuer   s                r2   __call__$EasyAnimateAttnProcessor2_0.__call__~   s    ??"'<'H!II'<&LRSTM 		-(ii&		-(DJJ#34>>q!DmmA

B/0::1a@DJJ#34>>q!D ;;"KK&E;;"++c"C ??&+@+L OO,ABM//*?@K OO,ABM)33A

B7GHRRSTVWXM%//DJJ3CDNNqRSTK)33A

B7GHRRSTVWXM  , $ 1 1- @  ,"//<II}4!<E))[.A6CII}4!<E'5<La177:<<=?O=E!Q-33A6889 **>N1399!<>>?AQ?Aq/55a8::;
 663RW
 &//15==aC%((5 !,a!A#8#>#>q#A!AABa!6!<!<Q!?!AAB $1
 tXt,8 $A} = $A} =t\40<(,8M(N% 33	 tXt,8 $A} = $A} =33r4    )NN)rG   rH   rI   rJ   __doc__r(   r   rO   rP   r   r   rQ   r   r4   r2   r~   r~   r   s~    
 2637Q4Q4 ||Q4  %||	Q4
 !.Q4 #5<<0Q4 
Q4 Q4r4   r~   c                   R  ^  \ rS rSr           SS\S\S\S\S\S\S\S	\S
\S\\   S\S\S\S\S\4U 4S jjjr	 SS\
R                  S\
R                  S\
R                  S\\\
R                  \
R                  4      S\\
R                  \
R                  4   4
S jjrSrU =r$ )EasyAnimateTransformerBlock   r;   num_attention_headsattention_head_dimtime_embed_dimdropoutactivation_fnnorm_elementwise_affinenorm_epsfinal_dropoutff_inner_dimff_biasqk_norm
after_normr    is_mmdit_blockc                 ~  > [         TU ]  5         [        XAXxUSS9U l        [	        UUUU(       a  SOS SSSU(       a  UOS U(       a  SOS [        5       S9
U l        [        XAXxUSS9U l        [        UUUU	U
US9U l	        S U l
        U(       a  [        UUUU	U
US9U l
        S U l        U(       a  [        XUS9U l        g g )	NT)r    r   r$   ư>F)
	query_dimdim_headr   r   r   r   added_proj_biasadded_kv_proj_dimcontext_pre_only	processor)r   r   r   	inner_dimr   r%   )r'   r(   r   norm1r   r~   attn1norm2r   fftxt_ffnorm3r   )r0   r;   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r1   s                   r2   r(   $EasyAnimateTransformerBlock.__init__   s    $ 	 .!8i^b

 '%$+L %3c&4U$13

 .!8i^b

 ''"
 %++&DK 
&s\deDJ r4   r5   r6   r7   r{   r!   c                    U R                  XU5      u  pVpxU R                  UUUS9u  pXR                  S5      U	-  -   nX(R                  S5      U
-  -   nU R                  XU5      u  pVpU R                  bo  U R	                  U R                  U5      5      nU R                  b!  U R	                  U R                  U5      5      nObU R	                  U R                  U5      5      nOAU R                  U5      nU R                  b  U R                  U5      nOU R                  U5      nXR                  S5      U-  -   nX,R                  S5      U-  -   nX4$ )N)r5   r6   r{   r9   )r   r   r=   r   r   r   r   )r0   r5   r6   r7   r{   norm_hidden_statesnorm_encoder_hidden_statesgate_msaenc_gate_msaattn_hidden_statesattn_encoder_hidden_statesgate_ffenc_gate_ffs                r2   rD   #EasyAnimateTransformerBlock.forward  so    RVQ[Q[$R
N :>,"<- :D :
6
 &(:(:1(=@R(RR 58N8Nq8QTn8n n PTzz$P
L ::!!%DGG4F,G!H{{&-1ZZD^8_-`*-1ZZ@Z8[-\*!%);!<{{&-1[[9S-T*-1WW5O-P*%(9(9!(<?Q(QQ 58M8Ma8PSm8m m33r4   )r   r   r   r   r   r   )r   gelu-approximateTr   TNTTFr&   TrZ   )rG   rH   rI   rJ   rK   rM   rN   rL   r   r(   rO   rP   r   rD   rQ   rR   rS   s   @r2   r   r      sa    /(,"&* *#!@f@f !@f  	@f
 @f @f @f "&@f @f @f sm@f @f @f @f @f  !@f @fN IM%4||%4  %||%4 ll	%4
 #5u||)C#DE%4 
u||U\\)	*%4 %4r4   r   c            2          ^  \ rS rSrSrSrS/r/ SQr\                         S*S\	S\	S\
\	   S	\
\	   S
\
\	   S\	S\	S\S\S\	S\	S\	S\S\	S\S\	S\	S\S\S\S\S\S\S\40U 4S jjj5       r      S+S\R                   S \R                   S!\
\R                      S"\
\R                      S#\
\R                      S$\
\R                      S%\
\R                      S&\S'\\\R                      \4   4S( jjrS)rU =r$ ),EasyAnimateTransformer3DModeli>  a`	  
A Transformer model for video-like data in [EasyAnimate](https://github.com/aigc-apps/EasyAnimate).

Parameters:
    num_attention_heads (`int`, defaults to `48`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `64`):
        The number of channels in each head.
    in_channels (`int`, defaults to `16`):
        The number of channels in the input.
    out_channels (`int`, *optional*, defaults to `16`):
        The number of channels in the output.
    patch_size (`int`, defaults to `2`):
        The size of the patches to use in the patch embedding layer.
    sample_width (`int`, defaults to `90`):
        The width of the input latents.
    sample_height (`int`, defaults to `60`):
        The height of the input latents.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    timestep_activation_fn (`str`, defaults to `"silu"`):
        Activation function to use when generating the timestep embeddings.
    num_layers (`int`, defaults to `30`):
        The number of layers of Transformer blocks to use.
    mmdit_layers (`int`, defaults to `1000`):
        The number of layers of Multi Modal Transformer blocks to use.
    dropout (`float`, defaults to `0.0`):
        The dropout probability to use.
    time_embed_dim (`int`, defaults to `512`):
        Output dimension of timestep embeddings.
    text_embed_dim (`int`, defaults to `4096`):
        Input dimension of text embeddings from the text encoder.
    norm_eps (`float`, defaults to `1e-5`):
        The epsilon value to use in normalization layers.
    norm_elementwise_affine (`bool`, defaults to `True`):
        Whether to use elementwise affine in normalization layers.
    flip_sin_to_cos (`bool`, defaults to `True`):
        Whether to flip the sin to cos in the time embedding.
    time_position_encoding_type (`str`, defaults to `3d_rope`):
        Type of time position encoding.
    after_norm (`bool`, defaults to `False`):
        Flag to apply normalization after.
    resize_inpaint_mask_directly (`bool`, defaults to `True`):
        Flag to resize inpaint mask directly.
    enable_text_attention_mask (`bool`, defaults to `True`):
        Flag to enable text attention mask.
    add_noise_in_inpaint_model (`bool`, defaults to `False`):
        Flag to add noise in inpaint model.
Tr   )z^proj$r.   z
^proj_out$r   r   in_channelsout_channelsrW   sample_widthsample_heightr   timestep_activation_fn
freq_shift
num_layersmmdit_layersr   r   add_norm_text_encodertext_embed_dimtext_embed_dim_t5r   r   flip_sin_to_costime_position_encoding_typeresize_inpaint_mask_directlyenable_text_attention_maskadd_noise_in_inpaint_modelc                   > [         TU ]  5         X-  n[        UUU
5      U l        [	        UX5      U l        [        XR5      U l        [        R                  " UUXU4USS9U l
        S U l        S U l        U(       d<  [        R                  " UU5      U l        Ub  [        R                  " UU5      U l        Ow[        R                  " [        USSS9[        R                  " UU5      5      U l        Ub:  [        R                  " [        USSS9[        R                  " UU5      5      U l        [        R                   " [#        U5       Vs/ s H  n[%        UUUUUUUUUUU:  a  SOSS9
PM     sn5      U l        [        R(                  " UUU5      U l        [-        USU-  UUSS	9U l        [        R                  " UXU-  U-  5      U l        SU l        g s  snf )
NT)kernel_sizestrider   r   )r   F)
r;   r   r   r   r   r   r   r   r   r   r   r9   )r   
output_dimr   r   	chunk_dim)r'   r(   r   	time_projr   time_embeddingrU   rope_embeddingr   Conv2dproj	text_projtext_proj_t5r+   
Sequentialr   
ModuleListranger   transformer_blocksr-   
norm_finalr   norm_outproj_outgradient_checkpointing)r0   r   r   r   r   rW   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _r1   s                               r2   r(   &EasyAnimateTransformer3DModel.__init__u  s   : 	'<	 #9ozJ/	>b7
W II0HQ[bf
	
  $YY~yADN ,$&II.?$K!]]F		R`bkHlDN !,$&MMNDTJBIIVgirLs%!
 #%-- z* +A ,!(;'9#1#"/,C%)+,|+;4 +#
" ,,y(<ST %(9}$;
 		)Z-D|-ST&+#9s   
$Gr5   timesteptimestep_condr6   encoder_hidden_states_t5inpaint_latentscontrol_latentsreturn_dictr!   c	           	      
   UR                  5       u  ppnU R                  R                  nX-  nX-  nU R                  U5      R	                  UR
                  S9nU R                  UU5      nU R                  U5      nUb  [        R                  " X/S5      nUb  [        R                  " X/S5      nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR                  SU	S45      R                  SSSSS5      nUR                  SS5      R                  SS5      nU R                  U5      nUb5  U R!                  U5      n[        R"                  " XE/SS9R%                  5       nU R&                   HR  n[        R(                  " 5       (       a)  U R*                  (       a  U R-                  UXUU5      u  pMF  U" XUU5      u  pMT     U R/                  U5      nU R1                  UUS	9nU R3                  U5      nU R                  R                  nUR5                  XUUXU5      nUR                  SSSSS
SS5      R                  S
S5      R                  SS5      nU(       d  U4$ [7        US9$ )N)r   r9   r   r   r      r   r:   )r7      r#   )sample)rr   configrW   r   r   r   r   r   rO   concatpermuter   r   r   r   r   r   r   
contiguousr   is_grad_enabledr   _gradient_checkpointing_funcr   r   r   reshaper   )r0   r5   r  r  r6   r  r  r  r  
batch_sizechannelsvideo_lengthheightwidthppost_patch_heightpost_patch_widthr7   r{   blockoutputs                        r2   rD   %EasyAnimateTransformer3DModel.forward  s    =J<N<N<P9
lEKK"""K : ~~h'**1D1D*E""47..}= &!LL-)I1MM&!LL-)I1MM%--aAq!<DDQJ		-0%//J3CDLLq!Q
 &--a3==aC !%/D E#/'+'8'89Q'R$$)II/D._ef$g$r$r$t! ,,E$$&&4+F+F7;7X7X=GW844 8=!$@P844 - 6 m$?m4 KK""&&zARTdfnstu1aAq!4<<QBJJ1aP9'v66r4   )r   r   r   r   r   r   r   r   r   r   r   )0   @   NNNrm   rn   r   r*   r   r  r  r   i   Fi   NrF   TT3d_ropeFTTT)NNNNNT)rG   rH   rI   rJ   r    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr
   rK   r   rN   rM   rL   r(   rO   rP   r   r   r   rD   rQ   rR   rS   s   @r2   r   r   >  s^   0d (,$67'G$ $&"$%)&*$(/&,!&+"!%(, $+4-1+/+/5W, W,  W, c]	W,
 smW, SMW, W, W, W, !$W, W, W, W, W, W,   $!W," #W,$ %W,& 'W,( "&)W,* +W,, &)-W,0 '+1W,2 %)3W,4 %)5W, W,z 158<;?2626 @7||@7 ,,@7  -	@7
  (5@7 #+5<<"8@7 "%,,/@7 "%,,/@7 @7 
uU\\"$<<	=@7 @7r4   r   ))typingr   r   r   r   rO   torch.nn.functionalr   
functionalr   configuration_utilsr	   r
   utilsr   utils.torch_utilsr   	attentionr   r   r   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   r   
get_loggerrG   loggerModuler   rU   r~   r   r   r   r4   r2   <module>r2     s     0 /     B  5 . N N 7 ' @ @ 
		H	% Dryy  DF) 		 ) X]4 ]4@ h4")) h4 h4VQ7J Q7r4   