
    +h`                        S SK r S SKJrJrJr  S SKrS SKJr  SSKJ	r	J
r
  SSKJr  SSKJrJr  SSKJrJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr   " S S\R:                  5      r " S S\R:                  5      r " S S\R:                  5      r  " S S\R:                  5      r! " S S\R:                  5      r" " S S\R:                  5      r# " S S\\	5      r$S r%g)    N)OptionalTupleUnion   )ConfigMixinregister_to_config)apply_forward_hook   )	AttentionSpatialNorm)DecoderOutputDiagonalGaussianDistribution)Downsample2D)AutoencoderKLOutput)
ModelMixin)ResnetBlock2D)
Upsample2Dc                      ^  \ rS rSrSr      SS\S\\   S\S\S\S	\S
\SS4U 4S jjjr	\
S\R                  S\R                  4S j5       rS\R                  S\S\R                  4S jrSrU =r$ )AllegroTemporalConvLayer!   z
Temporal convolutional layer that can be used for video (sequence of images) input. Code adapted from:
https://github.com/modelscope/modelscope/blob/1509fdb973e5871f37148a4b5e5964cafd43e64d/modelscope/models/multi_modal/video_synthesis/unet_sd.py#L1016
Nin_dimout_dimdropoutnorm_num_groups	up_sampledown_samplestridereturnc                 D  > [         TU ]  5         U=(       d    Un[        US-
  S-  5      =pSn
X`l        XPl        U(       a^  [
        R                  " [
        R                  " XA5      [
        R                  " 5       [
        R                  " XSXw4SSX4S95      U l
        OU(       a`  [
        R                  " [
        R                  " XA5      [
        R                  " 5       [
        R                  " XS-  SXw4SX4S95      U l
        O\[
        R                  " [
        R                  " XA5      [
        R                  " 5       [
        R                  " XSXw4XU	4S95      U l
        [
        R                  " [
        R                  " XB5      [
        R                  " 5       [
        R                  " U5      [
        R                  " X!SXw4XU	4S95      U l        [
        R                  " [
        R                  " XB5      [
        R                  " 5       [
        R                  " U5      [
        R                  " X!SXw4XU4S95      U l        [
        R                  " [
        R                  " XB5      [
        R                  " 5       [
        R                  " X!SXw4XU4S95      U l        g )	N   g      ?r   r
   )r
   r    r    )r   paddingr!   r   )super__init__intr   r   nn
Sequential	GroupNormSiLUConv3dconv1Dropoutconv2conv3conv4)selfr   r   r   r   r   r   r   pad_hpad_wpad_t	__class__s              n/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/autoencoders/autoencoder_kl_allegro.pyr$   !AllegroTemporalConvLayer.__init__'   s    	#VVaZ3.//&"_5			&Av+>y[\^cZklDJ
 _5			&A+6/BQPUL]^DJ _5			&Av+>W\H]^DJ
 ]]LL2GGIJJwIIg6':USXDYZ	

 ]]LL2GGIJJwIIg6':USXDYZ	

 ]]LL2GGIIIg6':USXDYZ

    hidden_statesc                     [         R                  " U S S 2S S 2SS24   U 4SS9n [         R                  " X S S 2S S 2SS 24   4SS9n U $ )Nr   r    r
   )dim)torchcat)r8   s    r5   _pad_temporal_dim*AllegroTemporalConvLayer._pad_temporal_dim^   sN    		=Aqs#;]"KQRS		=1bc	2J"KQRSr7   
batch_sizec                 X   UR                  SUS45      R                  SSSSS5      nU R                  (       a  US S 2S S 2S S S24   nO5U R                  (       a"  UR	                  SSUR
                  S   S-  S9nOUnU R                  (       d  U R                  (       a  U R                  U5      nO"U R                  U5      nU R                  U5      nU R                  (       a6  UR                  SS5      R                  SSSSSS	5      R                  SS5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nX1-   nUR                  SSSSS5      R                  SS5      nU$ )
Nr   r;   r
   r    r      )r:   output_size)r
   r;      )	unflattenpermuter   r   repeat_interleaveshaper+   r>   flattenr-   r.   r/   )r0   r8   r@   identitys       r5   forward AllegroTemporalConvLayer.forwardd   s   %//J3CDLLQPQSTVWYZ[$Q3Q3Y/H^^$66qa]M`M`abMcfgMg6hH$Ht~~ JJ}5M 22=AM JJ}5M>>)33Aw?GG1aQRTUWXYaabcefgM..}=

=1..}=

=1..}=

=1 0%--aAq!<DDQJr7   )r+   r-   r.   r/   r   r   )N            FFr    )__name__
__module____qualname____firstlineno____doc__r%   r   floatboolr$   staticmethodr<   Tensorr>   rK   __static_attributes____classcell__r4   s   @r5   r   r   !   s     "&!!5
5
 #5
 	5

 5
 5
 5
 5
 
5
 5
n  %,,  
U\\ s u||  r7   r   c                      ^  \ rS rSr           SS\S\S\S\S\S\S\S	\S
\S\S\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )AllegroDownBlock3D   in_channelsout_channelsr   
num_layers
resnet_epsresnet_time_scale_shiftresnet_act_fnresnet_groupsresnet_pre_normoutput_scale_factorspatial_downsampletemporal_downsampledownsample_paddingc                   > [         TU ]  5         / n/ n[        U5       HI  nUS:X  a  UOUnUR                  [	        UUS UUUUUU
U	S9
5        UR                  [        UUSUS95        MK     [        R                  " U5      U l        [        R                  " U5      U l	        U(       a  [        X"SUSSS9U l
        Xl        U(       a'  [        R                  " [        USX-SS	9/5      U l        g S U l        g )
Nr   
r^   r_   temb_channelsepsgroupsr   time_embedding_normnon_linearityrf   pre_norm皙?r   r   Tr   )r   r   r   r   op)use_convr_   r!   name)r#   r$   rangeappendr   r   r&   
ModuleListresnets
temp_convstemp_convs_downadd_temp_downsampler   downsamplers)r0   r^   r_   r   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rz   r{   ir4   s                    r5   r$   AllegroDownBlock3D.__init__   s     	
z"A)*a+\KNN +!-"&"(#(?"/(;, (  $1	! #2 }}W---
3#;Cdhqr$D  $7  " $t,qu!D !%Dr7   r8   r   c                    UR                   S   nUR                  SSSSS5      R                  SS5      n[        U R                  U R
                  5       H  u  p4U" US S9nU" XS9nM     U R                  (       a  U R                  XS9nU R                  b  U R                   H  nU" U5      nM     UR                  SUS45      R                  SSSSS5      nU$ 	Nr   r
   r    r   rB   )temb)r@   r;   )
rH   rF   rI   ziprz   r{   r}   r|   r~   rE   )r0   r8   r@   resnet	temp_convdownsamplers         r5   rK   AllegroDownBlock3D.forward   s    "((+
%--aAq!<DDQJ!$T\\4??!CF"=t<M%mKM "D ## 000VM(#00 +M :  1 &//J3CDLLQPQSTVWYZ[r7   )r}   r~   rz   r{   r|   )rM   r    ư>defaultswishrN   T      ?TFr    rO   rP   rQ   rR   r%   rT   strrU   r$   r<   rW   rK   rX   rY   rZ   s   @r5   r\   r\      s    
  '0$ $%(#'$)"#@%@% @% 	@%
 @% @% "%@% @% @% @% #@% !@% "@%  @% @%DU\\ ell  r7   r\   c                      ^  \ rS rSr           SS\S\S\S\S\S\S\S	\S
\S\S\S\S\\   4U 4S jjjr	S\
R                  S\
R                  4S jrSrU =r$ )AllegroUpBlock3D   r^   r_   r   r`   ra   rb   rc   rd   re   rf   spatial_upsampletemporal_upsamplerl   c                   > [         TU ]  5         / n/ n[        U5       HI  nUS:X  a  UOUnUR                  [	        UUUUUUUUU
U	S9
5        UR                  [        UUSUS95        MK     [        R                  " U5      U l        [        R                  " U5      U l	        Xl
        U(       a  [        X"SUSSS9U l        U(       a&  [        R                  " [        USUS9/5      U l        g S U l        g )	Nr   rk   rr   rs   Tr   )r   r   r   r   )ru   r_   )r#   r$   rw   rx   r   r   r&   ry   rz   r{   add_temp_upsampletemp_conv_upr   
upsamplers)r0   r^   r_   r   r`   ra   rb   rc   rd   re   rf   r   r   rl   rz   r{   r   input_channelsr4   s                     r5   r$   AllegroUpBlock3D.__init__   s      	
z"A,-F[NNN .!-"/"(#(?"/(;, (  $1	# #4 }}W---
3!2 8Cbfop!D  mmZtbn-o,pqDO"DOr7   r8   r   c                    UR                   S   nUR                  SSSSS5      R                  SS5      n[        U R                  U R
                  5       H  u  p4U" US S9nU" XS9nM     U R                  (       a  U R                  XS9nU R                  b  U R                   H  nU" U5      nM     UR                  SUS45      R                  SSSSS5      nU$ r   )
rH   rF   rI   r   rz   r{   r   r   r   rE   )r0   r8   r@   r   r   	upsamplers         r5   rK   AllegroUpBlock3D.forward  s    "((+
%--aAq!<DDQJ!$T\\4??!CF"=t<M%mKM "D !! --m-SM??&!__	 )- 8 - &//J3CDLLQPQSTVWYZ[r7   )r   rz   r   r{   r   )rM   r    r   r   r   rN   Tr   TFN)rO   rP   rQ   rR   r%   rT   r   rU   r   r$   r<   rW   rK   rX   rY   rZ   s   @r5   r   r      s    
  '0$ $%(!%"''+;#;# ;# 	;#
 ;# ;# "%;# ;# ;# ;# #;# ;#  ;#  };# ;#zU\\ ell  r7   r   c                      ^  \ rS rSr          SS\S\S\S\S\S\S\S	\S
\S\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )AllegroMidBlock3DConvi/  r^   rl   r   r`   ra   rb   rc   rd   re   add_attentionattention_head_dimrf   c                 H  > [         TU ]  5         [        UUUUUUUUUU	S9
/n[        UUSUS9/n/ nUc  Un[	        U5       H  nU
(       a5  UR                  [        UX-  UUUUS:X  a  UOS US:X  a  UOS SSSSS95        OUR                  S 5        UR                  [        UUUUUUUUUU	S9
5        UR                  [        UUSUS95        M     [        R                  " U5      U l	        [        R                  " U5      U l
        [        R                  " U5      U l        g )Nrk   rr   rs   r   spatialT)
headsdim_headrescale_output_factorrm   r   spatial_norm_dimresidual_connectionbiasupcast_softmax_from_deprecated_attn_block)r#   r$   r   r   rw   rx   r   r&   ry   rz   r{   
attentions)r0   r^   rl   r   r`   ra   rb   rc   rd   re   r   r   rf   rz   r{   r   _r4   s                    r5   r$   AllegroMidBlock3DConv.__init__0  sk    	 '(+$$;+$7(
 % -	

 
%!,z"A!!#)?!3.A&9PT]9]cg:QU^:^dh,0!'+48  !!$'NN +!,"/"(#(?"/(;, ($1	G #X }}W---
3--
3r7   r8   r   c                    UR                   S   nUR                  SSSSS5      R                  SS5      nU R                  S   " US S9nU R                  S   " XS9n[        U R                  U R                  SS  U R                  SS  5       H  u  p4nU" U5      nU" US S9nU" XS9nM     UR                  SUS45      R                  SSSSS5      nU$ r   )rH   rF   rI   rz   r{   r   r   rE   )r0   r8   r@   attnr   r   s         r5   rK   AllegroMidBlock3DConv.forward  s    "((+
%--aAq!<DDQJQDA*=P'*4??DLL<Ldoo^_^`Na'b#D) /M"=t<M%mKM (c
 &//J3CDLLQPQSTVWYZ[r7   )r   rz   r{   )
rM   r    r   r   r   rN   TTr    r   r   rZ   s   @r5   r   r   /  s    
  '0$ $""#%([4[4 [4 	[4
 [4 [4 "%[4 [4 [4 [4 [4  [4 #[4 [4zU\\ ell  r7   r   c                      ^  \ rS rSrSSSS/ SQSSSS	4	S
\S\S\\S4   S\\S4   S\\S4   S\S\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )AllegroEncoder3Di  r   r\   r\   r\   r\            r   TTFFr
   rN   siluTr^   r_   down_block_types.block_out_channelstemporal_downsample_blockslayers_per_blockr   act_fndouble_zc
                   > [         TU ]  5         [        R                  " UUS   SSSS9U l        [        R
                  " US   US   SSS9U l        [        R                  " / 5      U l        US   n
[        U5       Ha  u  pU
nXK   n
U[        U5      S-
  :H  nUS:X  a  [        UUU
U(       + X[   S	SUUS
9	nO[        S5      eU R                  R                  U5        Mc     [        US   S	USSUS   US S9U l        [        R                   " US   US	S9U l        [        R$                  " 5       U l        U	(       a  SU-  OUn[        R
                  " US   US   SSS9U l        [        R                  " US   USSS9U l        SU l        g )Nr   r   r    kernel_sizer   r!   r   r    r    r    r   r   )r^   r_   r   r!   r\   r   )	r`   r^   r_   rg   rh   ra   ri   rc   rd   zCInvalid `down_block_type` encountered. Must be `AllegroDownBlock3D`r;   r   r^   ra   rc   rf   rb   r   rd   rl   num_channels
num_groupsrm   r
   r"   F)r#   r$   r&   Conv2dconv_inr*   temp_conv_inry   down_blocks	enumeratelenr\   
ValueErrorrx   r   	mid_blockr(   conv_norm_outr)   conv_acttemp_conv_outconv_outgradient_checkpointing)r0   r^   r_   r   r   r   r   r   r   r   output_channelr   down_block_typeinput_channelis_final_block
down_blockconv_out_channelsr4   s                    r5   r$   AllegroEncoder3D.__init__  s   " 	yyq!
 II*1-+A.!	
 ==, ,A."+,<"=A*M/2N#&8"9A"==N"66// -!/+9'9(B(E#'("("1

 !!fgg##J/) #>. /*2.  !$-1"5)	
  \\7I"7MZiost	08A,lYY'9"'=?QRT?UW`jst		"4R"8:KQXYZ&+#r7   sampler   c                     UR                   S   nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR	                  SUS45      R                  SSSSS5      nUnU R                  U5      nX-   n[        R                  " 5       (       aR  U R                  (       aA  U R                   H  nU R                  XA5      nM     U R                  U R                  U5      nO,U R                   H  nU" U5      nM     U R                  U5      nUR                  SSSSS5      R                  SS5      nU R                  U5      nU R                  U5      nUR	                  SUS45      R                  SSSSS5      nUnU R                  U5      nX-   nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR	                  SUS45      R                  SSSSS5      nU$ Nr   r
   r    r   rB   r;   )rH   rF   rI   r   rE   r   r<   is_grad_enabledr   r   _gradient_checkpointing_funcr   r   r   r   r   )r0   r   r@   residualr   s        r5   rK   AllegroEncoder3D.forward  s   \\!_
1aA.66q!<f%!!!j"%56>>q!Q1M""6*"  ""t'B'B"..
:::N / 66t~~vNF #..
#F+ / ^^F+F 1aA.66q!<##F+v&!!!j"%56>>q!Q1M##F+"1aA.66q!<v&!!!j"%56>>q!Q1Mr7   )	r   r   r   r   r   r   r   r   r   rO   rP   rQ   rR   r%   r   r   rU   r$   r<   rW   rK   rX   rY   rZ   s   @r5   r   r     s     -
 /C7Q !!Q,Q, Q,  S/	Q, "#s(OQ, %*$)$4Q, Q, Q, Q, Q, Q,f(ell (u|| ( (r7   r   c                      ^  \ rS rSrSSS/ SQSSSS	S
4	S\S\S\\S4   S\\S4   S\\S4   S\S\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )AllegroDecoder3Di  rB   r   r   r   r   r   FTTFr   r
   rN   r   groupr^   r_   up_block_types.temporal_upsample_blocksr   r   r   r   	norm_typec
                   > [         TU ]  5         [        R                  " UUS   SSSS9U l        [        R
                  " US   US   SSS9U l        S U l        [        R                  " / 5      U l	        U	S:X  a  UOS n
[        US   S	USU	S
:X  a  SOU	US   UU
S9U l        [        [        U5      5      nUS   n[        U5       Hg  u  pUnX   nU[        U5      S-
  :H  nUS:X  a  [        US-   UUU(       + XM   S	UUU
U	S9
nO[!        S5      eU R                  R#                  U5        UnMi     U	S:X  a  [%        US   U
5      U l        O[        R(                  " US   US	S9U l        [        R*                  " 5       U l        [        R
                  " US   US   SSS9U l        [        R                  " US   USSS9U l        SU l        g )Nr;   r   r    r   r   r   r"   r   r   r   r   r   r   r   )
r`   r^   r_   r   r   ra   rc   rd   rl   rb   z?Invalid `UP_block_type` encountered. Must be `AllegroUpBlock3D`r   F)r#   r$   r&   r   r   r*   r   r   ry   	up_blocksr   listreversedr   r   r   r   rx   r   r   r(   r)   r   r   r   r   )r0   r^   r_   r   r   r   r   r   r   r   rl   reversed_block_out_channelsr   r   up_block_typeprev_output_channelr   up_blockr4   s                     r5   r$   AllegroDecoder3D.__init__  s   " 	yyr"
 II&8&<>PQS>TV_irsr*'0I'=4 /*2.  !1:g1EI91"5)'	
 '+84F+G&H#4Q7 ). 9A"08;N#&8"9A"==N 22+/!3 3!/)7%7&>&A#"("1"/,5 !!bccNN!!(+"0/ !:4 	!!,-?-BM!RD!#;Ma;P]lrv!wD	YY'9!'<>PQR>SU^hqr		"4Q"7qRST&+#r7   r   r   c                    UR                   S   nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR	                  SUS45      R                  SSSSS5      nUnU R                  U5      nX-   n[        [        U R                  R                  5       5      5      R                  n[        R                  " 5       (       aR  U R                  (       aA  U R                  U R                  U5      nU R                   H  nU R                  XQ5      nM     O=U R                  U5      nUR!                  U5      nU R                   H  nU" U5      nM     UR                  SSSSS5      R                  SS5      nU R#                  U5      nU R%                  U5      nUR	                  SUS45      R                  SSSSS5      nUnU R'                  U5      nX-   nUR                  SSSSS5      R                  SS5      nU R)                  U5      nUR	                  SUS45      R                  SSSSS5      nU$ r   )rH   rF   rI   r   rE   r   nextiterr   
parametersdtyper<   r   r   r   r   tor   r   r   r   )r0   r   r@   r   upscale_dtyper   s         r5   rK   AllegroDecoder3D.forwardu  s   \\!_
1aA.66q!<f%!!!j"%56>>q!Q1M""6*"T$..";";"=>?EE  ""t'B'B66t~~vNF !NN::8L +
 ^^F+FYY}-F !NN!&) + 1aA.66q!<##F+v&!!!j"%56>>q!Q1M##F+"1aA.66q!<v&!!!j"%56>>q!Q1Mr7   )	r   r   r   r   r   r   r   r   r   r   rZ   s   @r5   r   r     s     +
 6P.B !! U,U, U, c3h	U, #(c	"2U, "#s(OU, U, U, U, U, U,n,ell ,u|| , ,r7   r   c            "         ^  \ rS rSrSrSr\               S)S\S\S\\	S4   S\\	S4   S	\\S4   S
\\
S4   S\\
S4   S\S\S\	S\S\S\S\S\
SS4 U 4S jjj5       rS*S jrS*S jrS*S jrS*S jrS\R$                  S\R$                  4S jr\ S+S\R$                  S\
S\\\\   4   4S jj5       rS\R$                  S\R$                  4S  jr\S+S\R$                  S\
S\\\R$                  4   4S! jj5       rS\R$                  S\R$                  4S" jrS\R$                  S\R$                  4S# jr   S,S$\R$                  S%\
S\
S&\\R>                     S\\\R$                  4   4
S' jjr S(r!U =r"$ )-AutoencoderKLAllegroi  a}  
A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. Used in
[Allegro](https://github.com/rhymes-ai/Allegro).

This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
for all models (such as downloading or saving).

Parameters:
    in_channels (int, defaults to `3`):
        Number of channels in the input image.
    out_channels (int, defaults to `3`):
        Number of channels in the output.
    down_block_types (`Tuple[str, ...]`, defaults to `("AllegroDownBlock3D", "AllegroDownBlock3D", "AllegroDownBlock3D", "AllegroDownBlock3D")`):
        Tuple of strings denoting which types of down blocks to use.
    up_block_types (`Tuple[str, ...]`, defaults to `("AllegroUpBlock3D", "AllegroUpBlock3D", "AllegroUpBlock3D", "AllegroUpBlock3D")`):
        Tuple of strings denoting which types of up blocks to use.
    block_out_channels (`Tuple[int, ...]`, defaults to `(128, 256, 512, 512)`):
        Tuple of integers denoting number of output channels in each block.
    temporal_downsample_blocks (`Tuple[bool, ...]`, defaults to `(True, True, False, False)`):
        Tuple of booleans denoting which blocks to enable temporal downsampling in.
    latent_channels (`int`, defaults to `4`):
        Number of channels in latents.
    layers_per_block (`int`, defaults to `2`):
        Number of resnet or attention or temporal convolution layers per down/up block.
    act_fn (`str`, defaults to `"silu"`):
        The activation function to use.
    norm_num_groups (`int`, defaults to `32`):
        Number of groups to use in normalization layers.
    temporal_compression_ratio (`int`, defaults to `4`):
        Ratio by which temporal dimension of samples are compressed.
    sample_size (`int`, defaults to `320`):
        Default latent size.
    scaling_factor (`float`, defaults to `0.13235`):
        The component-wise standard deviation of the trained latent space computed using the first batch of the
        training set. This is used to scale the latent space to have unit variance when training the diffusion
        model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
        diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1
        / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image
        Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
    force_upcast (`bool`, default to `True`):
        If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
        can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
        can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
Tr^   r_   r   .r   r   r   r   latent_channelsr   r   r   temporal_compression_ratiosample_sizescaling_factorforce_upcastr   Nc                   > [         TU ]  5         [        UUUUUU	U
USS9	U l        [	        UUUUUU	UU
S9U l        [        R                  " SU-  SU-  S5      U l        [        R                  " XS5      U l	        SU l
        SU l        S[        U5      S-
  -  U l        SU l        SU l        S	U l        S
nUX4U l        UU R                  -
  XR                  -
  XR                   -
  4U l        g )NT)	r^   r_   r   r   r   r   r   r   r   )r^   r_   r   r   r   r   r   r   r
   r    F   x   P      )r#   r$   r   encoderr   decoderr&   r   
quant_convpost_quant_convuse_slicing
use_tilingr   spatial_compression_ratiotile_overlap_ttile_overlap_htile_overlap_wkernelr   )r0   r^   r_   r   r   r   r   r   r   r   r   r   r  r  r  r  sample_framesr4   s                    r5   r$   AutoencoderKLAllegro.__init__  s   : 	'#(-'A1-+

 ('%)%=1-+	
 ))A$7_9LaP!yy1M
 !)*s3E/F/J)K&! $k?D///------
r7   c                     SU l         g)z
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
processing larger images.
TNr  r0   s    r5   enable_tiling"AutoencoderKLAllegro.enable_tiling  s     r7   c                     SU l         g)z
Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
decoding in one step.
FNr  r  s    r5   disable_tiling#AutoencoderKLAllegro.disable_tiling&  s    
  r7   c                     SU l         g)z
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
TNr  r  s    r5   enable_slicing#AutoencoderKLAllegro.enable_slicing-  s    
  r7   c                     SU l         g)z
Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
decoding in one step.
FNr   r  s    r5   disable_slicing$AutoencoderKLAllegro.disable_slicing4  s    
 !r7   xc                 \    U R                   (       a  U R                  U5      $ [        S5      e)Nz5Encoding without tiling has not been implemented yet.)r  tiled_encodeNotImplementedError)r0   r&  s     r5   _encodeAutoencoderKLAllegro._encode;  (     ??$$Q''!"YZZr7   return_dictc                 >   U R                   (       aY  UR                  S   S:  aF  UR                  S5       Vs/ s H  o0R                  U5      PM     nn[        R
                  " U5      nOU R                  U5      n[        U5      nU(       d  U4$ [        US9$ s  snf )a  
Encode a batch of videos into latents.

Args:
    x (`torch.Tensor`):
        Input batch of videos.
    return_dict (`bool`, defaults to `True`):
        Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple.

Returns:
        The latent representations of the encoded videos. If `return_dict` is True, a
        [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned.
r   r    )latent_dist)r  rH   splitr*  r<   r=   r   r   )r0   r&  r-  x_sliceencoded_slicesh	posteriors          r5   encodeAutoencoderKLAllegro.encodeC  s    " 
QCD771:N:ll73:NN		.)AQA03	<"y99 Os   Bzc                 \    U R                   (       a  U R                  U5      $ [        S5      e)Nz5Decoding without tiling has not been implemented yet.)r  tiled_decoder)  )r0   r7  s     r5   _decodeAutoencoderKLAllegro._decode`  r,  r7   c                 (   U R                   (       aY  UR                  S   S:  aF  UR                  S5       Vs/ s H  o0R                  U5      PM     nn[        R
                  " U5      nOU R                  U5      nU(       d  U4$ [        US9$ s  snf )a  
Decode a batch of videos.

Args:
    z (`torch.Tensor`):
        Input batch of latent vectors.
    return_dict (`bool`, defaults to `True`):
        Whether to return a [`~models.vae.DecoderOutput`] instead of a plain tuple.

Returns:
    [`~models.vae.DecoderOutput`] or `tuple`:
        If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is
        returned.
r   r    r   )r  rH   r0  r:  r<   r=   r   )r0   r7  r-  z_slicedecoded_slicesdecodeds         r5   decodeAutoencoderKLAllegro.decodeh  su      
QCD771:N:ll73:NNii/Gll1oG:G,, Os   Bc                 
   SnU R                   nU R                  R                  nUR                  u  pVpxn	[        R
                  " XpR                  S   -
  U R                  S   -  5      S-   n
[        R
                  " XR                  S   -
  U R                  S   -  5      S-   n[        R
                  " XR                  S   -
  U R                  S   -  5      S-   nSnUR                  X-  U-  SU R                  R                  -  U R                  S   U-  U R                  S   U-  U R                  S   U-  45      nUR                  X&U R                  S   U R                  S   U R                  S   45      n[        U
5       GH  n[        U5       GHr  n[        U5       GH^  nUU R                  S   -  UU R                  S   -  U R                  S   -   nnUU R                  S   -  UU R                  S   -  U R                  S   -   nnUU R                  S   -  UU R                  S   -  U R                  S   -   nnUS S 2S S 2UU2UU2UU24   nUXU-  '   X-  US-
  :X  d  XU-  U-  S-
  :X  a  U R                  U5      nXU-  U-  S-
  :X  a  X-  US-
  :w  a  US X-  S-    XX-  -
  S & OUXU-
  S-   US-   & UR                  X&U R                  S   U R                  S   U R                  S   45      nUS-  nGMa     GMu     GM     UR                  USU R                  R                  -  Xt-  X-  X-  45      nU R                  S   U-  U R                  S   U-  U R                  S   U-  4nU R                  S   U-  U R                  S   U-  U R                  S   U-  4nUS   US   -
  US   US   -
  US   US   -
  4n[        U
5       H  nUUS   -  UUS   -  US   -   nn[        U5       H  nUUS   -  UUS   -  US   -   nn[        U5       Hv  nUUS   -  UUS   -  US   -   nn[        UU
US   4UUUS   4UUUS   4UUU-  U-  UU-  -   U-      R                  S5      5      nUS S 2S S 2UU2UU2UU24==   U-  ss'   Mx     M     M     UR                  SSSSS5      R                  SS5      nU R!                  U5      nUR#                  SUS45      R                  SSSSS5      nU$ Nr    r   r
   r   rB   r;   )r  configr  rH   mathfloorr  r   	new_zerosr   rw   r
  _prepare_for_blend	unsqueezerF   rI   r  rE   )r0   r&  local_batch_sizersrtr@   r   
num_framesheightwidthoutput_num_framesoutput_heightoutput_widthcountoutput_latentvae_batch_inputr   jkn_startn_endh_starth_endw_startw_end
video_cubelatentoutput_kerneloutput_strideoutput_overlaplatent_means                                  r5   r(  !AutoencoderKLAllegro.tiled_encode  s   ++[[33>?gg;
*e JJ
[[^(Ct{{ST~'UVYZZ

F[[^$;t{{1~#MNQRRzz5;;q>#9T[[^"KLqP!1L@DKK///A"$A"$A"$
 ++'7t{{ST~W[WbWbcdWegkgrgrstgu&vw()A=)|,A%&Q%7T[[^9KdkkZ[n9\UG%&Q%7T[[^9KdkkZ[n9\UG%&Q%7T[[^9KdkkZ[n9\UG!"1awu}#T!UJ@JO,<$<= 04Dq4HH $E$TWX$XX!%o!> "%F%UXY%YY % 8<Lq<P PPVWuY^YqtuYuPvM%2J*J*LMV\M2B*BQ*FQRS*+++-T[[^T[[YZ^]a]h]hij]kl+ QJE5 - * *< T[[888*:JFLZ_Zef
 A",dkk!n.BDKKPQNVXDXXA",dkk!n.BDKKPQNVXDXX!}Q//!}Q//!}Q//
 ()Aq!111}Q7G3G-XYJZ3ZUG=)!"]1%5!5q=;K7Km\]N^7^|,A%&q)9%91}Q?O;OR_`aRb;bUG"4-~a/@AM>!+<=L.*;<%a-&7,&F\IY&Y\]&]^hhijk	#K 1awu}LMQ\\M - * * 1aA.66q!<(!!!j"%56>>q!Q1Mr7   c                 	   SnU R                   nU R                  R                  nU R                  S   U-  U R                  S   U-  U R                  S   U-  4nU R                  S   U-  U R                  S   U-  U R                  S   U-  4nUR
                  u  pxpnUR                  SSSSS5      R                  SS5      nU R                  U5      nUR                  SUS45      R                  SSSSS5      n[        R                  " XS   -
  US   -  5      S-   n[        R                  " XS   -
  US   -  5      S-   n[        R                  " XS   -
  US   -  5      S-   nSnUR                  X-  U-  U R                  R                  U R                  S   U R                  S   U R                  S   45      nUR                  X(US   US   US   45      n[        U5       GH  n[        U5       H  n[        U5       H  nUUS   -  UUS   -  US   -   nnUUS   -  UUS   -  US   -   nnUUS   -  UUS   -  US   -   nnUS S 2S S 2UU2UU2UU24   nUUX-  '   X-  US-
  :X  d  XU-  U-  S-
  :X  ah  U R                  U5      nXU-  U-  S-
  :X  a  X-  US-
  :w  a  US X-  S-    UXU-  -
  S & OUUX-
  S-   US-   & UR                  X(US   US   US   45      nUS-  nM     M     GM     UR                  XpR                  R                  X-  X-  X-  45      nU R                  S   U R                  S   -
  U R                  S   U R                  S   -
  U R                  S   U R                  S   -
  4n[        U5       GH!  nUU R                  S   -  UU R                  S   -  U R                  S   -   nn[        U5       H  nUU R                  S   -  UU R                  S   -  U R                  S   -   nn[        U5       H  nUU R                  S   -  UU R                  S   -  U R                  S   -   nn[!        UUUS   4UUUS   4UUUS   4UUU-  U-  UU-  -   U-      R#                  S5      5      nUS S 2S S 2UU2UU2UU24==   U-  ss'   M     M     GM$     UR                  SSSSS5      R%                  5       nU$ rD  )r  rE  r  r  r   rH   rF   rI   r  rE   rF  rG  rH  r_   rw   r  rI  rJ  
contiguous) r0   r7  rK  rL  rM  latent_kernellatent_strider@   r   rN  rO  rP  rQ  rR  rS  rT  decoded_videosrV  r   rW  rX  rY  rZ  r[  r\  r]  r^  current_latentcurrent_videovideovideo_overlapout_video_blends                                    r5   r9  !AutoencoderKLAllegro.tiled_decode  s   ++[[33A",dkk!n.BDKKPQNVXDXXA",dkk!n.BDKKPQNVXDXX>?gg;
*e IIaAq!$,,Q2  #KKJ+,44Q1aC JJ
15E(EWXIY'YZ]^^

F1-=$=qAQ#QRUVVzz5+;#;}Q?O"OPSTT!1L@((AAA
 ++]1-=}Q?OQ^_`Qab
 ()A=)|,A%&q)9%91}Q?O;OR_`aRb;bUG%&q)9%91}Q?O;OR_`aRb;bUG%&q)9%91}Q?O;OR_`aRb;bUG%&q!WU]GEM7SX='X%YN@NOE$<= 04Dq4HH $E$TWX$XX(,_(E "%F%UXY%YY % 8<Lq<P PQ^ >%":Q">RN5;K3K+K+MN XeN5+Ca+G%RS)T*+++-]1=M}]^O_anopaqr+ QJE9 - * *@ Z)A)A:?TZT_afaklmKKNT[[^+KKNT[[^+KKNT[[^+
 ()AQ/T[[^1CdkkRSn1TUG=)!"T[[^!3QQ5G$++VW.5X|,A%&Q%7T[[^9KdkkZ[n9\UG&8-}Q/?@M=+;<L-*:;&q='8<'G!lJZ'Z]^'^_iijkl	'O !Qwu}gemKLP__L - * * aAq!,779r7   r   sample_posterior	generatorc                     UnU R                  U5      R                  nU(       a  UR                  US9nOUR                  5       nU R	                  U5      R                  nU(       d  U4$ [        US9$ )a  
Args:
    sample (`torch.Tensor`): Input sample.
    sample_posterior (`bool`, *optional*, defaults to `False`):
        Whether to sample from the posterior.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`DecoderOutput`] instead of a plain tuple.
    generator (`torch.Generator`, *optional*):
        PyTorch random number generator.
)rr  r=  )r5  r/  r   moderA  r   )	r0   r   rq  r-  rr  r&  r4  r7  decs	            r5   rK   AutoencoderKLAllegro.forward.  sf    " KKN..	  9 5A Akk!n##6MC((r7   )r  r
  r  r  r  r  r   r  r  r  r  r  )r   r   r   r   r   r   r   rB   r
   r   rN   rB   i@  gp=
ף?T)r   N)T)FTN)#rO   rP   rQ   rR   rS    _supports_gradient_checkpointingr   r%   r   r   rU   rT   r$   r  r  r!  r$  r<   rW   r*  r	   r   r   r   r5  r:  r   rA  r(  r9  r   	GeneratorrK   rX   rY   rZ   s   @r5   r   r     s   +Z (,$ -
+
 /C7Q5O  !!,- $!5G
G
 G
  S/	G
 c3hG
  "#s(O!G
" %*$)$4#G
$ #(c	"2%G
& 'G
( )G
* +G
, -G
. %*/G
0 1G
2 3G
4 5G
6 
7G
 G
R  ![ [%,, [ 37::,0:	"E*F$GG	H: :8[ [%,, [ - -4 -5X]XdXdIdCe - -2Qell Qu|| QfWell Wu|| Wx "' /3)) ) 	)
 EOO,) 
}ell*	+) )r7   r   c                    U u  pEnUu  pxn	Uu  pnUS:  Ga  US:  ay  US S 2S S 2SU2S S 2S S 24   [         R                  " SU5      R                  5       R                  UR                  5      U-  R                  USS5      -  US S 2S S 2SU2S S 2S S 24'   XES-
  :  a~  US S 2S S 2U* S 2S S 2S S 24   S[         R                  " SU5      R                  5       R                  UR                  5      U-  -
  R                  USS5      -  US S 2S S 2U* S 2S S 2S S 24'   US:  ax  US S 2S S 2S S 2SU	2S S 24   [         R                  " SU	5      R                  5       R                  UR                  5      U	-  R                  U	S5      -  US S 2S S 2S S 2SU	2S S 24'   XxS-
  :  a}  US S 2S S 2S S 2U	* S 2S S 24   S[         R                  " SU	5      R                  5       R                  UR                  5      U	-  -
  R                  U	S5      -  US S 2S S 2S S 2U	* S 2S S 24'   U
S:  ah  US S 2S S 2S S 2S S 2SU24   [         R                  " SU5      R                  5       R                  UR                  5      U-  -  US S 2S S 2S S 2S S 2SU24'   XS-
  :  am  US S 2S S 2S S 2S S 2U* S 24   S[         R                  " SU5      R                  5       R                  UR                  5      U-  -
  -  US S 2S S 2S S 2S S 2U* S 24'   U$ )Nr   r    )r<   arangerT   r   devicereshape)n_paramh_paramw_paramr&  nn_max	overlap_nr3  h_max	overlap_hww_max	overlap_ws                r5   rI  rI  M  s   !Ai!Ai!Ai1}q5)*1a9a+B)CQ	*00255ahh?)KgiA&*'AaAiKA%& qy=)*1a)a+B)CELLI.44699!((CiOOgiA&*'AaYJKA%& 	1u%&q!Q)Q'>%?LLI&,,.11!((;iG
')Q
& !Q1Y;
!" 	19}%&q!Q
Q'>%?Q	*00255ahh?)KK
')Q
& !QI:;
!" 	1u%&q!Q1Y;'>%?LLI&,,.11!((;iG&
!Q1a	k
!" 	19}%&q!QI:;'>%?Q	*00255ahh?)KK&
!Q1yjk
!" Hr7   )&rF  typingr   r   r   r<   torch.nnr&   configuration_utilsr   r   utils.accelerate_utilsr	   attention_processorr   r   autoencoders.vaer   r   downsamplingr   modeling_outputsr   modeling_utilsr   r   r   
upsamplingr   Moduler   r\   r   r   r   r   r   rI   r7   r5   <module>r     s      ) )   B 8 8 J ' 2 ' " #bryy bJT TnOryy OdlBII l^|ryy |~Dryy DNf):{ f)Rr7   