
    +h*                     X   S SK r S SKJrJrJr  S SKrS SKJr  S SKJs  J	r
  SSKJrJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  \R6                  " \5      r " S S\R<                  5      r " S S\R@                  5      r! " S S\R@                  5      r" " S S\R@                  5      r# " S S\R@                  5      r$ " S S\R@                  5      r% " S S\R@                  5      r& " S S\R@                  5      r' " S S\R@                  5      r( " S S \\5      r)g)!    N)OptionalTupleUnion   )ConfigMixinregister_to_config)logging)apply_forward_hook   )get_activation)AutoencoderKLOutput)
ModelMixin   )DecoderOutputDiagonalGaussianDistributionc                      ^  \ rS rSr       SS\S\S\\\\S4   4   S\\\\S4   4   S\\\\S4   4   S\\\\S4   4   S	\S
\S\4U 4S jjjr	S r
S\R                  S\R                  4U 4S jjrSrU =r$ )EasyAnimateCausalConv3d#   in_channelsout_channelskernel_size.stridepaddingdilationgroupsbiaspadding_modec
                   > [        U[        5      (       a  UOU4S-  n[        U5      S:X  d   SU S35       e[        U[        5      (       a  UOU4S-  n[        U5      S:X  d   SU S35       e[        U[        5      (       a  UOU4S-  n[        U5      S:X  d   SU S35       eUu  pnUu  U l        pUu  nnnU
S-
  U-  nUcK  [        R
                  " US-
  U-  SU-
  -   S-  5      n[        R
                  " US-
  U-  SU-
  -   S-  5      nO'[        U[        5      (       a  U=nnO[        (       d   eUU l        [        R
                  " U
S-
  U-  SU-
  -   S-  5      U l	        S U l
        [        TU ]1  UUUUUSWW4UUU	S	9	  g )
Nr   z#Kernel size must be a 3-tuple, got z	 instead.zStride must be a 3-tuple, got z Dilation must be a 3-tuple, got r   r   r   )	r   r   r   r   r   r   r   r   r   )
isinstancetuplelent_stridemathceilintNotImplementedErrortemporal_paddingtemporal_padding_originprev_featuressuper__init__)selfr   r   r   r   r   r   r   r   r   t_ksh_ksw_ksh_stridew_stride
t_dilation
h_dilation
w_dilationt_padh_padw_pad	__class__s                        m/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/autoencoders/autoencoder_kl_magvit.pyr+    EasyAnimateCausalConv3d.__init__$   s    &0U%C%Ck+Z[I[;1$b(KK=Xa&bb$%fe446)a-6{aS#A&!SS)(E::8a8}!Y%EhZy#YY! 'D,2)x-5*
J
 Z' ?IIqJ6!h,G1LMEIIqJ6!h,G1LME%%##EE&&&& !&'+yy4!8z2IQQY\2Z^_1_'`$! 	#%#u%% 	 
	
    c                     U ? S U l         g Nr)   r,   s    r9   _clear_conv_cache)EasyAnimateCausalConv3d._clear_conv_cache^       !r;   hidden_statesreturnc           	        > UR                   nU R                  Gc  [        R                  " USSSSU R                  S4SS9nUR                  US9nU R                  5         US S 2S S 2U R                  * S 24   R                  5       U l        UR                  S5      n/ nSnXPR                  -   S-   U::  a`  [        TU ])  US S 2S S 2XUU R                  -   S-   24   5      nXPR                  -  nUR                  U5        XPR                  -   S-   U::  a  M`  [        R                  " US5      $ U R                  S:X  a<  [        R                  " U R                  S S 2S S 2U R                  S-
  * S 24   U/SS9nO![        R                  " U R                  U/SS9nUR                  US9nU R                  5         US S 2S S 2U R                  * S 24   R                  5       U l        UR                  S5      n/ nSnXPR                  -   S-   U::  a`  [        TU ])  US S 2S S 2XUU R                  -   S-   24   5      nXPR                  -  nUR                  U5        XPR                  -   S-   U::  a  M`  [        R                  " US5      $ )Nr   	replicate)padmode)dtyper   r   dim)rI   r)   FrG   r'   tor@   clonesizer*   forwardr"   appendtorchconcat)r,   rC   rI   
num_framesoutputsioutr8   s          r9   rP   EasyAnimateCausalConv3d.forwardb   s   ##%EE1a!6!6: M
 *,,5,9M ""$!.q!d6K6K5K5M/M!N!T!T!VD '++A.JGA+++a/:=gomAq!$BWBW>WZ[>[:[4[&\]]]"s# +++a/:= <<++ }}! %''10E0E0I.J.L(LM}]cd! !&d.@.@--PVW X),,5,9M ""$!.q!d6K6K5K5M/M!N!T!T!VD '++A.JGA+++a/:=gomAq!$BWBW>WZ[>[:[4[&\]]]"s# +++a/:= <<++r;   )r)   r"   r'   r(   )r   r   r   r   r   Tzeros)__name__
__module____qualname____firstlineno__r%   r   r   boolstrr+   r@   rR   TensorrP   __static_attributes____classcell__r8   s   @r9   r   r   #   s    
 45.//001#8
8
 8
 3c3h/0	8

 c5c?*+8
 sE#s(O+,8
 U38_,-8
 8
 8
 8
 8
t"/,U\\ /,ell /, /,r;   r   c                      ^  \ rS rSr      SS\S\S\S\S\S\S\S	\4U 4S
 jjjrS\	R                  S\	R                  4S jrSrU =r$ )EasyAnimateResidualBlock3D   r   r   non_linearitynorm_num_groupsnorm_epsspatial_group_normdropoutoutput_scale_factorc	                   > [         T	U ]  5         Xl        [        R                  " UUUSS9U l        [        U5      U l        [        XSS9U l	        [        R                  " XBUSS9U l
        [        R                  " U5      U l        [        X"SS9U l        X:w  a  [        R                  " XSS9U l        O[        R                   " 5       U l        X`l        g )NT)
num_groupsnum_channelsepsaffiner   r   r   )r*   r+   rl   nn	GroupNormnorm1r   nonlinearityr   conv1norm2Dropoutrk   conv2Conv3dshortcutIdentityrj   )
r,   r   r   rg   rh   ri   rj   rk   rl   r8   s
            r9   r+   #EasyAnimateResidualBlock3D.__init__   s     	#6  \\&$	

 +=9,[TUV
\\_]enrs
zz'*,\UVW
&IIkQODMKKMDM"4r;   rC   rD   c                 4   U R                  U5      nU R                  (       ao  UR                  S5      nUR                  SSSSS5      R	                  SS5      nU R                  U5      nUR                  SUS45      R                  SSSSS5      nOU R                  U5      nU R                  U5      nU R                  U5      nU R                  (       ao  UR                  S5      nUR                  SSSSS5      R	                  SS5      nU R                  U5      nUR                  SUS45      R                  SSSSS5      nOU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nX-   U R                  -  $ Nr   r   r   r      )r|   rj   rO   permuteflattenru   	unflattenrv   rw   rx   rk   rz   rl   )r,   rC   r|   
batch_sizes       r9   rP   "EasyAnimateResidualBlock3D.forward   s   ==/""&++A.J)11!Q1a@HHANM JJ}5M)33A
B7GHPP1aAM !JJ}5M))-8

=1""&++A.J)11!Q1a@HHANM JJ}5M)33A
B7GHPP1aAM !JJ}5M))-8]3

=1(D,D,DDDr;   )	rw   rz   rk   rv   ru   rx   rl   r|   rj   )silu    ư>T              ?rZ   r[   r\   r]   r%   r_   floatr^   r+   rR   r`   rP   ra   rb   rc   s   @r9   re   re      s    
 $!#'%("5"5 "5 	"5
 "5 "5 !"5 "5 #"5 "5HEU\\ Eell E Er;   re   c            	       z   ^  \ rS rSrSS\S\S\S\4U 4S jjjrS\R                  S\R                  4S	 jr	S
r
U =r$ )EasyAnimateDownsampler3D   r   r   r   r   c                 B   > [         TU ]  5         [        XX4SS9U l        g )Nr   )r   r   r   r   r   )r*   r+   r   conv)r,   r   r   r   r   r8   s        r9   r+   !EasyAnimateDownsampler3D.__init__   s"    +#Kpq
	r;   rC   rD   c                 V    [         R                  " US5      nU R                  U5      nU$ )N)r   r   r   r   )rL   rG   r   r,   rC   s     r9   rP    EasyAnimateDownsampler3D.forward   s&    m\:		-0r;   )r   )r   r   r   r   )rZ   r[   r\   r]   r%   r    r+   rR   r`   rP   ra   rb   rc   s   @r9   r   r      sJ    
C 
s 
 
Z_ 
 
U\\ ell  r;   r   c                      ^  \ rS rSr   SS\S\S\S\S\4
U 4S jjjrS rS	\R                  S
\R                  4S jr
SrU =r$ )EasyAnimateUpsampler3D   r   r   r   temporal_upsamplerj   c                 |   > [         TU ]  5         U=(       d    UnX@l        XPl        [	        XUS9U l        S U l        g )N)r   r   r   )r*   r+   r   rj   r   r   r)   )r,   r   r   r   r   rj   r8   s         r9   r+   EasyAnimateUpsampler3D.__init__   sA     	#2{!2"4+#K
	 "r;   c                     U ? S U l         g r=   r>   r?   s    r9   r@   (EasyAnimateUpsampler3D._clear_conv_cache   rB   r;   rC   rD   c                     [         R                  " USSS9nU R                  U5      nU R                  (       a>  U R                  c  Xl        U$ [         R                  " USU R
                  (       d  SOSS9nU$ )Nr   r   r   nearest)scale_factorrH   )r   r   r   	trilinear)rL   interpolater   r   r)   rj   r   s     r9   rP   EasyAnimateUpsampler3D.forward   sq    m)R[\		-0!!!!)%2"  !"!!*,0,C,C!
 r;   )r   r)   rj   r   )r   FT)rZ   r[   r\   r]   r%   r^   r+   r@   rR   r`   rP   ra   rb   rc   s   @r9   r   r      sm    
 "'#'"" " 	"
  " !" "&"U\\ ell  r;   r   c                      ^  \ rS rSr         SS\S\S\S\S\S\S\S	\S
\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )EasyAnimateDownBlock3Di  r   r   
num_layersact_fnrh   ri   rj   rk   rl   add_downsampleadd_temporal_downsamplec                   > [         TU ]  5         [        R                  " / 5      U l        [        U5       H6  nUS:X  a  UOUnU R                  R                  [        UUUUUUUU	S95        M8     U
(       a&  U(       a  [        X"SSS9U l	        SU l
        SU l        g U
(       a&  U(       d  [        X"SSS9U l	        SU l
        SU l        g S U l	        SU l
        SU l        g )	Nr   r   r   rg   rh   ri   rj   rk   rl   r   r   )r   r   r   r   r   )r*   r+   rs   
ModuleListconvsrangerQ   re   r   downsamplerspatial_downsample_factortemporal_downsample_factor)r,   r   r   r   r   rh   ri   rj   rk   rl   r   r   rV   r8   s                r9   r+   EasyAnimateDownBlock3D.__init__  s     	]]2&
z"A)*a+\KJJ* +!-"($3%'9#(;	 # 57`ajstD-.D*./D+$;7`ajstD-.D*./D+#D-.D*./D+r;   rC   rD   c                 x    U R                    H  nU" U5      nM     U R                  b  U R                  U5      nU$ r=   )r   r   r,   rC   r   s      r9   rP   EasyAnimateDownBlock3D.forward>  s<    JJD /M ' ,,];Mr;   )r   r   r   r   )	r   r   r   r   Tr   r   TTr   rc   s   @r9   r   r     s    
 !#'%(#(,+0+0 +0 	+0
 +0 +0 +0 !+0 +0 #+0 +0 "&+0 +0ZU\\ ell  r;   r   c                      ^  \ rS rSr         SS\S\S\S\S\S\S\S	\S
\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )EasyAnimateUpBlock3diF  r   r   r   r   rh   ri   rj   rk   rl   add_upsampleadd_temporal_upsamplec                 "  > [         TU ]  5         [        R                  " / 5      U l        [        U5       H6  nUS:X  a  UOUnU R                  R                  [        UUUUUUUU	S95        M8     U
(       a  [        UUUUS9U l	        g S U l	        g )Nr   r   )r   rj   )
r*   r+   rs   r   r   r   rQ   re   r   	upsampler)r,   r   r   r   r   rh   ri   rj   rk   rl   r   r   rV   r8   s                r9   r+   EasyAnimateUpBlock3d.__init__G  s     	]]2&
z"A)*a+\KJJ* +!-"($3%'9#(;	 # 3"7#5	DN "DNr;   rC   rD   c                 x    U R                    H  nU" U5      nM     U R                  b  U R                  U5      nU$ r=   r   r   r   s      r9   rP   EasyAnimateUpBlock3d.forwardq  s8    JJD /M >>% NN=9Mr;   r   )	r   r   r   r   Fr   r   TTr   rc   s   @r9   r   r   F  s    
 !#(%(!&*("(" (" 	("
 (" (" (" !(" (" #(" ("  $(" ("TU\\ ell  r;   r   c                      ^  \ rS rSr       SS\S\S\S\S\S\S\S	\4U 4S
 jjjrS\	R                  S\	R                  4S jrSrU =r$ )EasyAnimateMidBlock3diy  r   r   r   rh   ri   rj   rk   rl   c	                   > [         T
U ]  5         Ub  UO[        US-  S5      n[        R                  " [        UUUUUUUUS9/5      U l        [        US-
  5       H,  n	U R                  R                  [        UUUUUUUUS95        M.     g )Nr   r   r   r   )	r*   r+   minrs   r   re   r   r   rQ   )r,   r   r   r   rh   ri   rj   rk   rl   _r8   s             r9   r+   EasyAnimateMidBlock3d.__init__z  s     	-<-H/cR]abRbdfNg]]* +!,"($3%'9#(;	

 zA~&AJJ* +!,"($3%'9#(;	 'r;   rC   rD   c                 l    U R                   S   " U5      nU R                   SS   H  nU" U5      nM     U$ )Nr   r   r   )r,   rC   resnets      r9   rP   EasyAnimateMidBlock3d.forward  s8    

1m4jjnF"=1M %r;   r   )r   r   r   r   Tr   r   r   rc   s   @r9   r   r   y  s     !#'%(** * 	*
 * * !* * #* *XU\\ ell  r;   r   c                      ^  \ rS rSrSrSrSSS/ SQSS	S
SS4	S\S\S\\S4   S\\S4   S\S\S\S\	S\	4U 4S jjjr
S\R                  S\R                  4S jrSrU =r$ )EasyAnimateEncoderi  zh
Causal encoder for 3D video-like data used in [EasyAnimate](https://huggingface.co/papers/2405.18991).
Tr      SpatialDownBlock3DSpatialTemporalDownBlock3Dr   r            r   r   r   r   Fr   r   down_block_types.block_out_channelslayers_per_blockrh   r   double_zrj   c
                   > [         TU ]  5         [        XS   SS9U l        [        R
                  " / 5      U l        US   n
[        U5       H  u  pU
nXK   n
U[        U5      S-
  :H  nUS:X  a  [        UU
UUUSU	U(       + SS9	nO+US	:X  a  [        UU
UUUSU	U(       + S
S9	nO[        SU 35      eU R                  R                  U5        M     [        US   UUU	USSSS9U l        Xl        [        R                  " US   USS9U l        [#        U5      U l        U(       a  SU-  OUn[        US   USS9U l        SU l        g )Nr   r   rr   r   r   r   F)	r   r   r   r   rh   ri   rj   r   r   r   TUnknown up block type: r   )r   r   r   rj   rh   ri   rk   rl   ro   rn   rp   r   )r*   r+   r   conv_inrs   r   down_blocks	enumerater!   r   
ValueErrorrQ   r   	mid_blockrj   rt   conv_norm_outr   conv_actconv_outgradient_checkpointing)r,   r   r   r   r   r   rh   r   r   rj   output_channelsrV   down_block_typeinput_channelsis_final_block
down_blockconv_out_channelsr8   s                    r9   r+   EasyAnimateEncoder.__init__  s   " 	 /{q<Q_`a ==,,Q/"+,<"=A,N03O#&8"9A"==N"663 .!0/!$3!'9'5#5,1

 !$@@3 .!0/!$3!'9'5#5,0

 !#:?:K!LMM##J/= #>B /*2.'1+ !	
 #5\\+B/&

 'v. 19A,l/0B20FHYghi&+#r;   rC   rD   c                 b   U R                  U5      nU R                   HI  n[        R                  " 5       (       a$  U R                  (       a  U R                  X!5      nMA  U" U5      nMK     U R                  U5      nU R                  (       ao  UR                  S5      nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR                  SUS45      R                  SSSSS5      nOU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   rR   is_grad_enabledr   _gradient_checkpointing_funcr   rj   rO   r   r   r   r   r   r   )r,   rC   r   r   s       r9   rP   EasyAnimateEncoder.forward  s   ]3**J$$&&4+F+F $ A A* \ *= 9	 + }5""&++A.J)11!Q1a@HHANM ..}=M)33A
B7GHPPQRTUWXZ[]^_M ..}=Mm4m4r;   )r   r   r   r   r   r   r   rj   rZ   r[   r\   r]   __doc__ _supports_gradient_checkpointingr%   r   r_   r^   r+   rR   r`   rP   ra   rb   rc   s   @r9   r   r     s     (,$ -
 /C !!#(R,R, R,  S/	R, "#s(OR, R, R, R, R, !R, R,hU\\ ell  r;   r   c                      ^  \ rS rSrSrSrSSS/ SQSS	S
S4S\S\S\\S4   S\\S4   S\S\S\S\	4U 4S jjjr
S\R                  S\R                  4S jrSrU =r$ )EasyAnimateDecoderi!  zh
Causal decoder for 3D video-like data used in [EasyAnimate](https://huggingface.co/papers/2405.18991).
Tr   r   SpatialUpBlock3DSpatialTemporalUpBlock3Dr   r   r   r   r   r   Fr   r   up_block_types.r   r   rh   r   rj   c	                   > [         TU ]  5         [        XS   SS9U l        [	        US   UUUSSSS9U l        [        R                  " / 5      U l        [        [        U5      5      n	U	S   n
[        U5       H  u  pU
nX   n
U[        U5      S-
  :H  nUS:X  a  [        UU
US-   UUSUU(       + S	S
9	nO.US:X  a  [        UU
US-   UUSUU(       + SS
9	nO[        SU 35      eU R                  R                  U5        M     Xl        [        R"                  " US   USS9U l        ['        U5      U l        [        US   USS9U l        S	U l        g )Nr   r   rr   r   r   r   )r   r   r   rh   ri   rk   rl   r   F)	r   r   r   r   rh   ri   rj   r   r   r   Tr   r   )r*   r+   r   r   r   r   rs   r   	up_blockslistreversedr   r!   r   r   rQ   rj   rt   r   r   r   r   r   )r,   r   r   r   r   r   rh   r   rj   reversed_block_out_channelsr   rV   up_block_typer   r   up_blockr8   s                   r9   r+   EasyAnimateDecoder.__init__(  s     	 /{r<R`ab /*2.'+ !
 r*&*84F+G&H#5a8 ). 9A,N9<O#&8"9A"==N  22/ .!0/!3!$3!'9%3!3*/
 "<</ .!0/!3!$3!'9%3!3*.
 !#:=/!JKKNN!!(+A !:F #5\\+A.&

 'v. 00B10E|abc&+#r;   rC   rD   c                    U R                  U5      n[        R                  " 5       (       a.  U R                  (       a  U R	                  U R
                  U5      nOU R                  U5      nU R                   HI  n[        R                  " 5       (       a$  U R                  (       a  U R	                  X!5      nMA  U" U5      nMK     U R                  (       ao  UR                  S5      nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR                  SUS45      R                  SSSSS5      nOU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   rR   r   r   r   r   r   rj   rO   r   r   r   r   r   r   )r,   rC   r  r   s       r9   rP   EasyAnimateDecoder.forward|  s;   ]3  ""t'B'B ==dnnm\M NN=9MH$$&&4+F+F $ A A( Z ( 7	 ' ""&++A.J)11!Q1a@HHANM ..}=M)33A
B7GHPP1aAM !..}=Mm4m4r;   )r   r   r   r   r   r   rj   r   r   rc   s   @r9   r   r   !  s     (,$ +
 /C !!#(R,R, R, c3h	R, "#s(OR, R, R, R, !R, R,hU\\ ell  r;   r   c                     ^  \ rS rSrSrSr\SSS/ SQ/ SQ/ SQS	S
SSS4S\S\S\S\\S4   S\\	S4   S\\	S4   S\S\	S\S\
S\4U 4S jjj5       rS r      S:S\\   S\\   S\\   S\\
   S \\
   S!\\
   S"S4S# jjrS;S$ jrS;S% jrS;S& jr\ S<S'\R*                  S(\S"\\\\   4   4S) jj5       r\ S<S'\R*                  S(\S"\\\\   4   4S* jj5       rS<S+\R*                  S(\S"\\\R*                  4   4S, jjr\S<S+\R*                  S(\S"\\\R*                  4   4S- jj5       rS.\R*                  S/\R*                  S0\S"\R*                  4S1 jrS.\R*                  S/\R*                  S0\S"\R*                  4S2 jrS<S'\R*                  S(\S"\4S3 jjr S<S+\R*                  S(\S"\\\R*                  4   4S4 jjr!   S=S5\R*                  S6\S(\S7\\RD                     S"\\\R*                  4   4
S8 jjr#S9r$U =r%$ )>AutoencoderKLMagviti  a]  
A VAE model with KL loss for encoding images into latents and decoding latent representations into images. This
model is used in [EasyAnimate](https://huggingface.co/papers/2405.18991).

This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
for all models (such as downloading or saving).
Tr      r   r   r   r   r   r   g?r   latent_channelsr   r   .r   r   r   r   rh   scaling_factorrj   c                   > [         TU ]  5         [        UUUUUU	USUS9	U l        [	        UUUUUU	UUS9U l        [        R                  " SU-  SU-  SS9U l        [        R                  " X"SS9U l	        S[        U5      S-
  -  U l        S[        U5      S-
  -  U l        SU l        SU l        SU l        SU l        SU l        SU l        S	U l        S	U l        SU l        S
U l        S
U l        SU l        g )NT)	r   r   r   r   r   rh   r   r   rj   )r   r   r   r   r   rh   r   rj   r   r   rr   Fr   r   i  r   )r*   r+   r   encoderr   decoderrs   r{   
quant_convpost_quant_convr!   spatial_compression_ratiotemporal_compression_ratiouse_slicing
use_tilinguse_framewise_encodinguse_framewise_decodingnum_sample_frames_batch_sizenum_latent_frames_batch_sizetile_sample_min_heighttile_sample_min_widthtile_sample_min_num_framestile_sample_stride_heighttile_sample_stride_widthtile_sample_stride_num_frames)r,   r   r	  r   r   r   r   r   r   rh   r
  rj   r8   s               r9   r+   AutoencoderKLMagvit.__init__  s/   2 	 *#(-1-+1

 *'%)1-+1	
 ))A$7_9LZ[\!yyWXY)*s3E/F/J)K&*+4F0G!0K*L' !
   ',#&+# -.),-) '*#%("*+' *-&(+%-.*r;   c                     U R                  5        HQ  u  p[        U[        5      (       a  UR                  5         [        U[        5      (       d  MA  UR                  5         MS     g r=   )named_modulesr   r   r@   r   )r,   namemodules      r9   r@   %AutoencoderKLMagvit._clear_conv_cache  sK     ..0LD&"9::((*&"899((*	 1r;   Nr  r  r  r  r  r  rD   c                 f   SU l         SU l        SU l        U=(       d    U R                  U l        U=(       d    U R                  U l        U=(       d    U R
                  U l        U=(       d    U R                  U l        U=(       d    U R                  U l        U=(       d    U R                  U l        g)a  
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
processing larger images.

Args:
    tile_sample_min_height (`int`, *optional*):
        The minimum height required for a sample to be separated into tiles across the height dimension.
    tile_sample_min_width (`int`, *optional*):
        The minimum width required for a sample to be separated into tiles across the width dimension.
    tile_sample_stride_height (`int`, *optional*):
        The minimum amount of overlap between two consecutive vertical tiles. This is to ensure that there are
        no tiling artifacts produced across the height dimension.
    tile_sample_stride_width (`int`, *optional*):
        The stride between two consecutive horizontal tiles. This is to ensure that there are no tiling
        artifacts produced across the width dimension.
TN)	r  r  r  r  r  r  r  r  r  )r,   r  r  r  r  r  r  s          r9   enable_tiling!AutoencoderKLMagvit.enable_tiling  s    4 &*#&*#&<&[@[@[#%:%Xd>X>X"*D*gHgHg')B)ddFdFd&(@(aDDaDa%-J-pdNpNp*r;   c                     SU l         g)z
Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
decoding in one step.
FN)r  r?   s    r9   disable_tiling"AutoencoderKLMagvit.disable_tiling(  s    
  r;   c                     SU l         g)z
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
TNr  r?   s    r9   enable_slicing"AutoencoderKLMagvit.enable_slicing/  s    
  r;   c                     SU l         g)z
Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
decoding in one step.
FNr+  r?   s    r9   disable_slicing#AutoencoderKLMagvit.disable_slicing6  s    
 !r;   xreturn_dictc           
      H   U R                   (       aI  UR                  S   U R                  :  d  UR                  S   U R                  :  a  U R	                  XS9$ U R                  USS2SS2SS2SS2SS24   5      nU/n[        SUR                  S   U R                  5       HC  nU R                  USS2SS2XUU R                  -   2SS2SS24   5      nUR                  U5        ME     [        R                  " USS9nU R                  U5      nU R                  5         U$ )a  
Encode a batch of images into latents.

Args:
    x (`torch.Tensor`): Input batch of images.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple.

Returns:
        The latent representations of the encoded images. If `return_dict` is True, a
        [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned.
r   r2  Nr   r   rJ   )r  shaper  r  tiled_encoder  r   r  rQ   rR   catr  r@   )r,   r1  r2  first_frameshrV   next_framesmomentss           r9   _encodeAutoencoderKLMagvit._encode=  s      ??d.I.I IQWWUW[[_[u[uMu$$Q$@@||AaBQB1n$56Nq!''!*d&G&GHA,,qAqt7X7X3X/XZ[]^)^'_`KHH[! I IIaQ//!$ r;   c                 >   U R                   (       aY  UR                  S   S:  aF  UR                  S5       Vs/ s H  o0R                  U5      PM     nn[        R
                  " U5      nOU R                  U5      n[        U5      nU(       d  U4$ [        US9$ s  snf )a  
Encode a batch of images into latents.

Args:
    x (`torch.Tensor`): Input batch of images.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple.

Returns:
        The latent representations of the encoded videos. If `return_dict` is True, a
        [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned.
r   r   )latent_dist)r  r6  splitr=  rR   r8  r   r   )r,   r1  r2  x_sliceencoded_slicesr:  	posteriors          r9   encodeAutoencoderKLMagvit.encode[  s      
QCD771:N:ll73:NN		.)AQA03	<"y99 Os   Bzc           
         UR                   u  p4pVnU R                  U R                  -  nU R                  U R                  -  n	U R                  (       a5  UR                   S   U:  d  UR                   S   U	:  a  U R                  XS9$ U R                  U5      nU R                  US S 2S S 2S S2S S 2S S 24   5      n
U
/n[        SUR                   S   U R                  5       HC  nU R                  US S 2S S 2XU R                  -   2S S 2S S 24   5      nUR                  U5        ME     [        R                  " USS9nU(       d  U4$ [        US9$ )Nr   r4  r5  r   r   rJ   sample)r6  r  r  r  r  tiled_decoder  r  r   r  rQ   rR   r8  r   )r,   rG  r2  r   ro   rT   heightwidthtile_latent_min_heighttile_latent_min_widthr9  decrV   r;  s                 r9   _decodeAutoencoderKLMagvit._decodew  s9   >?gg;
*e!%!<!<@^@^!^ $ : :d>\>\ \??.D DPRVkHk$$Q$@@  # ||AaBQB1n$56nq!''!*d&G&GHA,,qAqt7X7X3X/XZ[]^)^'_`KJJ{# I ii#6MC((r;   c                 p   U R                   (       ac  UR                  S   S:  aP  UR                  S5       Vs/ s H  o0R                  U5      R                  PM     nn[
        R                  " U5      nOU R                  U5      R                  nU R                  5         U(       d  U4$ [        US9$ s  snf )a  
Decode a batch of images.

Args:
    z (`torch.Tensor`): Input batch of latent vectors.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether to return a [`~models.vae.DecoderOutput`] instead of a plain tuple.

Returns:
    [`~models.vae.DecoderOutput`] or `tuple`:
        If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is
        returned.
r   r   rI  )	r  r6  rA  rQ  rJ  rR   r8  r@   r   )r,   rG  r2  z_slicedecoded_slicesdecodeds         r9   decodeAutoencoderKLMagvit.decode  s     
QJK''RS*U*wll73::*NUii/Gll1o,,G :G,, Vs   $B3abblend_extentc           	         [        UR                  S   UR                  S   U5      n[        U5       HI  nUS S 2S S 2S S 2U* U-   S S 24   SXC-  -
  -  US S 2S S 2S S 2US S 24   XC-  -  -   US S 2S S 2S S 2US S 24'   MK     U$ )Nr   r   r   r6  r   )r,   rY  rZ  r[  ys        r9   blend_vAutoencoderKLMagvit.blend_v  s    1771:qwwqz<@|$A Aq<-!*;Q!>?1qGWCWX[\]^`acdfgij]j[k \  AaAq!m % r;   c                    [        UR                  S   UR                  S   U5      n[        U5       HI  nUS S 2S S 2S S 2S S 2U* U-   4   SXC-  -
  -  US S 2S S 2S S 2S S 2U4   XC-  -  -   US S 2S S 2S S 2S S 2U4'   MK     U$ )Nr   r   r]  )r,   rY  rZ  r[  r1  s        r9   blend_hAutoencoderKLMagvit.blend_h  s    1771:qwwqz<@|$A Aq!l]Q->!>?1qGWCWX[\]^`acdfgij]j[k \  AaAq!m % r;   c                    UR                   u  p4pVnX`R                  -  nXpR                  -  n	U R                  U R                  -  n
U R                  U R                  -  nU R                  U R                  -  nU R
                  U R                  -  nX-
  nX-
  n/ n[        SX`R                  5       GH-  n/ n[        SXpR
                  5       H  nUS S 2S S 2S S 2UUU R                  -   2UUU R                  -   24   nU R                  US S 2S S 2SS2S S 2S S 24   5      nU/n[        SXPR                  5       HD  nU R                  US S 2S S 2UUU R                  -   2S S 2S S 24   5      nUR                  U5        MF     [        R                  " USS9nU R                  U5      nU R                  5         UR                  U5        M     UR                  U5        GM0     / n[        U5       H  u  nn/ n[        U5       Hj  u  nnUS:  a  U R                  UUS-
     U   UU5      nUS:  a  U R!                  UUS-
     UU5      nUR                  US S 2S S 2S S 2S U2S U	24   5        Ml     UR                  [        R                  " USS95        M     [        R                  " USS9S S 2S S 2S S 2S U2S U	24   nU$ )Nr   r   r   rJ   r   r   )r6  r  r  r  r  r  r   r  r  rQ   rR   r8  r  r@   r   r_  rb  )r,   r1  r2  r   ro   rT   rL  rM  latent_heightlatent_widthrN  rO  tile_latent_stride_heighttile_latent_stride_widthblend_heightblend_widthrowsrV   rowjtiler9  tile_hkr;  result_rows
result_rowr<  s                               r9   r7   AutoencoderKLMagvit.tiled_encode  s   >?gg;
*e"@"@@ > >>!%!<!<@^@^!^ $ : :d>\>\ \$($B$BdFdFd$d!#'#@#@DDbDb#b -I+F q&"@"@AAC1e%B%BCD7777D6666	8  $||DAqsAq,AB&q*.O.OPA"&,,tAq!a$BcBc>c:cefhi4i/j"kKMM+. Q yyQ/t,&&(

4 # D$ KK) B* oFAsJ$S>4 q5<<QUAlKDq5<<AE
D+FD!!$q!Q'M"NO * uyy;< & ))KQ/1a-,0VWr;   c                    UR                   u  p4pVnX`R                  -  nXpR                  -  n	U R                  U R                  -  n
U R                  U R                  -  nU R                  U R                  -  nU R
                  U R                  -  nU R                  U R                  -
  nU R                  U R
                  -
  n/ n[        SXl5       GH  n/ n[        SX}5       H  nUS S 2S S 2S S 2UUU
-   2UUU-   24   nU R                  U5      nU R                  US S 2S S 2S S2S S 2S S 24   5      nU/n[        SXPR                  5       HD  nU R                  US S 2S S 2UUU R                  -   2S S 2S S 24   5      nUR                  U5        MF     [        R                  " USS9nU R                  5         UR                  U5        M     UR                  U5        GM     / n[        U5       H  u  nn/ n[        U5       H~  u  nnUS:  a  U R                  UUS-
     U   UU5      nUS:  a  U R!                  UUS-
     UU5      nUR                  US S 2S S 2S S 2S U R                  2S U R
                  24   5        M     UR                  [        R                  " USS95        M     [        R                  " USS9S S 2S S 2S S 2S U2S U	24   nU(       d  U4$ [#        US9$ )Nr   r   r   rJ   r   r   rI  )r6  r  r  r  r  r  r   r  r  r  rQ   rR   r8  r@   r   r_  rb  r   )r,   rG  r2  r   ro   rT   rL  rM  sample_heightsample_widthrN  rO  rg  rh  ri  rj  rk  rV   rl  rm  rn  r9  tile_decrp  r;  rV  rq  rr  rP  s                                r9   rK   AutoencoderKLMagvit.tiled_decode  s   >?gg;
*e!?!??===!%!<!<@^@^!^ $ : :d>\>\ \$($B$BdFdFd$d!#'#@#@DDbDb#b 22T5S5SS0043P3PP q&<AC1e>222111	3 ++D1  $||DArr1a,@A(>q*.O.OPA"&,,tAq!a$BcBc>c:cefhi4i/j"kKOOK0 Q  ))H!4&&(

7#+ ?, KK1 =2 oFAsJ$S>4 q5<<QUAlKDq5<<AE
D+FD!!$q!Q0P$2P2P0PRqTXTqTqRq'q"rs * uyy;< & ii+Aq!^m^]l],RS6MC((r;   rJ  sample_posterior	generatorc                     UnU R                  U5      R                  nU(       a  UR                  US9nOUR                  5       nU R	                  U5      R                  nU(       d  U4$ [        US9$ )a)  
Args:
    sample (`torch.Tensor`): Input sample.
    sample_posterior (`bool`, *optional*, defaults to `False`):
        Whether to sample from the posterior.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`DecoderOutput`] instead of a plain tuple.
)rz  rI  )rE  r@  rJ  rH   rW  r   )	r,   rJ  ry  r2  rz  r1  rD  rG  rP  s	            r9   rP   AutoencoderKLMagvit.forward,  sf     KKN..	  9 5A Akk!n##6MC((r;   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )NNNNNN)rD   N)T)FTN)&rZ   r[   r\   r]   r   r   r   r%   r   r_   r   r^   r+   r@   r   r%  r(  r,  r/  r
   rR   r`   r   r   r   r=  rE  r   rQ  rW  r_  rb  r7  rK  	GeneratorrP   ra   rb   rc   s   @r9   r  r    sn    (,$ !.B-
+
 !"! &#'-T/T/ T/ 	T/
 "#s(OT/  S/T/ c3hT/$ %T/& 'T/( )T/* +T/, !-T/ T/l+ 15/34859489="q ("q  (}"q %-SM	"q
 $,E?"q #+5/"q (0"q 
"qH  ! 37,0	"E*F$GG	H : 37::,0:	"E*F$GG	H: :6) )D )E-Y^YeYeJeDf )4 - -4 -5X]XdXdIdCe - -2 %,, c ell  %,, c ell 2ell 2 2I\ 2h;)ell ;) ;)}^c^j^jOjIk ;)@ "' /3)) ) 	)
 EOO,) 
}ell*	+) )r;   r  )*r#   typingr   r   r   rR   torch.nnrs   torch.nn.functional
functionalrL   configuration_utilsr   r   utilsr	   utils.accelerate_utilsr
   activationsr   modeling_outputsr   modeling_utilsr   vaer   r   
get_loggerrZ   loggerr{   r   Modulere   r   r   r   r   r   r   r   r   r;   r9   <module>r     s      ) )     B  8 ( 2 ' < 
		H	%n,bii n,bCE CELryy %RYY %P3RYY 3l0299 0f1BII 1hq qhv vrl)*k l)r;   