
    +h                         S SK Jr  S SKrS SKJr  SSKJrJr  SSKJ	r	J
r
  SSKJr  SSKJr  SSKJr   " S	 S
\R"                  5      r " S S\\5      rg)    )UnionN   )ConfigMixinregister_to_config)DecoderOutputVectorQuantizer)
ModelMixin)VQEncoderOutput)apply_forward_hookc                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )MixingResidualBlock   z5
Residual block with mixing used by Paella's VQ-VAE.
c           
      "  > [         TU ]  5         [        R                  " USSS9U l        [        R
                  " [        R                  " S5      [        R                  " XSUS95      U l        [        R                  " USSS9U l	        [        R
                  " [        R                  " X5      [        R                  " 5       [        R                  " X!5      5      U l        [        R                  " [        R                  " S5      SS	9U l        g )
NFgư>)elementwise_affineeps   r   )kernel_sizegroups   T)requires_grad)super__init__nn	LayerNormnorm1
SequentialReplicationPad2dConv2d	depthwisenorm2LinearGELUchannelwise	Parametertorchzerosgammas)selfinp_channels	embed_dim	__class__s      q/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/wuerstchen/modeling_paella_vq_model.pyr   MixingResidualBlock.__init__!   s    \\,5dS
"BIIlVW`l$m

 \\,5dS
==IIl.	299Y;]
 ll5;;q>F    c           	         U R                   nU R                  UR                  SSSS5      5      R                  SSSS5      SUS   -   -  US   -   nXR                  U5      US   -  -   nU R	                  UR                  SSSS5      5      R                  SSSS5      SUS   -   -  US   -   nXR                  UR                  SSSS5      5      R                  SSSS5      US   -  -   nU$ )Nr      r   r         )r'   r   permuter   r    r#   )r(   xmodsx_temps       r,   forwardMixingResidualBlock.forward1   s    {{AIIaAq12::1aAF!dSTg+VY]^_Y``v&a00AIIaAq12::1aAF!dSTg+VY]^_Y``  1a!;<DDQ1aPSWXYSZZZr.   )r#   r   r'   r   r    )	__name__
__module____qualname____firstlineno____doc__r   r7   __static_attributes____classcell__r+   s   @r,   r   r      s    G  r.   r   c                   T  ^  \ rS rSrSr\         SS\S\S\S\S\S\S	\S
\S\4U 4S jjj5       r\	SS\
R                  S\S\4S jj5       r\	 SS\
R                  S\S\S\\\
R                  4   4S jj5       rSS\
R                  S\S\\\
R                  4   4S jjrSrU =r$ )PaellaVQModel:   a)  VQ-VAE model from Paella model.

This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
implements for all the model (such as downloading or saving, etc.)

Parameters:
    in_channels (int, *optional*, defaults to 3): Number of channels in the input image.
    out_channels (int,  *optional*, defaults to 3): Number of channels in the output.
    up_down_scale_factor (int, *optional*, defaults to 2): Up and Downscale factor of the input image.
    levels  (int, *optional*, defaults to 2): Number of levels in the model.
    bottleneck_blocks (int, *optional*, defaults to 12): Number of bottleneck blocks in the model.
    embed_dim (int, *optional*, defaults to 384): Number of hidden channels in the model.
    latent_channels (int, *optional*, defaults to 4): Number of latent channels in the VQ-VAE model.
    num_vq_embeddings (int, *optional*, defaults to 8192): Number of codebook vectors in the VQ-VAE.
    scale_factor (float, *optional*, defaults to 0.3764): Scaling factor of the latent space.
in_channelsout_channelsup_down_scale_factorlevelsbottleneck_blocksr*   latent_channelsnum_vq_embeddingsscale_factorc
                 >  > [         TU ]  5         [        [        U5      5       V
s/ s H
  oSU
-  -  PM     nn
[        R
                  " [        R                  " U5      [        R                  " XS-  -  US   SS95      U l        / n[        U5       H[  n
U
S:  a.  UR                  [        R                  " XS-
     X   SSSS95        [        X   X   S-  5      nUR                  U5        M]     UR                  [        R
                  " [        R                  " US   USSS	9[        R                  " U5      5      5        [        R
                  " U6 U l        [        XSS
S9U l        [        R
                  " [        R                  " X{S   SS95      /n[        U5       H  n
[        U
S:X  a  UOS5       H3  n[        XS-
  U
-
     XS-
  U
-
     S-  5      nUR                  U5        M5     XS-
  :  d  MW  UR                  [        R                  " XS-
  U
-
     XS-
  U
-
     SSSS95        M     [        R
                  " U6 U l        [        R
                  " [        R                  " US   X#S-  -  SS9[        R"                  " U5      5      U l        g s  sn
f )Nr0   r   r   )r   r1   )r   stridepaddingF)r   biasg      ?)vq_embed_dimlegacybeta)r   r   reversedranger   r   PixelUnshuffler   in_blockappendr   BatchNorm2ddown_blocksr   
vquantizerConvTranspose2d	up_blocksPixelShuffle	out_block)r(   rD   rE   rF   rG   rH   r*   rI   rJ   rK   ic_levelsrZ   blockr]   jr+   s                   r,   r   PaellaVQModel.__init__L   s_    	19%-1HI1HA!Q$'1HI23IIk!$;;Xa[VWX
 vA1u""299X!e_hkWXablm#no'X[1_EEu%	 
 	MM		(2,QUS/	
 ==+6 **;bgnrs ]]299_rlXY#Z[\	vAQ,A>+HaZ!^,DhXYz\]~F^abFbc  ' ? A:~  && !a0(A:>2JXYbcmn  	2IIhqk<2I#IWXYOO01
I Js   Jr4   return_dictreturnc                 l    U R                  U5      nU R                  U5      nU(       d  U4$ [        US9$ )N)latents)rW   rZ   r
   )r(   r4   re   hs       r,   encodePaellaVQModel.encode   s5    MM!Q4Kq))r.   ri   force_not_quantizec                     U(       d  U R                  U5      u  n  nOUnU R                  U5      nU R                  U5      nU(       d  U4$ [        US9$ )Nsample)r[   r]   r_   r   )r(   ri   rl   re   quant_r4   decs           r,   decodePaellaVQModel.decode   sQ     "//!,KE1aENN5!nnQ6MC((r.   ro   c                     UnU R                  U5      R                  nU R                  U5      R                  nU(       d  U4$ [	        US9$ )z
Args:
    sample (`torch.Tensor`): Input sample.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`DecoderOutput`] instead of a plain tuple.
rn   )rj   rh   rs   ro   r   )r(   ro   re   r4   ri   rr   s         r,   r7   PaellaVQModel.forward   sD     KKN""kk!n##6MC((r.   )rZ   rW   r_   r]   r[   )	r   r   r0   r0      i  r1   i    g?)T)TT)r9   r:   r;   r<   r=   r   intfloatr   r   r%   Tensorboolr
   rj   r   r   rs   r7   r>   r?   r@   s   @r,   rB   rB   :   s<   "  $%!# !%$5
5
 5
 "	5

 5
 5
 5
 5
 5
 5
 5
n * *4 *? * * TX))37)MQ)	}ell*	+) ))ell ) )}^c^j^jOjIk ) )r.   rB   )typingr   r%   torch.nnr   configuration_utilsr   r   models.autoencoders.vaer   r   models.modeling_utilsr	   models.vq_modelr
   utils.accelerate_utilsr   Moduler   rB    r.   r,   <module>r      s?        B E / . 8")) <r)J r)r.   