
    +hKC                        S SK JrJr  S SKrS SKJs  Jr  S SKJr  S SKJ	r	  SSK
JrJr  SSKJr  SS	KJrJr  SS
KJrJrJrJrJr  SSKJrJr  SSKJr  SSKJrJr  SSK J!r!J"r"   " S S\\\5      r# " S S\RH                  5      r% " S S\RH                  5      r& " S S\RH                  5      r' " S S\RH                  5      r(g)    )DictUnionN)nn
checkpoint   )ConfigMixinregister_to_config)PeftAdapterMixin   )BasicTransformerBlockSkipFFTransformerBlock)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttentionProcessorAttnAddedKVProcessorAttnProcessor)TimestepEmbeddingget_timestep_embedding)
ModelMixin)GlobalResponseNormRMSNorm)Downsample2D
Upsample2Dc            .         ^  \ rS rSrSr\                      S!S\S\S\S\S\S\S	\S
\S\S\S\S\S\S\S\S\S\S\S\S\S\S\4,U 4S jjj5       r	S"S jr
\S\\\4   4S j5       rS\\\\\4   4   4S jrS rS rU =r$ )#UVit2DModel'   Thidden_sizeuse_biashidden_dropoutcond_embed_dimmicro_cond_encode_dimmicro_cond_embed_dimencoder_hidden_size
vocab_sizecodebook_sizein_channelsblock_out_channelsnum_res_blocks
downsampleupsampleblock_num_headsnum_hidden_layersnum_attention_headsattention_dropoutintermediate_sizelayer_norm_epsln_elementwise_affinesample_sizec                   > [         TU ]  5         [        R                  " XqUS9U l        [        UUU5      U l        [        XUUUU5      U l        [        Xd-   XS9U l
        [        UUUUUUUUUUS5      U l        [        UUU5      U l        [        R                  " XUS9U l        [        R                  " [!        U5       Vs/ s H  n[#        UUUU-  UUUSUUUUUUUS9PM     sn5      U l        [        UUU5      U l        [        R                  " XUS9U l        [        UUUUUUUUUSUS9U l        [-        XUUUU	5      U l        SU l        g s  snf )Nbias)sample_proj_biasFada_norm_continuous)dimr.   attention_head_dimdropoutcross_attention_dimattention_bias	norm_type-ada_norm_continous_conditioning_embedding_dimnorm_elementwise_affinenorm_epsada_norm_biasff_inner_dimff_biasattention_out_bias)r*   r+   )super__init__r   Linearencoder_projr   encoder_proj_layer_normUVit2DConvEmbedembedr   
cond_embed	UVitBlock
down_blockproject_to_hidden_normproject_to_hidden
ModuleListranger   transformer_layersproject_from_hidden_normproject_from_hiddenup_blockConvMlmLayer	mlm_layergradient_checkpointing)selfr   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   _	__class__s                           X/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/unets/uvit_2d.pyrG   UVit2DModel.__init__*   s   D 	II&9XV'.{NLa'b$$Z9NP^`h

 , 1;
 $!
 '..@.Rg&h#!#+=QY!Z"$--$ 01#" 2A! &#(;'26I'I*(3#+3BM,A+"*!2$'/  2##
, )0^Mb(c%#%99[S[#\ !!
 &X7Ln^k
 ',#Us   #Ec                   ^ U R                  U5      nU R                  U5      n[        UR                  5       U R                  R
                  SSS9nUR                  UR                  S   S45      n[        R                  " X6/SS9nUR                  U R                  S9nU R                  U5      R                  UR                  5      nU R                  U5      nU R                  UUUUS9nUR                  u  ppUR                  SS	S
S5      R                  XU-  U	5      nU R!                  U5      nU R#                  U5      nU R$                   HB  m[        R&                  " 5       (       a  U R(                  (       a  U4S jnOTnU" UUUSU0S9nMD     U R+                  U5      nU R-                  U5      nUR                  XX5      R                  SS
SS	5      nU R/                  UUUUS9nU R1                  U5      nU$ )NTr   )flip_sin_to_cosdownscale_freq_shift   r9   )dtype)pooled_text_embencoder_hidden_statescross_attention_kwargsr   r   c                     > [        T/U Q76 $ Nr   )argslayers    r^   layer_#UVit2DModel.forward.<locals>.layer_   s    %e3d33    rg   )rh   ri   added_cond_kwargs)rI   rJ   r   flattenconfigr"   reshapeshapetorchcattorf   rM   rL   rO   permuterP   rQ   rT   is_grad_enabledrZ   rU   rV   rW   rY   )r[   	input_idsrh   rg   micro_condsri   micro_cond_embedshidden_states
batch_sizechannelsheightwidthrn   logitsrm   s                 @r^   forwardUVit2DModel.forward   s    $ 1 12G H $ < <=R S2!4;;#D#DVZqr
 .55yq7I26NO))_$HaP),,4::,>///:==>S>Y>YZ

9-+"7#9	 ( 
 /<.A.A+
f%--aAq9AA*W\n^fg33MB..}=,,E$$&&4+F+F4 "&;'=#4o"F	M -  55mD00?%--j%RZZ[\^_abdef+"7#9	 & 
 .rp   returnc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  5       X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processor
.processor.)hasattrr   named_children)r   r   r   sub_namechildfn_recursive_add_processorss        r^   r   @UVit2DModel.attn_processors.<locals>.fn_recursive_add_processors   sZ    v//282F2F2H
V:./#)#8#8#:+tfAhZ,@%T $; rp   )strrv   r   Moduler   r   r   )r[   r   r   r   r   s       @r^   attn_processorsUVit2DModel.attn_processors   sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 rp   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers.

        If `processor` is a dict, the key needs to define the path to the corresponding cross attention
        processor. This is strongly recommended when setting trainable attention processors.

z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r   r   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorr   r   )r   
isinstancedictr   popr   )r   r   r   r   r   fn_recursive_attn_processors        r^   r   CUVit2DModel.set_attn_processor.<locals>.fn_recursive_attn_processor  ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;rp   N)lenr   keysr   r   
ValueErrorr   rv   r   r   r   )r[   r   countr   r   r   s        @r^   set_attn_processorUVit2DModel.set_attn_processor   s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2rp   c           	      ~   [        S U R                  R                  5        5       5      (       a  [        5       nOr[        S U R                  R                  5        5       5      (       a  [	        5       nO8[        S[        [        U R                  R                  5       5      5       35      eU R                  U5        g)zU
Disables custom attention processors and sets the default attention implementation.
c              3   F   #    U  H  oR                   [        ;   v   M     g 7frk   )r]   r   .0procs     r^   	<genexpr>9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>  s     iKh4~~!>>Kh   !c              3   F   #    U  H  oR                   [        ;   v   M     g 7frk   )r]   r   r   s     r^   r   r     s     hJg$#==Jgr   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allr   valuesr   r   r   nextiterr   )r[   r   s     r^   set_default_attn_processor&UVit2DModel.set_default_attn_processor  s     i4K_K_KfKfKhiii,.Ih$J^J^JeJeJghhh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*rp   )rM   rO   rL   rI   rJ   rZ   rY   rV   rU   rQ   rP   rT   rW   )i   F              i   r   i@   i    r   r   r   FF         r   i   gư>T@   rk   )__name__
__module____qualname____firstlineno__ _supports_gradient_checkpointingr
   intboolfloatrG   r   propertyr   r   r   r   r   r   r   __static_attributes____classcell__r]   s   @r^   r   r   '   s   '+$   #!%($(#&!"% !!##%#&!% $&*?j, j, 	j,
 j, j,  #j, "j, !j, j, j, j,   !j," #j,$ %j,& 'j,( )j,, -j,. !/j,2 !3j,6 7j,: ;j,<  $=j,> ?j, j,X;z c+=&=!>  0 AE2Dd3PbKbFc2c,d  AF+ +rp   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )rK   i!  c                    > [         TU ]  5         [        R                  " X15      U l        [        XU5      U l        [        R                  " XSUS9U l        g Nrd   )kernel_sizer6   )	rF   rG   r   	Embedding
embeddingsr   
layer_normConv2dconv)r[   r'   r(   r%   elementwise_affineepsr6   r]   s          r^   rG   UVit2DConvEmbed.__init__"  sB    ,,z?!+4FGIIk1SWX	rp   c                     U R                  U5      nU R                  U5      nUR                  SSSS5      nU R                  U5      nU$ )Nr   r   rd   r   )r   r   ry   r   )r[   r{   r   s      r^   r   UVit2DConvEmbed.forward(  sH    __Y/
__Z0
''1a3
YYz*
rp   )r   r   r   r   r   r   r   rG   r   r   r   r   s   @r^   rK   rK   !  s    Y rp   rK   c                   >   ^  \ rS rSrS\S\S\4U 4S jjrS rSrU =r	$ )rN   i0  r)   r*   r+   c                   > [         TU ]  5         U
(       a  [        USSSSSUUUS9	U l        OS U l        [        R
                  " [        U5       Vs/ s H  n[        UUUUUU5      PM     sn5      U l        [        R
                  " [        U5       Vs/ s H  n[        UUX-  UUU	UUUS9	PM     sn5      U l
        U(       a  [        USSSSSUUUS	S
9
U l        g S U l        g s  snf s  snf )NTr   Conv2d_0r   rms_norm)use_convpaddingr   r   r>   r   r   r6   )r=   rE   r   F)	use_conv_transposer   r   r   r>   r   r   r6   interpolate)rF   rG   r   r*   r   rR   rS   ConvNextBlock
res_blocksr   attention_blocksr   r+   )r[   r   r)   r   r    r2   r1   r   r,   r/   r*   r+   ir\   r]   s                 r^   rG   UVitBlock.__init__1  s/    	*$"#8
DO #DO-- ~.
 /A ")" /

 !# ~. /A '#/%#+'/
 /!
" &#'$"#8!DM !DMY
s   C%C*c                    U R                   b  U R                  U5      n[        U R                  U R                  5       Hk  u  pVU" X5      nUR                  u  pxpUR                  XxX-  5      R                  SSS5      nU" XUS9nUR                  SSS5      R                  XxX5      nMm     U R                  b  U R                  U5      nU$ )Nr   r   rd   )rh   ri   )r*   zipr   r   ru   viewry   r+   )r[   xrg   rh   ri   	res_blockattention_blockr   r   r   r   s              r^   r   UVitBlock.forward  s    ??&"A*-doot?T?T*U&I!-A23''/J&zV^<DDQ1MAWmA 		!Q"''
fLA +V ==$a Arp   )r   r*   r   r+   )
r   r   r   r   r   r   rG   r   r   r   r   s   @r^   rN   rN   0  s5    L! L! L! L!\ rp   rN   c                   4   ^  \ rS rSr SU 4S jjrS rSrU =r$ )r   i  c           	        > [         TU ]  5         [        R                  " UUSSUUS9U l        [        XU5      U l        [        R                  " U[        X-  5      US9U l	        [        R                  " 5       U l        [        [        X-  5      5      U l        [        R                  " [        X-  5      XS9U l        [        R                  " U5      U l        [        R                  " XaS-  U5      U l        g )Nr   rd   )r   r   groupsr6   r5   r   )rF   rG   r   r   	depthwiser   normrH   r   channelwise_linear_1GELUchannelwise_actr   channelwise_normchannelwise_linear_2Dropoutchannelwise_dropoutcond_embeds_mapper)	r[   r   r1   r2   r   r    r   res_ffn_factorr]   s	           r^   rG   ConvNextBlock.__init__  s     	
 H6KL	$&IIhH<U8V]e$f!!wwy 23x7P3Q R$&IIc(2K.Lh$f!#%::n#= "$))KAx"Prp   c                    UnU R                  U5      nUR                  SSSS5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  SSSS5      nX-   nU R                  [        R                  " U5      5      R                  SSS9u  pEUSUS S 2S S 2S S 4   -   -  US S 2S S 2S S 4   -   nU$ )Nr   r   r   rd   re   )r   ry   r   r   r   r   r   r   r   Fsiluchunk)r[   r   cond_embedsx_resscaleshifts         r^   r   ConvNextBlock.forward  s   NN1IIaAq!IIaL%%a(  #!!!$%%a($$Q'IIaAq!I..qvvk/BCII!QRISU1at+,,-aD$6F0GGrp   )r   r   r   r   r   r   r   r   )   r   r   s   @r^   r   r     s    uvQ( rp   r   c                   J   ^  \ rS rSrS\S\S\S\S\S\4U 4S jjrS	 rS
r	U =r
$ )rX   i  r(   r'   r   r2   r1   r&   c                    > [         TU ]  5         [        R                  " XSUS9U l        [        X%U5      U l        [        R                  " X&SUS9U l        g r   )rF   rG   r   r   conv1r   r   conv2)r[   r(   r'   r   r2   r1   r&   r]   s          r^   rG   ConvMlmLayer.__init__  sI     	YY1AT\]
!+?TUYY{qxX
rp   c                     U R                  U5      nU R                  UR                  SSSS5      5      R                  SSSS5      nU R                  U5      nU$ )Nr   r   r   rd   )r  r   ry   r	  )r[   r~   r   s      r^   r   ConvMlmLayer.forward  sW    

=1(=(=aAq(IJRRSTVWYZ\]^M*rp   )r  r	  r   )r   r   r   r   r   r   r   rG   r   r   r   r   s   @r^   rX   rX     sS    YY Y 	Y
  $Y Y Y rp   rX   ))typingr   r   rv   torch.nn.functionalr   
functionalr   torch.utils.checkpointr   configuration_utilsr	   r
   loadersr   	attentionr   r   attention_processorr   r   r   r   r   r   r   r   modeling_utilsr   normalizationr   r   resnetr   r   r   r   rK   rN   r   rX    rp   r^   <module>r     s          - B ' E  C ' 7 -w+*k+; w+tbii `		 `F*BII *Z299 rp   