
    +h	B                         S SK Jr  S SKJrJrJr  S SKrS SKJr  SSK	J
r
Jr  SSKJr  SSKJrJrJr  SS	KJr  S
SKJrJrJr  \ " S S\5      5       r " S S\\
5      rg)    )	dataclass)OptionalTupleUnionN   )ConfigMixinregister_to_config)
BaseOutput   )GaussianFourierProjectionTimestepEmbedding	Timesteps)
ModelMixin   )UNetMidBlock2Dget_down_blockget_up_blockc                   8    \ rS rSr% Sr\R                  \S'   Srg)UNet2DOutput   z
The output of [`UNet2DModel`].

Args:
    sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
        The hidden states output from the last layer of the model.
sample N)	__name__
__module____qualname____firstlineno____doc__torchTensor__annotations____static_attributes__r       X/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/unets/unet_2d.pyr   r      s     LLr"   r   c            ;         ^  \ rS rSrSrSrS/r\                            S*S\\	\
\\
\
4   4      S\
S\
S\S	\S
\\
   S\
S\S\\S4   S\\   S\\S4   S\\
S4   S\
S\S\
S\S\S\S\S\\
   S\
S\\
   S\S\S\S\\   S \\
   S!\\
   48U 4S" jjj5       r  S+S#\R"                  S$\	\R"                  \\
4   S%\\R"                     S&\S'\	\\4   4
S( jjrS)rU =r$ ),UNet2DModel(   a  
A 2D UNet model that takes a noisy sample and a timestep and returns a sample shaped output.

This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
for all models (such as downloading or saving).

Parameters:
    sample_size (`int` or `Tuple[int, int]`, *optional*, defaults to `None`):
        Height and width of input/output sample. Dimensions must be a multiple of `2 ** (len(block_out_channels) -
        1)`.
    in_channels (`int`, *optional*, defaults to 3): Number of channels in the input sample.
    out_channels (`int`, *optional*, defaults to 3): Number of channels in the output.
    center_input_sample (`bool`, *optional*, defaults to `False`): Whether to center the input sample.
    time_embedding_type (`str`, *optional*, defaults to `"positional"`): Type of time embedding to use.
    freq_shift (`int`, *optional*, defaults to 0): Frequency shift for Fourier time embedding.
    flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
        Whether to flip sin to cos for Fourier time embedding.
    down_block_types (`Tuple[str]`, *optional*, defaults to `("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D")`):
        Tuple of downsample block types.
    mid_block_type (`str`, *optional*, defaults to `"UNetMidBlock2D"`):
        Block type for middle of UNet, it can be either `UNetMidBlock2D` or `None`.
    up_block_types (`Tuple[str]`, *optional*, defaults to `("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D")`):
        Tuple of upsample block types.
    block_out_channels (`Tuple[int]`, *optional*, defaults to `(224, 448, 672, 896)`):
        Tuple of block output channels.
    layers_per_block (`int`, *optional*, defaults to `2`): The number of layers per block.
    mid_block_scale_factor (`float`, *optional*, defaults to `1`): The scale factor for the mid block.
    downsample_padding (`int`, *optional*, defaults to `1`): The padding for the downsample convolution.
    downsample_type (`str`, *optional*, defaults to `conv`):
        The downsample type for downsampling layers. Choose between "conv" and "resnet"
    upsample_type (`str`, *optional*, defaults to `conv`):
        The upsample type for upsampling layers. Choose between "conv" and "resnet"
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    act_fn (`str`, *optional*, defaults to `"silu"`): The activation function to use.
    attention_head_dim (`int`, *optional*, defaults to `8`): The attention head dimension.
    norm_num_groups (`int`, *optional*, defaults to `32`): The number of groups for normalization.
    attn_norm_num_groups (`int`, *optional*, defaults to `None`):
        If set to an integer, a group norm layer will be created in the mid block's [`Attention`] layer with the
        given number of groups. If left as `None`, the group norm layer will only be created if
        `resnet_time_scale_shift` is set to `default`, and if created will have `norm_num_groups` groups.
    norm_eps (`float`, *optional*, defaults to `1e-5`): The epsilon for normalization.
    resnet_time_scale_shift (`str`, *optional*, defaults to `"default"`): Time scale shift config
        for ResNet blocks (see [`~models.resnet.ResnetBlock2D`]). Choose from `default` or `scale_shift`.
    class_embed_type (`str`, *optional*, defaults to `None`):
        The type of class embedding to use which is ultimately summed with the time embeddings. Choose from `None`,
        `"timestep"`, or `"identity"`.
    num_class_embeds (`int`, *optional*, defaults to `None`):
        Input dimension of the learnable embedding matrix to be projected to `time_embed_dim` when performing class
        conditioning with `class_embed_type` equal to `None`.
Tnormsample_sizein_channelsout_channelscenter_input_sampletime_embedding_typetime_embedding_dim
freq_shiftflip_sin_to_cosdown_block_types.mid_block_typeup_block_typesblock_out_channelslayers_per_blockmid_block_scale_factordownsample_paddingdownsample_typeupsample_typedropoutact_fnattention_head_dimnorm_num_groupsattn_norm_num_groupsnorm_epsresnet_time_scale_shiftadd_attentionclass_embed_typenum_class_embedsnum_train_timestepsc                   > [         T*U ]  5         Xl        U=(       d    US   S-  n[        U	5      [        U5      :w  a  [	        SU	 SU S35      e[        U5      [        U	5      :w  a  [	        SU SU	 S35      e[
        R                  " X,S   SS	S
9U l        US:X  a  [        US   SS9U l	        SUS   -  nOJUS:X  a  [        US   X5      U l	        US   nO*US:X  a$  [
        R                  " UUS   5      U l	        US   n[        WU5      U l        Uc   Ub  [
        R                  " UU5      U l        OBUS:X  a  [        UU5      U l        O*US:X  a  [
        R                  " UU5      U l        OS U l        [
        R                   " / 5      U l        S U l        [
        R                   " / 5      U l        US   n[)        U	5       HY  u  n n!Un"UU    nU [        U5      S-
  :H  n#[+        U!UU"UUU#(       + UUUUb  UOUUUUUS9n$U R"                  R-                  U$5        M[     U
c  S U l        O#[/        US   UUUUUUUb  UOUS   UUUS9U l        [1        [3        U5      5      n%U%S   n[)        U5       Hz  u  n n&Un'U%U    nU%[5        U S-   [        U5      S-
  5         n"U [        U5      S-
  :H  n#[7        U&US-   U"UU'UU#(       + UUUUb  UOUUUUS9n(U R&                  R-                  U(5        M|     Ub  UO[5        US   S-  S5      n)[
        R8                  " US   U)US9U l        [
        R<                  " 5       U l        [
        R                  " US   USSS
9U l         g )Nr      z\Must provide the same number of `down_block_types` as `up_block_types`. `down_block_types`: z. `up_block_types`: .zbMust provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: z. `down_block_types`: r   )r   r   )kernel_sizepaddingfourier   )embedding_sizescaler   
positionallearnedtimestepidentityr   )
num_layersr)   r*   temb_channelsadd_downsample
resnet_epsresnet_act_fnresnet_groupsr;   r6   r?   r7   r9   )r)   rR   r9   rT   rU   output_scale_factorr?   r;   rV   attn_groupsr@   )rQ   r)   r*   prev_output_channelrR   add_upsamplerT   rU   rV   r;   r?   r8   r9       )num_channels
num_groupseps)!super__init__r(   len
ValueErrornnConv2dconv_inr   	time_projr   	Embeddingr   time_embeddingclass_embeddingIdentity
ModuleListdown_blocks	mid_block	up_blocks	enumerater   appendr   listreversedminr   	GroupNormconv_norm_outSiLUconv_actconv_out)+selfr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   time_embed_dimtimestep_input_dimoutput_channelidown_block_typeinput_channelis_final_block
down_blockreversed_block_out_channelsup_block_typerZ   up_blocknum_groups_out	__class__s+                                             r#   ra   UNet2DModel.__init___   s0   @ 	&+H/A!/Dq/H  C$77no  oA  AU  Vd  Ue  ef  g  !"c*:&;;t  vH  uI  I_  `p  _q  qr  s 
 yy.CQR\bc )+6FXYZF[cefDN!"%7%:!: L0&'9!'<oZDN!3A!6 I-\\*=?QRS?TUDN!3A!6/0BNS #(8(D#%<<0@.#QD +#45G#XD +#%;;~~#ND #'D ==,r* ,A."+,<"=A*M/2N#&8"9A"==N'+)+,#11#$-9K9W#5]k#5(? /J  ##J/+ #>0 !!DN+.r2,#$$:(?9K9W#5]opr]s-0+DN '+84F+G&H#4Q7 ). 9A}"08;N7AE3GYCZ]^C^8_`M#&8"9A"==N#+a/)+$7,!//#$-9K9W#5]k(?+H  NN!!(+/ !:4 -<,GSQcdeQfjkQkmoMp\\7I!7LYgmuv			"4Q"7ST^_`r"   r   rO   class_labelsreturn_dictreturnc           	         U R                   R                  (       a  SU-  S-
  nUn[        R                  " U5      (       d0  [        R                  " U/[        R
                  UR                  S9nOR[        R                  " U5      (       a7  [        UR                  5      S:X  a  US   R                  UR                  5      nU[        R                  " UR                  S   UR                  UR                  S9-  nU R                  U5      nUR                  U R                  S9nU R                  U5      nU R                  bf  Uc  [        S5      eU R                   R                   S:X  a  U R                  U5      nU R                  U5      R                  U R                  S9nXx-   nOU R                  c  Ub  [        S	5      eUn	U R#                  U5      nU4n
U R$                   H+  n['        US
5      (       a  U" XU	S9u  pn	OU" XS9u  pX-  n
M-     U R(                  b  U R)                  X5      nSn	U R*                   H\  nU
[        UR,                  5      * S nU
S[        UR,                  5      *  n
['        US
5      (       a  U" XXy5      u  pMS  U" XU5      nM^     U R/                  U5      nU R1                  U5      nU R3                  U5      nU	b  X-  nU R                   R4                  S:X  a@  UR7                  UR                  S   /S/[        UR                  SS 5      -  Q75      nX-  nU(       d  U4$ [9        US9$ )aC  
The [`UNet2DModel`] forward method.

Args:
    sample (`torch.Tensor`):
        The noisy input tensor with the following shape `(batch, channel, height, width)`.
    timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
    class_labels (`torch.Tensor`, *optional*, defaults to `None`):
        Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~models.unets.unet_2d.UNet2DOutput`] instead of a plain tuple.

Returns:
    [`~models.unets.unet_2d.UNet2DOutput`] or `tuple`:
        If `return_dict` is True, an [`~models.unets.unet_2d.UNet2DOutput`] is returned, otherwise a `tuple` is
        returned where the first element is the sample tensor.
r   g      ?)dtypedevicer   N)r   z=class_labels should be provided when doing class conditioningrO   zJclass_embedding needs to be initialized in order to use class conditioning	skip_conv)hidden_statestembskip_sample)r   r   rI   r   )r   )configr+   r   	is_tensortensorlongr   rb   shapetoonesr   rg   ri   rj   rc   rA   rf   rm   hasattrrn   ro   resnetsrv   rx   ry   r,   reshaper   )rz   r   rO   r   r   	timestepst_embemb	class_embr   down_block_res_samplesdownsample_blockres_samplesupsample_blocks                 r#   forwardUNet2DModel.forward   s   2 ;;**Z#%F 	y))i[

6==YI__Y''C	,@A,E!$**6==9I 

6<<?)//ZcZjZj kk	y)
 tzz*!!%(+# !`aa{{++z9#~~l;,,\:==DJJ=OI/C!!)l.Fijj f% #) $ 0 0'553C"(40[ '7V&V#"1" !1 >>%^^F0F "nnN0#n6L6L2M1M1OPK%;<Zs>CYCY?Z>Z%["~{33&4V#&[#'SA - ##F+v&v&"!F;;**i7!))6<<?*[qcCUVUWHXDY>Y*[\I'F96**r"   )rj   rx   rf   rv   ry   rm   rn   r(   ri   rg   ro   )Nr   r   FrM   Nr   T)DownBlock2DAttnDownBlock2Dr   r   r   )AttnUpBlock2Dr   r   	UpBlock2D)   i  i  i  r   r   r   convr   g        silu   r\   Ngh㈵>defaultTNNN)NT)r   r   r   r   r    _supports_gradient_checkpointing _skip_layerwise_casting_patternsr	   r   r   intr   boolstrfloatra   r   r   r   r   r!   __classcell__)r   s   @r#   r%   r%   (   s   1f (,$(.x$ >B$)#/,0 $,t(8*j.B !()"#%#,-!.2'0"*.*.-1;XaeCsCx$89:Xa Xa 	Xa
 "Xa !Xa %SMXa Xa Xa  S/Xa !Xa c3hXa "#s(OXa Xa !&Xa   !Xa" #Xa$ %Xa& 'Xa( )Xa* %SM+Xa, -Xa. 'sm/Xa0 1Xa2 "%3Xa4 5Xa6 #3-7Xa8 #3-9Xa: &c];Xa Xa| 04 h+h+ eS01h+ u||,	h+
 h+ 
|U"	#h+ h+r"   r%   )dataclassesr   typingr   r   r   r   torch.nnrd   configuration_utilsr   r	   utilsr
   
embeddingsr   r   r   modeling_utilsr   unet_2d_blocksr   r   r   r   r%   r   r"   r#   <module>r      sV    " ) )   B  P P ' H H 	: 	 	z+*k z+r"   