
    +hW                     $   S SK JrJrJrJr  S SKrS SKJr  S SK	r	S SK
Jr  S SKJr  SSKJrJr  SSKJr  SSKJrJr  SS	KJr  S
SKJrJrJrJrJr  \R>                  R@                   " S S\5      5       r!\ " S S\RD                  \\5      5       r#g)    )DictOptionalTupleUnionN)
FrozenDict   )ConfigMixinflax_register_to_config)
BaseOutput   )FlaxTimestepEmbeddingFlaxTimesteps)FlaxModelMixin   )FlaxCrossAttnDownBlock2DFlaxCrossAttnUpBlock2DFlaxDownBlock2DFlaxUNetMidBlock2DCrossAttnFlaxUpBlock2Dc                   8    \ rS rSr% Sr\R                  \S'   Srg)FlaxUNet2DConditionOutput#   z
The output of [`FlaxUNet2DConditionModel`].

Args:
    sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)`):
        The hidden states output conditioned on `encoder_hidden_states` input. Output of last layer of model.
sample N)	__name__
__module____qualname____firstlineno____doc__jnpndarray__annotations____static_attributes__r       g/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/unets/unet_2d_condition_flax.pyr   r   #   s     KKr$   r   c                   @   \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\\S'   Sr
\\S	4   \S
'   Sr\\S	4   \S'   Sr\\   \S'   Sr\\\\   4   \S'   Sr\\S	4   \S'   Sr\\S'   Sr\\\\S	4   4   \S'   Sr\\\\\S	4   4      \S'   Sr\\S'   Sr\\S'   Sr\\S'   \R8                  r\R:                  \S'   Sr\\S '   S!r\\S"'   Sr \\S#'   Sr!\\S$'   S%r"\\\\S	4   4   \S&'   Sr#\\   \S''   Sr$\\   \S('   S)r%\\S*'   Sr&\\   \S+'   S,\'RP                  S-\)4S. jr*S:S/ jr+     S;S0\RX                  S1\\RX                  \\4   S2\RX                  S3\\\-\)4      S4\\\RX                  S	4      S5\\RX                     S6\S7\S-\\.\\RX                     4   4S8 jjr/S9r0g)<FlaxUNet2DConditionModel0   a  
A conditional 2D UNet model that takes a noisy sample, conditional state, and a timestep and returns a sample
shaped output.

This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it's generic methods
implemented for all models (such as downloading or saving).

This model is also a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matters related to its
general usage and behavior.

Inherent JAX features such as the following are supported:
- [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
- [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
- [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
- [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

Parameters:
    sample_size (`int`, *optional*):
        The size of the input sample.
    in_channels (`int`, *optional*, defaults to 4):
        The number of channels in the input sample.
    out_channels (`int`, *optional*, defaults to 4):
        The number of channels in the output.
    down_block_types (`Tuple[str]`, *optional*, defaults to `("FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxDownBlock2D")`):
        The tuple of downsample blocks to use.
    up_block_types (`Tuple[str]`, *optional*, defaults to `("FlaxUpBlock2D", "FlaxCrossAttnUpBlock2D", "FlaxCrossAttnUpBlock2D", "FlaxCrossAttnUpBlock2D")`):
        The tuple of upsample blocks to use.
    mid_block_type (`str`, *optional*, defaults to `"UNetMidBlock2DCrossAttn"`):
        Block type for middle of UNet, it can be one of `UNetMidBlock2DCrossAttn`. If `None`, the mid block layer
        is skipped.
    block_out_channels (`Tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
        The tuple of output channels for each block.
    layers_per_block (`int`, *optional*, defaults to 2):
        The number of layers per block.
    attention_head_dim (`int` or `Tuple[int]`, *optional*, defaults to 8):
        The dimension of the attention heads.
    num_attention_heads (`int` or `Tuple[int]`, *optional*):
        The number of attention heads.
    cross_attention_dim (`int`, *optional*, defaults to 768):
        The dimension of the cross attention features.
    dropout (`float`, *optional*, defaults to 0):
        Dropout probability for down, up and bottleneck blocks.
    flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
        Whether to flip the sin to cos in the time embedding.
    freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding.
    use_memory_efficient_attention (`bool`, *optional*, defaults to `False`):
        Enable memory efficient attention as described [here](https://huggingface.co/papers/2112.05682).
    split_head_dim (`bool`, *optional*, defaults to `False`):
        Whether to split the head dimension into a new axis for the self-attention computation. In most cases,
        enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL.
    sample_size   in_channelsout_channels)CrossAttnDownBlock2Dr.   r.   DownBlock2D.down_block_types)	UpBlock2DCrossAttnUpBlock2Dr2   r2   up_block_typesUNetMidBlock2DCrossAttnmid_block_typeFonly_cross_attention)i@  i     r7   block_out_channelsr   layers_per_block   attention_head_dimNnum_attention_headsr7   cross_attention_dimg        dropoutuse_linear_projectiondtypeTflip_sin_to_cosr   
freq_shiftuse_memory_efficient_attentionsplit_head_dimr   transformer_layers_per_blockaddition_embed_typeaddition_time_embed_dim@   addition_embed_type_num_heads%projection_class_embeddings_input_dimrngreturnc                    SU R                   U R                  U R                  4n[        R                  " U[        R                  S9n[        R
                  " S[        R                  S9n[        R                  " SSU R                  4[        R                  S9n[        R                  R                  U5      u  pgXgS.nS n	U R                  S:X  a  SU R                  R                  -  U R                  R                  -   U R                  R                  :H  n
U
(       a  SOSnU R                  R                  XR                  R                  -  -
  nU R                  U-
  nXR                  -  n[        R                  " SU4[        R                  S9[        R                  " SU4[        R                  S9S.n	U R                  XXEU	5      S	   $ )
Nr   r@   )r   )paramsr>   	text_time      )text_embedstime_idsrO   )r,   r*   r    zerosfloat32onesint32r=   jaxrandomsplitrF   configrG   rJ   init)selfrK   sample_shaper   	timestepsencoder_hidden_states
params_rngdropout_rngrngsadded_cond_kwargs
is_refinernum_micro_conditionstext_embeds_dimtime_ids_channelstime_ids_dimss                  r%   init_weights%FlaxUNet2DConditionModel.init_weights   s   4++T-=-=t?O?OP<s{{;HHT3	 #		1a1I1I*JRUR]R] ^"%**"2"23"7
$= ##{2 DKK777$++:Y:YY;;DDE  )31 "kkOO${{'J'JJO !% J J_ \-1M1MMM"yy!_)=S[[QIIq-&8L! yyyIZ[\deer$   c                 
   U R                   nUS   S-  nU R                  b  [        S5      eU R                  =(       d    U R                  n[        R
                  " US   SSSU R                  S9U l        [        US   U R                  U R                  R                  S9U l        [        X R                  S	9U l        U R                  n[!        U["        5      (       a  U4[%        U R&                  5      -  n[!        U[(        5      (       a  U4[%        U R&                  5      -  nU R*                  n[!        U[(        5      (       a  U/[%        U R&                  5      -  nU R,                  c  S U l        OU R,                  S
:X  ao  U R0                  c  [        SU R,                   S35      e[        U R0                  U R                  U R                  5      U l        [        X R                  S	9U l        O[        SU R,                   S35      e/ nUS   n[5        U R&                  5       H  u  pUn
X   nU[%        U5      S-
  :H  nU	S:X  a\  [7        U
UU R8                  U R:                  XX   X8   U(       + U R<                  XH   U R>                  U R@                  U R                  S9nO1[C        U
UU R8                  U R:                  U(       + U R                  S9nURE                  U5        M     X`l#        U R                  RH                  S:X  aQ  [K        US   U R8                  US   US   U R<                  U R>                  U R@                  U R                  S9U l&        OAU R                  RH                  c  S U l&        O"[        SU R                  RH                   35      e/ n[O        [Q        U5      5      n[O        [Q        U5      5      n[O        [Q        U5      5      nUS   n[O        [Q        U5      5      n[5        U RR                  5       H  u  nnUnX   nU[U        US-   [%        U5      S-
  5         n
U[%        U5      S-
  :H  nUS:X  aa  [W        U
UUU R:                  S-   UU   X   U(       + U R8                  U R<                  XH   U R>                  U R@                  U R                  S9nO5[Y        U
UUU R:                  S-   U(       + U R8                  U R                  S9nURE                  U5        UnM     Xl-        [        R\                  " SSS9U l/        [        R
                  " U R`                  SSSU R                  S9U l1        g )Nr   r+   a#  At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19.)r   r   r   r   )rn   rn   )kernel_sizestridespaddingr@   )rA   rB   rN   rP   zaddition_embed_type z2 requires `addition_time_embed_dim` to not be Nonezaddition_embed_type: z must be None or `text_time`.r   r.   )r,   r-   r>   
num_layersrE   r<   add_downsampler?   r6   rC   rD   r@   )r,   r-   r>   rr   rs   r@   r4   )r,   r>   r<   rE   r?   rC   rD   r@   zUnexpected mid_block_type r2   )r,   r-   prev_output_channelrr   rE   r<   add_upsampler>   r?   r6   rC   rD   r@   )r,   r-   ru   rr   rv   r>   r@   r)   gh㈵>)
num_groupsepsilon)2r8   r<   
ValueErrorr;   nnConvr@   conv_inr   rA   r\   rB   	time_projr   time_embeddingr6   
isinstanceboollenr0   intrE   rF   add_embeddingrG   add_time_proj	enumerater   r>   r9   r?   rC   rD   r   appenddown_blocksr5   r   	mid_blocklistreversedr3   minr   r   	up_blocks	GroupNormconv_norm_outr-   conv_out)r^   r8   time_embed_dimr<   r6   rE   r   output_channelidown_block_typeinput_channelis_final_block
down_blockr   reversed_block_out_channelsreversed_num_attention_heads%reversed_transformer_layers_per_blockup_block_typeru   up_blocks                       r%   setupFlaxUNet2DConditionModel.setup   s8   !44+A.2##/ v  #66Q$:Q:Q wwq!$**
 'q!43G3GTXT_T_TjTj
 4N**U#88*D11$8#:SAVAV=W#W )3//#6"83t?T?T;U"U (,'H'H$2C88,H+ICPTPePeLf+f( ##+!%D%%4++3 *4+C+C*DDvw  "/t/K/KTMaMacgcrcr!sD!6~ZZ!XD4T5M5M4NNklmm +A."+D,A,A"BA*M/2N#&8"9A"==N"885 -!/ LL#441M1P(;(>'5#5*.*D*D)=)@373V3V#'#6#6**
 - -!/ LL#44'5#5**
 z*= #C> ' ;;%%)BB8.r2$7$;-I"-M&*&@&@/3/R/R#22jj	DN [[''/!DN9$++:T:T9UVWW 	&*84F+G&H#'+H5H,I'J$#H-A$BC4Q704X>Z5[0\- )$*=*= >A}"08;N7AE3GYCZ]^C^8_`M#&8"9A"==N 441 -!/(;#44q81VWX1Y(D(G%3!3 LL*.*D*D)=)@373V3V#'#6#6**  ) -!/(;#44q8%3!3 LL** X&"0G !?H #  \\RF$**
r$   r   r`   ra   re   down_block_additional_residualsmid_block_additional_residualreturn_dicttrainc	           	         [        U[        R                  5      (       d%  [        R                  " U/[        R                  S9nOl[        U[        R                  5      (       aM  [        UR                  5      S:X  a4  UR                  [        R                  S9n[        R                  " US5      nU R                  U5      n	U R                  U	5      n	Sn
U R                  S:X  a  Uc&  [        SU R                   SU R                   S35      eUR                  S5      nUc  [        U R                   S	35      eUR                  S
5      nUc  [        U R                   S35      eU R!                  [        R"                  " U5      5      n[        R$                  " XR                  S   S45      n[        R&                  " X/SS9nU R)                  U5      n
U
b  X-   OU	n	[        R*                  " US5      nU R-                  U5      nU4nU R.                   H<  n[        U[0        5      (       a  U" XX8(       + S9u  nnOU" XU(       + S9u  nnUU-  nM>     Ub$  Sn[3        X5       H  u  nnUU-  nUU4-  nM     UnU R4                  b  U R5                  XX8(       + S9nUb  X-  nU R6                   H[  nXR8                  S-   * S nUSU R8                  S-   *  n[        U[:        5      (       a  U" UU	UUU(       + S9nMN  U" XUU(       + S9nM]     U R=                  U5      n[>        R@                  " U5      nU RC                  U5      n[        R*                  " US5      nU(       d  U4$ [E        US9$ )ac  
Args:
    sample (`jnp.ndarray`): (batch, channel, height, width) noisy inputs tensor
    timestep (`jnp.ndarray` or `float` or `int`): timesteps
    encoder_hidden_states (`jnp.ndarray`): (batch_size, sequence_length, hidden_size) encoder hidden states
    added_cond_kwargs: (`dict`, *optional*):
        A kwargs dictionary containing additional embeddings that if specified are added to the embeddings that
        are passed along to the UNet blocks.
    down_block_additional_residuals: (`tuple` of `torch.Tensor`, *optional*):
        A tuple of tensors that if specified are added to the residuals of down unet blocks.
    mid_block_additional_residual: (`torch.Tensor`, *optional*):
        A tensor that if specified is added to the residual of the middle unet block.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] instead of
        a plain tuple.
    train (`bool`, *optional*, defaults to `False`):
        Use deterministic functions and disable dropout when not training.

Returns:
    [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] or `tuple`:
    [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] if `return_dict` is True, otherwise a
    `tuple`. When returning a tuple, the first element is the sample tensor.
rN   r   NrP   z1Need to provide argument `added_cond_kwargs` for z! when using `addition_embed_type=`rS   z has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`rT   z has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`rt   )axis)r   r   r   r   )deterministicr   r   )tembra   res_hidden_states_tupler   )r   r   r   )r   r   r   r   )r   )#r   r    r!   arrayrX   r   shapeastyperV   expand_dimsr}   r~   rF   ry   	__class__getr   ravelreshapeconcatenater   	transposer|   r   r   zipr   r   r9   r   r   rz   silur   r   )r^   r   r`   ra   re   r   r   r   r   t_embaug_embrS   rT   time_embeds
add_embedsdown_block_res_samplesr   res_samplesnew_down_block_res_samplesdown_block_res_sampledown_block_additional_residualr   s                         r%   __call__!FlaxUNet2DConditionModel.__call__J  s   F )S[[11		9+SYY?I	3;;//C	4HA4M!((s{{(;I	15Iy)##E* ##{2 ( GGWWxy}  zR  zR  yS  ST  U  ,//>K" ~~&  '{  |  ),,Z8H ~~&  'x  y  ,,SYYx-@AK++k4E4Ea4H"3MNK+)C"MJ((4G#*#6E v|4f% #)**J*&>??&0@Uen&o#&0RWi&X#"k1" + +6)+&IL&JE%'E &)GG%*/D.FF*	J &@" >>%^^F3HXa^bF(43F H03H3H13L1M1OPK%;<Z@U@UXY@Y>Z%["($:;;!*?,7&+) "&kmrirs ' ##F+v&v|49(77r$   )
r   r   r|   r   r   r   r   r~   r}   r   )rL   N)NNNTF)1r   r   r   r   r   r*   r   r"   r,   r-   r0   r   strr3   r5   r   r6   r   r   r8   r9   r;   r<   r=   r>   floatr?   r    rV   r@   rA   rB   rC   rD   rE   rF   rG   rI   rJ   rY   Arrayr   rk   r   r!   r   r   r   r#   r   r$   r%   r'   r'   0   s   3j KKL#)eCHo  'vNE#s(Ou$=NHSM=5:%eDk 12:*@c3h@c67c5c?237AE%U38_(<"=>E##GU"'4'{{E399" OT J+0"D0 ND @A %U38_(<"=A)-#--1Xc]1)+!3+;?)8C=?f		 fj f@c
T @DMQ?C |8|8 eS01|8  #{{	|8
 $E$
*:$;<|8 *2%S8H2I)J|8 (0'<|8 |8 |8 
(%*<<	=|8 |8r$   r'   )$typingr   r   r   r   flax
flax.linenlinenrz   rY   	jax.numpynumpyr    flax.core.frozen_dictr   configuration_utilsr	   r
   utilsr   embeddings_flaxr   r   modeling_flax_utilsr   unet_2d_blocks_flaxr   r   r   r   r   struct	dataclassr   Moduler'   r   r$   r%   <module>r      s}    0 /   
  , G  B 0  	
 	 	 U8ryy.+ U8 U8r$   