
    +hM                       S SK Jr  S SKJr  S SKJrJrJrJrJ	r	J
r
  S SKrS SKrS SKJrJr  SSKJrJr  SSKJrJr  SS	KJr  S
SKJrJrJrJrJrJrJr  S
SKJ r J!r!  S
SK"J#r#  S
SK$J%r%J&r&J'r'J(r(J)r)J*r*J+r+  S
SK,J-r-  SSK.J/r/  \R`                  " \15      r2\ " S S\5      5       r3 " S S\Rh                  5      r5 " S S\Rh                  5      r6 " S S\Rh                  5      r7        S;S\8S\8S\8S\8S\8S\\8   S \\
\8\	\8   4      S!\\8   S"\\8   S#\9S$\\9   S%\\9   4S& jjr:       S<S'\8S(\8S\\8   S\\8   S \8S!\\8   S"\\8   S$\9S%\94S) jjr;S*\8S+\8S,\\8   4S- jr< " S. S/\#\5      r= " S0 S1\#\5      r> " S2 S3\Rh                  5      r? " S4 S5\Rh                  5      r@ " S6 S7\Rh                  5      rAS=S8 jrBS9 rCS: rDg)>    )	dataclass)gcd)AnyDictListOptionalTupleUnionN)Tensornn   )ConfigMixinregister_to_config)
BaseOutputlogging)apply_freeu   )ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORS	AttentionAttentionProcessorAttnAddedKVProcessorAttnProcessorFusedAttnProcessor2_0)TimestepEmbedding	Timesteps)
ModelMixin)CrossAttnDownBlock2DCrossAttnUpBlock2DDownsample2DResnetBlock2DTransformer2DModelUNetMidBlock2DCrossAttn
Upsample2D)UNet2DConditionModel   )ControlNetConditioningEmbeddingc                   (    \ rS rSr% SrSr\\S'   Srg)ControlNetXSOutput4   a%  
The output of [`UNetControlNetXSModel`].

Args:
    sample (`Tensor` of shape `(batch_size, num_channels, height, width)`):
        The output of the `UNetControlNetXSModel`. Unlike `ControlNetOutput` this is NOT to be added to the base
        model output, but is already the final output.
Nsample )	__name__
__module____qualname____firstlineno____doc__r+   r   __annotations____static_attributes__r,       d/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/controlnets/controlnet_xs.pyr)   r)   4   s     FFr4   r)   c                      ^  \ rS rSrSr  S
S\R                  S\R                  S\R                  S\\R                     S\\R                     4
U 4S jjjr	S	r
U =r$ )DownBlockControlNetXSAdapterB   zyComponents that together with corresponding components from the base model will form a
`ControlNetXSCrossAttnDownBlock2D`resnetsbase_to_ctrlctrl_to_base
attentionsdownsamplerc                 ^   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        g N)super__init__r9   r:   r;   r<   downsamplers)selfr9   r:   r;   r<   r=   	__class__s         r5   rA   %DownBlockControlNetXSAdapter.__init__F   s,     	(($'r4   )r<   r:   r;   rB   r9   )NN)r-   r.   r/   r0   r1   r   
ModuleListr   Conv2drA   r3   __classcell__rD   s   @r5   r7   r7   B   sh    * /3+/(( mm( mm	(
 R]]+( bii(( (r4   r7   c                   d   ^  \ rS rSrSrS\S\R                  S\R                  4U 4S jjrSr	U =r
$ )MidBlockControlNetXSAdapterV   zxComponents that together with corresponding components from the base model will form a
`ControlNetXSCrossAttnMidBlock2D`midblockr:   r;   c                 F   > [         TU ]  5         Xl        X l        X0l        g r?   )r@   rA   rM   r:   r;   )rC   rM   r:   r;   rD   s       r5   rA   $MidBlockControlNetXSAdapter.__init__Z   s     ((r4   )r:   r;   rM   )r-   r.   r/   r0   r1   r#   r   rF   rA   r3   rH   rI   s   @r5   rK   rK   V   s3    ))!8 ) )egerer ) )r4   rK   c                   H   ^  \ rS rSrSrS\R                  4U 4S jjrSrU =r	$ )UpBlockControlNetXSAdaptera   zwComponents that together with corresponding components from the base model will form a `ControlNetXSCrossAttnUpBlock2D`r;   c                 .   > [         TU ]  5         Xl        g r?   )r@   rA   r;   )rC   r;   rD   s     r5   rA   #UpBlockControlNetXSAdapter.__init__d   s    (r4   r;   )
r-   r.   r/   r0   r1   r   rF   rA   r3   rH   rI   s   @r5   rQ   rQ   a   s     B)R]] ) )r4   rQ   base_in_channelsbase_out_channelsctrl_in_channelsctrl_out_channelstemb_channelsmax_norm_num_groupstransformer_layers_per_blocknum_attention_headscross_attention_dimadd_downsampleupcast_attentionuse_linear_projectionc                 F   Sn/ n/ n/ n/ n[        U[        5      (       a  U/U-  n[        U5       H  nUS:X  a  U OUn US:X  a  UOUnUR                  [	        X 5      5        UR                  [        X -   UU[        X -   US9[        X5S9SS95        U(       a+  UR                  [        UX8-  UUU   U	UU[        X5S9S95        UR                  [	        X15      5        M     U
(       aC  UR                  [	        X5      5        [        X1-   SUSS	9nUR                  [	        X15      5        OS n[        [        R                  " U5      [        R                  " U5      [        R                  " U5      S
9nU(       a  [        R                  " U5      Ul        Ub  UUl        U$ )Nr   r   
max_factorh㈵>in_channelsout_channelsrZ   groups
groups_outepsrg   
num_layersr^   ra   r`   norm_num_groupsTopuse_convrh   name)r9   r:   r;   )
isinstanceintrangeappendmake_zero_convr!   find_largest_factorr"   r    r7   r   rF   r<   rB   )rV   rW   rX   rY   rZ   r[   has_crossattnr\   r]   r^   r_   r`   ra   rm   r9   r<   r;   r:   irB   down_block_componentss                        r5   get_down_block_adapterr|   i   s    JGJLL.44(D'E
'R$:/0Av+;L/0Av+;L 	N+;NO,?.+*+;+N[no./@a		
 "'%< 1;A>(;*?%5$78I$j	 	N+<PQG J  	N+<PQ#1DO`gk
 	N+<PQ8g&]]<0]]<0 +-==+D(-9*  r4   base_channelsctrl_channelsc	                     [        X 5      n	[        UX-   UU[        [        XU -   5      U5      UUUUS9	n
[        X5      n[	        XUS9$ )N	r\   rg   rh   rZ   resnet_groupsr^   r]   ra   r`   )r:   rM   r;   )rw   r#   rx   r   rK   )r}   r~   rZ   r[   r\   r]   r^   r`   ra   r:   rM   r;   s               r5   get_mid_block_adapterr      sb     "-?L&%A!1"#)#m]=Z*[]pq//3)H "-?L&Lbnoor4   rh   prev_output_channelctrl_skip_channelsc                     / nSn[        U5       H*  nUS:X  a  UOU nUR                  [        X%   U5      5        M,     [        [        R
                  " U5      S9$ )Nr   r   rU   )ru   rv   rw   rQ   r   rF   )rh   r   r   r;   rm   rz   resnet_in_channelss          r5   get_up_block_adapterr      s\    
 LJ:45F0N+=+@BTUV  &2==3NOOr4   c                    L  ^  \ rS rSrSr\               SS\S\S\\   S\	S\
S\\\\   4   S	\\   S
\\   S\S\\   S\\   S\\\\   4   S\
S\S\
4U 4S jjj5       r\        SS\S\\	   S	\\\      S\\\      S\
S\S\S\S\\   4S jj5       rS rSrU =r$ )ControlNetXSAdapter   a!  
A `ControlNetXSAdapter` model. To use it, pass it into a `UNetControlNetXSModel` (together with a
`UNet2DConditionModel` base model).

This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
methods implemented for all models (such as downloading or saving).

Like `UNetControlNetXSModel`, `ControlNetXSAdapter` is compatible with StableDiffusion and StableDiffusion-XL. It's
default parameters are compatible with StableDiffusion.

Parameters:
    conditioning_channels (`int`, defaults to 3):
        Number of channels of conditioning input (e.g. an image)
    conditioning_channel_order (`str`, defaults to `"rgb"`):
        The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
    conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`):
        The tuple of output channels for each block in the `controlnet_cond_embedding` layer.
    time_embedding_mix (`float`, defaults to 1.0):
        If 0, then only the control adapters's time embedding is used. If 1, then only the base unet's time
        embedding is used. Otherwise, both are combined.
    learn_time_embedding (`bool`, defaults to `False`):
        Whether a time embedding should be learned. If yes, `UNetControlNetXSModel` will combine the time
        embeddings of the base model and the control adapter. If no, `UNetControlNetXSModel` will use the base
        model's time embedding.
    num_attention_heads (`list[int]`, defaults to `[4]`):
        The number of attention heads.
    block_out_channels (`list[int]`, defaults to `[4, 8, 16, 16]`):
        The tuple of output channels for each block.
    base_block_out_channels (`list[int]`, defaults to `[320, 640, 1280, 1280]`):
        The tuple of output channels for each block in the base unet.
    cross_attention_dim (`int`, defaults to 1024):
        The dimension of the cross attention features.
    down_block_types (`list[str]`, defaults to `["CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"]`):
        The tuple of downsample blocks to use.
    sample_size (`int`, defaults to 96):
        Height and width of input/output sample.
    transformer_layers_per_block (`Union[int, Tuple[int]]`, defaults to 1):
        The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for
        [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`].
    upcast_attention (`bool`, defaults to `True`):
        Whether the attention computation should always be upcasted.
    max_norm_num_groups (`int`, defaults to 32):
        Maximum number of groups in group normal. The actual number will be the largest divisor of the respective
        channels, that is <= max_norm_num_groups.
conditioning_channelsconditioning_channel_order#conditioning_embedding_out_channelstime_embedding_mixlearn_time_embeddingr]   block_out_channelsbase_block_out_channelsr^   down_block_typessample_sizer\   r`   r[   ra   c                 2  > [         T!U ]  5         US   nUS   S-  nUS;  a  [        SU 35      e[        U5      [        U
5      :w  a  [        SU SU
 S35      e[	        U[
        [        45      (       d  U/[        U
5      -  n[	        U	[
        [        45      (       d  U	/[        U
5      -  n	[	        U[
        [        45      (       d  U/[        U
5      -  n[        U5      [        U
5      :w  a  [        SU SU
 S35      e[        US   UUS	9U l        U(       a  [        UU5      U l
        OS U l
        [        R                  " / 5      U l        [        R                  " / 5      U l        [        R                  " SUS   S
SS9U l        [#        US   US   5      U l        US   nUS   n['        U
5       Hf  u  nnUnUU   nUnUU   nSU;   nU[        U
5      S-
  :H  nU R                  R)                  [+        UUUUUUUUU   UU   U	U   U(       + UUS95        Mh     [-        US   US   UUS   US   U	S   UUS9U l        US   /n['        U5       H1  u  nnU[        U5      S-
  :  a  S
OSnUR1                  U/U-  5        M3     [        [3        U5      5      nUS   n[5        [        U
5      5       HW  nUnUU   n[5        S
5       Vs/ s H  nUR7                  5       PM     n nU R                  R)                  [9        UUU S95        MY     g s  snf )Nr      )rgbbgrz&unknown `conditioning_channel_order`: zbMust provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: z. `down_block_types`: .zdMust provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: conditioning_embedding_channelsr   r   r   r&   kernel_sizepadding	CrossAttn)rV   rW   rX   rY   rZ   r[   ry   r\   r]   r^   r_   r`   ra   )r}   r~   rZ   r\   r]   r^   r`   ra   r   )rh   r   r   )r@   rA   
ValueErrorlenrs   listtupler'   controlnet_cond_embeddingr   time_embeddingr   rF   down_blocksup_connectionsrG   conv_inrw   control_to_base_for_conv_in	enumeraterv   r|   r   	mid_blockextendreversedru   popr   )"rC   r   r   r   r   r   r]   r   r   r^   r   r   r\   r`   r[   ra   time_embedding_input_dimtime_embedding_dimrW   rY   rz   down_block_typerV   rX   ry   is_final_blockr   rh   number_of_subblocks reversed_base_block_out_channelsprev_base_output_channel_ctrl_skip_channels_rD   s"                                    r5   rA   ControlNetXSAdapter.__init__$  s   0 	#:1#= 4Q7!; &^;EF`Eabcc!"c*:&;;t  vH  uI  I_  `p  _q  qr  s  6uFF,H+ICP`La+a(-e}==#6"7#>N:O"O-e}==#6"7#>N:O"O"#s+;'<<v  xK  wL  Lb  cs  bt  tu  v 
 *I,>q,AB"7*
&  "34LN`"aD"&D==, mmB/ yy$6q$9qRST+9:LQ:OQhijQk+l( 4A6.q1"+,<"=A0 7 :0 21 5'?:M#&6"7!";;N##&%5&7%5&7"4(;"/1Ma1P(;A(>(;A(>'5#5%5*? #>6 /1"5,R0,)Eb)I 3B 7 3B 7-"7	
 134();<OA|/0144!   %%|n7J&JK	  = ,09P0Q+R(<Q?s+,-A'8$ @ CEJ1X"NX#5#9#9#;X"N&&$!2(@': . #Os   Lunet
size_ratioc
                    USLn
USLnX-  (       d  [        S5      eU=(       d2    UR                  R                   Vs/ s H  n[        X-  5      PM     snnUc  UR                  R                  nU " UUU	UUUUUR                  R                  UR                  R
                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  S9nUR                  UR                  5        U$ s  snf )ax  
Instantiate a [`ControlNetXSAdapter`] from a [`UNet2DConditionModel`].

Parameters:
    unet (`UNet2DConditionModel`):
        The UNet model we want to control. The dimensions of the ControlNetXSAdapter will be adapted to it.
    size_ratio (float, *optional*, defaults to `None`):
        When given, block_out_channels is set to a fraction of the base model's block_out_channels. Either this
        or `block_out_channels` must be given.
    block_out_channels (`List[int]`, *optional*, defaults to `None`):
        Down blocks output channels in control model. Either this or `size_ratio` must be given.
    num_attention_heads (`List[int]`, *optional*, defaults to `None`):
        The dimension of the attention heads. The naming seems a bit confusing and it is, see
        https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why.
    learn_time_embedding (`bool`, defaults to `False`):
        Whether the `ControlNetXSAdapter` should learn a time embedding.
    time_embedding_mix (`float`, defaults to 1.0):
        If 0, then only the control adapter's time embedding is used. If 1, then only the base unet's time
        embedding is used. Otherwise, both are combined.
    conditioning_channels (`int`, defaults to 3):
        Number of channels of conditioning input (e.g. an image)
    conditioning_channel_order (`str`, defaults to `"rgb"`):
        The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
    conditioning_embedding_out_channels (`Tuple[int]`, defaults to `(16, 32, 96, 256)`):
        The tuple of output channel for each block in the `controlnet_cond_embedding` layer.
NzePass exactly one of `block_out_channels` (for absolute sizing) or `size_ratio` (for relative sizing).)r   r   r   r   r   r]   r   r   r^   r   r   r\   r`   r[   ra   )r   configr   rt   attention_head_dimr^   r   r   r\   r`   rn   ra   todtype)clsr   r   r   r]   r   r   r   r   r   
fixed_sizerelative_sizebmodels                 r5   	from_unetControlNetXSAdapter.from_unet  s   R (t3
"$.*w 
 0pQUQ\Q\QoQo3pQoAC4GQo3p&"&++"@"@"7'A0S1!5 31$(KK$B$B $ ? ?![[99//)-)Q)Q![[99 $ ; ;"&++"C"C
& 	5 4qs   Ec                     [        S5      e)NzA ControlNetXSAdapter cannot be run by itself. Use it together with a UNet2DConditionModel to instantiate a UNetControlNetXSModel.)r   )rC   argskwargss      r5   forwardControlNetXSAdapter.forward  s     Q
 	
r4   )r   r   r   r   r   r   r   )r   r          `            ?Fr   r      r   r   i@  i     r      r   r   r   DownBlock2Dr   r&   Tr   T)NNNFr   r   r   r   )r-   r.   r/   r0   r1   r   rt   strr	   floatboolr
   r   rA   classmethodr%   r   r   r   r3   rH   rI   s   @r5   r   r      s   ,\  &'*/:K$'%*67)7.D#'(
 &(?@!%#%&*+F"F %(F .33Z	F
 "F #F #3c
?3F "#JF "'sF !F  *F" c]#F$ ',CsO&<%F& 'F( !)F*  $+F FP  '+2637%*"%%&*/:KJ"J UOJ %T#Y/	J
 &d3i0J #J  J  #J %(J .33ZJ JX
 
r4   r   c            /       Z  ^  \ rS rSrSrSr\                      S<S\\   S\	\
   S\	\
   S\	\   S\\   S	\\\	\   4   S
\\\	\   4   S\\\	\   4   S\\
   S\\   S\S\S\\   S\\   S\S\S\	\   S\
S\S\	\   S\\\	\   4   S\4,U 4S jjj5       r\     S=S\S\\   S\\   S\\\      S\\   S\\   4S jj5       rS>S! jr\S \\
\4   4S" j5       rS#\\\\
\4   4   4S$ jrS% rS&\S'\S(\S)\4S* jrS+ rS, rS- r         S?S.\S/\\ R>                  \\4   S0\ R>                  S1\\ R>                     S2\\   S3\\ R>                     S4\\ R>                     S5\\ R>                     S6\\\
\!4      S7\\\
\ R>                  4      S8\S9\S \\"\	4   4S: jjr#S;r$U =r%$ )@UNetControlNetXSModeli   a  
A UNet fused with a ControlNet-XS adapter model

This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
methods implemented for all models (such as downloading or saving).

`UNetControlNetXSModel` is compatible with StableDiffusion and StableDiffusion-XL. It's default parameters are
compatible with StableDiffusion.

It's parameters are either passed to the underlying `UNet2DConditionModel` or used exactly like in
`ControlNetXSAdapter` . See their documentation for details.
Tr   r   up_block_typesr   rn   r^   r\   r]   addition_embed_typeaddition_time_embed_dimr`   ra   time_cond_proj_dim%projection_class_embeddings_input_dimr   ctrl_conditioning_channels(ctrl_conditioning_embedding_out_channelsctrl_conditioning_channel_orderctrl_learn_time_embeddingctrl_block_out_channelsctrl_num_attention_headsctrl_max_norm_num_groupsc                   > [         T0U ]  5         US:  d  US:  a  [        S5      eUS:  a  U(       d  [        S5      eU	b  U	S:w  a  [        S5      e[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  nUnSU l        [        R                  " SUS   SSS	9U l
        [        US   UUS
9U l        [        R                  " SUS   SSS	9U l        [        US   US   5      U l        US   nUS   S-  n[!        US   SSS9U l        [%        UUUS9U l        U(       a  [%        UUS9U l        OS U l        U	c  S U l        S U l        O![!        U
SSS9U l        [%        UU5      U l        / nUS   nUS   n[/        U5       Ha  u  nnUnUU   nUn UU   nSU;   n!U[        U5      S-
  :H  n"UR1                  [3        UUU UUUUU!UU   UU   UU   UU   U"(       + UUS95        Mc     [5        US   US   UUUUS   US   US   US   UUS9U l        / n#[	        [9        U5      5      n$[	        [9        U5      5      n%[	        [9        U5      5      n&US   /n'[/        U5       H1  u  nn(U[        U5      S-
  :  a  SOSn)U'R;                  U(/U)-  5        M3     [	        [9        U5      5      n*U*S   n([/        U5       H  u  nn+U(n,U*U   n(U*[=        US-   [        U5      S-
  5         n-[?        S5       V.s/ s H  n.U'RA                  5       PM     n/n.SU+;   n!U[        U5      S-
  :H  n"U#R1                  [C        U-U(U,U/UUU!U$U   U%U   U&U   U"(       + UUUS95        M     [        RD                  " U5      U l#        [        RD                  " U#5      U l$        [        RJ                  " US   US9U l&        [        RN                  " 5       U l(        [        R                  " US   SSSS	9U l)        g s  sn.f )Nr   r&   z1`time_embedding_mix` needs to be between 0 and 1.zKTo use `time_embedding_mix` < 1, `ctrl_learn_time_embedding` must be `True`	text_timezAs `UNetControlNetXSModel` currently only supports StableDiffusion and StableDiffusion-XL, `addition_embed_type` must be `None` or `'text_time'`.r   r   r   r   T)flip_sin_to_cosdownscale_freq_shift)cond_proj_dim)rg   time_embed_dimr   rV   rW   rX   rY   rZ   rn   r   ry   r\   base_num_attention_headsr   r^   r_   r`   ra   r   r}   r~   rZ   rn   r   r\   r   r   r^   r`   ra   r   )rg   rh   r   r   rZ   resolution_idxry   r\   r]   r^   add_upsampler`   rn   ra   )num_channels
num_groups)*r@   rA   r   rs   r   r   r   rg   r   rG   base_conv_inr'   r   ctrl_conv_inrw   r   r   base_time_projr   base_time_embeddingctrl_time_embeddingbase_add_time_projbase_add_embeddingr   rv    ControlNetXSCrossAttnDownBlock2DControlNetXSCrossAttnMidBlock2Dr   r   r   minru   r   ControlNetXSCrossAttnUpBlock2DrF   r   	up_blocks	GroupNormbase_conv_norm_outSiLUbase_conv_actbase_conv_out)1rC   r   r   r   r   rn   r^   r\   r]   r   r   r`   ra   r   r   r   r   r   r   r   r   r   r   r   time_embed_input_dimr   r   rW   rY   rz   r   rV   rX   ry   r   r   rev_transformer_layers_per_blockrev_num_attention_headsrev_cross_attention_dimr   rh   r   reversed_block_out_channelsup_block_typer   rg   r   r   rD   s1                                                   r5   rA   UNetControlNetXSModel.__init__  s?   B 	!%7!%;PQQ!*Cjkk*/Bk/Q d  6uFF,H+ICP`La+a(-e}==#6"7#>N:O"O-e}==#6"7#>N:O"O2T5MBB(@'ACHXDY'Y$#6  IIa);A)>AWXY)H,CA,FG"<*
&
 IIa)@)CQR\]^+9:QRS:TVhijVk+l(  2!4+A.2'(:1(=tjkl#4 ,$
 
 %'80(D$ (,D$&&*D#&*D#&/0GY]tu&vD#&78]_m&nD# .q13A6"+,<"=A0 21 50 7 :'?:M#&6"7!";;N0%5&7%5&7"0$3-E"/1Ma1P-Ea-H-Ea-H(;A(>'5#5%5*? #>: 9,R01"5(+%=)Eb)I%=b%A%=b%A 3B 7-"7
 	+/9U0V+W("&x0H'I"J"&x0C'D"E 6a89()@AOA|4599q   %%|n7J&JK	  B '+84F+G&H#215 ). 9A}".6q9L5c!a%EWAX[\A\6]^KEJ1X"NX#5#9#9#;X"N'=8M#&8"9A"==N. +!-(;':"0#$"/1QRS1T(?(B(?(B%3!3%5$3*? !:8 ==5y1"$,,<Nq<Q^m"nWWYYY'9!'<aQXYZ; #Os   3Q)r   
controlnetr   ctrl_optional_kwargsc                    Uc  [         R                  " XU40 UD6nO$[        S X4XV4 5       5      (       a  [        S5      e/ SQnUR                  R                  5        VV	s0 s H  u  pX;   d  M  X_M     nnn	UR                  R                  US'   / SQn
UR                  R                  5        VV	s0 s H  u  pX;   d  M  SU-   U	_M     n
nn	UR                  R                  U
S'   U R                  0 UEU
E5      n/ SQnU H8  n[        US	U-   5      R                  [        X5      R                  5       5        M:     S
S/nU HX  n[        X5      (       d  M  [        X5      c  M#  [        US	U-   5      R                  [        X5      R                  5       5        MZ     UR                  R                  UR                  R                  5       5        UR                  R                  UR                  R                  5       5        UR                   b3  UR"                  R                  UR                   R                  5       5        UR$                  R                  UR$                  R                  5       5        [&        R(                  " S [+        UR,                  UR,                  5       5       5      Ul        [.        R1                  UR2                  UR2                  5      Ul        [&        R(                  " S [+        UR4                  UR6                  5       5       5      Ul        UR9                  UR:                  5        U$ s  sn	nf s  sn	nf )a^  
Instantiate a [`UNetControlNetXSModel`] from a [`UNet2DConditionModel`] and an optional [`ControlNetXSAdapter`]
.

Parameters:
    unet (`UNet2DConditionModel`):
        The UNet model we want to control.
    controlnet (`ControlNetXSAdapter`):
        The ControlNet-XS adapter with which the UNet will be fused. If none is given, a new ControlNet-XS
        adapter will be created.
    size_ratio (float, *optional*, defaults to `None`):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
    ctrl_block_out_channels (`List[int]`, *optional*, defaults to `None`):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details,
        where this parameter is called `block_out_channels`.
    time_embedding_mix (`float`, *optional*, defaults to None):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
    ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`):
        Passed to the `init` of the new controlnet if no controlnet was given.
c              3   (   #    U  H  oS Lv   M
     g 7fr?   r,   ).0os     r5   	<genexpr>2UNetControlNetXSModel.from_unet.<locals>.<genexpr>  s      'v!'vs   zWhen a controlnet is passed, none of these parameters should be passed: size_ratio, ctrl_block_out_channels, time_embedding_mix, ctrl_optional_kwargs.)r   r   r   r   rn   r^   r\   r   r   r`   ra   r   r   r]   )r   r   r   r   r   r]   r[   ctrl_r   )r   r   conv_norm_outconv_outbase_add_time_projadd_embeddingc              3   P   #    U  H  u  p[         R                  X5      v   M     g 7fr?   )r   from_modulesr  r   cs      r5   r  r  7  s'      *
E -99!??E   $&c              3   P   #    U  H  u  p[         R                  X5      v   M     g 7fr?   )r  r  r   s      r5   r  r  <  s'      (
F +77==Fr"  )r   r   anyr   r   itemsr   r   from_configgetattrload_state_dict
state_dicthasattrr   r   r   r   r   r   r   rF   zipr   r   r  r   r  r   r   r   )r   r   r  r   r   r   r  params_for_unetkvparams_for_controlnetr   modules_from_unetmoptional_modules_from_unets                  r5   r   UNetControlNetXSModel.from_unet  s   < ,66"9=QJ  (2M_'v   ! m 

 -1KK,=,=,?X,?DA1CW414,?X151O1O-.!
 =G<M<M<S<S<U t<UDAYZYs1a<U t6@6G6G6Z6Z23  L? L6K LM
 #AE7Q;'778H8S8S8UV # &
" ,AtGD$4$@w{+;;GD<L<W<W<YZ ,
 	''77
8\8\8g8g8ij**:+=+=+H+H+JK$$0%%55j6O6O6Z6Z6\]))99*:`:`:k:k:mn MM *
D,,j.D.DE*
 
 :FFt~~WaWkWkl-- (
DNNJ,E,EF(
 
 	w Y !us   "M1M4M
Mreturnc                    U R                  5        H
  nSUl        M     / SQnU Vs/ s H  n[        X5      c  M  [        X5      PM     nnU H!  nUR                  5        H
  nSUl        M     M#     U R                   H  nUR	                  5         M     U R
                  R	                  5         U R                   H  nUR	                  5         M     gs  snf )Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine
tuning.T)r   r   r   r   r   r  r  r  NF)
parametersrequires_gradr'  r   freeze_base_paramsr   r  )rC   param
base_partspartdus         r5   freeze_unet_params(UNetControlNetXSModel.freeze_unet_paramsF  s     __&E"&E '	

 7AdjdGDDW)gd)j
dD*&+# +  !!A  " "))+A  "   es
   CCc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
rr   module
processorsc                    > [        US5      (       a  UR                  5       X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processor
.processorr   )r*  rE  named_children)rr   rB  rC  sub_namechildfn_recursive_add_processorss        r5   rJ  JUNetControlNetXSModel.attn_processors.<locals>.fn_recursive_add_processorsn  sZ    v//282F2F2H
V:./#)#8#8#:+tfAhZ,@%T $; r4   )r   torchr   Moduler   r   rG  )rC   rC  rr   rB  rJ  s       @r5   attn_processors%UNetControlNetXSModel.attn_processorsc  sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r4   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers.

        If `processor` is a dict, the key needs to define the path to the corresponding cross attention
        processor. This is strongly recommended when setting trainable attention processors.

z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.rr   rB  c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorrF  r   )r*  rs   dictrS  r   rG  )rr   rB  rP  rH  rI  fn_recursive_attn_processors        r5   rU  MUNetControlNetXSModel.set_attn_processor.<locals>.fn_recursive_attn_processor  ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r4   N)r   rN  keysrs   rT  r   r   rL  r   rM  rG  )rC   rP  countrr   rB  rU  s        @r5   set_attn_processor(UNetControlNetXSModel.set_attn_processor}  s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r4   c           	      ~   [        S U R                  R                  5        5       5      (       a  [        5       nOr[        S U R                  R                  5        5       5      (       a  [	        5       nO8[        S[        [        U R                  R                  5       5      5       35      eU R                  U5        g)zU
Disables custom attention processors and sets the default attention implementation.
c              3   F   #    U  H  oR                   [        ;   v   M     g 7fr?   )rD   r   r  procs     r5   r  CUNetControlNetXSModel.set_default_attn_processor.<locals>.<genexpr>  s     iKh4~~!>>Kh   !c              3   F   #    U  H  oR                   [        ;   v   M     g 7fr?   )rD   r   r]  s     r5   r  r_    s     hJg$#==Jgr`  zOCannot call `set_default_attn_processor` when attention processors are of type N)	allrN  valuesr   r   r   nextiterrY  )rC   rP  s     r5   set_default_attn_processor0UNetControlNetXSModel.set_default_attn_processor  s     i4K_K_KfKfKhiii,.Ih$J^J^JeJeJghhh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*r4   s1s2b1b2c                     [        U R                  5       H9  u  pV[        USU5        [        USU5        [        USU5        [        USU5        M;     g)a  Enables the FreeU mechanism from https://huggingface.co/papers/2309.11497.

The suffixes after the scaling factors represent the stage blocks where they are being applied.

Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of values that
are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.

Args:
    s1 (`float`):
        Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    s2 (`float`):
        Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
    b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
rh  ri  rj  rk  N)r   r  setattr)rC   rh  ri  rj  rk  rz   upsample_blocks          r5   enable_freeu"UNetControlNetXSModel.enable_freeu  sJ    $ "+4>>!:AND"-ND"-ND"-ND"-	 ";r4   c                     1 Skn[        U R                  5       H9  u  p#U H.  n[        X45      (       d  [        X4S5      c  M"  [	        X4S5        M0     M;     g)zDisables the FreeU mechanism.>   rj  rk  rh  ri  N)r   r  r*  r'  rm  )rC   
freeu_keysrz   rn  r-  s        r5   disable_freeu#UNetControlNetXSModel.disable_freeu  sH    -
!*4>>!:A>--D1Q1]Nt4   ";r4   c                    SU l         U R                  R                  5        H3  u  pS[        UR                  R
                  5      ;   d  M*  [        S5      e   U R                  U l         U R                  5        H)  n[        U[        5      (       d  M  UR                  SS9  M+     U R                  [        5       5        g)u  
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
are fused. For cross-attention modules, key and value projection matrices are fused.

<Tip warning={true}>

This API is 🧪 experimental.

</Tip>
NAddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.T)fuse)original_attn_processorsrN  r%  r   rD   r-   r   modulesrs   r   fuse_projectionsrY  r   )rC   r   attn_processorrB  s       r5   fuse_qkv_projections*UNetControlNetXSModel.fuse_qkv_projections  s     )-%!%!5!5!;!;!=A#n66??@@ !tuu "> )-(<(<%llnF&),,''T'2 % 	 5 78r4   c                 V    U R                   b  U R                  U R                   5        gg)um   Disables the fused QKV projection if enabled.

<Tip warning={true}>

This API is 🧪 experimental.

</Tip>

N)rx  rY  )rC   s    r5   unfuse_qkv_projections,UNetControlNetXSModel.unfuse_qkv_projections  s)     ((4##D$A$AB 5r4   r+   timestepencoder_hidden_statescontrolnet_condconditioning_scaleclass_labelstimestep_condattention_maskcross_attention_kwargsadded_cond_kwargsreturn_dictapply_controlc                 
   U R                   R                  S:X  a  [        R                  " US/S9nUb2  SUR	                  UR
                  5      -
  S-  nUR                  S5      nUn[        R                  " U5      (       d  UR                  R                  S:H  nUR                  R                  S:H  n[        U[        5      (       a/  U(       d  U(       a  [        R                  O[        R                  nO.U(       d  U(       a  [        R                  O[        R                  n[        R                   " U/UUR                  S9nO7[#        UR$                  5      S	:X  a  US   R	                  UR                  5      nUR'                  UR$                  S	   5      nU R)                  U5      nUR	                  UR
                  S
9nU R                   R*                  (       aS  U(       aL  U R-                  UU5      nU R/                  UU5      nU R                   R0                  S-  nUU-  USU-
  -  -   nOU R/                  U5      nSnU R                   R2                  c  GOU R                   R2                  S:X  a  SU
;  a  [5        U R6                   S35      eU
R9                  S5      nSU
;  a  [5        U R6                   S35      eU
R9                  S5      nU R;                  UR=                  5       5      nUR?                  UR$                  S	   S45      n[        R@                  " UU/SS9nUR	                  UR
                  5      nU RC                  U5      nO#[5        SU R                   R2                   S35      eUb  UU-   OUnUnU=nn/ / nnU RE                  U5      n U RG                  U5      nU RI                  U5      nU b  UU -  nU(       a  UU RK                  U5      U-  -   nURM                  U5        URM                  U5        U RN                   H7  n!U!" UUUUUU	UUS9u  nnn"n#URQ                  U"5        URQ                  U#5        M9     U RS                  UUUUUU	UUS9u  nnU RT                   H>  n$[#        U$RV                  5      n%UU%* S n&UU%* S n'USU%*  nUSU%*  nU$" UU&U'UUUU	UUS9	nM@     U RY                  U5      nU R[                  U5      nU R]                  U5      nU(       d  U4$ [_        US9$ )au  
The [`ControlNetXSModel`] forward method.

Args:
    sample (`Tensor`):
        The noisy input tensor.
    timestep (`Union[torch.Tensor, float, int]`):
        The number of timesteps to denoise an input.
    encoder_hidden_states (`torch.Tensor`):
        The encoder hidden states.
    controlnet_cond (`Tensor`):
        The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`.
    conditioning_scale (`float`, defaults to `1.0`):
        How much the control model affects the base model outputs.
    class_labels (`torch.Tensor`, *optional*, defaults to `None`):
        Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
    timestep_cond (`torch.Tensor`, *optional*, defaults to `None`):
        Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the
        timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep
        embeddings.
    attention_mask (`torch.Tensor`, *optional*, defaults to `None`):
        An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
        is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
        negative values to the attention scores corresponding to "discard" tokens.
    cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`):
        A kwargs dictionary that if specified is passed along to the `AttnProcessor`.
    added_cond_kwargs (`dict`):
        Additional conditions for the Stable Diffusion XL UNet.
    return_dict (`bool`, defaults to `True`):
        Whether or not to return a [`~models.controlnets.controlnet.ControlNetOutput`] instead of a plain
        tuple.
    apply_control (`bool`, defaults to `True`):
        If `False`, the input is run only through the base model.

Returns:
    [`~models.controlnetxs.ControlNetXSOutput`] **or** `tuple`:
        If `return_dict` is `True`, a [`~models.controlnetxs.ControlNetXSOutput`] is returned, otherwise a
        tuple is returned where the first element is the sample tensor.
r   r&   )dimsNg     mpsnpu)r   devicer   )r   g333333?r   text_embedsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`time_idsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`r   dimzgControlNet-XS currently only supports StableDiffusion and StableDiffusion-XL, so addition_embed_type = z is currently not supported.)hidden_states_basehidden_states_ctrltembr  r  r  r  r  )	hidden_statesres_hidden_states_tuple_baseres_hidden_states_tuple_ctrlr  r  r  r  r  r  )r+   )0r   r   rL  flipr   r   	unsqueeze	is_tensorr  typers   r   float32float64int32int64tensorr   shapeexpandr   r   r   r   r   r   r   rD   getr   flattenreshapeconcatr   r   r   r   r   rv   r   r   r   r  r9   r  r  r  r)   )(rC   r+   r  r  r  r  r  r  r  r  r  r  r  	timestepsis_mpsis_npur   t_emb	ctrl_temb	base_tembinterpolation_paramr  aug_embr  r  time_embeds
add_embedscembh_ctrlh_basehs_basehs_ctrlguided_hintdownresidual_hbresidual_hcup	n_resnetsskips_hbskips_hcs(                                           r5   r   UNetControlNetXSModel.forward  s   p ;;66%?#jjsCO %."3"3FLL"AAXMN+55a8N 	y)) ]]''50F]]''50F(E***0F(.&u{{i[fmmTI!Q&!$**6==9I $$V\\!_5	##I.
 v||,;;00]00FI00FI"&++"@"@#"E22Y!FYBY5ZZD++E2D ;;**2[[,,;$55 ~~&  '{  |  ,//>K!22 ~~&  'x  y  ),,Z8H11(2B2B2DEK%--{/@/@/CR.HIK{K&@bIJ#tzz2J--j9Gyz~  {F  {F  {Z  {Z  z[  [w  x  ")!4tg~$ % ! r 44_E ""6*""6*"k!Fd>>vFI[[[Fvv$$D7;#)#)&*#5'=-+	84FFK NN;'NN;' % %%"&1#9)' ( 	
 ..BBJJI	z{+H	z{+Hk	z*Gk	z*G$-5-5&*#5'=-+
F !& ((0##F+##F+9!00r4   )r   r   r  r   r  r  r   r   r   r   r   r   r   rg   r   rx  r  )r   r   )	UpBlock2Dr   r   r   r   r   r   r&   r   NNTTNNr   r   r   r   Fr   r   r   )NNNNNr4  N)	Nr   NNNNNTT)&r-   r.   r/   r0   r1    _supports_gradient_checkpointingr   r   rt   r	   r   r
   r   r   rA   r   r%   r   r   r   r   r?  propertyr   rN  rY  rf  ro  rs  r|  r  r   rL  r   r)   r   r3   rH   rI   s   @r5   r   r      s    (,$ &((
 &u)?)+6:?@67-115!%&*,0?C$'*+?P/4*/.<;<(*=|[ c]|[  *	|[ c
|[ "#J|[ "#|[ #3c
?3|[ ',CsO&<|[ #3c
?3|[  &c]!|[" "*##|[$ %|[&  $'|[( %SM)|[* 08}+|[. "/|[0 %(1|[2 38*3|[4 *-5|[6 $(7|[8 "'s9|[: #(U3Z"8;|[< #&=|[ |[|  59&*9=.2/3t"t 01t UO	t
 "*$u+!6t %UOt 'tnt tl#: c+=&=!>  0 AE2Dd3PbKbFc2c,d  AF+ .u .% .U . .2594C$ 37.1/30415;??C "J1J1 eS01J1  %||	J1
 "%,,/J1 %UOJ1 u||,J1  -J1 !.J1 !)c3h 8J1 $Dell):$;<J1 J1 J1 
!5(	)J1 J1r4   r   c                   j  ^  \ rS rSr          S"S\S\S\S\S\S\S\S	\\\\\   4      S
\\   S\\   S\\   S\S\\   S\\   4U 4S jjjr	\
S\S\4S j5       rS#S jr       S$S\S\S\\   S\\   S\\   S\\   S\\\\4      S\\   S\S\\\\\S4   \\S4   4   4S  jjrS!rU =r$ )%r   i  rV   rW   rX   rY   rZ   rn   r   r\   r   r   r^   r_   r`   ra   c                   > [         TU ]  5         / n/ n/ n/ n/ n/ nSn[        U	[        5      (       a  U	/U-  n	[	        U5       H  nUS:X  a  UOUnUS:X  a  UOUnUR                  [        X5      5        UR                  [        UUUUS95        UR                  [        X1-   UU[        X1-   US9[        XGS9SS95        U(       aO  UR                  [        U
X*-  UU	U   UUUUS95        UR                  [        UXK-  UU	U   UUU[        XGS9S95        UR                  [        XB5      5        M     U(       aY  UR                  [        X"5      5        [        USUS	S
9U l        [        XB-   SUS	S
9U l        UR                  [        XB5      5        OS U l        S U l        [        R                  " U5      U l        [        R                  " U5      U l        U(       a  [        R                  " U5      OS /U-  U l        U(       a  [        R                  " U5      OS /U-  U l        [        R                  " U5      U l        [        R                  " U5      U l        SU l        g )Nr   r   rg   rh   rZ   ri   rc   re   rf   rl   Tro   rp   F)r@   rA   rs   rt   ru   rv   rw   r!   rx   r"   r    base_downsamplersctrl_downsamplersr   rF   base_resnetsctrl_resnetsbase_attentionsctrl_attentionsr:   r;   gradient_checkpointing)rC   rV   rW   rX   rY   rZ   rn   r   ry   r\   r   r   r^   r_   r`   ra   r  r  r  r  r;   r:   rm   rz   rD   s                           r5   rA   )ControlNetXSCrossAttnDownBlock2D.__init__  s   $ 	
2C88,H+IJ+V(z"A346/?P346/?P /? RS 0!2"/*	  0 C!2"/.(;H`  33Dj	 &&&0)E$5#?#B,?.C)9(7	  &&&0)E$5#?#B,?.C)9(;<M(s	 /@ TUs #v  /@ TU%1!D?PW[&D" &2!5Sdko&D" /@ TU%)D"%)D"MM,7MM,7ANr}}_=UYTZ]gTgANr}}_=UYTZ]gTgMM,7MM,7&+#r4   base_downblockctrl_downblockc                    S nUR                   S   R                  nUR                   S   R                  nUR                   S   R                  U-
  nUR                   S   R                  nUR                   S   R                  R                  nUR                   S   R
                  R                  n	UR                   S   R
                  R                  n
[        US5      (       a  Sn[        UR                  S   R                  5      nU" U5      R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                  nOSnS nS nS nS nS nS nUR                  S LnU " UUUUUU	U
UUUUUUUUS9nUR                   R#                  UR                   R%                  5       5        UR&                  R#                  UR                   R%                  5       5        U(       af  UR(                  R#                  UR                  R%                  5       5        UR*                  R#                  UR                  R%                  5       5        U(       ai  UR,                  R#                  UR                  S   R%                  5       5        UR.                  R#                  UR                  R%                  5       5        UR0                  R#                  UR0                  R%                  5       5        UR2                  R#                  UR2                  R%                  5       5        U$ )Nc                 N    U R                   S   R                  S   R                  $ Nr   r<   transformer_blocksattn2blocks    r5   get_first_cross_attentionPControlNetXSCrossAttnDownBlock2D.from_modules.<locals>.get_first_cross_attention@  $    ##A&99!<BBBr4   r   r<   TFr   )r9   rg   rh   time_emb_projin_featuresnorm1r   r*  r   r<   r  headsr^   r`   ra   rB   r  r(  r)  r  r  r  r  r  r:   r;   )r   r  r  r  rV   rW   rX   rY   rZ   r   ctrl_num_groupsry   r\   r   r   r^   r`   ra   r_   r   s                       r5   r  -ControlNetXSCrossAttnDownBlock2D.from_modules=  s   	C *11!4@@*2215BB""1%114DD 	 +2215BB&..q1??KK#++A.44??
(00399DD><00 M+.~/H/H/K/^/^+_('@'P'V'V$'@'P'V'V$";N"K"_"_8HYY$2$=$=a$@$V$V!!M+/('+$'+$"&#$(!'44D@ -/-/'&%4')E%=%= 3)-"7
& 	**>+A+A+L+L+NO**>+A+A+L+L+NO!!11.2K2K2V2V2XY!!11.2K2K2V2V2XY##33N4O4OPQ4R4]4]4_`##33N4O4O4Z4Z4\]**>+F+F+Q+Q+ST**>+F+F+Q+Q+STr4   r4  c                    U R                  5        H
  nSUl        M     U R                  /n[        U R                  [
        R                  5      (       a  UR                  U R                  5        U R                  b  UR                  U R                  5        U H!  nUR                  5        H
  nSUl        M     M#     gr6  TNF)	r7  r8  r  rs   r  r   rF   rv   r  rC   r:  r;  r<  s       r5   r9  3ControlNetXSCrossAttnDownBlock2D.freeze_base_params  s     __&E"&E ' ''(
d**BMM::d223!!-d445D*&+# + r4   r  r  r  r  r  r  r  encoder_attention_maskr  .c
           
         Ub(  UR                  SS 5      b  [        R                  S5        Un
UnSnSn[        [	        U R
                  U R                  5      5      n[        [	        U R                  U R                  5      5      n[	        XU R                  U R                  5       GH  u  u  nnu  nnnnU	(       a  [        R                  " UU" U
5      /SS9n[        R                  " 5       (       a$  U R                  (       a  U R                  UX5      n
OU" X5      n
Ub  U" U
UUUUSS9S   n
U	(       aW  [        R                  " 5       (       a$  U R                  (       a  U R                  UX5      nOU" X5      nUb  U" UUUUUSS9S   nU	(       a  U
U" U5      U-  -   n
X4-   nX4-   nGM
     U R                   b  U R                  S	   nU R                  S	   nU	(       a  [        R                  " UU" U
5      /SS9nU R!                  U
5      n
U	(       a  U R#                  U5      nU	(       a  U
U" U5      U-  -   n
X4-   nX4-   nXX4$ )
NscaleSPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r,   r&   r  Fr  r  r  r  r  r   r   )r  loggerwarningr   r+  r  r  r  r  r:   r;   rL  catis_grad_enabledr  _gradient_checkpointing_funcr  r  )rC   r  r  r  r  r  r  r  r  r  r  r  base_output_statesctrl_output_statesbase_blocksctrl_blocksb_resb_attnc_resc_attnb2cc2bs                         r5   r   (ControlNetXSCrossAttnDownBlock2D.forward  si    "-%))'48Dtu##3t00$2F2FGH3t00$2F2FGH:=d&7&79J9J;
6OUF_eVc3 FCK#8a@ $$&&4+F+F::5&Ov,!*?+A#1+A %  ((**t/J/J!>>ufSF"60F%#.C/E'5/E$) F #f+0B"BB!3i!?!3i!?Y;
\ !!-##B'C##B'C FCK#8a@++F3F//7#f+0B"BB!3i!?!3i!?1EEr4   )	r  r  r  r:   r  r  r  r;   r  )
r   r   Tr&   r&   r&   r   TFTr  )NNr   NNNT)r-   r.   r/   r0   rt   r   r
   r	   r   rA   r   r   r7   r  r9  r   r   r   r   r   r   r3   rH   rI   s   @r5   r   r     s     "(*IJ2323-1#+004!t,t, t, 	t,
 t, t, t, #&t, '/uS%*_/E&Ft, #+3-t, #+3-t, &c]t, t, #4.t,   (~!t, t,l ?*> ?Pl ? ?B,* 37/3.1+/;?37"ZF"ZF ZF  (/	ZF
 %V,ZF %UOZF !(ZF !)c3h 8ZF !) 0ZF ZF 
vvuVS[153EE	FZF ZFr4   r   c                   &  ^  \ rS rSr         SS\S\S\\   S\S\S\S\\   S	\\   S
\\   S\S\\   4U 4S jjjr\S\	S\
4S j5       rSS jr      S S\S\S\S\\   S\\   S\\\\4      S\\   S\\   S\S\\\4   4S jjrSrU =r$ )!r   i  r}   r~   rZ   rn   r   r\   r   r   r^   r`   ra   c                    > [         TU ]  5         [        X5      U l        [	        UUUUU	UUU
S9U l        [	        UX!-   UU[        [        X"U-   5      U5      U	UUU
S9	U l        [        X!5      U l	        SU l
        g )N)r\   rg   rZ   r   r^   r]   ra   r`   r   F)r@   rA   rw   r:   r#   base_midblockrx   r   ctrl_midblockr;   r  )rC   r}   r~   rZ   rn   r   r\   r   r   r^   r`   ra   rD   s               r5   rA   (ControlNetXSCrossAttnMidBlock2D.__init__  s     	 +=H4)E%') 3 8"7-	
 5)E%5&'-M=#@AC[ !4 8"7-
" +=H&+#r4   r  r   c                    UR                   nUR                  nUR                  nS nUR                  nUR                  n[        UR                  S   R                  5      nUR                  S   R                  R                  n	UR                  S   R                  R                  n
UR                  S   R                  R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                   nU " UUU	U
UUUUUUUS9nUR                   R#                  UR%                  5       5        UR&                  R#                  UR%                  5       5        UR(                  R#                  UR%                  5       5        UR                  R#                  UR%                  5       5        U$ )Nc                 N    U R                   S   R                  S   R                  $ r  r  )rM   s    r5   r  OControlNetXSCrossAttnMidBlock2D.from_modules.<locals>.get_first_cross_attention-  s$    &&q)<<Q?EEEr4   r   r   )r:   r;   rM   rh   rg   r   r<   r  r9   r  r  r  r   r  r^   r`   ra   r(  r)  r  r   )r   r  r   r:   r;   r  r}   r~   r\   rZ   r   r  r   r   r^   r`   ra   r   s                     r5   r  ,ControlNetXSCrossAttnMidBlock2D.from_modules"  s    %11$11%..	F %11$00'*=+C+CA+F+Y+Y'Z$%--a0>>JJ"**1-33>>
'//288CC#<]#K#Q#Q #<]#K#Q#Q 7FZZ4]CTT - 8 8 ; Q Q '''&%4)E%=%= 3-"7
 	**<+B+B+DE++M,D,D,FG++M,D,D,FG**<+B+B+DEr4   r4  c                     U R                  5        H
  nSUl        M     U R                  R                  5        H
  nSUl        M     g)r6  TFN)r7  r8  r  )rC   r:  s     r5   r9  2ControlNetXSCrossAttnMidBlock2D.freeze_base_paramsS  s@     __&E"&E ' ''224E"'E 5r4   r  r  r  r  r  r  r  r  r  c
                 T   Ub(  UR                  SS 5      b  [        R                  S5        Un
UnUUUUUS.nU	(       a%  [        R                  " XR                  U
5      /SS9nU R                  " U
40 UD6n
U	(       a)  U R                  " U40 UD6nXR                  U5      U-  -   n
X4$ )Nr  r  )r  r  r  r  r  r&   r  )	r  r  r  rL  r  r:   r  r   r;   )rC   r  r  r  r  r  r  r  r  r  r  r  
joint_argss                r5   r   'ControlNetXSCrossAttnMidBlock2D.forward^  s     "-%))'48Dtu## %:,&<&<

 YY(9(9&(ABJF##F9j9''=*=F//7:LLLF~r4   )r  r:   r   r;   r  )	Nr   r   r&   r&   r&   r   FTr  )Nr   NNNT)r-   r.   r/   r0   rt   r   r   rA   r   r#   rK   r  r9  r   r   r   r   r   r	   r   r3   rH   rI   s   @r5   r   r     s   
 (,!(*,-2323-1!&042,2, 2,  }	2,
 2, #&2, '*2, #+3-2, #+3-2, &c]2, 2,  (~2, 2,h ... 3. .`	(  04.1;?+/37"""" "  &	"
 %V," %UO" !)c3h 8" !(" !) 0" " 
vv~	" "r4   r   c                   <  ^  \ rS rSr         S#S\S\S\S\\   S\S\S\\   S	\S
\S\S\S\S\\   4U 4S jjjr\	S\
S\4S j5       rS$S jr       S%S\S\\S4   S\\S4   S\S\\   S\\   S\\\\4      S\\   S\\   S\\   S \S\4S! jjrS"rU =r$ )&r  i  rg   rh   r   r   rZ   rn   r   r\   r]   r^   r   r`   ra   c                   > [         TU ]  5         / n/ n/ nSnXl        Xl        [	        U	[
        5      (       a  U	/U-  n	[        U5       H  nUUS-
  :X  a  UOUnUS:X  a  UOUnUR                  [        UU   U5      5        UR                  [        UU-   UUUS95        U(       d  M_  UR                  [        U
X*-  UU	U   UUUUS95        M     [        R                  " U5      U l        U(       a  [        R                  " U5      OS /U-  U l        [        R                  " U5      U l        U(       a  [!        USUS9U l        OS U l        SU l        Xpl        g )	Nr   r&   r   r  rl   T)rq   rh   F)r@   rA   has_cross_attentionr]   rs   rt   ru   rv   rw   r!   r"   r   rF   r9   r<   r;   r$   
upsamplersr  r   )rC   rg   rh   r   r   rZ   rn   r   ry   r\   r]   r^   r   r`   ra   r9   r<   r;   rm   rz   res_skip_channelsr   rD   s                         r5   rA   'ControlNetXSCrossAttnUpBlock2D.__init__  sd   " 	

#0 #6 2C88,H+IJ+V(z"A01Z!^0C,89Q!4L/A!/DFX YZNN 25F F!-"/*	 }!!&+$;$0#?#B,?.C)9(7	! #: }}W-7D"--
34&S]J]MM,7(S_`DO"DO&+#,r4   base_upblockctrl_upblockc                    UR                   nS nUR                  S   R                  nUR                  S   R                  U-
  nUR                  S   R                  U-
  nU Vs/ s H  oR                  PM     n	nUR                  S   R                  R
                  n
UR                  S   R                  R                  nUR                  n[        US5      (       at  Sn[        UR                  S   R                  5      nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                   nOSnS nS nS nS nS nUR"                  S LnU " UUUU	U
UUUUUUUUUS9nUR                  R%                  UR                  R'                  5       5        U(       a3  UR                  R%                  UR                  R'                  5       5        U(       a6  UR"                  R%                  UR"                  S   R'                  5       5        UR                   R%                  UR'                  5       5        U$ s  snf )Nc                 N    U R                   S   R                  S   R                  $ r  r  r  s    r5   r  NControlNetXSCrossAttnUpBlock2D.from_modules.<locals>.get_first_cross_attention  r  r4   r   r   r<   TF)rg   rh   r   r   rZ   rn   r   ry   r\   r]   r^   r   r`   ra   )r;   r9   rh   rg   r  r  r  r   r   r*  r   r<   r  r  r^   r`   ra   r  r(  r)  )r   r  r  ctrl_to_base_skip_connectionsr  rh   rg   prev_output_channelsr!  ctrl_skip_channelssrZ   r   r   ry   r\   r]   r^   r`   ra   r   r   s                        r5   r  +ControlNetXSCrossAttnUpBlock2D.from_modules  s>   (4(A(A%	C $++A.;;"**2.::\I+33A6BB\Q6ST6S}}6ST$,,Q/==II!))!,22==
%44<.. M+.|/F/Fq/I/\/\+](";L"I"O"O";L"I"]"]8FWW$0$;$;A$>$T$T!!M+/("&"&#$(!#..d: #% 42'&)')E 3 3%-"7
$ 	%%l&:&:&E&E&GH,,\-D-D-O-O-QR,,\-D-DQ-G-R-R-TU**+H+S+S+UV] Us   %Ir4  c                    U R                  5        H
  nSUl        M     U R                  /n[        U R                  [
        R                  5      (       a  UR                  U R                  5        U R                  b  UR                  U R                  5        U H!  nUR                  5        H
  nSUl        M     M#     gr  )	r7  r8  r9   rs   r<   r   rF   rv   r  r  s       r5   r9  1ControlNetXSCrossAttnUpBlock2D.freeze_base_params  s     __&E"&E ' ll^
door}}55doo.??&doo.D*&+# + r4   r  r  .r  r  r  r  r  r  upsample_sizer  r  c           
        ^ ^ Ub(  UR                  SS 5      b  [        R                  S5        [        T SS 5      =(       a5    [        T SS 5      =(       a!    [        T SS 5      =(       a    [        T SS 5      mUU 4S jn[	        T R
                  T R                  T R                  [        U5      [        U5      5       H  u  pnnnU(       a  X" U5      U-  -  nU" UU5      u  nn[        R                  " UU/SS	9n[        R                  " 5       (       a$  T R                  (       a  T R                  XU5      nOU" X5      nUc  M  U" UUUUU
S
S9S   nM     T R                  b  T R                  X5      nU$ )Nr  r  rh  ri  rj  rk  c           
         > T(       aA  [        TR                  U UTR                  TR                  TR                  TR
                  S9$ X4$ )N)rh  ri  rj  rk  )r   r   rh  ri  rj  rk  )r  
res_h_baseis_freeu_enabledrC   s     r5   maybe_apply_freeu_to_subblockMControlNetXSCrossAttnUpBlock2D.forward.<locals>.maybe_apply_freeu_to_subblock0  sI    "''!wwwwwwww  %00r4   r&   r  Fr  r   )r  r  r  r'  r+  r9   r<   r;   r   rL  r  r  r  r  r  )rC   r  r  r  r  r  r  r  r  r  r  r  r!  resnetattnr  r  
res_h_ctrlr   s   `                 @r5   r   &ControlNetXSCrossAttnUpBlock2D.forward  s{    "-%))'48Dtu D$% *dD)*dD)* dD)	 		1 :=LLOO1212:
5F#z: Z3E!EE(EmU_(`%M:!II}j&AqIM$$&&4+F+F $ A A&Y] ^ &} ; $!*?+A#1+A %! !':
8 ??& OOMIMr4   )r<   r;   r  r  r]   r9   r   r  )	r   NTr&   r&   r   TFTr  )Nr   NNNNT)r-   r.   r/   r0   rt   r   r   r   rA   r   r   rQ   r  r9  r   r	   r   r   r   r   r   r3   rH   rI   s   @r5   r  r    s     "(,,-#$#'!!&04E-E- E- !	E-
 !IE- E- E- !E- '*E- !E- !E- E- E-  (~E- E-N 8(: 8Jd 8 8t,. 37.1;?+/'+37"GG ',FCK&8G ',FCK&8	G
 G  (/G %UOG !)c3h 8G !(G  }G !) 0G G 
G Gr4   r  c           	      @    [        [        R                  " XSSS95      $ )Nr&   r   )r   )zero_moduler   rG   )rg   rh   s     r5   rw   rw   a  s    ryyAqIJJr4   c                 r    U R                  5        H"  n[        R                  R                  U5        M$     U $ r?   )r7  r   initzeros_)rB  ps     r5   r(  r(  e  s*     
q !Mr4   c                 V    UnX :  a  U $ US:w  a  X-  nUS:X  a  U$ US-  nUS:w  a  M  g g )Nr   r&   r,   )numberrd   factorresiduals       r5   rx   rx   k  s>    F
A+?q=M!	 A+r4   )r   Tr&   r&   r   TFT)Nr   r&   r&   r   FTr?   )Edataclassesr   mathr   typingr   r   r   r   r	   r
   rL  torch.utils.checkpointr   r   configuration_utilsr   r   utilsr   r   utils.torch_utilsr   attention_processorr   r   r   r   r   r   r   
embeddingsr   r   modeling_utilsr   unets.unet_2d_blocksr   r   r    r!   r"   r#   r$   unets.unet_2d_conditionr%   r  r'   
get_loggerr-   r  r)   rM  r7   rK   rQ   rt   r   r|   r   r   r   r   r   r   r  rw   r(  rx   r,   r4   r5   <module>r>     s   "  : :    B ( ,   6 '   ; 7 
		H	% 
 
 
(299 (()")) )) ) *,EF)*)-',,0X!X!X! X! 	X!
 X! "#X! #+5eCj+A"BX! "#X! "#X! X! tnX! $D>X!| $()+())*)-""& p p p C= p "#	 p
 #& p "# p "# p  p   pFPPP S	PH
*k H
VC1J C1LdFryy dFN	Sbii Sl[RYY [|Kr4   