
    +h_                        S SK r S SKJrJrJrJr  S SKrS SKJr  SSK	J
r
Jr  SSKJr  SSKJr  \R                   " \5      r " S S	\5      r " S
 S\\
5      r " S S\R*                  5      r " S S\R*                  5      r " S S\R*                  5      r " S S\R*                  5      r " S S\R*                  5      r " S S\R*                  5      r " S S\R*                  5      rg)    N)CallableListOptionalUnion   )ConfigMixinregister_to_config)logging   )
ModelMixinc                     ^  \ rS rSrSrS\S   4U 4S jjrSS\R                  S\	\\
      S\\R                     4S	 jjr    SS
\\\R                  4   S\S\S\S\	\   4
S jjr\S\	\\\R                  4      4S j5       rSrU =r$ )MultiAdapter   a}  
MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to
user-assigned weighting.

This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading
or saving.

Args:
    adapters (`List[T2IAdapter]`, *optional*, defaults to None):
        A list of `T2IAdapter` model instances.
adapters
T2IAdapterc                 &  > [         [        U ]  5         [        U5      U l        [
        R                  " U5      U l        [        U5      S:X  a  [        S5      e[        U5      S:X  a  [        S5      eUS   R                  nUS   R                  n[        S[        U5      5       H\  nX   R                  U:w  d  X   R                  U:w  d  M)  [        SU SU SU SX   R                   SU S	X   R                   35      e   X l	        X0l
        g )
Nr   zExpecting at least one adapterr   zQFor a single adapter, please use the `T2IAdapter` class instead of `MultiAdapter`zjExpecting all adapters to have the same downscaling behavior, but got:
adapters[0].total_downscale_factor=z
adapters[0].downscale_factor=z

adapter[`z`].total_downscale_factor=z`].downscale_factor=)superr   __init__lennum_adapternn
ModuleListr   
ValueErrortotal_downscale_factordownscale_factorrange)selfr   $first_adapter_total_downscale_factorfirst_adapter_downscale_factoridx	__class__s        R/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/adapter.pyr   MultiAdapter.__init__)   s$   lD*,x=h/x=A=>>x=Apqq 08{/Q/Q,)1!)E)E&CM*C448\\=115SS ::^9_ `44R3S T  #u$>x}?c?c>d e  #u$89W9W8X	Z  + 'K# >    xsadapter_weightsreturnc                    Uc2  [         R                  " SU R                  -  /U R                  -  5      nO[         R                  " U5      nSn[        XU R                  5       He  u  pEnU" U5      nUc(  Un[        [        U5      5       H  nXSU   -  X8'   M     M9  [        [        U5      5       H  nX8==   XWU   -  -  ss'   M     Mg     U$ )a2  
Args:
    xs (`torch.Tensor`):
        A tensor of shape (batch, channel, height, width) representing input images for multiple adapter
        models, concatenated along dimension 1(channel dimension). The `channel` dimension should be equal to
        `num_adapter` * number of channel per image.

    adapter_weights (`List[float]`, *optional*, defaults to None):
        A list of floats representing the weights which will be multiplied by each adapter's output before
        summing them together. If `None`, equal weights will be used for all adapters.
Nr   )torchtensorr   zipr   r   r   )	r   r%   r&   accume_statexwadapterfeaturesis	            r"   forwardMultiAdapter.forwardL   s     "#llA0@0@,@+ADDTDT+TUO#ll?;O dmmDMA'qzH#'s<01A&'q/&9LO 2 s8}-A OqA;6O . E r$   save_directoryis_main_processsave_functionsafe_serializationvariantc           	      r    SnUnU R                    H#  nUR                  UUUUUS9  US-  nUSU 3-   nM%     g)a  
Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the
`[`~models.adapter.MultiAdapter.from_pretrained`]` class method.

Args:
    save_directory (`str` or `os.PathLike`):
        The directory where the model will be saved. If the directory does not exist, it will be created.
    is_main_process (`bool`, optional, defaults=True):
        Indicates whether current process is the main process or not. Useful for distributed training (e.g.,
        TPUs) and need to call this function on all processes. In this case, set `is_main_process=True` only
        for the main process to avoid race conditions.
    save_function (`Callable`):
        Function used to save the state dictionary. Useful for distributed training (e.g., TPUs) to replace
        `torch.save` with another method. Can also be configured using`DIFFUSERS_SAVE_MODE` environment
        variable.
    safe_serialization (`bool`, optional, defaults=True):
        If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`.
    variant (`str`, *optional*):
        If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
r   )r5   r6   r7   r8   r   _N)r   save_pretrained)	r   r4   r5   r6   r7   r8   r    model_path_to_saver/   s	            r"   r;   MultiAdapter.save_pretrainedi   s[    8 +}}G##" /+#5 $  1HC!3#i!? %r$   pretrained_model_pathc                    Sn/ nUn[         R                  R                  U5      (       a[  [        R                  " U40 UD6nUR                  U5        US-  nUSU 3-   n[         R                  R                  U5      (       a  M[  [        R                  [        U5       SU S35        [        U5      S:X  a2  [        S[         R                  R                  U5       SUS-    S35      eU " U5      $ )	al  
Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models.

The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
the model, set it back to training mode using `model.train()`.

Warnings:
    *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained
    with the rest of the model. It is up to you to train those weights with a downstream fine-tuning. *Weights
    from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded.

Args:
    pretrained_model_path (`os.PathLike`):
        A path to a *directory* containing model weights saved using
        [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`.
    torch_dtype (`torch.dtype`, *optional*):
        Override the default `torch.dtype` and load the model under this dtype.
    output_loading_info(`bool`, *optional*, defaults to `False`):
        Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
    device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
        A map that specifies where each submodule should go. It doesn't need to be refined to each
        parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the
        same device.

        To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For
        more information about each option see [designing a device
        map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
    max_memory (`Dict`, *optional*):
        A dictionary mapping device identifiers to their maximum memory. Default to the maximum memory
        available for each GPU and the available CPU RAM if unset.
    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
        Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
        also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
        model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
        setting this argument to `True` will raise an error.
    variant (`str`, *optional*):
        If specified, load weights from a `variant` file (*e.g.* pytorch_model.<variant>.bin). `variant` will
        be ignored when using `from_flax`.
    use_safetensors (`bool`, *optional*, defaults to `None`):
        If `None`, the `safetensors` weights will be downloaded if available **and** if`safetensors` library is
        installed. If `True`, the model will be forcibly loaded from`safetensors` weights. If `False`,
        `safetensors` is not used.
r   r   r:   z adapters loaded from .zNo T2IAdapters found under z. Expected at least _0)ospathisdirr   from_pretrainedappendloggerinfor   r   dirname)clsr>   kwargsr    r   model_path_to_loadr/   s          r"   rE   MultiAdapter.from_pretrained   s   Z 
 3ggmm.// 001CNvNGOOG$1HC!61SE!B ggmm.// 	s8}o%;<Q;RRSTUx=A-bggoo>S.T-UUij  CG  kG  jH  HI  J  8}r$   )r   r   r   r   N)TNTN)__name__
__module____qualname____firstlineno____doc__r   r   r)   Tensorr   floatr2   r   strrB   PathLikeboolr   r;   classmethodrE   __static_attributes____classcell__r!   s   @r"   r   r      s    
!?l!3 !?F%,, $u+9N Z^_d_k_kZl @ !%"&#'!%(@c2;;./(@ (@  	(@
 !(@ #(@T AHU3CS=T4U A Ar$   r   c                      ^  \ rS rSrSr\S/ SQSSS4S\S	\\   S
\S\S\4
U 4S jjj5       r	S\
R                  S\\
R                     4S jr\S 5       r\S 5       rSrU =r$ )r      a  
A simple ResNet-like model that accepts images containing control signals such as keyposes and depth. The model
generates multiple feature maps that are used as additional conditioning in [`UNet2DConditionModel`]. The model's
architecture follows the original implementation of
[Adapter](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L97)
 and
 [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235).

This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as
downloading or saving.

Args:
    in_channels (`int`, *optional*, defaults to `3`):
        The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a gray scale
        image.
    channels (`List[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
        The number of channels in each downsample block's output hidden state. The `len(block_out_channels)`
        determines the number of downsample blocks in the adapter.
    num_res_blocks (`int`, *optional*, defaults to `2`):
        Number of ResNet blocks in each downsample block.
    downscale_factor (`int`, *optional*, defaults to `8`):
        A factor that determines the total downscale factor of the Adapter.
    adapter_type (`str`, *optional*, defaults to `full_adapter`):
        Adapter type (`full_adapter` or `full_adapter_xl` or `light_adapter`) to use.
   @       rc   r      full_adapterin_channelschannelsnum_res_blocksr   adapter_typec                    > [         TU ]  5         US:X  a  [        XX45      U l        g US:X  a  [	        XX45      U l        g US:X  a  [        XX45      U l        g [        SU S35      e)Nre   full_adapter_xllight_adapterzUnsupported adapter_type: 'zH'. Choose either 'full_adapter' or 'full_adapter_xl' or 'light_adapter'.)r   r   FullAdapterr/   FullAdapterXLLightAdapterr   )r   rf   rg   rh   r   ri   r!   s         r"   r   T2IAdapter.__init__   sq     	>)&{n_DL..(aDL_,'~`DL-l^ <8 8 r$   r-   r'   c                 $    U R                  U5      $ )aj  
This function processes the input tensor `x` through the adapter model and returns a list of feature tensors,
each representing information extracted at a different scale from the input. The length of the list is
determined by the number of downsample blocks in the Adapter, as specified by the `channels` and
`num_res_blocks` parameters during initialization.
r/   r   r-   s     r"   r2   T2IAdapter.forward
  s     ||Ar$   c                 .    U R                   R                  $ rN   )r/   r   r   s    r"   r   !T2IAdapter.total_downscale_factor  s    ||222r$   c                 B    U R                   R                  R                  $ )zThe downscale factor applied in the T2I-Adapter's initial pixel unshuffle operation. If an input image's dimensions are
not evenly divisible by the downscale_factor then an exception will be raised.
)r/   	unshuffler   rv   s    r"   r   T2IAdapter.downscale_factor  s    
 ||%%666r$   rr   )rO   rP   rQ   rR   rS   r	   intr   rV   r   r)   rT   r2   propertyr   r   rZ   r[   r\   s   @r"   r   r      s    4  4 !* s) 	
   , $u||*<  3 3 7 7r$   r   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )rm   i"  *
See [`T2IAdapter`] for more information.
r_   r`   r   rd   rf   rg   rh   r   c                   > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " XS   SSS9U l        [        R                  " [        US   US   U5      /[        S[        U5      5       Vs/ s H  n[        X%S-
     X%   USS9PM     snQ5      U l        US[        U5      S-
  -  -  U l        g s  snf Nr   r   r_   r   kernel_sizepaddingTdown)r   r   r   PixelUnshufflery   Conv2dconv_inr   AdapterBlockr   r   bodyr   r   rf   rg   rh   r   r1   r!   s         r"   r   FullAdapter.__init__'  s     	!a$77**+;<yyqkqRSTMMXa[(1+~F #1c(m44 !a%(+~TXY4
	 '7s8}q?P9Q&Q#s   
C
r-   r'   c                     U R                  U5      nU R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )a  
This method processes the input tensor `x` through the FullAdapter model and performs operations including
pixel unshuffling, convolution, and a stack of AdapterBlocks. It returns a list of feature tensors, each
capturing information at a different stage of processing within the FullAdapter model. The number of feature
tensors in the list is determined by the number of downsample blocks specified during initialization.
ry   r   r   rF   r   r-   r0   blocks       r"   r2   FullAdapter.forwardA  sN     NN1LLOYYEaAOOA  r$   r   r   r   ry   rO   rP   rQ   rR   rS   r{   r   r   r)   rT   r2   rZ   r[   r\   s   @r"   rm   rm   "  su     4 !RR s)R 	R
 R R4 $u||*<  r$   rm   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )rn   iT  r~   r_   r`   r      rf   rg   rh   r   c           
      j  > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " XS   SSS9U l        / U l        [        [        U5      5       H  nUS:X  a/  U R                  R                  [        X%S-
     X%   U5      5        M8  US:X  a.  U R                  R                  [        X%S-
     X%   USS95        Ml  U R                  R                  [        X%   X%   U5      5        M     [        R                  " U R                  5      U l        US-  U l        g r   )r   r   r   r   ry   r   r   r   r   r   rF   r   r   r   r   s         r"   r   FullAdapterXL.__init__Y  s     	!a$77**+;<yyqkqRST	s8}%AAv		  h1uox{N![\a		  h1uox{Nae!fg		  hk8;!WX & MM$)),	&6&:#r$   r-   r'   c                     U R                  U5      nU R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )z
This method takes the tensor x as input and processes it through FullAdapterXL model. It consists of operations
including unshuffling pixels, applying convolution layer and appending each block into list of feature tensors.
r   r   s       r"   r2   FullAdapterXL.forwardu  sN    
 NN1LLOYYEaAOOA  r$   r   r   r\   s   @r"   rn   rn   T  so     4 ";; s); 	;
 ; ;8 $u||*<  r$   rn   c            	       ~   ^  \ rS rSrSrSS\S\S\S\4U 4S jjjrS\R                  S	\R                  4S
 jr
SrU =r$ )r   i  a  
An AdapterBlock is a helper model that contains multiple ResNet-like blocks. It is used in the `FullAdapter` and
`FullAdapterXL` models.

Args:
    in_channels (`int`):
        Number of channels of AdapterBlock's input.
    out_channels (`int`):
        Number of channels of AdapterBlock's output.
    num_res_blocks (`int`):
        Number of ResNet blocks in the AdapterBlock.
    down (`bool`, *optional*, defaults to `False`):
        If `True`, perform downsampling on AdapterBlock's input.
rf   out_channelsrh   r   c                 @  > [         TU ]  5         S U l        U(       a  [        R                  " SSSS9U l        S U l        X:w  a  [        R                  " XSS9U l        [        R                  " [        U5       Vs/ s H  n[        U5      PM     sn6 U l
        g s  snf )Nr   Tr   stride	ceil_moder   r   )r   r   
downsampler   	AvgPool2din_convr   
Sequentialr   AdapterResnetBlockresnets)r   rf   r   rh   r   r:   r!   s         r"   r   AdapterBlock.__init__  s}     llqdSDO&99[ANDL}}8=n8MN8M1 .8MN
Ns   <Br-   r'   c                     U R                   b  U R                  U5      nU R                  b  U R                  U5      nU R                  U5      nU$ )z
This method takes tensor x as input and performs operations downsampling and convolutional layers if the
self.downsample and self.in_conv properties of AdapterBlock model are specified. Then it applies a series of
residual blocks to the input tensor.
r   r   r   rs   s     r"   r2   AdapterBlock.forward  sE     ??&"A<<#QALLOr$   r   FrO   rP   rQ   rR   rS   r{   rX   r   r)   rT   r2   rZ   r[   r\   s   @r"   r   r     sO    
C 
s 
C 
W[ 
 
 %,,  r$   r   c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	r   i  z
An `AdapterResnetBlock` is a helper model that implements a ResNet-like block.

Args:
    channels (`int`):
        Number of channels of AdapterResnetBlock's input and output.
rg   c                    > [         TU ]  5         [        R                  " XSSS9U l        [        R
                  " 5       U l        [        R                  " XSS9U l        g )Nr_   r   r   r   r   r   r   r   block1ReLUactblock2r   rg   r!   s     r"   r   AdapterResnetBlock.__init__  sA    ii1M779iiBr$   r-   r'   c                 l    U R                  U R                  U5      5      nU R                  U5      nX!-   $ )z
This method takes input tensor x and applies a convolutional layer, ReLU activation, and another convolutional
layer on the input tensor. It returns addition with the input tensor.
r   r   r   r   r-   hs      r"   r2   AdapterResnetBlock.forward  .     HHT[[^$KKNur$   r   rO   rP   rQ   rR   rS   r{   r   r)   rT   r2   rZ   r[   r\   s   @r"   r   r     s6    C C	 	%,, 	 	r$   r   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )ro   i  r~   r_   )ra   rb   rc      rd   rf   rg   rh   r   c                   > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " [        XS   U5      /[        [        U5      S-
  5       Vs/ s H  n[        X%   X%S-      USS9PM     snQ[        US   US   USS9P5      U l	        US[        U5      -  -  U l
        g s  snf )Nr   r   r   Tr   )r   r   r   r   ry   r   LightAdapterBlockr   r   r   r   r   s         r"   r   LightAdapter.__init__  s     	!a$77**+;<MM!+{NK #3x=1#455 &hk8E?NY]^5 "(2,nSWX	
	 '7!s8}:L&M#s   *B=
r-   r'   c                     U R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )z
This method takes the input tensor x and performs downscaling and appends it in list of feature tensors. Each
feature tensor corresponds to a different level of processing within the LightAdapter.
)ry   r   rF   r   s       r"   r2   LightAdapter.forward  sA    
 NN1YYEaAOOA  r$   )r   r   ry   r   r\   s   @r"   ro   ro     su     . !NN s)N 	N
 N N4 $u||*<  r$   ro   c            	       ~   ^  \ rS rSrSrSS\S\S\S\4U 4S jjjrS\R                  S	\R                  4S
 jr
SrU =r$ )r   i  a  
A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the
`LightAdapter` model.

Args:
    in_channels (`int`):
        Number of channels of LightAdapterBlock's input.
    out_channels (`int`):
        Number of channels of LightAdapterBlock's output.
    num_res_blocks (`int`):
        Number of LightAdapterResnetBlocks in the LightAdapterBlock.
    down (`bool`, *optional*, defaults to `False`):
        If `True`, perform downsampling on LightAdapterBlock's input.
rf   r   rh   r   c                 f  > [         TU ]  5         US-  nS U l        U(       a  [        R                  " SSSS9U l        [        R
                  " XSS9U l        [        R                  " [        U5       Vs/ s H  n[        U5      PM     sn6 U l
        [        R
                  " XRSS9U l        g s  snf )Nr   r   Tr   r   r   )r   r   r   r   r   r   r   r   r   LightAdapterResnetBlockr   out_conv)r   rf   r   rh   r   mid_channelsr:   r!   s          r"   r   LightAdapterBlock.__init__  s    #q( llqdSDOyyJ}}V[\jVk&lVkQR'>|'LVk&lm		,!L 'ms   5B.r-   r'   c                     U R                   b  U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ )z
This method takes tensor x as input and performs downsampling if required. Then it applies in convolution
layer, a sequence of residual blocks, and out convolutional layer.
)r   r   r   r   rs   s     r"   r2   LightAdapterBlock.forward  sI    
 ??&"ALLOLLOMM!r$   )r   r   r   r   r   r   r\   s   @r"   r   r     sU    
MC 
Ms 
MC 
MW[ 
M 
M %,,  r$   r   c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	r   i.  z
A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different
architecture than `AdapterResnetBlock`.

Args:
    channels (`int`):
        Number of channels of LightAdapterResnetBlock's input and output.
rg   c                    > [         TU ]  5         [        R                  " XSSS9U l        [        R
                  " 5       U l        [        R                  " XSSS9U l        g )Nr_   r   r   r   r   s     r"   r    LightAdapterResnetBlock.__init__8  sC    ii1M779ii1Mr$   r-   r'   c                 l    U R                  U R                  U5      5      nU R                  U5      nX!-   $ )z
This function takes input tensor x and processes it through one convolutional layer, ReLU activation, and
another convolutional layer and adds it to input tensor.
r   r   s      r"   r2   LightAdapterResnetBlock.forward>  r   r$   r   r   r\   s   @r"   r   r   .  s6    N N	 	%,, 	 	r$   r   )rB   typingr   r   r   r   r)   torch.nnr   configuration_utilsr   r	   utilsr
   modeling_utilsr   
get_loggerrO   rG   r   r   Modulerm   rn   r   r   ro   r   r    r$   r"   <module>r      s    
 2 2   A  & 
		H	%y: yxD7[ D7T/")) /d/BII /d-299 -` <,299 ,^(		 (Vbii r$   