
    +h B                        S SK Jr  S SKJrJrJr  S SKrS SKJr  SSKJ	r	J
r
  SSKJrJr  SS	KJr  SS
KJrJrJr  SSKJr  SSKJr  SSKJrJr  \R6                  " \5      r\ " S S\5      5       r " S S\\	5      r " S S\5      r g)    )	dataclass)DictOptionalUnionN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )AttentionProcessor)-HunyuanCombinedTimestepTextSizeStyleEmbedding
PatchEmbedPixArtAlphaTextProjection)
ModelMixin)HunyuanDiTBlock   )Tuplezero_modulec                   :    \ rS rSr% \\R                     \S'   Srg)HunyuanControlNetOutput$   controlnet_block_samples N)	__name__
__module____qualname____firstlineno__r   torchTensor__annotations____static_attributes__r       i/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/controlnets/controlnet_hunyuan.pyr   r   $   s    #ELL11r$   r   c                   4  ^  \ rS rSr\                SS\S\S\S\\   S\\   S\S\S	\S
\S\S\S\S\S\	4U 4S jjj5       r
\S\\\4   4S j5       rS\\\\\4   4   4S jr\ SS j5       r         SS\R(                  S\4S jjrSrU =r$ )HunyuanDiT2DControlNetModel)   conditioning_channelsnum_attention_headsattention_head_dimin_channels
patch_sizeactivation_fntransformer_num_layers	mlp_ratiocross_attention_dimcross_attention_dim_t5pooled_projection_dimtext_lentext_len_t5"use_style_cond_and_image_meta_sizec                 r  > [         TU ]  5         X l        X#-  U l        [	        UUS-  USS9U l        [        R                  " [        R                  " X-   U[        R                  S95      U l        [        UUUUUS S9U l        [        UUUUUS9U l        [        R                   " / 5      U l        [        R                   " [%        U	S-  S-
  5       Vs/ s HF  n['        U R                  U R(                  R*                  U[-        U R                  U
-  5      US	S
S9PMH     sn5      U l        [1        [        R2                  " X5      5      U l        [%        [7        U R.                  5      5       H?  n[        R2                  " X5      n[1        U5      nU R"                  R9                  U5        MA     g s  snf )N   	silu_fp32)in_featureshidden_sizeout_featuresact_fn)dtype)heightwidthr,   	embed_dimr-   pos_embed_type)r3   seq_lenr1   r6   r   r   TF)dimr*   r.   ff_inner_dimr1   qk_normskip)super__init__	num_heads	inner_dimr   text_embedderr   	Parameterr    randnfloat32text_embedding_paddingr   	pos_embedr   time_extra_emb
ModuleListcontrolnet_blocksranger   configr*   intblocksr   Linearinput_blocklenappend)selfr)   r*   r+   r,   r-   r.   sample_sizer;   r/   r0   r1   r2   r3   r4   r5   r6   layer_controlnet_block	__class__s                       r%   rI   $HunyuanDiT2DControlNetModel.__init__*   s   ( 	,,A6..2,	
 ')llKK.0C5==Y'
# $#!!
 L"7 6/Q
 "$r!2 mm ##9Q#>#BC DE  (,(G(G"/!$T^^i%?!@(;  D
 'ryy'JKs4;;'(A!yyB*+;<""))*:; )s   AF4returnc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  SS9X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processorT)return_deprecated_lora
.processor.)hasattrrj   named_children)rf   rg   rh   sub_namechildfn_recursive_add_processorss        r%   rr   PHunyuanDiT2DControlNetModel.attn_processors.<locals>.fn_recursive_add_processors   s`    v//282F2F^b2F2c
V:./#)#8#8#:+tfAhZ,@%T $; r$   )strr    r   Moduler   r   ro   )r]   rh   rf   rg   rr   s       @r%   attn_processors+HunyuanDiT2DControlNetModel.attn_processorsv   sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r$   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers. If `processor` is a dict, the key needs to define the path to the
        corresponding cross attention processor. This is strongly recommended when setting trainable attention
        processors.
z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.rf   rg   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorrl   rm   )rn   
isinstancedictr{   popro   )rf   rg   rx   rp   rq   fn_recursive_attn_processors        r%   r   SHunyuanDiT2DControlNetModel.set_attn_processor.<locals>.fn_recursive_attn_processor   ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r$   N)r[   rv   keysr|   r}   
ValueErrorrt   r    r   ru   ro   )r]   rx   countrf   rg   r   s        @r%   set_attn_processor.HunyuanDiT2DControlNetModel.set_attn_processor   s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r$   c                    UR                   nUR                  nUR                  nUR                  nUR                  n	UR
                  n
UR                  nUR                  nUR                  nUR                  nUR                  nUR                  nUR                  nUnU=(       d    UR                  nU " UUUUUU	U
UUUUUUUS9nU(       a9  UR                  UR                  5       SS9n[         R#                  SUS    35        U$ )N)r)   r/   r.   r+   r1   r2   r;   r,   r0   r*   r-   r^   r4   r5   F)strictz0controlnet load from Hunyuan-DiT. missing_keys: r   )rV   r.   r+   r1   r2   r;   r,   r0   r*   r-   r^   r4   r5   r/   load_state_dict
state_dictloggerwarning)clstransformerr)   r/   load_weights_from_transformerrV   r.   r+   r1   r2   r;   r,   r0   r*   r-   r^   r4   r5   
controlnetkeys                       r%   from_transformer,HunyuanDiT2DControlNetModel.from_transformer   s"    ##,,#66$88!'!>!>(((($$	$88&&
((??(( 5!7!X6;X;X"7#9'1 3#9## 3!##

  ),,[-C-C-Ee,TCNNMcRSfXVWr$   controlnet_condconditioning_scalec                 T   UR                   SS u  pU R                  U5      nXR                  U R                  U5      5      -   nU R                  X'XUR                  S9nUR                   u  nnnU R                  UR                  SUR                   S   5      5      nUR                  UUS5      n[        R                  " XW/SS9n[        R                  " Xh/SS9nUR                  S5      R                  5       n[        R                  " XeU R                  5      nSn[        U R                  5       H  u  nnU" UUUUS	9nUU4-   nM     Sn[        UU R                   5       H  u  nnU" U5      nUU4-   nM     U Vs/ s H  nUU-  PM
     nnU(       d  U4$ [#        US
9$ s  snf )  
The [`HunyuanDiT2DControlNetModel`] forward method.

Args:
hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`):
    The input tensor.
timestep ( `torch.LongTensor`, *optional*):
    Used to indicate denoising step.
controlnet_cond ( `torch.Tensor` ):
    The conditioning input to ControlNet.
conditioning_scale ( `float` ):
    Indicate the conditioning scale.
encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
    Conditional embeddings for cross attention layer. This is the output of `BertModel`.
text_embedding_mask: torch.Tensor
    An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
    of `BertModel`.
encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
    Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder.
text_embedding_mask_t5: torch.Tensor
    An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
    of T5 Text Encoder.
image_meta_size (torch.Tensor):
    Conditional embedding indicate the image sizes
style: torch.Tensor:
    Conditional embedding indicate the style
image_rotary_emb (`torch.Tensor`):
    The image rotary embeddings to apply on query and key tensors during attention calculation.
return_dict: bool
    Whether to return a dictionary.
N)hidden_dtyper   )rD   r   r   )tembencoder_hidden_statesimage_rotary_emb)r   )shaperQ   rZ   rR   r>   rL   viewr    cat	unsqueezeboolwhererP   	enumeraterX   ziprT   r   )r]   hidden_statestimestepr   r   r   text_embedding_maskencoder_hidden_states_t5text_embedding_mask_t5image_meta_sizestyler   return_dictr?   r@   r   
batch_sizesequence_lengthr`   block_res_samplesr_   blockcontrolnet_block_res_samplesblock_res_samplera   samples                             r%   forward#HunyuanDiT2DControlNetModel.forward   s   ^ &++BC0}5 &(8(89X(YY""U]UcUc # 

 *B)G)G&
OQ#'#5#5$))".F.L.LR.PQ$
  $<#@#@_^`#a  %		+@*[ab c#ii)<(U[]^1;;A>CCE %,?X\XsXs t%dkk2LE5!&;!1	M !2]4D D 3 (*$256GI_I_2`../0@A+GK[J]+]( 3a
 So'oRn1C(CRn$'o022&@\]] (ps   F%)	rX   rT   rK   rZ   rJ   rQ   rL   rP   rR   )r      X   NNzgelu-approximate    i  (   g      @   i   r   M      T)r   NT	g      ?NNNNNNNT)r   r   r   r   r
   rW   r   rt   floatr   rI   propertyr   r   rv   r   r   classmethodr   r    r!   r   r#   __classcell__rb   s   @r%   r'   r'   )   s    &'#%"$%)$(/&(#'&*%)37#I<"I< !I<  	I<
 c]I< SMI< I< !$I< I< !I< !$I<  #I< I<  !I<" -1#I< I<V c+=&=!>  .AE2Dd3PbKbFc2c,d A@ nr' '\ %(" !%#]^ 	]^
 "]^ ]^r$   r'   c                   h   ^  \ rS rSrSrU 4S jr         SS\R                  S\4S jjr	Sr
U =r$ )	 HunyuanDiT2DMultiControlNetModeli8  a  
`HunyuanDiT2DMultiControlNetModel` wrapper class for Multi-HunyuanDiT2DControlNetModel

This module is a wrapper for multiple instances of the `HunyuanDiT2DControlNetModel`. The `forward()` API is
designed to be compatible with `HunyuanDiT2DControlNetModel`.

Args:
    controlnets (`List[HunyuanDiT2DControlNetModel]`):
        Provides additional conditioning to the unet during the denoising process. You must set multiple
        `HunyuanDiT2DControlNetModel` as a list.
c                 X   > [         TU ]  5         [        R                  " U5      U l        g )N)rH   rI   r   rS   nets)r]   controlnetsrb   s     r%   rI   )HunyuanDiT2DMultiControlNetModel.__init__E  s    MM+.	r$   r   r   c                     [        [        X4U R                  5      5       HQ  u  nu  pnU" UUUUUUUUU	U
UUS9nUS:X  a  UnM$  [        WS   US   5       VVs/ s H  u  nnUU-   PM     nnnU4nMS     W$ s  snnf )r   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   )r]   r   r   r   r   r   r   r   r   r   r   r   r   iimagescaler   block_samplescontrol_block_samplescontrol_block_sampleblock_samples                        r%   r   (HunyuanDiT2DMultiControlNetModel.forwardI  s    \ .7s?`d`i`i7j-k)A)j&+! %#(&;$7)A'= /!1'M  Av(5% ?BBWXYBZ\ijk\l>m)>m:,l )<7>m & ) *?(@%1 .l4 %$)s   A7)r   r   )r   r   r   r   __doc__rI   r    r!   r   r   r#   r   r   s   @r%   r   r   8  sP    
/ %(" !%#H% 	H%
 "H% H%r$   r   )!dataclassesr   typingr   r   r   r    r   configuration_utilsr	   r
   utilsr   r   attention_processorr   
embeddingsr   r   r   modeling_utilsr   #transformers.hunyuan_transformer_2dr   r   r   r   
get_loggerr   r   r   r'   r   r   r$   r%   <module>r      s    " ( (   B ( 4 
 ( A * 
		H	% 2j 2 2L^*k L^^Y%z Y%r$   