
    +h                       S SK JrJrJrJrJrJrJr  S SKrS SK	J
r
  S SKJ
s  Jr  SSKJrJr  SSKJrJrJr  SSKJr  SSKJrJrJrJrJrJr  SS	KJrJ r J!r!  SS
K"J#r#  SSK$J%r%J&r&J'r'J(r(J)r)  \" 5       (       a  S SK*r+OSr+\RX                  " \-5      r. " S S5      r/ " S S5      r0S\
Rb                  S\Rd                  S\3S\34S jr4\ " S S\
Rb                  5      5       r5\ " S S\
Rb                  5      5       r6\ " S S\
Rb                  5      5       r7 " S S\
Rb                  5      r8\ " S S\
Rb                  5      5       r9 " S S \
Rb                  5      r:\ " S! S"\
Rb                  5      5       r; " S# S$\
Rb                  5      r<g)%    )AnyCallableDictListOptionalTupleUnionN   )	deprecatelogging)is_torch_npu_availableis_torch_xla_availableis_xformers_available)maybe_allow_in_graph   )GEGLUGELUApproximateGELUFP32SiLULinearActivationSwiGLU)	AttentionAttentionProcessorJointAttnProcessor2_0)SinusoidalPositionalEmbedding)AdaLayerNormAdaLayerNormContinuousAdaLayerNormZeroRMSNormSD35AdaLayerNormZeroXc                   d    \ rS rSr\S\\\4   4S j5       rS\	\\\\4   4   4S jr
S rS rSrg	)
AttentionMixin'   returnc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  5       X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processor
.processor.)hasattrr*   named_children)r&   r'   r(   sub_namechildfn_recursive_add_processorss        T/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/attention.pyr1   CAttentionMixin.attn_processors.<locals>.fn_recursive_add_processors2   sZ    v//282F2F2H
V:./#)#8#8#:+tfAhZ,@%T $;     )strtorchnnModuler   r   r.   )selfr(   r&   r'   r1   s       @r2   attn_processorsAttentionMixin.attn_processors(   sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r4   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers.

        If `processor` is a dict, the key needs to define the path to the corresponding cross attention
        processor. This is strongly recommended when setting trainable attention processors.

z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r&   r'   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorr+   r,   )r-   
isinstancedictr?   popr.   )r&   r'   r<   r/   r0   fn_recursive_attn_processors        r2   rC   FAttentionMixin.set_attn_processor.<locals>.fn_recursive_attn_processorU   ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r4   N)lenr:   keysr@   rA   
ValueErrorr5   r6   r7   r8   r.   )r9   r<   countr&   r'   rC   s        @r2   set_attn_processor!AttentionMixin.set_attn_processor@   s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r4   c                 "   U R                   R                  5        H3  u  pS[        UR                  R                  5      ;   d  M*  [        S5      e   U R                  5        H*  n[        U[        5      (       d  M  UR                  5         M,     g)z
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
are fused. For cross-attention modules, key and value projection matrices are fused.
AddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.N)
r:   itemsr5   	__class____name__rG   modulesr@   AttentionModuleMixinfuse_projections)r9   _attn_processorr'   s       r2   fuse_qkv_projections#AttentionMixin.fuse_qkv_projectionsb   sq    
 "&!5!5!;!;!=A#n66??@@ !tuu "> llnF&"677'') %r4   c                     U R                  5        H*  n[        U[        5      (       d  M  UR                  5         M,     g)ul   Disables the fused QKV projection if enabled.

<Tip warning={true}>

This API is 🧪 experimental.

</Tip>
N)rP   r@   rQ   unfuse_projections)r9   r'   s     r2   unfuse_qkv_projections%AttentionMixin.unfuse_qkv_projectionso   s.     llnF&"677))+ %r4    N)rO   
__module____qualname____firstlineno__propertyr   r5   r   r:   r	   rI   rU   rY   __static_attributes__r[   r4   r2   r"   r"   '   sU    c+=&=!>  . AE2Dd3PbKbFc2c,d  AD*,r4   r"   c                      \ rS rSrSr/ rSrS\SS4S jrS'S\	SS4S	 jjr
S
\4S jrS\	SS4S jr  S(S\	S\\\\   S4      SS4S jjr S)S\	S\\   SS4S jjr\R(                  " 5       S 5       r\R(                  " 5       S 5       rS\SS4S jrS\R2                  S\R2                  4S jrS*S\R2                  S\S\R2                  4S jjr S)S\R2                  S\R2                  S\\R2                     S\R2                  4S  jjr S*S\R2                  S!\S"\S\S\R2                  4
S# jjrS$\R2                  S\R2                  4S% jrS&rg)+rQ   }   NFr<   r$   c                 j   [        U S5      (       a  [        U R                  [        R                  R
                  5      (       ai  [        U[        R                  R
                  5      (       d@  [        R                  SU R                   SU 35        U R                  R                  S5        Xl        g)zu
Set the attention processor to use.

Args:
    processor (`AttnProcessor`):
        The attention processor to use.
r<   z-You are removing possibly trained weights of z with N)
r-   r@   r<   r6   r7   r8   loggerinfo_modulesrB   )r9   r<   s     r2   r?   "AttentionModuleMixin.set_processor   sx     D+&&4>>588??;;y%((//::KKGGWW]^g]hijMMk*"r4   return_deprecated_lorar   c                 *    U(       d  U R                   $ g)z
Get the attention processor in use.

Args:
    return_deprecated_lora (`bool`, *optional*, defaults to `False`):
        Set to `True` to return the deprecated LoRA attention processor.

Returns:
    "AttentionProcessor": The attention processor in use.
N)r<   )r9   rh   s     r2   r*   "AttentionModuleMixin.get_processor   s     &>>! &r4   backendc                    SSK Jn  UR                  R                  5        Vs1 s H  o3R                  iM     nnX;  a"  [        SU< S3SR                  U5      -   5      eU" UR                  5       5      nXR                  l	        g s  snf )Nr   )AttentionBackendNamez	`backend=z ` must be one of the following: z, )
attention_dispatchrm   __members__valuesvaluerG   joinlowerr<   _attention_backend)r9   rk   rm   xavailable_backendss        r2   set_attention_backend*AttentionModuleMixin.set_attention_backend   sw    </C/O/O/V/V/XY/X!gg/XY,z
*JKdiiXjNkkll&w}}7,3) Zs   Buse_npu_flash_attentionc                 h    U(       a  [        5       (       d  [        S5      eU R                  S5        g)z
Set whether to use NPU flash attention from `torch_npu` or not.

Args:
    use_npu_flash_attention (`bool`): Whether to use NPU flash attention or not.
ztorch_npu is not available_native_npuN)r   ImportErrorrw   )r9   ry   s     r2   set_use_npu_flash_attention0AttentionModuleMixin.set_use_npu_flash_attention   s*     #)++!">??""=1r4   use_xla_flash_attentionpartition_spec.c                 h    U(       a  [        5       (       d  [        S5      eU R                  S5        g)a  
Set whether to use XLA flash attention from `torch_xla` or not.

Args:
    use_xla_flash_attention (`bool`):
        Whether to use pallas flash attention kernel from `torch_xla` or not.
    partition_spec (`Tuple[]`, *optional*):
        Specify the partition specification if using SPMD. Otherwise None.
    is_flux (`bool`, *optional*, defaults to `False`):
        Whether the model is a Flux model.
ztorch_xla is not available_native_xlaN)r   r|   rw   )r9   r   r   is_fluxs       r2   set_use_xla_flash_attention0AttentionModuleMixin.set_use_xla_flash_attention   s*    " #)++!">??""=1r4   'use_memory_efficient_attention_xformersattention_opc                    U(       a  [        5       (       d
  [        SSS9e[        R                  R	                  5       (       d  [        S5      e [        5       (       aC  SnUb  Uu  pEUR                  tp6[        R                  " SSUS9n[        R                  " XwU5      nU R                  S5        gg! [         a  nUeSnAff = f)	ax  
Set whether to use memory efficient attention from `xformers` or not.

Args:
    use_memory_efficient_attention_xformers (`bool`):
        Whether to use memory efficient attention from `xformers` or not.
    attention_op (`Callable`, *optional*):
        The attention operation to use. Defaults to `None` which uses the default attention operation from
        `xformers`.
zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)r&   zvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU N)r   r
   (   cudadevicedtype)r   ModuleNotFoundErrorr6   r   is_availablerG   SUPPORTED_DTYPESrandnxopsmemory_efficient_attention	Exceptionrw   )	r9   r   r   r   op_fwop_bwrS   qes	            r2   +set_use_memory_efficient_attention_xformers@AttentionModuleMixin.set_use_memory_efficient_attention_xformers   s     3(**){#  ZZ,,.. / 

,.. $'3+7LE(-(>(>IE!KK
6O ;;A!D **:61 3* ! Gs   AB5 5
C?CCc                 f
   [        U SS5      (       a  gU R                  R                  R                  R                  nU R                  R                  R                  R
                  n[        U S5      (       Gak  U R                  (       GaY  [        R                  " U R                  R                  R                  U R                  R                  R                  /5      nUR                  S   nUR                  S   n[        R                  " XEU R                  XS9U l        U R                   R                  R#                  U5        [        U S5      (       a  U R                  (       ay  [        R                  " U R                  R$                  R                  U R                  R$                  R                  /5      nU R                   R$                  R#                  U5        GO[        R                  " U R                  R                  R                  U R                  R                  R                  U R                  R                  R                  /5      nUR                  S   nUR                  S   n[        R                  " XEU R                  XS9U l        U R&                  R                  R#                  U5        [        U S5      (       a  U R                  (       a  [        R                  " U R                  R$                  R                  U R                  R$                  R                  U R                  R$                  R                  /5      nU R&                  R$                  R#                  U5        [        U S	S5      Gb  [        U S
S5      Gb  [        U SS5      Gb  [        R                  " U R(                  R                  R                  U R*                  R                  R                  U R,                  R                  R                  /5      nUR                  S   nUR                  S   n[        R                  " XEU R.                  XS9U l        U R0                  R                  R#                  U5        U R.                  (       a  [        R                  " U R(                  R$                  R                  U R*                  R$                  R                  U R,                  R$                  R                  /5      nU R0                  R$                  R#                  U5        SU l        g)zU
Fuse the query, key, and value projections into a single projection for efficiency.
fused_projectionsFNis_cross_attentionr   r   )biasr   r   use_bias
add_q_proj
add_k_proj
add_v_projT)getattrto_qweightdatar   r   r-   r   r6   catto_kto_vshaper7   Linearr   to_kvcopy_r   to_qkvr   r   r   added_proj_biasto_added_qkvr   )r9   r   r   concatenated_weightsin_featuresout_featuresconcatenated_biass          r2   rR   %AttentionModuleMixin.fuse_projections   s    4,e44!!&&--		  %%++4-..43J3J3J#(99dii.>.>.C.CTYYEUEUEZEZ-[#\ .44Q7K/55a8L;4==Y_mDJJJ##$89tZ((T]]$)IItyy~~/B/BDIINNDWDW.X$Y!

%%&78 $)99dii.>.>.C.CTYYEUEUEZEZ\`\e\e\l\l\q\q-r#s .44Q7K/55a8L))KDMMZ`nDKKK$$%9:tZ((T]]$)IItyy~~/B/BDIINNDWDWY]YbYbYgYgYlYl.m$n!  &&'89 D,-9lD1=lD1=#(99'',,doo.D.D.I.I4??KaKaKfKfg$  /44Q7K/55a8L "		0D0DV!D $$**+?@##$)II__))..0D0D0I0I4??K_K_KdKde%! !!&&,,->?!%r4   c                     [        U SS5      (       d  g[        U S5      (       a  [        U S5        [        U S5      (       a  [        U S5        [        U S5      (       a  [        U S5        SU l        g)zL
Unfuse the query, key, and value projections back to separate projections.
r   FNr   r   r   )r   r-   delattrr   )r9   s    r2   rX   'AttentionModuleMixin.unfuse_projections5  sf     t0%88 4""D(#4!!D'"4((D.)!&r4   
slice_sizec                     [        U S5      (       a.  Ub+  XR                  :  a  [        SU SU R                   S35      eSnUb  U R                  S5      nUc  U R	                  5       nU R                  U5        g)z
Set the slice size for attention computation.

Args:
    slice_size (`int`):
        The slice size for attention computation.
sliceable_head_dimNzslice_size z has to be smaller or equal to r,   sliced)r-   r   rG   _get_compatible_processordefault_processor_clsr?   )r9   r   r<   s      r2   set_attention_slice(AttentionModuleMixin.set_attention_sliceJ  s     4-..:3Ij[r[rNr{:,6UVZVmVmUnnopqq	 !66x@I 224I9%r4   tensorc                     U R                   nUR                  u  p4nUR                  X2-  X$U5      nUR                  SSSS5      R                  X2-  XEU-  5      nU$ )z
Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`.

Args:
    tensor (`torch.Tensor`): The tensor to reshape.

Returns:
    `torch.Tensor`: The reshaped tensor.
r   r
   r      )headsr   reshapepermute)r9   r   	head_size
batch_sizeseq_lendims         r2   batch_to_head_dim&AttentionModuleMixin.batch_to_head_dima  s_     JJ	#)<< 
S
 7SQ1a+33J4KW\eVefr4   out_dimc                    U R                   nUR                  S:X  a  UR                  u  pEnSnOUR                  u  pGpVUR                  XEU-  X6U-  5      nUR	                  SSSS5      nUS:X  a  UR                  XC-  XW-  Xc-  5      nU$ )z
Reshape the tensor for multi-head attention processing.

Args:
    tensor (`torch.Tensor`): The tensor to reshape.
    out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor.

Returns:
    `torch.Tensor`: The reshaped tensor.
r   r   r   r
   )r   ndimr   r   r   )r9   r   r   r   r   r   r   	extra_dims           r2   head_to_batch_dim&AttentionModuleMixin.head_to_batch_dimq  s     JJ	;;!'-||$JI28,,/J7
i,?S\L\]1a+a<^^J$:G<OQTQabFr4   querykeyattention_maskc                 $   UR                   nU R                  (       a   UR                  5       nUR                  5       nUcV  [        R                  " UR
                  S   UR
                  S   UR
                  S   UR                   UR                  S9nSnOUnSn[        R                  " UUUR                  SS5      UU R                  S9nAU R                  (       a  UR                  5       nUR                  SS9nAUR                  U5      nU$ )a  
Compute the attention scores.

Args:
    query (`torch.Tensor`): The query tensor.
    key (`torch.Tensor`): The key tensor.
    attention_mask (`torch.Tensor`, *optional*): The attention mask to use.

Returns:
    `torch.Tensor`: The attention probabilities/scores.
r   r   r   r   )betaalphar   )r   upcast_attentionfloatr6   emptyr   r   baddbmm	transposescaleupcast_softmaxsoftmaxto)	r9   r   r   r   r   baddbmm_inputr   attention_scoresattention_probss	            r2   get_attention_scores)AttentionModuleMixin.get_attention_scores  s       KKME))+C!!KKAA		!EKKX]XdXdM D*MD ==MM"b!**
 /557*22r2:),,U3r4   target_lengthr   c                    U R                   nUc  U$ UR                  S   nXb:w  a  UR                  R                  S:X  a_  UR                  S   UR                  S   U4n[        R
                  " XqR                  UR                  S9n[        R                  " X/SS9nO[        R                  " USU4SS	9nUS
:X  a'  UR                  S   X5-  :  a  UR                  USS9nU$ US:X  a!  UR                  S5      nUR                  USS9nU$ )a  
Prepare the attention mask for the attention computation.

Args:
    attention_mask (`torch.Tensor`): The attention mask to prepare.
    target_length (`int`): The target length of the attention mask.
    batch_size (`int`): The batch size for repeating the attention mask.
    out_dim (`int`, *optional*, defaults to `3`): Output dimension.

Returns:
    `torch.Tensor`: The prepared attention mask.
r   mpsr   r   r   r
   r           )rq   r      )r   r   r   typer6   zerosr   r   Fpadrepeat_interleave	unsqueeze)	r9   r   r   r   r   r   current_lengthpadding_shapepaddings	            r2   prepare_attention_mask+AttentionModuleMixin.prepare_attention_mask  s!    JJ	!!!,2226*$$))U2 "0!5!5a!8.:N:Nq:QS` a++m;O;OXfXmXmn!&N+D!!L "#~=7IQT!Ua<##A&)??!/!A!A)QR!A!S
 	 \+55a8N+==iQ=ONr4   encoder_hidden_statesc                 f   U R                   c   S5       e[        U R                   [        R                  5      (       a  U R                  U5      nU$ [        U R                   [        R                  5      (       a7  UR                  SS5      nU R                  U5      nUR                  SS5      nU$  e)z
Normalize the encoder hidden states.

Args:
    encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

Returns:
    `torch.Tensor`: The normalized encoder hidden states.
zGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr   r
   )
norm_crossr@   r7   	LayerNorm	GroupNormr   )r9   r   s     r2   norm_encoder_hidden_states/AttentionModuleMixin.norm_encoder_hidden_states  s     *u,uu*door||44$(OO4I$J! %$ 66 %:$C$CAq$I!$(OO4I$J!$9$C$CAq$I! %$ 5r4   )r   r<   r   r   r   )F)NFN)r   ) rO   r\   r]   r^   _default_processor_cls_available_processorsr   r   r?   boolr*   r5   rw   r}   r   r   r   r   r   r6   no_gradrR   rX   intr   Tensorr   r   r   r   r  r`   r[   r4   r2   rQ   rQ   }   s   !#'9 #d #("D "EY "4S 424 2D 2" ?C	2!%2 !x}c'9!:;2
 
20 ae%77;%7KST\K]%7	%7N ]]_8& 8&t ]]_' '(&c &d &.    s 5<< 4 `d-\\-(--FNu||F\-	-` ab)#ll);>)LO)Z])	)V% %QVQ]Q] %r4   rQ   ffhidden_states	chunk_dim
chunk_sizec                    UR                   U   U-  S:w  a  [        SUR                   U    SU S35      eUR                   U   U-  n[        R                  " UR	                  XBS9 Vs/ s H
  oP" U5      PM     snUS9nU$ s  snf )Nr   z)`hidden_states` dimension to be chunked: z$ has to be divisible by chunk size: z[. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`.r   )r   rG   r6   r   chunk)r
  r  r  r  
num_chunks	hid_slice	ff_outputs          r2   _chunked_feed_forwardr     s    9%
2a778K8KI8V7WW{  }G  |H  Hc  d
 	
 $$Y/:=J		(5(;(;J(;(VW(V9I(VWI  	Xs   )Bc                      ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\R                  S	\R                  S
\R                  4S jr	Sr
U =r$ )GatedSelfAttentionDensei  aX  
A gated self-attention dense layer that combines visual features and object features.

Parameters:
    query_dim (`int`): The number of channels in the query.
    context_dim (`int`): The number of channels in the context.
    n_heads (`int`): The number of heads to use for attention.
    d_head (`int`): The number of channels in each head.
	query_dimcontext_dimn_headsd_headc                   > [         TU ]  5         [        R                  " X!5      U l        [        XUS9U l        [        USS9U l        [        R                  " U5      U l
        [        R                  " U5      U l        U R                  S[        R                  " [        R                  " S5      5      5        U R                  S[        R                  " [        R                  " S5      5      5        SU l        g )N)r  r   dim_headgegluactivation_fn
alpha_attnr   alpha_denseT)super__init__r7   r   linearr   attnFeedForwardr
  r   norm1norm2register_parameter	Parameterr6   r   enabled)r9   r  r  r  r  rN   s        r2   r"   GatedSelfAttentionDense.__init__  s     ii7	6R	iw?\\),
\\),
bll5<<;L.MNr||ELL<M/NOr4   ru   objsr$   c                    U R                   (       d  U$ UR                  S   nU R                  U5      nXR                  R	                  5       U R                  U R                  [        R                  " X/SS95      5      S S 2S U2S S 24   -  -   nXR                  R	                  5       U R                  U R                  U5      5      -  -   nU$ )Nr   r   )r*  r   r#  r  tanhr$  r&  r6   r   r   r
  r'  )r9   ru   r,  n_visuals       r2   forwardGatedSelfAttentionDense.forward,  s    ||H771:{{4 $$&4::eii	WX>Y3Z)[\]_h`h_hjk\k)lll  %%'$''$**Q-*@@@r4   )r$  r*  r
  r#  r&  r'  )rO   r\   r]   r^   __doc__r  r"  r6   r	  r0  r`   __classcell__rN   s   @r2   r  r    sT    # C # s "
 
U\\ 
ell 
 
r4   r  c                      ^  \ rS rSrSr   SS\S\S\S\S\\   S\4U 4S	 jjjr	SS
\\   S\4S jjr
 SS\R                  S\R                  S\R                  S\\\\4      4S jjrSrU =r$ )JointTransformerBlocki9  a  
A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3.

Reference: https://huggingface.co/papers/2403.03206

Parameters:
    dim (`int`): The number of channels in the input and output.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    context_pre_only (`bool`): Boolean to determine if we should add some blocks associated with the
        processing of `context` conditions.
r   num_attention_headsattention_head_dimcontext_pre_onlyqk_normuse_dual_attentionc                   > [         T	U ]  5         X`l        X@l        U(       a  SOSnU(       a  [	        U5      U l        O[        U5      U l        US:X  a  [        XSSSSS9U l        O&US:X  a  [        U5      U l        O[        SU S	35      e[        [        S
5      (       a  [        5       nO[        S5      e[        US UUUUUSUUSS9U l        U(       a  [        US UUUSUUSS9	U l        OS U l        [         R"                  " USSS9U l        ['        XSS9U l        U(       d+  [         R"                  " USSS9U l        ['        XSS9U l        OS U l        S U l        S U l        SU l        g )Nada_norm_continousada_norm_zeroFư>T
layer_norm)elementwise_affineepsr   	norm_typezUnknown context_norm_type: z>, currently only support `ada_norm_continous`, `ada_norm_zero`scaled_dot_product_attentionzYThe current PyTorch version does not support the `scaled_dot_product_attention` function.)r  cross_attention_dimadded_kv_proj_dimr  r   r   r9  r   r<   r:  rB  )	r  rE  r  r   r   r   r<   r:  rB  rA  rB  gelu-approximate)r   dim_outr  r   )r!  r"  r;  r9  r    r&  r   r   norm1_contextrG   r-   r   r   r   r$  attn2r7   r   r'  r%  r
  norm2_context
ff_context_chunk_size
_chunk_dim)
r9   r   r7  r8  r9  r:  r;  context_norm_typer<   rN   s
            r2   r"  JointTransformerBlock.__init__H  s    	"4 04D0/.s3DJ)#.DJ 44!7U4S_"D /1!1#!6D-.?-@@~  1455-/Ik   $!'%-
	 "$(+)#
DJ DJ\\#%TJ
#BTU!#ceQU!VD)cN`aDO!%D"DO  r4   r  c                     Xl         X l        g r  rN  rO  r9   r  r   s      r2   set_chunk_feed_forward,JointTransformerBlock.set_chunk_feed_forward      %r4   r  r   tembjoint_attention_kwargsc                 h   U=(       d    0 nU R                   (       a  U R                  XS9u  pVpxpnOU R                  XS9u  pVpxn	U R                  (       a  U R                  X#5      nOU R                  X#S9u  ppnU R                  " SUUS.UD6u  nnUR                  S5      U-  nUU-   nU R                   (       a-  U R                  " SSW
0UD6nWR                  S5      U-  nUU-   nU R                  U5      nUSUS S 2S 4   -   -  US S 2S 4   -   nU R                  b,  [        U R                  XPR                  U R                  5      nOU R                  U5      nU	R                  S5      U-  nUU-   nU R                  (       a  S nX!4$ WR                  S5      U-  nUU-   nU R                  U5      nUSWS S 2S 4   -   -  WS S 2S 4   -   nU R                  b,  [        U R                  XR                  U R                  5      nOU R                  U5      nUWR                  S5      U-  -   nX!4$ )N)emb)r  r   r   r  r[   )r;  r&  r9  rJ  r$  r   rK  r'  rN  r  r
  rO  rL  rM  )r9   r  r   rX  rY  norm_hidden_statesgate_msa	shift_mlp	scale_mlpgate_mlpnorm_hidden_states2	gate_msa2r  
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpattn_outputcontext_attn_outputattn_output2r  context_ff_outputs                         r2   r0  JointTransformerBlock.forward  s    "8!=2""kokuku lv lh)_h LP::Vc:KnH)  )-););<Q)X&[_[m[m% \n \X&Kj
 ,099 ,
,"<,
 %,
(( ((+k9%3""::b4GbKabL$..q1L@L)L8M!ZZ6/1yD7I3IJYWXZ^W^M__'-dgg7I??\`\l\lmI 23I&&q)I5	%	1   $(!  %33 #-"6"6q"9<O"O$9<O$O!)-););<Q)R&)Cq;WXZ^W^K_G_)`cnoprvovcw)w&+$9OO%?RVRbRb%! %)OO4N$O!$9J<P<PQR<SVg<g$g!$33r4   )rO  rN  r$  rK  r9  r
  rM  r&  rJ  r'  rL  r;  )FNFr   r  )rO   r\   r]   r^   r2  r  r  r   r5   r"  rU  r6   FloatTensorr   r   r0  r`   r3  r4  s   @r2   r6  r6  9  s    $ "'!%#(OO !O  	O
 O #O !O Od# S  <@C4((C4  %00C4 	C4
 !)c3h 8C4 C4r4   r6  c            -         ^  \ rS rSrSr                    S'S\S\S\S\\   S\S\\   S	\S
\S\S\S\S\S\	S\S\S\\   S\\   S\\   S\\   S\\   S\S\4,U 4S jjjr
S(S\\   S\4S jjr       S)S\R                  S\\R                     S\\R                     S\\R                     S \\R                     S!\\\4   S"\\R                     S#\\\\R                  4      S$\R                  4S% jjrS&rU =r$ )*BasicTransformerBlocki  ah  
A basic Transformer block.

Parameters:
    dim (`int`): The number of channels in the input and output.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
    activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
    num_embeds_ada_norm (:
        obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`.
    attention_bias (:
        obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
    only_cross_attention (`bool`, *optional*):
        Whether to use only cross-attention layers. In this case two cross attention layers are used.
    double_self_attention (`bool`, *optional*):
        Whether to use two self-attention layers. In this case no cross attention layers are used.
    upcast_attention (`bool`, *optional*):
        Whether to upcast the attention computation to float32. This is useful for mixed precision training.
    norm_elementwise_affine (`bool`, *optional*, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    norm_type (`str`, *optional*, defaults to `"layer_norm"`):
        The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
    final_dropout (`bool` *optional*, defaults to False):
        Whether to apply a final dropout after the last feed-forward layer.
    attention_type (`str`, *optional*, defaults to `"default"`):
        The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
    positional_embeddings (`str`, *optional*, defaults to `None`):
        The type of positional embeddings to apply to.
    num_positional_embeddings (`int`, *optional*, defaults to `None`):
        The maximum number of positional embeddings to apply.
r   r7  r8  rE  r  num_embeds_ada_normattention_biasonly_cross_attentiondouble_self_attentionr   norm_elementwise_affinerC  norm_epsfinal_dropoutattention_typepositional_embeddingsnum_positional_embeddings-ada_norm_continous_conditioning_embedding_dimada_norm_biasff_inner_dimff_biasattention_out_biasc                   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xl        Xl	        Xl
        UU l        UU l        Xl        US L=(       a    US:H  U l        US L=(       a    US:H  U l        US:H  U l        US:H  U l        US:H  U l        US;   a  Uc  ['        SU SU S	35      eXl        Xpl        U(       a  Uc  ['        S
5      eUS:X  a  [-        UUS9U l        OS U l        US:X  a  [1        X5      U l        OMUS:X  a  [5        X5      U l        O6US:X  a  [7        UUUUUS5      U l        O[8        R:                  " XUS9U l        [=        UUUUUU	(       a  UOS UUS9U l        Uc  U
(       an  US:X  a  [1        X5      U l         O8US:X  a  [7        UUUUUS5      U l         O[8        R:                  " XU5      U l         [=        UU
(       d  UOS UUUUUUS9U l!        O1US:X  a  [8        R:                  " XU5      U l         OS U l         S U l!        US:X  a  [7        UUUUUS5      U l"        O0US;   a  [8        R:                  " XU5      U l"        OUS:X  a  S U l"        [G        UUUUUUS9U l$        US:X  d  US:X  a  [K        XX#5      U l&        US:X  a6  [8        RN                  " [P        RR                  " SU5      US-  -  5      U l*        S U l+        SU l,        g )Nr>  ada_normada_norm_singler@  ada_norm_continuousr  r>  `norm_type` is set to w, but `num_embeds_ada_norm` is not defined. Please make sure to define `num_embeds_ada_norm` if setting `norm_type` to r,   \If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined.
sinusoidalmax_seq_lengthrms_normrG  r  r   r  dropoutr   rE  r   out_biasr  rE  r   r  r  r   r   r  )r>  r  r@  layer_norm_i2vgenr  r  rv  	inner_dimr   gatedzgated-text-image   g      ?r   )-r!  r"  r   r7  r8  r  rE  r  rq  rs  rt  rx  ry  rr  use_ada_layer_norm_zerouse_ada_layer_normuse_ada_layer_norm_singleuse_layer_normuse_ada_layer_norm_continuousrG   rC  rp  r   	pos_embedr   r&  r   r   r7   r   r   attn1r'  rK  norm3r%  r
  r  fuserr)  r6   r   scale_shift_tablerN  rO  )r9   r   r7  r8  r  rE  r  rp  rq  rr  rs  r   rt  rC  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  rN   s                           r2   r"  BasicTransformerBlock.__init__	  sL   4 	#6 "4#6 *,%:"'>$%:")B&$8! )<4(G'iYZiMi$#6d#B"_	U_H_)26G)G&'<7-6:O-O*55:M:U( 4KKT+UVX 
 ##6  &?&Gn  !L0:3OhiDN!DN 
"%c?DJ/))#CDJ///='DJ c[cdDJ%'7K 3QU-'	

 *.C J&)#C
333A+!
  \\#9PQ
"?T$7Z^)+#!1+	DJ --\\#9PQ
!
DJ --/='DJ EEc5LMDJ--DJ''"
 W$:L(L0K^sDJ ))%'\\%++a2ES2P%QD"  r4   r  c                     Xl         X l        g r  rS  rT  s      r2   rU  ,BasicTransformerBlock.set_chunk_feed_forward  rW  r4   r  r   r   encoder_attention_masktimestepcross_attention_kwargsclass_labelsadded_cond_kwargsr$   c	                    Ub(  UR                  SS 5      b  [        R                  S5        UR                  S   n	U R                  S:X  a  U R                  X5      n
OU R                  S:X  a  U R                  XXqR                  S9u  ppnOU R                  S;   a  U R                  U5      n
OU R                  S:X  a  U R                  XS	   5      n
OnU R                  S
:X  aS  U R                  S    UR                  U	SS5      -   R                  SSS9u  nnppU R                  U5      n
U
SU-   -  U-   n
O[        S5      eU R                  b  U R                  U
5      n
Ub  UR                  5       O0 nUR                  SS 5      nU R                  " U
4U R                  (       a  UOS US.UD6nU R                  S:X  a  WR!                  S5      U-  nOU R                  S
:X  a  WU-  nUU-   nUR"                  S:X  a  UR%                  S5      nUb  U R'                  UUS   5      nU R(                  b  U R                  S:X  a  U R+                  X5      n
OeU R                  S;   a  U R+                  U5      n
OCU R                  S
:X  a  Un
O0U R                  S:X  a  U R+                  XS	   5      n
O[        S5      eU R                  b!  U R                  S
:w  a  U R                  U
5      n
U R(                  " U
4UUS.UD6nUU-   nU R                  S:X  a  U R-                  XS	   5      n
O!U R                  S
:X  d  U R-                  U5      n
U R                  S:X  a  U
SWS S 2S 4   -   -  WS S 2S 4   -   n
U R                  S
:X  a  U R+                  U5      n
U
SW-   -  W-   n
U R.                  b,  [1        U R2                  XR4                  U R.                  5      nOU R3                  U
5      nU R                  S:X  a  WR!                  S5      U-  nOU R                  S
:X  a  WU-  nUU-   nUR"                  S:X  a  UR%                  S5      nU$ )Nr   SPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r   r  r>  )hidden_dtype)r@  r  r  pooled_text_embr  r  r   r   r   zIncorrect norm usedgligenr   r   r   r,  )r>  r@  r  zIncorrect norm)getrd   warningr   rC  r&  r   r  r   r  rG   r  copyrB   r  rr  r   r   squeezer  rK  r'  r  rN  r  r
  rO  )r9   r  r   r   r  r  r  r  r  r   r\  r]  r^  r_  r`  	shift_msa	scale_msagligen_kwargsrg  r  s                       r2   r0  BasicTransformerBlock.forward  sd    "-%))'48Dtu #((+
>>Z'!%M!D^^.KO::DWDW LV LH) ^^BB!%M!:^^44!%MM^;_!`^^00&&t,x/?/?
Ar/RReA1eo KIy(y "&M!:!3q9}!E	!Q233>>%!%0B!C CYBd!7!<!<!>jl.228TBjj
;?;T;T"7Z^)
 %	
 >>_,",,Q/+=K^^00"[0K#m3")11!4M $ JJ}mF6KLM ::!~~+%)ZZ%H"#WW%)ZZ%>"#44 &3"#88%)ZZQb?c%d" !122~~)dnn@Q.Q%)^^4F%G"**"&;5 )	K (-7M >>22!%MM^;_!`#44!%M!:>>_,!3q9QW;M7M!NQZ[\^b[bQc!c>>..!%M!:!3q9}!E	!Q'-dgg7I??\`\l\lmI 23I>>_, **1-	9I^^00 9,I!M1")11!4Mr4   )rO  rN  r  rq  r8  r  rK  rE  r   rs  r  r
  r  r&  r'  r  rt  rC  r7  rp  ry  rr  r  rx  r  r  r  r  r  r  )r   Nr  NFFFFTr@  h㈵>FdefaultNNNNNTTrl  )NNNNNNN)rO   r\   r]   r^   r2  r  r   r5   r  r   r"  rU  r6   r	  
LongTensorr   r   r0  r`   r3  r4  s   @r2   ro  ro    sP    N -1$-1$%*&+!&(,%#'/337GK'+&*#'1ff !f  	f &c]f f &c]f f #f  $f f "&f f f  !f" #f$  (}%f& $,C='f( 8@})f*  }+f, sm-f. /f0 !1f fP# S  268<9=/31537?Cx||x !.x  (5	x
 !) 6x 5++,x !%S#Xx u//0x $Dell):$;<x 
x xr4   ro  c            
       Z   ^  \ rS rSrSr  S
S\S\S\\   S\\   4U 4S jjjrS r	S	r
U =r$ )LuminaFeedForwardi1  a  
A feed-forward layer.

Parameters:
    hidden_size (`int`):
        The dimensionality of the hidden layers in the model. This parameter determines the width of the model's
        hidden representations.
    intermediate_size (`int`): The intermediate dimension of the feedforward layer.
    multiple_of (`int`, *optional*): Value to ensure hidden dimension is a multiple
        of this value.
    ffn_dim_multiplier (float, *optional*): Custom multiplier for hidden
        dimension. Defaults to None.
r   r  multiple_offfn_dim_multiplierc                   > [         TU ]  5         Ub  [        XB-  5      nX2U-   S-
  U-  -  n[        R                  " UUSS9U l        [        R                  " UUSS9U l        [        R                  " UUSS9U l        [        5       U l	        g )Nr   Fr   )
r!  r"  r  r7   r   linear_1linear_2linear_3r   silu)r9   r   r  r  r  rN   s        r2   r"  LuminaFeedForward.__init__@  s     	).:;I$;a$?K#OP			

 		

 		

 J	r4   c                     U R                  U R                  U R                  U5      5      U R                  U5      -  5      $ r  )r  r  r  r  )r9   ru   s     r2   r0  LuminaFeedForward.forward^  s1    }}TYYt}}Q'784==;KKLLr4   )r  r  r  r  )   N)rO   r\   r]   r^   r2  r  r   r   r"  r0  r`   r3  r4  s   @r2   r  r  1  sT    $ &).2  c]	
 %UO <M Mr4   r  c                      ^  \ rS rSrSr SS\S\S\S\S\\   4
U 4S jjjrS	\\   4S
 jr SS\	R                  S\S\\	R                     S\	R                  4S jjrSrU =r$ )TemporalBasicTransformerBlockib  a  
A basic Transformer block for video like data.

Parameters:
    dim (`int`): The number of channels in the input and output.
    time_mix_inner_dim (`int`): The number of channels for temporal attention.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
r   time_mix_inner_dimr7  r8  rE  c                   > [         TU ]  5         X:H  U l        [        R                  " U5      U l        [        UUSS9U l        [        R                  " U5      U l        [        UUUS S9U l
        Ub-  [        R                  " U5      U l        [        UUUUS9U l        OS U l        S U l        [        R                  " U5      U l        [        USS9U l        S U l        S U l        g )Nr  )rI  r  )r  r   r  rE  )r  rE  r   r  r  )r!  r"  is_resr7   r   norm_inr%  ff_inr&  r   r  r'  rK  r  r
  rN  rO  )r9   r   r  r7  r8  rE  rN   s         r2   r"  &TemporalBasicTransformerBlock.__init__o  s     	/||C( !&!

 \\"45
(%' $	

 * &89DJ",$7)+	DJ DJDJ \\"45
0H  r4   r  c                     Xl         SU l        g )Nr   rS  )r9   r  kwargss      r2   rU  4TemporalBasicTransformerBlock.set_chunk_feed_forward  s    %r4   r  
num_framesr   r$   c                    UR                   S   nUR                   u  pVnXR-  nUS S S 24   R                  XBXg5      nUR                  SSSS5      nUR                  XF-  X'5      nUnU R                  U5      nU R                  b,  [        U R                  XR                  U R                  5      nOU R                  U5      nU R                  (       a  X-   nU R                  U5      n	U R                  U	S S9n
X-   nU R                  b$  U R                  U5      n	U R                  XS9n
X-   nU R                  U5      n	U R                  b,  [        U R                  XR                  U R                  5      nOU R                  U	5      nU R                  (       a  X-   nOUnUS S S 24   R                  XFX'5      nUR                  SSSS5      nUR                  XB-  Xg5      nU$ )Nr   r
   r   r   )r   )r   r   r   r  rN  r  r  rO  r  r&  r  rK  r'  r  r
  )r9   r  r  r   r   batch_frames
seq_lengthchannelsresidualr\  rg  r  s               r2   r0  %TemporalBasicTransformerBlock.forward  s    #((+
-:-@-@*(!/
%dAg.66zzd%--aAq9%--j.Ez\ ]3'1$**m__^b^n^noM JJ}5M;;)4M!ZZ6jj!34jP#3 ::!!%M!:**%7*eK'7M "ZZ6'-dgg7I??\`\l\lmI 23I;;%5M%M%dAg.66zzd%--aAq9%--j.Ez\r4   )rO  rN  r  rK  r
  r  r  r&  r'  r  r  r  )rO   r\   r]   r^   r2  r  r   r"  rU  r6   r	  r0  r`   r3  r4  s   @r2   r  r  b  s    	" .233  3 !	3
  3 &c]3 3j#  9=	7||7 7  (5	7
 
7 7r4   r  c                   d   ^  \ rS rSr    SS\S\S\S\S\S\\   S\S	\4U 4S
 jjjrS rSr	U =r
$ )SkipFFTransformerBlocki  r   r7  r8  kv_input_dimkv_input_dim_proj_use_biasrE  rq  r~  c
           
        > [         T
U ]  5         XA:w  a  [        R                  " XAU5      U l        OS U l        [        US5      U l        [        UUUUUUU	S9U l        [        US5      U l	        [        UUUUUUU	S9U l
        g )Nr?  )r  r   r  r  r   rE  r  )r  rE  r   r  r  r   r  )r!  r"  r7   r   	kv_mapperr   r&  r   r  r'  rK  )r9   r   r7  r8  r  r  r  rE  rq  r~  rN   s             r2   r"  SkipFFTransformerBlock.__init__  s     	YY|:TUDN!DNS%(
%' 3'

 S%(
 3%''

r4   c                 <   Ub  UR                  5       O0 nU R                  b%  U R                  [        R                  " U5      5      nU R	                  U5      nU R
                  " U4SU0UD6nXQ-   nU R                  U5      nU R                  " U4SU0UD6nXQ-   nU$ )Nr   )r  r  r   r  r&  r  r'  rK  )r9   r  r   r  r\  rg  s         r2   r0  SkipFFTransformerBlock.forward  s    BXBd!7!<!<!>jl>>%$(NN166:O3P$Q!!ZZ6jj
"7
 %
 $3!ZZ6jj
"7
 %
 $3r4   )r  rK  r  r&  r'  )r   NFT)rO   r\   r]   r^   r  r  r   r"  r0  r`   r3  r4  s   @r2   r  r    sx     -1$#'(
(
 !(
  	(

 (
 %)(
 &c](
 (
 !(
 (
T r4   r  c            /         ^  \ rS rSrSr                    S*S\S\S\S\S\\   S	\S
\\   S\	S\	S\	S\	S\	S\S\S\	S\\   S\\   S\\   S\	S\	S\S\S\4.U 4S jjjr
S\S\\\\4      4S jrS+S\S\S\\   4S jjr S+S\S\S\SS4S  jjrS,S!\\   S\SS4S" jjr    S-S#\R$                  S$\\R$                     S%\\R$                     S&\\R$                     S'\\\4   S\R$                  4S( jjrS)rU =r$ ).FreeNoiseTransformerBlocki,  a  
A FreeNoise Transformer block.

Parameters:
    dim (`int`):
        The number of channels in the input and output.
    num_attention_heads (`int`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`):
        The number of channels in each head.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    cross_attention_dim (`int`, *optional*):
        The size of the encoder_hidden_states vector for cross attention.
    activation_fn (`str`, *optional*, defaults to `"geglu"`):
        Activation function to be used in feed-forward.
    num_embeds_ada_norm (`int`, *optional*):
        The number of diffusion steps used during training. See `Transformer2DModel`.
    attention_bias (`bool`, defaults to `False`):
        Configure if the attentions should contain a bias parameter.
    only_cross_attention (`bool`, defaults to `False`):
        Whether to use only cross-attention layers. In this case two cross attention layers are used.
    double_self_attention (`bool`, defaults to `False`):
        Whether to use two self-attention layers. In this case no cross attention layers are used.
    upcast_attention (`bool`, defaults to `False`):
        Whether to upcast the attention computation to float32. This is useful for mixed precision training.
    norm_elementwise_affine (`bool`, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    norm_type (`str`, defaults to `"layer_norm"`):
        The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
    final_dropout (`bool` defaults to `False`):
        Whether to apply a final dropout after the last feed-forward layer.
    attention_type (`str`, defaults to `"default"`):
        The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
    positional_embeddings (`str`, *optional*):
        The type of positional embeddings to apply to.
    num_positional_embeddings (`int`, *optional*, defaults to `None`):
        The maximum number of positional embeddings to apply.
    ff_inner_dim (`int`, *optional*):
        Hidden dimension of feed-forward MLP.
    ff_bias (`bool`, defaults to `True`):
        Whether or not to use bias in feed-forward MLP.
    attention_out_bias (`bool`, defaults to `True`):
        Whether or not to use bias in attention output project layer.
    context_length (`int`, defaults to `16`):
        The maximum number of frames that the FreeNoise block processes at once.
    context_stride (`int`, defaults to `4`):
        The number of frames to be skipped before starting to process a new batch of `context_length` frames.
    weighting_scheme (`str`, defaults to `"pyramid"`):
        The weighting scheme to use for weighting averaging of processed latent frames. As described in the
        Equation 9. of the [FreeNoise](https://huggingface.co/papers/2310.15169) paper, "pyramid" is the default
        setting used.
Nr   r7  r8  r  rE  r  rp  rq  rr  rs  r   rt  rC  ru  rv  rx  ry  r|  r}  r~  context_lengthcontext_strideweighting_schemec                   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xl        Xl	        Xl
        UU l        UU l        Xl        U R                  UUU5        US L=(       a    US:H  U l        US L=(       a    US:H  U l        US:H  U l        US:H  U l        US:H  U l        US;   a  Uc  [)        SU SU S	35      eXl        Xpl        U(       a  Uc  [)        S
5      eUS:X  a  [/        UUS9U l        OS U l        [2        R4                  " XUS9U l        [9        UUUUUU	(       a  UOS UUS9U l        Uc  U
(       a:  [2        R4                  " XU5      U l        [9        UU
(       d  UOS UUUUUUS9U l        [A        UUUUUUS9U l!        [2        R4                  " XU5      U l"        S U l#        SU l$        g )Nr>  r  r  r@  r  r  r  r  r,   r  r  r  rG  r  r  r  r   )%r!  r"  r   r7  r8  r  rE  r  rq  rs  rt  rx  ry  rr  set_free_noise_propertiesr  r  r  r  r  rG   rC  rp  r   r  r7   r   r&  r   r  r'  rK  r%  r
  r  rN  rO  )r9   r   r7  r8  r  rE  r  rp  rq  rr  rs  r   rt  rC  ru  rv  rx  ry  r|  r}  r~  r  r  r  rN   s                           r2   r"  "FreeNoiseTransformerBlock.__init__d  s   4 	#6 "4#6 *,%:"'>$%:")B&$8!&&~~GWX )<4(G'iYZiMi$#6d#B"_	U_H_)26G)G&'<7-6:O-O*55:M:U( 4KKT+UVX 
 ##6  &?&Gn  !L0:3OhiDN!DN \\#W_`
%'7K 3QU-'	

 *.Cc5LMDJ"?T$7Z^)+#!1+	DJ ''"
 \\#1HI
  r4   r  r$   c                     / n[        SXR                  -
  S-   U R                  5       H/  nUn[        XU R                  -   5      nUR	                  XE45        M1     U$ )Nr   r   )ranger  r  minappend)r9   r  frame_indicesiwindow_start
window_ends         r2   _get_frame_indices,FreeNoiseTransformerBlock._get_frame_indices  sb    q*':'::Q>@S@STALZT-@-@)@AJ  ,!;< U r4   c                    US:X  a  S/U-  nU$ US:X  a`  US-  S:X  a)  US-  n[        [        SUS-   5      5      nX3S S S2   -   nU$ US-   S-  n[        [        SU5      5      nX4/-   US S S2   -   n U$ US:X  a`  US-  S:X  a-  US-  nS	/US-
  -  U/-   nU[        [        USS5      5      -   nU$ US-   S-  nS	/U-  nU[        [        USS5      5      -   n U$ [        S
U 35      e)Nflatg      ?pyramidr
   r   r   r   delayed_reverse_sawtoothg{Gz?z'Unsupported value for weighting_scheme=)listr  rG   )r9   r  r  weightsmids        r2   _get_frame_weights,FreeNoiseTransformerBlock._get_frame_weights  sL   v%ej(G8 5 *A~" AouQa01!DbDM1* % "A~!+uQ}-!E/GDbDM9   !;;A~" Ao&C!G,u4!DsAr):$;;  "A~!+&3,!DsAr):$;;  FGWFXYZZr4   c                 (    Xl         X l        X0l        g r  )r  r  r  )r9   r  r  r  s       r2   r  3FreeNoiseTransformerBlock.set_free_noise_properties  s     -, 0r4   r  c                     Xl         X l        g r  rS  rT  s      r2   rU  0FreeNoiseTransformerBlock.set_chunk_feed_forward  rW  r4   r  r   r   r  r  c                 z   Ub(  UR                  SS 5      b  [        R                  S5        Ub  UR                  5       O0 nUR                  nUR
                  n	UR                  S5      n
U R                  U
5      nU R                  U R                  U R                  5      n[        R                  " XU	S9R                  S5      R                  S5      nUS   S   U
:H  nU(       dU  XR                  :  a  [        SU
< SU R                  < 35      eXS   S   -
  nUR                  XR                  -
  U
45        [        R                   " SU
S4US	9n[        R"                  " U5      n[%        U5       GH  u  nu  nn[        R&                  " US S 2UU24   5      nUU-  nUS S 2UU24   nU R)                  U5      nU R*                  b  U R+                  U5      nU R,                  " U4U R.                  (       a  UOS US
.UD6nUU-   nUR0                  S:X  a  UR3                  S5      nU R4                  bZ  U R7                  U5      nU R*                  b!  U R8                  S:w  a  U R+                  U5      nU R4                  " U4UUS
.UD6nUU-   nU[;        U5      S-
  :X  aQ  U(       dJ  US S 2W* S 24==   US S 2U* S 24   US S 2U* S 24   -  -  ss'   US S 2U* S 24==   US S 2U* 4   -  ss'   GM  US S 2UU24==   UU-  -  ss'   US S 2UU24==   U-  ss'   GM     [        R<                  " [?        URA                  U R                  SS9URA                  U R                  SS95       VVs/ s H$  u  nn[        RB                  " US:  UU-  U5      PM&     snnSS9RE                  U	5      nU RG                  U5      nU RH                  b-  [K        U RL                  UU RN                  U RH                  5      nOU RM                  U5      nUU-   nUR0                  S:X  a  UR3                  S5      nU$ s  snnf )Nr   r  r   r   r   r   zExpected num_frames=z1 to be greater or equal than self.context_length=)r   r  r   r  r   )(r  rd   r  r  r   r   sizer  r  r  r  r6   r   r   rG   r  r   
zeros_like	enumerate	ones_liker&  r  r  rr  r   r  rK  r'  rC  rE   r   zipsplitwherer   r  rN  r  r
  rO  )r9   r  r   r   r  r  argsr  r   r   r  r  frame_weightsis_last_frame_batch_completelast_frame_batch_lengthnum_times_accumulatedaccumulated_valuesr  frame_start	frame_endr  hidden_states_chunkr\  rg  accumulated_splitnum_times_splitr  s                              r2   r0  !FreeNoiseTransformerBlock.forward
  s    "-%))'48DtuBXBd!7!<!<!>jl %%##"''*
//
;//0C0CTEZEZ[]OYYZ[\ffgij'4R'8';z'I$
 ,/// #8ZM9kW[WjWjVl!mnn&03DQ3G&G#  */B/B"BJ!OP %Q
A,>v N"--m<+4]+C'A'Y oo&;A{9?T<T&UVG}$G"/;y3H0H"I "&,?!@~~)%)^^4F%G"**"?C?X?X&;^b- )	K #.0C"C"''1,&9&A&A!&D# zz%%)ZZ0C%D">>-$..DU2U)-8J)K&"jj&*?#9 -	 '24G&G#C&**3O"1'>&>&?#?@',C+C+D(DEPQTkSkSlPlHmm@ &a*A)A)B&BCwqSjRjOjGkkC"1k)&;#;<@SV]@]]<%aY)>&>?7J?c ,D| 		 ;>&,,T-@-@a,H)//0C0C/K;;6% Oa/1B_1TVgh; 	
 "U) 	 "ZZ6'-dgg7I4??\`\l\lmI 23I!M1")11!4M-s   2+P7
)rO  rN  r  rq  r8  r  rK  r  r  rE  r   rs  r  r
  r&  r'  r  rt  rC  r7  rp  ry  rr  r  rx  r  r  r  r  r  r  )r   Nr  NFFFFTr@  r  FNNNTT   r   r  )r  rl  )NNNN)rO   r\   r]   r^   r2  r  r   r   r5   r  r"  r   r   r  r  r  rU  r6   r	  r   r   r0  r`   r3  r4  s   @r2   r  r  ,  su   4v -1$-1$%*&+!&(,%#/337&*#'  )1pp !p  	p
 p &c]p p &c]p p #p  $p p "&p p p  !p"  (}#p$ $,C=%p& sm'p( )p* !+p, -p. /p0 1p pdS T%S/5J S C X\]bXc B QZ1!1361JM1	1# S QU  268<9=15{||{ !.{  (5	{
 !) 6{ !%S#X{ 
{ {r4   r  c                      ^  \ rS rSrSr       SS\S\\   S\S\S\S\	S	\	4U 4S
 jjjr
S\R                  S\R                  4S jrSrU =r$ )r%  i  a  
A feed-forward layer.

Parameters:
    dim (`int`): The number of channels in the input.
    dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`.
    mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension.
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
    final_dropout (`bool` *optional*, defaults to False): Apply a final dropout.
    bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
r   rI  multr  r  rv  r   c	                   > [         T
U ]  5         Uc  [        X-  5      nUb  UOUnUS:X  a
  [        XUS9n	US:X  a  [        XSUS9n	ODUS:X  a  [	        XUS9n	O3US:X  a  [        XUS9n	O"US:X  a  [        XUS9n	OUS	:X  a  [        XUS
S9n	[        R                  " / 5      U l
        U R                  R                  W	5        U R                  R                  [        R                  " U5      5        U R                  R                  [        R                  " XrUS95        U(       a0  U R                  R                  [        R                  " U5      5        g g )Ngelur  rH  r.  )approximater   r  zgeglu-approximateswigluzlinear-silur  )r   
activation)r!  r"  r  r   r   r   r   r   r7   
ModuleListnetr  Dropoutr   )r9   r   rI  r  r  r  rv  r  r   act_fnrN   s             r2   r"  FeedForward.__init__  s     	CJI$0'cF"#t4F..#f4HFg%35F11$S$?Fh&C6Fm+%c4FSF==$

7+,		)4@AHHOOBJJw/0 r4   r  r$   c                     [        U5      S:  d  UR                  SS 5      b  Sn[        SSU5        U R                   H  nU" U5      nM     U$ )Nr   r   zThe `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.z1.0.0)rE   r  r   r  )r9   r  r  r  deprecation_messager'   s         r2   r0  FeedForward.forward  sQ    t9q=FJJw5A #Ugw(;<hhF"=1M r4   )r  )Nr   r   r  FNT)rO   r\   r]   r^   r2  r  r   r   r5   r  r"  r6   r	  r0  r`   r3  r4  s   @r2   r%  r%    s      "&$#&1&1 #&1 	&1
 &1 &1 &1 &1 &1PU\\ u||  r4   r%  )=typingr   r   r   r   r   r   r	   r6   torch.nnr7   torch.nn.functional
functionalr   utilsr   r   utils.import_utilsr   r   r   utils.torch_utilsr   activationsr   r   r   r   r   r   attention_processorr   r   r   
embeddingsr   normalizationr   r   r   r   r    r   r   
get_loggerrO   rd   r"   rQ   r8   r	  r  r  r  r6  ro  r  r  r  r  r%  r[   r4   r2   <module>r/     s   E D D     & f f 4 Y Y U U 5 q q D 
		H	%S, S,l@% @%Fbii  QT be  &bii & &R h4BII h4 h4V HBII H HV
.M		 .Mb ~BII ~ ~BERYY EP X		 X Xv
<")) <r4   