
    +h"C                     0   S SK JrJr  S SKrS SKJr  S SKJs  Jr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJr  SSKJr  SSKJr  SSKJr  \R@                  " \!5      r"\ " S S\RF                  5      5       r$ " S S\\
\5      r%g)    )OptionalTupleN   )ConfigMixinregister_to_config)logging)maybe_allow_in_graph   )FeedForward)AllegroAttnProcessor2_0	Attention)
CacheMixin)
PatchEmbedPixArtAlphaTextProjection)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSinglec                   &  ^  \ rS rSrSr      SS\S\S\S\\   S\S\S	\S
\	4U 4S jjjr
     SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\R                  4S jjrSrU =r$ )AllegroTransformerBlock%   a  
Transformer block used in [Allegro](https://github.com/rhymes-ai/Allegro) model.

Args:
    dim (`int`):
        The number of channels in the input and output.
    num_attention_heads (`int`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`):
        The number of channels in each head.
    dropout (`float`, defaults to `0.0`):
        The dropout probability to use.
    cross_attention_dim (`int`, defaults to `2304`):
        The dimension of the cross attention features.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to be used in feed-forward.
    attention_bias (`bool`, defaults to `False`):
        Whether or not to use bias in attention projection layers.
    only_cross_attention (`bool`, defaults to `False`):
    norm_elementwise_affine (`bool`, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    norm_eps (`float`, defaults to `1e-5`):
        Epsilon value for normalization layers.
    final_dropout (`bool` defaults to `False`):
        Whether to apply a final dropout after the last feed-forward layer.
dimnum_attention_headsattention_head_dimcross_attention_dimactivation_fnattention_biasnorm_elementwise_affinenorm_epsc
           
        > [         T
U ]  5         [        R                  " XU	S9U l        [        UUUUUS [        5       S9U l        [        R                  " XU	S9U l        [        UUUUUU[        5       S9U l	        [        R                  " XU	S9U l
        [        UUUS9U l        [        R                  " [        R                  " SU5      US-  -  5      U l        g )Nelementwise_affineeps)	query_dimheadsdim_headdropoutbiasr   	processor)r#   r   r$   r%   r&   r'   r(   )r&   r            ?)super__init__nn	LayerNormnorm1r   r   attn1norm2attn2norm3r   ff	Parametertorchrandnscale_shift_table)selfr   r   r   r&   r   r   r   r   r   	__class__s             k/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/transformers/transformer_allegro.pyr,    AllegroTransformerBlock.__init__B   s     	 \\#W_`
%' $-/

 \\#W_`
 3%'-/

 \\#W_`
'
 "$ekk!S.ACH.L!M    hidden_statesencoder_hidden_statestembattention_maskencoder_attention_maskreturnc                 X   UR                   S   nU R                  S    UR                  USS5      -   R                  SSS9u  pppU R	                  U5      nUSU	-   -  U-   nUR                  S5      nU R                  US UUS9nX-  nX-   nUR                  S:X  a  UR                  S5      nU R                  b  UnU R                  UUUS S9nX-   nU R                  U5      nUSU-   -  U-   nU R                  U5      nUU-  nUU-   nUR                  S:X  a  UR                  S5      nU$ )Nr   r)      r   )r?   rA   image_rotary_emb   )shaper8   reshapechunkr/   squeezer0   ndimr2   r1   r4   )r9   r>   r?   r@   rA   rB   rH   
batch_size	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlpnorm_hidden_statesattn_output	ff_outputs                    r;   forwardAllegroTransformerBlock.forwardu   s}    #((+
 ""4(4<<
Ar+JJ
%q%/ 	G	h9 "ZZ6/1y=AIM/77:jj"&)-	 ! 
 ,#3")11!4M ::!!.**"&;5!%	 % K (7M "ZZ6/1y=AIMGG./	y(	!M1 ")11!4Mr=   )r0   r2   r4   r/   r1   r3   r8   )        NgegluFTgh㈵>)NNNNN)__name__
__module____qualname____firstlineno____doc__intr   strboolfloatr,   r6   Tensor
LongTensorrY   __static_attributes____classcell__r:   s   @r;   r   r   %   s   @ -1$$(,1N1N !1N  	1N &c]1N 1N 1N "&1N 1N 1Nl 9=+/159=8||8  (58 u''(	8
 !.8 !) 68 
8 8r=   r   c            *         ^  \ rS rSrSr Sr/ SQr\                    S"S\S\S\S\S\S	\S
\S\S\S\	S\S\S\S\
S\	S\S\S\S\S\4(U 4S jjj5       r    S#S\R                  S\R                  S\R                  S\\R                     S\\R                     S\\\R                  \R                  4      S\	4S  jjrS!rU =r$ )$AllegroTransformer3DModel   T)	pos_embednormadaln_single
patch_sizepatch_size_tr   r   in_channelsout_channels
num_layersr&   r   r   sample_heightsample_widthsample_framesr   r   r   caption_channelsinterpolation_scale_hinterpolation_scale_winterpolation_scale_tc                   > [         TU ]  5         X4-  U l        Ub  UOUS-  S:X  a  US-
  S-  S-   OUS-  nUb  UOUS-  nUb  UOUS-  n[        UUUUU R                  S S9U l        [
        R                  " [        U5       Vs/ s H  n[        U R                  UUUU	UU
UUS9	PM      sn5      U l	        [
        R                  " U R                  SS	S
9U l        [
        R                  " [        R                  " SU R                  5      U R                  S-  -  5      U l        [
        R                   " U R                  X-  U-  5      U l        [%        U R                  SS9U l        [)        UU R                  S9U l        SU l        g s  snf )Nr
   rF         (   )heightwidthrq   rs   	embed_dimpos_embed_type)r&   r   r   r   r   r   Fư>r    r*   )use_additional_conditions)in_featureshidden_size)r+   r,   	inner_dimr   rn   r-   
ModuleListranger   transformer_blocksr.   norm_outr5   r6   r7   r8   Linearproj_outr   rp   r   caption_projectiongradient_checkpointing)r9   rq   rr   r   r   rs   rt   ru   r&   r   r   rv   rw   rx   r   r   r   ry   rz   r{   r|   _r:   s                         r;   r,   "AllegroTransformer3DModel.__init__   s   0 	,A %0 " q A% !1$+a/"$ 	 :O9Z 5`mpr`r9N9Z 5`loq`q $ !#nn
 #%-- z* +A (NN'&#(;"/#1,C%
 +#
$ T^^SWX!#ekk!T^^.Lt~~_bOb.b!c		$..*2IL2XY /t~~Y^_ #<HXfjftft"u&+#7s   %F
r>   r?   timesteprA   rB   rH   return_dictc           
      ~   UR                   u  ppnU R                  R                  nU R                  R                  nX-  nX-  nX-  nUb  UR                  S:X  a  UR                  UR                  5      nUS S 2S U
24   nUR                  5       S:  aL  UR                  S5      n[        R                  " XMX4XU4S9nUR                  S5      R                  USS5      nUR                  5       S:  a/  SUR                  5       R                  UR                  5      -
  S-  OS nUbB  UR                  S:X  a2  SUR                  U R                  5      -
  S-  nUR                  S5      nU R                  X8UR                  S9u  nnUR                  SSSS	S5      R                  SS5      nU R!                  U5      nUR#                  SUS45      R                  SS5      nU R%                  U5      nUR                  USUR                   S   5      n['        U R(                  5       HU  u  nn[*        R,                  " 5       (       a*  U R.                  (       a  U R1                  UUUUUUU5      nMJ  U" UUUUUUS
9nMW     U R2                  S    US S 2S 4   -   R5                  SSS9u  nnU R7                  U5      nUSU-   -  U-   nU R9                  U5      nUR;                  S5      nUR=                  XUUXUS5      nUR                  SSSSSSS	S5      nUR=                  USXU5      nU(       d  U4$ [?        US9$ )NrI   r   rF   )kernel_sizestriderE   g     r
   )rO   hidden_dtyper   )r>   r?   r@   rA   rB   rH   rG         r)   )sample) rJ   configrr   rq   rN   todtypenumel	unsqueezeF
max_pool3dflattenviewrd   rp   permutern   	unflattenr   	enumerater   r6   is_grad_enabledr   _gradient_checkpointing_funcr8   rL   r   r   rM   rK   r   )r9   r>   r?   r   rA   rB   rH   r   rO   num_channels
num_framesr   r   p_tppost_patch_num_framespost_patch_heightpost_patch_widthembedded_timestepiblockshiftscaleoutputs                           r;   rY   !AllegroTransformer3DModel.forward3  s    ?L>Q>Q;
*ekk&&KK"" * 1"K : %.*=*=*B ,..}/B/BCN+A{
{N;N##%)!/!9!9!!<!"nPQ+_bgh^i!j!/!7!7!:!?!?
Ar!R SaRfRfRhklRl^((*--m.A.ABBhNrv 
 "-2H2M2MQR2R&'*@*C*CDJJ*O&OS[%["%;%E%Ea%H" '+&7&7-:M:M '8 '
##
 &--aAq!<DDQJ}5%//J3CDLLQPQR $ 7 78M N 5 : ::rK`KfKfgiKj k "$"9"9:HAu$$&&4+F+F $ A A!)"*$! !&"/*?!#1+A%5! ;. ..t47HD7QQXXYZ`aXbum4 &U3e;m4%--a0 &--/@BRTW\]_a
 &--aAq!Q1E&&z2z5Q9'v66r=   )	rp   r   r   r   r   rn   r   r8   r   )r
   rF      `   rI   rI       r[   i 	  TZ         zgelu-approximateFr   i          @r   g@)NNNT)r]   r^   r_   r`    _supports_gradient_checkpointing _skip_layerwise_casting_patternsr   rb   re   rd   rc   r,   r6   rf   rg   r   r   rY   rh   ri   rj   s   @r;   rl   rl      s   '+$,\ (,$'L$ #%"$#'#/(- $'*'*'*+L,L, L, !	L,
  L, L, L, L, L, !L, L, L, L, L, L,  "&!L," #L,$ %L,&  %'L,(  %)L,*  %+L, L,f 269=HL k7||k7  %||k7 ""	k7
 !.k7 !) 6k7 #5u||)C#DEk7 k7 k7r=   rl   )&typingr   r   r6   torch.nnr-   torch.nn.functional
functionalr   configuration_utilsr   r   utilsr   utils.torch_utilsr	   	attentionr   attention_processorr   r   cache_utilsr   
embeddingsr   r   modeling_outputsr   modeling_utilsr   normalizationr   
get_loggerr]   loggerModuler   rl    r=   r;   <module>r      s{     #     B  5 # D $ > 7 ' . 
		H	% Gbii G GTn7
K n7r=   