
    +hV                     
   S SK r S SKrS SKJrJrJrJrJr  S SKrS SK	J
r
  SSKJrJr  SSKJrJr  SSKJrJrJrJrJrJr  SSKJr  SS	KJrJrJr  SS
KJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)J*r*  \RV                  " \,5      r- " S S5      r. " S S5      r/ " S S\R                  R`                  \5      r1 " S S\
R`                  5      r2\ " S S\
R`                  5      5       r3\ " S S\'\\\\\!5      5       r4S r5g)    N)AnyDictOptionalTupleUnion   )ConfigMixinregister_to_config)FromOriginalModelMixinPeftAdapterMixin)USE_PEFT_BACKEND	deprecateis_torch_versionloggingscale_lora_layersunscale_lora_layers)maybe_allow_in_graph   )AttentionMixinAttentionModuleMixinFeedForward)dispatch_attention_fn)
CacheMixin)PixArtAlphaTextProjection)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSingleRMSNormc                       \ rS rSrS rSrg)LTXVideoAttentionProcessor2_0'   c                 6    Sn[        SSU5        [        U0 UD6$ )Nz~`LTXVideoAttentionProcessor2_0` is deprecated and this will be removed in a future version. Please use `LTXVideoAttnProcessor`r    z1.0.0)r   LTXVideoAttnProcessor)clsargskwargsdeprecation_messages       g/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/transformers/transformer_ltx.py__new__%LTXVideoAttentionProcessor2_0.__new__(   s)     _17<OP$d5f55     N)__name__
__module____qualname____firstlineno__r)   __static_attributes__r,   r+   r(   r    r    '   s    6r+   r    c                       \ rS rSrSrSrS r   SSSS\R                  S\	\R                     S	\	\R                     S
\	\R                     S\R                  4S jjr
Srg)r#   /   z
Processor for implementing attention (SDPA is used by default if you're using PyTorch 2.0). This is used in the LTX
model. It applies a normalization layer and rotary embedding on the query and key vector.
Nc                 <    [        SS5      (       a  [        S5      eg )N<z2.0zlLTX attention processors require a minimum PyTorch version of 2.0. Please upgrade your PyTorch installation.)r   
ValueError)selfs    r(   __init__LTXVideoAttnProcessor.__init__7   s$    C''~  (r+   attnLTXAttentionhidden_statesencoder_hidden_statesattention_maskimage_rotary_embreturnc           
      :   Uc  UR                   OUR                   u  pgnUb<  UR                  XGU5      nUR                  XaR                  SUR                   S   5      nUc  UnUR	                  U5      n	UR                  U5      n
UR                  U5      nUR                  U	5      n	UR                  U
5      n
Ub  [        X5      n	[        X5      n
U	R                  SUR                  S45      n	U
R                  SUR                  S45      n
UR                  SUR                  S45      n[        U	U
UUSSU R                  S9nUR                  SS5      nUR                  U	R                  5      nUR                   S   " U5      nUR                   S   " U5      nU$ )	Nr           F)	attn_mask	dropout_p	is_causalbackendr   r      )shapeprepare_attention_maskviewheadsto_qto_kto_vnorm_qnorm_kapply_rotary_emb	unflattenr   _attention_backendflattentodtypeto_out)r7   r:   r<   r=   r>   r?   
batch_sizesequence_length_querykeyvalues               r(   __call__LTXVideoAttnProcessor.__call__=   s    $9#@MF[FaFa 	'
Q %!88ZdeN+00ZZ^MaMabdMefN ($1!		-(ii-.		/0E"kk#'$U=E"39CDJJ#34mmA

B/0DJJ#34-$++
 &--a3%((5A}5A}5r+   r,   NNN)r-   r.   r/   r0   __doc__rT   r8   torchTensorr   r_   r1   r,   r+   r(   r#   r#   /   s    
  9=153700 ||0  (5	0
 !.0 #5<<00 
0 0r+   r#   c                     ^  \ rS rSr\r\/r         SS\S\S\S\S\S\	S\
\   S	\	S
\4U 4S jjjr   SS\R                  S\
\R                     S\
\R                     S\
\R                     S\R                  4
S jjrSrU =r$ )r;   p   	query_dimrL   kv_headsdim_headdropoutbiascross_attention_dimout_biasqk_normc                 T  > [         TU ]  5         U	S:w  a  [        S5      eX@l        XB-  U l        Uc  U R                  OXC-  U l        Xl        Ub  UOUU l        X`l        XPl	        Xl
        X l        SnSn[        R                  R                  XB-  XS9U l        [        R                  R                  XC-  XS9U l        [        R                  R#                  XR                  US9U l        [        R                  R#                  U R                  U R
                  US9U l        [        R                  R#                  U R                  U R
                  US9U l        [        R                  R+                  / 5      U l        U R,                  R/                  [        R                  R#                  U R                  U R                  US95        U R,                  R/                  [        R                  R1                  U5      5        U
c  U R3                  5       n
U R5                  U
5        g )Nrms_norm_across_headszIOnly 'rms_norm_across_heads' is supported as a valid value for `qk_norm`.gh㈵>Tepselementwise_affine)rk   )superr8   NotImplementedErrorhead_dim	inner_diminner_kv_dimrg   rl   use_biasrj   out_dimrL   rc   nnr   rP   rQ   LinearrM   rN   rO   
ModuleListrX   appendDropout_default_processor_clsset_processor)r7   rg   rL   rh   ri   rj   rk   rl   rm   rn   	processornorm_epsnorm_elementwise_affine	__class__s                r(   r8   LTXAttention.__init__t   s    	--%&qrr !).6.>DNNHDW":M:Y#6_h  
"&hh&&x'7X&rhh&&x':&uHHOOI~~DOI	HHOOD$<$<d>O>OVZO[	HHOOD$<$<d>O>OVZO[	hh))"-588??4>>4<<h?WX588++G45335I9%r+   r<   r=   r>   r?   r@   c                    [        [        R                  " U R                  R                  5      R
                  R                  5       5      nUR                  5        VVs/ s H  u  pxXv;  d  M  UPM     n	nn[        U	5      S:  a:  [        R                  SU	 SU R                  R                  R                   S35        UR                  5        VV
s0 s H  u  pzXv;   d  M  Xz_M     nnn
U R                  " XX#U40 UD6$ s  snnf s  sn
nf )Nr   zattention_kwargs z are not expected by z and will be ignored.)setinspect	signaturer   r_   
parameterskeysitemslenloggerwarningr   r-   )r7   r<   r=   r>   r?   r&   attn_parameterskr[   unused_kwargsws              r(   forwardLTXAttention.forward   s     g//0G0GHSSXXZ['-||~R~tq9Q~R}!NN#M?2GH`H`HiHiGjj  A $*<<>J>41Q5I$!$>J~~d3HZjuntuu S
 Ks   D .D D"D)rl   rj   rv   rL   rw   rx   rQ   rP   rz   rg   rN   rX   rM   rO   ry   )	   r   @   rC   TNTrp   Nra   )r-   r.   r/   r0   r#   r   _available_processorsintfloatboolr   strr8   rc   rd   r   r1   __classcell__r   s   @r(   r;   r;   p   s   223
 -1.(&(& (& 	(&
 (& (& (& &c](& (& (& (&Z 9=1537v||v  (5v !.	v
 #5<<0v 
v vr+   r;   c                     ^  \ rS rSr      SS\S\S\S\S\S\S\S	S
4U 4S jjjrS\S\S\S\S\\R                  \\4   S\R                  S	\R                  4S jr     SS\R                  S\\   S\\   S\\   S\\\R                  \\4      S\\R                     S	\\R                  \R                  4   4S jjrSrU =r$ )LTXVideoRotaryPosEmbed   dimbase_num_framesbase_height
base_width
patch_sizepatch_size_tthetar@   Nc                 v   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xpl        g )N)	rt   r8   r   r   r   r   r   r   r   )	r7   r   r   r   r   r   r   r   r   s	           r(   r8   LTXVideoRotaryPosEmbed.__init__   s6     	.&$$(
r+   rY   
num_framesheightwidthrope_interpolation_scaledevicec                    [         R                  " U[         R                  US9n[         R                  " U[         R                  US9n[         R                  " U[         R                  US9n	[         R                  " XUSS9n
[         R                  " U
SS9n
U
R                  S5      R                  USSSS5      n
Ub  U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
R                  SS	5      R                  SS5      n
U
$ )
N)rW   r   ij)indexingr   r   rH   r   r      )rc   arangefloat32meshgridstack	unsqueezerepeatr   r   r   r   r   rU   	transpose)r7   rY   r   r   r   r   r   grid_hgrid_wgrid_fgrids              r(   _prepare_video_coords,LTXVideoRotaryPosEmbed._prepare_video_coords   sj    fEMM&Ie5==HjfM~~fftD{{4Q'~~a ''
Aq!Q?#/1Q3<*B1*EEHYHYY\`\p\ppDAaCL1Q3<*B1*EEWZ^ZjZjjDAaCL1Q3<*B1*EEWZ^ZiZiiDAaCL||Aq!++Aq1r+   r<   video_coordsc           	         UR                  S5      nUc  U R                  UUUUUUR                  S9nOT[        R                  " US S 2S4   U R
                  -  US S 2S4   U R                  -  US S 2S4   U R                  -  /SS9nSn	U R                  n
U R                  [        R                  " [        R                  " XR                  5      [        R                  " XR                  5      U R                  S-  UR                  [        R                  S	9-  nU[        R                  -  S
-  nXR                  S5      S-  S-
  -  nUR!                  SS5      R#                  S5      nUR%                  5       R'                  SSS9nUR)                  5       R'                  SSS9nU R                  S-  S:w  a  [        R*                  " US S 2S S 2S U R                  S-  24   5      n[        R,                  " US S 2S S 2S U R                  S-  24   5      n[        R.                  " X/SS9n[        R.                  " X/SS9nX4$ )Nr   )r   r   rH   r   rB   r         ?   )r   rW   g       @)sizer   r   rc   r   r   r   r   r   linspacemathlogr   r   pir   r   rU   cosrepeat_interleavesin	ones_like
zeros_likecat)r7   r<   r   r   r   r   r   rY   r   startendfreqs	cos_freqs	sin_freqscos_paddingsin_paddings                   r(   r   LTXVideoRotaryPosEmbed.forward   s    #''*
--)A$++ . D ;; A&)=)== A&)9)99 A&8
 D jj

ennHHUJJ'HHS**%HHM ''--
 
 #%+a/!34B'//2IIK11!1<	IIK11!1<	88a<1//)Aq.DHHqL.4H*IJK**9Q>TXX\>5I+JKK		;":CI		;":CI##r+   )r   r   r   r   r   r   r   )      r   rH   rH        @)NNNNN)r-   r.   r/   r0   r   r   r8   r   rc   rd   r   r   r   r   r1   r   r   s   @r(   r   r      so     "  	
     
 (  	
  #(eU(B"C  
: %) $#PT/34$||4$ SM4$ 	4$
 }4$ #+5ue1K+L"M4$ u||,4$ 
u||U\\)	*4$ 4$r+   r   c                   4  ^  \ rS rSrSr      SS\S\S\S\S\S\S	\S
\S\S\4U 4S jjjr	  SS\
R                  S\
R                  S\
R                  S\\\
R                  \
R                  4      S\\
R                     S\
R                  4S jjrSrU =r$ )LTXVideoTransformerBlocki  ai  
Transformer block used in [LTX](https://huggingface.co/Lightricks/LTX-Video).

Args:
    dim (`int`):
        The number of channels in the input and output.
    num_attention_heads (`int`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`):
        The number of channels in each head.
    qk_norm (`str`, defaults to `"rms_norm"`):
        The normalization layer to use.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    eps (`float`, defaults to `1e-6`):
        Epsilon value for normalization layers.
r   num_attention_headsattention_head_dimrl   rn   activation_fnattention_biasattention_out_biasrr   rs   c                 :  > [         TU ]  5         [        XU
S9U l        [	        UUUUUS UUS9U l        [        XU
S9U l        [	        UUUUUUUUS9U l        [        XS9U l	        [        R                  " [        R                  " SU5      US-  -  5      U l        g )Nrq   )rg   rL   rh   ri   rk   rl   rm   rn   )rg   rl   rL   rh   ri   rk   rm   rn   )r   r         ?)rt   r8   r   norm1r;   attn1norm2attn2r   ffr{   	Parameterrc   randnscale_shift_table)r7   r   r   r   rl   rn   r   r   r   rr   rs   r   s              r(   r8   !LTXVideoTransformerBlock.__init__*  s     	S>PQ
!%(' $'	

 S>PQ
! 3%(''	

 c?!#ekk!S.ACH.L!Mr+   r<   r=   tembr?   encoder_attention_maskr@   c                    UR                  S5      nU R                  U5      nU R                  R                  S   nU R                  S   R	                  UR
                  5      UR                  XcR                  S5      US5      -   n	U	R                  SS9u  pppUSU-   -  U
-   nU R                  US US9nUUU-  -   nU R                  UUS US9nUU-   nU R                  U5      SU-   -  U-   nU R                  U5      nUUU-  -   nU$ )	Nr   NNrH   rB   r   r   )r<   r=   r?   )r=   r?   r>   )r   r   r   rI   rV   r   reshapeunbindr   r   r   r   )r7   r<   r=   r   r?   r   rY   norm_hidden_statesnum_ada_params
ada_values	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlpattn_hidden_states	ff_outputs                     r(   r    LTXVideoTransformerBlock.forwardU  sH    #''*
!ZZ6//55a8++J7::4;;G$,,		!nbK
 

 JTIZIZ_`IZIaF	h9/1y=AIM!ZZ,"&- ( 

 &(:X(EE!ZZ"7!1	 ( 
 &(::!ZZ6!i-H9TGG./	%	H(<<r+   )r   r   r   r   r   r   )rp   gelu-approximateTTư>Fr   )r-   r.   r/   r0   rb   r   r   r   r   r8   rc   rd   r   r   r   r1   r   r   s   @r(   r   r     s   0 //##'#()N)N !)N  	)N
 !)N )N )N )N !)N )N !)N )N` IM9=%||%  %||% ll	%
 #5u||)C#DE% !) 6% 
% %r+   r   c            "         ^  \ rS rSrSrSrS/rS/r\               S%S\	S\	S\	S	\	S
\	S\	S\	S\	S\
S\
S\S\S\	S\S\SS4 U 4S jjj5       r       S&S\R                  S\R                  S\R                   S\R                  S\\	   S\\	   S\\	   S\\\\\\4   \R                  4      S \\R                     S!\\\
\4      S"\S\R                  4S# jjrS$rU =r$ )'LTXVideoTransformer3DModeli}  a  
A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video).

Args:
    in_channels (`int`, defaults to `128`):
        The number of channels in the input.
    out_channels (`int`, defaults to `128`):
        The number of channels in the output.
    patch_size (`int`, defaults to `1`):
        The size of the spatial patches to use in the patch embedding layer.
    patch_size_t (`int`, defaults to `1`):
        The size of the tmeporal patches to use in the patch embedding layer.
    num_attention_heads (`int`, defaults to `32`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `64`):
        The number of channels in each head.
    cross_attention_dim (`int`, defaults to `2048 `):
        The number of channels for cross attention heads.
    num_layers (`int`, defaults to `28`):
        The number of layers of Transformer blocks to use.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    qk_norm (`str`, defaults to `"rms_norm_across_heads"`):
        The normalization layer to use.
Tnormr   in_channelsout_channelsr   r   r   r   rl   
num_layersr   rn   r   r   caption_channelsr   r   r@   Nc                 X  > [         TU ]  5         U=(       d    UnXV-  n[        R                  " UU5      U l        [        R
                  " [        R                  " SU5      US-  -  5      U l        [        USS9U l
        [        UUS9U l        [        USSSUUSS	9U l        [        R                  " [!        U5       Vs/ s H  n[#        UUUUU
U	UUUUS
9
PM     sn5      U l        [        R&                  " USSS9U l        [        R                  " UU5      U l        SU l        g s  snf )Nr   r   F)use_additional_conditions)in_featureshidden_sizer   r   r   )r   r   r   r   r   r   r   )
r   r   r   rl   rn   r   r   r   rr   rs   r  rq   )rt   r8   r{   r|   proj_inr   rc   r   r   r   
time_embedr   caption_projectionr   roper}   ranger   transformer_blocks	LayerNormnorm_outproj_outgradient_checkpointing)r7   r  r  r   r   r   r   rl   r	  r   rn   r   r   r
  r   r   rw   r[   r   s                     r(   r8   #LTXVideoTransformer3DModel.__init__  s,   & 	#2{'<	yyi8!#ekk!Y.G)UX..X!Y,YRWX";HXfo"p*!%
	 #%-- z* +A )!(;'9(;#"/#1'9 '> +#
$ YDUS		)\:&+#)s    D'r<   r=   timestepr   r   r   r   r   r   attention_kwargsreturn_dictc           	         U
b#  U
R                  5       n
U
R                  SS5      nOSn[        (       a  [        X5        O+U
b(  U
R	                  SS 5      b  [
        R                  S5        U R                  XXgX5      nUbB  UR                  S:X  a2  SUR                  UR                  5      -
  S-  nUR                  S5      nUR                  S5      nU R                  U5      nU R                  UR                  5       UUR                  S9u  nnUR!                  US	UR                  S	5      5      nUR!                  US	UR                  S	5      5      nU R#                  U5      nUR!                  US	UR                  S	5      5      nU R$                   HP  n[&        R(                  " 5       (       a)  U R*                  (       a  U R-                  UUUUUU5      nMF  U" UUUUUS
9nMR     U R.                  S   US S 2S S 2S 4   -   nUS S 2S S 2S4   US S 2S S 2S4   nnU R1                  U5      nUSU-   -  U-   nU R3                  U5      n[        (       a  [5        X5        U(       d  U4$ [7        US9$ )Nscaler   zVPassing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective.r   rH   g     r   )rY   hidden_dtyperB   )r<   r=   r   r?   r   r   )sample)copypopr   r   getr   r   r  ndimrV   rW   r   r   r  r  rU   rK   r  r  rc   is_grad_enabledr  _gradient_checkpointing_funcr   r  r  r   r   )r7   r<   r=   r  r   r   r   r   r   r   r  r  
lora_scaler?   rY   r   embedded_timestepblockscale_shift_valuesshiftr  outputs                         r(   r   "LTXVideoTransformer3DModel.forward  s    '/446)--gs;JJd/+0@0D0DWd0S0_l  99]Ogv "-2H2M2MQR2R&'*@*C*CMDWDW*X&X\d%d"%;%E%Ea%H""''*
]3"&//!&,, #2 #
 yyR27-22:rCTCYCYZ\C]^ $ 7 78M N 5 : ::r=K]K]^`Ka b,,E$$&&4+F+F $ A A!)$*! !&"/*?%5+A! -& "33J?BSTUWXZ^T^B__)!Q'24Fq!Qw4Oum4%U3e;}-19'v66r+   )	r  r  r  r  r  r  r   r  r  )   r.  rH   rH       r   r      r  rp   Fr  i   TT)NNNNNNT)r-   r.   r/   r0   rb    _supports_gradient_checkpointing _skip_layerwise_casting_patterns_repeated_blocksr
   r   r   r   r   r8   rc   rd   
LongTensorr   r   r   r   r   r   r1   r   r   s   @r(   r  r  }  s   4 (,$(.x$23 #%"$#'/.(- $##'!=,=, =, 	=,
 =, !=,  =, !=, =, =, =, "&=, =, =, =,  !!=," 
#=, =,J %) $#^b/359 S7||S7  %||S7 ""	S7
 !&S7 SMS7 S7 }S7 #+5ueU7J1KU\\1Y+Z"[S7 u||,S7 #4S>2S7 S7 
S7 S7r+   r  c                 "   Uu  p#U R                  SS5      R                  S5      u  pE[        R                  " U* U/SS9R	                  S5      nU R                  5       U-  UR                  5       U-  -   R                  U R                  5      nU$ )Nr   )rB   r   rB   r   )rS   r   rc   r   rU   r   rV   rW   )xr   r   r   x_realx_imag	x_rotatedouts           r(   rR   rR   5  s|    HC[[G,33B7NFfWf-26>>qAI779s?Y__.44
8
8
ACJr+   )6r   r   typingr   r   r   r   r   rc   torch.nnr{   configuration_utilsr	   r
   loadersr   r   utilsr   r   r   r   r   r   utils.torch_utilsr   	attentionr   r   r   attention_dispatchr   cache_utilsr   
embeddingsr   modeling_outputsr   modeling_utilsr   normalizationr   r   
get_loggerr-   r   r    r#   Moduler;   r   r   r  rR   r,   r+   r(   <module>rJ     s       4 4   B ? s s 5 I I 6 $ 2 7 ' 7 
		H	%6 6> >B=v588??$8 =v@c$RYY c$L cryy c cL t7^-CEUWat7 t7nr+   