
    +hi                        S SK JrJrJrJrJr  S SKrS SKrS SK	J
r
  SSKJrJr  SSKJrJrJr  SSKJrJrJrJrJr  SSKJr  SSKJr  S	S
KJrJr  S	SKJ r   S	SK!J"r"J#r#J$r$J%r%  S	SK&J'r'  S	SK(J)r)  S	SK*J+r+J,r,J-r-  SSK.J/r/J0r0  \Rb                  " \25      r3 " S S\
Rh                  5      r5 " S S\
Rh                  5      r6 " S S\
Rh                  5      r7 " S S\
Rh                  5      r8 " S S\
Rh                  5      r9\ " S S\
Rh                  5      5       r:\ " S S\
Rh                  5      5       r; " S  S!\)\\\\\ \5	      r<g)"    )AnyDictOptionalTupleUnionN   )ConfigMixinregister_to_config)FluxTransformer2DLoadersMixinFromOriginalModelMixinPeftAdapterMixin)USE_PEFT_BACKEND	deprecateloggingscale_lora_layersunscale_lora_layers)is_torch_npu_available)maybe_allow_in_graph   )AttentionMixinFeedForward)
CacheMixin)FluxPosEmbedPixArtAlphaTextProjection	Timestepsget_timestep_embedding)Transformer2DModelOutput)
ModelMixin)CombinedTimestepLabelEmbeddingsFP32LayerNormRMSNorm   )FluxAttentionFluxAttnProcessorc                   `  ^  \ rS rSrSrSS\S\\   4U 4S jjjr    SS\R                  S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                  \R                  \R                  \R                  \R                  4   4S jjrSrU =r$ )ChromaAdaLayerNormZeroPruned'   
Norm layer adaptive layer norm zero (adaLN-Zero).

Parameters:
    embedding_dim (`int`): The size of each embedding vector.
    num_embeddings (`int`): The size of the embeddings dictionary.
embedding_dimnum_embeddingsc                    > [         TU ]  5         Ub  [        X!5      U l        OS U l        US:X  a  [        R
                  " USSS9U l        g US:X  a  [        USSS9U l        g [        SU S35      e)	N
layer_normFư>elementwise_affineepsfp32_layer_norm)r/   biasUnsupported `norm_type` (@) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)	super__init__r   embnn	LayerNormnormr    
ValueError)selfr)   r*   	norm_typer2   	__class__s        j/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/models/transformers/transformer_chroma.pyr6   %ChromaAdaLayerNormZeroPruned.__init__0   sx    %6~UDHDH$]uRVWDI++%mTYZDI+I;6vw     xtimestepclass_labelshidden_dtyper7   returnc                     U R                   b  U R                  X#US9nUR                  SS5      R                  SSS9u  pgppU R                  U5      SUS S 2S 4   -   -  US S 2S 4   -   nXXU4$ )N)rE   r"   r      dim)r7   flattenchunkr:   )r<   rB   rC   rD   rE   r7   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlps               r?   forward$ChromaAdaLayerNormZeroPruned.forward@   s     88((8(MCILUVXYIZI`I`abhiI`IjF	h9IIaLA	!T' 223i46HHI(::rA   )r7   r:   )Nr,   T)NNNN)__name__
__module____qualname____firstlineno____doc__intr   r6   torchTensor
LongTensordtyper   rS   __static_attributes____classcell__r>   s   @r?   r&   r&   '   s    c 8C=  & ,037.2&*;<<; 5<<(; u//0	;
 u{{+; ell#; 
u||U\\5<<u||S	T; ;rA   r&   c                      ^  \ rS rSrSrS
S\4U 4S jjjr SS\R                  S\	\R                     S\
\R                  \R                  \R                  \R                  \R                  4   4S jjrS	rU =r$ )"ChromaAdaLayerNormZeroSinglePrunedO   r(   r)   c                    > [         TU ]  5         US:X  a  [        R                  " USSS9U l        g [        SU S35      e)Nr,   Fr-   r.   r3   r4   )r5   r6   r8   r9   r:   r;   )r<   r)   r=   r2   r>   s       r?   r6   +ChromaAdaLayerNormZeroSinglePruned.__init__X   sE    $]uRVWDI+I;6vw rA   rB   r7   rF   c                     UR                  SS5      R                  SSS9u  p4nU R                  U5      SUS S 2S 4   -   -  US S 2S 4   -   nX4$ )Nr"   r   r   rI   )rK   rL   r:   )r<   rB   r7   rM   rN   rO   s         r?   rS   *ChromaAdaLayerNormZeroSinglePruned.forwardb   s_    
 *-Q):)@)@)@)J&	hIIaLA	!T' 223i46HH{rA   r:   )r,   TN)rU   rV   rW   rX   rY   rZ   r6   r[   r\   r   r   rS   r_   r`   ra   s   @r?   rc   rc   O   su    c   '+<< ell# 
u||U\\5<<u||S	T	 rA   rc   c                      ^  \ rS rSrSr    SS\S\4U 4S jjjrS\R                  S\R                  S\R                  4S	 jr	S
r
U =r$ )"ChromaAdaLayerNormContinuousPrunedl   aq  
Adaptive normalization layer with a norm layer (layer_norm or rms_norm).

Args:
    embedding_dim (`int`): Embedding dimension to use during projection.
    conditioning_embedding_dim (`int`): Dimension of the input condition.
    elementwise_affine (`bool`, defaults to `True`):
        Boolean flag to denote if affine transformation should be applied.
    eps (`float`, defaults to 1e-5): Epsilon factor.
    bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
    norm_type (`str`, defaults to `"layer_norm"`):
        Normalization layer to use. Values supported: "layer_norm", "rms_norm".
r)   conditioning_embedding_dimc                    > [         TU ]  5         US:X  a  [        R                  " XX55      U l        g US:X  a  [        XU5      U l        g [        SU 35      e)Nr,   rms_normzunknown norm_type )r5   r6   r8   r9   r:   r!   r;   )r<   r)   rn   r/   r0   r2   r=   r>   s          r?   r6   +ChromaAdaLayerNormContinuousPruned.__init__{   sU     	$]9KRDI*$4FGDI1)=>>rA   rB   r7   rF   c                     [         R                  " UR                  SS5      R                  UR                  5      SSS9u  p4U R                  U5      SU-   S S 2S S S 24   -  US S 2S S S 24   -   nU$ )Nr"   r   rI   )r[   rL   rK   tor^   r:   )r<   rB   r7   shiftscales        r?   rS   *ChromaAdaLayerNormContinuousPruned.forward   sf    {{3;;q!#4#7#7#@!KIIaLAIq$z22U1dA:5FFrA   ri   )Tgh㈵>Tr,   )rU   rV   rW   rX   rY   rZ   r6   r[   r\   rS   r_   r`   ra   s   @r?   rl   rl   l   s]    .  ?? %(? ?, ELL U\\  rA   rl   c                   n   ^  \ rS rSrS\S\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	(ChromaCombinedTimestepTextProjEmbeddings   num_channelsout_dimc           
         > [         TU ]  5         [        USSS9U l        [        USSS9U l        U R                  S[        [        R                  " U5      S-  SU-  SSS9SS	9  g )
NTr   )rz   flip_sin_to_cosdownscale_freq_shiftmod_proj  r   )r}   r~   F)
persistent)	r5   r6   r   	time_projguidance_projregister_bufferr   r[   arange)r<   rz   r{   r>   s      r?   r6   1ChromaCombinedTimestepTextProjEmbeddings.__init__   ss    "dijk&LRVmno"W%,a,.>PTkl  	 	
rA   rC   rF   c                    U R                   R                  S   nUR                  S   nU R                  U5      R                  UR                  S9nU R                  [        R                  " S/U-  5      5      R                  UR                  UR                  S9nU R                   R                  UR                  UR                  S9R                  USS5      n[        R                  " XE/SS9R                  S5      R                  SUS5      n[        R                  " Xv/SS9nUR                  UR                  5      $ )Nr   )r^   )r^   devicer"   rI   )r   shaper   rs   r^   r   r[   tensorr   repeatcat	unsqueeze)	r<   rC   mod_index_length
batch_sizetimesteps_projr   r   timestep_guidance	input_vecs	            r?   rS   0ChromaCombinedTimestepTextProjEmbeddings.forward   s   ==..q1^^A&
1448>>4J**5<<j8H+IJMM.. N 
 ==##.*>*>~G\G\#]ddeoqrtuvII~51=GGJQQRSUeghi 	 II0;D	||HNN++rA   )r   r   )rU   rV   rW   rX   rZ   r6   r[   r\   rS   r_   r`   ra   s   @r?   rx   rx      s6    
S 
3 
, , , ,rA   rx   c            	       F   ^  \ rS rSrS	S\S\S\S\4U 4S jjjrS rSrU =r$ )
ChromaApproximator   in_dimr{   
hidden_dimn_layersc                   > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " [        U5       Vs/ s H  n[        X3SS9PM     sn5      U l        [        R
                  " [        U5       Vs/ s H  n[        R                  " U5      PM     sn5      U l
        [        R                  " X25      U l        g s  snf s  snf )NTr2   silu)act_fn)r5   r6   r8   Linearin_proj
ModuleListranger   layersr!   normsout_proj)r<   r   r{   r   r   _r>   s         r?   r6   ChromaApproximator.__init__   s    yy$?mmW\]eWfgWfRS&zfMWfg
 ]]E(O#TOqBJJz$:O#TU
		*6 h#Ts   C Cc                     U R                  U5      n[        U R                  U R                  5       H  u  p#X" U" U5      5      -   nM     U R	                  U5      $ rj   )r   zipr   r   r   )r<   rB   layerr   s       r?   rS   ChromaApproximator.forward   sM    LLOTZZ8LEE%(O#A 9 }}QrA   )r   r   r   r   )   )	rU   rV   rW   rX   rZ   r6   rS   r_   r`   ra   s   @r?   r   r      s3    7s 7S 7c 7S 7 7   rA   r   c                     ^  \ rS rSr SS\S\S\S\4U 4S jjjr   SS\R                  S\R                  S	\	\
\R                  \R                  4      S
\	\R                     S\	\\\4      S\R                  4S jjrSrU =r$ )ChromaSingleTransformerBlock   rJ   num_attention_headsattention_head_dim	mlp_ratioc                   > [         TU ]  5         [        X-  5      U l        [	        U5      U l        [        R                  " XR                  5      U l        [        R                  " SS9U l
        [        R                  " XR                  -   U5      U l        [        5       (       a  SSKJn  Sn[        SSU5        U" 5       nO
[!        5       n[#        UUUUSUS	SS
9U l        g )Ntanh)approximater   )FluxAttnProcessor2_0_NPUzDefaulting to FluxAttnProcessor2_0_NPU for NPU devices will be removed. Attention processors should be set explicitly using the `set_attn_processor` method.npu_processorz0.34.0Tr-   )	query_dimdim_headheadsr{   r2   	processorr0   pre_only)r5   r6   rZ   mlp_hidden_dimrc   r:   r8   r   proj_mlpGELUact_mlpproj_outr   attention_processorr   r   r$   r#   attn)	r<   rJ   r   r   r   r   deprecation_messager   r>   s	           r?   r6   %ChromaSingleTransformerBlock.__init__   s     	!#/26s;			#':':;ww62		#(;(;";SA!##FR   ox1DE02I)+I!'%	
	rA   hidden_statestembimage_rotary_embattention_maskjoint_attention_kwargsrF   c                    UnU R                  XS9u  pxU R                  U R                  U5      5      n	U=(       d    0 nUb  US S 2S S S S 24   US S 2S S S 2S 4   -  nU R                  " SUUUS.UD6n
[        R
                  " X/SS9nUR                  S5      nXR                  U5      -  nXa-   nUR                  [        R                  :X  a  UR                  SS5      nU$ )	Nr7   )r   r   r   r   rI   r"        )r:   r   r   r   r[   r   r   r   r^   float16clip)r<   r   r   r   r   r   residualnorm_hidden_statesgatemlp_hidden_statesattn_outputs              r?   rS   $ChromaSingleTransformerBlock.forward   s     !#'99]9#E  LL7I)JK!7!=2%+AtT1,<=qRVXY[_O_@``Nii 
,-)
 %	
 		;"BJ~~a }}];; 0%--/)..vu=MrA   )r   r   r   r:   r   r   )g      @NNN)rU   rV   rW   rX   rZ   floatr6   r[   r\   r   r   r   strr   rS   r_   r`   ra   s   @r?   r   r      s     #
#
 !#
  	#

 #
 #
R IM15;?|| ll #5u||)C#DE	
 !. !)c3h 8 
 rA   r   c                   H  ^  \ rS rSr  SS\S\S\S\S\4
U 4S jjjr   SS\R                  S	\R                  S
\R                  S\
\\R                  \R                  4      S\
\R                     S\
\\\4      S\\R                  \R                  4   4S jjrSrU =r$ )ChromaTransformerBlocki  rJ   r   r   qk_normr0   c                 F  > [         TU ]  5         [        U5      U l        [        U5      U l        [        UUUUUSS[        5       US9	U l        [        R                  " USSS9U l
        [        XSS9U l        [        R                  " USSS9U l        [        XSS9U l        g )NFT)	r   added_kv_proj_dimr   r   r{   context_pre_onlyr2   r   r0   r-   r.   zgelu-approximate)rJ   dim_outactivation_fn)r5   r6   r&   norm1norm1_contextr#   r$   r   r8   r9   norm2r   ffnorm2_context
ff_context)r<   rJ   r   r   r   r0   r>   s         r?   r6   ChromaTransformerBlock.__init__  s     	1#6
9#>!!'%"')

	 \\#%TJ
#BTU\\#%TR%#J\]rA   r   encoder_hidden_statesr   r   r   r   rF   c                    US S 2S S24   US S 2SS 24   pU R                  XS9u  ppnU R                  X(S9u  pnnnU=(       d    0 nUb  US S 2S S S S 24   US S 2S S S 2S 4   -  nU R                  " S	U	UUUS.UD6n[        U5      S:X  a  Uu  nnO[        U5      S:X  a  Uu  nnnU
R	                  S5      W-  nUU-   nU R                  U5      n	U	SUS S 2S 4   -   -  US S 2S 4   -   n	U R                  U	5      nUR	                  S5      U-  nUU-   n[        U5      S:X  a  UW-   nUR	                  S5      W-  nUU-   nU R                  U5      nUSUS S 2S 4   -   -  US S 2S 4   -   nU R                  U5      nUUR	                  S5      U-  -   nUR                  [        R                  :X  a  UR                  SS5      nX!4$ )
NrH   r   )r   r   r   r   r   r   r"   r   r   r   )r   r   r   lenr   r   r   r   r   r^   r[   r   r   )r<   r   r   r   r   r   r   temb_imgtemb_txtr   rO   rP   rQ   rR   norm_encoder_hidden_states
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpattention_outputsr   context_attn_outputip_attn_output	ff_outputcontext_ff_outputs                            r?   rS   ChromaTransformerBlock.forward3  s\    "!RaR%[$q!"u+(GKzzR_zGnDiHW[WiWi! Xj X
T"[* "8!=2%+AtT1,<=qRVXY[_O_@``N !II 
,"<-)	

 %
  !Q&/@,K,"#q(?P<K,n ((+k9%3!ZZ6/1yD7I3IJYWXZ^W^M__GG./	&&q)I5	%	1 !Q&)N:M )22158KK 58K K%)%7%78M%N"%?1{STVZSZG[C[%\_jklnrkr_s%s" OO,FG 5
8L8LQ8ORc8c c &&%--7$9$>$>vu$M!$33rA   )r   r   r   r   r   r   r   )rp   r-   r   )rU   rV   rW   rX   rZ   r   r   r6   r[   r\   r   r   r   r   rS   r_   r`   ra   s   @r?   r   r     s     "^^ !^  	^
 ^ ^ ^F IM15;?<4||<4  %||<4 ll	<4
 #5u||)C#DE<4 !.<4 !)c3h 8<4 
u||U\\)	*<4 <4rA   r   c                     ^  \ rS rSrSrSrSS/rSS/rSS/r\	            S"S\
S	\
S
\\
   S\
S\
S\
S\
S\
S\\
S4   S\
S\
S\
4U 4S jjj5       r          S#S\R                  S\R                  S\R                   S\R                  S\R                  S\R                  S\\\\4      S\S\S\\R                  \4   4S  jjrS!rU =r$ )$ChromaTransformer2DModelir  a  
The Transformer model introduced in Flux, modified for Chroma.

Reference: https://huggingface.co/lodestones/Chroma

Args:
    patch_size (`int`, defaults to `1`):
        Patch size to turn the input data into small patches.
    in_channels (`int`, defaults to `64`):
        The number of channels in the input.
    out_channels (`int`, *optional*, defaults to `None`):
        The number of channels in the output. If not specified, it defaults to `in_channels`.
    num_layers (`int`, defaults to `19`):
        The number of layers of dual stream DiT blocks to use.
    num_single_layers (`int`, defaults to `38`):
        The number of layers of single stream DiT blocks to use.
    attention_head_dim (`int`, defaults to `128`):
        The number of dimensions to use for each attention head.
    num_attention_heads (`int`, defaults to `24`):
        The number of attention heads to use.
    joint_attention_dim (`int`, defaults to `4096`):
        The number of dimensions to use for the joint attention (embedding/channel dimension of
        `encoder_hidden_states`).
    axes_dims_rope (`Tuple[int]`, defaults to `(16, 56, 56)`):
        The dimensions to use for the rotary positional embeddings.
Tr   r   	pos_embedr:   
patch_sizein_channelsout_channels
num_layersnum_single_layersr   r   joint_attention_dimaxes_dims_rope.approximator_num_channelsapproximator_hidden_dimapproximator_layersc                 l  > [         TU ]  5         U=(       d    UU l        Xv-  U l        [	        SU	S9U l        [        U
S-  SU-  SU-  -   S-   S9U l        [        U
U R                  UUS9U l	        [        R                  " XR                  5      U l        [        R                  " X R                  5      U l        [        R                  " [        U5       Vs/ s H  n[!        U R                  UUS	9PM     sn5      U l        [        R                  " [        U5       Vs/ s H  n[%        U R                  UUS	9PM     sn5      U l        [)        U R                  U R                  S
SS9U l        [        R                  " U R                  X-  U R                  -  SS9U l        S
U l        g s  snf s  snf )Ni'  )thetaaxes_dim   r      r   )rz   r{   )r   r{   r   r   )rJ   r   r   Fr-   r.   Tr   )r5   r6   r   	inner_dimr   r   rx   time_text_embedr   distilled_guidance_layerr8   r   context_embedder
x_embedderr   r   r   transformer_blocksr   single_transformer_blocksrl   norm_outr   gradient_checkpointing)r<   r   r   r   r   r  r   r   r  r  r  r  r  r   r>   s                 r?   r6   !ChromaTransformer2DModel.__init__  s     	(7K,A%ENKG2a7))EJ,>>B 
 );,NN.(	)
% !#		*=~~ N))K@"$-- z* +A '(;'9
 +	#
 *, 01 2A -(;'9
 2	*
& ;NNDNNu$
 		$..*2IDL]L]2]dhi&+#5s   F, F1r   r   rC   img_idstxt_idsr   r   return_dictcontrolnet_blocks_repeatrF   c           
         Ub#  UR                  5       nUR                  SS5      nOSn[        (       a  [        X5        O+Ub(  UR	                  SS5      b  [
        R                  S5        U R                  U5      nUR                  UR                  5      S-  nU R                  U5      nU R                  U5      nU R                  U5      nUR                  S:X  a  [
        R                  S5        US   nUR                  S:X  a  [
        R                  S	5        US   n[        R                  " XT4SS
9nU R!                  U5      nUb;  SU;   a5  UR                  S5      nU R#                  U5      nUR%                  SU05        ['        U R(                  5       GH5  u  nnS[+        U R,                  5      -  nUS[+        U R(                  5      -  -   nUSU-  -   nUSU-  -   n[        R                  " USS2UUS-   24   USS2UUS-   24   4SS
9n[        R.                  " 5       (       a)  U R0                  (       a  U R3                  UXUUU5      u  p!OU" UUUUUUS9u  p!Uc  M  [+        U R(                  5      [+        U5      -  n[5        [6        R8                  " U5      5      nU(       a  XU[+        U5      -     -   nGM+  XUU-     -   nGM8     [        R                  " X!/SS
9n['        U R,                  5       H  u  nnSU-  nUSS2UUS-   24   n[        R.                  " 5       (       a&  U R0                  (       a  U R3                  UUUU5      nO
U" UUUUUS9nU	c  Mh  [+        U R,                  5      [+        U	5      -  n[5        [6        R8                  " U5      5      nUSS2UR:                  S   S2S4   U	UU-     -   USS2UR:                  S   S2S4'   M     USS2UR:                  S   S2S4   nUSS2SS24   nU R=                  UU5      nU R?                  U5      n[        (       a  [A        X5        U
(       d  U4$ [C        US9$ )a$  
The [`FluxTransformer2DModel`] forward method.

Args:
    hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`):
        Input `hidden_states`.
    encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`):
        Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
    timestep ( `torch.LongTensor`):
        Used to indicate denoising step.
    block_controlnet_hidden_states: (`list` of `torch.Tensor`):
        A list of tensors that if specified are added to the residuals of transformer blocks.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
        tuple.

Returns:
    If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
    `tuple` where the first element is the sample tensor.
Nru   g      ?z\Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective.r   r   zrPassing `txt_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch Tensorr   zrPassing `img_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch TensorrI   ip_adapter_image_embedsip_hidden_statesrH   r"   )r   r   r   r   r   r   )r   r   r   r   r   .)sample)"copypopr   r   getloggerwarningr  rs   r^   r  r  r  ndimr[   r   r   encoder_hid_projupdate	enumerater  r   r  is_grad_enabledr  _gradient_checkpointing_funcrZ   npceilr   r  r   r   r   )r<   r   r   rC   r  r  r   r   controlnet_block_samplescontrolnet_single_block_samplesr  r  
lora_scaler   pooled_tembidsr   r  r  index_blockblock
img_offset
txt_offsetimg_modulationtext_modulationr   interval_control	start_idxoutputs                                r?   rS    ChromaTransformer2DModel.forward  s   L "-%;%@%@%B"/33GSAJJd/%16L6P6PQXZ^6_6kr 6;;}223d:((2	33I> $ 5 56K L<<1NNU ajG<<1NNU ajGii*2>>#.!-2KOe2e&<&@&@AZ&[##445LM"))+=?O*PQ"+D,C,C"DKS!?!?@@J#a#d.E.E*F&FFJ'!k/9N(1{?:O99>NQ4F#F FG?_q5H#H HI D $$&&4+F+F7;7X7X=GWYg84%}
 8="/*?%5#1+A84% (3#&t'>'>#?#F^B_#_ #&rww/?'@#A +%sSkOlAl(mm " %2[\lMl4m$mMK #EL 		#8"HaP"+D,J,J"KKKIq)i!m";;<D$$&&4+F+F $ A A!$	! !&"/%5#1+A! /:#&t'E'E#FMlIm#m #&rww/?'@#A !!%:%@%@%C%Es"JK5kEU6UVW a!6!<!<Q!?!A3FG1 #L: &a)>)D)DQ)G)I3&NO1bc6"mT:}-19'v66rA   )r  r  r  r  r  r   r   r   r  r  r  r  )r"   @   N   &         i   )   8   rA  r;  i   r   )
NNNNNNNNTF)rU   rV   rW   rX   rY    _supports_gradient_checkpointing_no_split_modules_repeated_blocks _skip_layerwise_casting_patternsr
   rZ   r   r   r6   r[   r\   r]   r   r   r   boolr   r   rS   r_   r`   ra   s   @r?   r   r   r  s   6 (,$13QR02PQ(3V'<$ &*!#"%#%#'*6)+'+#$>,>, >, sm	>,
 >, >,  >, !>, !>, c3h>, $'>, "%>, !>, >,F /3%) $ $'+;?!%(, ).e7||e7  %||e7 ""	e7
 e7 e7 e7 !)c3h 8e7 e7 #'e7 
u||55	6e7 e7rA   r   )=typingr   r   r   r   r   numpyr*  r[   torch.nnr8   configuration_utilsr	   r
   loadersr   r   r   utilsr   r   r   r   r   utils.import_utilsr   utils.torch_utilsr   	attentionr   r   cache_utilsr   
embeddingsr   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r    r!   transformer_fluxr#   r$   
get_loggerrU   r"  Moduler&   rc   rl   rx   r   r   r   r   r   rA   r?   <module>rX     s    5 4    B ^ ^ a a 8 5 3 $ c c 7 ' S S > 
		H	%%;299 %;P :) )X,ryy ,@   & D299 D DN [4RYY [4 [4|O7!O7rA   