
    +hAC                        S SK r S SKJrJrJr  S SKrS SKrS SKJ	r	J
r
JrJr  SSKJr  SSKJr  SSKJrJr  SSKJr  SS	KJrJrJrJrJr  S
SKJrJr  S
SKJ r J!r!J"r"J#r#  SSKJ$r$  \" 5       (       a  S SK%r%\RL                  " \'5      r(S r)S r*S r+ SS\RX                  S\\RZ                     S\.4S jjr/ " S S\5      r0 " S S\5      r1g)    N)ListOptionalUnion)CLIPTextModelCLIPTokenizerT5EncoderModelT5TokenizerFast   )
FrozenDict)VaeImageProcessor)FluxLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKL)USE_PEFT_BACKENDis_ftfy_availableloggingscale_lora_layersunscale_lora_layers   )ModularPipelineBlocksPipelineState)ComponentSpec
ConfigSpec
InputParamOutputParam   )FluxModularPipelinec                     [         R                  " U 5      n [        R                  " [        R                  " U 5      5      n U R	                  5       $ N)ftfyfix_texthtmlunescapestriptexts    c/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/modular_pipelines/flux/encoders.pybasic_cleanr(   '   s3    ==D==t,-D::<    c                 V    [         R                  " SSU 5      n U R                  5       n U $ )Nz\s+ )resubr$   r%   s    r'   whitespace_cleanr.   -   s$    66&#t$D::<DKr)   c                 .    [        [        U 5      5      n U $ r   )r.   r(   r%   s    r'   prompt_cleanr0   3   s    K-.DKr)   encoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr5   r6   moder8   AttributeError)r1   r2   r3   s      r'   retrieve_latentsr<   9   s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSSr)   c                      \ rS rSrSr\S\4S j5       r\S\\	   4S j5       r
\S\\   4S j5       r\S\\   4S j5       r\S\R"                  S	\R$                  4S
 j5       r\R(                  " 5       S\S\S\4S j5       rSrg)FluxVaeEncoderStepF   fluxreturnc                     g)NzIVae Encoder step that encode the input image into a latent representation selfs    r'   descriptionFluxVaeEncoderStep.descriptionI   s    Zr)   c           
      Z    [        S[        5      [        S[        [        SSS.5      SS9/$ )Nvaeimage_processor   )vae_scale_factorvae_latent_channelsfrom_config)configdefault_creation_method)r   r   r   r   rD   s    r'   expected_components&FluxVaeEncoderStep.expected_componentsM   s7     %/!!!rRT"UV(5	
 	
r)   c                     [        SSS9[        S5      [        S5      [        S5      [        S[        R                  SS	9[        S
[        [           SS	9/$ )NimageT)requiredheightwidthr2   dtypez Data type of model tensor inputs	type_hintrF   preprocess_kwargszA kwargs dictionary that if specified is passed along to the `ImageProcessor` as defined under `self.image_processor` in [diffusers.image_processor.VaeImageProcessor])r   torchrX   r   dictrD   s    r'   inputsFluxVaeEncoderStep.inputsY   sY     w.x w{#w%++Cef#"4. E
 	
r)   c                 6    [        S[        R                  SS9/$ )Nimage_latentszUThe latents representing the reference image for image-to-image/inpainting generationrY   r   r\   TensorrD   s    r'   intermediate_outputs'FluxVaeEncoderStep.intermediate_outputsh   s#     ,,s
 	
r)   rT   r2   c                    [        U[        5      (       a\  [        UR                  S   5       Vs/ s H#  n[	        U R                  XUS-    5      X#   S9PM%     nn[        R                  " USS9nO[	        U R                  U5      US9nX@R                  R                  -
  U R                  R                  -  nU$ s  snf )Nr   r   )r2   )dim)
isinstancelistrangeshaper<   encoder\   catrO   shift_factorscaling_factor)rI   rT   r2   ira   s        r'   _encode_vae_image$FluxVaeEncoderStep._encode_vae_imager   s     i&&`efkfqfqrsft`u`u[\ Ea!e,<!=V`u   "IIm;M,SZZ->)TM&)@)@@CJJD]D]]s   *B<
componentsstatec                    U R                  U5      nUR                  =(       d    0 Ul        UR                  Ul        UR                  b  UR                  OUR
                  R                  Ul        UR                  R                  " UR                  4UR                  UR                  S.UR                  D6Ul        UR                  R                  UR                  UR                  S9Ul        UR                  R                  S   Ul        [        UR                  [         5      (       aR  [#        UR                  5      UR                  :w  a/  [%        S[#        UR                  5       SUR                   S35      eU R'                  UR
                  UR                  UR                  S9Ul        U R+                  X#5        X4$ )N)rV   rW   devicerX   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)rT   r2   )get_block_stater[   _execution_devicerw   rX   rI   rJ   
preprocessrT   rV   rW   tork   
batch_sizerh   r2   ri   len
ValueErrorrq   ra   set_block_staterE   rs   rt   block_states       r'   __call__FluxVaeEncoderStep.__call__   s   **51(3(E(E(K%'991<1B1B1NK--T^TbTbThTh&66AA
&1&8&8@Q@Q
U`UrUr
 (--008J8JR]RcRc0d!,!2!2!8!8!; k++T22s;;P;P7QU`UkUk7kA#kF[F[B\A] ^'2233su 
 %)$:$:NN+"3"3{?T?T %; %
! 	U0  r)   rC   N)__name__
__module____qualname____firstlineno__
model_namepropertystrrF   r   r   rQ   r   r^   r   rd   staticmethodr\   rc   	Generatorrq   no_gradr   r   r   __static_attributes__rC   r)   r'   r>   r>   F   s    J[S [ [ 	
T-%8 	
 	
 
Z( 
 
 
d;&7 
 
 ell u   ]]_!#6 !} !Q^ ! !r)   r>   c                   :   \ rS rSrSr\S\4S j5       r\S\\	   4S j5       r
\S\\   4S j5       r\S\\   4S j5       r\S\\   4S j5       r\S	 5       r\S
\\\\   4   S\S\S\R,                  4S j5       r\S
\\\\   4   S\S\R,                  4S j5       r\      SS
\\\\   4   S\\\\   4   S\\R,                     S\S\\R4                     S\\R4                     S\S\\   4S jj5       r\R:                  " 5       S\S\S\4S j5       r Sr!g)FluxTextEncoderStep   r@   rA   c                     g)NzMText Encoder step that generate text_embeddings to guide the video generationrC   rD   s    r'   rF   FluxTextEncoderStep.description   s    ^r)   c                 ~    [        S[        5      [        S[        5      [        S[        5      [        S[        5      /$ )Ntext_encoder	tokenizertext_encoder_2tokenizer_2)r   r   r   r   r	   rD   s    r'   rQ   'FluxTextEncoderStep.expected_components   s7     .-8+}5*N;-9	
 	
r)   c                     / $ r   rC   rD   s    r'   expected_configs$FluxTextEncoderStep.expected_configs   s    	r)   c                 B    [        S5      [        S5      [        S5      /$ )Npromptprompt_2joint_attention_kwargs)r   rD   s    r'   r^   FluxTextEncoderStep.inputs   s'     x z"/0
 	
r)   c                     [        S[        R                  SS9[        S[        R                  SS9[        S[        R                  SS9/$ )Nprompt_embedsz2text embeddings used to guide the image generationrY   pooled_prompt_embedsz9pooled text embeddings used to guide the image generationtext_idsz#ids from the text sequence for RoPErb   rD   s    r'   rd   (FluxTextEncoderStep.intermediate_outputs   sQ     ,,P
 &,,W
 ,,A
 	
r)   c                     U R                   U R                  4 HK  nUc  M  [        U[        5      (       a  M  [        U[        5      (       a  M6  [        S[        U5       35      e   g )Nz@`prompt` or `prompt_2` has to be of type `str` or `list` but is )r   r   rh   r   ri   r~   type)r   r   s     r'   check_inputs FluxTextEncoderStep.check_inputs   sY    "));+?+?@F!:fc+B+B:V\^bKcKc #cdhiodpcq!rss Ar)   r   num_images_per_promptmax_sequence_lengthrw   c           
         U R                   R                  n[        U[        5      (       a  U/OUn[	        U5      n[        U [
        5      (       a  U R                  XR                  5      nU R                  USUSSSSS9nUR                  nU R                  USSS9R                  n	U	R                  S   UR                  S   :  a]  [        R                  " X5      (       dB  U R                  R                  U	S S 2US	-
  S24   5      n
[        R                  S
U SU
 35        U R                  UR                  U5      SS9S   nUR                  XTS9nUR                  u  pnUR!                  S	US	5      nUR#                  Xb-  US5      nU$ )N
max_lengthTFpt)paddingr   
truncationreturn_lengthreturn_overflowing_tokensreturn_tensorslongestr   r   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: output_hidden_statesr   rX   rw   )r   rX   rh   r   r}   r   maybe_convert_promptr   	input_idsrk   r\   equalbatch_decodeloggerwarningr{   repeatview)rs   r   r   r   rw   rX   r|   text_inputstext_input_idsuntruncated_idsremoved_textr   _seq_lens                 r'   _get_t5_prompt_embeds)FluxTextEncoderStep._get_t5_prompt_embeds   s    ))//'44&&[
j"=>>44V=S=STF ,, *&+ - 
 %..$00[_0`jj  $(<(<R(@@UcIuIu%11>>qRehiRilnRnOn?opLNN'(	,A
 #11.2C2CF2Kbg1hijk%((u(D%++A &,,Q0EqI%**:+MwXZ[r)   c           
      `   [        U[        5      (       a  U/OUn[        U5      n[        U [        5      (       a  U R	                  XR
                  5      nU R                  USU R
                  R                  SSSSS9nUR                  nU R
                  R                  nU R                  USSS9R                  nUR                  S   UR                  S   :  a]  [        R                  " Xh5      (       dB  U R
                  R                  US S 2US	-
  S24   5      n	[        R                  S
U SU	 35        U R                  UR                  U5      SS9n
U
R                   n
U
R                  U R                  R"                  US9n
U
R%                  S	U5      n
U
R'                  XB-  S5      n
U
$ )Nr   TFr   )r   r   r   r   r   r   r   r   r   r   z\The following part of your input was truncated because CLIP can only handle sequences up to r   r   r   )rh   r   r}   r   r   r   model_max_lengthr   rk   r\   r   r   r   r   r   r{   pooler_outputrX   r   r   )rs   r   r   rw   r|   r   r   tokenizer_max_lengthr   r   r   s              r'   _get_clip_prompt_embeds+FluxTextEncoderStep._get_clip_prompt_embeds  s    (44&&[
j"=>>44V=Q=QRF ** !++<<&+ + 
 %..)33DD$..vyY].^hh  $(<(<R(@@UcIuIu%//<<_QPdghPhkmPmMm=noLNN()<.B #//0A0A&0I`e/f &33%((z/F/F/L/LU[(\ &,,Q0EF%**:+MrRr)   Nr   r   r   
lora_scalec	                    U=(       d    U R                   nUbw  [        U [        5      (       ab  Xl        U R                  b!  [
        (       a  [        U R                  U5        U R                  b!  [
        (       a  [        U R                  U5        [        U[        5      (       a  U/OUnUcR  U=(       d    Un[        U[        5      (       a  U/OUn[        R                  U UUUS9n[        R                  U UUUUS9nU R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b  U R                  R                  O[        R                  n	[        R                   " UR"                  S   S5      R%                  X9S9n
XVU
4$ )as  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in all text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
)r   rw   r   )r   r   r   rw   r   r
   rv   )ry   rh   r   _lora_scaler   r   r   r   r   r   r   r   r   rX   r\   bfloat16zerosrk   r{   )rs   r   r   rw   r   r   r   r   r   rX   r   s              r'   encode_prompt!FluxTextEncoderStep.encode_prompt.  s   D 7:77 !j=P&Q&Q%/" &&27G7G!*"9"9:F((49I9I!*";";ZH'44&& )6H%/#%>%>zHH $7#N#N&;	 $O $  0EE&;$7 F M "".*&9::?O?O#J$;$;ZH$$0*&9::?O?O#J$=$=zJ1;1H1H1T
''--Z_ZhZh;;}2215q9<<F<XH<<r)   rs   rt   c                 ~   U R                  U5      nU R                  U5        UR                  Ul        UR                  b  UR                  R                  SS 5      OS Ul        U R                  UUR                  S S S UR                  SUR                  S9u  Ul	        Ul
        Ul        U R                  X#5        X4$ )Nscaler   )r   r   r   r   rw   r   r   )rx   r   ry   rw   r   gettext_encoder_lora_scaler   r   r   r   r   r   r   s       r'   r   FluxTextEncoderStep.__call__  s     **51+&'99
 11= ..227DA 	+
 _c^p^p%%!%%%"#":: _q 	_
[	"K$DkFZ 	U0  r)   rC   )Nr   NNi   N)"r   r   r   r   r   r   r   rF   r   r   rQ   r   r   r   r^   r   rd   r   r   r   intr\   rw   r   r   r   FloatTensorfloatr   r   r   r   r   r   rC   r)   r'   r   r      s6   J_S _ _ 
T-%8 
 
 $z"2   
Z( 
 
 
d;&7 
 
& t t
 *c49n%*  #* !	*
 * *X )c49n%)  #) 	) )V 
 *.%&59<@#&&*P=c49n%P= T#Y'P= &	P=
  #P=   1 12P= 'u'8'89P= !P= UOP= P=d ]]_!#6 !} !Q^ ! !r)   r   )Nr6   )2r"   typingr   r   r   regexr,   r\   transformersr   r   r   r	   configuration_utilsr   rJ   r   loadersr   r   modelsr   utilsr   r   r   r   r   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r   r   r    
get_loggerr   r   r(   r.   r0   rc   r   r   r<   r>   r   rC   r)   r'   <module>r      s     ( (   V V - 0 G # i i C W W 1  
		H	% ck
TLL
T-5eoo-F
T\_
TV!. V!r}!/ }!r)   