
    +h"                     .   S SK r S SKJrJrJrJrJrJr  S SKr	S SK
r
S SKJrJrJrJr  SSKJrJr  SSKJrJrJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJ r J!r!J"r"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  \" 5       (       a  S SK*J+s  J,r-  Sr.OSr.\ R^                  " \05      r1Sr2    S#S\3S\3S\4S\44S jjr5 S$S\
Rl                  S\\
Rn                     S\84S jjr9    S%S\\3   S\\\8\
Rt                  4      S\\\3      S\\\4      4S  jjr; " S! S"\'\\5      r<g)&    N)AnyCallableDictListOptionalUnion)CLIPTextModelCLIPTokenizerT5EncoderModelT5TokenizerFast   )PipelineImageInputVaeImageProcessor)FluxLoraLoaderMixinFromSingleFileMixinTextualInversionLoaderMixin)AutoencoderKL)FluxTransformer2DModel)FlowMatchEulerDiscreteScheduler)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor   )DiffusionPipeline   )FluxPipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from controlnet_aux import CannyDetector
        >>> from diffusers import FluxControlImg2ImgPipeline
        >>> from diffusers.utils import load_image

        >>> pipe = FluxControlImg2ImgPipeline.from_pretrained(
        ...     "black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16
        ... ).to("cuda")

        >>> prompt = "A robot made of exotic candies and chocolates of different kinds. Abstract background"
        >>> image = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/watercolor-painting.jpg"
        ... )
        >>> control_image = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png"
        ... )

        >>> processor = CannyDetector()
        >>> control_image = processor(
        ...     control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024
        ... )

        >>> image = pipe(
        ...     prompt=prompt,
        ...     image=image,
        ...     control_image=control_image,
        ...     strength=0.8,
        ...     height=1024,
        ...     width=1024,
        ...     num_inference_steps=50,
        ...     guidance_scale=30.0,
        ... ).images[0]
        >>> image.save("output.png")
        ```
base_seq_lenmax_seq_len
base_shift	max_shiftc                 4    XC-
  X!-
  -  nX5U-  -
  nX-  U-   nU$ N )image_seq_lenr!   r"   r#   r$   mbmus           p/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/flux/pipeline_flux_control_img2img.pycalculate_shiftr-   [   s3     
	K$>?A%%A		Q	BI    encoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr3   r4   moder6   AttributeError)r/   r0   r1   s      r,   retrieve_latentsr:   i   s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSSr.   num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr=   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r=   r<   r>   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r>   r<   r<   r'   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r=   len)	schedulerr;   r<   r=   r>   kwargsaccepts_timestepsaccept_sigmass           r,   retrieve_timestepsrM   w   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r.   c            -       t  ^  \ rS rSrSrSr/ rSS/rS\S\	S\
S	\S
\S\S\4U 4S jjr     S=S\\\\   4   S\S\S\\R,                     S\\R.                     4
S jjr  S>S\\\\   4   S\S\\R,                     4S jjr       S?S\\\\   4   S\\\\\   4      S\\R,                     S\S\\R4                     S\\R4                     S\S\\   4S jjrS\R:                  S\R<                  4S jrS r     S@S  jr!\"S! 5       r#\"S" 5       r$\"S# 5       r% SAS$ jr&  SBS% jr'\(S& 5       r)\(S' 5       r*\(S( 5       r+\(S) 5       r,\RZ                  " 5       \." \/5      SSSSSSS*S+SS,SSSSSS-S.SSS/S4S\\\\   4   S\\\\\   4      S\0S/\0S0\\   S1\\   S2\S3\S4\\\      S5\S\\   S\\\R<                  \\R<                     4      S\\R4                     S\\R4                     S\\R4                     S6\\   S7\1S8\\2\\34      S9\\4\\\2/S4      S:\\   S\4*S; jj5       5       r5S<r6U =r7$ )CFluxControlImg2ImgPipeline   as  
The Flux pipeline for image inpainting.

Reference: https://blackforestlabs.ai/announcing-black-forest-labs/

Args:
    transformer ([`FluxTransformer2DModel`]):
        Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
        the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
    text_encoder_2 ([`T5EncoderModel`]):
        [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically
        the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`T5TokenizerFast`):
        Second Tokenizer of class
        [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast).
z.text_encoder->text_encoder_2->transformer->vaer6   prompt_embedsrI   vaetext_encoder	tokenizertext_encoder_2tokenizer_2transformerc           
        > [         TU ]  5         U R                  UUUUUUUS9  [        U SS 5      (       a/  S[	        U R
                  R                  R                  5      S-
  -  OSU l        [        U R                  S-  S9U l
        [        U S5      (       a#  U R                  b  U R                  R                  OSU l        S	U l        g )
N)rR   rS   rU   rT   rV   rW   rI   rR   r   r      )vae_scale_factorrT   M      )super__init__register_modulesgetattrrH   rR   configblock_out_channelsrZ   r   image_processorr7   rT   model_max_lengthtokenizer_max_lengthdefault_sample_size)	selfrI   rR   rS   rT   rU   rV   rW   rG   s	           r,   r^   #FluxControlImg2ImgPipeline.__init__   s     	%)## 	 	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw  1$BWBWZ[B[\/6t[/I/IdnnNhDNN++np 	! $' r.   Nr      promptnum_images_per_promptmax_sequence_lengthr<   dtypec           
         U=(       d    U R                   nU=(       d    U R                  R                  n[        U[        5      (       a  U/OUn[        U5      n[        U [        5      (       a  U R                  XR                  5      nU R                  USUSSSSS9nUR                  nU R                  USSS9R                  n	U	R                  S   UR                  S   :  ag  [        R                  " X5      (       dL  U R                  R                  U	S S 2U R                  S	-
  S24   5      n
[        R!                  S
U SU
 35        U R#                  UR%                  U5      SS9S   nU R"                  R                  nUR%                  XTS9nUR                  u  pnUR'                  S	US	5      nUR)                  Xb-  US5      nU$ )N
max_lengthTFpt)paddingro   
truncationreturn_lengthreturn_overflowing_tokensreturn_tensorslongestrq   ru   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: output_hidden_statesr   rm   r<   )_execution_devicerS   rm   
isinstancestrrH   r   maybe_convert_promptrV   	input_idsshapetorchequalbatch_decodere   loggerwarningrU   torepeatview)rg   rj   rk   rl   r<   rm   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textrQ   _seq_lens                 r,   _get_t5_prompt_embeds0FluxControlImg2ImgPipeline._get_t5_prompt_embeds   s    14110**00'44&&[
d788..v7G7GHF&& *&+ ' 
 %..**69UY*Zdd  $(<(<R(@@UcIuIu++88DLeLehiLilnLnIn9opLNN'(	,A
 ++N,=,=f,E\a+bcde##))%((u(D%++A &,,Q0EqI%**:+MwXZ[r.   c           
      r   U=(       d    U R                   n[        U[        5      (       a  U/OUn[        U5      n[        U [        5      (       a  U R                  XR                  5      nU R                  USU R                  SSSSS9nUR                  nU R                  USSS9R                  nUR                  S   UR                  S   :  aq  [        R                  " Xg5      (       dV  U R                  R                  US S 2U R                  S	-
  S24   5      n[        R                  S
U R                   SU 35        U R                  UR!                  U5      SS9n	U	R"                  n	U	R!                  U R                  R$                  US9n	U	R'                  S	U5      n	U	R)                  XB-  S5      n	U	$ )Nro   TFrp   )rq   ro   rr   rt   rs   ru   rv   rw   rx   r   z\The following part of your input was truncated because CLIP can only handle sequences up to ry   rz   r|   )r}   r~   r   rH   r   r   rT   re   r   r   r   r   r   r   r   rS   r   pooler_outputrm   r   r   )
rg   rj   rk   r<   r   r   r   r   r   rQ   s
             r,   _get_clip_prompt_embeds2FluxControlImg2ImgPipeline._get_clip_prompt_embeds"  s    1411'44&&[
d788..v~~FFnn 00&+ % 
 %....SW.Xbb  $(<(<R(@@UcIuIu>>66q$JcJcfgJgjlJlGl7mnLNN--.i~G )).*;*;F*CZ_)` &33%((t/@/@/F/Fv(V &,,Q0EF%**:+MrRr.   prompt_2pooled_prompt_embeds
lora_scalec	                    U=(       d    U R                   nUbw  [        U [        5      (       ab  Xl        U R                  b!  [
        (       a  [        U R                  U5        U R                  b!  [
        (       a  [        U R                  U5        [        U[        5      (       a  U/OUnUcH  U=(       d    Un[        U[        5      (       a  U/OUnU R                  UUUS9nU R                  UUUUS9nU R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b  U R                  R                  OU R                  R                  n	[        R                  " UR                   S   S5      R#                  X9S9n
XVU
4$ )a?  

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in all text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
)rj   r<   rk   )rj   rk   rl   r<   r   r   r<   rm   )r}   r~   r   _lora_scalerS   r   r   rU   r   r   r   r   rm   rW   r   zerosr   r   )rg   rj   r   r<   rk   rQ   r   rl   r   rm   text_idss              r,   encode_prompt(FluxControlImg2ImgPipeline.encode_promptO  s   @ 1411 !j7J&K&K)   ,1A1A!$"3"3Z@"".3C3C!$"5"5zB'44&& )6H%/#%>%>zHH $(#?#?&; $@ $ 
 !66&;$7	 7 M ($ 3449I9I#D$5$5zB*$ 3449I9I#D$7$7D+/+<+<+H!!''dN^N^NdNd;;}2215q9<<F<XH<<r.   imager0   c                    [        U[        5      (       af  [        UR                  S   5       Vs/ s H-  n[	        U R
                  R                  XUS-    5      X#   S9PM/     nn[        R                  " USS9nO#[	        U R
                  R                  U5      US9nX@R
                  R                  R                  -
  U R
                  R                  R                  -  nU$ s  snf )Nr   r   r0   dim)r~   listranger   r:   rR   encoder   catra   shift_factorscaling_factor)rg   r   r0   iimage_latentss        r,   _encode_vae_image,FluxControlImg2ImgPipeline._encode_vae_image  s    i&& u{{1~..A !1q51A!Bil[.   "IIm;M,TXX__U-CyYM&)E)EEIgIggs   4C$c                 N   [        X-  U5      n[        [        X-
  S5      5      nU R                  R                  XPR                  R
                  -  S  n[        U R                  S5      (       a1  U R                  R                  XPR                  R
                  -  5        XaU-
  4$ )Nr   set_begin_index)minintmaxrI   r=   orderr7   r   )rg   r;   strengthr<   init_timestept_startr=   s          r,   get_timesteps(FluxControlImg2ImgPipeline.get_timesteps  s    /:<OPc-=qABNN,,W~~7K7K-K-MN	4>>#455NN**7^^5I5I+IJ777r.   c
           
        ^  US:  d  US:  a  [        SU 35      eUT R                  S-  -  S:w  d  UT R                  S-  -  S:w  a,  [        R                  ST R                  S-   SU SU S35        UbW  [	        U 4S	 jU 5       5      (       d=  [        S
T R
                   SU V
s/ s H  oT R
                  ;  d  M  U
PM     sn
 35      eUb  Ub  [        SU SU S35      eUb  Ub  [        SU SU S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  Uc  [        S5      eU	b  U	S:  a  [        SU	 35      eg g s  sn
f )Nr   r   z2The value of strength should in [0.0, 1.0] but is r   z-`height` and `width` have to be divisible by z	 but are z and z(. Dimensions will be resized accordinglyc              3   @   >#    U  H  oTR                   ;   v   M     g 7fr&   )_callback_tensor_inputs).0krg   s     r,   	<genexpr>:FluxControlImg2ImgPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.ri   z8`max_sequence_length` cannot be greater than 512 but is )
r@   rZ   r   r   allr   r~   r   r   type)rg   rj   r   r   heightwidthrQ   r   "callback_on_step_end_tensor_inputsrl   r   s   `          r,   check_inputs'FluxControlImg2ImgPipeline.check_inputs  s    a<8a<QRZQ[\]]T**Q./14AVAVYZAZ8[_`8`NN?@U@UXY@Y?ZZcdjckkpqvpw  x`  a .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee$)=)E U  */BS/HWXkWlmnn 0I*7 pHs   (G?Gc                 ,   [         R                  " XS5      nUS   [         R                  " U5      S S 2S 4   -   US'   US   [         R                  " U5      S S S 24   -   US'   UR                  u  pgnUR	                  Xg-  U5      nUR                  X4S9$ )Nr   ).r   ).r   r   )r   r   aranger   reshaper   )	r   r   r   r<   rm   latent_image_idslatent_image_id_heightlatent_image_id_widthlatent_image_id_channelss	            r,   _prepare_latent_image_ids4FluxControlImg2ImgPipeline._prepare_latent_image_ids  s     !;;va8#3F#;ell6>RSTVZSZ>[#[ #3F#;ell5>QRVXYRY>Z#Z RbRhRhO7O+33":<T
  ""&">>r.   c                     U R                  XUS-  SUS-  S5      n U R                  SSSSSS5      n U R                  XS-  US-  -  US-  5      n U $ )Nr   r      r   r      )r   permuter   )r6   r   num_channels_latentsr   r   s        r,   _pack_latents(FluxControlImg2ImgPipeline._pack_latents  sg     ,,z1aQVZ[Q[]^_//!Q1a3//*{uz.JL`cdLder.   c                    U R                   u  pEnS[        U5      US-  -  -  nS[        U5      US-  -  -  nU R                  XAS-  US-  US-  SS5      n U R                  SSSSSS5      n U R	                  XFS-  X5      n U $ )Nr   r   r   r   r   r   )r   r   r   r   r   )r6   r   r   rZ   r   num_patcheschannelss          r,   _unpack_latents*FluxControlImg2ImgPipeline._unpack_latents  s     -4MM)
 c&k&6&:;<SZ$4q$89:,,zQ;
HPQMSTVWX//!Q1a3//*5.A6Qr.   c                 j   [        U	[        5      (       a*  [        U	5      U:w  a  [        S[        U	5       SU S35      eS[	        U5      U R
                  S-  -  -  nS[	        U5      U R
                  S-  -  -  nX4XV4nU R                  X5S-  US-  X5      nU
b  U
R                  XS9U4$ UR                  XS9nU R                  XS9nX=R                  S   :  a@  X=R                  S   -  S:X  a+  X=R                  S   -  n[        R                  " U/U-  SS9nO\X=R                  S   :  a4  X=R                  S   -  S:w  a  [        S	UR                  S    S
U S35      e[        R                  " U/SS9n[        XXS9nU R                  R                  XU5      n
U R                  XXEU5      n
X4$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.r   r   )r   r0   r   r   z'Cannot duplicate `image` of batch size z to z text prompts.)r0   r<   rm   )r~   r   rH   r@   r   rZ   r   r   r   r   r   r   r   rI   scale_noiser   )rg   r   timestepr   r   r   r   rm   r<   r0   r6   r   r   r   additional_image_per_promptnoises                   r,   prepare_latents*FluxControlImg2ImgPipeline.prepare_latents  s    i&&3y>Z+GA#i.AQ R&<'gi  c&kd&;&;a&?@ASZD$9$9A$=>?6A99*PQkSX\]S]_em::V:9;KKK4..U.P++A..:@S@STU@V3VZ[3[*48K8KA8N*N'!II}o8S&SYZ[M--a00ZBUBUVWBX5X\]5]9-:M:Ma:P9QQUV`Uaaop  "II}o1=MUT..,,]eL$$W:NX]^((r.   c
                 <   [        U[        R                  5      (       a  OU R                  R	                  XUS9nUR
                  S   n
U
S:X  a  UnOUnUR                  USS9nUR                  XgS9nU(       a!  U	(       d  [        R                  " U/S-  5      nU$ )Nr   r   r   r   r   r   r   )	r~   r   Tensorrc   
preprocessr   repeat_interleaver   r   )rg   r   r   r   r   rk   r<   rm   do_classifier_free_guidance
guess_modeimage_batch_size	repeat_bys               r,   prepare_image(FluxControlImg2ImgPipeline.prepare_imageH  s     eU\\**((33EPU3VE ;;q>q "I .I''	q'94&zIIugk*Er.   c                     U R                   $ r&   )_guidance_scalerg   s    r,   guidance_scale)FluxControlImg2ImgPipeline.guidance_scalej  s    ###r.   c                     U R                   $ r&   )_joint_attention_kwargsr   s    r,   joint_attention_kwargs1FluxControlImg2ImgPipeline.joint_attention_kwargsn  s    +++r.   c                     U R                   $ r&   )_num_timestepsr   s    r,   num_timesteps(FluxControlImg2ImgPipeline.num_timestepsr  s    """r.   c                     U R                   $ r&   )
_interruptr   s    r,   	interrupt$FluxControlImg2ImgPipeline.interruptv  s    r.   g333333?   g      @pilTcontrol_imager   r   r   r;   r>   r   output_typereturn_dictr   callback_on_step_endr   c                 R   U=(       d    U R                   U R                  -  nU=(       d    U R                   U R                  -  nU R                  UUUUUUUUUS9	  Xl        UU l        SU l        U R                  R                  X5US9nUR                  [        R                  S9nUb  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                   nU R"                  b  U R"                  R%                  SS5      OSnU R'                  UUUUUUUUS	9u  nnnU	c  [(        R*                  " S
SU-  U5      OU	n	[-        U5      U R                  -  S-  [-        U5      U R                  -  S-  -  n[/        UU R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      5      n[5        U R0                  UUU	US9u  nnU R7                  XU5      u  nnUS:  a  [9        SU SU S35      eUSS R;                  UU-  5      nU R<                  R2                  R>                  S-  nU RA                  UUUUU-  UUU RB                  RD                  S9nURF                  S:X  a  U RB                  RI                  U5      RJ                  RM                  US9nX@RB                  R2                  RN                  -
  U RB                  R2                  RP                  -  nUR                  SS u  n n!U RS                  UUU-  UU U!5      nU RU                  UUUU-  UUUURD                  UUU5
      u  nn"[W        [        U5      XR0                  RX                  -  -
  S5      n#[        U5      U l-        U R<                  R2                  R\                  (       aE  [        R^                  " S/U
U[        R                  S9n$U$Ra                  UR                  S   5      n$OSn$U Rc                  US9 n%[e        U5       GH  u  n&n'U Rf                  (       a  M  [        Rh                  " X/SS9n(U'Ra                  UR                  S   5      R                  URD                  5      n)U R=                  U(U)S-  U$UUUU"U R"                  SS 9	S   n*URD                  n+U R0                  Rk                  U*U'USS!9S   nURD                  U+:w  a>  [        Rl                  Rn                  Rq                  5       (       a  UR                  U+5      nUbJ  0 n,U H  n-[s        5       U-   U,U-'   M     U" U U&U'U,5      n.U.Ru                  S"U5      nU.Ru                  S#U5      nU&[        U5      S-
  :X  d)  U&S-   U#:  a0  U&S-   U R0                  RX                  -  S:X  a  U%Rw                  5         [x        (       d  GM  [z        R|                  " 5         GM     SSS5        US$:X  a  UnOU R                  XX`R                  5      nXRB                  R2                  RP                  -  U RB                  R2                  RN                  -   nU RB                  R                  USS!9S   nU R                  R                  UUS%9nU R                  5         U(       d  U4$ [        US&9$ ! , (       d  f       N= f)'a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        will be used instead
    image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
        `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
        numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
        or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
        list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
        latents as `image`, but if passing latents directly it is not encoded again.
    control_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
            `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
        The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
        specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
        as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
        width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
        images must be passed as a list such that each element of the list can be correctly batched for input
        to a single ControlNet.
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated image. This is set to 1024 by default for the best results.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated image. This is set to 1024 by default for the best results.
    strength (`float`, *optional*, defaults to 1.0):
        Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
        starting point and more noise is added the higher the `strength`. The number of denoising steps depends
        on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
        process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
        essentially ignores `image`.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    sigmas (`List[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    guidance_scale (`float`, *optional*, defaults to 7.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.FloatTensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.flux.FluxPipelineOutput`] instead of a plain tuple.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    max_sequence_length (`int` defaults to 512): Maximum sequence length to use with the `prompt`.

Examples:

Returns:
    [`~pipelines.flux.FluxPipelineOutput`] or `tuple`: [`~pipelines.flux.FluxPipelineOutput`] if `return_dict`
    is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the generated
    images.
)rQ   r   r   rl   Fr   )rm   Nr   r   scale)rj   r   rQ   r   r<   rk   rl   r   g      ?r   base_image_seq_len   max_image_seq_len   r#         ?r$   ffffff?)r>   r+   z?After adjusting the num_inference_steps by strength parameter: z!, the number of pipelinesteps is z4 which is < 1 and not appropriate for this pipeline.rY   )r   r   r   r   rk   r<   rm   r   r   r   )totalr   i  )	hidden_statesr   guidancepooled_projectionsencoder_hidden_statestxt_idsimg_idsr   r  )r  r6   rQ   latent)r  )images)Drf   rZ   r   r   r   r   rc   r   r   r   float32r~   r   r   rH   r   r}   r   getr   nplinspacer   r-   rI   ra   rM   r   r@   r   rW   in_channelsr   rR   rm   ndimr   r3   r4   r   r   r   r   r   r   r   guidance_embedsfullexpandprogress_bar	enumerater   r   stepbackendsmpsis_availablelocalspopupdateXLA_AVAILABLExm	mark_stepr   decodepostprocessmaybe_free_model_hooksr    )/rg   rj   r   r   r  r   r   r   r;   r>   r   rk   r0   r6   rQ   r   r  r  r   r  r   rl   
init_imager   r<   r   r   r(   r+   r=   latent_timestepr   height_control_imagewidth_control_imager   num_warmup_stepsr  r"  r   tlatent_model_inputr   
noise_predlatents_dtypecallback_kwargsr   callback_outputss/                                                  r,   __call__#FluxControlImg2ImgPipeline.__call__z  s   h K433d6K6KKI11D4I4II 	'!5/Q 3 	 
	
  .'=$ ))44UQV4W
]]]7
 *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 '!5"7 3!  	
		
  TZSaS!&9"9;NOgmV(=(==Bs5zUYUjUjGjnoGopNN!!%%&:C@NN!!%%&94@NN!!%%lC8NN!!%%k48
 *<NN*
&	& *.););<O[a)b&	&"QRZQ[ \/00df  $BQ-..z<Q/QR  $//66BBaG**!$99"7((.. + 
 " HHOOM:FFMMXaMbM*XX__-I-IITXX__MkMkkM8E8K8KAB8O5 "5 ..22$$#M %)$8$8.. %
!! s9~0CnnFZFZ0ZZ\]^!)n ""22zz1#~fEMMZHw}}Q'78HH %89\!),1>>%*YY/GQ%O" 88GMM!$4588G!--"4%_%';*7$,+/+F+F % . 
 

 !(..--j!WRW-XYZ[==M1~~))6688")**]";'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNY - :^ ("E **7ECXCXYG!?!??488??C_C__GHHOOGO?BE((44U4TE 	##%8O!//} :9s   ;F4\4\
\&)	r   r   r   r   r   rf   rc   re   rZ   )Nr   ri   NN)r   N)NNr   NNri   NNNNNr&   )FF)8__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r	   r
   r   r   r   r^   r   r   r   r   r   r   r<   rm   r   r   FloatTensorfloatr   r   	Generatorr   r   r   staticmethodr   r   r   r   r   propertyr   r   r   r   no_gradr   EXAMPLE_DOC_STRINGr   boolr   r   r   r<  __static_attributes____classcell__)rG   s   @r,   rO   rO      s   4 M(/:'2' ' $	'
 !' '' %' ,'B )-%&#&)-'+/c49n%/  #/ !	/
 &/ $/j &')-	*c49n%*  #* &	*` 59)-%&59<@#&&*M=c49n%M= 5d3i01M= &	M=
  #M=   1 12M= 'u'8'89M= !M= UOM=`u||  	8$ !+/ 4ol ? ?    4 -)r %* D $ $ , , # #   ]]_12 )-48$(,0 $##%(, #/0MQ/359<@%* ;?KO9B#&-q0c49n%q0 5d3i01q0 "	q0
 *q0 q0 }q0 q0 !q0 e%q0 q0  (}q0 E%//43H"HIJq0 %++,q0   1 12q0  'u'8'89!q0" c]#q0$ %q0& !)c3h 8'q0( 'xc40@$0F'GH)q0* -1I+q0, !-q0 3 q0r.   rO   )r  r  r  r  )Nr4   r>  )=rB   typingr   r   r   r   r   r   numpyr  r   transformersr	   r
   r   r   rc   r   r   loadersr   r   r   models.autoencodersr   models.transformersr   
schedulersr   utilsr   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   pipeline_outputr    torch_xla.core.xla_modelcore	xla_modelr,  r+  
get_loggerr?  r   rL  r   rG  r-   r   rH  r   r:   r<   rM   rO   r'   r.   r,   <module>r_     s_    = =   V V D \ \ 0 9 9  . . / ))MM 
		H	%% V 

 
 	

 
 ck
TLL
T-5eoo-F
T\_
T  *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*v{0!24GI\ {0r.   