
    +hMC                    b   S SK r S SKJrJrJrJrJrJrJr  S SK	r
S SKrS SKrS SKJs  Jr  S SKJrJrJrJrJr  S SKJr  SSKJrJr  SSKJrJr  SSK J!r!J"r"J#r#J$r$  SS	K%J&r&J'r'J(r(J)r)J*r*  SS
K+J,r,J-r-  SSK.J/r/  SSK0J1r1  SSK2J3r3J4r4J5r5J6r6J7r7J8r8  SSK9J:r:J;r;J<r<  SSK=J>r>J?r?  SSK@JArA  \" 5       (       a  SSKBJCrC  SSK2JDrD  \D" 5       (       a  S SKEJFs  JGrH  SrIOSrI\5R                  " \K5      rLSrM    SS\\N   S\\\O\R                  4      S\\\N      S\\\Q      4S jjrR " S S\>\?\$\#\"\!5      rSg)    N)AnyCallableDictListOptionalTupleUnion)CLIPImageProcessorCLIPTextModelCLIPTextModelWithProjectionCLIPTokenizerCLIPVisionModelWithProjection) is_invisible_watermark_available   )MultiPipelineCallbacksPipelineCallback)PipelineImageInputVaeImageProcessor)FromSingleFileMixinIPAdapterMixin StableDiffusionXLLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLControlNetModelImageProjectionMultiControlNetModelUNet2DConditionModel)AttnProcessor2_0XFormersAttnProcessor)adjust_lora_scale_text_encoder)KarrasDiffusionSchedulers)USE_PEFT_BACKEND	deprecateloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)is_compiled_moduleis_torch_versionrandn_tensor   )DiffusionPipelineStableDiffusionMixin)StableDiffusionXLPipelineOutput)StableDiffusionXLWatermarker)is_torch_xla_availableTFa/  
    Examples:
        ```py
        >>> # !pip install opencv-python transformers accelerate
        >>> from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
        >>> from diffusers.utils import load_image
        >>> import numpy as np
        >>> import torch

        >>> import cv2
        >>> from PIL import Image

        >>> prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
        >>> negative_prompt = "low quality, bad quality, sketches"

        >>> # download an image
        >>> image = load_image(
        ...     "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
        ... )

        >>> # initialize the models and pipeline
        >>> controlnet_conditioning_scale = 0.5  # recommended for good generalization
        >>> controlnet = ControlNetModel.from_pretrained(
        ...     "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
        ... )
        >>> vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
        >>> pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
        ... )
        >>> pipe.enable_model_cpu_offload()

        >>> # get canny image
        >>> image = np.array(image)
        >>> image = cv2.Canny(image, 100, 200)
        >>> image = image[:, :, None]
        >>> image = np.concatenate([image, image, image], axis=2)
        >>> canny_image = Image.fromarray(image)

        >>> # generate image
        >>> image = pipe(
        ...     prompt, controlnet_conditioning_scale=controlnet_conditioning_scale, image=canny_image
        ... ).images[0]
        ```
num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr3   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r3   r2   r4   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r4   r2   r2    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r3   len)	schedulerr1   r2   r3   r4   kwargsaccepts_timestepsaccept_sigmass           r/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.pyretrieve_timestepsrE   }   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))    c            R       .  ^  \ rS rSrSrSr/ SQr/ SQr    S_S\S	\	S
\
S\S\S\S\\\\   \\   \4   S\S\S\\   S\S\4U 4S jjjr            S`S\S\\   S\\R4                     S\S\S\\   S\\   S\\R8                     S\\R8                     S\\R8                     S \\R8                     S!\\   S"\\   4S# jjrSaS$ jrS% r S& r!            SbS) jr"S* r#  ScS, jr$SaS- jr% SaS. jr&S/ r'S0\RP                  4S1\R8                  S2\S3\RR                  S4\R8                  4S5 jjr*\+S6 5       r,\+S7 5       r-\+S8 5       r.\+S9 5       r/\+S: 5       r0\+S; 5       r1\+S< 5       r2\Rf                  " 5       \4" \55      SSSSSS=SSSS>SSSS(SSSSSSSSS?SSS'S+S(S'SS@SSS@SSSSA/4&S\\\\   4   S\\\\\   4      SB\6SC\\   SD\\   SE\SF\\   SG\\   SH\\   SI\S\\\\\   4      S\\\\\   4      S\\   SJ\SK\\\Rn                  \\Rn                     4      SA\\R8                     S\\R8                     S\\R8                     S\\R8                     S \\R8                     SL\\6   SM\\\R8                        SN\\   SO\SP\\8\\94      SQ\\\\   4   SR\SS\\\\   4   ST\\\\   4   SU\\\4   SV\\\4   SW\\\4   SX\\\\4      SY\\\4   SZ\\\\4      S"\\   S[\\\:\\\8/S4   \;\<4      S\\\   4LS] jj5       5       r=S^r>U =r?$ )d#StableDiffusionXLControlNetPipeline   ay
  
Pipeline for text-to-image generation using Stable Diffusion XL with ControlNet guidance.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
    text_encoder ([`~transformers.CLIPTextModel`]):
        Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
    text_encoder_2 ([`~transformers.CLIPTextModelWithProjection`]):
        Second frozen text-encoder
        ([laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)).
    tokenizer ([`~transformers.CLIPTokenizer`]):
        A `CLIPTokenizer` to tokenize text.
    tokenizer_2 ([`~transformers.CLIPTokenizer`]):
        A `CLIPTokenizer` to tokenize text.
    unet ([`UNet2DConditionModel`]):
        A `UNet2DConditionModel` to denoise the encoded image latents.
    controlnet ([`ControlNetModel`] or `List[ControlNetModel]`):
        Provides additional conditioning to the `unet` during the denoising process. If you set multiple
        ControlNets as a list, the outputs from each ControlNet are added together to create one combined
        additional conditioning.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    force_zeros_for_empty_prompt (`bool`, *optional*, defaults to `"True"`):
        Whether the negative prompt embeddings should always be set to 0. Also see the config of
        `stabilityai/stable-diffusion-xl-base-1-0`.
    add_watermarker (`bool`, *optional*):
        Whether to use the [invisible_watermark](https://github.com/ShieldMnt/invisible-watermark/) library to
        watermark output images. If not defined, it defaults to `True` if the package is installed; otherwise no
        watermarker is used.
z6text_encoder->text_encoder_2->image_encoder->unet->vae)	tokenizertokenizer_2text_encodertext_encoder_2feature_extractorimage_encoder)latentsprompt_embedsnegative_prompt_embedsadd_text_embedsadd_time_idsnegative_pooled_prompt_embedsnegative_add_time_idsimageTNvaerL   rM   rJ   rK   unet
controlnetr@   force_zeros_for_empty_promptadd_watermarkerrN   rO   c                   > [         TU ]  5         [        U[        [        45      (       a  [        U5      nU R                  UUUUUUUUUUS9
  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OSU l        [        U R                  SS9U l        [        U R                  SSS	9U l        U
b  U
O	[!        5       n
U
(       a  [#        5       U l        OS U l        U R'                  U	S
9  g )N)
rX   rL   rM   rJ   rK   rY   rZ   r@   rN   rO   rX   r+         T)vae_scale_factordo_convert_rgbF)r`   ra   do_normalize)r[   )super__init__
isinstancelisttupler   register_modulesgetattrr?   rX   configblock_out_channelsr`   r   image_processorcontrol_image_processorr   r/   	watermarkregister_to_config)selfrX   rL   rM   rJ   rK   rY   rZ   r@   r[   r\   rN   rO   r>   s                rD   rd   ,StableDiffusionXLControlNetPipeline.__init__   s    	j4-00-j9J%)#!/' 	 	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw0$BWBWhlm'8!224V[(
$ .=-H/NnNp9;DN!DN=YZrF   r^   promptprompt_2r2   num_images_per_promptdo_classifier_free_guidancenegative_promptnegative_prompt_2rQ   rR   pooled_prompt_embedsrU   
lora_scale	clip_skipc                    U=(       d    U R                   nUb  [        U [        5      (       a  Xl        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        [        U[        5      (       a  U/OUnUb  [        U5      nOUR                  S   nU R                  b  U R                  U R                  /OU R                  /nU R                  b  U R                  U R                  /OU R                  /nUGc  U=(       d    Un[        U[        5      (       a  U/OUn/ nX/n[        UUU5       GHQ  u  nnn[        U [        5      (       a  U R!                  UU5      nU" USUR"                  SSS9nUR$                  nU" USSS9R$                  nUR                  S	   UR                  S	   :  ah  [&        R(                  " UU5      (       dL  UR+                  USS2UR"                  S
-
  S	24   5      n[,        R/                  SUR"                   SU 35        U" UR1                  U5      SS9nU
c  US   R2                  S:X  a  US   n
Uc  UR4                  S   nOUR4                  US-   *    nUR7                  U5        GMT     [&        R8                  " US	S9nUSL =(       a    U R:                  R<                  nU(       a8  U	c5  U(       a.  [&        R>                  " U5      n	[&        R>                  " U
5      nGOU(       Ga  U	Gc  U=(       d    SnU=(       d    Un[        U[        5      (       a  X/-  OUn[        U[        5      (       a  X/-  OUnUb;  [A        U5      [A        U5      La$  [C        S[A        U5       S[A        U5       S35      eU[        U5      :w  a!  [E        SU S[        U5       SU SU S3	5      eXg/n/ n[        UUU5       H  u  nnn[        U [        5      (       a  U R!                  UU5      nUR                  S
   nU" USUSSS9nU" UR$                  R1                  U5      SS9n	Uc  U	S   R2                  S:X  a  U	S   nU	R4                  S   n	UR7                  U	5        M     [&        R8                  " US	S9n	U R                  b%  UR1                  U R                  RF                  US9nO$UR1                  U RH                  RF                  US9nUR                  u  nnn URK                  S
US
5      nURM                  UU-  US	5      nU(       a  U	R                  S
   nU R                  b%  U	R1                  U R                  RF                  US9n	O$U	R1                  U RH                  RF                  US9n	U	RK                  S
US
5      n	U	RM                  X-  US	5      n	U
RK                  S
U5      RM                  UU-  S	5      n
U(       a%  URK                  S
U5      RM                  UU-  S	5      nU R                  b6  [        U [        5      (       a!  [
        (       a  [O        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [O        U R                  U5        XX4$ )a
  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr   
max_lengthTpt)paddingr|   
truncationreturn_tensorslongest)r~   r   r^   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: output_hidden_statesr+   dim z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)dtyper2   )(_execution_devicere   r   _lora_scalerL   r"   r    r&   rM   strr?   shaperJ   rK   zipr   maybe_convert_promptmodel_max_length	input_idstorchequalbatch_decodeloggerwarningtondimhidden_statesappendconcatrj   r[   
zeros_liketype	TypeErrorr7   r   rY   repeatviewr'   )!rp   rr   rs   r2   rt   ru   rv   rw   rQ   rR   rx   rU   ry   rz   
batch_size
tokenizerstext_encodersprompt_embeds_listpromptsrJ   rL   text_inputstext_input_idsuntruncated_idsremoved_textzero_out_negative_promptuncond_tokensnegative_prompt_embeds_listr|   uncond_inputbs_embedseq_len_s!                                    rD   encode_prompt1StableDiffusionXLControlNetPipeline.encode_prompt/  s   t 1411 !j7W&X&X)   ,''243D3DjQ%d&7&7D"".''243F3F
S%d&9&9:F'44&&VJ&,,Q/J <@>>;Udnnd&6&67\`\l\l[m
8<8I8I8UT 3 34\`\o\o[p 	  )6H%/#%>%>zHH "$(G36w
M3Z/	<d$?@@!66vyIF'((99##' "-!6!6"+FIVZ"["e"e"((,0D0DR0HHQVQ\Q\"OR R $-#9#9/!YMgMgjkMknpMpJp:q#rLNN%667yP
 !-^->->v-F]a b (/M!4D4I4IQ4N+8+;($$1$?$?$CM %2$?$?)a-@P$QM"))-8I 4[L "LL);DM $3d#:#gt{{?g?g &+A+INf%*%5%5m%D",1,<,<=Q,R)(-C-K-3O 1 D_ AK?\_@`@`j+<<fuO4>?PRU4V4V
00\m 
 !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  "1 D*,'<?z[h<i8Ld$?@@&*&?&?QZ&[O*003
(#()##'  *6 **--f5)-*& 18=STU=V=[=[_`=`4J14M1)?)M)Mb)Q&+223IJ/ =j2 &+\\2MSU%V"*),,43F3F3L3LU[,\M),,499??6,RM,22'1%,,Q0EqI%**86K+KWVXY&,2215G"".)?)B)BI\I\IbIbkq)B)r&)?)B)Bag)B)h&%;%B%B1F[]^%_"%;%@%@Acelnp%q"3::1>STYY,,b 
 ',I,P,PQRTi,j,o,o00"-) ($ @AAFVFV#D$5$5zB*$ @AAFVFV#D$7$7D6JiirF   c                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	Nr}   )r   r2   r   Tr   r   r   r   )nextrO   r<   r   re   r   TensorrN   pixel_valuesr   r   repeat_interleaver   image_embeds)
rp   rW   r2   rt   r   r   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             rD   encode_image0StableDiffusionXLControlNetPipeline.encode_image  s?   T''2245;;%..**5*FSSE4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +JJ--e4AAL'99:OUV9WL"'"2"2<"@44rF   c                 
   / nU(       a  / nUGc&  [        U[        5      (       d  U/n[        U5      [        U R                  R                  R
                  5      :w  aB  [        S[        U5       S[        U R                  R                  R
                  5       S35      e[        XR                  R                  R
                  5       Hh  u  p[        U	[        5      (       + n
U R                  XSU
5      u  pUR                  US S S 24   5        U(       d  MP  WR                  US S S 24   5        Mj     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got  images and z IP Adapters.r^   r+   r   r   )r2   )re   rf   r?   rY   encoder_hid_projimage_projection_layersr7   r   r   r   r   chunk	enumerater   catr   )rp   ip_adapter_imageip_adapter_image_embedsr2   rt   ru   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 rD   prepare_ip_adapter_image_embedsCStableDiffusionXLControlNetPipeline.prepare_ip_adapter_image_embeds7  s9    &$&!"*.55$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A ))"<"<"T"T>9' +55E*W&W#DHDUDU+Q8KEA# ##$7a$@A..)001MdTUg1VW> (?#.H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&rF   c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Neta	generator)r8   r9   r:   r@   stepr<   r=   )rp   r   r   accepts_etaextra_step_kwargsaccepts_generators         rD   prepare_extra_step_kwargs=StableDiffusionXLControlNetPipeline.prepare_extra_step_kwargse  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rF         ?        c                   ^  Ub6  [        U[        5      (       a  US::  a  [        SU S[        U5       S35      eUbX  [	        U 4S jU 5       5      (       d>  [        ST R
                   SU Vs/ s H  nUT R
                  ;  d  M  UPM     sn 35      eUb  Ub  [        SU S	U S
35      eUb  Ub  [        SU S	U S
35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  Ub  [        SU SU S
35      eUb  Ub  [        SU SU S
35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUb  U	c  [        S5      eUb  Uc  [        S5      e[        T R                  [        5      (       aW  [        U[        5      (       aB  [        R                  S[        T R                  R                  5       S[        U5       S35        [        [         S5      =(       a8    [        T R                  ["        R$                  R&                  R(                  5      n[        T R                  [*        5      (       d0  U(       a=  [        T R                  R,                  [*        5      (       a  T R/                  X1U5        GO[        T R                  [        5      (       d0  U(       a  [        T R                  R,                  [        5      (       a  [        U[        5      (       d  [1        S5      e[3        S U 5       5      (       a  [        S5      e[        U5      [        T R                  R                  5      :w  a8  [        S[        U5       S[        T R                  R                  5       S35      eU H  nT R/                  UX5        M     O e[        T R                  [*        5      (       d0  U(       aJ  [        T R                  R,                  [*        5      (       a!  [        U[4        5      (       d  [1        S 5      eO[        T R                  [        5      (       d0  U(       a  [        T R                  R,                  [        5      (       a  [        U[        5      (       a#  [3        S! U 5       5      (       a  [        S5      eOO[        U[        5      (       a7  [        U5      [        T R                  R                  5      :w  a  [        S"5      eO e[        U[6        [        45      (       d  U/n[        U[6        [        45      (       d  U/n[        U5      [        U5      :w  a$  [        S#[        U5       S$[        U5       S%35      e[        T R                  [        5      (       a  [        U5      [        T R                  R                  5      :w  a[  [        S&U S'[        U5       S([        T R                  R                  5       S)[        T R                  R                  5       S3	5      e[9        X5       HH  u  nnUU:  a  [        S*U S+U S35      eUS,:  a  [        S*U S-35      eUS.:  d  M;  [        S/U S035      e   U
b  Ub  [        S15      eUb\  [        U[        5      (       d  [        S2[        U5       35      eUS   R:                  S3;  a  [        S4US   R:                   S535      eg g s  snf )6Nr   z5`callback_steps` has to be a positive integer but is z	 of type r   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0krp   s     rD   	<genexpr>CStableDiffusionXLControlNetPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.z	You have z! ControlNets and you have passed z= prompts. The conditionings will be fixed across the prompts.scaled_dot_product_attentionz5For multiple controlnets: `image` must be type `list`c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   re   rf   r   r   s     rD   r   r     s     8%QZ4((%   zEA single batch of multiple conditionings are supported at the moment.zbFor multiple controlnets: `image` must have the same length as the number of controlnets, but got r   z ControlNets.zLFor single controlnet: `controlnet_conditioning_scale` must be type `float`.c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   r   r   s     rD   r   r     s     R4Qqz!T**4Qr   zFor multiple controlnets: When `controlnet_conditioning_scale` is specified as `list`, it must have the same length as the number of controlnetsz`control_guidance_start` has z* elements, but `control_guidance_end` has zI elements. Make sure to provide the same number of elements to each list.z`control_guidance_start`: z has z elements but there are z- controlnets available. Make sure to provide zcontrol guidance start: z4 cannot be larger or equal to control guidance end: r   z can't be smaller than 0.r   zcontrol guidance end: z can't be larger than 1.0.zProvide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined.z:`ip_adapter_image_embeds` has to be of type `list` but is )r      zF`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is D)re   intr7   r   allr   r   rf   r   rZ   r   r   r   r?   netshasattrFr   _dynamo
eval_frameOptimizedModuler   	_orig_modcheck_imager   anyfloatrg   r   r   )rp   rr   rs   rW   callback_stepsrv   rw   rQ   rR   rx   r   r   rU   controlnet_conditioning_scalecontrol_guidance_startcontrol_guidance_end"callback_on_step_end_tensor_inputsr   is_compiledimage_startends   `                     rD   check_inputs0StableDiffusionXLControlNetPipeline.check_inputsv  sc   & %z.#/N/NR`deReGGW X(), 
 .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  bc  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee&+A+M9/9J K*++]_  */E/Q;<M;N O*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  $)=)E U  "-2O2W y  doo';<<&$''DOO$8$8 9::[\_`f\g[hST a!?@ 
ZOOU]]55EEF
 t884??44oFFUM:t(<==4??446JKKeT** WXX 8%888 !hiiUs4??#7#788 xy|  ~C  zD  yE  EQ  RU  VZ  Ve  Ve  Vj  Vj  Rk  Ql  ly  z     ?   5 t884??44oFF;UCC noo D t(<==4??446JKK7>>R4QRRR$%lmm S94@@SIfEgkn$$l F !D 
 505$-@@&<%=".>>$8#9 %&#.B*CC/4J0K/LLvwz  |P  xQ  wR  R[  \  doo';<<)*c$//2F2F.GG 01G0HcRhNiMj  kC  DG  HL  HW  HW  H\  H\  D]  C^  ^K  LO  PT  P_  P_  Pd  Pd  Le  Kf  fg  h  4KJE3| .ug5ijminnop  s{ #;E7B[!\]]Sy #9#>X!YZZ L ',C,O ^  #.5t<< PQUVmQnPop  )+00> \]tuv]w]|]|\}}~  ? /m pHs   /]]c                    [        U[        R                  R                  5      n[        U[        R                  5      n[        U[
        R                  5      n[        U[        5      =(       a'    [        US   [        R                  R                  5      n[        U[        5      =(       a    [        US   [        R                  5      n[        U[        5      =(       a    [        US   [
        R                  5      n	U(       d:  U(       d3  U(       d,  U(       d%  U(       d  U	(       d  [        S[        U5       35      eU(       a  Sn
O[        U5      n
Ub  [        U[        5      (       a  SnO6Ub!  [        U[        5      (       a  [        U5      nOUb  UR                  S   nU
S:w  a  U
W:w  a  [        SU
 SU 35      eg g )Nr   zimage must be passed and be one of PIL image, numpy array, torch tensor, list of PIL images, list of numpy arrays or list of torch tensors, but is r^   zdIf image batch size is not 1, image batch size must be same as prompt batch size. image batch size: z, prompt batch size: )re   PILImager   r   npndarrayrf   r   r   r?   r   r   r7   )rp   rW   rr   rQ   image_is_pilimage_is_tensorimage_is_npimage_is_pil_listimage_is_tensor_listimage_is_np_listimage_batch_sizeprompt_batch_sizes               rD   r   /StableDiffusionXLControlNetPipeline.check_image4  s   !%9$UELL9 

3&ud3]
58SYY__8])%6]:eAhPUP\P\;]%eT2Wz%(BJJ7W #%($ f  gk  lq  gr  fs  t   "5z*VS"9"9 !Jvt$<$< #F& - 3 3A 6q %59J%Jv  xH  wI  I^  _p  ^q  r  &K rF   Fc
                 2   U R                   R                  XUS9R                  [        R                  S9nUR
                  S   n
U
S:X  a  UnOUnUR                  USS9nUR                  XgS9nU(       a!  U	(       d  [        R                  " U/S-  5      nU$ )N)heightwidthr   r   r^   r   r   r+   )rm   
preprocessr   r   float32r   r   r   )rp   rW   r  r  r   rt   r2   r   ru   
guess_moder  	repeat_bys               rD   prepare_image1StableDiffusionXLControlNetPipeline.prepare_imageZ  s     ,,77TY7Z]]didqdq]r ;;q>q "I .I''	q'94&zIIugk*ErF   c	                 V   UU[        U5      U R                  -  [        U5      U R                  -  4n	[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eUc  [        XXeS9nOUR                  U5      nXR                  R                  -  nU$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   r2   r   )
r   r`   re   rf   r?   r7   r*   r   r@   init_noise_sigma)
rp   r   num_channels_latentsr  r  r   r2   r   rP   r   s
             rD   prepare_latents3StableDiffusionXLControlNetPipeline.prepare_latentsy  s     K4000J$///	
 i&&3y>Z+GA#i.AQ R&<'gi 
 ?"5fZGjj(G NN;;;rF   c                 2   [        X-   U-   5      nU R                  R                  R                  [	        U5      -  U-   nU R                  R
                  R                  R                  nX:w  a  [        SU SU S35      e[        R                  " U/US9nU$ )Nz7Model expects an added time embedding vector of length z, but a vector of z was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`.r  )rf   rY   rj   addition_time_embed_dimr?   add_embeddinglinear_1in_featuresr7   r   tensor)	rp   original_sizecrops_coords_top_lefttarget_sizer   text_encoder_projection_dimrT   passed_add_embed_dimexpected_add_embed_dims	            rD   _get_add_time_ids5StableDiffusionXLControlNetPipeline._get_add_time_ids  s     MAKOP II44s<7HHKff 	 "&!8!8!A!A!M!M!9IJ`Iaas  uI  tJ  JU  V  ||\N%@rF   c                 *   U R                   R                  nU R                   R                  [        R                  S9  [        U R                   R                  R                  R                  S   R                  [        [        45      nU(       a  U R                   R                  R                  U5        U R                   R                  R                  R                  U5        U R                   R                  R                  R                  U5        g g )Nr  r   )rX   r   r   r   r  re   decoder	mid_block
attentions	processorr   r   post_quant_convconv_in)rp   r   use_torch_2_0_or_xformerss      rD   
upcast_vae.StableDiffusionXLControlNetPipeline.upcast_vae  s    %--($.HH&&11!4>> %%
! %HH$$''.HH$$''.HH&&))%0 %rF   i   wembedding_dimr   returnc                 r   [        UR                  5      S:X  d   eUS-  nUS-  n[        R                  " [        R                  " S5      5      US-
  -  n[        R
                  " [        R                  " XCS9U* -  5      nUR                  U5      SS2S4   USSS24   -  n[        R                  " [        R                  " U5      [        R                  " U5      /SS9nUS-  S:X  a*  [        R                  R                  R                  US5      nUR                  UR                  S	   U4:X  d   eU$ )
a,  
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

Args:
    w (`torch.Tensor`):
        Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
    embedding_dim (`int`, *optional*, defaults to 512):
        Dimension of the embeddings to generate.
    dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
        Data type of the generated embeddings.

Returns:
    `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
r^   g     @@r+   g     @r  Nr   )r   r^   r   )r?   r   r   logr&  exparanger   r   sincosnn
functionalpad)rp   r9  r:  r   half_dimembs         rD   get_guidance_scale_embedding@StableDiffusionXLControlNetPipeline.get_guidance_scale_embedding  s    " 177|q   J A%iiW-.(Q,?iiX;sdBCdd5k!T'"Sq\1ii338a@1!((%%))#v6CyyQWWQZ7777
rF   c                     U R                   $ r   )_guidance_scalerp   s    rD   guidance_scale2StableDiffusionXLControlNetPipeline.guidance_scale  s    ###rF   c                     U R                   $ r   )
_clip_skiprK  s    rD   rz   -StableDiffusionXLControlNetPipeline.clip_skip      rF   c                 r    U R                   S:  =(       a"    U R                  R                  R                  S L $ )Nr^   )rJ  rY   rj   time_cond_proj_dimrK  s    rD   ru   ?StableDiffusionXLControlNetPipeline.do_classifier_free_guidance  s.    ##a'WDII,<,<,O,OSW,WWrF   c                     U R                   $ r   )_cross_attention_kwargsrK  s    rD   cross_attention_kwargs:StableDiffusionXLControlNetPipeline.cross_attention_kwargs  s    +++rF   c                     U R                   $ r   )_denoising_endrK  s    rD   denoising_end1StableDiffusionXLControlNetPipeline.denoising_end      """rF   c                     U R                   $ r   )_num_timestepsrK  s    rD   num_timesteps1StableDiffusionXLControlNetPipeline.num_timesteps  r]  rF   c                     U R                   $ r   )
_interruptrK  s    rD   	interrupt-StableDiffusionXLControlNetPipeline.interrupt  rQ  rF   2   g      @pil)r   r   rP   rW   r  r  r1   r3   r4   r[  rL  r   r   r   r   output_typereturn_dictrW  r   r  r   r   r'  r(  r)  negative_original_sizenegative_crops_coords_top_leftnegative_target_sizecallback_on_step_endr   c'                    ^] U'R                  SS5      n(U'R                  SS5      n)U(b  [        SSS5        U)b  [        SSS5        [        U%[        [        45      (       a  U%R
                  n&[        U R                  5      (       a  U R                  R                  OU R                  n*[        U[        5      (       d%  [        U[        5      (       a  [        U5      U/-  nO[        U[        5      (       d%  [        U[        5      (       a  [        U5      U/-  nOb[        U[        5      (       dM  [        U[        5      (       d8  [        U*[        5      (       a  [        U*R                  5      OSn+U+U/-  U+U/-  nnU R                  UUUU)UUUUUUUUUUUU&5        Xl        U$U l        UU l        Xl        SU l        Ub  [        U[&        5      (       a  Sn,O3Ub!  [        U[        5      (       a  [        U5      n,OUR(                  S	   n,U R*                  n-[        U*[        5      (       a.  [        U[,        5      (       a  U/[        U*R                  5      -  n[        U*[.        5      (       a  U*R0                  R2                  O"U*R                  S	   R0                  R2                  n.U=(       d    U.nU R4                  b  U R4                  R7                  S
S5      OSn/U R9                  UUU-UU R:                  UUUUUUU/U R<                  S9u  nnnnUc  Ub"  U R?                  UUU-U,U-  U R:                  5      n0[        U*[.        5      (       a@  U RA                  UUUU,U-  UU-U*RB                  U R:                  US9	nUR(                  SS u  pEOx[        U*[        5      (       aa  / n1U HB  n2U RA                  U2UUU,U-  UU-U*RB                  U R:                  US9	n2U1RE                  U25        MD     U1nUS	   R(                  SS u  pEO e[G        U RH                  UU-Xx5      u  pv[        U5      U l%        U RL                  R0                  RN                  n3U RQ                  U,U-  U3UUURB                  U-UU5      nSn4U RL                  R0                  RR                  b{  [T        RV                  " U RX                  S-
  5      R[                  U,U-  5      n5U R]                  U5U RL                  R0                  RR                  S9R_                  U-URB                  S9n4U Ra                  X5      n6/ n7[c        [        U5      5       H  n8[e        UU5       V9V:s/ s H>  u  n9n:S[-        U8[        U5      -  U9:  =(       d    U8S-   [        U5      -  U::  5      -
  PM@     n;n9n:U7RE                  [        U*[.        5      (       a  U;S	   OU;5        M     [        U[        5      (       a  U=(       d    US	   R(                  SS nOU=(       d    UR(                  SS nU =(       d    XE4n Un<U Rf                  c  [i        UR(                  S   5      n=O U Rf                  R0                  Rj                  n=U Rm                  UUU URB                  U=S9n>U!b!  U#b  U Rm                  U!U"U#URB                  U=S9n?OU>n?U R:                  (       aE  [T        Rn                  " UU/S	S9n[T        Rn                  " UU</S	S9n<[T        Rn                  " U?U>/S	S9n>UR_                  U-5      nU<R_                  U-5      n<U>R_                  U-5      R[                  U,U-  S5      n>[        U5      X`RH                  Rp                  -  -
  n@U Rr                  b  [        U Rr                  [,        5      (       a  U Rr                  S	:  a  U Rr                  S:  a  [i        [u        U RH                  R0                  Rv                  U Rr                  U RH                  R0                  Rv                  -  -
  5      5      m][        [        [y        U]4S jU5      5      5      nUSU n[        U RL                  5      nA[        U R                  5      nB[{        SS5      nCU R}                  US9 nD[        U5       GH  u  n8nEU R                  (       a  M  [T        R                  R                  5       (       a3  WA(       a,  WB(       a%  WC(       a  [T        R                  R                  5         U R:                  (       a  [T        Rn                  " U/S-  5      OUnFU RH                  R                  UFWE5      nFU<U>S.nGU(       am  U R:                  (       a\  UnHU RH                  R                  UHWE5      nHUR                  S5      S   nIU<R                  S5      S   U>R                  S5      S   S.nJOWFnHUnIWGnJ[        U7U8   [        5      (       a(  [e        UU7U8   5       VKV9s/ s H  u  nKn9UKU9-  PM     nLnKn9O$UnM[        UM[        5      (       a  WMS	   nMWMU7U8   -  nLU R                  WHWEWIUWLUWJSS9u  nNnOU(       ay  U R:                  (       ah  WN VPs/ s H/  nP[T        Rn                  " [T        R                  " UP5      UP/5      PM1     nNnP[T        Rn                  " [T        R                  " WO5      UO/5      nOUc  Ub  W0WGS'   U RM                  WFWEUU4U R4                  WNWOWGSS9	S	   nQU R:                  (       a  WQR                  S5      u  nRnSURU
USUR-
  -  -   nQU RH                  R                  " WQWEU40 U6DSS0D6S	   nU%b  0 nTU& H  nU[        5       UU   WTUU'   M     U%" U U8WEWT5      nVUVR                  SU5      nUVR                  SU5      nUVR                  S U5      nUVR                  S!U<5      n<UVR                  S"U5      nUVR                  S#U>5      n>UVR                  S$U?5      n?UVR                  S%U5      nU8[        U5      S-
  :X  d)  U8S-   W@:  a`  U8S-   U RH                  Rp                  -  S	:X  a@  WDR                  5         U(b-  U8U)-  S	:X  a$  U8[        U RH                  S&S5      -  nWU(" UWWEU5        [        (       d  GM  [        R                  " 5         GM     SSS5        US':X  Gd  U R                  RB                  [T        R                  :H  =(       a     U R                  R0                  R                  nXUX(       a_  U R                  5         UR_                  [        [        U R                  R                  R                  5       5      5      RB                  5      n[        U R                  R0                  S(5      =(       a"    U R                  R0                  R                  SLnY[        U R                  R0                  S)5      =(       a"    U R                  R0                  R                  SLnZWY(       Ga  WZ(       a  [T        RV                  " U R                  R0                  R                  5      R                  SS*SS5      R_                  UR                  URB                  5      n[[T        RV                  " U R                  R0                  R                  5      R                  SS*SS5      R_                  UR                  URB                  5      n\UU\-  U R                  R0                  R                  -  U[-   nO#UU R                  R0                  R                  -  nU R                  R                  USS+9S	   nWX(       a'  U R                  R_                  [T        R                  S,9  OUnUS':X  dB  U R                  b  U R                  R                  U5      nU R                  R                  UUS-9nU R                  5         U(       d  U4$ [        US.9$ s  sn:n9f s  sn9nKf s  snPf ! , (       d  f       GNV= f)/u0  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders.
    image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
            `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
        The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
        specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
        as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
        width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
        images must be passed as a list such that each element of the list can be correctly batched for input
        to a single ControlNet.
    height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The height in pixels of the generated image. Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The width in pixels of the generated image. Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
        in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
        passed will be used. Must be in descending order.
    sigmas (`List[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    denoising_end (`float`, *optional*):
        When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
        completed before it is intentionally prematurely terminated. As a result, the returned sample will
        still retain a substantial amount of noise as determined by the discrete timesteps selected by the
        scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
        "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
        Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
    guidance_scale (`float`, *optional*, defaults to 5.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to not include in image generation. If not defined, you need to
        pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide what to not include in image generation. This is sent to `tokenizer_2`
        and `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
        applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, text embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
        not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
        not provided, pooled text embeddings are generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs (prompt
        weighting). If not provided, pooled `negative_prompt_embeds` are generated from `negative_prompt` input
        argument.
    ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
        IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
        contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
        provided, embeddings are computed from the `ip_adapter_image` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated image. Choose between `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
        plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
        [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
        The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
        to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
        the corresponding scale as a list.
    guess_mode (`bool`, *optional*, defaults to `False`):
        The ControlNet encoder tries to recognize the content of the input image even if you remove all
        prompts. A `guidance_scale` value between 3.0 and 5.0 is recommended.
    control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
        The percentage of total steps at which the ControlNet starts applying.
    control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
        The percentage of total steps at which the ControlNet stops applying.
    original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
        `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
        explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
        `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
        `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        For most cases, `target_size` should be set to the desired height and width of the generated image. If
        not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
        section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a specific image resolution. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a target image resolution. It should be as same
        as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
        A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
        each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
        DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
        list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
        otherwise a `tuple` is returned containing the output images.
callbackNr   z1.0.0zjPassing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`zpPassing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`r^   Fr   scale)rQ   rR   rx   rU   ry   rz   )	rW   r  r  r   rt   r2   r   ru   r  r   )r:  r   r   r   )r   r*  r   c                    > U T:  $ r   r6   )tsdiscrete_timestep_cutoffs    rD   <lambda>>StableDiffusionXLControlNetPipeline.__call__.<locals>.<lambda>  s    RC[=[rF   z>=z2.1)totalr+   )text_embedstime_ids)encoder_hidden_statescontrolnet_condconditioning_scaler  added_cond_kwargsri  r   )ry  timestep_condrW  down_block_additional_residualsmid_block_additional_residualr|  ri  ri  rP   rQ   rR   rS   rU   rT   rV   rW   orderlatentlatents_meanlatents_stdr   )ri  r  )rh  )images)dpopr#   re   r   r   tensor_inputsr(   rZ   r   rf   r?   r   r   r  rJ  rO  rV  rZ  rc  r   r   r   r   r   rj   global_pool_conditionsrW  getr   ru   rz   r   r  r   r   rE   r@   r_  rY   in_channelsr  rS  r   r&  rL  r   rG  r   r   ranger   rM   r   projection_dimr-  r   r  r[  roundnum_train_timestepsfilterr)   progress_barr   rd  cudais_available	_inductorcudagraph_mark_step_beginscale_model_inputr   r   r   localsupdateri   XLA_AVAILABLExm	mark_steprX   float16force_upcastr7  r   iterr4  r<   r   r  r  r   r2   scaling_factordecodern   apply_watermarkrl   postprocessmaybe_free_model_hooksr.   )^rp   rr   rs   rW   r  r  r1   r3   r4   r[  rL  rv   rw   rt   r   r   rP   rQ   rR   rx   rU   r   r   rh  ri  rW  r   r  r   r   r'  r(  r)  rj  rk  rl  rz   rm  r   rA   ro  r   rZ   multr   r2   r  text_encoder_lora_scaler   r  r   r  r}  guidance_scale_tensorr   controlnet_keepr   sekeepsrS   r*  rT   rV   num_warmup_stepsis_unet_compiledis_controlnet_compiledis_torch_higher_equal_2_1r  tlatent_model_inputr|  control_model_inputcontrolnet_prompt_embedscontrolnet_added_cond_kwargsc
cond_scalecontrolnet_cond_scaledown_block_res_samplesmid_block_res_sampled
noise_prednoise_pred_uncondnoise_pred_textcallback_kwargsr   callback_outputsstep_idxneeds_upcastinghas_latents_meanhas_latents_stdr  r  rs  s^                                                                                                @rD   __call__,StableDiffusionXLControlNetPipeline.__call__  s*   L ::j$/$4d;|
 %  C *-=?U,VWW1E1S1S.2DT__2U2UT__..[_[j[j
 0$77JG[]a<b<b%()=%>BXAY%Y"0$77JG]_c<d<d#&'=#>BVAW#W 2D99*MacgBhBh+5jBV+W+W3z']^D.//,-- %9" 	" #))" .!	
&  .#'=$+ *VS"9"9JJvt$<$<VJ&,,Q/J''j"677JGdfk<l<l-J,KcR\RaRaNb,b) *o66 44#**AA 	
  9#9
 ?C>Y>Y>eD''++GT:ko 	  !,,'#9!5*G.nn  
	
" )$ '+B+N?? '2200L j/22&&%(==&; &&,0,L,L% ' 
E "KK,MFE
$899F++ !),AA*?!$**040P0P) , 
 f%   E!!HNN23/MFE5 *<NN/*
&	 ")n  $yy//;;&&.. 	
 99..:$)LL1D1Dq1H$I$P$PQ[^sQs$t! ==%TYY5E5E5X5X > bgmmb4 
 !::9J s9~&A   68LMMDAq eAI.2Rq1uI6NQR6RSSM   ""z*o/V/V58\ab ' eT"")@U1X^^BC-@M)=U[[-=M!4f_.&*-.B.H.H.L*M'*.*=*=*D*D*S*S'--!%%(C . 
 "-2F2R$($:$:&.$#)),G %; %! %1!++!II'=}&MSTUM#ii)F(X^_`O 99&;\%JPQRL%((0),,V4#v.55jCX6XZ[\ y>,?..BVBV,VV *4--u55""Q&""Q&'*NN))==))DNN,A,A,U,UUW($ #&d62[]f+g&h"i!"6#67I-dii8!3DOO!D$4T5$A!%89\!),1>>
 JJ++--).D1OO==?AEAaAaUYYy1}%=gn"%)^^%E%EFXZ[%\"4CQ]$^! $"B"B*1'*...*J*JK^`a*b'/</B/B1/Ea/H,'6'<'<Q'?'B$0$6$6q$9!$<40
 +='/<,3D0oa0$77478UWfghWi4j!k4jDAq!a%4jJ!kJ,I)!"7>>0Ea0H-!69K!KJ?C'*B$)'1)&B % @O 	@<&(< $"B"B \r-r[qVWeii9I9I!9La8P.Q[q*-r+099e6F6FG[6\^r5s+t(#/3J3V8D%n5 "YY&*7"/+/+F+F4J2F&7 % ' 
 

 339C9I9I!9L6%!2^YjGj5k!kJ ..--j!WmHYmglmnop'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*&6&:&:;Lo&^O4D4H4H79V51 $4#7#7#UL,<,@,@AXZo,p),00%@E I**A9I/IqSTuX\XfXfXlXlNlpqNq '')+N0Ba0G#$(K#K 1g6 =LLNY - :^ h&"hhnn=^$((//B^B^O!!**T$txx/G/G/R/R/T*U%V%\%\]  'txxGtDHHOOLhLhptLt%dhhoo}Eq$((//JeJemqJqOOLL!=!=>CCAq!QORRSZSaSacjcpcpq  LL!<!<=BB1aANQQRYR`R`bibobop  "K/$((//2P2PPS__!DHHOO$B$BBHHOOGO?BE %--0Eh&~~)66u=((44U4TE 	##%8O.e<<AP "l, .ss :9s:   A?E.0-%
?A06+G000%00
?)rO  rV  rZ  rJ  rc  r   r_  rm   rl   r`   rn   )TNNN)NNr^   TNNNNNNNNr   )NNNNNNNNr   r   r   N)FF)@__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r   r	   r   r   r   r   r!   boolr   r
   r   rd   r   r   r2   r   r   r   r   r   r   r   r  r   r  r  r-  r7  r  r   rG  propertyrL  rz   ru   rW  r[  r`  rd  no_gradr%   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   r   r   r  __static_attributes____classcell__)r>   s   @rD   rH   rH      s   )X U	* .2*.047;,[,[ $,[ 4	,[
 !,[ #,[ #,[ /4+@%BXZnno,[ -,[ '+,[ "$,[ .,[ 5,[ ,[d #')-%&,0)-+/049=7;@D&*#'ljlj 3-lj &	lj
  #lj &*lj "#lj $C=lj  -lj !) 6lj 'u||4lj (0'=lj UOlj C=lj^52+'\!. #! $&*&)" +/#{|#^ %*>0 ei&1& 58emm.1@E	< $ $   X X , , # # # #   ]]_12 )-48$( $##%#")- #;?=A/0MQ*.049=7;@D9=@D%* ;?CF <?:=)-17'+<@:@:>#' 9BS\	=c49n%\	= 5d3i01\	= "	\	=
 \	= }\	= !\	= 9\	= U\	=  \	= \	= "%T#Y"78\	= $E#tCy.$9:\	=  (}\	= \	=  E%//43H"HIJ!\	=" %,,'#\	=$  -%\	=& !) 6'\	=( 'u||4)\	=* (0'=+\	=, ##56-\	=. "*$u||*<!=/\	=0 c]1\	=2 3\	=4 !)c3h 85\	=6 (-UDK-?'@7\	=8 9\	=: !&eT%[&8 9;\	=< $E4;$67=\	=> S#X?\	=@  %S#XA\	=B 38_C\	=D !)sCx 9E\	=F ).c3hG\	=H 'uS#X7I\	=J C=K\	=L '(Cd+T124DF\\]
M\	=R -1IS\	= 3 \	=rF   rH   )NNNN)Tr9   typingr   r   r   r   r   r   r	   numpyr  	PIL.Imager  r   torch.nn.functionalrB  rC  r   transformersr
   r   r   r   r   diffusers.utils.import_utilsr   	callbacksr   r   rl   r   r   loadersr   r   r   r   modelsr   r   r   r   r   models.attention_processorr   r   models.lorar    
schedulersr!   utilsr"   r#   r$   r%   r&   r'   utils.torch_utilsr(   r)   r*   pipeline_utilsr,   r-   #stable_diffusion_xl.pipeline_outputr.   stable_diffusion_xl.watermarkr/   r0   torch_xla.core.xla_modelcore	xla_modelr  r  
get_loggerr  r   r  r   r   r2   r   rE   rH   r6   rF   rD   <module>r     s9     D D D       J A D  r q : 3  T S D Q $%%L , ))MM			H	%+ b *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vX=$X=rF   