
import inspect
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from transformers import (
    CLIPImageProcessor,
    CLIPTextModel,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
    CLIPVisionModelWithProjection,
)

from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import (
    FromSingleFileMixin,
    IPAdapterMixin,
    StableDiffusionXLLoraLoaderMixin,
    TextualInversionLoaderMixin,
)
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.attention_processor import (
    AttnProcessor2_0,
    FusedAttnProcessor2_0,
    XFormersAttnProcessor,
)
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import (
    USE_PEFT_BACKEND,
    is_invisible_watermark_available,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
    scale_lora_layers,
    unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from .pag_utils import PAGMixin


if is_invisible_watermark_available():
    from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AutoPipelineForText2Image

        >>> pipe = AutoPipelineForText2Image.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-base-1.0",
        ...     torch_dtype=torch.float16,
        ...     enable_pag=True,
        ... )
        >>> pipe = pipe.to("cuda")

        >>> prompt = "a photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt, pag_scale=0.3).images[0]
        ```
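
        A minimal variation (an illustrative sketch, not part of the upstream example): the
        perturbed-attention layers can also be chosen at load time, and PAG combines freely with
        regular classifier-free guidance.

        ```py
        >>> pipe = AutoPipelineForText2Image.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-base-1.0",
        ...     torch_dtype=torch.float16,
        ...     enable_pag=True,
        ...     pag_applied_layers=["mid"],
        ... ).to("cuda")

        >>> # pag_scale > 0 enables the perturbed-attention branch; guidance_scale is the usual CFG weight
        >>> image = pipe(prompt, pag_scale=3.0, guidance_scale=7.0).images[0]
        ```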
c                     UR                  [        [        SUR                  5      5      SS9nU R                  [        [        SU R                  5      5      SS9nXU-  -  nX%-  SU-
  U -  -   n U $ )a  
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
Flawed](https://huggingface.co/papers/2305.08891).
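
The update implemented below is
`noise_cfg = guidance_rescale * noise_cfg * (std(noise_pred_text) / std(noise_cfg)) + (1 - guidance_rescale) * noise_cfg`,
where each standard deviation is taken per sample over all non-batch dimensions.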

Args:
    noise_cfg (`torch.Tensor`):
        The predicted noise tensor for the guided diffusion process.
    noise_pred_text (`torch.Tensor`):
        The predicted noise tensor for the text-guided diffusion process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        A rescale factor applied to the noise predictions.

Returns:
    noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
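
Example (a minimal, self-contained sketch):

    ```py
    >>> import torch
    >>> cfg = torch.randn(2, 4, 64, 64)   # combined CFG noise prediction
    >>> text = torch.randn(2, 4, 64, 64)  # text-conditioned noise prediction
    >>> rescale_noise_cfg(cfg, text, guidance_rescale=0.7).shape
    torch.Size([2, 4, 64, 64])
    ```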
r*   T)dimkeepdim)stdlistrangendim)	noise_cfgnoise_pred_textguidance_rescalestd_textstd_cfgnoise_pred_rescaleds         d/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/pag/pipeline_pag_sd_xl.pyrescale_noise_cfgr;   Z   s{    " ""tE!_5I5I,J'KUY"ZHmmU1inn%= >mMG#''9: 6!>N:NR[9[[I    num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
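
Example (a sketch; the sigma values are illustrative only, and it assumes a scheduler whose
`set_timesteps` accepts a `sigmas` argument):

    ```py
    >>> from diffusers import EulerDiscreteScheduler
    >>> scheduler = EulerDiscreteScheduler.from_pretrained(
    ...     "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
    ... )
    >>> timesteps, num_inference_steps = retrieve_timesteps(
    ...     scheduler, sigmas=[14.6, 9.2, 5.4, 2.9, 1.2, 0.0], device="cpu"
    ... )
    ```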
    """
    if timesteps is not None and sigmas is not None:
        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
    if timesteps is not None:
        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
        if not accepts_timesteps:
            raise ValueError(
                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
                f" timestep schedules. Please check whether you are using the correct scheduler."
            )
        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
        timesteps = scheduler.timesteps
        num_inference_steps = len(timesteps)
    elif sigmas is not None:
        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
        if not accept_sigmas:
            raise ValueError(
                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
                f" sigmas schedules. Please check whether you are using the correct scheduler."
            )
        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
        timesteps = scheduler.timesteps
        num_inference_steps = len(timesteps)
    else:
        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
        timesteps = scheduler.timesteps
    return timesteps, num_inference_steps


class StableDiffusionXLPAGPipeline(
    DiffusionPipeline,
    StableDiffusionMixin,
    FromSingleFileMixin,
    StableDiffusionXLLoraLoaderMixin,
    TextualInversionLoaderMixin,
    IPAdapterMixin,
    PAGMixin,
):
    r"""
Pipeline for text-to-image generation using Stable Diffusion XL.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

The pipeline also inherits the following loading methods:
    - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
    - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
    - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

Args:
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder. Stable Diffusion XL uses the text portion of
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
        the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
    text_encoder_2 ([` CLIPTextModelWithProjection`]):
        Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the
        [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
        variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`CLIPTokenizer`):
        Second Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    force_zeros_for_empty_prompt (`bool`, *optional*, defaults to `"True"`):
        Whether the negative prompt embeddings shall be forced to always be set to 0. Also see the config of
        `stabilityai/stable-diffusion-xl-base-1-0`.
    add_watermarker (`bool`, *optional*):
        Whether to use the [invisible_watermark library](https://github.com/ShieldMnt/invisible-watermark/) to
        watermark output images. If not defined, it will default to True if the package is installed, otherwise no
        watermarker will be used.
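
    Example (an illustrative sketch, not part of the upstream docstring; the IP-Adapter checkpoint
    name is the commonly used SDXL one but should be treated as an assumption):

        ```py
        >>> pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
        >>> image = pipe(prompt, ip_adapter_image=adapter_image, pag_scale=3.0).images[0]
        ```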
    """

    model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"
    _optional_components = [
        "tokenizer",
        "tokenizer_2",
        "text_encoder",
        "text_encoder_2",
        "image_encoder",
        "feature_extractor",
    ]
    _callback_tensor_inputs = [
        "latents",
        "prompt_embeds",
        "negative_prompt_embeds",
        "add_text_embeds",
        "add_time_ids",
        "negative_pooled_prompt_embeds",
        "negative_add_time_ids",
    ]

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        text_encoder_2: CLIPTextModelWithProjection,
        tokenizer: CLIPTokenizer,
        tokenizer_2: CLIPTokenizer,
        unet: UNet2DConditionModel,
        scheduler: KarrasDiffusionSchedulers,
        image_encoder: CLIPVisionModelWithProjection = None,
        feature_extractor: CLIPImageProcessor = None,
        force_zeros_for_empty_prompt: bool = True,
        add_watermarker: Optional[bool] = None,
        pag_applied_layers: Union[str, List[str]] = "mid",  # e.g. ["mid"], ["down.block_2", "up.block_1.attentions_0"]
    ):
        super().__init__()

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            text_encoder_2=text_encoder_2,
            tokenizer=tokenizer,
            tokenizer_2=tokenizer_2,
            unet=unet,
            scheduler=scheduler,
            image_encoder=image_encoder,
            feature_extractor=feature_extractor,
        )
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

        self.default_sample_size = (
            self.unet.config.sample_size
            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
            else 128
        )

        add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

        if add_watermarker:
            self.watermark = StableDiffusionXLWatermarker()
        else:
            self.watermark = None

        self.set_pag_applied_layers(pag_applied_layers)

    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
    def encode_prompt(
        self,
        prompt: str,
        prompt_2: Optional[str] = None,
        device: Optional[torch.device] = None,
        num_images_per_prompt: int = 1,
        do_classifier_free_guidance: bool = True,
        negative_prompt: Optional[str] = None,
        negative_prompt_2: Optional[str] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        pooled_prompt_embeds: Optional[torch.Tensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
        lora_scale: Optional[float] = None,
        clip_skip: Optional[int] = None,
    ):
        r"""
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
        """
        device = device or self._execution_device

        # set lora scale so that monkey patched LoRA
        # function of text encoder can correctly access it
        if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
            self._lora_scale = lora_scale

            # dynamically adjust the LoRA scale
            if self.text_encoder is not None:
                if not USE_PEFT_BACKEND:
                    adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                else:
                    scale_lora_layers(self.text_encoder, lora_scale)

            if self.text_encoder_2 is not None:
                if not USE_PEFT_BACKEND:
                    adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
                else:
                    scale_lora_layers(self.text_encoder_2, lora_scale)

        prompt = [prompt] if isinstance(prompt, str) else prompt

        if prompt is not None:
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        # Define tokenizers and text encoders
        tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
        text_encoders = (
            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
        )

        if prompt_embeds is None:
            prompt_2 = prompt_2 or prompt
            prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2

            # textual inversion: process multi-vector tokens if necessary
            prompt_embeds_list = []
            prompts = [prompt, prompt_2]
            for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
                if isinstance(self, TextualInversionLoaderMixin):
                    prompt = self.maybe_convert_prompt(prompt, tokenizer)

                text_inputs = tokenizer(
                    prompt,
                    padding="max_length",
                    max_length=tokenizer.model_max_length,
                    truncation=True,
                    return_tensors="pt",
                )

                text_input_ids = text_inputs.input_ids
                untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

                if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                    text_input_ids, untruncated_ids
                ):
                    removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
                    logger.warning(
                        "The following part of your input was truncated because CLIP can only handle sequences up to"
                        f" {tokenizer.model_max_length} tokens: {removed_text}"
                    )

                prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                # We are only ALWAYS interested in the pooled output of the final text encoder
                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
                    pooled_prompt_embeds = prompt_embeds[0]

                if clip_skip is None:
                    prompt_embeds = prompt_embeds.hidden_states[-2]
                else:
                    # "2" because SDXL always indexes from the penultimate layer.
                    prompt_embeds = prompt_embeds.hidden_states[-(clip_skip + 2)]

                prompt_embeds_list.append(prompt_embeds)

            prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)

        # get unconditional embeddings for classifier free guidance
        zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
        if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
            negative_prompt_embeds = torch.zeros_like(prompt_embeds)
            negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
        elif do_classifier_free_guidance and negative_prompt_embeds is None:
            negative_prompt = negative_prompt or ""
            negative_prompt_2 = negative_prompt_2 or negative_prompt

            # normalize str to list
            negative_prompt = batch_size * [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
            negative_prompt_2 = (
                batch_size * [negative_prompt_2] if isinstance(negative_prompt_2, str) else negative_prompt_2
            )

            uncond_tokens: List[str]
            if prompt is not None and type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = [negative_prompt, negative_prompt_2]

            negative_prompt_embeds_list = []
            for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
                if isinstance(self, TextualInversionLoaderMixin):
                    negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)

                max_length = prompt_embeds.shape[1]
                uncond_input = tokenizer(
                    negative_prompt,
                    padding="max_length",
                    max_length=max_length,
                    truncation=True,
                    return_tensors="pt",
                )

                negative_prompt_embeds = text_encoder(
                    uncond_input.input_ids.to(device),
                    output_hidden_states=True,
                )

                # We are only ALWAYS interested in the pooled output of the final text encoder
                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                negative_prompt_embeds_list.append(negative_prompt_embeds)

            negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)

        if self.text_encoder_2 is not None:
            prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
        else:
            prompt_embeds = prompt_embeds.to(dtype=self.unet.dtype, device=device)

        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)

        if do_classifier_free_guidance:
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            seq_len = negative_prompt_embeds.shape[1]

            if self.text_encoder_2 is not None:
                negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
            else:
                negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.unet.dtype, device=device)

            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

        pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
            bs_embed * num_images_per_prompt, -1
        )
        if do_classifier_free_guidance:
            negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
                bs_embed * num_images_per_prompt, -1
            )

        if self.text_encoder is not None:
            if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(self.text_encoder, lora_scale)

        if self.text_encoder_2 is not None:
            if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(self.text_encoder_2, lora_scale)

        return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
        dtype = next(self.image_encoder.parameters()).dtype

        if not isinstance(image, torch.Tensor):
            image = self.feature_extractor(image, return_tensors="pt").pixel_values

        image = image.to(device=device, dtype=dtype)
        if output_hidden_states:
            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
            uncond_image_enc_hidden_states = self.image_encoder(
                torch.zeros_like(image), output_hidden_states=True
            ).hidden_states[-2]
            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
                num_images_per_prompt, dim=0
            )
            return image_enc_hidden_states, uncond_image_enc_hidden_states
        else:
            image_embeds = self.image_encoder(image).image_embeds
            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
            uncond_image_embeds = torch.zeros_like(image_embeds)

            return image_embeds, uncond_image_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
    def prepare_ip_adapter_image_embeds(
        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
    ):
        image_embeds = []
        if do_classifier_free_guidance:
            negative_image_embeds = []
        if ip_adapter_image_embeds is None:
            if not isinstance(ip_adapter_image, list):
                ip_adapter_image = [ip_adapter_image]

            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
                raise ValueError(
                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got"
                    f" {len(ip_adapter_image)} images and"
                    f" {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                )

            for single_ip_adapter_image, image_proj_layer in zip(
                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
            ):
                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
                single_image_embeds, single_negative_image_embeds = self.encode_image(
                    single_ip_adapter_image, device, 1, output_hidden_state
                )

                image_embeds.append(single_image_embeds[None, :])
                if do_classifier_free_guidance:
                    negative_image_embeds.append(single_negative_image_embeds[None, :])
        else:
            for single_image_embeds in ip_adapter_image_embeds:
                if do_classifier_free_guidance:
                    single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
                    negative_image_embeds.append(single_negative_image_embeds)
                image_embeds.append(single_image_embeds)

        ip_adapter_image_embeds = []
        for i, single_image_embeds in enumerate(image_embeds):
            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
            if do_classifier_free_guidance:
                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)

            single_image_embeds = single_image_embeds.to(device=device)
            ip_adapter_image_embeds.append(single_image_embeds)

        return ip_adapter_image_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
    def prepare_extra_step_kwargs(self, generator, eta):
        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
        extra_step_kwargs = {}
        if accepts_eta:
            extra_step_kwargs["eta"] = eta

        # check if the scheduler accepts generator
        accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
        if accepts_generator:
            extra_step_kwargs["generator"] = generator
        return extra_step_kwargs

    def check_inputs(
        self,
        prompt,
        prompt_2,
        height,
        width,
        callback_steps,
        negative_prompt=None,
        negative_prompt_2=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
        pooled_prompt_embeds=None,
        negative_pooled_prompt_embeds=None,
        ip_adapter_image=None,
        ip_adapter_image_embeds=None,
        callback_on_step_end_tensor_inputs=None,
    ):
        if height % 8 != 0 or width % 8 != 0:
            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

        if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
            raise ValueError(
                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
                f" {type(callback_steps)}."
            )

        if callback_on_step_end_tensor_inputs is not None and not all(
            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
        ):
            raise ValueError(
                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
                f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
            )

        if prompt is not None and prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
                " only forward one of the two."
            )
        elif prompt_2 is not None and prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
                " only forward one of the two."
            )
        elif prompt is None and prompt_embeds is None:
            raise ValueError(
                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
            )
        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
        elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
            raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")

        if negative_prompt is not None and negative_prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
            )
        elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
            )

        if prompt_embeds is not None and negative_prompt_embeds is not None:
            if prompt_embeds.shape != negative_prompt_embeds.shape:
                raise ValueError(
                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                    f" {negative_prompt_embeds.shape}."
                )

        if prompt_embeds is not None and pooled_prompt_embeds is None:
            raise ValueError(
                "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate"
                " `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
            )

        if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
            raise ValueError(
                "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed."
                " Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to"
                " generate `negative_prompt_embeds`."
            )

        if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
            raise ValueError(
                "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image`"
                " and `ip_adapter_image_embeds` defined."
            )

        if ip_adapter_image_embeds is not None:
            if not isinstance(ip_adapter_image_embeds, list):
                raise ValueError(
                    f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                )
            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                raise ValueError(
                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is"
                    f" {ip_adapter_image_embeds[0].ndim}D"
                )

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
        shape = (
            batch_size,
            num_channels_latents,
            int(height) // self.vae_scale_factor,
            int(width) // self.vae_scale_factor,
        )
        if isinstance(generator, list) and len(generator) != batch_size:
            raise ValueError(
                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
            )

        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
        else:
            latents = latents.to(device)

        # scale the initial noise by the standard deviation required by the scheduler
        latents = latents * self.scheduler.init_noise_sigma
        return latents

    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._get_add_time_ids
    def _get_add_time_ids(
        self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
    ):
        add_time_ids = list(original_size + crops_coords_top_left + target_size)

        passed_add_embed_dim = (
            self.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
        )
        expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features

        if expected_add_embed_dim != passed_add_embed_dim:
            raise ValueError(
                f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of"
                f" {passed_add_embed_dim} was created. The model has an incorrect config. Please check"
                " `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
            )

        add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
        return add_time_ids

    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.upcast_vae
    def upcast_vae(self):
        dtype = self.vae.dtype
        self.vae.to(dtype=torch.float32)
        use_torch_2_0_or_xformers = isinstance(
            self.vae.decoder.mid_block.attentions[0].processor,
            (
                AttnProcessor2_0,
                XFormersAttnProcessor,
                FusedAttnProcessor2_0,
            ),
        )
        # if xformers or torch_2_0 is used attention block does not need
        # to be in float32 which can save lots of memory
        if use_torch_2_0_or_xformers:
            self.vae.post_quant_conv.to(dtype)
            self.vae.decoder.conv_in.to(dtype)
            self.vae.decoder.mid_block.to(dtype)

    # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
    def get_guidance_scale_embedding(
        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
    ) -> torch.Tensor:
        r"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

Args:
    w (`torch.Tensor`):
        Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
    embedding_dim (`int`, *optional*, defaults to 512):
        Dimension of the embeddings to generate.
    dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
        Data type of the generated embeddings.

Returns:
    `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
        """
        assert len(w.shape) == 1
        w = w * 1000.0

        half_dim = embedding_dim // 2
        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
        emb = w.to(dtype)[:, None] * emb[None, :]
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
        if embedding_dim % 2 == 1:  # zero pad
            emb = torch.nn.functional.pad(emb, (0, 1))
        assert emb.shape == (w.shape[0], embedding_dim)
        return emb

    @property
    def guidance_scale(self):
        return self._guidance_scale

    @property
    def guidance_rescale(self):
        return self._guidance_rescale

    @property
    def clip_skip(self):
        return self._clip_skip

    # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
    # corresponds to doing no classifier free guidance.
    @property
    def do_classifier_free_guidance(self):
        return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None

    @property
    def cross_attention_kwargs(self):
        return self._cross_attention_kwargs

    @property
    def denoising_end(self):
        return self._denoising_end

    @property
    def num_timesteps(self):
        return self._num_timesteps

    @property
    def interrupt(self):
        return self._interrupt

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        prompt_2: Optional[Union[str, List[str]]] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
        timesteps: List[int] = None,
        sigmas: List[float] = None,
        denoising_end: Optional[float] = None,
        guidance_scale: float = 5.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        negative_prompt_2: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.Tensor] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        pooled_prompt_embeds: Optional[torch.Tensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        guidance_rescale: float = 0.0,
        original_size: Optional[Tuple[int, int]] = None,
        crops_coords_top_left: Tuple[int, int] = (0, 0),
        target_size: Optional[Tuple[int, int]] = None,
        negative_original_size: Optional[Tuple[int, int]] = None,
        negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
        negative_target_size: Optional[Tuple[int, int]] = None,
        clip_skip: Optional[int] = None,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
        pag_scale: float = 3.0,
        pag_adaptive_scale: float = 0.0,
    ):
        r"""
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated image. This is set to 1024 by default for the best results.
        Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated image. This is set to 1024 by default for the best results.
        Anything below 512 pixels won't work well for
        [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
        and checkpoints that are not specifically fine-tuned on low resolutions.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
        in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
        passed will be used. Must be in descending order.
    sigmas (`List[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    denoising_end (`float`, *optional*):
        When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
        completed before it is intentionally prematurely terminated. As a result, the returned sample will
        still retain a substantial amount of noise as determined by the discrete timesteps selected by the
        scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
        "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
        Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
    guidance_scale (`float`, *optional*, defaults to 5.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
        applies to [`schedulers.DDIMScheduler`], will be ignored for others.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
    ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
        Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
        IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
        contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
        provided, embeddings are computed from the `ip_adapter_image` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
        of a plain tuple.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
        [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
        using zero terminal SNR.
    original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
        `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
        explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
        `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
        `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        For most cases, `target_size` should be set to the desired height and width of the generated image. If
        not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
        section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a specific image resolution. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
        To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
        micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
        To negatively condition the generation process based on a target image resolution. It should be as same
        as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
        [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
        information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    pag_scale (`float`, *optional*, defaults to 3.0):
        The scale factor for the perturbed attention guidance. If it is set to 0.0, the perturbed attention
        guidance will not be used.
    pag_adaptive_scale (`float`, *optional*, defaults to 0.0):
        The adaptive scale factor for the perturbed attention guidance. If it is set to 0.0, `pag_scale` is
        used.

Examples:

Returns:
    [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
    [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
    `tuple`. When returning a tuple, the first element is a list with the generated images.
NFr*   r   scale)ry   rz   r>   r{   r|   r}   r~   r[   r\   r   r_   r   r   r   )r   r  r   r&   c                    > U T:  $ r   rB   )tsdiscrete_timestep_cutoffs    r:   <lambda>7StableDiffusionXLPAGPipeline.__call__.<locals>.<lambda>  s    RC[=[r<   )r  r   )re   r|   )total)text_embedstime_idsr   )encoder_hidden_statestimestep_condr3  added_cond_kwargsrF  TrC  )r6   rF  rZ   r[   r\   r]   r_   r^   r`   latentlatents_meanlatents_stdr   )rF  r   )rE  )images)`rt   rh   r   r#  r(  r+  r2  r6  r?  
_pag_scale_pag_adaptive_scaler   r   r1   rK   r   r   r3  getr   r|   r   rP   rL   rb   rp   in_channelsr   r   r   rW   r   projection_dimr  do_perturbed_attention_guidance%_prepare_perturbed_attention_guidancer   r   r   r   r   r   r   maxorderr7  floatroundnum_train_timestepsfilterr/  r   r%  r   attn_processors_set_pag_attn_processorre   r;  progress_barr@  scale_model_input#_apply_perturbed_attention_guidancer6   r;   r   backendsmpsis_availablelocalspopupdateXLA_AVAILABLExm	mark_stepra   float16force_upcastr  r   iterr  rH   rs   r[  r\  r   r>   scaling_factordecoderu   apply_watermarkrr   postprocessmaybe_free_model_hooksset_attn_processorr)   )Grw   ry   rz   r   r   r=   r?   r@   r7  r%  r}   r~   r{   r   r   rZ   r[   r\   r   r_   r   r   rE  rF  r3  r6   r   r   r  rG  rH  rI  r   rJ  r   rK  rL  r   r>   r   r   r   r]   r  r^   r`   r   r   r   num_warmup_stepsrX  guidance_scale_tensororiginal_attn_procrm  tlatent_model_inputrY  
noise_predr5   noise_pred_uncondlatents_dtypecallback_kwargsr   callback_outputsneeds_upcastinghas_latents_meanhas_latents_stdr[  r\  r   rQ  sG                                                                         @r:   __call__%StableDiffusionXLPAGPipeline.__call__P  s   D K433d6K6KKI11D4I4II%8&!4f_ 	" )#.	
"  .!1#'=$+##5  *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 "7(,(H(H+/'#9!5*G!nn  
	
" )$ *<NN/*
&	
  $yy//;;&&.. 	
 !::9J /&*-.B.H.H.L*M'*.*=*=*D*D*S*S'--!%%(C . 
 "-2F2R$($:$:&.$#)),G %; %! %1!// FF5t7W7WM #HH!>@`@`O  EE3T5U5UL --!II'=}&MSTUM#ii)F(X^_`O 99&;\%JPQRL%((0),,V4#v.55jCX6XZ[\'+B+N&*&J&J '2200'# $--D#E<(,%33:F:L:LQ:O7)<77#'#M#M$&;T=]=]$L 55#(99.C\-RXY#ZL+v6-9'* $F s9~0CnnFZFZ0ZZ\]^ *4--u55""Q&""Q&'*NN))==))DNN,A,A,U,UUW($ #&d62[]f+g&h"i!"6#67I 99..:$)LL1D1Dq1H$I$P$PQ[^sQs$t! ==%TYY5E5E5X5X > bgmmb4  //!%!:!:((#'#:#:,0,L,L ) 
 ")n%89\!),1>> &+YYyM<O<OPQ<RV]VcVcdeVf<f/g%h"%)^^%E%EFXZ[%\" 5DQ]$^!*68O%n5!YY&*7"/+/+F+F&7 % '  
 77262Z2Z"D$D$DdFYFY[\^b3/J 559C9I9I!9L6%!2T5H5HO^oLo5p!pJ338M8MPS8S!2:aeavav!wJ !(..--j!WmHYmglmnop==M1~~))6688")**]";'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*&6&:&:;Lo&^O4D4H4H79V51 $4#7#7#UL,<,@,@AXZo,p)I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNC - :H h&"hhnn=^$((//B^B^O!!**T$txx/G/G/R/R/T*U%V%\%\]$((..0>>%%2244#xx{{7==9DH  'txxGtDHHOOLhLhptLt%dhhoo}Eq$((//JeJemqJqOOLL!=!=>CCAq!QORRSZSaSacjcpcpq  LL!<!<=BB1aANQQRYR`R`bibobop  "K/$((//2P2PPS__!HHOO$B$BBHHOOGO?BE %--0Eh&~~)66u=((44U4TE 	##%//II(();<8O.e<<o :9s   Ktt
t))r+  r2  r6  r(  r#  r?  r   r;  r_  r^  rt   rr   ra   rh   ru   )NNTNmid)NNr*   TNNNNNNNNr   )	NNNNNNNNN);__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r   r   r   r
   boolr   r	   r   r   rl   r   r>   r   r   rg  r   r   r   r   r   r   r  r  r  r   r   propertyr%  r6   r   r|   r3  r7  r<  r@  no_gradr"   EXAMPLE_DOC_STRING	Generatorr   r   r   r   r   r  __static_attributes____classcell__)rJ   s   @r:   rR   rR      sR   +Z U& 8<04-1*.49-8-8 $-8 4	-8
 !-8 #-8 #-8 --8 5-8 .-8 '+-8 "$-8 "#tCy.1-8 -8f #')-%&,0)-+/049=7;@D&*#'ljlj 3-lj &	lj
  #lj &*lj "#lj $C=lj  -lj !) 6lj 'u||4lj (0'=lj UOlj C=lj^52+'\!2 #!&* $+/^B0 ei&1( 58emm.1@E	< $ $ & &   X X , , # # # #   ]]_12 )-48 $##%#")- #;?=A/0MQ*.049=7;@D9=@D%* ;?"%371715<@:@:>#'KO9B$'Kx=c49n%x= 5d3i01x= 	x=
 }x= !x= 9x= Ux=  x= x= "%T#Y"78x= $E#tCy.$9:x=  (}x= x= E%//43H"HIJx=  %,,'!x="  -#x=$ !) 6%x=& 'u||4'x=( (0'=)x=* ##56+x=, "*$u||*<!=-x=. c]/x=0 1x=2 !)c3h 83x=4  5x=6  c3h07x=8  %S#X9x=: eCHo.;x=< !)sCx 9=x=> ).c3h?x=@ 'uS#X7Ax=B C=Cx=D 'xc40@$0F'GHEx=F -1IGx=H Ix=J "Kx= 3 x=r<   rR   )rC  )NNNN)GrE   typingr   r   r   r   r   r   r	   r   transformersr
   r   r   r   r   rr   r   r   loadersr   r   r   r   modelsr   r   r   models.attention_processorr   r   r   models.lorar   
schedulersr   utilsr   r   r    r!   r"   r#   r$   utils.torch_utilsr%   pipeline_utilsr'   r(   #stable_diffusion_xl.pipeline_outputr)   	pag_utilsr+   stable_diffusion_xl.watermarkr,   torch_xla.core.xla_modelcore	xla_modelrw  rv  
get_loggerr  r   r  r;   r   r   r>   rg  rP   rR   rB   r<   r:   <module>r     s2    D D D   E  K J 
 : 3   . D Q  $%%L))MM 
		H	% (: *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vZ=$Z=r<   