
    +hG                        S SK r S SKJrJrJrJrJrJr  S SKrS SK	J
r
JrJrJr  SSKJr  SSKJrJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJ r J!r!J"r"  SSK#J$r$  SSK%J&r&  SSK'J(r(  SSK)J*r*  \" 5       (       a  S SK+J,s  J-r.  Sr/OSr/\R`                  " \15      r2Sr3    SS\\4   S\\\5\Rl                  4      S\\\4      S\\\7      4S jjr8 " S S\&\\\*5      r9g)    N)AnyCallableDictListOptionalUnion)CLIPTextModelWithProjectionCLIPTokenizerT5EncoderModelT5TokenizerFast   )VaeImageProcessor)FromSingleFileMixinSD3LoraLoaderMixin)PAGCFGJointAttnProcessor2_0PAGJointAttnProcessor2_0)AutoencoderKL)SD3Transformer2DModel)FlowMatchEulerDiscreteScheduler)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor   )DiffusionPipeline)StableDiffusion3PipelineOutput   )PAGMixinTFa^  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AutoPipelineForText2Image

        >>> pipe = AutoPipelineForText2Image.from_pretrained(
        ...     "stabilityai/stable-diffusion-3-medium-diffusers",
        ...     torch_dtype=torch.float16,
        ...     enable_pag=True,
        ...     pag_applied_layers=["blocks.13"],
        ... )
        >>> pipe.to("cuda")
        >>> prompt = "A cat holding a sign that says hello world"
        >>> image = pipe(prompt, guidance_scale=5.0, pag_scale=0.7).images[0]
        >>> image.save("sd3_pag.png")
        ```
num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`List[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`List[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr$   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r$   r#   r%   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r%   r#   r#    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r$   len)	schedulerr"   r#   r$   r%   kwargsaccepts_timestepsaccept_sigmass           c/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/pag/pipeline_pag_sd_3.pyretrieve_timestepsr6   M   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))    c            9         ^  \ rS rSrSrSr/ r/ SQr SFS\S\	S\
S\S	\S
\S\S\S\S\\\\   4   4U 4S jjjr     SGS\\\\   4   S\S\S\\R,                     S\\R.                     4
S jjr    SHS\\\\   4   S\S\\R,                     S\\   S\4
S jjr             SIS\\\\   4   S\\\\   4   S\\\\   4   S\\R,                     S\S\S \\\\\   4      S!\\\\\   4      S"\\\\\   4      S#\\R6                     S$\\R6                     S%\\R6                     S&\\R6                     S\\   S\S'\\   4 S( jjr         SJS) jr SKS* jr\ S+ 5       r!\ S, 5       r"\ S- 5       r#\ S. 5       r$\ S/ 5       r%\ S0 5       r&\RN                  " 5       \(" \)5      SSSSSS1SS2SSSSSSSSSSS3SSSSS4/SS5S64S\\\\   4   S\\\\\   4      S\\\\\   4      S7\\   S8\\   S9\S:\\\      S;\S \\\\\   4      S!\\\\\   4      S"\\\\\   4      S\\   S<\\\RT                  \\RT                     4      S4\\R6                     S#\\R6                     S$\\R6                     S%\\R6                     S&\\R6                     S=\\   S>\S?\\+\\,4      S\\   S@\\-\\\+/S4      SA\\   S\SB\SC\46SD jj5       5       r.SEr/U =r0$ )LStableDiffusion3PAGPipeline   a\  
[PAG pipeline](https://huggingface.co/docs/diffusers/main/en/using-diffusers/pag) for text-to-image generation
using Stable Diffusion 3.

Args:
    transformer ([`SD3Transformer2DModel`]):
        Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModelWithProjection`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant,
        with an additional added projection layer that is initialized with a diagonal matrix with the `hidden_size`
        as its dimension.
    text_encoder_2 ([`CLIPTextModelWithProjection`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the
        [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
        variant.
    text_encoder_3 ([`T5EncoderModel`]):
        Frozen text-encoder. Stable Diffusion 3 uses
        [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
        [t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`CLIPTokenizer`):
        Second Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_3 (`T5TokenizerFast`):
        Tokenizer of class
        [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
z>text_encoder->text_encoder_2->text_encoder_3->transformer->vae)latentsprompt_embedsnegative_prompt_embedsnegative_pooled_prompt_embedstransformerr1   vaetext_encoder	tokenizertext_encoder_2tokenizer_2text_encoder_3tokenizer_3pag_applied_layersc                   > [         TU ]  5         U R                  UUUUUUU	UUS9	  [        U SS 5      (       a/  S[	        U R
                  R                  R                  5      S-
  -  OSU l        [        U R                  S9U l
        [        U S5      (       a#  U R                  b  U R                  R                  OSU l        [        U S	5      (       a-  U R                  b   U R                  R                  R                   OS
U l        [        U S	5      (       a-  U R                  b   U R                  R                  R$                  OSU l        U R'                  U
[)        5       [+        5       4S9  g )N)	r@   rA   rC   rE   rB   rD   rF   r?   r1   r@   r   r       )vae_scale_factorrB   M   r?      )pag_attn_processors)super__init__register_modulesgetattrr0   r@   configblock_out_channelsrJ   r   image_processorhasattrrB   model_max_lengthtokenizer_max_lengthr?   sample_sizedefault_sample_size
patch_sizeset_pag_applied_layersr   r   )selfr?   r1   r@   rA   rB   rC   rD   rE   rF   rG   r/   s              r5   rO   $StableDiffusion3PAGPipeline.__init__   sX    	%))### 	 
	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw0$BWBWX/6t[/I/IdnnNhDNN++np 	!
 t]++0@0@0L ##// 	  3:$2N2NSWScScSoD##..uv 	 	##5P5RTlTn4o 	$ 	
r7   Nr       promptnum_images_per_promptmax_sequence_lengthr#   dtypec           	         U=(       d    U R                   nU=(       d    U R                  R                  n[        U[        5      (       a  U/OUn[        U5      nU R                  cC  [        R                  " Xb-  U R                  U R                  R                  R                  4UUS9$ U R                  USUSSSS9nUR                  nU R                  USSS9R                  n	U	R                  S   UR                  S   :  ag  [        R                   " X5      (       dL  U R                  R#                  U	S S 2U R                  S	-
  S24   5      n
[$        R'                  S
U SU
 35        U R                  UR)                  U5      5      S   nU R                  R                  nUR)                  XTS9nUR                  u  pnUR+                  S	US	5      nUR-                  Xb-  US5      nU$ )Nr#   rb   
max_lengthTpt)paddingre   
truncationadd_special_tokensreturn_tensorslongestrg   rj   r    zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: r   rb   r#   )_execution_devicerA   rb   
isinstancestrr0   rE   torchzerosrW   r?   rR   joint_attention_dimrF   	input_idsshapeequalbatch_decodeloggerwarningtorepeatview)r\   r_   r`   ra   r#   rb   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textr<   _seq_lens                 r5   _get_t5_prompt_embeds1StableDiffusion3PAGPipeline._get_t5_prompt_embeds   s    14110**00'44&&[
&;;6--$$++??
   && *# ' 
 %..**69UY*Zdd  $(<(<R(@@UcIuIu++88DLeLehiLilnLnIn9opLNN'(	,A
 ++N,=,=f,EFqI##))%((u(D%++A &,,Q0EqI%**:+MwXZ[r7   	clip_skipclip_model_indexc                    U=(       d    U R                   nU R                  U R                  /nU R                  U R                  /nXe   nXu   n	[        U[        5      (       a  U/OUn[        U5      n
U" USU R                  SSS9nUR                  nU" USSS9R                  nUR                  S   UR                  S   :  ag  [        R                  " X5      (       dL  UR                  US S 2U R                  S-
  S24   5      n[        R                  S	U R                   S
U 35        U	" UR!                  U5      SS9nUS   nUc  UR"                  S   nOUR"                  US-   *    nUR!                  U R                  R$                  US9nUR                  u  nnnUR'                  SUS5      nUR)                  X-  US5      nUR'                  SUS5      nUR)                  X-  S5      nUU4$ )Nre   Trf   )rg   re   rh   rj   rk   rl   rm   r    z\The following part of your input was truncated because CLIP can only handle sequences up to rn   )output_hidden_statesr   r   ro   )rp   rB   rD   rA   rC   rq   rr   r0   rW   rv   rw   rs   rx   ry   rz   r{   r|   hidden_statesrb   r}   r~   )r\   r_   r`   r#   r   r   clip_tokenizersclip_text_encodersrB   rA   r   r   r   r   r   r<   pooled_prompt_embedsr   r   s                      r5   _get_clip_prompt_embeds3StableDiffusion3PAGPipeline._get_clip_prompt_embeds  s    1411>>4+;+;<"//1D1DE#5	);'44&&[
 00
 %..#FIdS]]  $(<(<R(@@UcIuIu$11/!TE^E^abEbegEgBg2hiLNN--.i~G %^%6%6v%>UYZ,Q/)77;M)77)a-8HIM%((t/@/@/F/Fv(V%++7A%,,Q0EqI%**:+MwXZ[3::1>SUVW3889[]_`222r7   Tprompt_2prompt_3do_classifier_free_guidancenegative_promptnegative_prompt_2negative_prompt_3r<   r=   r   r>   
lora_scalec                 V   U=(       d    U R                   nUbx  [        U [        5      (       ac  UU l        U R                  b!  [
        (       a  [        U R                  U5        U R                  b!  [
        (       a  [        U R                  U5        [        U[        5      (       a  U/OUnUb  [        U5      nOU
R                  S   nU
Gc  U=(       d    Un[        U[        5      (       a  U/OUnU=(       d    Un[        U[        5      (       a  U/OUnU R                  UUUUSS9u  nnU R                  UUUUSS9u  nn[        R                  " UU/SS9nU R                  UUUUS9n[        R                  R                   R#                  USUR                  S   UR                  S   -
  45      n[        R                  " UU/SS9n
[        R                  " UU/SS9nU(       Ga  UGc  U=(       d    S	nU=(       d    UnU	=(       d    Un	[        U[        5      (       a  UU/-  OUn[        U[        5      (       a  UU/-  OUn[        U	[        5      (       a  UU	/-  OU	n	Ub;  [%        U5      [%        U5      La$  ['        S
[%        U5       S[%        U5       S35      eU[        U5      :w  a!  [)        SU S[        U5       SU SU S3	5      eU R                  UUUSSS9u  nnU R                  UUUSSS9u  nn[        R                  " UU/SS9nU R                  U	UUUS9n[        R                  R                   R#                  USUR                  S   UR                  S   -
  45      n[        R                  " UU/SS9n[        R                  " UU/SS9nU R                  b6  [        U [        5      (       a!  [
        (       a  [+        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [+        U R                  U5        XX4$ )a  

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in all text-encoders
    prompt_3 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_3` and `text_encoder_3`. If not defined, `prompt` is
        used in all text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
    negative_prompt_3 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and
        `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
Nr   )r_   r#   r`   r   r   r    rm   dim)r_   r`   ra   r#   r    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)r#   r`   r   r   )rp   rq   r   _lora_scalerA   r   r   rC   rr   r0   rw   r   rs   catr   nn
functionalpadtype	TypeErrorr(   r   )r\   r_   r   r   r#   r`   r   r   r   r   r<   r=   r   r>   r   ra   r   r   prompt_embedpooled_prompt_embedprompt_2_embedpooled_prompt_2_embedclip_prompt_embedst5_prompt_embednegative_prompt_embednegative_pooled_prompt_embednegative_prompt_2_embednegative_pooled_prompt_2_embednegative_clip_prompt_embedst5_negative_prompt_embeds                                 r5   encode_prompt)StableDiffusion3PAGPipeline.encode_promptO  s   D 1411 !j7I&J&J)D   ,1A1A!$"3"3Z@"".3C3C!$"5"5zB'44&&VJ&,,Q/J )6H%/#%>%>zHH)6H%/#%>%>zHH040L0L&;#!" 1M 1-L- 594P4P&;#!" 5Q 51N1 "'L.+Ir!R"88&;$7	 9 O "'!4!4!8!8"Q(=(=b(ADVD\D\]_D`(`$a" "II'9?&KQSTM#(99.ACX-Y_a#b &+A+I-3O 1 D_ 1 D_ AK?\_@`@`jO+<<fuO4>?PRU4V4V
/00\m  5??PRU4V4V
/00\m  !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  CGB^B^&;!" C_ C?!#? GKFbFb!&;!" Gc GC#%C +0))5JLc4djl*m''+'A'A(&;$7	 (B ($ +0((*=*=*A*A+,22269T9Z9Z[]9^^_+'
 &+YY0KMe/fln%o",1II-/MNTV-) ($ 2338H8H#D$5$5zB*$ 2338H8H#D$7$7D6Jiir7   c                   ^  UT R                   T R                  -  -  S:w  d   UT R                   T R                  -  -  S:w  aj  [        ST R                   T R                  -   SU SU SXDT R                   T R                  -  -  -
   SXUT R                   T R                  -  -  -
   S35      eUbW  [        U 4S jU 5       5      (       d=  [        S	T R                   S
U Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eUb  U	b  [        SU SU	 S35      eUb  U	b  [        SU SU	 S35      eUb  U	b  [        SU SU	 S35      eUc  U	c  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  U
b  [        SU SU
 S35      eUb  U
b  [        SU SU
 S35      eUb  U
b  [        SU SU
 S35      eU	bC  U
b@  U	R                  U
R                  :w  a&  [        SU	R                   SU
R                   S35      eU	b  Uc  [        S5      eU
b  Uc  [        S5      eUb  US:  a  [        SU 35      eg g s  snf )Nr   z-`height` and `width` have to be divisible by z	 but are z and z.You can use height z and width r   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr\   s     r5   	<genexpr>;StableDiffusion3PAGPipeline.check_inputs.<locals>.<genexpr>+  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: z Cannot forward both `prompt_3`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z4`prompt_3` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: z)Cannot forward both `negative_prompt_3`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.i   z8`max_sequence_length` cannot be greater than 512 but is )
rJ   rZ   r(   allr   rq   rr   listr   rw   )r\   r_   r   r   heightwidthr   r   r   r<   r=   r   r>   "callback_on_step_end_tensor_inputsra   r   s   `               r5   check_inputs(StableDiffusion3PAGPipeline.check_inputs  s{   $ d++doo=>!C--?@AE?@U@UX\XgXg@g?hhqrxqyy~  @E  F F&&,9N9NQUQ`Q`9`/a&a%bbmns  @D  @U  @U  X\  Xg  Xg  @g  wh  oh  ni  ijk 
 .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee!:h+D+DZX`bfMgMgSTXYaTbScdee&+A+M9/9J K*++]_  */E/Q;<M;N O*++]_  */E/Q;<M;N O*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  $)=)E U  "-2O2W y  */BS/HWXkWlmnn 0I* pHs    K47K4c	                     Ub  UR                  XeS9$ UU[        U5      U R                  -  [        U5      U R                  -  4n	[        U[        5      (       a*  [        U5      U:w  a  [        S[        U5       SU S35      e[        XXeS9nU$ )Nrd   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)	generatorr#   rb   )r|   intrJ   rq   r   r0   r(   r   )
r\   r   num_channels_latentsr   r   rb   r#   r   r;   rw   s
             r5   prepare_latents+StableDiffusion3PAGPipeline.prepare_latentsr  s     ::V:99  K4000J$///	
 i&&3y>Z+GA#i.AQ R&<'gi 
 u&Vr7   c                     U R                   $ r   _guidance_scaler\   s    r5   guidance_scale*StableDiffusion3PAGPipeline.guidance_scale  s    ###r7   c                     U R                   $ r   )
_clip_skipr   s    r5   r   %StableDiffusion3PAGPipeline.clip_skip      r7   c                      U R                   S:  $ )Nr    r   r   s    r5   r   7StableDiffusion3PAGPipeline.do_classifier_free_guidance  s    ##a''r7   c                     U R                   $ r   )_joint_attention_kwargsr   s    r5   joint_attention_kwargs2StableDiffusion3PAGPipeline.joint_attention_kwargs  s    +++r7   c                     U R                   $ r   )_num_timestepsr   s    r5   num_timesteps)StableDiffusion3PAGPipeline.num_timesteps  s    """r7   c                     U R                   $ r   )
_interruptr   s    r5   	interrupt%StableDiffusion3PAGPipeline.interrupt  r   r7      g      @pilr;   g      @g        r   r   r"   r%   r   r   output_typereturn_dictr   callback_on_step_endr   	pag_scalepag_adaptive_scalec                 >   U=(       d    U R                   U R                  -  nU=(       d    U R                   U R                  -  nU R                  UUUUUU	U
UUUUUUUS9  Xl        UU l        UU l        SU l        UU l        UU l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                  b  U R                  R!                  SS5      OSnU R#                  UUUU	U
UU R$                  UUUUUU R&                  UUUS9u  nnnnU R(                  (       a;  U R+                  UUU R$                  5      nU R+                  UUU R$                  5      nO?U R$                  (       a.  [,        R.                  " UU/SS9n[,        R.                  " UU/SS9n[1        U R2                  UUUS	9u  nn[5        [        U5      X`R2                  R6                  -  -
  S5      n [        U5      U l        U R:                  R<                  R>                  n!U RA                  UU-  U!UUURB                  UUU5      nU R(                  (       a:  U R:                  RD                  n"U RG                  U RH                  U R$                  S
9  U RK                  US9 n#[M        U5       GH\  u  n$n%U RN                  (       a  M  [,        R.                  " U/UR                  S   UR                  S   -  -  5      n&U%RQ                  U&R                  S   5      n'U R;                  U&U'UUU R                  SS9S   n(U R(                  (       a)  U RS                  U(U R$                  U RT                  U%5      n(O:U R$                  (       a)  U(RW                  S5      u  n)n*U)U RT                  U*U)-
  -  -   n(URB                  n+U R2                  RY                  U(U%USS9S   nURB                  U+:w  a>  [,        RZ                  R\                  R_                  5       (       a  URa                  U+5      nUbn  0 n,U H  n-[c        5       U-   U,U-'   M     U" U U$U%U,5      n.U.Re                  SU5      nU.Re                  SU5      nU.Re                  SU5      nU.Re                  SU5      nU$[        U5      S-
  :X  d)  U$S-   U :  a0  U$S-   U R2                  R6                  -  S:X  a  U#Rg                  5         [h        (       d  GMG  [j        Rl                  " 5         GM_     SSS5        US:X  a  Un/OzXRn                  R<                  Rp                  -  U Rn                  R<                  Rr                  -   nU Rn                  Ru                  USS9S   n/U Rv                  Ry                  U/US9n/U R{                  5         U R(                  (       a  U R:                  R}                  W"5        U(       d  U/4$ [        U/S9$ ! , (       d  f       N= f)a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        will be used instead
    prompt_3 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `prompt` is
        will be used instead
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated image. This is set to 1024 by default for the best results.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated image. This is set to 1024 by default for the best results.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    sigmas (`List[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    guidance_scale (`float`, *optional*, defaults to 7.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. A higher guidance scale encourages generating images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used instead
    negative_prompt_3 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and
        `text_encoder_3`. If not defined, `negative_prompt` is used instead
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.FloatTensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
        of a plain tuple.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    callback_on_step_end (`Callable`, *optional*):
        A function that is called at the end of each denoising step during inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    max_sequence_length (`int` defaults to 256): Maximum sequence length to use with the `prompt`.
    pag_scale (`float`, *optional*, defaults to 3.0):
        The scale factor for the perturbed attention guidance. If it is set to 0.0, the perturbed attention
        guidance will not be used.
    pag_adaptive_scale (`float`, *optional*, defaults to 0.0):
        The adaptive scale factor for the perturbed attention guidance. If it is set to 0.0, `pag_scale` is
        used.

Examples:

Returns:
    [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] or `tuple`:
    [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] if `return_dict` is True, otherwise a
    `tuple`. When returning a tuple, the first element is a list with the generated images.
)	r   r   r   r<   r=   r   r>   r   ra   FNr    r   scale)r_   r   r   r   r   r   r   r<   r=   r   r>   r#   r   r`   ra   r   r   )r%   )rG   r   )total)r   timestepencoder_hidden_statespooled_projectionsr   r   r   )r   r;   r<   r=   r>   latent)r   )images)@rY   rJ   r   r   r   r   r   
_pag_scale_pag_adaptive_scalerq   rr   r   r0   rw   rp   r   getr   r   r   do_perturbed_attention_guidance%_prepare_perturbed_attention_guidancers   r   r6   r1   maxorderr   r?   rR   in_channelsr   rb   attn_processors_set_pag_attn_processorrG   progress_bar	enumerater   expand#_apply_perturbed_attention_guidancer   chunkstepbackendsmpsis_availabler|   localspopupdateXLA_AVAILABLExm	mark_stepr@   scaling_factorshift_factordecoderT   postprocessmaybe_free_model_hooksset_attn_processorr   )0r\   r_   r   r   r   r   r"   r%   r   r   r   r   r`   r   r;   r<   r=   r   r>   r   r   r   r   r   r   ra   r   r   r   r#   r   r$   num_warmup_stepsr   original_attn_procr   itlatent_model_inputr   
noise_prednoise_pred_uncondnoise_pred_textlatents_dtypecallback_kwargsr   callback_outputsimages0                                                   r5   __call__$StableDiffusion3PAGPipeline.__call__  s   D K433d6K6KKI11D4I4II 	+//'#9!5*G/Q 3 	 	
"  .#'=$##5  *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 +//(,(H(H'#9!5*Gnn"7 3!!  
	
" )( // FF5t7W7WM $(#M#M$&CTEeEe$  --!II'=}&MSTUM#(99.KMa-bhi#j  *<DNNL_agpv)w&	&s9~0CnnFZFZ0ZZ\]^!)n  $//66BB&&.. 	
 //!%!1!1!A!A((#'#:#:,0,L,L )  %89\!),1>> &+YYyM<O<OPQ<RV]VcVcdeVf<f/g%h"88$6$<$<Q$?@!--"4%*7';+/+F+F % .  
 77!%!I!I"D$D$DdFYFY[\"J 559C9I9I!9L6%!2T5H5HO^oLo5p!pJ !(..--j!WRW-XYZ[==M1~~))6688")**]";'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*4D4H4H79V51
 I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNo - :t ("E !?!??488??C_C__GHHOOGO?BE((44U4TE 	##%////0BC8O-U;;Y :9s   IXX
X)r   r   r   r   r   r   r   r   rY   rT   rZ   rW   rJ   )zblocks.1)Nr    r^   NN)r    NNr   )Nr    TNNNNNNNNr^   N)	NNNNNNNNNr   )1__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r	   r
   r   r   r   rr   r   rO   r   r   rs   r#   rb   r   r   boolFloatTensorfloatr   r   r   propertyr   r   r   r   r   r   no_gradr   EXAMPLE_DOC_STRING	Generatorr   r   r   r  __static_attributes____classcell__)r/   s   @r5   r9   r9      s5   "H ]u 5?*
**
 3*
 	*

 2*
 !*
 4*
 #*
 '*
 %*
 "#tCy.1*
 *
^ )-%&#&)-'+6c49n%6  #6 !	6
 &6 $6x &')-#' !53c49n%53  #53 &	53
 C=53 53z *.%&,0;?=A=A59>B<@EI#'#&&*#jc49n%j T#Y'j T#Y'	j
 &j  #j &*j "%T#Y"78j $E#tCy.$9:j $E#tCy.$9:j   1 12j !)):): ;j 'u'8'89j (00A0A'Bj C=j  !!j" UO#jR #!&*+/ ^oT > $ $   ( ( , , # #   ]]_12 )-4848 $##%(, #;?=A=A/0MQ/359>B<@EI%* ;?#'KO9B#&$'9r<c49n%r< 5d3i01r< 5d3i01	r<
 r< }r< !r< e%r< r< "%T#Y"78r< $E#tCy.$9:r< $E#tCy.$9:r<  (}r< E%//43H"HIJr< %++,r<    1 12!r<" !)):): ;#r<$ 'u'8'89%r<& (00A0A'B'r<( c])r<* +r<, !)c3h 8-r<. C=/r<0 'xc40@$0F'GH1r<2 -1I3r<4 !5r<6 7r<8 "9r< 3 r<r7   r9   )NNNN):r*   typingr   r   r   r   r   r   rs   transformersr	   r
   r   r   rT   r   loadersr   r   models.attention_processorr   r   models.autoencodersr   models.transformersr   
schedulersr   utilsr   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   "stable_diffusion_3.pipeline_outputr   	pag_utilsr!   torch_xla.core.xla_modelcore	xla_modelr  r  
get_loggerr  rz   r)  r   rr   r#   r&  r6   r9   r'   r7   r5   <module>r=     s     = =   1 > _ 0 8 9  . . O  ))MM 
		H	% . *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vX<"35GI\^f X<r7   