
    +hl                        S SK JrJrJrJrJr  S SKrS SKJrJ	r	  SSK
Jr  SSKJrJr  SSKJr  SSKJrJrJrJr  SS	KJr  S
SKJrJr  \" 5       (       a  S SKJs  Jr  SrOSr\R@                  " \!5      r"Sr#SS jr$ " S S\\5      r%g)    )CallableDictListOptionalUnionN)T5EncoderModelT5Tokenizer   )StableDiffusionLoraLoaderMixin)Kandinsky3UNetVQModel)DDPMScheduler)	deprecateis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutputTFa  
    Examples:
        ```py
        >>> from diffusers import AutoPipelineForText2Image
        >>> import torch

        >>> pipe = AutoPipelineForText2Image.from_pretrained(
        ...     "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
        ... )
        >>> pipe.enable_model_cpu_offload()

        >>> prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."

        >>> generator = torch.Generator(device="cpu").manual_seed(0)
        >>> image = pipe(prompt, num_inference_steps=25, generator=generator).images[0]
        ```

c                 n    XS-  -  nXS-  -  S:w  a  US-  nXS-  -  nXS-  -  S:w  a  US-  nX2-  XB-  4$ )Nr   r       )heightwidthscale_factor
new_height	new_widths        l/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/kandinsky3/pipeline_kandinsky3.pydownscale_height_and_widthr    1   s[    ?*Ja1$a
q(IQ!#Q	$i&>>>    c            %         ^  \ rS rSrSr/ SQrS\S\S\S\	S\
4
U 4S	 jjrS
 r\R                  " 5                S+S\\R                      S\\R                      S\\R                      S\\R                      4S jj5       rS r      S,S jr\S 5       r\S 5       r\S 5       r\R                  " 5       \" \5      SSSSSSSSSSSSSSSSS/4S\\\\   4   S\S\S \\\\\   4      S!\\   S"\\   S#\\   S$\\\R>                  \\R>                     4      S\\R                      S\\R                      S\\R                      S\\R                      S%\\   S&\ S'\\!\\\"/S4      S(\\   4 S) jj5       5       r#S*r$U =r%$ )-Kandinsky3Pipeline;   ztext_encoder->unet->movq)latentsprompt_embedsnegative_prompt_embedsnegative_attention_maskattention_mask	tokenizertext_encoderunet	schedulermovqc                 D   > [         TU ]  5         U R                  XX4US9  g )N)r*   r+   r,   r-   r.   )super__init__register_modules)selfr*   r+   r,   r-   r.   	__class__s         r   r1   Kandinsky3Pipeline.__init__E   s+     	ae 	 	
r!   c                     U(       aX  [         R                  " XS:H     5      XS:H  '   UR                  S5      R                  5       S-   nUS S 2S U24   nUS S 2S U24   nX4$ )Nr   r   )torch
zeros_likesummax)r3   
embeddingsr)   cut_contextmax_seq_lengths        r   process_embeds!Kandinsky3Pipeline.process_embedsS   ss    .3.>.>z\]J]?^._J*++//3779A=N#A$67J+A,>?N))r!   Tr   Nr&   r'   r)   r(   c                    Ub>  Ub;  [        U5      [        U5      La$  [        S[        U5       S[        U5       S35      eUc  U R                  nUb  [        U[        5      (       a  SnO3Ub!  [        U[
        5      (       a  [        U5      nOUR                  S   nSnUc  U R                  USUS	S
S9nUR                  R                  U5      nUR                  R                  U5      n	U R                  UU	S9nUS   nU R                  XiU5      u  piXiR                  S5      -  nU R                  b  U R                  R                  nOSnUR                  XS9nUR                  u  nnnUR!                  SUS5      nUR#                  UU-  US5      nU	R!                  US5      n	U(       Ga+  UGc'  Uc  S/U-  nOK[        U[        5      (       a  U/nO2U[        U5      :w  a!  [%        SU S[        U5       SU SU S3	5      eUnUb  U R                  USSS	S	S
S9nUR                  R                  U5      nUR                  R                  U5      n
U R                  UU
S9nUS   nUSS2SUR                  S   24   nU
SS2SUR                  S   24   n
XzR                  S5      -  nO,[&        R(                  " U5      n[&        R(                  " U	5      n
U(       as  UR                  S   nUR                  XS9nUR                  UR                  :w  a:  UR!                  SUS5      nUR#                  X-  US5      nU
R!                  US5      n
OSnSn
XgX4$ )a  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`, *optional*):
        torch device to place the resulting embeddings on
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
        Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    attention_mask (`torch.Tensor`, *optional*):
        Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
    negative_attention_mask (`torch.Tensor`, *optional*):
        Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
Nz?`negative_prompt` should be the same type to `prompt`, but got z != .r   r      
max_lengthTpt)paddingrD   
truncationreturn_tensors)r)   r   )dtypedevicer7    z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)rF   rD   rG   return_attention_maskrH   )type	TypeError_execution_device
isinstancestrlistlenshaper*   	input_idstor)   r+   r?   	unsqueezerI   repeatview
ValueErrorr8   r9   )r3   promptdo_classifier_free_guidancenum_images_per_promptrJ   negative_promptr&   r'   _cut_contextr)   r(   
batch_sizerD   text_inputstext_input_idsrI   bs_embedseq_len_uncond_tokensuncond_inputs                        r   encode_prompt Kandinsky3Pipeline.encode_prompt[   s   T /"=F|4#88UVZ[jVkUl mV~Q( 
 >++F*VS"9"9JJvt$<$<VJ&,,Q/J
 ..$%# ) K )2255f=N(77::6BN --- . M *!,M,0,?,?_k,l)M),D,DQ,GGM(%%++EE%((u(D,22'1%,,Q0EqI%**86K+KWVXY'../DaH&+A+I &!#z 1OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0*#~~!("#*.#'  .   ".!7!7!:!:6!B*6*E*E*H*H*P')-):):"#: *; *& *@)B&)?C[]EXEXYZE[C[@[)\&*A!E]}GZGZ[\G]E]B]*^')?BcBcdeBf)f& */)9)9-)H&*/*:*:>*J'&,2215G%;%>%>U%>%Z"%++}/B/BB)?)F)FqJ_ab)c&)?)D)DZEgiprt)u&*A*H*HI^`a*b' &*"&*#n]]r!   c                     Uc  [        XX2S9nO<UR                  U:w  a  [        SUR                   SU 35      eUR                  U5      nXVR                  -  nU$ )N)	generatorrJ   rI   zUnexpected latents shape, got z, expected )r   rT   rZ   rV   init_noise_sigma)r3   rT   rI   rJ   rk   r%   r-   s          r   prepare_latents"Kandinsky3Pipeline.prepare_latents   s`    ?"5fZG}}% #A'--P[\a[b!cddjj(G666r!   c	           
      D  ^  Ub6  [        U[        5      (       a  US::  a  [        SU S[        U5       S35      eUbW  [	        U 4S jU 5       5      (       d=  [        ST R
                   SU V	s/ s H  oT R
                  ;  d  M  U	PM     sn	 35      eUb  Ub  [        SU S	U S
35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  Ub  [        SU SU S
35      eUbC  Ub@  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eUb  Uc  [        S5      eUbI  UbF  UR                  S S UR                  :w  a)  [        SUR                  S S  SUR                   S35      eUb  Uc  [        S5      eUbK  UbG  UR                  S S UR                  :w  a)  [        SUR                  S S  SUR                   S35      eg g g s  sn	f )Nr   z5`callback_steps` has to be a positive integer but is z	 of type rB   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN_callback_tensor_inputs.0kr3   s     r   	<genexpr>2Kandinsky3Pipeline.check_inputs.<locals>.<genexpr>        F
7Y!---7Y   2`callback_on_step_end_tensor_inputs` has to be in , but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zLPlease provide `negative_attention_mask` along with `negative_prompt_embeds`r   z`negative_prompt_embeds` and `negative_attention_mask` must have the same batch_size and token length when passed directly, but got: `negative_prompt_embeds` z != `negative_attention_mask` z:Please provide `attention_mask` along with `prompt_embeds`z`prompt_embeds` and `attention_mask` must have the same batch_size and token length when passed directly, but got: `prompt_embeds` z != `attention_mask` )	rP   intrZ   rM   allrs   rQ   rR   rT   )
r3   r[   callback_stepsr^   r&   r'   "callback_on_step_end_tensor_inputsr)   r(   rv   s
   `         r   check_inputsKandinsky3Pipeline.check_inputs   s    %z.#/N/NR`deReGGW X(),  .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8 
 "-2I2Qkll!-2I2U%++BQ/3J3P3PP 66L6R6RSUTU6V5W X/556a9  $)?YZZ$)C""2A&.*>*>> --:-@-@!-D,E F&,,-Q0  ? *D$U pHs   /HHc                     U R                   $ rq   _guidance_scaler3   s    r   guidance_scale!Kandinsky3Pipeline.guidance_scaleB  s    ###r!   c                      U R                   S:  $ )Nr   r   r   s    r   r\   .Kandinsky3Pipeline.do_classifier_free_guidanceF  s    ##a''r!   c                     U R                   $ rq   )_num_timestepsr   s    r   num_timesteps Kandinsky3Pipeline.num_timestepsJ  s    """r!      g      @i   pilr%   r[   num_inference_stepsr   r^   r]   r   r   rk   output_typereturn_dictcallback_on_step_endr   c                 
  ^  UR                  SS5      nUR                  SS5      nUb  [        SSS5        Ub  [        SSS5        UbX  [        U 4S jU 5       5      (       d>  [        ST R                   S	U Vs/ s H  nUT R                  ;  d  M  UPM     sn 35      eS
nT R
                  nT R                  UUUU	U
UUU5        UT l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOU	R                  S   nT R                  UT R                  UUUU	U
UUUS9
u  ppT R                  (       a<  [        R                   " X/5      n	[        R                   " X/5      R#                  5       nT R$                  R'                  UUS9  T R$                  R(                  n[+        XgS5      u  pgT R-                  UU-  SXg4U	R.                  UUUT R$                  5      n[1        T S5      (       a'  T R2                  b  T R2                  R5                  5         [        U5      UT R$                  R6                  -  -
  n[        U5      T l        T R;                  US9 n[=        U5       GH  u  nnT R                  (       a  [        R                   " U/S-  5      OUnT R?                  UUU	USS9S   nT R                  (       a"  URA                  S5      u  n n!US-   U!-  UU -  -
  nT R$                  RC                  UUUUS9RD                  nUb  0 n"U H  n[G        5       U   U"U'   M     U" T UUU"5      n#U#R                  SU5      nU#R                  SU	5      n	U#R                  SU
5      n
U#R                  SU5      nU#R                  SU5      nU[        U5      S-
  :X  d)  US-   U:  a`  US-   T R$                  R6                  -  S:X  a@  URI                  5         Ub-  UU-  S:X  a$  U[K        T R$                  SS5      -  n$U" U$UU5        [L        (       d  GM  [N        RP                  " 5         GM     US;  a  [        SU 35      eUS :X  d  T RR                  RU                  US
S!9S"   n%US#;   aX  U%S$-  S$-   n%U%RW                  SS5      n%U%RY                  5       R[                  SSS%S5      R]                  5       R_                  5       n%US&:X  a  T Ra                  U%5      n%OUn%T Rc                  5         U(       d  U%4sSSS5        $ [e        U%S'9sSSS5        $ s  snf ! , (       d  f       g= f)(u  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    num_inference_steps (`int`, *optional*, defaults to 25):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
        timesteps are used. Must be in descending order.
    guidance_scale (`float`, *optional*, defaults to 3.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    height (`int`, *optional*, defaults to self.unet.config.sample_size):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to self.unet.config.sample_size):
        The width in pixels of the generated image.
    eta (`float`, *optional*, defaults to 0.0):
        Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
        applies to [`schedulers.DDIMScheduler`], will be ignored for others.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    attention_mask (`torch.Tensor`, *optional*):
        Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
    negative_attention_mask (`torch.Tensor`, *optional*):
        Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
    callback (`Callable`, *optional*):
        A function that will be called every `callback_steps` steps during inference. The function will be
        called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
    callback_steps (`int`, *optional*, defaults to 1):
        The frequency at which the `callback` function will be called. If not specified, the callback will be
        called at every step.
    clean_caption (`bool`, *optional*, defaults to `True`):
        Whether or not to clean the caption before creating embeddings. Requires `beautifulsoup4` and `ftfy` to
        be installed. If the dependencies are not installed, the embeddings will be created from the raw
        prompt.
    cross_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`

callbackNr   z1.0.0zhPassing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`znPassing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`c              3   @   >#    U  H  oTR                   ;   v   M     g 7frq   rr   rt   s     r   rw   .Kandinsky3Pipeline.__call__.<locals>.<genexpr>  ry   rz   r{   r|   Tr   r   )r]   rJ   r^   r&   r'   r_   r)   r(   )rJ         text_encoder_offload_hook)totalr   F)encoder_hidden_statesencoder_attention_maskr   g      ?)rk   r%   r&   r'   r)   r(   order)rE   npr   latentzSOnly the output types `pt`, `pil`, `np` and `latent` are supported not output_type=r   )force_not_quantizesample)r   r   g      ?r
   r   )images)3popr   r~   rZ   rs   rO   r   r   rP   rQ   rR   rS   rT   rh   r\   r8   catboolr-   set_timesteps	timestepsr    rm   rI   hasattrr   offloadr   r   progress_bar	enumerater,   chunkstepprev_samplelocalsupdategetattrXLA_AVAILABLExm	mark_stepr.   decodeclampcpupermutefloatnumpynumpy_to_pilmaybe_free_model_hooksr   )&r3   r[   r   r   r^   r]   r   r   rk   r&   r'   r)   r(   r   r   r%   r   r   kwargsr   r   rv   r=   rJ   r`   r   num_warmup_stepsr   itlatent_model_input
noise_prednoise_pred_uncondnoise_pred_textcallback_kwargscallback_outputsstep_idximages&   `                                     r   __call__Kandinsky3Pipeline.__call__N  s   @ ::j$/$4d;z
 %  A .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  bc  ko  kG  kG  bGpq  |^  pH  oI  J  '' 	".#		
  .*VS"9"9JJvt$<$<VJ&,,Q/J Z^YkYk,,"7+'#9$)$; Zl Z
V~ ++!II'=&MNM"YY(?'PQVVXN$$%8$HNN,,	 36!D&&//FBNN
 4455$:X:X:d**224 y>,?$..BVBV,VV!)n%89\!),1AEAaAaUYYy1}%=gn" "YY&*7+9 % '  
 339C9I9I!9L6%"03"6/!IN]nLn!nJ ..--'	 . 
 +  (3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*%5%9%9:JN%[N.>.B.BC\^u.v+I**A9I/IqSTuX\XfXfXlXlNlpqNq '')+N0Ba0G#$(K#K 1g6 =LLN[ -` "?? ijuivw  (*		((T(J8T-/!CK#-E!KK1-E!IIK//1a;AACIIKE%' --e4E'')xM :9P 'e4Q :9E pHD :9s&   :T1T18F6T63C"T6T66
U)r   r   )	Tr   NNNNFNN)NNNNNN)&__name__
__module____qualname____firstlineno__model_cpu_offload_seqrs   r	   r   r   r   r   r1   r?   r8   no_gradr   Tensorrh   rm   r   propertyr   r\   r   r   EXAMPLE_DOC_STRINGr   rQ   r   r}   r   	Generatorr   r   r   r   __static_attributes____classcell__)r4   s   @r   r#   r#   ;   s   6

 %
 	

 !
 
* ]]_ %)049=15:>S^  -S^ !) 6S^ !.S^ "*%,,!7S^ S^j	 #+/ $DL $ $ ( ( # # ]]_12 )-#% #;?/0 $#MQ049=15:>%* KO9B%|5c49n%|5 !|5 	|5
 "%T#Y"78|5  (}|5 |5 }|5 E%//43H"HIJ|5  -|5 !) 6|5 !.|5 "*%,,!7|5 c]|5 |5" 'xc40@$0F'GH#|5$ -1I%|5 3 |5r!   r#   )r   )&typingr   r   r   r   r   r8   transformersr   r	   loadersr   modelsr   r   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   loggerr   r    r#   r   r!   r   <module>r      sv    8 8  4 5 - '  . C ))MM			H	% (?Q5*,J Q5r!   