
    +h)8                         S SK JrJrJrJrJr  S SKrSSKJrJ	r	  SSK
Jr  SSKJrJrJrJr  SSKJr  SS	KJrJr  \" 5       (       a  S SKJs  Jr  S
rOSr\R6                  " \5      rSrSS jr " S S\5      r g)    )CallableDictListOptionalUnionN   )UNet2DConditionModelVQModel)DDPMScheduler)	deprecateis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutputTFae  
    Examples:
        ```py
        >>> from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline
        >>> import torch

        >>> pipe_prior = KandinskyV22PriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior")
        >>> pipe_prior.to("cuda")
        >>> prompt = "red cat, 4k photo"
        >>> out = pipe_prior(prompt)
        >>> image_emb = out.image_embeds
        >>> zero_image_emb = out.negative_image_embeds
        >>> pipe = KandinskyV22Pipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder")
        >>> pipe.to("cuda")
        >>> image = pipe(
        ...     image_embeds=image_emb,
        ...     negative_image_embeds=zero_image_emb,
        ...     height=768,
        ...     width=768,
        ...     num_inference_steps=50,
        ... ).images
        >>> image[0].save("cat.png")
        ```
c                 n    XS-  -  nXS-  -  S:w  a  US-  nXS-  -  nXS-  -  S:w  a  US-  nX2-  XB-  4$ )Nr   r       )heightwidthscale_factor
new_height	new_widths        p/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.pydownscale_height_and_widthr   >   s[    ?*Ja1$a
q(IQ!#Q	$i&>>>    c                      ^  \ rS rSrSrSr/ SQrS\S\S\	4U 4S jjr
S	 r\S
 5       r\S 5       r\S 5       r\R"                  " 5       \" \5      SSSSSSSSSSS/4S\\R*                  \\R*                     4   S\\R*                  \\R*                     4   S\S\S\S\S\S\\\R4                  \\R4                     4      S\\R*                     S\\   S\S\\\\\/S4      S \\   4S! jj5       5       rS"r U =r!$ )#KandinskyV22PipelineH   ak  
Pipeline for text-to-image generation using Kandinsky

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
        A scheduler to be used in combination with `unet` to generate image latents.
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the image embedding.
    movq ([`VQModel`]):
        MoVQ Decoder to generate the image from the latents.
z
unet->movq)latentsimage_embedsnegative_image_embedsunet	schedulermovqc                    > [         TU ]  5         U R                  UUUS9  S[        U R                  R
                  R                  5      S-
  -  U l        g )N)r%   r&   r'   r   r   )super__init__register_moduleslenr'   configblock_out_channelsmovq_scale_factor)selfr%   r&   r'   	__class__s       r   r*   KandinskyV22Pipeline.__init__[   sV     	 	 	

 "#s499+;+;+N+N'ORS'S!Tr   c                     Uc  [        XX2S9nO<UR                  U:w  a  [        SUR                   SU 35      eUR                  U5      nXVR                  -  nU$ )N)	generatordevicedtypezUnexpected latents shape, got z, expected )r   shape
ValueErrortoinit_noise_sigma)r0   r7   r6   r5   r4   r"   r&   s          r   prepare_latents$KandinskyV22Pipeline.prepare_latentsk   s`    ?"5fZG}}% #A'--P[\a[b!cddjj(G666r   c                     U R                   $ N_guidance_scaler0   s    r   guidance_scale#KandinskyV22Pipeline.guidance_scalev   s    ###r   c                      U R                   S:  $ )Nr   r?   rA   s    r   do_classifier_free_guidance0KandinskyV22Pipeline.do_classifier_free_guidancez   s    ##a''r   c                     U R                   $ r>   )_num_timestepsrA   s    r   num_timesteps"KandinskyV22Pipeline.num_timesteps~   s    """r   i   d   g      @r   NpilTr"   r#   r$   r   r   num_inference_stepsrB   num_images_per_promptr4   output_typereturn_dictcallback_on_step_end"callback_on_step_end_tensor_inputsc           
      V
  ^  UR                  SS5      nUR                  SS5      nUb  [        SSS5        Ub  [        SSS5        UbX  [        U 4S jU 5       5      (       d>  [        ST R                   S	U Vs/ s H  nUT R                  ;  d  M  UPM     sn 35      eT R
                  nUT l        [        U[        5      (       a  [        R                  " US
S9nUR                  S
   U-  n[        U[        5      (       a  [        R                  " US
S9nT R                  (       aX  UR                  US
S9nUR                  US
S9n[        R                  " X!/S
S9R                  T R                  R                   US9nT R"                  R%                  UUS9  T R"                  R&                  nT R                  R(                  R*                  n[-        X4T R.                  5      u  p4T R1                  UUX44UR                   UUU	T R"                  5      n	[3        U5      T l        [7        T R9                  U5      5       GH  u  nnT R                  (       a  [        R                  " U	/S-  5      OU	nSU0nT R                  UUSUSS9S
   nT R                  (       at  UR;                  U	R                  S   SS9u  nnUR=                  S5      u  nnUR=                  S5      u  nnUT R>                  UU-
  -  -   n[        R                  " UU/SS9n[A        T R"                  R(                  S5      (       a$  T R"                  R(                  RB                  S;   d   UR;                  U	R                  S   SS9u  nnT R"                  RE                  UUU	US9S
   n	Ub\  0 n U H  n[G        5       U   U U'   M     U" T UUU 5      n!U!R                  SU	5      n	U!R                  SU5      nU!R                  SU5      nUb-  UU-  S
:X  a$  U[I        T R"                  SS5      -  n"U" U"UU	5        [J        (       d  GM  [L        RN                  " 5         GM     U
S;  a  [        SU
 35      eU
S:X  d  T RP                  RS                  U	SS9S   n#U
S;   aX  U#S -  S -   n#U#RU                  S
S5      n#U#RW                  5       RY                  S
SS!S5      R[                  5       R]                  5       n#U
S":X  a  T R_                  U#5      n#OU	n#T Ra                  5         U(       d  U#4$ [c        U#S#9$ s  snf )$a  
Function invoked when calling the pipeline for generation.

Args:
    image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
        The clip image embeddings for text prompt, that will be used to condition the image generation.
    negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
        The clip image embeddings for negative text prompt, will be used to condition the image generation.
    height (`int`, *optional*, defaults to 512):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 512):
        The width in pixels of the generated image.
    num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`
callbackNcallback_stepsz1.0.0zhPassing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`znPassing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`c              3   @   >#    U  H  oTR                   ;   v   M     g 7fr>   )_callback_tensor_inputs).0kr0   s     r   	<genexpr>0KandinskyV22Pipeline.__call__.<locals>.<genexpr>   s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found r   )dim)r6   r5   )r5   r   r#   F)sampletimestepencoder_hidden_statesadded_cond_kwargsrP   r   variance_type)learnedlearned_range)r4   r"   r$   order)ptnprL   latentzIOnly the output types `pt`, `pil` and `np` are supported not output_type=rg   T)force_not_quantizer]   )rf   rL   g      ?r   rL   )images)2popr   allr8   rW   _execution_devicer@   
isinstancelisttorchcatr7   rE   repeat_interleaver9   r%   r6   r&   set_timesteps	timestepsr-   in_channelsr   r/   r;   r,   rH   	enumerateprogress_barsplitchunkrB   hasattrra   steplocalsgetattrXLA_AVAILABLExm	mark_stepr'   decodeclampcpupermutefloatnumpynumpy_to_pilmaybe_free_model_hooksr   )$r0   r#   r$   r   r   rM   rB   rN   r4   r"   rO   rP   rQ   rR   kwargsrT   rU   rY   r5   
batch_sizers   num_channels_latentsitlatent_model_inputr`   
noise_predvariance_prednoise_pred_uncondnoise_pred_text_variance_pred_textcallback_kwargscallback_outputsstep_idximages$   `                                   r   __call__KandinskyV22Pipeline.__call__   s   L ::j$/$4d;z
 %  A .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  bc  ko  kG  kG  bGpq  |^  pH  oI  J  ''-lD)) 99\q9L!''*-BB
+T22$)II.C$K!++'99:OUV9WL$9$K$KLagh$K$i! 99&;%JPQRUUiioof V L 	$$%8$HNN,,	#yy//;;26$BXBXY &&-v=NN
 ")nd//	:;DAq=A=]=]G9q=!9cj!/ >)&*"3! #  J //,6,<,<W]]1=MST,<,U)
M5?5E5Ea5H2!?(5(;(;A(>%%.1D1DZkHk1ll
"YY
4F'GQO
 --??NN))77;WW * 0 0q1Aq 0 I
A nn))#	 * 
 G $/"$;A)/!OA& <#7aO#T *..y'B/33NLQ(8(<(<=TVk(l%#N(:a(? CC1g.}c <f ;;hithuvwwh&II$$W$FxPEm+c)Aq)		++Aq!Q7==?EEGe#))%0E##%8O"%00_ pHs   :T&T&)r@   rH   r/   )"__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqrW   r	   r   r
   r*   r;   propertyrB   rE   rI   ro   no_gradr   EXAMPLE_DOC_STRINGr   Tensorr   intr   r   	Generatorstrboolr   r   r   __static_attributes____classcell__)r1   s   @r   r    r    H   s    )RU"U !U 	U 	 $ $ ( ( # # ]]_12
 #& #%&MQ*.%* KO9BG1ELL$u||*<<=G1  %U\\43E%EFG1 	G1
 G1 !G1 G1  #G1 E%//43H"HIJG1 %,,'G1 c]G1 G1 'xc40@$0F'GHG1 -1IG1 3 G1r   r    )   )!typingr   r   r   r   r   ro   modelsr	   r
   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelr~   r}   
get_loggerr   loggerr   r   r    r   r   r   <module>r      si    9 8  3 ' Z Z - C ))MM			H	% 4?C1, C1r   