
    +h                        S SK JrJrJrJr  S SKrS SKrS SKJ	r	J
r
JrJrJr  SSKJrJrJr  SSKJrJrJr  SSKJr  SS	KJr  S
SKJr  S
SKJr  S
SKJr  S
SK J!r!  S
SK"J#r#  Sr$Sr%Sr& " S S\5      r' " S S\5      r( " S S\5      r)g)    )CallableListOptionalUnionN)CLIPImageProcessorCLIPTextModelWithProjectionCLIPTokenizerCLIPVisionModelWithProjectionXLMRobertaTokenizer   )PriorTransformerUNet2DConditionModelVQModel)DDIMSchedulerDDPMSchedulerUnCLIPScheduler)replace_example_docstring   )DiffusionPipeline   )KandinskyPipeline)KandinskyImg2ImgPipeline)KandinskyInpaintPipeline)KandinskyPriorPipeline)MultilingualCLIPa  
    Examples:
        ```py
        from diffusers import AutoPipelineForText2Image
        import torch

        pipe = AutoPipelineForText2Image.from_pretrained(
            "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
        )
        pipe.enable_model_cpu_offload()

        prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"

        image = pipe(prompt=prompt, num_inference_steps=25).images[0]
        ```
a~  
    Examples:
        ```py
        from diffusers import AutoPipelineForImage2Image
        import torch
        import requests
        from io import BytesIO
        from PIL import Image
        import os

        pipe = AutoPipelineForImage2Image.from_pretrained(
            "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
        )
        pipe.enable_model_cpu_offload()

        prompt = "A fantasy landscape, Cinematic lighting"
        negative_prompt = "low quality, bad quality"

        url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"

        response = requests.get(url)
        image = Image.open(BytesIO(response.content)).convert("RGB")
        image.thumbnail((768, 768))

        image = pipe(prompt=prompt, image=original_image, num_inference_steps=25).images[0]
        ```
a  
    Examples:
        ```py
        from diffusers import AutoPipelineForInpainting
        from diffusers.utils import load_image
        import torch
        import numpy as np

        pipe = AutoPipelineForInpainting.from_pretrained(
            "kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16
        )
        pipe.enable_model_cpu_offload()

        prompt = "A fantasy landscape, Cinematic lighting"
        negative_prompt = "low quality, bad quality"

        original_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/cat.png"
        )

        mask = np.zeros((768, 768), dtype=np.float32)
        # Let's mask out an area above the cat's head
        mask[:250, 250:-250] = 1

        image = pipe(prompt=prompt, image=original_image, mask_image=mask, num_inference_steps=25).images[0]
        ```
c            !       (  ^  \ rS rSrSrSrSrS/rS\S\	S\
S	\\\4   S
\S\S\S\S\S\S\4U 4S jjrS*S\\   4S jjrS+S\\   S\\R6                  \4   4S jjrS+S jrS r\R@                  " 5       \!" \"5                    S,S\\\#\   4   S\\\\#\   4      S\S\$S\S\S\S \$S!\S"\\\RJ                  \#\RJ                     4      S#\\RL                     S$\\   S%\\\\\RL                  /S4      S&\S'\'4S( jj5       5       r(S)r)U =r*$ )-KandinskyCombinedPipelineq   av  
Combined Pipeline for text-to-image generation using Kandinsky

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    text_encoder ([`MultilingualCLIP`]):
        Frozen text-encoder.
    tokenizer ([`XLMRobertaTokenizer`]):
        Tokenizer of class
    scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
        A scheduler to be used in combination with `unet` to generate image latents.
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the image embedding.
    movq ([`VQModel`]):
        MoVQ Decoder to generate the image from the latents.
    prior_prior ([`PriorTransformer`]):
        The canonical unCLIP prior to approximate the image embedding from the text embedding.
    prior_image_encoder ([`CLIPVisionModelWithProjection`]):
        Frozen image-encoder.
    prior_text_encoder ([`CLIPTextModelWithProjection`]):
        Frozen text-encoder.
    prior_tokenizer (`CLIPTokenizer`):
         Tokenizer of class
         [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    prior_scheduler ([`UnCLIPScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
TzNtext_encoder->unet->movq->prior_prior->prior_image_encoder->prior_text_encoderprior_priortext_encoder	tokenizerunet	schedulermovqprior_image_encoderprior_text_encoderprior_tokenizerprior_schedulerprior_image_processorc                    > [         TU ]  5         U R                  UUUUUUUUU	U
US9  [        UUUU	U
US9U l        [        UUUUUS9U l        g N)r    r!   r"   r#   r$   r   r%   r&   r'   r(   r)   )priorimage_encoderr    r!   r#   image_processor)r    r!   r"   r#   r$   )super__init__register_modulesr   
prior_piper   decoder_pipeselfr    r!   r"   r#   r$   r   r%   r&   r'   r(   r)   	__class__s               s/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.pyr0   "KandinskyCombinedPipeline.__init__   s     	%# 31++"7 	 	
 1-+%%1
 .%
    Nattention_opc                 :    U R                   R                  U5        g Nr3   *enable_xformers_memory_efficient_attentionr5   r:   s     r7   r>   DKandinskyCombinedPipeline.enable_xformers_memory_efficient_attention       DD\Rr9   gpu_iddevicec                 h    U R                   R                  XS9  U R                  R                  XS9  g)u  
Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
rB   rC   Nr2   enable_sequential_cpu_offloadr3   r5   rB   rC   s      r7   rG   7KandinskyCombinedPipeline.enable_sequential_cpu_offload   s0     	55V5S77v7Ur9   c                     U R                   R                  XS9  U R                  R                  XS9  U R                  R                  5         g N)iterabletotalr2   progress_barr3   enable_model_cpu_offloadr5   rL   rM   s      r7   rO   &KandinskyCombinedPipeline.progress_bar   @    $$h$D&&&F224r9   c                 t    U R                   R                  " S0 UD6  U R                  R                  " S0 UD6  g N r2   set_progress_bar_configr3   r5   kwargss     r7   rX   1KandinskyCombinedPipeline.set_progress_bar_config   .    //9&911;F;r9   promptnegative_promptnum_inference_stepsguidance_scalenum_images_per_promptheightwidthprior_guidance_scaleprior_num_inference_steps	generatorlatentsoutput_typecallbackcallback_stepsreturn_dictc                    U R                  UUUU	U
UUSSS9	nUS   nUS   n[        U[        [        45      (       d  U/OUn[	        U5      UR
                  S   :  a=  UR
                  S   [	        U5      -  S:X  a  UR
                  S   [	        U5      -  U-  nU R                  UUUUUUU
UUUUUS9nU R                  5         U$ )a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `guidance_scale` is less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    height (`int`, *optional*, defaults to 512):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 512):
        The width in pixels of the generated image.
    prior_guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    prior_num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    callback (`Callable`, *optional*):
        A function that calls every `callback_steps` steps during inference. The function is called with the
        following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
    callback_steps (`int`, *optional*, defaults to 1):
        The frequency at which the `callback` function is called. If not specified, the callback is called at
        every step.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`
ptF	r]   r^   ra   r_   rf   rg   r`   rh   rk   r   r   )r]   image_embedsnegative_image_embedsrc   rb   r_   rf   r`   rh   ri   rj   rk   )r2   
isinstancelisttuplelenshaper3   maybe_free_model_hooks)r5   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   prior_outputsro   rp   outputss                       r7   __call__"KandinskyCombinedPipeline.__call__   s    Z +"7 9/ ( 

 %Q' -a 0!+FT5M!B!B&v;++A..<3E3Ea3H3v;3VZ[3["((+s6{:fDF##%"7 3)#)# $ 
 	##%r9   r3   r2   r<   NNNd         @r      r   r      NNpilNr   T)+__name__
__module____qualname____firstlineno____doc___load_connected_pipesmodel_cpu_offload_seq_exclude_from_cpu_offloadr   r   r   r   r   r   r   r   r
   r   r	   r   r   r0   r   r   r>   inttorchrC   strrG   rO   rX   no_gradr   TEXT2IMAGE_EXAMPLE_DOC_STRINGr   float	GeneratorTensorboolry   __static_attributes____classcell__r6   s   @r7   r   r   q   sA   < !l!.+
&+
 '+
 #	+

 56+
 +
 &+
 ;+
 8+
 '+
 )+
  2+
ZSxPXGY SVHSM VRWX]XdXdfiXiRj V5
< ]]_<= <@#& #%&&))+MQ*.%*GK !oc49n%o "%T#Y"78o !	o
 o  #o o o $o $'o E%//43H"HIJo %,,'o c]o 8S#u||$<d$BCDo o  !o > or9   r   c            %         ^  \ rS rSrSrSrSrS/rS\S\	S\
S	\\\4   S
\S\S\S\S\S\S\4U 4S jjrS,S\\   4S jjrS-S\\   S\\R6                  \4   4S jjrS-S jrS r\R@                  " 5       \!" \"5                     S.S\\\#\   4   S\\RH                  \%RL                  RL                  \#\RH                     \#\%RL                  RL                     4   S\\\\#\   4      S\S\'S\S\'S \S!\S"\'S#\S$\\\RP                  \#\RP                     4      S%\\RH                     S&\\   S'\\\\\RH                  /S4      S(\S)\)4"S* jj5       5       r*S+r+U =r,$ )/ KandinskyImg2ImgCombinedPipelineiK  aw  
Combined Pipeline for image-to-image generation using Kandinsky

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    text_encoder ([`MultilingualCLIP`]):
        Frozen text-encoder.
    tokenizer ([`XLMRobertaTokenizer`]):
        Tokenizer of class
    scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
        A scheduler to be used in combination with `unet` to generate image latents.
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the image embedding.
    movq ([`VQModel`]):
        MoVQ Decoder to generate the image from the latents.
    prior_prior ([`PriorTransformer`]):
        The canonical unCLIP prior to approximate the image embedding from the text embedding.
    prior_image_encoder ([`CLIPVisionModelWithProjection`]):
        Frozen image-encoder.
    prior_text_encoder ([`CLIPTextModelWithProjection`]):
        Frozen text-encoder.
    prior_tokenizer (`CLIPTokenizer`):
         Tokenizer of class
         [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    prior_scheduler ([`UnCLIPScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
TNprior_text_encoder->prior_image_encoder->prior_prior->text_encoder->unet->movqr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   c                    > [         TU ]  5         U R                  UUUUUUUUU	U
US9  [        UUUU	U
US9U l        [        UUUUUS9U l        g r+   )r/   r0   r1   r   r2   r   r3   r4   s               r7   r0   )KandinskyImg2ImgCombinedPipeline.__init__n       	%# 31++"7 	 	
 1-+%%1
 5%
r9   Nr:   c                 :    U R                   R                  U5        g r<   r=   r?   s     r7   r>   KKandinskyImg2ImgCombinedPipeline.enable_xformers_memory_efficient_attention  rA   r9   rB   rC   c                 h    U R                   R                  XS9  U R                  R                  XS9  ga  
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
`torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
Note that offloading happens on a submodule basis. Memory savings are higher than with
`enable_model_cpu_offload`, but performance is lower.
rE   NrF   rH   s      r7   rG   >KandinskyImg2ImgCombinedPipeline.enable_sequential_cpu_offload  0     	55V5S77v7Ur9   c                     U R                   R                  XS9  U R                  R                  XS9  U R                  R                  5         g rK   rN   rQ   s      r7   rO   -KandinskyImg2ImgCombinedPipeline.progress_bar  rS   r9   c                 t    U R                   R                  " S0 UD6  U R                  R                  " S0 UD6  g rU   rW   rY   s     r7   rX   8KandinskyImg2ImgCombinedPipeline.set_progress_bar_config  r\   r9   r]   imager^   r_   r`   ra   strengthrb   rc   rd   re   rf   rg   rh   ri   rj   rk   c                    U R                  UUUUUUU
SSS9	nUS   nUS   n[        U[        [        45      (       d  U/OUn[        U[        R
                  R
                  5      (       a  U/OUn[        U5      UR                  S   :  a=  UR                  S   [        U5      -  S:X  a  UR                  S   [        U5      -  U-  n[        U[        [        45      (       aY  [        U5      UR                  S   :  a=  UR                  S   [        U5      -  S:X  a  UR                  S   [        U5      -  U-  nU R                  UUUUUU	UUUUUUUUS9nU R                  5         U$ )a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
        `Image`, or tensor representing an image batch, that will be used as the starting point for the
        process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
        again.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `guidance_scale` is less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    height (`int`, *optional*, defaults to 512):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 512):
        The width in pixels of the generated image.
    strength (`float`, *optional*, defaults to 0.3):
        Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
        will be used as a starting point, adding more noise to it the larger the `strength`. The number of
        denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will
        be maximum and the denoising process will run for the full number of iterations specified in
        `num_inference_steps`. A value of 1, therefore, essentially ignores `image`.
    prior_guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    prior_num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    callback (`Callable`, *optional*):
        A function that calls every `callback_steps` steps during inference. The function is called with the
        following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
    callback_steps (`int`, *optional*, defaults to 1):
        The frequency at which the `callback` function is called. If not specified, the callback is called at
        every step.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`
rm   Frn   r   r   )r]   r   ro   rp   r   rc   rb   r_   rf   r`   rh   ri   rj   rk   
r2   rq   rr   rs   PILImagert   ru   r3   rv   )r5   r]   r   r^   r_   r`   ra   r   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rw   ro   rp   rx   s                         r7   ry   )KandinskyImg2ImgCombinedPipeline.__call__  s   r +"7 9/ ( 

 %Q' -a 0!+FT5M!B!B&%fciioo>>Ev;++A..<3E3Ea3H3v;3VZ[3["((+s6{:fDF utUm,,E
\//22""1%E
2a7!''*c%j8EAE##%"7 3)#)# $ 
" 	##%r9   r{   r<   r|   )Nr~   r   r   g333333?r   r   r   r   NNr   Nr   T)-r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   r	   r   r   r0   r   r   r>   r   r   rC   r   rG   rO   rX   r   r   IMAGE2IMAGE_EXAMPLE_DOC_STRINGr   r   r   r   r   r   r   ry   r   r   r   s   @r7   r   r   K  s   < !l!.+
&+
 '+
 #	+

 56+
 +
 &+
 ;+
 8+
 '+
 )+
  2+
ZSxPXGY S	VHSM 	VRWX]XdXdfiXiRj 	V5
< ]]_=>
 <@#& #%&&))+MQ*.%*GK %Ec49n%E U\\399??D4FSYY__H]]^E "%T#Y"78	E
 !E E  #E E E E $E $'E E%//43H"HIJE %,,'E c]E  8S#u||$<d$BCD!E" #E$ %E ? Er9   r   c            %       T  ^  \ rS rSrSrSrSrS/rS\S\	S\
S	\\\4   S
\S\S\S\S\S\S\4U 4S jjrS,S\\   4S jjrS-S\\   S\\R6                  \4   4S jjrS-S jrS r\R@                  " 5       \!" \"5                    S.S\\\#\   4   S\\RH                  \%RL                  RL                  \#\RH                     \#\%RL                  RL                     4   S\\RH                  \%RL                  RL                  \#\RH                     \#\%RL                  RL                     4   S\\\\#\   4      S\S\'S\S \S!\S"\'S#\S$\\\RP                  \#\RP                     4      S%\\RH                     S&\\   S'\\\\\RH                  /S4      S(\S)\)4"S* jj5       5       r*S+r+U =r,$ )/ KandinskyInpaintCombinedPipelinei<  ah  
Combined Pipeline for generation using Kandinsky

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    text_encoder ([`MultilingualCLIP`]):
        Frozen text-encoder.
    tokenizer ([`XLMRobertaTokenizer`]):
        Tokenizer of class
    scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
        A scheduler to be used in combination with `unet` to generate image latents.
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the image embedding.
    movq ([`VQModel`]):
        MoVQ Decoder to generate the image from the latents.
    prior_prior ([`PriorTransformer`]):
        The canonical unCLIP prior to approximate the image embedding from the text embedding.
    prior_image_encoder ([`CLIPVisionModelWithProjection`]):
        Frozen image-encoder.
    prior_text_encoder ([`CLIPTextModelWithProjection`]):
        Frozen text-encoder.
    prior_tokenizer (`CLIPTokenizer`):
         Tokenizer of class
         [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    prior_scheduler ([`UnCLIPScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
Tr   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   c                    > [         TU ]  5         U R                  UUUUUUUUU	U
US9  [        UUUU	U
US9U l        [        UUUUUS9U l        g r+   )r/   r0   r1   r   r2   r   r3   r4   s               r7   r0   )KandinskyInpaintCombinedPipeline.__init___  r   r9   Nr:   c                 :    U R                   R                  U5        g r<   r=   r?   s     r7   r>   KKandinskyInpaintCombinedPipeline.enable_xformers_memory_efficient_attention  rA   r9   rB   rC   c                 h    U R                   R                  XS9  U R                  R                  XS9  gr   rF   rH   s      r7   rG   >KandinskyInpaintCombinedPipeline.enable_sequential_cpu_offload  r   r9   c                     U R                   R                  XS9  U R                  R                  XS9  U R                  R                  5         g rK   rN   rQ   s      r7   rO   -KandinskyInpaintCombinedPipeline.progress_bar  rS   r9   c                 t    U R                   R                  " S0 UD6  U R                  R                  " S0 UD6  g rU   rW   rY   s     r7   rX   8KandinskyInpaintCombinedPipeline.set_progress_bar_config  r\   r9   r]   r   
mask_imager^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   c                    U R                  UUUUUUU
SSS9	nUS   nUS   n[        U[        [        45      (       d  U/OUn[        U[        R
                  R
                  5      (       a  U/OUn[        U[        R
                  R
                  5      (       a  U/OUn[        U5      UR                  S   :  a=  UR                  S   [        U5      -  S:X  a  UR                  S   [        U5      -  U-  n[        U[        [        45      (       aY  [        U5      UR                  S   :  a=  UR                  S   [        U5      -  S:X  a  UR                  S   [        U5      -  U-  n[        U[        [        45      (       aY  [        U5      UR                  S   :  a=  UR                  S   [        U5      -  S:X  a  UR                  S   [        U5      -  U-  nU R                  UUUUUU	UUUUUUUUS9nU R                  5         U$ )a
  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
        `Image`, or tensor representing an image batch, that will be used as the starting point for the
        process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
        again.
    mask_image (`np.array`):
        Tensor representing an image batch, to mask `image`. White pixels in the mask will be repainted, while
        black pixels will be preserved. If `mask_image` is a PIL image, it will be converted to a single
        channel (luminance) before use. If it's a tensor, it should contain one color channel (L) instead of 3,
        so the expected shape would be `(B, H, W, 1)`.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `guidance_scale` is less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    height (`int`, *optional*, defaults to 512):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 512):
        The width in pixels of the generated image.
    prior_guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    prior_num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    callback (`Callable`, *optional*):
        A function that calls every `callback_steps` steps during inference. The function is called with the
        following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
    callback_steps (`int`, *optional*, defaults to 1):
        The frequency at which the `callback` function is called. If not specified, the callback is called at
        every step.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`
rm   Frn   r   r   )r]   r   r   ro   rp   rc   rb   r_   rf   r`   rh   ri   rj   rk   r   )r5   r]   r   r   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rw   ro   rp   rx   s                         r7   ry   )KandinskyInpaintCombinedPipeline.__call__  s	   p +"7 9/ ( 

 %Q' -a 0!+FT5M!B!B&%fciioo>>E%/
CIIOO%L%Lj\R\
v;++A..<3E3Ea3H3v;3VZ[3["((+s6{:fDF utUm,,E
\//22""1%E
2a7!''*c%j8EAE zD%=11J,"4"4Q"77""1%J71<&,,Q/3z?BjPJ##!%"7 3)#)# $ 
" 	##%r9   r{   r<   r|   r}   )-r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   r	   r   r   r0   r   r   r>   r   r   rC   r   rG   rO   rX   r   r   INPAINT_EXAMPLE_DOC_STRINGr   r   r   r   r   r   r   ry   r   r   r   s   @r7   r   r   <  s   < !l!.+
&+
 '+
 #	+

 56+
 +
 &+
 ;+
 8+
 '+
 )+
  2+
ZSxPXGY S	VHSM 	VRWX]XdXdfiXiRj 	V5
< ]]_9: <@#& #%&&))+MQ*.%*GK %Lc49n%L U\\399??D4FSYY__H]]^L %,,		ell9KTRUR[R[RaRaMbbc	L
 "%T#Y"78L !L L  #L L L $L $'L E%//43H"HIJL %,,'L c]L  8S#u||$<d$BCD!L" #L$ %L ; Lr9   r   )*typingr   r   r   r   	PIL.Imager   r   transformersr   r   r	   r
   r   modelsr   r   r   
schedulersr   r   r   utilsr   pipeline_utilsr   pipeline_kandinskyr   pipeline_kandinsky_img2imgr   pipeline_kandinsky_inpaintr   pipeline_kandinsky_priorr   r    r   r   r   r   r   r   r   rV   r9   r7   <module>r      s    3 2    F E G G / 1 @ @ < *! "" 8 :W 1 Wtn'8 nbu'8 ur9   