
from dataclasses import dataclass
from math import ceil
from typing import Callable, Dict, List, Optional, Union

import numpy as np
import torch
from transformers import CLIPTextModel, CLIPTokenizer

from ...loaders import StableDiffusionLoraLoaderMixin
from ...schedulers import DDPMWuerstchenScheduler
from ...utils import BaseOutput, deprecate, is_torch_xla_available, logging, replace_example_docstring
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
from .modeling_wuerstchen_prior import WuerstchenPrior


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

logger = logging.get_logger(__name__)

DEFAULT_STAGE_C_TIMESTEPS = list(np.linspace(1.0, 2 / 3, 20)) + list(np.linspace(2 / 3, 0.0, 11))[1:]

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import WuerstchenPriorPipeline

        >>> prior_pipe = WuerstchenPriorPipeline.from_pretrained(
        ...     "warp-ai/wuerstchen-prior", torch_dtype=torch.float16
        ... ).to("cuda")

        >>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
        >>> prior_output = prior_pipe(prompt)
        ```
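
        Once the prior has produced image embeddings, they are meant to be decoded into pixels by the
        companion decoder pipeline. A minimal, hedged sketch of that hand-off (it assumes the separate
        `WuerstchenDecoderPipeline` and the `warp-ai/wuerstchen` checkpoint; argument names follow the
        decoder's documented interface):

        ```py
        >>> from diffusers import WuerstchenDecoderPipeline

        >>> decoder_pipe = WuerstchenDecoderPipeline.from_pretrained(
        ...     "warp-ai/wuerstchen", torch_dtype=torch.float16
        ... ).to("cuda")
        >>> # The prior's output carries `image_embeddings`, which the decoder consumes.
        >>> images = decoder_pipe(prior_output.image_embeddings, prompt=prompt).images
        ```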
c                   V    \ rS rSr% Sr\\R                  \R                  4   \
S'   Srg)WuerstchenPriorPipelineOutput;   z
Output class for WuerstchenPriorPipeline.

Args:
    image_embeddings (`torch.Tensor` or `np.ndarray`)
        Prior image embeddings for text prompt

    """

    image_embeddings: Union[torch.Tensor, np.ndarray]


class WuerstchenPriorPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
    """
Pipeline for generating image prior for Wuerstchen.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

The pipeline also inherits the following loading methods:
    - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
    - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights

Args:
    prior ([`WuerstchenPrior`]):
        The canonical unCLIP prior to approximate the image embedding from the text embedding.
    text_encoder ([`CLIPTextModel`]):
        Frozen text-encoder.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    scheduler ([`DDPMWuerstchenScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
    latent_mean (`float`, *optional*, defaults to 42.0):
        Mean value used to de-normalize the generated image embedding latents.
    latent_std (`float`, *optional*, defaults to 1.0):
        Standard-deviation value used to de-normalize the generated image embedding latents.
    resolution_multiple (`float`, *optional*, defaults to 42.67):
        Default resolution multiple used to derive the latent resolution from the requested image size.
    """

    unet_name = "prior"
    text_encoder_name = "text_encoder"
    model_cpu_offload_seq = "text_encoder->prior"
    _callback_tensor_inputs = ["latents", "text_encoder_hidden_states", "negative_prompt_embeds"]
    _lora_loadable_modules = ["prior", "text_encoder"]

    def __init__(
        self,
        tokenizer: CLIPTokenizer,
        text_encoder: CLIPTextModel,
        prior: WuerstchenPrior,
        scheduler: DDPMWuerstchenScheduler,
        latent_mean: float = 42.0,
        latent_std: float = 1.0,
        resolution_multiple: float = 42.67,
    ) -> None:
        super().__init__()
        self.register_modules(
            tokenizer=tokenizer,
            text_encoder=text_encoder,
            prior=prior,
            scheduler=scheduler,
        )
        self.register_to_config(
            latent_mean=latent_mean, latent_std=latent_std, resolution_multiple=resolution_multiple
        )

    def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
        else:
            if latents.shape != shape:
                raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
            latents = latents.to(device)

        latents = latents * scheduler.init_noise_sigma
        return latents

    def encode_prompt(
        self,
        device,
        num_images_per_prompt,
        do_classifier_free_guidance,
        prompt=None,
        negative_prompt=None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
    ):
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        if prompt_embeds is None:
            # Tokenize the prompt, truncating to the CLIP context length and warning about what gets dropped.
            text_inputs = self.tokenizer(
                prompt,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            attention_mask = text_inputs.attention_mask

            untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                text_input_ids, untruncated_ids
            ):
                removed_text = self.tokenizer.batch_decode(
                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
                )
                logger.warning(
                    "The following part of your input was truncated because CLIP can only handle sequences up to"
                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                )
                text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
                attention_mask = attention_mask[:, : self.tokenizer.model_max_length]

            text_encoder_output = self.text_encoder(
                text_input_ids.to(device), attention_mask=attention_mask.to(device)
            )
            prompt_embeds = text_encoder_output.last_hidden_state

        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
        prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)

        if negative_prompt_embeds is None and do_classifier_free_guidance:
            if negative_prompt is None:
                uncond_tokens = [""] * batch_size
            elif type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = negative_prompt

            uncond_input = self.tokenizer(
                uncond_tokens,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            negative_prompt_embeds_text_encoder_output = self.text_encoder(
                uncond_input.input_ids.to(device), attention_mask=uncond_input.attention_mask.to(device)
            )

            negative_prompt_embeds = negative_prompt_embeds_text_encoder_output.last_hidden_state

        if do_classifier_free_guidance:
            # Duplicate the unconditional embeddings for each image generated per prompt.
            seq_len = negative_prompt_embeds.shape[1]
            negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

        return prompt_embeds, negative_prompt_embeds

    def check_inputs(
        self,
        prompt,
        negative_prompt,
        num_inference_steps,
        do_classifier_free_guidance,
        prompt_embeds=None,
        negative_prompt_embeds=None,
    ):
        if prompt is not None and prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
                " only forward one of the two."
            )
        elif prompt is None and prompt_embeds is None:
            raise ValueError(
                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
            )
        elif prompt is not None and not isinstance(prompt, str) and not isinstance(prompt, list):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

        if negative_prompt is not None and negative_prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
            )

        if prompt_embeds is not None and negative_prompt_embeds is not None:
            if prompt_embeds.shape != negative_prompt_embeds.shape:
                raise ValueError(
                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                    f" {negative_prompt_embeds.shape}."
                )

        if not isinstance(num_inference_steps, int):
            raise TypeError(
                f"'num_inference_steps' must be of type 'int', but got {type(num_inference_steps)}. In case you want"
                " to provide explicit timesteps, please use the 'timesteps' argument."
            )

    @property
    def guidance_scale(self):
        return self._guidance_scale

    @property
    def do_classifier_free_guidance(self):
        return self._guidance_scale > 1

    @property
    def num_timesteps(self):
        return self._num_timesteps

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Optional[Union[str, List[str]]] = None,
        height: int = 1024,
        width: int = 1024,
        num_inference_steps: int = 60,
        timesteps: List[float] = None,
        guidance_scale: float = 8.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        num_images_per_prompt: Optional[int] = 1,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.Tensor] = None,
        output_type: Optional[str] = "pt",
        return_dict: bool = True,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
        **kwargs,
    ):
        """
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    height (`int`, *optional*, defaults to 1024):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 1024):
        The width in pixels of the generated image.
    num_inference_steps (`int`, *optional*, defaults to 60):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process. If not defined, equally spaced `num_inference_steps`
        timesteps are used. Must be in descending order.
    guidance_scale (`float`, *optional*, defaults to 8.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. A higher guidance scale encourages the model to generate images that are closely
        linked to the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `guidance_scale` is less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pt"`):
        The output format of the generated image embeddings. Choose between: `"np"` (`np.ndarray`) or `"pt"`
        (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`WuerstchenPriorPipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that is called at the end of each denoising step during inference (a short sketch follows
        this argument list). The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

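A hedged sketch of a `callback_on_step_end` function (the helper name is hypothetical; the pipeline only
requires the `(pipe, step, timestep, callback_kwargs)` signature and a dict of updated tensors as the
return value):

```py
>>> def inspect_latents(pipe, step, timestep, callback_kwargs):
...     # "latents" is available here because it is the default entry in
...     # `callback_on_step_end_tensor_inputs`; return it (optionally modified).
...     latents = callback_kwargs["latents"]
...     return {"latents": latents}

>>> prior_output = prior_pipe(prompt, callback_on_step_end=inspect_latents)
```
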
Examples:

Returns:
    [`~pipelines.WuerstchenPriorPipelineOutput`] or `tuple` [`~pipelines.WuerstchenPriorPipelineOutput`] if
    `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
    generated image embeddings.
        """
        callback = kwargs.pop("callback", None)
        callback_steps = kwargs.pop("callback_steps", None)

        if callback is not None:
            deprecate(
                "callback",
                "1.0.0",
                "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
            )
        if callback_steps is not None:
            deprecate(
                "callback_steps",
                "1.0.0",
                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
            )

        if callback_on_step_end_tensor_inputs is not None and not all(
            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
        ):
            raise ValueError(
                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
                f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
            )

        # 0. Define commonly used variables
        device = self._execution_device
        self._guidance_scale = guidance_scale
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        # 1. Check inputs. Raise error if not correct
        if prompt is not None and not isinstance(prompt, list):
            if isinstance(prompt, str):
                prompt = [prompt]
            else:
                raise TypeError(f"'prompt' must be of type 'list' or 'str', but got {type(prompt)}.")

        if self.do_classifier_free_guidance:
            if negative_prompt is not None and not isinstance(negative_prompt, list):
                if isinstance(negative_prompt, str):
                    negative_prompt = [negative_prompt]
                else:
                    raise TypeError(
                        f"'negative_prompt' must be of type 'list' or 'str', but got {type(negative_prompt)}."
                    )

        self.check_inputs(
            prompt,
            negative_prompt,
            num_inference_steps,
            self.do_classifier_free_guidance,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
        )

        # 2. Encode caption
        prompt_embeds, negative_prompt_embeds = self.encode_prompt(
            prompt=prompt,
            device=device,
            num_images_per_prompt=num_images_per_prompt,
            do_classifier_free_guidance=self.do_classifier_free_guidance,
            negative_prompt=negative_prompt,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
        )
        text_encoder_hidden_states = (
            torch.cat([prompt_embeds, negative_prompt_embeds]) if negative_prompt_embeds is not None else prompt_embeds
        )

        # 3. Determine the latent shape of the image embeddings
        dtype = text_encoder_hidden_states.dtype
        latent_height = ceil(height / self.config.resolution_multiple)
        latent_width = ceil(width / self.config.resolution_multiple)
        num_channels = self.prior.config.c_in
        effnet_features_shape = (num_images_per_prompt * batch_size, num_channels, latent_height, latent_width)

        # 4. Prepare and set timesteps
        if timesteps is not None:
            self.scheduler.set_timesteps(timesteps=timesteps, device=device)
            timesteps = self.scheduler.timesteps
            num_inference_steps = len(timesteps)
        else:
            self.scheduler.set_timesteps(num_inference_steps, device=device)
            timesteps = self.scheduler.timesteps

        # 5. Prepare latents
        latents = self.prepare_latents(effnet_features_shape, dtype, device, generator, latents, self.scheduler)

        # 6. Run the denoising loop
        self._num_timesteps = len(timesteps[:-1])
        for i, t in enumerate(self.progress_bar(timesteps[:-1])):
            ratio = t.expand(latents.size(0)).to(dtype)

            # 7. Denoise image embeddings
            predicted_image_embedding = self.prior(
                torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents,
                r=torch.cat([ratio] * 2) if self.do_classifier_free_guidance else ratio,
                c=text_encoder_hidden_states,
            )

            # 8. Apply classifier-free guidance
            if self.do_classifier_free_guidance:
                predicted_image_embedding_text, predicted_image_embedding_uncond = predicted_image_embedding.chunk(2)
                predicted_image_embedding = torch.lerp(
                    predicted_image_embedding_uncond, predicted_image_embedding_text, self.guidance_scale
                )

            # 9. Renoise latents to the next timestep
            latents = self.scheduler.step(
                model_output=predicted_image_embedding,
                timestep=ratio,
                sample=latents,
                generator=generator,
            ).prev_sample

            if callback_on_step_end is not None:
                callback_kwargs = {}
                for k in callback_on_step_end_tensor_inputs:
                    callback_kwargs[k] = locals()[k]
                callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)

                latents = callback_outputs.pop("latents", latents)
                text_encoder_hidden_states = callback_outputs.pop(
                    "text_encoder_hidden_states", text_encoder_hidden_states
                )
                negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)

            if callback is not None and i % callback_steps == 0:
                step_idx = i // getattr(self.scheduler, "order", 1)
                callback(step_idx, t, latents)

            if XLA_AVAILABLE:
                xm.mark_step()

        # 10. Denormalize the latents
        latents = latents * self.config.latent_mean - self.config.latent_std

        # Offload all models
        self.maybe_free_model_hooks()

        if output_type == "np":
            latents = latents.cpu().float().numpy()

        if not return_dict:
            return (latents,)

        return WuerstchenPriorPipelineOutput(latents)