
    +h`Q                        S SK JrJrJrJrJr  S SKrS SKrS SK	J
r
Jr  SSKJr  SSKJrJrJrJr  SSKJr  SS	KJrJrJr  S
SKJr  S
SKJr  \" 5       (       a  S SKJs  Jr   Sr!OSr!\RD                  " \#5      r$Sr% " S S\\5      r&g)    )CallableDictListOptionalUnionN)CLIPTextModelCLIPTokenizer   )DDPMWuerstchenScheduler)	deprecateis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DeprecatedPipelineMixinDiffusionPipelineImagePipelineOutput   )PaellaVQModel)WuerstchenDiffNeXtTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import WuerstchenPriorPipeline, WuerstchenDecoderPipeline

        >>> prior_pipe = WuerstchenPriorPipeline.from_pretrained(
        ...     "warp-ai/wuerstchen-prior", torch_dtype=torch.float16
        ... ).to("cuda")
        >>> gen_pipe = WuerstchenDecoderPipeline.from_pretrain("warp-ai/wuerstchen", torch_dtype=torch.float16).to(
        ...     "cuda"
        ... )

        >>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
        >>> prior_output = pipe(prompt)
        >>> images = gen_pipe(prior_output.image_embeddings, prompt=prompt)
        ```
c                     ^  \ rS rSrSrSr/ SQr S'S\S\S\	S\
S	\S
\SS4U 4S jjjrS r S(S jr\S 5       r\S 5       r\S 5       r\R*                  " 5       \" \5      SSSSSSSSSSSS/4S\\R2                  \\R2                     4   S\\\\   4   S\S\\\      S\S\\\\\   4      S\S \\\R<                  \\R<                     4      S\\R2                     S!\\   S"\S#\\ \\\!/S4      S$\\   4S% jj5       5       r"S&r#U =r$$ ))WuerstchenDecoderPipeline;   a  
Pipeline for generating images from the Wuerstchen model.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    tokenizer (`CLIPTokenizer`):
        The CLIP tokenizer.
    text_encoder (`CLIPTextModel`):
        The CLIP text encoder.
    decoder ([`WuerstchenDiffNeXt`]):
        The WuerstchenDiffNeXt unet decoder.
    vqgan ([`PaellaVQModel`]):
        The VQGAN model.
    scheduler ([`DDPMWuerstchenScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
    latent_dim_scale (float, `optional`, defaults to 10.67):
        Multiplier to determine the VQ latent space size from the image embeddings. If the image embeddings are
        height=24 and width=24, the VQ latent shape needs to be height=int(24*10.67)=256 and
        width=int(24*10.67)=256 in order to match the training conditions.
ztext_encoder->decoder->vqgan)latentstext_encoder_hidden_statesnegative_prompt_embedsimage_embeddings	tokenizertext_encoderdecoder	schedulervqganlatent_dim_scalereturnNc                 f   > [         TU ]  5         U R                  UUUUUS9  U R                  US9  g )N)r   r    r!   r"   r#   )r$   )super__init__register_modulesregister_to_config)selfr   r    r!   r"   r#   r$   	__class__s          l/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/wuerstchen/pipeline_wuerstchen.pyr(   "WuerstchenDecoderPipeline.__init__[   sF     	% 	 	
 	1AB    c                     Uc  [        XX2S9nO<UR                  U:w  a  [        SUR                   SU 35      eUR                  U5      nXVR                  -  nU$ )N)	generatordevicedtypezUnexpected latents shape, got z, expected )r   shape
ValueErrortoinit_noise_sigma)r+   r4   r3   r2   r1   r   r"   s          r-   prepare_latents)WuerstchenDecoderPipeline.prepare_latentso   s`    ?"5fZG}}% #A'--P[\a[b!cddjj(G666r/   c                    [        U[        5      (       a  [        U5      OSnU R                  USU R                  R                  SSS9nUR
                  nUR                  n	U R                  USSS9R
                  n
U
R                  S   UR                  S   :  a  [        R                  " X5      (       d  U R                  R                  U
S S 2U R                  R                  S-
  S24   5      n[        R                  S	U R                  R                   S
U 35        US S 2S U R                  R                  24   nU	S S 2S U R                  R                  24   n	U R                  UR                  U5      U	R                  U5      S9nUR                  nUR!                  USS9nS nU(       Ga<  Uc  S/U-  nO[#        U5      [#        U5      La$  [%        S[#        U5       S[#        U5       S35      e[        U[&        5      (       a  U/nO2U[        U5      :w  a!  [)        SU S[        U5       SU SU S3	5      eUnU R                  USU R                  R                  SSS9nU R                  UR
                  R                  U5      UR                  R                  U5      S9nUR                  nUR                  S   nUR+                  SUS5      nUR-                  Xc-  US5      nX4$ )Nr   
max_lengthTpt)paddingr;   
truncationreturn_tensorslongest)r=   r?   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: )attention_maskr   dim z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)
isinstancelistlenr   model_max_length	input_idsrB   r4   torchequalbatch_decodeloggerwarningr    r6   last_hidden_staterepeat_interleavetype	TypeErrorstrr5   repeatview)r+   promptr2   num_images_per_promptdo_classifier_free_guidancenegative_prompt
batch_sizetext_inputstext_input_idsrB   untruncated_idsremoved_texttext_encoder_outputr   !uncond_text_encoder_hidden_statesuncond_tokensuncond_input*negative_prompt_embeds_text_encoder_outputseq_lens                      r-   encode_prompt'WuerstchenDecoderPipeline.encode_promptz   s-    %/vt$<$<S[!
nn ~~66 % 
 %..$33..SW.Xbb  $(<(<R(@@UcIuIu>>66q$..JiJilmJmprJrGr7stLNNNN334Il^M ,A/P1P1P/P,PQN+A/P1P1P/P,PQN"//0A0A&0IZhZkZklrZs/t%8%J%J"%?%Q%QRgmn%Q%o",0)&&!#z 1fT/%::UVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0>>$>>::# * L :>9J9J&&))&1,B]B]B`B`agBh :K :6 1[0l0l- 8==a@G0Q0X0XYZ\qst0u-0Q0V0V2GR1- *LLr/   c                     U R                   $ N_guidance_scaler+   s    r-   guidance_scale(WuerstchenDecoderPipeline.guidance_scale   s    ###r/   c                      U R                   S:  $ )Nr   rk   rm   s    r-   rZ   5WuerstchenDecoderPipeline.do_classifier_free_guidance   s    ##a''r/   c                     U R                   $ rj   )_num_timestepsrm   s    r-   num_timesteps'WuerstchenDecoderPipeline.num_timesteps   s    """r/      g        r   pilTr   r   rX   num_inference_steps	timestepsrn   r[   rY   r1   output_typereturn_dictcallback_on_step_end"callback_on_step_end_tensor_inputsc           
        ^  UR                  SS5      nUR                  SS5      nUb  [        SSS5        Ub  [        SSS5        UbX  [        U 4S jU 5       5      (       d>  [        ST R                   S	U Vs/ s H  nUT R                  ;  d  M  UPM     sn 35      eT R
                  nT R                  R                  nUT l        [        U[        5      (       d1  [        U[        5      (       a  U/nO[        S
[        U5       S35      eT R                  (       aI  UbF  [        U[        5      (       d1  [        U[        5      (       a  U/nO[        S[        U5       S35      e[        U[        5      (       a  [        R                   " USS9n[        U["        R$                  5      (       a"  [        R&                  " UUS9R)                  US9n[        U[        R&                  5      (       d  [        S[        U5       S35      e[        U[*        5      (       d  [        S[        U5       S35      eT R-                  UUUR/                  S5      U-  T R                  U5      u  nnUb  [        R                   " UU/5      OUnT R                  (       a,  [        R                   " U[        R0                  " U5      /5      OUn[+        UR/                  S5      T R2                  R4                  -  5      n[+        UR/                  S5      T R2                  R4                  -  5      nUR/                  S5      U-  SUU4nUb<  T R6                  R9                  UUS9  T R6                  R:                  n[=        U5      nO0T R6                  R9                  UUS9  T R6                  R:                  nT R?                  UUUXT R6                  5      n	[=        USS 5      T l         [C        T RE                  USS 5      5       GH  u  nnURG                  U	R/                  S5      5      R)                  U5      nT R                  T R                  (       a  [        R                   " U	/S-  5      OU	T R                  (       a  [        R                   " U/S-  5      OUUUS9nT R                  (       a6  URI                  S5      u  nn [        RJ                  " U UT RL                  5      nT R6                  RO                  UUU	US9RP                  n	Ub\  0 n!U H  n[S        5       U   U!U'   M     U" T UUU!5      n"U"R                  SU	5      n	U"R                  SU5      nU"R                  SU5      nUb-  UU-  S:X  a$  U[U        T R6                  SS5      -  n#U" U#UU	5        [V        (       d  GM  [X        RZ                  " 5         GM     U
S ;  a  [        S!U
 35      eU
S":X  d  T R\                  R2                  R^                  U	-  n	T R\                  Ra                  U	5      Rb                  Re                  SS5      n$U
S#:X  a?  U$Rg                  SSSS5      Ri                  5       Rk                  5       Rm                  5       n$OXU
S$:X  aO  U$Rg                  SSSS5      Ri                  5       Rk                  5       Rm                  5       n$T Ro                  U$5      n$OU	n$T Rq                  5         U(       d  U$$ [s        U$5      $ s  snf )%a  
Function invoked when calling the pipeline for generation.

Args:
    image_embedding (`torch.Tensor` or `List[torch.Tensor]`):
        Image Embeddings either extracted from an image or generated by a Prior Model.
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    num_inference_steps (`int`, *optional*, defaults to 12):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    timesteps (`List[int]`, *optional*):
        Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
        timesteps are used. Must be in descending order.
    guidance_scale (`float`, *optional*, defaults to 0.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
        equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
        setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
        closely linked to the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `decoder_guidance_scale` is less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple` [`~pipelines.ImagePipelineOutput`] if `return_dict` is True,
    otherwise a `tuple`. When returning a tuple, the first element is a list with the generated image
    embeddings.
callbackNcallback_stepsz1.0.0zhPassing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`znPassing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`c              3   @   >#    U  H  oTR                   ;   v   M     g 7frj   )_callback_tensor_inputs).0kr+   s     r-   	<genexpr>5WuerstchenDecoderPipeline.__call__.<locals>.<genexpr>2  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found z2'prompt' must be of type 'list' or 'str', but got rF   z;'negative_prompt' must be of type 'list' or 'str', but got r   rC   )r2   )r3   zI'image_embeddings' must be of type 'torch.Tensor' or 'np.array', but got z5'num_inference_steps' must be of type 'int', but got zo                           In Case you want to provide explicit timesteps, please use the 'timesteps' argument.r   r
      )ry   r2   rA   )reffnetclip)model_outputtimestepsampler1   r   r   r   orderr   )r<   nprw   latentzSOnly the output types `pt`, `np`, `pil` and `latent` are supported not output_type=r   r   rw   ):popr   allr5   r   _execution_devicer!   r3   rl   rG   rH   rU   rT   rS   rZ   rL   catr   ndarrayTensorr6   intrg   size
zeros_likeconfigr$   r"   set_timestepsry   rI   r8   rs   	enumerateprogress_barexpandchunklerprn   stepprev_samplelocalsgetattrXLA_AVAILABLExm	mark_stepr#   scale_factordecoder   clamppermutecpufloatnumpynumpy_to_pilmaybe_free_model_hooksr   )%r+   r   rX   rx   ry   rn   r[   rY   r1   r   rz   r{   r|   r}   kwargsr   r   r   r2   r3   prompt_embedsr   r   r   latent_heightlatent_widthlatent_features_shapeitratiopredicted_latentspredicted_latents_textpredicted_latents_uncondcallback_kwargscallback_outputsstep_idximagess%   `                                    r-   __call__"WuerstchenDecoderPipeline.__call__   s   T ::j$/$4d;z
 %  A .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  bc  ko  kG  kG  bGpq  |^  pH  oI  J 
 ''""- &$''&#&& "TUYZ`UaTbbc dee++*:ot3T3Tos33'6&7O#UVZ[jVkUllmn  &--$yy)9qA&

33$||,<VLOOV[O\*ELL99[\`aq\r[sstu  -s33GM`HaGb cp q  150B0B!!!$'<<,,1
-- CYBdEII}&<=>jw 	#
 // II')9)9:J)KLM! 	 ,11!4t{{7S7SST+003dkk6R6RRS!1!6!6q!9<Q!QSTVceq r  NN((9V(L00I"%i.NN(()<V(L00I &&'<eVYaeaoaop ")CR.1d//	#2?@DAqHHW\\!_-007E $,0,L,L		7)a-(RY,0,L,L%))UGaK(RW/	 !- ! //CTCZCZ[\C]@&(@$)JJ/GI_aeatat$u! nn)).#	 * 
 k  $/"$;A)/!OA& <#7aO#T *..y'B#3#7#78JL\#] -=-A-A02L.* #N(:a(? CC1g.}Q AT ;;efqers  h&jj''44w>GZZ&&w/66<<QBFd"1a3779??AGGI%1a3779??AGGI**62F 	##%M"6**W pHs   :[6[6)rl   rs   )gףp=
W%@rj   )%__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqr   r	   r   r   r   r   r   r(   r8   rg   propertyrn   rZ   rt   rL   no_gradr   EXAMPLE_DOC_STRINGr   r   r   rU   r   r   	Generatorboolr   r   r   __static_attributes____classcell__)r,   s   @r-   r   r   ;   s#   . ; #(C C $C $	C
 +C C  C 
C C(	" PMd $ $ ( ( # # ]]_12 )-#%+/ #;?%&MQ*.%* KO9Bg+d5<<.@ @Ag+ c49n%g+ !	g+
 DK(g+ g+ "%T#Y"78g+  #g+ E%//43H"HIJg+ %,,'g+ c]g+ g+ 'xc40@$0F'GHg+ -1Ig+ 3 g+r/   r   )'typingr   r   r   r   r   r   r   rL   transformersr   r	   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   r   modeling_paella_vq_modelr   modeling_wuerstchen_diffnextr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   rO   r   r    r/   r-   <module>r      ss    9 8   5 1 Z Z - \ \ 3 < ))MM			H	% (F+ 79J F+r/   