
    +hf                        S SK JrJrJrJrJr  S SKrS SKJrJ	r	  SSK
Jr  SSKJr  SSKJrJrJrJr  SSKJr  S	S
KJrJr  S	SKJr  \" 5       (       a  S SKJs  Jr  SrOSr\R>                  " \ 5      r!Sr" " S S\5      r#g)    )CallableDictListOptionalUnionN)CLIPTextModelWithProjectionCLIPTokenizer   )StableCascadeUNet)DDPMWuerstchenScheduler)is_torch_versionis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutput)PaellaVQModelTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import StableCascadePriorPipeline, StableCascadeDecoderPipeline

        >>> prior_pipe = StableCascadePriorPipeline.from_pretrained(
        ...     "stabilityai/stable-cascade-prior", torch_dtype=torch.bfloat16
        ... ).to("cuda")
        >>> gen_pipe = StableCascadeDecoderPipeline.from_pretrain(
        ...     "stabilityai/stable-cascade", torch_dtype=torch.float16
        ... ).to("cuda")

        >>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
        >>> prior_output = pipe(prompt)
        >>> images = gen_pipe(prior_output.image_embeddings, prompt=prompt)
        ```
c            %       2  ^  \ rS rSrSrSrSrSr/ SQr S,S\	S\
S\S\S	\S
\SS4U 4S jjjrS r      S-S\\R&                     S\\R&                     S\\R&                     S\\R&                     4S jjr    S.S jr\S 5       r\S 5       r\S 5       rS r\R6                  " 5       \" \5      SSSSSSSSSSSSSSS/4S\\R&                  \\R&                     4   S \\ \\    4   S!\!S"\S#\\\ \\    4      S\\R&                     S\\R&                     S\\R&                     S\\R&                     S$\!S%\\\RD                  \\RD                     4      S\\R&                     S&\\    S'\#S(\\$\!\!\%/S4      S)\\    4 S* jj5       5       r&S+r'U =r($ )/StableCascadeDecoderPipeline:   a  
Pipeline for generating images from the Stable Cascade model.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    tokenizer (`CLIPTokenizer`):
        The CLIP tokenizer.
    text_encoder (`CLIPTextModelWithProjection`):
        The CLIP text encoder.
    decoder ([`StableCascadeUNet`]):
        The Stable Cascade decoder unet.
    vqgan ([`PaellaVQModel`]):
        The VQGAN model.
    scheduler ([`DDPMWuerstchenScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
    latent_dim_scale (float, `optional`, defaults to 10.67):
        Multiplier to determine the VQ latent space size from the image embeddings. If the image embeddings are
        height=24 and width=24, the VQ latent shape needs to be height=int(24*10.67)=256 and
        width=int(24*10.67)=256 in order to match the training conditions.
decodertext_encoderztext_encoder->decoder->vqgan)latentsprompt_embeds_poolednegative_prompt_embedsimage_embeddings	tokenizer	schedulervqganlatent_dim_scalereturnNc                 f   > [         TU ]  5         U R                  UUUUUS9  U R                  US9  g )N)r   r   r   r    r!   )r"   )super__init__register_modulesregister_to_config)selfr   r   r   r    r!   r"   	__class__s          t/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.pyr&   %StableCascadeDecoderPipeline.__init__\   sF     	% 	 	
 	1AB    c	                 `   UR                   u  ppX-  S[        XR                  R                  -  5      [        XR                  R                  -  5      4nUc  [	        XXTS9nO<UR                   U:w  a  [        SUR                    SU 35      eUR                  U5      nXxR                  -  nU$ )N   )	generatordevicedtypezUnexpected latents shape, got z, expected )shapeintconfigr"   r   
ValueErrortoinit_noise_sigma)r)   
batch_sizer   num_images_per_promptr2   r1   r0   r   r    _channelsheightwidthlatents_shapes                 r+   prepare_latents,StableCascadeDecoderPipeline.prepare_latentso   s     &6%;%;"V.55564445	
 ?"=fbG}}- #A'--P[\i[j!klljj(G666r-   prompt_embedsr   r   negative_prompt_embeds_pooledc                    UGc  U R                  USU R                   R                  SSS9nUR                  nUR                  nU R                  USSS9R                  nUR                  S   UR                  S   :  a  [
        R                  " X5      (       d  U R                   R                  US S 2U R                   R                  S-
  S24   5      n[        R                  S	U R                   R                   S
U 35        US S 2S U R                   R                  24   nUS S 2S U R                   R                  24   nU R                  UR                  U5      UR                  U5      SS9nUR                  S   nUc  UR                  R                  S5      nUR                  U R                  R                  US9nUR                  U R                  R                  US9nUR!                  USS9nUR!                  USS9nU	Gc,  U(       Ga$  Uc  S/U-  nO[#        U5      [#        U5      La$  [%        S[#        U5       S[#        U5       S35      e['        U[(        5      (       a  U/nO2U[+        U5      :w  a!  [-        SU S[+        U5       SU SU S3	5      eUnU R                  USU R                   R                  SSS9nU R                  UR                  R                  U5      UR                  R                  U5      SS9nUR                  S   n	UR                  R                  S5      n
U(       a  U	R                  S   nU	R                  U R                  R                  US9n	U	R/                  SUS5      n	U	R1                  X#-  US5      n	U
R                  S   nU
R                  U R                  R                  US9n
U
R/                  SUS5      n
U
R1                  X#-  US5      n
XxX4$ )N
max_lengthTpt)paddingrE   
truncationreturn_tensorslongest)rG   rI      z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: )attention_maskoutput_hidden_states)r2   r1   r   dim z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)r   model_max_length	input_idsrM   r3   torchequalbatch_decodeloggerwarningr   r7   hidden_statestext_embeds	unsqueezer2   repeat_interleavetype	TypeError
isinstancestrlenr6   repeatview)r)   r1   r9   r:   do_classifier_free_guidancepromptnegative_promptrB   r   r   rC   text_inputstext_input_idsrM   untruncated_idsremoved_texttext_encoder_outputuncond_tokensuncond_input*negative_prompt_embeds_text_encoder_outputseq_lens                        r+   encode_prompt*StableCascadeDecoderPipeline.encode_prompt   s`     ..$>>::# ) K )22N(77N"nnVYW[n\ffO$$R(N,@,@,DDU[[N N  $~~::#At~~'F'F'JR'O$OP  778	,Q "03TT^^5T5T3T0T!U!/3TT^^5T5T3T0T!U"&"3"3!!&).:K:KF:Sjn #4 # 0==bAM#+':'F'F'P'PQR'S$%((t/@/@/F/Fv(V366T=N=N=T=T]c6d%778MST7U3EEF[abEc!).I&!#z 1fT/%::UVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0>>$>>::# * L :>9J9J&&))&1+::==fE%) :K :6 &P%]%]^`%a",V,b,b,l,lmn,o)&,2215G%;%>%>TEVEVE\E\ek%>%l"%;%B%B1F[]^%_"%;%@%@Acelnp%q"399!<G,I,L,L''--f -M -) -J,P,PQRTikl,m),I,N,N2GR-)
 4Jiir-   c           
      ^  ^  UbW  [        U 4S jU 5       5      (       d=  [        ST R                   SU Vs/ s H  ofT R                  ;  d  M  UPM     sn 35      eUb  Ub  [        SU SU S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Ub  [        S	U S
U S35      eUbE  UbA  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eg g g s  snf )Nc              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr)   s     r+   	<genexpr><StableCascadeDecoderPipeline.check_inputs.<locals>.<genexpr>   s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` rR   )allr6   rv   r`   ra   listr^   r3   )r)   rf   rg   rB   r   "callback_on_step_end_tensor_inputsrx   s   `      r+   check_inputs)StableCascadeDecoderPipeline.check_inputs   s    .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$+ pHs   D*D*c                     U R                   $ ru   _guidance_scaler)   s    r+   guidance_scale+StableCascadeDecoderPipeline.guidance_scale  s    ###r-   c                      U R                   S:  $ )NrL   r   r   s    r+   re   8StableCascadeDecoderPipeline.do_classifier_free_guidance  s    ##a''r-   c                     U R                   $ ru   )_num_timestepsr   s    r+   num_timesteps*StableCascadeDecoderPipeline.num_timesteps   s    """r-   c                    [         R                  " S/5      nSS/n[         R                  " USU-   -  [         R                  -  S-  5      S-  nX!   nUR                  " U6 nUR                  UR                  5      UR                  UR                  5      pSXe-  S-  R                  5       [         R                  S-  -  SU-   -  U-
  nU$ )NgMb?r   rL   g      ?r   )rU   tensorcospiclampr7   r1   acos)r)   talphas_cumprodsclamp_rangemin_varvarratios           r+   get_timestep_ratio_conditioning<StableCascadeDecoderPipeline.get_timestep_ratio_conditioning$  s    LL%!!f))AQK%((2S89Q>ii%TT#**%wzz#**'=7=S(..0EHHsNCANQRRr-   
   g        rL   pilTr   r   rf   num_inference_stepsr   rg   r:   r0   output_typereturn_dictcallback_on_step_endr}   c                    U R                   nU R                  R                  nX@l        [	        SS5      (       a  U[
        R                  :X  a  [        S5      eU R                  UUUUUS9  [        U[        5      (       a  [
        R                  " USS9nUb  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nXR                  S   U-  -  n
Uc*  Uc'  U R                  UUUU
U R                   UUUUU	S	9
u  nnnn	U R                   (       a  [
        R                  " Xy/5      OUnU R                   (       a,  [
        R                  " U[
        R"                  " U5      /5      OUnU R$                  R'                  UUS
9  U R$                  R(                  nU R+                  UXUUXU R$                  5      n[        U R$                  [,        5      (       a  USS nOz[/        U R$                  R0                  S5      (       aU  U R$                  R0                  R2                  (       a0  SU R$                  R0                  l        [4        R7                  S5        [/        U R$                  S5      (       a/  SU R$                  R8                  -
  n[
        R:                  " USS9nO/ n[        U5      U l        [?        U RA                  U5      5       GH  u  nn[        U R$                  [,        5      (       d  [        U5      S:  am  U RC                  URE                  5       RG                  5       U5      nURI                  URK                  S5      5      RM                  U5      RM                  U5      nOURO                  5       RQ                  U R$                  R(                  S   5      RI                  URK                  S5      5      RM                  U5      nO/URI                  URK                  S5      5      RM                  U5      nU R                  U R                   (       a  [
        R                  " U/S-  5      OUU R                   (       a  [
        R                  " U/S-  5      OUUUSS9S   nU R                   (       a6  URS                  S5      u  nn[
        RT                  " UUU RV                  5      n[        U R$                  [,        5      (       d  UnU R$                  RY                  UUUUS9RZ                  nUb\  0 nU H  n []        5       U    UU '   M     U" U UUU5      n!U!R_                  SU5      nU!R_                  SU5      nU!R_                  SU5      n[`        (       d  GM  [b        Rd                  " 5         GM     US;  a  [        SU 35      eUS:X  d  U Rf                  R0                  Rh                  U-  nU Rf                  Rk                  U5      Rl                  Ro                  SS5      n"US:X  a?  U"Rq                  SSSS5      RG                  5       RO                  5       Rs                  5       n"OXUS:X  aO  U"Rq                  SSSS5      RG                  5       RO                  5       Rs                  5       n"U Ru                  U"5      n"OUn"U Rw                  5         U(       d  U"$ [y        U"5      $ )aW  
Function invoked when calling the pipeline for generation.

Args:
    image_embedding (`torch.Tensor` or `List[torch.Tensor]`):
        Image Embeddings either extracted from an image or generated by a Prior Model.
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    num_inference_steps (`int`, *optional*, defaults to 12):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 0.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
        equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
        setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
        closely linked to the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `decoder_guidance_scale` is less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    prompt_embeds_pooled (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    negative_prompt_embeds_pooled (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt`
        input argument.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will ge generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple` [`~pipelines.ImagePipelineOutput`] if `return_dict` is True,
    otherwise a `tuple`. When returning a tuple, the first element is a list with the generated image
    embeddings.
<z2.2.0zW`StableCascadeDecoderPipeline` requires torch>=2.2.0 when using `torch.bfloat16` dtype.)rg   rB   r   r}   r   rO   NrL   )
rf   r1   r9   r:   re   rg   rB   r   r   rC   )r1   rK   clip_sampleFz set `clip_sample` to be Falsebetasg      ?r   )sampletimestep_ratioclip_text_pooledeffnetr   )model_outputtimestepr   r0   r   rB   r   )rF   npr   latentzSOnly the output types `pt`, `np`, `pil` and `latent` are supported not output_type=r   r   r
   r   )=_execution_devicer   r2   r   r   rU   bfloat16r6   r~   r`   r|   catra   rb   r3   rq   re   
zeros_liker    set_timesteps	timestepsr@   r   hasattrr5   r   rX   rY   r   cumprodr   	enumerateprogress_barr   longcpuexpandsizer7   floatdivchunklerpr   stepprev_samplelocalspopXLA_AVAILABLExm	mark_stepr!   scale_factordecoder   r   permutenumpynumpy_to_pilmaybe_free_model_hooksr   )#r)   r   rf   r   r   rg   rB   r   r   rC   r:   r0   r   r   r   r   r}   r1   r2   r9   r;   r   r   alphasr   ir   r   predicted_latentspredicted_latents_textpredicted_latents_uncondcallback_kwargsrx   callback_outputsimagess#                                      r+   __call__%StableCascadeDecoderPipeline.__call__.  s   p ''""-C))eu~~.Evww 	+'#9/Q 	 	
 &--$yy)9qA*VS"9"9JJvt$<$<VJ&,,Q/J !69O9OPQ9RV`9` a  %;%CHLHZHZ%&;,0,L,L /+%9'=.K I[ IEA#Q(E  // II+KL% 	 // II')9)9:J)KLM! 	 	$$%8$HNN,,	 &&(PYdhdrdr
 dnn&=>>!#2It~~,,m<<AVAVAbAb49%%1?@ 4>>7++4>>///F"]]6q9NN!)nd//	:;DAqdnn.EFF~&*%)%I%I!&&(,,.Zh%iN%3%:%:7<<?%K%N%Nu%U%X%XY_%`N%&WWY]]4>>3K3KB3O%P%W%WX_XdXdefXg%h%k%klq%rN!"',,q/!:!=!=e!D !%373S3Suyy'Q/Y`BFBbBbuyy.)9A)=>hv!5! !- ! ! //CTCZCZ[\C]@&(@$)JJ/GI_aeatat$u! dnn.EFF!"nn)).'#	 * 
 k  $/"$;A)/!OA& <#7aO#T *..y'B 0 4 4_m T)9)=)=>VXn)o&}[ <^ ;;efqers  h&jj''44w>GZZ&&w/66<<QBFd"1a3779??AGGI%1a3779??AGGI**62F 	##%M"6**r-   )r   r   )gףp=
W%@)NNNNNN)NNNN))__name__
__module____qualname____firstlineno____doc__	unet_nametext_encoder_namemodel_cpu_offload_seqrv   r   r	   r   r   r   r   r&   r@   r   rU   Tensorrq   r~   propertyr   re   r   r   no_gradr   EXAMPLE_DOC_STRINGr   r   ra   r4   	Generatorboolr   r   r   __static_attributes____classcell__)r*   s   @r+   r   r   :   s   . I&: #(C"C !C 2	C
 +C C  C 
C C&6 047;9=@Dij  -ij 'u||4ij !) 6ij (0'=ij\ #+/'R $ $ ( ( # # ]]_12 )-#% #;?047;9=@D%&MQ*.%* KO9B#k+d5<<.@ @Ak+ c49n%k+ !	k+
 k+ "%T#Y"78k+  -k+ 'u||4k+ !) 6k+ (0'=k+  #k+ E%//43H"HIJk+ %,,'k+ c]k+ k+  'xc40@$0F'GH!k+" -1I#k+ 3 k+r-   r   )$typingr   r   r   r   r   rU   transformersr   r	   modelsr   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   #wuerstchen.modeling_paella_vq_modelr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   rX   r   r    r-   r+   <module>r      sj    9 8  C ' 1 a a - C ? ))MM			H	% (a+#4 a+r-   