
    +h(                         S SK JrJrJrJrJr  S SKrSSKJrJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJrJr  \" 5       (       a  S SKJs  Jr  S
rOSr " S S\5      rg)    )DictListOptionalTupleUnionN   )AutoencoderKLDiTTransformer2DModel)KarrasDiffusionSchedulers)is_torch_xla_available)randn_tensor   )DiffusionPipelineImagePipelineOutputTFc                   2  ^  \ rS rSrSrSr SS\S\S\S\	\
\\4      4U 4S jjjrS	\\\\   4   S
\\   4S jr\R$                  " 5            SS\\   S\S\	\\R(                  \\R(                     4      S\S\	\   S\S
\\\4   4S jj5       rSrU =r$ )DiTPipeline(   a  
Pipeline for image generation based on a Transformer backbone instead of a UNet.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

Parameters:
    transformer ([`DiTTransformer2DModel`]):
        A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents. Initially published as
        [`Transformer2DModel`](https://huggingface.co/facebook/DiT-XL-2-256/blob/main/transformer/config.json#L2)
        in the config, but the mismatch can be ignored.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
    scheduler ([`DDIMScheduler`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
ztransformer->vaetransformervae	schedulerid2labelc                   > [         TU ]  5         U R                  XUS9  0 U l        Ub  UR	                  5        HQ  u  pVUR                  S5       H7  n[        U5      U R                  UR                  5       R                  5       '   M9     MS     [        [        U R                  R	                  5       5      5      U l        g g )N)r   r   r   ,)super__init__register_moduleslabelsitemssplitintlstriprstripdictsorted)	selfr   r   r   r   keyvaluelabel	__class__s	           ^/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/dit/pipeline_dit.pyr   DiTPipeline.__init__<   s     	+)T &nn.
"[[-E;>s8DKK 5 5 78 . / vdkk&7&7&9:;DK	      r(   returnc                     [        U[        5      (       d  [        U5      nU H-  nX R                  ;  d  M  [        U SU R                   S35      e   U Vs/ s H  o R                  U   PM     sn$ s  snf )z

Map label strings from ImageNet to corresponding class ids.

Parameters:
    label (`str` or `dict` of `str`):
        Label strings to be mapped to class ids.

Returns:
    `list` of `int`:
        Class ids to be processed by pipeline.
zK does not exist. Please make sure to select one of the following labels: 
 .)
isinstancelistr   
ValueError)r%   r(   ls      r*   get_label_idsDiTPipeline.get_label_idsN   su     %&&KEA# cefjfqfqerrst   )..1A...s   A3class_labelsguidance_scale	generatornum_inference_stepsoutput_typereturn_dictc                 	   [        U5      nU R                  R                  R                  nU R                  R                  R                  n	[        XyX4UU R                  U R                  R                  S9n
US:  a  [        R                  " U
/S-  5      OU
n[        R                  " XR                  S9R                  S5      n[        R                  " S/U-  U R                  S9nUS:  a  [        R                  " X/S5      OUnU R                  R                  U5        U R                  U R                  R                  5       GH  nUS:  a'  US[        U5      S-   n[        R                  " X/SS	9nU R                  R!                  X5      nUn[        R"                  " U5      (       d  UR$                  R&                  S
:H  nUR$                  R&                  S:H  n[)        U[*        5      (       a/  U(       d  U(       a  [        R,                  O[        R.                  nO.U(       d  U(       a  [        R0                  O[        R2                  n[        R                  " U/UUR$                  S9nO7[        UR4                  5      S:X  a  US   R7                  UR$                  5      nUR9                  UR4                  S   5      nU R                  UUUS9R:                  nUS:  at  USS2SU	24   USS2U	S24   nn[        R<                  " U[        U5      S-  SS	9u  nnUUUU-
  -  -   n[        R                  " UU/SS	9n[        R                  " UU/SS	9nU R                  R                  R>                  S-  U	:X  a  [        R<                  " UU	SS	9u  nnOUnU R                  RA                  UX5      RB                  n[D        (       d  GM  [F        RH                  " 5         GM     US:  a  URK                  SSS	9u  n
nOUn
SU RL                  R                  RN                  -  U
-  n
U RL                  RQ                  U
5      R:                  nUS-  S-   RS                  SS5      nURU                  5       RW                  SSSS5      R+                  5       RY                  5       nUS:X  a  U R[                  U5      nU R]                  5         U(       d  U4$ [_        US9$ )a  
The call function to the pipeline for generation.

Args:
    class_labels (List[int]):
        List of ImageNet class labels for the images to be generated.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    generator (`torch.Generator`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    num_inference_steps (`int`, *optional*, defaults to 250):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated image. Choose between `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`ImagePipelineOutput`] instead of a plain tuple.

Examples:

```py
>>> from diffusers import DiTPipeline, DPMSolverMultistepScheduler
>>> import torch

>>> pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16)
>>> pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
>>> pipe = pipe.to("cuda")

>>> # pick words from Imagenet class labels
>>> pipe.labels  # to print all available words

>>> # pick words that exist in ImageNet
>>> words = ["white shark", "umbrella"]

>>> class_ids = pipe.get_label_ids(words)

>>> generator = torch.manual_seed(33)
>>> output = pipe(class_labels=class_ids, num_inference_steps=25, generator=generator)

>>> image = output.images[0]  # label 'white shark'
```

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
        returned where the first element is a list with the generated images
)shaper8   devicedtype   r   )r>   i  r   N)dimmpsnpu)r?   r>   )timestepr6   g      ?r   pil)images)0lenr   configsample_sizein_channelsr   _execution_devicer?   torchcattensorreshaper   set_timestepsprogress_bar	timestepsscale_model_input	is_tensorr>   typer0   floatfloat32float64int32int64r=   toexpandsampler   out_channelsstepprev_sampleXLA_AVAILABLExm	mark_stepchunkr   scaling_factordecodeclampcpupermutenumpynumpy_to_pilmaybe_free_model_hooksr   )r%   r6   r7   r8   r9   r:   r;   
batch_sizelatent_sizelatent_channelslatentslatent_model_input
class_nullclass_labels_inputthalfrS   is_mpsis_npur?   
noise_predepsrestcond_eps
uncond_epshalf_epsmodel_output_sampless                                r*   __call__DiTPipeline.__call__g   s=   x &
&&--99**11==I))""((	
 :H!9KUYYy1}5QX||L9O9OPXXY[\\\4&:"5d>T>TU
IWZ[I[UYY'A1Eam 	$$%89""4>>#;#;<A!)*HC0B,Cq,HI%*YY|%C"!%!A!ABT!XI??9-- ,22775@+22775@i//.4EMMU]]E,2fEKK5;;E!LL)EJ\JcJcd	Y__%*%dO../A/H/HI	!(();)A)A!)DEI))"YEW * f 
 !&q*:?*:':;Z?K[H[=\T',{{3CA1'M$*%(Z:O(PPii8 4!<"YYT{:
 &&33q8OK"'++j/q"Qa) "&!4!4\1!Y!e!e}] =` A+11!1;JGQ(Gdhhoo444w>((//'*11Q;$++Aq1 ++-''1a399;AAC%''0G 	##%:"'22r,   )r   )N)g      @N2   rF   T)__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqr
   r	   r   r   r   r    strr   r   r   r4   rM   no_gradrW   	Generatorboolr   r   r   __static_attributes____classcell__)r)   s   @r*   r   r   (   s   " / .2<*< < -	<
 4S>*< <$/5d3i#8 /T#Y /2 ]]_ !$MQ#%%* S33iS3 S3 E%//43H"HIJ	S3
 !S3 c]S3 S3 
"E)	*S3 S3r,   r   )typingr   r   r   r   r   rM   modelsr	   r
   
schedulersr   utilsr   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelrc   rb   r    r,   r*   <module>r      sI   * 6 5  : 3 + - C ))MMS3# S3r,   