
    +hV4                     :   S SK r S SKJr  S SKJrJrJr  S SKrS SK	r
S SKrS SKJrJr  SSKJr  SSKJr  SSKJrJrJrJr  SS	KJr  S
SKJr  SSKJr  \" 5       (       a  S SKJs  J r!  Sr"OSr"\RF                  " \$5      r%Sr&\ " S S\5      5       r' " S S\5      r(g)    N)	dataclass)ListOptionalUnion)CLIPTextModelWithProjectionCLIPTokenizer   )PriorTransformer)HeunDiscreteScheduler)
BaseOutputis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipeline   )ShapERendererTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import DiffusionPipeline
        >>> from diffusers.utils import export_to_gif

        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        >>> repo = "openai/shap-e"
        >>> pipe = DiffusionPipeline.from_pretrained(repo, torch_dtype=torch.float16)
        >>> pipe = pipe.to(device)

        >>> guidance_scale = 15.0
        >>> prompt = "a shark"

        >>> images = pipe(
        ...     prompt,
        ...     guidance_scale=guidance_scale,
        ...     num_inference_steps=64,
        ...     frame_size=256,
        ... ).images

        >>> gif_path = export_to_gif(images[0], "shark_3d.gif")
        ```
c                       \ rS rSr% Sr\\\\R                  R                        \\\	R                        4   \S'   Srg)ShapEPipelineOutputK   z
Output class for [`ShapEPipeline`] and [`ShapEImg2ImgPipeline`].

Args:
    images (`torch.Tensor`)
        A list of images for 3D rendering.
images N)__name__
__module____qualname____firstlineno____doc__r   r   PILImagenpndarray__annotations____static_attributes__r       d/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/shap_e/pipeline_shap_e.pyr   r   K   s9     $tCIIOO,-tD4D/EEFFr%   r   c                   8  ^  \ rS rSrSrSrS/rS\S\S\	S\
S\4
U 4S	 jjrS
 rS r\R                   " 5       \" \5              SS\S\S\S\\\R.                  \\R.                     4      S\\R2                     S\S\S\\   S\4S jj5       5       rSrU =r$ )ShapEPipelineX   a  
Pipeline for generating latent representation of a 3D asset and rendering with the NeRF method.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

Args:
    prior ([`PriorTransformer`]):
        The canonical unCLIP prior to approximate the image embedding from the text embedding.
    text_encoder ([`~transformers.CLIPTextModelWithProjection`]):
        Frozen text-encoder.
    tokenizer ([`~transformers.CLIPTokenizer`]):
         A `CLIPTokenizer` to tokenize text.
    scheduler ([`HeunDiscreteScheduler`]):
        A scheduler to be used in combination with the `prior` model to generate image embedding.
    shap_e_renderer ([`ShapERenderer`]):
        Shap-E renderer projects the generated latents into parameters of a MLP to create 3D objects with the NeRF
        rendering method.
ztext_encoder->priorshap_e_rendererpriortext_encoder	tokenizer	schedulerc                 H   > [         TU ]  5         U R                  UUUUUS9  g )N)r+   r,   r-   r.   r*   )super__init__register_modules)selfr+   r,   r-   r.   r*   	__class__s         r&   r1   ShapEPipeline.__init__p   s2     	%+ 	 	
r%   c                     Uc  [        XX2S9nO<UR                  U:w  a  [        SUR                   SU 35      eUR                  U5      nXVR                  -  nU$ )N)	generatordevicedtypezUnexpected latents shape, got z, expected )r   shape
ValueErrortoinit_noise_sigma)r3   r:   r9   r8   r7   latentsr.   s          r&   prepare_latentsShapEPipeline.prepare_latents   s`    ?"5fZG}}% #A'--P[\a[b!cddjj(G666r%   c                    [        U[        5      (       a  [        U5      OS  SU R                  l        U R                  USU R                  R
                  SSS9nUR                  nU R                  USSS9R                  nUR                  S	   UR                  S	   :  a  [        R                  " Xg5      (       dj  U R                  R                  US S 2U R                  R
                  S-
  S	24   5      n[        R                  S
U R                  R
                   SU 35        U R                  UR                  U5      5      n	U	R                  n
U
R!                  USS9n
U
[        R"                  R%                  U
S	SS9-  n
U(       a-  [        R&                  " U
5      n[        R(                  " X/5      n
[*        R,                  " U
R                  S   5      U
-  n
U
$ )Nr   r   
max_lengthTpt)paddingrB   
truncationreturn_tensorslongest)rD   rF   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: dim)rJ   keepdim)
isinstancelistlenr-   pad_token_idmodel_max_length	input_idsr:   torchequalbatch_decodeloggerwarningr,   r<   text_embedsrepeat_interleavelinalgnorm
zeros_likecatmathsqrt)r3   promptr8   num_images_per_promptdo_classifier_free_guidancetext_inputstext_input_idsuntruncated_idsremoved_texttext_encoder_outputprompt_embedsnegative_prompt_embedss               r&   _encode_promptShapEPipeline._encode_prompt   s    "&$//FQ '(#nn ~~66 % 
 %....SW.Xbb  $(<(<R(@@UcIuIu>>66q$..JiJilmJmprJrGr7stLNNNN334Il^M
 #//0A0A&0IJ+77%778MST7U%(9(9-RY](9(^^&%*%5%5m%D"
 "II'=&MNM 		-"5"5a"89MIr%   r_   r`   num_inference_stepsr7   r>   guidance_scale
frame_sizeoutput_typereturn_dictc
                     [        U[        5      (       a  Sn
O8[        U[        5      (       a  [        U5      n
O[	        S[        U5       35      eU R                  nX-  n
US:  nU R                  XX,5      nU R                  R                  X;S9  U R                  R                  nU R                  R                  R                  nU R                  R                  R                  nU R                  XU-  4UR                   UUUU R                  5      nUR#                  UR$                  S   UU5      n['        U R)                  U5      5       H  u  nnU(       a  [*        R,                  " U/S-  5      OUnU R                  R/                  UU5      nU R                  UUUS9R0                  nUR3                  UR$                  S   SS9u  nnU(       a  UR5                  S5      u  nnUUUU-
  -  -   nU R                  R7                  UUUS	9R8                  n[:        (       d  M  [<        R>                  " 5         M     U RA                  5         US
;  a  [	        SU 35      eUS:X  a	  [C        US9$ / nUS:X  aJ  ['        U5       H:  u  nnU RD                  RG                  USSS24   U5      nURI                  U5        M<     O['        U5       H9  u  nnU RD                  RK                  USSS24   UUS9nURI                  U5        M;     [*        RL                  " U5      nURO                  5       RQ                  5       nUS:X  a!  U Vs/ s H  nU RS                  U5      PM     nnU	(       d  U4$ [C        US9$ s  snf )ai  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    num_inference_steps (`int`, *optional*, defaults to 25):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    frame_size (`int`, *optional*, default to 64):
        The width and height of each image frame of the generated 3D output.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated image. Choose between `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`), `"latent"` (`torch.Tensor`), or mesh ([`MeshDecoderOutput`]).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.shap_e.pipeline_shap_e.ShapEPipelineOutput`] instead of a plain
        tuple.

Examples:

Returns:
    [`~pipelines.shap_e.pipeline_shap_e.ShapEPipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.shap_e.pipeline_shap_e.ShapEPipelineOutput`] is returned,
        otherwise a `tuple` is returned where the first element is a list with the generated images.
r   z2`prompt` has to be of type `str` or `list` but is g      ?)r8   r   r   )timestepproj_embeddingrI   )rq   sample)r!   pillatentmeshzUOnly the output types `pil`, `np`, `latent` and `mesh` are supported not output_type=ru   )r   rv   N)sizert   )*rL   strrM   rN   r;   type_execution_deviceri   r.   set_timesteps	timestepsr+   confignum_embeddingsembedding_dimr?   r9   reshaper:   	enumerateprogress_barrR   r\   scale_model_inputpredicted_image_embeddingsplitchunkstepprev_sampleXLA_AVAILABLExm	mark_stepmaybe_free_model_hooksr   r*   decode_to_meshappenddecode_to_imagestackcpunumpynumpy_to_pil)r3   r_   r`   rk   r7   r>   rl   rm   rn   ro   
batch_sizer8   ra   rg   r|   r~   r   itlatent_model_inputscaled_model_input
noise_pred_noise_pred_uncondr   ru   rv   images                               r&   __call__ShapEPipeline.__call__   sw   h fc""J%%VJQRVW]R^Q_`aa''7
&4s&:#++F<Qo 	$$%8$HNN,,	**99

))77&&-78NN
 //'--"2NMRd//	:;DAq=XG9q=!9^e!%!A!ABTVW!X", $  ('	  ',,"((+ - MJ +0:0@0@0C-!:.:PaCa1bb
nn)) *  k	  }7 << 	##%==ghsgtu  ("&g66& &w/	6++::47O d# 0 'w/	6,,<<47O# = 
 e$ 0 [[(FZZ\'')Fe#@FGu$++E2G9"&11 Hs   M;r   )r      NNg      @@   rt   T)r   r   r   r   r   model_cpu_offload_seq_exclude_from_cpu_offloadr
   r   r   r   r   r1   r?   ri   rR   no_gradr   EXAMPLE_DOC_STRINGrx   intr   r   	Generatorr   Tensorfloatboolr   r$   __classcell__)r4   s   @r&   r(   r(   X   s'   ( 2!2 3

 2
 !	

 )
 '
&	/b ]]_12 &'#%MQ*. #%* Y2Y2  #Y2 !	Y2
 E%//43H"HIJY2 %,,'Y2 Y2 Y2 c]Y2 Y2 3 Y2r%   r(   ))r]   dataclassesr   typingr   r   r   r   r!   	PIL.Imager   rR   transformersr   r   modelsr
   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_utilsr   rendererr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   rU   r   r   r(   r   r%   r&   <module>r      s     ! ( (    C & /  . . # ))MM			H	% 8 	G* 	G 	GB2% B2r%   