
    +h                     z   S SK Jr  S SKJr  S SKJrJrJrJrJ	r	J
r
  S SKrS SKrS SKJr  S SKJr  S SKJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJrJrJ r J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  \" 5       (       a  S SK*J+s  J,r-  Sr.OSr.\ R^                  " \05      r1Sr2\ " S S\5      5       r3 " S S\'5      r4g)    )	dataclass)partial)AnyDictListOptionalTupleUnionN)Image)tqdm)CLIPTextModelCLIPTokenizer   )PipelineImageInput)AutoencoderKLUNet2DConditionModel)DDIMSchedulerLCMScheduler)
BaseOutputis_torch_xla_availableloggingreplace_example_docstring)is_scipy_available)randn_tensor   )DiffusionPipeline   )MarigoldImageProcessorTFaE  
Examples:
```py
>>> import diffusers
>>> import torch

>>> pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
...     "prs-eth/marigold-depth-v1-1", variant="fp16", torch_dtype=torch.float16
... ).to("cuda")

>>> image = diffusers.utils.load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
>>> depth = pipe(image)

>>> vis = pipe.image_processor.visualize_depth(depth.prediction)
>>> vis[0].save("einstein_depth.png")

>>> depth_16bit = pipe.image_processor.export_depth_to_16bit_png(depth.prediction)
>>> depth_16bit[0].save("einstein_depth_16bit.png")
```
c                       \ rS rSr% Sr\\R                  \R                  4   \
S'   \S\R                  \R                  4   \
S'   \S\R                  4   \
S'   Srg)MarigoldDepthOutputR   a  
Output class for Marigold monocular depth prediction pipeline.

Args:
    prediction (`np.ndarray`, `torch.Tensor`):
        Predicted depth maps with values in the range [0, 1]. The shape is $numimages       imes 1  imes height     imes
        width$ for `torch.Tensor` or $numimages     imes height     imes width      imes 1$ for `np.ndarray`.
    uncertainty (`None`, `np.ndarray`, `torch.Tensor`):
        Uncertainty maps computed from the ensemble, with values in the range [0, 1]. The shape is $numimages
            imes 1  imes height     imes width$ for `torch.Tensor` or $numimages    imes height     imes width      imes 1$
        for `np.ndarray`.
    latent (`None`, `torch.Tensor`):
        Latent features corresponding to the predictions, compatible with the `latents` argument of the pipeline.
        The shape is $numimages * numensemble       imes 4  imes latentheight       imes latentwidth$.

predictionNuncertaintylatent )__name__
__module____qualname____firstlineno____doc__r
   npndarraytorchTensor__annotations____static_attributes__r%       n/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/marigold/pipeline_marigold_depth.pyr    r    R   sO      bjj%,,.//tRZZ566$$%%r1   r    c            !       &  ^  \ rS rSrSrSrSr     S.S\S\S\	\
\4   S\S	\S
\\   S\\   S\\   S\\   S\\   4U 4S jjjrS\S\S\S\S\S\S\S\\\\4      S\\R.                     S\\	\R0                  \\R0                     4      S\S\S\4S jr\R6                  R8                  S/S j5       r\R<                  " 5       \" \ 5                    S0S\S\\   S\S\\   S\S\S\S\S\\\\4      S\\	\R.                  \\R.                     4      S\\	\R0                  \\R0                     4      S\S\S \S!\4S" jj5       5       r!S\R.                  S\\R.                     S\\R0                     S\S\S\"\R.                  \R.                  4   4S# jr#S$\R.                  S\R.                  4S% jr$\%        S1S&\R.                  S\S\S\S'\S(\&S)\S*\&S+\S\"\R.                  \\R.                     4   4S, jj5       r'S-r(U =r)$ )2MarigoldDepthPipelinei   a
  
Pipeline for monocular depth estimation using the Marigold method: https://marigoldmonodepth.github.io.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    unet (`UNet2DConditionModel`):
        Conditional U-Net to denoise the depth latent, conditioned on image latent.
    vae (`AutoencoderKL`):
        Variational Auto-Encoder (VAE) Model to encode and decode images and predictions to and from latent
        representations.
    scheduler (`DDIMScheduler` or `LCMScheduler`):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents.
    text_encoder (`CLIPTextModel`):
        Text-encoder, for empty text embedding.
    tokenizer (`CLIPTokenizer`):
        CLIP tokenizer.
    prediction_type (`str`, *optional*):
        Type of predictions made by the model.
    scale_invariant (`bool`, *optional*):
        A model property specifying whether the predicted depth maps are scale-invariant. This value must be set in
        the model config. When used together with the `shift_invariant=True` flag, the model is also called
        "affine-invariant". NB: overriding this value is not supported.
    shift_invariant (`bool`, *optional*):
        A model property specifying whether the predicted depth maps are shift-invariant. This value must be set in
        the model config. When used together with the `scale_invariant=True` flag, the model is also called
        "affine-invariant". NB: overriding this value is not supported.
    default_denoising_steps (`int`, *optional*):
        The minimum number of denoising diffusion steps that are required to produce a prediction of reasonable
        quality with the given model. This value must be set in the model config. When the pipeline is called
        without explicitly setting `num_inference_steps`, the default value is used. This is required to ensure
        reasonable results with various model flavors compatible with the pipeline, such as those relying on very
        short denoising schedules (`LCMScheduler`) and those with full diffusion schedules (`DDIMScheduler`).
    default_processing_resolution (`int`, *optional*):
        The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
        the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
        default value is used. This is required to ensure reasonable results with various model flavors trained
        with varying optimal processing resolution values.
ztext_encoder->unet->vae)depth	disparityunetvae	schedulertext_encoder	tokenizerprediction_typescale_invariantshift_invariantdefault_denoising_stepsdefault_processing_resolutionc                   > [         TU ]  5         X`R                  ;  a&  [        R	                  SU SU R                   S35        U R                  UUUUUS9  U R                  UUUU	U
S9  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OS	U l        Xpl        Xl        Xl        Xl        S U l        [%        U R                  S
9U l        g )Nz*Potentially unsupported `prediction_type='z&'`; values supported by the pipeline: .)r8   r9   r:   r;   r<   )r=   r>   r?   r@   rA   r9   r   r      )vae_scale_factor)super__init__supported_prediction_typesloggerwarningregister_modulesregister_to_configgetattrlenr9   configblock_out_channelsrE   r>   r?   r@   rA   empty_text_embeddingr   image_processor)selfr8   r9   r:   r;   r<   r=   r>   r?   r@   rA   	__class__s              r2   rG   MarigoldDepthPipeline.__init__   s     	"A"AANN<_<MMs22316
 	% 	 	
 	+++$;*G 	  	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw..'>$-J*$(!5tG\G\]r1   imagenum_inference_stepsensemble_sizeprocessing_resolutionresample_method_inputresample_method_output
batch_sizeensembling_kwargslatents	generatoroutput_typeoutput_uncertaintyreturnc           
      	  ^
 S[        U R                  R                  R                  5      S-
  -  nXR                  :w  a  [        SU R                   SU S35      eUc  [        S5      eUS:  a  [        S5      eUS:  a  [        S5      eUS:X  a  [        R                  S	5        US:  a<  U R                  (       d  U R                  (       a  [        5       (       d  [        S
5      eUS:X  a  U(       a  [        S5      eUc  [        S5      eUS:  a  [        S5      eX@R                  -  S:w  a  [        SU R                   S35      eUS;  a  [        S5      eUS;  a  [        S5      eUS:  a  [        S5      eUS;  a  [        S5      eU	b  T
b  [        S5      eUb:  [        U[        5      (       d  [        S5      eSU;   a  US   S;  a  [        S5      eSnSu  nn[        U[        5      (       d  U/n[        U5       GH  u  nn[        U[         R"                  5      (       d  [$        R&                  " U5      (       a`  UR(                  S;  a  [        SU SUR*                   S35      eUR*                  S S  u  nnSnUR(                  S!:X  a  UR*                  S   nOL[        U[,        R,                  5      (       a  UR.                  u  nnSnO[        S"U S#[1        U5       S35      eUc  UUnnO"UU4UU4:w  a  [        S$U S%UU4 S&UU4 35      eUU-  nGM     U	Gb%  [$        R&                  " U	5      (       d  [        S'5      eU	R3                  5       S!:w  a  [        S(U	R*                   S35      eUS:  a=  [5        UU5      nUU-  U-  nX-  U-  nUS:X  d  US:X  a  [        S)U S*U S+35      eUUnnXR                  -   S-
  U R                  -  nUU R                  -   S-
  U R                  -  nX-  U R                  R                  R6                  UU4nU	R*                  U:w  a  [        S,U	R*                   S-U S35      eT
b  [        T
[        5      (       aD  [        T
5      X-  :w  a  [        S.5      e[9        U
4S/ jT
 5       5      (       d  [        S05      e U$ [        T
[$        R:                  5      (       d  [        S1[1        T
5       S35      eU$ )2Nr   r   z/`vae_scale_factor` computed at initialization (z) differs from the actual one (z).zW`num_inference_steps` is not specified and could not be resolved from the model config.z'`num_inference_steps` must be positive.z!`ensemble_size` must be positive.zk`ensemble_size` == 2 results are similar to no ensembling (1); consider increasing the value to at least 3.z9Make sure to install scipy if you want to use ensembling.zpComputing uncertainty by setting `output_uncertainty=True` also requires setting `ensemble_size` greater than 1.zY`processing_resolution` is not specified and could not be resolved from the model config.r   zx`processing_resolution` must be non-negative: 0 for native resolution, or any positive value for downsampled processing.z.`processing_resolution` must be a multiple of rC   )nearestnearest-exactbilinearbicubicareazy`resample_method_input` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.zz`resample_method_output` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.z`batch_size` must be positive.)ptr+   z*`output_type` must be one of `pt` or `np`.z2`latents` and `generator` cannot be used together.z)`ensembling_kwargs` must be a dictionary.	reductionmeanmedianzF`ensembling_kwargs['reduction']` can be either `'mean'` or `'median'`.)NN)r   r      z`image[z(]` has unsupported dimensions or shape: rn   zUnsupported `image[z	]` type: zInput `image[z]` has incompatible dimensions z with the previous images z!`latents` must be a torch.Tensor.z/`latents` has unsupported dimensions or shape: z*Extreme aspect ratio of the input image: [z x ]z`latents` has unexpected shape=z
 expected=z^The number of generators must match the total number of ensemble members for all input images.c              3      >#    U  H4  oR                   R                  TS    R                   R                  :H  v   M6     g7f)r   N)devicetype).0gr_   s     r2   	<genexpr>5MarigoldDepthPipeline.check_inputs.<locals>.<genexpr>B  s,     Xi88==IaL,?,?,D,DDis   <?z;`generator` device placement is not consistent in the list.zUnsupported generator type: )rN   r9   rO   rP   rE   
ValueErrorrI   rJ   r>   r?   r   ImportError
isinstancedictlist	enumerater+   r,   r-   	is_tensorndimshaper   sizers   dimmaxlatent_channelsall	Generator)rS   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   actual_vae_scale_factor
num_imagesWHiimgH_iW_iN_imax_orignew_Hnew_Wwhshape_expecteds             `                 r2   check_inputs"MarigoldDepthPipeline.check_inputs   s_    #$DHHOO,N,N(ORS(S"T"&;&;;A$BWBWAXXw  yP  xQ  QS  T  &vww"FGG1@AAANN? 1$"6"6$:N:NXjXlXlYZZA"4"  !(k  !1$*  !#8#88A=MdNcNcMddefgg (ccC  ")ddC  >=>>l*IJJ9#8QRR(/66 !LMM//4Ek4RZl4l !ijj 
1%&&GE&FAs#rzz**eooc.B.B889,$wqc1YZ]ZcZcYdde%fgg99RS>S88q=))A,CC--88S #6qc49+Q!OPPyC11QC:% #A3&EsCj\Qkmnpqlrkst  #J' ', ??7++ !DEE{{}! #RSZS`S`Raab!cdd$q(q!911X=1X=A:!$'QRSQTTWXYWZZ[%\]]e1***Q.43H3HHAT***Q.43H3HHA(8$((//:Y:Y[\^_`N}}. #B7==/Q[\j[kkl!mnn  )T**y>Z%??$x  XiXXX$%bcc Y
   	5??;; #?Y?PPQ!RSSr1   c                    [        U S5      (       d  0 U l        OA[        U R                  [        5      (       d"  [	        S[        U R                  5       S35      e[        S0 U R                  D6nUR                  SU5      US'   UR                  SU5      US'   Ub  [        U40 UD6$ Ub  [        SSU0UD6$ [	        S5      e)	N_progress_bar_configz=`self._progress_bar_config` should be of type `dict`, but is rC   descleavetotalz/Either `total` or `iterable` has to be defined.r%   )hasattrr   rz   r{   rx   rs   getr   )rS   iterabler   r   r   progress_bar_configs         r2   progress_bar"MarigoldDepthPipeline.progress_barI  s    t344(*D%D55t<<OPTUYUnUnPoOppqr  #?T%>%>?&9&=&=fd&KF#':'>'>w'NG$8$788;e;':;;NOOr1   match_input_resolutionoutput_latentreturn_dictc                    U R                   nU R                  nUc  U R                  nUc  U R                  nU R	                  UUUUUUUU	U
UUU5      nU R
                  c]  SnU R                  USU R                  R                  SSS9nUR                  R                  U5      nU R                  U5      S   U l        U R                  R                  XUUU5      u  nnnU R                  XXU5      u  nnAU R
                  R                  UUS9R                  US	S	5      n/ nU R                  [!        SUU-  U5      SS
S9 H  nUUUU-    nUUUU-    nUR"                  S   nUSU n U R$                  R'                  UUS9  U R                  U R$                  R(                  SSS9 Hw  n![*        R,                  " UU/S	S9n"U R/                  U"U!U SS9S   n#U R$                  R1                  U#U!UUS9R2                  n[4        (       d  Mb  [6        R8                  " 5         My     UR;                  U5        M     [*        R,                  " USS9nAAAAAA A"A#[*        R,                  " [!        SUR"                  S   U5       Vs/ s H  nU R=                  UUUU-    5      PM     snSS9n$U(       d  SnU R                  R?                  U$U5      n$Sn%US	:  a  U$R@                  " UU/U$R"                  S	S Q76 n$[!        U5       Vs/ s H9  nU RB                  " U$U   U RD                  U RF                  U40 U	=(       d    0 D6PM;     n$n[I        U$6 u  n$n%[*        R,                  " U$SS9n$U(       a  [*        R,                  " U%SS9n%OSn%U(       aB  U R                  RK                  U$UUSS9n$U%b#  U(       a  U R                  RK                  U%UUSS9n%US:X  a@  U R                  RM                  U$5      n$U%b"  U(       a  U R                  RM                  U%5      n%U RO                  5         U(       d  U$U%U4$ [Q        U$U%US9$ s  snf s  snf )aA  
Function invoked when calling the pipeline.

Args:
    image (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`),
        `List[torch.Tensor]`: An input image or images used as an input for the depth estimation task. For
        arrays and tensors, the expected value range is between `[0, 1]`. Passing a batch of images is possible
        by providing a four-dimensional array or a tensor. Additionally, a list of images of two- or
        three-dimensional arrays or tensors can be passed. In the latter case, all list elements must have the
        same width and height.
    num_inference_steps (`int`, *optional*, defaults to `None`):
        Number of denoising diffusion steps during inference. The default value `None` results in automatic
        selection.
    ensemble_size (`int`, defaults to `1`):
        Number of ensemble predictions. Higher values result in measurable improvements and visual degradation.
    processing_resolution (`int`, *optional*, defaults to `None`):
        Effective processing resolution. When set to `0`, matches the larger input image dimension. This
        produces crisper predictions, but may also lead to the overall loss of global context. The default
        value `None` resolves to the optimal value from the model config.
    match_input_resolution (`bool`, *optional*, defaults to `True`):
        When enabled, the output prediction is resized to match the input dimensions. When disabled, the longer
        side of the output will equal to `processing_resolution`.
    resample_method_input (`str`, *optional*, defaults to `"bilinear"`):
        Resampling method used to resize input images to `processing_resolution`. The accepted values are:
        `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
    resample_method_output (`str`, *optional*, defaults to `"bilinear"`):
        Resampling method used to resize output predictions to match the input resolution. The accepted values
        are `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
    batch_size (`int`, *optional*, defaults to `1`):
        Batch size; only matters when setting `ensemble_size` or passing a tensor of images.
    ensembling_kwargs (`dict`, *optional*, defaults to `None`)
        Extra dictionary with arguments for precise ensembling control. The following options are available:
        - reduction (`str`, *optional*, defaults to `"median"`): Defines the ensembling function applied in
          every pixel location, can be either `"median"` or `"mean"`.
        - regularizer_strength (`float`, *optional*, defaults to `0.02`): Strength of the regularizer that
          pulls the aligned predictions to the unit range from 0 to 1.
        - max_iter (`int`, *optional*, defaults to `2`): Maximum number of the alignment solver steps. Refer to
          `scipy.optimize.minimize` function, `options` argument.
        - tol (`float`, *optional*, defaults to `1e-3`): Alignment solver tolerance. The solver stops when the
          tolerance is reached.
        - max_res (`int`, *optional*, defaults to `None`): Resolution at which the alignment is performed;
          `None` matches the `processing_resolution`.
    latents (`torch.Tensor`, or `List[torch.Tensor]`, *optional*, defaults to `None`):
        Latent noise tensors to replace the random initialization. These can be taken from the previous
        function call's output.
    generator (`torch.Generator`, or `List[torch.Generator]`, *optional*, defaults to `None`):
        Random number generator object to ensure reproducibility.
    output_type (`str`, *optional*, defaults to `"np"`):
        Preferred format of the output's `prediction` and the optional `uncertainty` fields. The accepted
        values are: `"np"` (numpy array) or `"pt"` (torch tensor).
    output_uncertainty (`bool`, *optional*, defaults to `False`):
        When enabled, the output's `uncertainty` field contains the predictive uncertainty map, provided that
        the `ensemble_size` argument is set to a value above 2.
    output_latent (`bool`, *optional*, defaults to `False`):
        When enabled, the output's `latent` field contains the latent codes corresponding to the predictions
        within the ensemble. These codes can be saved, modified, and used for subsequent calls with the
        `latents` argument.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.marigold.MarigoldDepthOutput`] instead of a plain tuple.

Examples:

Returns:
    [`~pipelines.marigold.MarigoldDepthOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.marigold.MarigoldDepthOutput`] is returned, otherwise a
        `tuple` is returned where the first element is the prediction, the second element is the uncertainty
        (or `None`), and the third is the latent (or `None`).
N 
do_not_padTri   )padding
max_length
truncationreturn_tensorsr   )rr   dtyper   zMarigold predictions...)r   r   )rr   FzDiffusion steps...r   )encoder_hidden_statesr   )r_   )is_aar+   )r"   r#   r$   ))_execution_devicer   r@   rA   r   rQ   r<   model_max_length	input_idstor;   rR   
preprocessprepare_latentsrepeatr   ranger   r:   set_timesteps	timestepsr-   catr8   stepprev_sampleXLA_AVAILABLExm	mark_stepappenddecode_predictionunpad_imagereshapeensemble_depthr>   r?   zipresize_antialiaspt_to_numpymaybe_free_model_hooksr    )&rS   rV   rW   rX   rY   r   rZ   r[   r\   r]   r^   r_   r`   ra   r   r   rr   r   r   prompttext_inputstext_input_idsr   original_resolutionimage_latentpred_latentbatch_empty_text_embeddingpred_latentsr   batch_image_latentbatch_pred_latenteffective_batch_sizetexttbatch_latentnoiser"   r#   s&                                         r2   __call__MarigoldDepthPipeline.__call__\  s   t ''

 &"&">"> ($($F$F! &&!!"

" $$,F..$>>::# ) K )2255f=N(,(9(9.(I!(LD% /3.B.B.M.M*?/
+w+ %)$8$8Ij%
!k %)%>%>%A%AW\%A%]%d%d1&
" ""!Z-/<DOh # 
A ".a!j.!A +AJ ?#5#;#;A#> -.C/CDDNN(()<V(L&&t~~'?'?uSg&h$yy*<>O)PVWX		,[`	abcd$(NN$7$71/9 %8 %+ " !=LLN i  12'
* ii!4 & YY q+"3"3A"6
CCA &&{1q:~'FGC 

 K ))55j'J
 1#++J]
HXHXYZY[H\]J z*	 +A ##qM((((&	
 ).B +  	 '*:&6#J:15J!#ii;" "-->>/1Gu ? J &+="22CC!46LTY D 
 $--99*EJ&+="22>>{K 	##%[99"!#
 	
*	s   !P:A P?c                    S n[         R                  " [        SUR                  S   U5       Vs/ s H(  nU" U R                  R                  XXu-    5      5      PM*     snSS9nXR                  R                  R                  -  nUR                  USS9nUn	U	c*  [        UR                  UUR                  UR                  S9n	X4$ s  snf )Nc                     [        U S5      (       a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nlatent_distr^   z3Could not access latents of provided encoder_output)r   r   moder^   AttributeError)encoder_outputs    r2   retrieve_latents?MarigoldDepthPipeline.prepare_latents.<locals>.retrieve_latentsv  sI    ~}55%11668833%---$%Z[[r1   r   r   )r_   rr   r   )r-   r   r   r   r9   encoderO   scaling_factorrepeat_interleaver   rr   r   )
rS   rV   r^   r_   rX   r\   r   r   r   r   s
             r2   r   %MarigoldDepthPipeline.prepare_latentsn  s    	\ yy q%++a.*==A !1>1J!KL= 
 $hhoo&D&DD#55m5K&""##**"((	K ((%s   /C	r   c                    UR                  5       S:w  d1  UR                  S   U R                  R                  R                  :w  a:  [        SU R                  R                  R                   SUR                   S35      eU R                  R                  XR                  R                  R                  -  SS9S   nUR                  SS	S
9n[        R                  " USS5      nUS-   S-  nU$ )Nrn   r   z Expecting 4D tensor of shape [B,z,H,W]; got rC   F)r   r   Tr   keepdimg            ?g       @)r   r   r9   rO   r   rx   decoder   rl   r-   clip)rS   r   r"   s      r2   r   'MarigoldDepthPipeline.decode_prediction  s    ??![%6%6q%9TXX__=\=\%\2488??3R3R2SS^_j_p_p^qqrs  XX__[88??3Q3Q%Q_d_efgh
__D_9
ZZ
D#6
 3&#-
r1   r6   rj   regularizer_strengthmax_itertolmax_resc	           
        ^^^^^^^^^^^^ U R                  5       S:w  d  U R                  S   S:w  a  [        SU R                   S35      eTS;  a  [        ST S35      eT(       d  T(       a  [        S5      eS[        R                  4UUU4S	 jjmS[        R                  S
[
        R                  S[        R                  4UUU4S jjm SS[        R                  S[        S[        [        R                  [        [        R                     4   4U4S jjjmS
[
        R                  S[        R                  S[        4UUUU4S jjmS[        R                  4UUUUU4S jjn	T=(       d    Tn
U R                  S   mU
(       a  U	" U 5      nT" X5      n T" XS9u  pU R                  5       nT(       a  T(       a  U R                  5       nOT(       a  SnO[        S5      eX-
  R                  SS9nX-
  U-  n U(       a  X-  nX4$ )a  
Ensembles the depth maps represented by the `depth` tensor with expected shape `(B, 1, H, W)`, where B is the
number of ensemble members for a given prediction of size `(H x W)`. Even though the function is designed for
depth maps, it can also be used with disparity maps as long as the input tensor values are non-negative. The
alignment happens when the predictions have one or more degrees of freedom, that is when they are either
affine-invariant (`scale_invariant=True` and `shift_invariant=True`), or just scale-invariant (only
`scale_invariant=True`). For absolute predictions (`scale_invariant=False` and `shift_invariant=False`)
alignment is skipped and only ensembling is performed.

Args:
    depth (`torch.Tensor`):
        Input ensemble depth maps.
    scale_invariant (`bool`, *optional*, defaults to `True`):
        Whether to treat predictions as scale-invariant.
    shift_invariant (`bool`, *optional*, defaults to `True`):
        Whether to treat predictions as shift-invariant.
    output_uncertainty (`bool`, *optional*, defaults to `False`):
        Whether to output uncertainty map.
    reduction (`str`, *optional*, defaults to `"median"`):
        Reduction method used to ensemble aligned predictions. The accepted values are: `"mean"` and
        `"median"`.
    regularizer_strength (`float`, *optional*, defaults to `0.02`):
        Strength of the regularizer that pulls the aligned predictions to the unit range from 0 to 1.
    max_iter (`int`, *optional*, defaults to `2`):
        Maximum number of the alignment solver steps. Refer to `scipy.optimize.minimize` function, `options`
        argument.
    tol (`float`, *optional*, defaults to `1e-3`):
        Alignment solver tolerance. The solver stops when the tolerance is reached.
    max_res (`int`, *optional*, defaults to `1024`):
        Resolution at which the alignment is performed; `None` matches the `processing_resolution`.
Returns:
    A tensor of aligned and ensembled depth maps and optionally a tensor of uncertainties of the same shape:
    `(1, 1, H, W)`.
rn   r   z,Expecting 4D tensor of shape [B,1,H,W]; got rC   rk   Unrecognized reduction method: z1Pure shift-invariant ensembling is not supported.r6   c                 (  > U R                  TS5      R                  SS9R                  nU R                  TS5      R                  SS9R                  nT(       aU  T(       aN  SX!-
  R	                  SS9-  nU* U-  n[
        R                  " X445      R                  5       R                  5       nOCT(       a1  SUR	                  SS9-  nUR                  5       R                  5       nO[        S5      eUR                  [        R                  5      nU$ )Nr   r   r   ư>minUnrecognized alignment.)r   r   valuesr   clampr-   r   cpunumpyrx   astyper+   float64)	r6   init_mininit_maxinit_sinit_tparamrX   r>   r?   s	         r2   
init_param8MarigoldDepthPipeline.ensemble_depth.<locals>.init_param  s    }}]B7;;;BIIH}}]B7;;;BIIH? 3::t:DD 8+		6"23779??A x~~$~77

**, !:;;LL,ELr1   r  rb   c                   > T(       a  T(       a  [         R                  " US5      u  p#[        R                  " U5      R	                  U 5      R                  TSSS5      n[        R                  " U5      R	                  U 5      R                  TSSS5      nX-  U-   nU$ T(       a=  [        R                  " U5      R	                  U 5      R                  TSSS5      nX-  nU$ [        S5      e)Nr   r   r   )r+   splitr-   
from_numpyr   viewrx   )r6   r  sr   outrX   r>   r?   s        r2   align3MarigoldDepthPipeline.ensemble_depth.<locals>.align  s    ?xxq)$$Q'**5166}aAN$$Q'**5166}aANi!m J !$$U+..u5::=!QPQRi J !!:;;r1   depth_alignedreturn_uncertaintyc                 j  > S nTS:X  a6  [         R                  " U SSS9nU(       a  [         R                  " U SSS9nX24$ TS:X  a`  [         R                  " U SSS9R                  nU(       a6  [         R                  " [         R
                  " X-
  5      SSS9R                  nX24$ [        ST S35      e)Nrl   r   Tr   rm   r   rC   )r-   rl   stdrm   r   absrx   )r  r  r#   r"   rj   s       r2   ensemble6MarigoldDepthPipeline.ensemble_depth.<locals>.ensemble  s     KF""ZZ1dK
%"'))Mq$"OK ** h&"\\-QMTT
%"',,uyy9S/TZ[ei"j"q"qK ** !#B9+Q!OPPr1   c                   > SnT" X5      n[         R                  " [         R                  " T5      5       H?  u  pEX4   X5   -
  nX&S-  R                  5       R	                  5       R                  5       -  nMA     TS:  an  T" USS9u  pxUR                  5       R                  5       R                  5       n	SUR                  5       -
  R                  5       R                  5       n
X)U
-   T-  -  nU$ )Ng        r   r   Fr  r   )	r-   combinationsarangerl   sqrtitemr   r  r   )r  r6   costr  r   jdiffr"   _err_nearerr_farr  r  rX   r   s              r2   cost_fn5MarigoldDepthPipeline.ensemble_depth.<locals>.cost_fn  s    D!%/M**5<<+FG$'-*::q(--/4466 H $a' (5 Q
%>>+//1668!11668==?G+/CCCKr1   c           	      0  > SS K nU R                  [        R                  5      nTb4  [	        UR
                  SS  5      T:  a  [        R                  " UTS5      nT" U5      nUR                  R                  [        TUS9UST	TSS.S9nUR                  $ )	Nr   r   re   )r6   BFGSF)maxiterdisp)methodr   options)scipyr   r-   float32r   r   r   resize_to_max_edgeoptimizeminimizer   x)
r6   r*  depth_to_alignr  resr"  r  r   r   r   s
        r2   compute_param;MarigoldDepthPipeline.ensemble_depth.<locals>.compute_param  s    "XXemm4N"s>+?+?+C'Dw'N!7!J!J>[bds!t~.E..))~6$,e< * C 55Lr1   r   r  r   r   r   )F)r   r   rx   r-   r.   r+   r,   boolr	   r   floatr   r   r   )r6   r>   r?   ra   rj   r   r   r   r   r2  requires_aligningr  r#   	depth_max	depth_mindepth_ranger  r"  r  rX   r  s    `` `````       @@@@@r2   r   $MarigoldDepthPipeline.ensemble_depth  s   \ 99;!u{{1~2KEKK=XYZ[[..>ykKLL?PQQ	ell 	 	"	 	bjj 	U\\ 	 	 EJ	+ <<	+=A	+5<<%,,!778	+ 	+ 	2:: 	ell 	u 	 	 	 	 	& ,>A!%(E%'E%eSIIK			II677 ,333="k1&K!!r1   )r   r@   rA   rQ   rR   r>   r?   rE   )NTTNN)NNNT)Nr   NTrf   rf   r   NNNr+   FFT)TTFrm   g{Gz?r   gMbP?i   )*r&   r'   r(   r)   r*   model_cpu_offload_seqrH   r   r   r
   r   r   r   r   r   strr4  intrG   r   r   r   r-   r.   r   r   r   compilerdisabler   no_gradr   EXAMPLE_DOC_STRINGr   r	   r   r   staticmethodr5  r   r0   __classcell__)rT   s   @r2   r4   r4   i   s$   'R 6!7 *.*.*.157;-^"-^ -^ 45	-^
 $-^ !-^ "#-^ "$-^ "$-^ "*#-^ (0}-^ -^^B!B !B 	B
  #B  #B !$B B $DcN3B %,,'B E%//43H"HIJB B !B 
BH ^^P P$ ]]_12 .2/3'+%/&06:EIMQ#(# !N
!N
 &c]N
 	N

  (}N
 !%N
  #N
 !$N
 N
 $DcN3N
 %d5<<.@ @ABN
 E%//43H"HIJN
 N
 !N
 N
  !N
 3 N
`#)||#) %,,'#) EOO,	#)
 #) #) 
u||U\\)	*#)JU\\ ell   !% $#(!&*Z"||Z"Z" Z" !	Z"
 Z" $Z" Z" Z" Z" 
u||Xell33	4Z" Z"r1   r4   )5dataclassesr   	functoolsr   typingr   r   r   r   r	   r
   r   r+   r-   PILr   	tqdm.autor   transformersr   r   rR   r   modelsr   r   
schedulersr   r   utilsr   r   r   r   utils.import_utilsr   utils.torch_utilsr   pipeline_utilsr   marigold_image_processingr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr&   rI   rA  r    r4   r%   r1   r2   <module>rU     s   & "  : :     5 1  5 - . = ))MM			H	% , &* & &,S"- S"r1   