
    +hY                     `   S SK Jr  S SKJrJrJr  S SKrS SKrS SK	J
s  Jr  SSKJrJr  SSKJr  SSKJr  \ " S	 S
\5      5       rS\R*                  S\S\R.                  4S jrS\R.                  S\\R2                     S\R.                  4S jrSS\4S jjrSS\4S jjr " S S\\5      rg)    )	dataclass)OptionalTupleUnionN   )ConfigMixinregister_to_config)
BaseOutput   )SchedulerMixinc                   8    \ rS rSr% Sr\R                  \S'   Srg)VQDiffusionSchedulerOutput   a  
Output class for the scheduler's step function output.

Args:
    prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the
        denoising loop.
prev_sample N)	__name__
__module____qualname____firstlineno____doc__torch
LongTensor__annotations____static_attributes__r       f/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/schedulers/scheduling_vq_diffusion.pyr   r      s     !!!r   r   xnum_classesreturnc                     [         R                  " X5      nUR                  SSS5      n[        R                  " UR                  5       R                  SS95      nU$ )aw  
Convert batch of vector of class indices into batch of log onehot vectors

Args:
    x (`torch.LongTensor` of shape `(batch size, vector length)`):
        Batch of class indices

    num_classes (`int`):
        number of classes to be used for the onehot vectors

Returns:
    `torch.Tensor` of shape `(batch size, num classes, vector length)`:
        Log onehot vectors
r   r   r   KH9)min)Fone_hotpermuter   logfloatclamp)r   r   x_onehotlog_xs       r   index_to_log_onehotr+   )   sN     yy(H1a(HIIhnn&,,,78ELr   logits	generatorc                     [         R                  " U R                  U R                  US9n[         R                  " [         R                  " US-   5      * S-   5      * nX0-   nU$ )z 
Apply gumbel noise to `logits`
)devicer-   r!   )r   randshaper/   r&   )r,   r-   uniformgumbel_noisenoiseds        r   gumbel_noisedr5   >   sQ     jjfmmyQGIIuyy599EABBL"FMr   num_diffusion_timestepsc                     [         R                  " SU 5      U S-
  -  X!-
  -  U-   n[         R                  " S/U45      nUSS USS -  n[         R                  " USS S/45      nXC4$ )zB
Cumulative and non-cumulative alpha schedules.

See section 4.1.
r   r   Nnparangeconcatenate)r6   alpha_cum_startalpha_cum_endattats        r   alpha_schedulesrA   H   s     			!,-1H11LMQ^Qpq
	  ..1#s
$C	QR3s8	B
..#ab'A3
(C7Nr   c                     [         R                  " SU 5      U S-
  -  X!-
  -  U-   n[         R                  " S/U45      nSU-
  nUSS USS -  nSU-
  n[         R                  " USS S/45      nXc4$ )zB
Cumulative and non-cumulative gamma schedules.

See section 4.1.
r   r   Nr8   r9   )r6   gamma_cum_startgamma_cum_endcttone_minus_cttone_minus_ctcts          r   gamma_schedulesrI   X   s     			!,-1H11LMQ^Qpq
	  ..1#s
$CGM $}Sb'99L	
\	B
..#ab'A3
(C7Nr   c                   v   \ rS rSrSrSr\     SS\S\S\S\S\S	\4S
 jj5       r	SS\S\
\\R                  4   4S jjr  S S\R                  S\R                   S\R"                  S\\R&                     S\S\
\\4   4S jjrS rS\R                  S\R"                  S\R                  S\4S jrS rSrg)!VQDiffusionSchedulerj   a  
A scheduler for vector quantized diffusion.

This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
methods the library implements for all schedulers such as loading and saving.

Args:
    num_vec_classes (`int`):
        The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
        latent pixel.
    num_train_timesteps (`int`, defaults to 100):
        The number of diffusion steps to train the model.
    alpha_cum_start (`float`, defaults to 0.99999):
        The starting cumulative alpha value.
    alpha_cum_end (`float`, defaults to 0.00009):
        The ending cumulative alpha value.
    gamma_cum_start (`float`, defaults to 0.00009):
        The starting cumulative gamma value.
    gamma_cum_end (`float`, defaults to 0.99999):
        The ending cumulative gamma value.
r   num_vec_classesnum_train_timestepsr=   r>   rC   rD   c                    Xl         U R                   S-
  U l        [        X#US9u  px[        X%US9u  pU R                   S-
  nSU-
  U	-
  U-  nSU-
  U
-
  U-  n[        R
                  " UR                  S5      5      n[        R
                  " UR                  S5      5      n[        R
                  " U	R                  S5      5      n	[        R                  " U5      n[        R                  " U5      n[        R                  " U	5      n[        R
                  " UR                  S5      5      n[        R
                  " UR                  S5      5      n[        R
                  " U
R                  S5      5      n
[        R                  " U5      n[        R                  " U5      n[        R                  " U
5      nUR                  5       U l	        UR                  5       U l
        UR                  5       U l        UR                  5       U l        UR                  5       U l        UR                  5       U l        S U l        [        R                   " ["        R$                  " SU5      S S S2   R'                  5       5      U l        g )Nr   )r=   r>   )rC   rD   float64r   r8   )	num_embed
mask_classrA   rI   r   tensorastyper&   r'   log_atlog_btlog_ctlog_cumprod_atlog_cumprod_btlog_cumprod_ctnum_inference_steps
from_numpyr:   r;   copy	timesteps)selfrM   rN   r=   r>   rC   rD   r@   r?   rH   rE   num_non_mask_classesbtbttrU   rV   rW   rX   rY   rZ   s                       r   __init__VQDiffusionScheduler.__init__   s    ) ..1,!"5fst!"5fst#~~1"frk113w} 44\\"))I./\\"))I./\\"))I./222ll3::i01ll3::i01ll3::i01333llnllnlln,224,224,224 $( ))"))A7J*KDbD*Q*V*V*XYr   Nr[   r/   c                 N   Xl         [        R                  " SU R                   5      SSS2   R                  5       n[        R
                  " U5      R                  U5      U l        U R                  R                  U5      U l        U R                  R                  U5      U l	        U R                  R                  U5      U l
        U R                  R                  U5      U l        U R                  R                  U5      U l        U R                  R                  U5      U l        g)a  
Sets the discrete timesteps used for the diffusion chain (to be run before inference).

Args:
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved
        to.
r   Nr8   )r[   r:   r;   r]   r   r\   tor^   rU   rV   rW   rX   rY   rZ   )r_   r[   r/   r^   s       r   set_timesteps"VQDiffusionScheduler.set_timesteps   s     $7 IIa!9!9:4R4@EEG	)))477?kknnV,kknnV,kknnV,"1144V<"1144V<"1144V<r   model_outputtimestepsampler-   return_dictr   c                     US:X  a  UnOU R                  XU5      n[        Xd5      nUR                  SS9nU(       d  U4$ [        US9$ )a+  
Predict the sample from the previous timestep by the reverse transition distribution. See
[`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer.

Args:
    log_p_x_0: (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
        The log probabilities for the predicted classes of the initial latent pixels. Does not include a
        prediction for the masked class as the initial unnoised image cannot be masked.
    t (`torch.long`):
        The timestep that determines which transition matrices are used.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    generator (`torch.Generator`, or `None`):
        A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
        `tuple`.

Returns:
    [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
        If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
        returned, otherwise a tuple is returned where the first element is the sample tensor.
r   r   dim)r   )q_posteriorr5   argmaxr   )r_   ri   rj   rk   r-   rl   log_p_x_t_min_1	x_t_min_1s           r   stepVQDiffusionScheduler.step   sU    > q=*O"..|XNO'C#**q*1	<)i@@r   c                     [        X R                  5      nU R                  X2USS9nU R                  X2USS9nX-
  n[        R                  " USSS9nXx-
  nU R                  XsS-
  5      nXv-   U-   n	U	$ )ah  
Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

```
p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
```

Args:
    log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
        The log probabilities for the predicted classes of the initial latent pixels. Does not include a
        prediction for the masked class as the initial unnoised image cannot be masked.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    t (`torch.Long`):
        The timestep that determines which transition matrix is used.

Returns:
    `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`:
        The log probabilities for the predicted classes of the image at timestep `t-1`.
T)tx_tlog_onehot_x_t
cumulativeFr   )ro   keepdim)r+   rQ   $log_Q_t_transitioning_to_known_classr   	logsumexpapply_cumulative_transitions)
r_   	log_p_x_0rx   rw   ry   log_q_x_t_given_x_0log_q_t_given_x_t_min_1qq_log_sum_exprr   s
             r   rp    VQDiffusionScheduler.q_posterior   s    * -S..A"GGD H 
 #'"K"KE #L #
 + q$?  --aQ7 5E\ r   rw   rx   ry   rz   c                   U(       a.  U R                   U   nU R                  U   nU R                  U   nO-U R                  U   nU R                  U   nU R
                  U   nU(       d  USS2SSS24   R                  S5      nUSS2SS2SS24   nX5-   R                  U5      n	X R                  :H  n
U
R                  S5      R                  SU R                  S-
  S5      n
XyU
'   U(       d  [        R                  " U	W4SS9n	U	$ )a  
Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
latent pixel in `x_t`.

Args:
    t (`torch.Long`):
        The timestep that determines which transition matrix is used.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`):
        The log one-hot vectors of `x_t`.
    cumulative (`bool`):
        If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
        `True`, the cumulative transition matrix `0`->`t` is used.

Returns:
    `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`:
        Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability
        transition matrix.

        When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be
        masked.

        Where:
        - `q_n` is the probability distribution for the forward process of the `n`th latent pixel.
        - C_0 is a class of a latent pixel embedding
        - C_k is the class of the masked latent pixel

        non-cumulative result (omitting logarithms):
        ```
        q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0)
                  .      .                     .
                  .               .            .
                  .                      .     .
        q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k)
        ```

        cumulative result (omitting logarithms):
        ```
        q_0_cumulative(x_t | x_0 = C_0)    ...  q_n_cumulative(x_t | x_0 = C_0)
                  .               .                          .
                  .                        .                 .
                  .                               .          .
        q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1})
        ```
Nr8   r   rn   )rX   rY   rZ   rU   rV   rW   	unsqueeze	logaddexprR   expandrQ   r   cat)r_   rw   rx   ry   rz   abc(log_onehot_x_t_transitioning_from_maskedlog_Q_tmask_class_masks              r   r|   9VQDiffusionScheduler.log_Q_t_transitioning_to_known_classd  s   b ##A&A##A&A##A&AAAAAAA 8FaQh7O7Y7YZ[7\4
 (3B3	2 "%003 0)33A6==b$..STBTVXY#$ ii*R SYZ[Gr   c                    UR                   S   nU R                  U   nU R                  U   nU R                  U   nUR                   S   nUR	                  USU5      nX-   R                  U5      n[        R                  " X4SS9nU$ )Nr   r   r   rn   )r1   rX   rY   rZ   r   r   r   r   )r_   r   rw   bszr   r   r   num_latent_pixelss           r   r~   1VQDiffusionScheduler.apply_cumulative_transitions  s    ggaj"""GGAJHHS!./Ua IIqf!$r   )
rU   rV   rW   rX   rY   rZ   rR   rQ   r[   r^   )d   wJ??̔>r   r   )N)NT)r   r   r   r   r   orderr	   intr'   rc   r   strr   r/   rg   Tensorlongr   r   	Generatorboolr   r   rt   rp   r|   r~   r   r   r   r   rK   rK   j   sK   , E $'!('!)&,Z,Z !,Z 	,Z
 ,Z ,Z ,Z ,Z\= =eCDU>V =6 04 +All+A **+A   	+A
 EOO,+A +A 
)50	1+AZm^aIIa$)$4$4aFKlla`daFr   rK   )r   r   )r   r   )dataclassesr   typingr   r   r   numpyr:   r   torch.nn.functionalnn
functionalr#   configuration_utilsr   r	   utilsr
   scheduling_utilsr   r   r   r   r   r+   r   r5   rA   rI   rK   r   r   r   <module>r      s    " ) )     A  , 
" 
" 
"5++ # %,, *%,, 8EOO3L QVQ]Q] S  S $i>; ir   