
    cCiX                     n   S r SSKJr  SSKJrJr  SSKrSSKJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJr  SS	KJrJrJ r J!r!  \ " 5       (       a  SSK"r"\!RF                  " \$5      r%S
\&\&\      4S jr'  SS\RP                  S\)S\\\*\4      S
\+\)\)4   4S jjr, " S S\	5      r-S/r.g)zImage processor class for TVP.    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_size_dict)PaddingModeflip_channel_orderpadresizeto_channel_dimension_format)
IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDChannelDimension
ImageInputPILImageResamplingget_image_sizeis_valid_imageto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_vision_availableloggingreturnc                 J   [        U [        [        45      (       a6  [        U S   [        [        45      (       a  [        U S   S   5      (       a  U $ [        U [        [        45      (       a  [        U S   5      (       a  U /$ [        U 5      (       a  U //$ [	        SU  35      e)Nr   z"Could not make batched video from )
isinstancelisttupler   
ValueError)videoss    f/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/tvp/image_processing_tvp.pymake_batchedr%   5   s    &4-((Zq	D%=-Q-QVdeklmenopeqVrVr	FT5M	*	*~fQi/H/Hx			z
9&B
CC    input_imagemax_sizeinput_data_formatc                     [        X5      u  p4X4:  a  US-  U-  nUnXe-  nOUS-  U-  nUnXu-  n[        U5      [        U5      4nU$ )Ng      ?)r   int)	r'   r(   r)   heightwidthratio
new_height	new_widthsizes	            r$   get_resize_output_image_sizer2   B   sb    
 #;BMFf$
&	u$	&

OS^,DKr&   c            +       t  ^  \ rS rSrSrS/rSS\R                  SSSSSSS\R                  SSSS4S\
S	\\\\4      S
\S\
S\\\\4      S\
S\\\4   S\
S\\\\4      S\\\\   4   S\S\
S\
S\\\\\   4      S\\\\\   4      SS4 U 4S jjjr\R                  SS4S\R*                  S	\\\4   S
\S\\\\4      S\\\\4      S\R*                  4S jjrSS\R                  SS4S\R*                  S\\\\4      S\\\\   4   S\S\\\\4      S\\\\4      4S jjrSSSSSSSSSSSSSSS\R2                  S4S\S\\
   S	\\\\4      S
\\   S\\
   S\\\\4      S\\
   S\\   S\
S\\\\4      S\\\\\   4      S\\   S\\
   S\\
   S\\\\\   4      S\\\\\   4      S\\   S\\\\4      S\R*                  4&S jjr\" 5       SSSSSSSSSSSSSSSS\R2                  S4S\\\\   \\\      4   S\\
   S	\\\\4      S
\\   S\\
   S\\\\4      S\\
   S\\   S\\
   S\\\\4      S\\\\\   4      S\\   S\\
   S\\
   S\\\\\   4      S\\\\\   4      S \\\\4      S\S\\\\4      S\R>                  R>                  4(S! jj5       r S"r!U =r"$ )#TvpImageProcessorU   a@  
Constructs a Tvp image processor.

Args:
    do_resize (`bool`, *optional*, defaults to `True`):
        Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
        `do_resize` parameter in the `preprocess` method.
    size (`dict[str, int]` *optional*, defaults to `{"longest_edge": 448}`):
        Size of the output image after resizing. The longest edge of the image will be resized to
        `size["longest_edge"]` while maintaining the aspect ratio of the original image. Can be overridden by
        `size` in the `preprocess` method.
    resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
        Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the
        `preprocess` method.
    do_center_crop (`bool`, *optional*, defaults to `True`):
        Whether to center crop the image to the specified `crop_size`. Can be overridden by the `do_center_crop`
        parameter in the `preprocess` method.
    crop_size (`dict[str, int]`, *optional*, defaults to `{"height": 448, "width": 448}`):
        Size of the image after applying the center crop. Can be overridden by the `crop_size` parameter in the
        `preprocess` method.
    do_rescale (`bool`, *optional*, defaults to `True`):
        Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the `do_rescale`
        parameter in the `preprocess` method.
    rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
        Defines the scale factor to use if rescaling the image. Can be overridden by the `rescale_factor` parameter
        in the `preprocess` method.
    do_pad (`bool`, *optional*, defaults to `True`):
        Whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess` method.
    pad_size (`dict[str, int]`, *optional*, defaults to `{"height": 448, "width": 448}`):
        Size of the image after applying the padding. Can be overridden by the `pad_size` parameter in the
        `preprocess` method.
    constant_values (`Union[float, Iterable[float]]`, *optional*, defaults to 0):
        The fill value to use when padding the image.
    pad_mode (`PaddingMode`, *optional*, defaults to `PaddingMode.CONSTANT`):
        Use what kind of mode in padding.
    do_normalize (`bool`, *optional*, defaults to `True`):
        Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
        method.
    do_flip_channel_order (`bool`, *optional*, defaults to `True`):
        Whether to flip the color channels from RGB to BGR. Can be overridden by the `do_flip_channel_order`
        parameter in the `preprocess` method.
    image_mean (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
        Mean to use if normalizing the image. This is a float or list of floats the length of the number of
        channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
    image_std (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
        Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
        number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
pixel_valuesTNgp?r   	do_resizer1   resampledo_center_crop	crop_size
do_rescalerescale_factordo_padpad_sizeconstant_valuespad_modedo_normalizedo_flip_channel_order
image_mean	image_stdr   c                 F  > [         TU ]  " S0 UD6  Ub  UOSS0nUb  UOSSS.nU	b  U	OSSS.n	Xl        X l        X@l        XPl        X0l        X`l        Xpl        Xl	        Xl
        Xl        Xl        Xl        Xl        Ub  UO[        U l        Ub  Xl        g ["        U l        g )Nlongest_edge  )r,   r-    )super__init__r7   r1   r9   r:   r8   r;   r<   r=   r>   r?   r@   rA   rB   r   rC   r   rD   )selfr7   r1   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   kwargs	__class__s                    r$   rJ   TvpImageProcessor.__init__   s    & 	"6"'tnc-B!*!6IsUX<Y	'38CRU9V"	," $, . (%:"(2(>*DZ&/&;AVr&   imagedata_formatr)   c                     [        USS9nSU;   a  SU;   a  US   US   4nO2SU;   a  [        XS   U5      nO[        SUR                  5        35      e[	        U4UUUUS.UD6$ )a}  
Resize an image.

Args:
    image (`np.ndarray`):
        Image to resize.
    size (`dict[str, int]`):
        Size of the output image. If `size` is of the form `{"height": h, "width": w}`, the output image will
        have the size `(h, w)`. If `size` is of the form `{"longest_edge": s}`, the output image will have its
        longest edge of length `s` while keeping the aspect ratio of the original image.
    resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        Resampling filter to use when resiizing the image.
    data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format of the image. If not provided, it will be the same as the input image.
    input_data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format of the input image. If not provided, it will be inferred.
Fdefault_to_squarer,   r-   rF   zCSize must have 'height' and 'width' or 'longest_edge' as keys. Got )r1   r8   rP   r)   )r	   r2   r"   keysr   )rK   rO   r1   r8   rP   r)   rL   output_sizes           r$   r   TvpImageProcessor.resize   s    4 TU;t4>4=9Kt#6u>>RTefKbcgclclcnbopqq
#/
 
 	
r&   c           	          [        XS9u  pUR                  SU5      n
UR                  SU	5      nX-
  X-
  pUS:  d  US:  a  [        S5      eSU4SU44n[        UUUUUUS9nU$ )a  
Pad an image with zeros to the given size.

Args:
    image (`np.ndarray`):
        Image to pad.
    pad_size (`dict[str, int]`)
        Size of the output image with pad.
    constant_values (`Union[float, Iterable[float]]`)
        The fill value to use when padding the image.
    pad_mode (`PaddingMode`)
        The pad mode, default to PaddingMode.CONSTANT
    data_format (`ChannelDimension` or `str`, *optional*)
        The channel dimension format of the image. If not provided, it will be the same as the input image.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format of the input image. If not provided, it will be inferred.
)channel_dimr,   r-   r   z0The padding size must be greater than image size)moder?   rP   r)   )r   getr"   r   )rK   rO   r>   r?   r@   rP   r)   rL   r,   r-   
max_height	max_width	pad_right
pad_bottompaddingpadded_images                   r$   	pad_imageTvpImageProcessor.pad_image   s    6 'uL\\(F3
LL%0	 ) 1:3F:q=JNOPPz?Q	N3+#/
 r&   c                    [        UUUUUUUUUUS9
  [        U5      nU(       a  U R                  XUUS9nU(       a  U R                  XUS9nU(       a  U R	                  XUS9nU(       a/  U R                  UR                  [        R                  5      UUUS9nU	(       a  U R                  UU
UUUS9nU(       a
  [        UUS9n[        UUUS9nU$ )	zPreprocesses a single image.)
r;   r<   rA   rC   rD   r9   r:   r7   r1   r8   )rO   r1   r8   r)   )r1   r)   )rO   scaler)   )rO   meanstdr)   )rO   r>   r?   r@   r)   )rO   r)   )input_channel_dim)r   r   r   center_croprescale	normalizeastypenpfloat32ra   r   r   )rK   rO   r7   r1   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rP   r)   rL   s                       r$   _preprocess_image#TvpImageProcessor._preprocess_image  s    0 	&!)%!)	
 u%KKe]nKoE$$UN_$`ELLuVgLhENNll2::.ZYbs # E NN! /!"3 # E !&UFWXE+E;Rcdr&   r#   return_tensorsc                 \   Ub  UOU R                   nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nU	b  U	OU R
                  n	U
b  U
OU R                  n
Ub  UOU R                  nU(       a  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  n[        USS9nUb  UOU R                  n[        USS9n[!        U5      (       d  [#        S5      e[%        U5      nU VVs/ s HH  n[&        R(                  " U Vs/ s H#  nU R+                  UUUUUUUUU	U
UUUUUUUUS9PM%     sn5      PMJ     nnnSU0n[-        UUS9$ s  snf s  snnf )	a  
Preprocess an image or batch of images.

Args:
    videos (`ImageInput` or `list[ImageInput]` or `list[list[ImageInput]]`):
        Frames to preprocess.
    do_resize (`bool`, *optional*, defaults to `self.do_resize`):
        Whether to resize the image.
    size (`dict[str, int]`, *optional*, defaults to `self.size`):
        Size of the image after applying resize.
    resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
        Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`, Only
        has an effect if `do_resize` is set to `True`.
    do_center_crop (`bool`, *optional*, defaults to `self.do_centre_crop`):
        Whether to centre crop the image.
    crop_size (`dict[str, int]`, *optional*, defaults to `self.crop_size`):
        Size of the image after applying the centre crop.
    do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
        Whether to rescale the image values between [0 - 1].
    rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
        Rescale factor to rescale the image by if `do_rescale` is set to `True`.
    do_pad (`bool`, *optional*, defaults to `True`):
        Whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess` method.
    pad_size (`dict[str, int]`, *optional*, defaults to `{"height": 448, "width": 448}`):
        Size of the image after applying the padding. Can be overridden by the `pad_size` parameter in the
        `preprocess` method.
    constant_values (`Union[float, Iterable[float]]`, *optional*, defaults to 0):
        The fill value to use when padding the image.
    pad_mode (`PaddingMode`, *optional*, defaults to "PaddingMode.CONSTANT"):
        Use what kind of mode in padding.
    do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
        Whether to normalize the image.
    do_flip_channel_order (`bool`, *optional*, defaults to `self.do_flip_channel_order`):
        Whether to flip the channel order of the image.
    image_mean (`float` or `list[float]`, *optional*, defaults to `self.image_mean`):
        Image mean.
    image_std (`float` or `list[float]`, *optional*, defaults to `self.image_std`):
        Image standard deviation.
    return_tensors (`str` or `TensorType`, *optional*):
        The type of tensors to return. Can be one of:
            - Unset: Return a list of `np.ndarray`.
            - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
            - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
            - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
    data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
        The channel dimension format for the output image. Can be one of:
            - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            - Unset: Use the inferred channel dimension format of the input image.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. If unset, the channel dimension format is inferred
        from the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
FrR   r:   )
param_namezkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)rO   r7   r1   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rP   r)   r6   )datatensor_type)r7   r8   r9   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   r1   r	   r:   r   r"   r%   rl   arrayrn   r   )rK   r#   r7   r1   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rp   rP   r)   videoimgrs   s                          r$   
preprocessTvpImageProcessor.preprocessR  s   ` "+!6IDNN	'38+9+E4K^K^#-#9Zt
+9+E4K^K^!-4;;'38-<-H/dNbNb'8T]]'3'?|TEVEV%:%F!DLfLf 	 $.#9Zt
!*!6IDNN	'tTYYTU;!*!6IDNN	!)D	F##: 
 f%8  5
4  3 HH,  %+*  %) **!"+!!)'5"+#-'5%!)(7!)%1.C#-"+$/*;% + (  %+2  5 	 
: '>BB9
s   F(*F#F(#F()r?   r:   r9   rB   rA   r=   r;   r7   rC   rD   r@   r>   r8   r<   r1   )#__name__
__module____qualname____firstlineno____doc__model_input_namesr   BILINEARr
   CONSTANTboolr   dictstrr+   r   floatr   r    rJ   rl   ndarrayr   r   ra   FIRSTr   rn   r   r   PILImagerx   __static_attributes____classcell__)rM   s   @r$   r4   r4   U   sB   /b (( )-'9'B'B#.2,3-19: + 4 4!&*:>9=!&W&W tCH~&&W %	&W
 &W DcN+&W &W c5j)&W &W 4S>*&W uhuo56&W &W &W  $&W U5$u+#567&W  E%e"456!&W$ 
%&W &WX (:'B'B>BDH)
zz)
 38n)
 %	)

 eC)9$9:;)
 $E#/?*?$@A)
 
)
\ .29: + 4 4>BDH-zz- 4S>*- uhuo56	-
 - eC)9$9:;- $E#/?*?$@A-d %))-15)-.2%)*.-1CG*.'+04:>9=2B2H2HDH'EE D>E tCH~&	E
 -.E !E DcN+E TNE !E E 4S>*E "%x(>"?@E ;'E tnE  (~E  U5$u+#567!E" E%e"456#E$ ./%E& $E#/?*?$@A'E* 
+EN %& %))-15)-.2%)*.!%-1CG*.'+04:>9=;?(8(>(>DH)JCj$z"2Dj9I4JJKJC D>JC tCH~&	JC
 -.JC !JC DcN+JC TNJC !JC JC 4S>*JC "%x(>"?@JC ;'JC tnJC  (~JC  U5$u+#567!JC" E%e"456#JC$ !sJ!78%JC& &'JC( $E#/?*?$@A)JC* 
+JC 'JCr&   r4   )rG   N)/r~   collections.abcr   typingr   r   numpyrl   image_processing_utilsr   r   r	   image_transformsr
   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   
get_loggerrz   loggerr    r%   r   r+   r   r!   r2   r4   __all__rH   r&   r$   <module>r      s    % $ "  U U    _ ^  
		H	%
DDj!12 
D @D  c+;&; <= 38_	&HC* HCV 
r&   