
    bCia                         S SK Jr  S SKJrJr  S SKrSSKJrJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJr  SSKJrJr  \R@                  " \!5      r"S	\RF                  S
\$S\%\RF                     4S jr& " S S\5      r'S/r(g)    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_patch_output_sizeselect_best_resolution)PaddingModeconvert_to_rgbpadresizeto_channel_dimension_format)
ChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypeloggingimage
patch_sizereturnc                     / n[        XS9u  pE[        SXA5       H\  n[        SXQ5       HI  nU[        R                  :X  a  XXa-   2XwU-   24   nOU SS2XfU-   2XwU-   24   nUR	                  U5        MK     M^     U$ )aW  
Divides an image into patches of a specified size.

Args:
    image (`np.ndarray`):
        The input image.
    patch_size (`int`):
        The size of each patch.
    input_data_format (`ChannelDimension` or `str`):
        The channel dimension format of the input image.

Returns:
    list: A list of np.ndarray representing the patches.
channel_dimr   N)r   ranger   LASTappend)	r   r   input_data_formatpatchesheightwidthijpatchs	            h/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/aria/image_processing_aria.pydivide_to_patchesr-   .   s     G"5HMF1f)q%,A $4$9$99!.0!*n2DDEa^!3QZ5GGHNN5! - * N    c                      ^  \ rS rSrSr/ SQrSSSSSSSSS	S\R                  4S
\\	\
      S\\	\
      S\S\S\\	\\\4         S\\   S\\   S\S\\\
4   S\\   S\4U 4S jjjrSSSSSSSSSSS\R"                  S4S\\\	\   4   S
\\\
\	\
   4      S\\\
\	\
   4      S\\   S\\   S\\   S\\   S\\   S\\
   S\\   S\\   S\\\\4      S\\   S\\\\4      4S jjrS\R.                  S\S\S\R.                  4S jrS \S\4S! jrS\R.                  S\S\S\R.                  4S" jr\R8                  S#SS4S\R.                  S$\\\\\4   \\\\4      4   S%\S&\\
\\
   4   S\\\\4      S\\\\4      S\R.                  4S' jjrS\R.                  S(\	\\\4      S)\S\S\S\S\	\R.                     4S* jrS/S+\S,\4S- jjr S.r!U =r"$ )0AriaImageProcessorJ   a  
A vision processor for the Aria model that handles image preprocessing.
Initialize the AriaImageProcessor.

Args:
    image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
        Mean values for normalization.
    image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
        Standard deviation values for normalization.
    max_image_size (`int`, *optional*, defaults to 980):
        Maximum image size.
    min_image_size (`int`, *optional*, defaults to 336):
        Minimum image size.
    split_resolutions (`list`, *optional*, defaults to a list of optimal,resolutions as tuples):
        The optimal resolutions for splitting the image.
    split_image (`bool`, *optional*, defaults to `False`):
        Whether to split the image.
    do_convert_rgb (`bool`, *optional*, defaults to `True`):
        Whether to convert the image to RGB.
    do_rescale (`bool`, *optional*, defaults to `True`):
        Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
        the `preprocess` method.
    rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
        Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
        method.
    do_normalize (`bool`, *optional*, defaults to `True`):
        Whether to normalize the image.
    resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
        The resampling filter to use if resizing the image.
pixel_values
pixel_mask	num_cropsN  iP  FTgp?
image_mean	image_stdmax_image_sizemin_image_sizesplit_resolutionssplit_imagedo_convert_rgb
do_rescalerescale_factordo_normalizeresamplec                   > [         TU ]  " S0 UD6  Uc  / SQnUc  / SQnX0l        X@l        Xl        X l        X`l        Uc#  / SQnU Vs/ s H  oS   S-  US   S-  4PM     nnXPl        Xpl        Xl	        Xl
        Xl        Xl        g s  snf )N)      ?rC   rC   ))      )rD   r   )rD      )rD      )rD      )rD      )rD      )rE   rF   )rE   r   )rE   rE   )rE   rD   )r   rD   )r   rE   )rF   rD   )rF   rE   )rG   rD   )rH   rD   )rI   rD   )rJ   rD   r     rD    )super__init__r9   r:   r7   r8   r<   r;   r=   r>   r?   r@   rA   )selfr7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   kwargsel	__class__s                 r,   rN   AriaImageProcessor.__init__l   s     	"6"(J'I,,$"&$ !yFW XFWQ%#+r!us{!;FW X!2,$,(  !Ys   	B	ptimagesreturn_tensorsdata_formatr%   c           
         Ub  UOU R                   nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R
                  nUb  UOU R                  nU	b  U	OU R                  n	U
b  U
OU R                  n
Ub  UOU R                  nUS;  a  [        S5      eU R                  U5      n[        U5      n[        U5      (       d  [        S5      e[        U
UUUUU	S9  U(       a  U Vs/ s H  n[        U5      PM     nnU Vs/ s H  n[!        U5      PM     nnU(       a(  [#        US   5      (       a  [$        R'                  S5        Uc  [)        US   5      n/ n/ nSnU GH  nU(       a  U R+                  UU R,                  UUUUS9nOU/nUb  [/        U5      U:  a  [/        U5      nU GH.  n[1        U5      u  nnU[3        UU5      -  nUU:  a  [3        [5        UU-  5      U5      U4nOU[3        [5        UU-  5      U5      4n[7        UUUUUS	9nUUS   -
  UUS
   -
  nn[9        USU4SU44UUS9n[:        R<                  " XD4[>        S9nS
USUS   2SUS
   24'   URA                  U5        U(       a  U RC                  UXS9nU
(       a8  U RE                  UU R                   U R                  UUS9nUb  [G        UX5      OUnURA                  U5        GM1     GM     [I        [:        RJ                  " USS9[:        RJ                  " USS9US.US9$ s  snf s  snf )a  
Process a list of images.

Args:
    images (ImageInput or list of ImageInput):
        The input image or a list of images.
    image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
        Mean values for normalization.
    image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
        Standard deviation values for normalization.
    max_image_size (`int`, *optional*, defaults to `self.max_image_size` (980)):
        Maximum image size.
    min_image_size (`int`, *optional*, defaults to `self.min_image_size` (336)):
        Minimum image size.
    split_image (`bool`, *optional*, defaults to `self.split_image` (False)):
        Whether to split the image.
    do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
        Whether to convert the image to RGB.
    do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
        Whether to rescale the image.
    rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
        Rescale factor to rescale the image by if `do_rescale` is set to `True`.
    do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
        Whether to normalize the image.
    resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
        The resampling filter to use if resizing the image.
    return_tensors (`str` or `TensorType`, *optional*, defaults to "pt"):
        The type of tensor to return.
    data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format for the output image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`:
                image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`:
                image in (height, width, num_channels) format.
        If unset, will use same as the input image.
    input_data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format for the input image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`:
                image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`:
                image in (height, width, num_channels) format.
        If unset, will use the inferred format of the input image.

Returns:
    BatchFeature:
        A BatchFeature object containing:
        - 'pixel_values':
            Tensor of processed image pixel values.
        - 'pixel_mask':
            Boolean pixel mask. This mask is a 2D tensor of shape (max_image_size, max_image_size) where:
            - True (1) values indicate pixels that belong to the original resized image.
            - False (0) values indicate pixels that are part of the padding.
          The mask helps distinguish between actual image content and padded areas in subsequent processing steps.
        - 'num_crops':
            The maximum number of crops across all images.
N)rK   r6   z(max_image_size must be either 490 or 980zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r@   r7   r8   rA   r>   r?   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)rW   r%   )rA   rW   r%   rD   )dtype)r   scaler%   )axisr2   )datatensor_type)&r7   r8   r9   r:   r<   r=   r>   r?   r@   rA   
ValueErrorfetch_imagesr   r   r   r   r   r   loggerwarning_oncer   get_image_patchesr;   lenr   maxintr   r   npzerosboolr$   rescale	normalizer   r   stack)rO   rU   r7   r8   r9   r:   r<   r=   r>   r?   r@   rA   rV   rW   r%   r   r3   pixel_masksr5   crop_images
crop_imagehwrZ   new_sizecrop_image_resizedpadding_bottompadding_rightcrop_image_paddedr4   s                                 r,   
preprocessAriaImageProcessor.preprocess   s   R $.#9Zt
!*!6IDNN	+9+E4K^K^+9+E4K^K^%0%<k$BRBR+9+E4K^K^#-#9Zt
+9+E4K^K^'3'?|TEVEV'38+GHH""6*)&1F##: 
 	&%!!)	
 9?@nU+F@ 6<<VE.'V</&)44s
 $ >vay I	E"44**" 1&7 5   %g C$4y$@,	)
%j11&Q26 #CE	NN C^TH .CE	NN0STH%+% 1&7&" 1?!0Ln_ghi_jNj$'&(1m*<= 1&7	%!  XX~&FdS
;<
=Xa[=-HQK-78"":.(,/~ )5 )%  (,)$5*; )7 )% '2 44E{f. & ##$56c * B  "A > hh{;&
 '
 	
i A =s   L>'Mr   target_resolutionr   c                 :    [        XU5      u  pV[        XU4X4S9nU$ )a  
Resizes an image to a target resolution while maintaining aspect ratio.

Args:
    image (np.ndarray):
        The input image.
    target_resolution (tuple):
        The target resolution (height, width) of the image.
    resample (`PILImageResampling`):
        Resampling filter to use if resizing the image.
    input_data_format (`ChannelDimension` or `str`):
        The channel dimension format of the input image.

Returns:
    np.ndarray: The resized and padded image.
rA   r%   )r	   r   )rO   r   rx   rA   r%   
new_height	new_widthresized_images           r,   _resize_for_patching'AriaImageProcessor._resize_for_patchingW  s-    & !6ePa b
 u9&=vr.   original_resolutionc                 j    Uu  p4Uu  pV[        Xd-
  S5      u  px[        XS-
  S5      u  pXU
-   4XwU-   44$ )NrE   )divmod)rO   r   rx   original_heightoriginal_widthtarget_heighttarget_widthpaste_xr_xpaste_yr_ys              r,   _get_padding_size$AriaImageProcessor._get_padding_sizeq  sM    *='&7#l;Q?m=qA3''S=)AAAr.   c                 ^    [        XU5      nU R                  XB5      nU R                  XS9nU$ )zE
Pad an image to a target resolution while maintaining aspect ratio.
)padding)r	   r   r   )rO   r   rx   r%   new_resolutionr   padded_images          r,   _pad_for_patching$AriaImageProcessor._pad_for_patchingx  s7     /uIZ[((Kxxx7r.   g        r   modeconstant_valuesc                 ^   [        U[        5      (       d  [        U5      S:w  a  [        XX4XV5      $ Uc  [	        U5      n[
        R                  S[
        R                  S[
        R                  S[
        R                  S0n[        R                  " XXs   US9nUb  [        XU5      nU$ UnU$ )a  
Pads the `image` with the specified `padding` and `mode`. Padding can be in the (`height`, `width`)
dimension of in the (`num_patches`) dimension. In the second case an iterable if tuples is expected
as input.

Args:
    image (`np.ndarray`):
        The image to pad.
    padding (`int` or `tuple[int, int]` or `Iterable[tuple[int, int]]`):
        Padding to apply to the edges of the height, width axes. Can be one of three formats:
        - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
        - `((before, after),)` yields same before and after pad for height and width.
        - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
    mode (`PaddingMode`):
        The padding mode to use. Can be one of:
            - `"constant"`: pads with a constant value.
            - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
            vector along each axis.
            - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
            - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
    constant_values (`float` or `Iterable[float]`, *optional*):
        The value to use for the padding if `mode` is `"constant"`.
    data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format for the output image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        If unset, will use same as the input image.
    input_data_format (`str` or `ChannelDimension`, *optional*):
        The channel dimension format for the input image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        If unset, will use the inferred format of the input image.

Returns:
    `np.ndarray`: The padded image.

rF   constantreflectedge	symmetric)r   r   )
isinstancere   rc   r   r   r   CONSTANTREFLECT	REPLICATE	SYMMETRICrf   r   )rO   r   r   r   r   rW   r%   padding_mode_mappings           r,   r   AriaImageProcessor.pad  s    ` gs##s7|q'8utk]]$ >u E   *!!6!!;	 
 u,@,FXghR]Ri'<MN 	  pu 	 r.   grid_pinpointsr   c           	         [        U[        5      (       d  [        S5      eUn[        XS9n[	        X5      n	U R                  XXFS9n
U R                  XUS9n[        XUS9nU Vs/ s H  n[        XUS9PM     nnU$ s  snf )a  
Process an image with variable resolutions by dividing it into patches.

Args:
    image (`np.ndarray`):
        The input image to be processed.
    grid_pinpoints (list[tuple[int, int]]):
        A list of possible resolutions as tuples.
    patch_size (`int`):
        Size of the patches to divide the image into.
    resample (`PILImageResampling`):
        Resampling filter to use if resizing the image.
    data_format (`ChannelDimension` or `str`):
        The channel dimension format for the output image.
    input_data_format (`ChannelDimension` or `str`):
        The channel dimension format of the input image.

Returns:
    `list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
z6grid_pinpoints must be a list of possible resolutions.r    rz   )r%   )r   r%   )r!   input_channel_dim)	r   list	TypeErrorr   r
   r~   r   r-   r   )rO   r   r   r   rA   rW   r%   possible_resolutions
image_sizebest_resolutionr}   r   r&   r+   s                 r,   rb   $AriaImageProcessor.get_image_patches  s    : .$//TUU-#EI
0R11X 2 
 --m`q-r#L[lm
 !
  (Zkl  	 
 	
s   %A=r'   r(   c                     UR                  SU R                  5      nUR                  SU R                  5      n[        X4U R                  5      u  pgU(       d  SnU$ Xe-  U-  U-  nU$ )aS  
A utility that returns number of image patches for a given image size.

Args:
    height (`int`):
        Height of the input image.
    width (`int`):
        Width of the input image.
    images_kwargs (`dict`, *optional*)
        Any kwargs to override defaults of the image processor.
Returns:
    `int`: Number of patches per image.
r<   r9   rD   )getr<   r9   r
   r;   )	rO   r'   r(   images_kwargsr<   r9   resized_heightresized_widthnum_patchess	            r,   get_number_of_image_patches.AriaImageProcessor.get_number_of_image_patches  sv     $''t7G7GH&**+;T=P=PQ(>PTPfPf(g%*a 1?0PS`0`dr0rr.   )r=   r@   r>   r7   r8   r9   r:   rA   r?   r<   r;   )N)#__name__
__module____qualname____firstlineno____doc__model_input_namesr   BICUBICr   r   floatre   tuplerh   r   rN   r   FIRSTr   strr   rv   rf   ndarrayr~   r   r   r   r   r   r   rb   r   __static_attributes____classcell__)rR   s   @r,   r0   r0   J   s   > D -1+/!!=A&+)-,3'+'9'A'A"!T%[)"! DK("! 	"!
 "! $DsCx$9:"! d^"! !"! "! c5j)"! tn"! %"! "!N ;?9=(,(,&*)-%)*.'+15;?2B2H2HDHE
j$z"223E
 U5$u+#567E
 E%e"456	E

 !E
 !E
 d^E
 !E
 TNE
 !E
 tnE
 -.E
 !sJ!78E
 ./E
 $E#/?*?$@AE
NZZ49Xh	4BU Bu BZZ49N^	" (009<>BDH@zz@ sE#s(OXeCHo-FFG@ 	@
 uhuo56@ eC)9$9:;@ $E#/?*?$@A@ 
@D0zz0 U38_-0 	0
 %0 &0 ,0 
bjj	0d# c  r.   r0   ))collections.abcr   typingr   r   numpyrf   image_processing_utilsr   r   r	   r
   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   
get_loggerr   r`   r   re   r   r-   r0   __all__rL   r.   r,   <module>r      s   * % "  u u e e   ) 
		H	%RZZ S PTUWU_U_P` 8B+ BJ  
 r.   