
    cCi^                        S r SSKrSSKJrJr  SSKrSSKJrJ	r	  SSK
JrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJr  SS	KJr  \R@                  " \!5      r"    SS
\#S\#S\#S\#S\#S\#S\#4S jjr$ " S S\5      r%S/r&g)z#Image processor class for GLM-4.1V.    N)OptionalUnion   )BaseImageProcessorBatchFeature)convert_to_rgbresizeto_channel_dimension_format)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypelogging)
VideoInput
num_framesheightwidthtemporal_factorfactor
min_pixels
max_pixelsc                    X:  a  [        SU  SU 35      eX:  d  X$:  a  [        SU SU SU 35      e[        X5      [        X5      -  S:  a#  [        S[        X5      [        X5      -   35      e[        X-  5      U-  n[        X$-  5      U-  n[        X-  5      U-  n	X-  U-  U:  aq  [        R
                  " X-  U-  U-  5      n
[        U[        R                  " X-  U-  5      U-  5      n[        U[        R                  " X*-  U-  5      U-  5      nXx4$ X-  U-  U:  aZ  [        R
                  " XPU-  U-  -  5      n
[        R                  " X-  U-  5      U-  n[        R                  " X*-  U-  5      U-  nXx4$ )Nzt:z% must be larger than temporal_factor:zheight:z
 or width:z must be larger than factor:   z4absolute aspect ratio must be smaller than 200, got )
ValueErrormaxminroundmathsqrtfloorceil)r   r   r   r   r   r   r    h_barw_bart_barbetas              j/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/glm4v/image_processing_glm4v.pysmart_resizer0   1   s    #2j\)NN_`aa%.76(*UG;WX^W_`aa	V	c&0	03	6B3vCUX[\bXjCjBkl
 	
 &/"V+E%.!F*E*.//AE}uz)yy*-5CDFDJJv}v'=>GHFDJJu|f'<=FG < 
		+yyF':U'BCD		&-&01F:		%,/069<    c            &       (  ^  \ rS rSrSrSS/rSS\R                  SSSSSSSS	S	4S
\S\	\
\\4      S\S\S\\\4   S\S\	\\\\   4      S\	\\\\   4      S\S\S\S\SS4U 4S jjjrSSSSSSSSSSSS\R$                  S4S\\\4   S
\	\   S\	\
\\4      S\	\   S\	\   S\	\   S\	\   S\	\\\\   4      S\	\\\\   4      S\	\   S\	\   S\	\   S\	\   S\	\   S\	\\\4      4S jjrSSSSSSSSSSSSSS\R$                  S4S\S\	\   S
\	\   S\	\
\\4      S\	\   S\	\   S\	\   S\	\   S\	\\\\   4      S\	\\\\   4      S\	\   S\	\   S\	\   S\	\   S\	\\\4      S\	\   S\	\\\4      4"S jjrS#S\S \4S! jjrS"rU =r$ )$Glm4vImageProcessorR   a)
  
Constructs a GLM-4V image processor that dynamically resizes images based on the original images.

Args:
    do_resize (`bool`, *optional*, defaults to `True`):
        Whether to resize the image's (height, width) dimensions.
    size (`Dict[str, int]` *optional*, defaults to `{"shortest_edge": 112 * 112, "longest_edge": 28 * 28 * 15000}`):
        Size of the image's `(height, width)` dimensions after resizing. Can be overridden by the `size` parameter
        in the `preprocess` method. Available options are:
            - `{"height": int, "width": int}`: The image will be resized to the exact size `(height, width)`.
                Do NOT keep the aspect ratio.
            - `{"shortest_edge": int, "longest_edge": int}`: The image will be resized to a maximum size respecting
                the aspect ratio and keeping the shortest edge less or equal to `shortest_edge` and the longest edge
                less or equal to `longest_edge`.
            - `{"max_height": int, "max_width": int}`: The image will be resized to the maximum size respecting the
                aspect ratio and keeping the height less or equal to `max_height` and the width less or equal to
                `max_width`.
    resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
        Resampling filter to use when resizing the image.
    do_rescale (`bool`, *optional*, defaults to `True`):
        Whether to rescale the image by the specified scale `rescale_factor`.
    rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
        Scale factor to use if rescaling the image.
    do_normalize (`bool`, *optional*, defaults to `True`):
        Whether to normalize the image.
    image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`):
        Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
    image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`):
        Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
    do_convert_rgb (`bool`, *optional*, defaults to `True`):
        Whether to convert the image to RGB.
    patch_size (`int`, *optional*, defaults to 14):
        The spatial patch size of the vision encoder.
    temporal_patch_size (`int`, *optional*, defaults to 2):
        The temporal patch size of the vision encoder.
    merge_size (`int`, *optional*, defaults to 2):
        The merge size of the vision encoder to llm encoder.
pixel_valuesimage_grid_thwTNgp?      	do_resizesizeresample
do_rescalerescale_factordo_normalize
image_mean	image_stddo_convert_rgb
patch_sizetemporal_patch_size
merge_sizereturnc                 "  > [         TU ]  " S0 UD6  Ub  SU;  d  SU;  a  [        S5      eUc  SSS.nX l        Xl        X0l        X@l        XPl        X`l        Ub  UO[        U l
        Ub  UO[        U l        Xl        Xl        Xl        Xl        g )Nshortest_edgelongest_edge:size must contain 'shortest_edge' and 'longest_edge' keys. 1  q rG   rH    )super__init__r#   r:   r9   r;   r<   r=   r>   r   r?   r   r@   rB   rC   rD   rA   )selfr9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   kwargs	__class__s                 r/   rO   Glm4vImageProcessor.__init__|   s      	"6"!<VZ@ZYZZ\%.PD	" $,((2(>*DT&/&;$#6 $,r1   imagesdata_formatinput_data_formatc                    [        U5      nU(       a  U Vs/ s H  n[        U5      PM     nnU Vs/ s H  n[        U5      PM     nnU(       a(  [        US   5      (       a  [        R                  S5        Uc  [        US   5      n[        US   US9u  nnUUnn/ nU H{  nU(       a'  [        UUUUX-  US   US   S9u  nn[        UUU4XOS9nU(       a  U R                  UXoS9nU(       a  U R                  UXUS	9n[        UXS
9nUR                  U5        M}     [        R                  " U5      nU[         R"                  :X  a  UR%                  SSSS5      nUR&                  S   U-  S:w  aT  [        R(                  " US   [        R*                     UUR&                  S   U-  -
  SS9n[        R,                  " UU/SS9nUR&                  S   nUR&                  S   U-  nUU
-  UU
-  nnUR/                  UUUUU-  UU
UU-  UU
5	      nUR%                  SSSSSSSSS5	      nUR/                  UU-  U-  UU-  U
-  U
-  5      nUUUU44$ s  snf s  snf )a6  
Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`.

Args:
    images (`ImageInput`):
        Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. If pixel values range from 0 to 1, set `do_rescale=False`.
    vision_info (`List[Dict]`, *optional*):
        Optional list of dictionaries containing additional information about vision inputs.
    do_resize (`bool`, *optional*, defaults to `self.do_resize`):
        Whether to resize the image.
    size (`Dict[str, int]`, *optional*, defaults to `self.size`):
        Size of the image after resizing. `shortest_edge` and `longest_edge` keys must be present.
    resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
        Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums.
    do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
        Whether to rescale the image.
    rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
        Scale factor to use if rescaling the image.
    do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
        Whether to normalize the image.
    image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
        Mean to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
    image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
        Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
    patch_size (`int`, *optional*, defaults to `self.patch_size`):
        The spatial patch size of the vision encoder.
    temporal_patch_size (`int`, *optional*, defaults to `self.temporal_patch_size`):
        The temporal patch size of the vision encoder.
    merge_size (`int`, *optional*, defaults to `self.merge_size`):
        The merge size of the vision encoder to llm encoder.
    do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
        Whether to convert the image to RGB.
    data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
        The channel dimension format for the output image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - Unset: Use the channel dimension format of the input image.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.   - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)channel_dimrG   rH   )r   r   r   r   r   r   r    )r:   r;   rV   )scalerV   )imagemeanstdrV   )input_channel_dimr      r8   )axis               )r   r   r   r   loggerwarning_oncer   r   r0   r	   rescale	normalizer
   appendnparrayr   LAST	transposeshaperepeatnewaxisconcatenatereshape)rP   rT   r9   r:   r;   r<   r=   r>   r?   r@   rB   rC   rD   rA   rU   rV   rZ   r   r   resized_heightresized_widthprocessed_imagespatchesrepeatschannelgrid_tgrid_hgrid_wflatten_patchess                                r/   _preprocessGlm4vImageProcessor._preprocess   s   z *&19?@nU+F@ 6<<VE.'V</&)44s $ >vay I&vay>OP(.E0<2!$7%2#O4#N31-  ?( U.fjSd '  0{hE##E*1 4 ((+,*///''1a3G==11Q6iiBJJ')<a@PSf@f)gnoG nngw%7a@G--"q!%88':5}
7R//j j 

 ##Aq!Q1aA>!//VOf$g0C&Cj&PS]&]
  888M A =s
   IIvideosreturn_tensorsc                    Ub  UOU R                   nUb  SU;  d  SU;  a  [        S5      eUc  SSS.nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R
                  nUb  UOU R                  nU	b  U	OU R                  n	U
b  U
OU R                  n
Ub  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  UOU R                  nUb  U R                  U5      n[        U5      nUb  [        U5      (       d  [        S5      e[!        UUU	U
UUUS9  0 nUb  / / nnU HE  nU R#                  UUUUUUUU	U
UUUUUUS	9u  nnUR%                  U5        UR'                  U5        MG     [(        R*                  " U5      n[(        R*                  " U5      nUR-                  UUS
.5        [/        UUS9$ )a  
Args:
    images (`ImageInput`):
        Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
        passing in images with pixel values between 0 and 1, set `do_rescale=False`.
    videos (`VideoInput`):
        Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If
        passing in videos with pixel values between 0 and 1, set `do_rescale=False`.
    do_resize (`bool`, *optional*, defaults to `self.do_resize`):
        Whether to resize the image.
    size (`Dict[str, int]`, *optional*, defaults to `self.size`):
        Size of the image after resizing. Shortest edge of the image is resized to size["shortest_edge"], with
        the longest edge resized to keep the input aspect ratio.
    resample (`int`, *optional*, defaults to `self.resample`):
        Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only
        has an effect if `do_resize` is set to `True`.
    do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
        Whether to rescale the image.
    rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
        Rescale factor to rescale the image by if `do_rescale` is set to `True`.
    do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
        Whether to normalize the image.
    image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
        Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`.
    image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
        Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to
        `True`.
        The max pixels of the image to resize the image.
    patch_size (`int`, *optional*, defaults to `self.patch_size`):
        The spatial patch size of the vision encoder.
    temporal_patch_size (`int`, *optional*, defaults to `self.temporal_patch_size`):
        The temporal patch size of the vision encoder.
    merge_size (`int`, *optional*, defaults to `self.merge_size`):
        The merge size of the vision encoder to llm encoder.
    do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
        Whether to convert the image to RGB.
    return_tensors (`str` or `TensorType`, *optional*):
        The type of tensors to return. Can be one of:
        - Unset: Return a list of `np.ndarray`.
        - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
        - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
        - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
        - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
    data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
        The channel dimension format for the output image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - Unset: Use the channel dimension format of the input image.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. If unset, the channel dimension format is inferred
        from the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

rG   rH   rI   rJ   rK   rL   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r=   r>   r?   r@   r9   r:   r;   )r9   r:   r;   r<   r=   r>   r?   r@   rB   rC   rD   rU   rA   rV   )r5   r6   )datatensor_type)r:   r#   r9   r;   r<   r=   r>   r?   r@   rB   rC   rD   rA   fetch_imagesr   r   r   r~   extendrj   rk   rl   updater   )rP   rT   r   r9   r:   r;   r<   r=   r>   r?   r@   rB   rC   rD   rA   r   rU   rV   r   r5   vision_grid_thwsrZ   rw   r6   s                           r/   
preprocessGlm4vImageProcessor.preprocess(  s&   Z 'tTYY!<VZ@ZYZZ\%.PD!*!6IDNN	'38#-#9Zt
+9+E4K^K^'3'?|TEVEV#-#9Zt
!*!6IDNN	#-#9Zt
5H5T1Z^ZrZr#-#9Zt
+9+E4K^K^&&v.F-f5Fl6&:&:: 
 	&)%!	
 -/*L*.*:*:'%)#1!-)')(;) +#1&7 +; +'" ##G, ''7'  ( 88L1L!xx(89KKIYZ[>BBr1   r   r   c           
         UR                  SU R                  5      nUR                  SU R                  5      nUR                  SSSS.5      nXE-  n[        U R                  UUUUS   US   U R                  S	9u  pX-  X-  pX-  $ )
aY  
A utility that returns number of image patches for a given image size.

Args:
    height (`int`):
        Height of the input image.
    width (`int`):
        Width of the input image.
    images_kwargs (`dict`, *optional*)
        Any kwargs to override defaults of the image processor.
Returns:
    `int`: Number of image patches per image.
rB   rD   r:   rJ   rK   rL   rG   rH   )r   r   r   r   r   r    r   )getrB   rD   r0   rC   )rP   r   r   images_kwargsrB   rD   r:   r   rt   ru   r{   r|   s               r/   get_number_of_image_patches/Glm4vImageProcessor.get_number_of_image_patches  s     #&&|T__E
"&&|T__E
  9Ve)fg((4//O,N+ 44)
% (5}7Rr1   )rA   r>   r<   r9   r?   r@   rD   rB   r;   r=   r:   rC   )N)__name__
__module____qualname____firstlineno____doc__model_input_namesr   BICUBICboolr   dictstrintr   floatlistrO   r   FIRSTr   r   r~   r   r   r   __static_attributes____classcell__)rR   s   @r/   r3   r3   R   s   %N ()9: )-'9'A'A,3!:>9=##$"-"- tCH~&"- %	"-
 "- c5j)"- "- U5$u+#567"- E%e"456"- "- "- !"- "- 
"- "-N %))-15%)*.'+:>9=$(-1$()-2B2H2HDH!F9j*,-F9 D>F9 tCH~&	F9
 -.F9 TNF9 !F9 tnF9 U5$u+#567F9 E%e"456F9 SMF9 &c]F9 SMF9 !F9 ./F9  $E#/?*?$@A!F9V (,$()-15%)*.'+:>9=$(-1$()-;?2B2H2HDH%NCNC $NC D>	NC
 tCH~&NC -.NC TNNC !NC tnNC U5$u+#567NC E%e"456NC SMNC &c]NC SMNC !NC  !sJ!78!NC" ./#NC$ $E#/?*?$@A%NC`# c  r1   r3   )r8      rJ   i   )'r   r'   typingr   r   numpyrk   image_processing_utilsr   r   image_transformsr   r	   r
   image_utilsr   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   video_utilsr   
get_loggerr   rf   r   r0   r3   __all__rM   r1   r/   <module>r      s    *  "  F 
    ) % 
		H	% 0  	
   BC, CL !
!r1   