
    cCiH                        S r SSKJr  SSKJrJr  SSKrSSKJ	r	J
r
Jr  SSKJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJr  SS	KJrJrJrJ r J!r!J"r"  SS
K#J$r$  \ " 5       (       a  SSK%r%\" 5       (       a  SSK&r&\!RN                  " \(5      r)S r* SS\RV                  S\\,   S\\,   S\\\\,4      4S jjr-\$" SS9 " S S\	5      5       r.S/r/g)z%Image processor class for LayoutLMv3.    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_size_dict)resizeto_channel_dimension_formatto_pil_image)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDChannelDimension
ImageInputPILImageResamplinginfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_pytesseract_availableis_vision_availableloggingrequires_backends)requiresc                     [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      /$ )Ni  r         r   )int)boxwidthheights      t/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/layoutlmv3/image_processing_layoutlmv3.pynormalize_boxr'   :   s`    DCFUN#$DCFVO$%DCFUN#$DCFVO$%	     imagelangtesseract_configinput_data_formatc                    [        XS9nUR                  u  pV[        R                  " XASUS9nUS   US   US   US   US   4u  ppn[	        U5       VVs/ s H  u  pUR                  5       (       a  M  UPM      nnn[	        U5       VVs/ s H  u  pX;  d  M  UPM     nnn[	        U	5       VVs/ s H  u  nnX;  d  M  UPM     n	nn[	        U
5       VVs/ s H  u  nnX;  d  M  UPM     n
nn[	        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn[	        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn/ n[        XX5       H%  u  nnnnUUUU-   UU-   /nUR                  U5        M'     / nU H  nUR                  [        UXV5      5        M      [        U5      [        U5      :X  d   S	5       eUU4$ s  snnf s  snnf s  snnf s  snnf s  snnf s  snnf )
zdApplies Tesseract OCR on a document image, and returns recognized words + normalized bounding boxes.r,   dict)r*   output_typeconfigtextlefttopr$   r%   z-Not as many words as there are bounding boxes)
r   sizepytesseractimage_to_data	enumeratestripzipappendr'   len)r)   r*   r+   r,   	pil_imageimage_widthimage_heightdatawordsr3   r4   r$   r%   idxwordirrelevant_indicescoordactual_boxesxywh
actual_boxnormalized_boxesr#   s                            r&   apply_tesseractrM   C   s    UHI )K$$YvVfgD&*6lDL$u+tT[}^bck^l&l#EV 09/?T/?)#tzz|#/?T#,U#3U#3ics7TT#3EU$-dOUOjc5s7TEODU#,S>
S>ZS%S5R5>C
S%.u%5W%5zsE9VU%5EW&/&7Y&7
U3;Xe&7FY L$U3
1aAE1q5)
J' 4
 c; MN  u:-.._0__.""") UUU
SWYsH   G 5G GG2GGG)G GG'G7G)vision)backendsc            !         ^  \ rS rSrSrS/rSS\R                  SSSSSSSS4S\S	\	\
\\4      S
\S\S\S\S\	\\\\   4      S\	\\\\   4      S\S\	\   S\	\   SS4U 4S jjjr\R                  SS4S\R$                  S	\
\\4   S
\S\	\\\4      S\	\\\4      S\R$                  4S jjr\" 5       SSSSSSSSSSSS\R,                  S4S\S\	\   S	\	\
\\4      S\	\   S\	\   S\	\   S\	\\\\   4      S\	\\\\   4      S\	\   S\	\   S\	\   S\	\\\4      S\S\	\\\4      S\R4                  R4                  4S jj5       rSrU =r$ )LayoutLMv3ImageProcessori   a!
  
Constructs a LayoutLMv3 image processor.

Args:
    do_resize (`bool`, *optional*, defaults to `True`):
        Whether to resize the image's (height, width) dimensions to `(size["height"], size["width"])`. Can be
        overridden by `do_resize` in `preprocess`.
    size (`dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`):
        Size of the image after resizing. Can be overridden by `size` in `preprocess`.
    resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        Resampling filter to use if resizing the image. Can be overridden by `resample` in `preprocess`.
    do_rescale (`bool`, *optional*, defaults to `True`):
        Whether to rescale the image's pixel values by the specified `rescale_value`. Can be overridden by
        `do_rescale` in `preprocess`.
    rescale_factor (`float`, *optional*, defaults to 1 / 255):
        Value by which the image's pixel values are rescaled. Can be overridden by `rescale_factor` in
        `preprocess`.
    do_normalize (`bool`, *optional*, defaults to `True`):
        Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
        method.
    image_mean (`Iterable[float]` or `float`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
        Mean to use if normalizing the image. This is a float or list of floats the length of the number of
        channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
    image_std (`Iterable[float]` or `float`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
        Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
        number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
    apply_ocr (`bool`, *optional*, defaults to `True`):
        Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes. Can be overridden by
        the `apply_ocr` parameter in the `preprocess` method.
    ocr_lang (`str`, *optional*):
        The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
        used. Can be overridden by the `ocr_lang` parameter in the `preprocess` method.
    tesseract_config (`str`, *optional*):
        Any additional custom configuration flags that are forwarded to the `config` parameter when calling
        Tesseract. For example: '--psm 6'. Can be overridden by the `tesseract_config` parameter in the
        `preprocess` method.
pixel_valuesTNgp? 	do_resizer5   resample
do_rescalerescale_valuedo_normalize
image_mean	image_std	apply_ocrocr_langr+   returnc                    > [         TU ]  " S0 UD6  Ub  UOSSS.n[        U5      nXl        X l        X0l        X@l        XPl        X`l        Ub  UO[        U l
        Ub  UO[        U l        Xl        Xl        Xl        g )N   )r%   r$    )super__init__r	   rU   r5   rV   rW   rescale_factorrY   r   rZ   r   r[   r\   r]   r+   )selfrU   r5   rV   rW   rX   rY   rZ   r[   r\   r]   r+   kwargs	__class__s                r&   rc   !LayoutLMv3ImageProcessor.__init__   s~     	"6"'tc-JT""	 $+((2(>*DZ&/&;AV"  0r(   r)   data_formatr,   c                     [        U5      nSU;  d  SU;  a  [        SUR                  5        35      eUS   US   4n[        U4UUUUS.UD6$ )a  
Resize an image to `(size["height"], size["width"])`.

Args:
    image (`np.ndarray`):
        Image to resize.
    size (`dict[str, int]`):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
    resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
    data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the output image. If unset, the channel dimension format of the input
        image is used. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. If unset, the channel dimension format is inferred
        from the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

Returns:
    `np.ndarray`: The resized image.
r%   r$   zFThe `size` dictionary must contain the keys `height` and `width`. Got )r5   rV   ri   r,   )r	   
ValueErrorkeysr
   )re   r)   r5   rV   ri   r,   rf   output_sizes           r&   r
   LayoutLMv3ImageProcessor.resize   sy    F T"47$#6efjfofofqersttH~tG}5
#/
 
 	
r(   imagesrd   return_tensorsc                    Ub  UOU R                   nUb  UOU R                  n[        U5      nUb  UOU R                  nUb  UOU R                  nUb  UOU R
                  nUb  UOU R                  nUb  UOU R                  nU	b  U	OU R                  n	U
b  U
OU R                  n
Ub  UOU R                  nUb  UOU R                  n[        U5      n[        U5      (       d  [        S5      e[        UUUUU	UUUS9  U Vs/ s H  n[!        U5      PM     nnU(       a(  [#        US   5      (       a  [$        R'                  S5        Uc  [)        US   5      nU
(       aI  [+        U S5        / n/ nU H3  n[-        UXUS9u  nnUR/                  U5        UR/                  U5        M5     U(       a!  U Vs/ s H  nU R1                  UX4US9PM     nnU(       a   U Vs/ s H  nU R3                  UXoS9PM     nnU(       a!  U Vs/ s H  nU R5                  UXUS	9PM     nnU Vs/ s H  n[7        UXS
9PM     nn[9        SU0US9nU
(       a
  WUS'   WUS'   U$ s  snf s  snf s  snf s  snf s  snf )a  
Preprocess an image or batch of images.

Args:
    images (`ImageInput`):
        Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
        passing in images with pixel values between 0 and 1, set `do_rescale=False`.
    do_resize (`bool`, *optional*, defaults to `self.do_resize`):
        Whether to resize the image.
    size (`dict[str, int]`, *optional*, defaults to `self.size`):
        Desired size of the output image after applying `resize`.
    resample (`int`, *optional*, defaults to `self.resample`):
        Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` filters.
        Only has an effect if `do_resize` is set to `True`.
    do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
        Whether to rescale the image pixel values between [0, 1].
    rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
        Rescale factor to apply to the image pixel values. Only has an effect if `do_rescale` is set to `True`.
    do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
        Whether to normalize the image.
    image_mean (`float` or `Iterable[float]`, *optional*, defaults to `self.image_mean`):
        Mean values to be used for normalization. Only has an effect if `do_normalize` is set to `True`.
    image_std (`float` or `Iterable[float]`, *optional*, defaults to `self.image_std`):
        Standard deviation values to be used for normalization. Only has an effect if `do_normalize` is set to
        `True`.
    apply_ocr (`bool`, *optional*, defaults to `self.apply_ocr`):
        Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes.
    ocr_lang (`str`, *optional*, defaults to `self.ocr_lang`):
        The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
        used.
    tesseract_config (`str`, *optional*, defaults to `self.tesseract_config`):
        Any additional custom configuration flags that are forwarded to the `config` parameter when calling
        Tesseract.
    return_tensors (`str` or `TensorType`, *optional*):
        The type of tensors to return. Can be one of:
            - Unset: Return a list of `np.ndarray`.
            - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
            - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
            - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
    data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
        The channel dimension format for the output image. Can be one of:
            - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. If unset, the channel dimension format is inferred
        from the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)rW   rd   rY   rZ   r[   rU   r5   rV   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.r6   r.   )r)   r5   rV   r,   )r)   scaler,   )r)   meanstdr,   )input_channel_dimrS   )r@   tensor_typerA   boxes)rU   r5   r	   rV   rW   rd   rY   rZ   r[   r\   r]   r+   r   r   rk   r   r   r   loggerwarning_oncer   r   rM   r;   r
   rescale	normalizer   r   )re   ro   rU   r5   rV   rW   rd   rY   rZ   r[   r\   r]   r+   rp   ri   r,   r)   words_batchboxes_batchrA   rw   r@   s                         r&   
preprocess#LayoutLMv3ImageProcessor.preprocess   s   L "+!6IDNN	'tTYYT"'38#-#9Zt
+9+E4K^K^'3'?|TEVEV#-#9Zt
!*!6IDNN	!*!6IDNN	'38/?/K+QUQfQf)&1F##:  	&!)%!		
 6<<VE.'V</&)44s
 $ >vay I dM2KK.uhduvu""5)""5)  
  $#E %dYjk#  
  $#E 5d#  
  $#E U^op#   ou
ntej'{`nt 	 
 .&!9~V'DM'DMc =.

s   I3II"I'#I,)r\   rY   rW   rU   rZ   r[   r]   rV   rd   r5   r+   )__name__
__module____qualname____firstlineno____doc__model_input_namesr   BILINEARboolr   r/   strr"   floatr   r   rc   npndarrayr   r
   r   FIRSTr   r   PILImager~   __static_attributes____classcell__)rg   s   @r&   rQ   rQ   i   s   $L (( )-'9'B'B&!>B=A"&*,11 tCH~&1 %	1
 1 1 1 U5(5/#9:;1 E%%"89:1 1 3-1 #3-1 
1 1H (:'B'B>BDH.
zz.
 38n.
 %	.

 eC)9$9:;.
 $E#/?*?$@A.
 
.
` %& %))-%)*.'+>B=A$("&*.;?(8(>(>DH!UU D>U tCH~&	U TNU !U tnU U5(5/#9:;U E%%"89:U D>U 3-U #3-U !sJ!78U &U  $E#/?*?$@A!U" 
#U 'Ur(   rQ   )N)0r   collections.abcr   typingr   r   numpyr   image_processing_utilsr   r   r	   image_transformsr
   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   utils.import_utilsr   r   r6   
get_loggerr   rx   r'   r   r   rM   rQ   __all__ra   r(   r&   <module>r      s    , $ "  U U Q Q     +  			H	% AE	##::##
3-## sm##  &6&; <=	##L 
;O1 O  Od &
&r(   