
    cCi95                        S r SSKJrJr  SSKrSSKJrJrJ	r	  SSK
JrJrJrJr  SSKJrJrJrJrJrJrJrJr  SSKJrJrJrJrJrJr  SS	KJ r   \" 5       (       a  SSK!r!\" 5       (       a  SSK"r"\RF                  " \$5      r%S
 r&  SS\RN                  S\\(   S\\(   S\\\(\4      4S jjr)\ " SS9 " S S\5      5       r*S/r+g)z%Image processor class for LayoutLMv2.    )OptionalUnionN   )BaseImageProcessorBatchFeatureget_size_dict)flip_channel_orderresizeto_channel_dimension_formatto_pil_image)ChannelDimension
ImageInputPILImageResamplinginfer_channel_dimension_formatmake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_pytesseract_availableis_vision_availableloggingrequires_backends)requiresc                     [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      /$ )Ni  r         r   )int)boxwidthheights      t/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/layoutlmv2/image_processing_layoutlmv2.pynormalize_boxr$   6   s`    DCFUN#$DCFVO$%DCFUN#$DCFVO$%	     imagelangtesseract_configinput_data_formatc                    Ub  UOSn[        XS9nUR                  u  pV[        R                  " XASUS9nUS   US   US   US   US	   4u  ppn[	        U5       VVs/ s H  u  pUR                  5       (       a  M  UPM      nnn[	        U5       VVs/ s H  u  pX;  d  M  UPM     nnn[	        U	5       VVs/ s H  u  nnX;  d  M  UPM     n	nn[	        U
5       VVs/ s H  u  nnX;  d  M  UPM     n
nn[	        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn[	        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn/ n[        XX5       H%  u  nnnnUUUU-   UU-   /nUR                  U5        M'     / nU H  nUR                  [        UXV5      5        M      [        U5      [        U5      :X  d   S
5       eUU4$ s  snnf s  snnf s  snnf s  snnf s  snnf s  snnf )zdApplies Tesseract OCR on a document image, and returns recognized words + normalized bounding boxes. r)   dict)r'   output_typeconfigtextlefttopr!   r"   z-Not as many words as there are bounding boxes)
r   sizepytesseractimage_to_data	enumeratestripzipappendr$   len)r&   r'   r(   r)   	pil_imageimage_widthimage_heightdatawordsr1   r2   r!   r"   idxwordirrelevant_indicescoordactual_boxesxywh
actual_boxnormalized_boxesr    s                            r#   apply_tesseractrK   ?   s    ,<+G'R UHI )K$$YvVfgD&*6lDL$u+tT[}^bck^l&l#EV 09/?T/?)#tzz|#/?T#,U#3U#3ics7TT#3EU$-dOUOjc5s7TEODU#,S>
S>ZS%S5R5>C
S%.u%5W%5zsE9VU%5EW&/&7Y&7
U3;Xe&7FY L$U3
1aAE1q5)
J' 4
 c; MN  u:-.._0__.""") UUU
SWYsH   G<GG"G9G	G G0GGG.G%>G%)vision)backendsc                     ^  \ rS rSrSrS/rSS\R                  SSS4S\S\	\
\\4      S	\S
\S\	\   S\	\   SS4U 4S jjjr\R                  SS4S\R                  S\
\\4   S	\S\	\\\4      S\	\\\4      S\R                  4S jjr\" 5       SSSSSSS\R(                  S4	S\S\	\   S\	\
\\4      S	\	\   S
\	\   S\	\   S\	\   S\	\\\4      S\S\	\\\4      S\R0                  R0                  4S jj5       rSrU =r$ )LayoutLMv2ImageProcessorf   a?  
Constructs a LayoutLMv2 image processor.

Args:
    do_resize (`bool`, *optional*, defaults to `True`):
        Whether to resize the image's (height, width) dimensions to `(size["height"], size["width"])`. Can be
        overridden by `do_resize` in `preprocess`.
    size (`dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`):
        Size of the image after resizing. Can be overridden by `size` in `preprocess`.
    resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
        Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the
        `preprocess` method.
    apply_ocr (`bool`, *optional*, defaults to `True`):
        Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes. Can be overridden by
        `apply_ocr` in `preprocess`.
    ocr_lang (`str`, *optional*):
        The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
        used. Can be overridden by `ocr_lang` in `preprocess`.
    tesseract_config (`str`, *optional*, defaults to `""`):
        Any additional custom configuration flags that are forwarded to the `config` parameter when calling
        Tesseract. For example: '--psm 6'. Can be overridden by `tesseract_config` in `preprocess`.
pixel_valuesTNr+   	do_resizer3   resample	apply_ocrocr_langr(   returnc                    > [         TU ]  " S0 UD6  Ub  UOSSS.n[        U5      nXl        X l        X0l        X@l        XPl        X`l        g )N   )r"   r!    )	super__init__r   rR   r3   rS   rT   rU   r(   )	selfrR   r3   rS   rT   rU   r(   kwargs	__class__s	           r#   r[   !LayoutLMv2ImageProcessor.__init__   sO     	"6"'tc-JT""	 "  0r%   r&   data_formatr)   c                     [        U5      nSU;  d  SU;  a  [        SUR                  5        35      eUS   US   4n[        U4UUUUS.UD6$ )a  
Resize an image to `(size["height"], size["width"])`.

Args:
    image (`np.ndarray`):
        Image to resize.
    size (`dict[str, int]`):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
    resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
    data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the output image. If unset, the channel dimension format of the input
        image is used. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image. If unset, the channel dimension format is inferred
        from the input image. Can be one of:
        - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
        - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

Returns:
    `np.ndarray`: The resized image.
r"   r!   zFThe `size` dictionary must contain the keys `height` and `width`. Got )r3   rS   r`   r)   )r   
ValueErrorkeysr
   )r\   r&   r3   rS   r`   r)   r]   output_sizes           r#   r
   LayoutLMv2ImageProcessor.resize   sy    F T"47$#6efjfofofqersttH~tG}5
#/
 
 	
r%   imagesreturn_tensorsc           
      T   Ub  UOU R                   nUb  UOU R                  n[        U5      nUb  UOU R                  nUb  UOU R                  nUb  UOU R
                  nUb  UOU R                  n[        U5      n[        U5      (       d  [        S5      e[        UUUS9  U Vs/ s H  n[        U5      PM     nnU
c  [        US   5      n
U(       aG  [        U S5        / n/ nU H1  n[        XXzS9u  pUR                  U5        UR                  U5        M3     U(       a   U Vs/ s H  nU R!                  XXJS9PM     nnU Vs/ s H  n[#        XS9PM     nnU Vs/ s H  n[%        XU
S9PM     nn['        SU0US	9nU(       a
  WUS
'   WUS'   U$ s  snf s  snf s  snf s  snf )a  
Preprocess an image or batch of images.

Args:
    images (`ImageInput`):
        Image to preprocess.
    do_resize (`bool`, *optional*, defaults to `self.do_resize`):
        Whether to resize the image.
    size (`dict[str, int]`, *optional*, defaults to `self.size`):
        Desired size of the output image after resizing.
    resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
        Resampling filter to use if resizing the image. This can be one of the enum `PIL.Image` resampling
        filter. Only has an effect if `do_resize` is set to `True`.
    apply_ocr (`bool`, *optional*, defaults to `self.apply_ocr`):
        Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes.
    ocr_lang (`str`, *optional*, defaults to `self.ocr_lang`):
        The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
        used.
    tesseract_config (`str`, *optional*, defaults to `self.tesseract_config`):
        Any additional custom configuration flags that are forwarded to the `config` parameter when calling
        Tesseract.
    return_tensors (`str` or `TensorType`, *optional*):
        The type of tensors to return. Can be one of:
            - Unset: Return a list of `np.ndarray`.
            - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
            - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
            - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
    data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
        The channel dimension format for the output image. Can be one of:
            - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)rR   r3   rS   r   r4   r,   )r&   r3   rS   r)   )input_channel_dimrQ   )r>   tensor_typer?   boxes)rR   r3   r   rS   rT   rU   r(   r   r   rb   r   r   r   r   rK   r9   r
   r	   r   r   )r\   rf   rR   r3   rS   rT   rU   r(   rg   r`   r)   r&   words_batchboxes_batchr?   rk   r>   s                    r#   
preprocess#LayoutLMv2ImageProcessor.preprocess   s   ^ "+!6IDNN	'tTYYT"'38!*!6IDNN	'38/?/K+QUQfQf)&1F##:  	&	
 6<<VE.'V<$ >vay IdM2KK.u@Pv""5)""5)  
  $#E %Xk#   _ee^dUZ$UP^dent
ntej'N_`nt 	 
 .&!9~V'DM'DMA =  f
s   'F(FF !F%)rT   rR   rU   rS   r3   r(   )__name__
__module____qualname____firstlineno____doc__model_input_namesr   BILINEARboolr   r-   strr   r[   npndarrayr   r   r
   r   FIRSTr   r   PILImagern   __static_attributes____classcell__)r^   s   @r#   rO   rO   f   s   . (( )-'9'B'B"&*,11 tCH~&1 %	1
 1 3-1 #3-1 
1 14 (:'B'B>BDH.
zz.
 38n.
 %	.

 eC)9$9:;.
 $E#/?*?$@A.
 
.
` %& %))-15$("&*.;?(8(>(>DHdd D>d tCH~&	d
 -.d D>d 3-d #3-d !sJ!78d &d $E#/?*?$@Ad 
d 'dr%   rO   )NN),rt   typingr   r   numpyry   image_processing_utilsr   r   r   image_transformsr	   r
   r   r   image_utilsr   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   utils.import_utilsr   r|   r4   
get_loggerrp   loggerr$   rz   rx   rK   rO   __all__rY   r%   r#   <module>r      s    , "  U U e e	 	 	  +  			H	% '+@D	$#::$#
3-$# sm$#  c+;&; <=	$#N 
;E1 E  EP &
&r%   