
    cCiY                         S r SSKJr  SSKJrJr  SSKrSSKJ	r	  SSK
Jr  SSKJr  SS	KJrJrJr  SS
KJr  \R(                  " \5      r\" 5       (       a  SSKr\" SS9 " S S\5      5       rS/rg)z
Processor class for SAM2.
    )deepcopy)OptionalUnionN   )
ImageInput)ProcessorMixin)BatchEncoding)
TensorTypeis_torch_availablelogging)requires)torch)backendsc                   B  ^  \ rS rSrSrS/rSrS"S\\   S\4U 4S jjjr	       S#S\\
   S	\\
   S
\\\\\\\            \R                  4      S\\\\\\         \R                  4      S\\\\\\         \R                  4      S\\\\\      \R                  4      S\\\\4      S\4S jjr S$S\SSSS4S jjrS%S jrS&S jrS'S jrS rS r S&S\\R                  \R6                  \4   S\S\S\S\\   S\4S jjrS(S jr     S)S  jrS!rU =r $ )*Sam2Processor%   a  
Constructs a SAM2 processor which wraps a SAM2 image processor and an 2D points & Bounding boxes processor into a
single processor.

[`Sam2Processor`] offers all the functionalities of [`Sam2ImageProcessorFast`] and [`Sam2VideoProcessor`]. See the docstring of
[`~Sam2ImageProcessorFast.__call__`] and [`~Sam2VideoProcessor.__call__`] for more information.

Args:
    image_processor (`Sam2ImageProcessorFast`):
        An instance of [`Sam2ImageProcessorFast`].
    target_size (`int`, *optional*):
        The target size (target_size, target_size) to which the image will be resized.
    point_pad_value (`int`, *optional*, defaults to -10):
        The value used for padding input points.
image_processorSam2ImageProcessorFasttarget_sizepoint_pad_valuec                    > [         TU ]  " U40 UD6  X0l        Ub  X l        g U R                  R                  S   U l        g )Nheight)super__init__r   r   sizer   )selfr   r   r   kwargs	__class__s        b/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/sam2/processing_sam2.pyr   Sam2Processor.__init__:   s?    3F3.*5*A;tG[G[G`G`aiGj    imagessegmentation_mapsinput_pointsinput_labelsinput_boxesoriginal_sizesreturn_tensorsreturnc                   ^ Ub  U R                   " U4UUS.UD6n	OXUbJ  [        U[        R                  5      (       a  UR	                  5       R                  5       n[        SU0US9n	O[        S5      eU	S   nUb2  [        U5      S:w  a#  [        U5      [        U5      :w  a  [        S5      eUc  Uc  UGb  U R                  USS	S
SS9n
U R                  USSSS9nU R                  USSSSS9nU
b  U R                  U
5      SS nUb  U R                  U5      SS nUb  U R                  U5      SS mU
b  Ub  WW:w  a  [        S5      eUb4  [        U5      S:  a%  [        U4S jU 5       5      (       a  [        S5      eU
b]  U R                  U
WS/-   5      n[        R                  " U[        R                  S9nU R                  UUSS9  U	R!                  SU05        UbH  U R                  UW5      n[        R                  " U[        R"                  S9nU	R!                  SU05        UbG  [        R                  " U[        R                  S9nU R                  UUSS9  U	R!                  SU05        U	$ )a  
This method uses [`Sam2ImageProcessorFast.__call__`] method to prepare image(s) for the model. It also prepares 2D
points and bounding boxes for the model if they are provided.

Args:
    images (`ImageInput`, *optional*):
        The image(s) to process.
    segmentation_maps (`ImageInput`, *optional*):
        The segmentation maps to process.
    input_points (`list[list[list[list[float]]]]`, `torch.Tensor`, *optional*):
        The points to add to the frame.
    input_labels (`list[list[list[int]]]`, `torch.Tensor`, *optional*):
        The labels for the points.
    input_boxes (`list[list[list[float]]]`, `torch.Tensor`, *optional*):
        The bounding boxes to add to the frame.
    original_sizes (`list[list[float]]`, `torch.Tensor`, *optional*):
        The original sizes of the images.
    return_tensors (`str` or `TensorType`, *optional*):
        The type of tensors to return.
    **kwargs:
        Additional keyword arguments to pass to the image processor.

Returns:
    A [`BatchEncoding`] with the following fields:
    - `pixel_values` (`torch.Tensor`): The processed image(s).
    - `original_sizes` (`list[list[float]]`): The original sizes of the images.
    - `reshaped_input_sizes` (`torch.Tensor`): The reshaped input sizes of the images.
    - `labels` (`torch.Tensor`): The processed segmentation maps (if provided).
    - `input_points` (`torch.Tensor`): The processed points.
    - `input_labels` (`torch.Tensor`): The processed labels.
    - `input_boxes` (`torch.Tensor`): The processed bounding boxes.
N)r#   r(   r'   )tensor_typez0Either images or original_sizes must be provided   z{original_sizes must be of length 1 or len(images). If you are passing a single image, you must pass a single original_size.   pointsz;[image level, object level, point level, point coordinates]   )expected_depth
input_nameexpected_formatexpected_coord_sizer   labelsz([image level, object level, point level])r0   r1   r2   boxesz)[image level, box level, box coordinates]zbInput points and labels have inconsistent dimensions. Please ensure they have the same dimensions.c              3   F   >#    U  H  n[        U5      TS    :  v   M     g7f)r,   N)len).0	img_boxesboxes_max_dimss     r   	<genexpr>)Sam2Processor.__call__.<locals>.<genexpr>   s     [?is9~q(99?s   !zInput boxes have inconsistent dimensions that would require padding, but boxes cannot be padded due to model limitations. Please ensure all images have the same number of boxes.)dtypeT)preserve_paddingr$   r%   is_bounding_boxr&   )r   
isinstancer   Tensorcputolistr	   
ValueErrorr7   _validate_single_input_get_nested_dimensionsany_pad_nested_listtensorfloat32_normalize_tensor_coordinatesupdateint64)r   r"   r#   r$   r%   r&   r'   r(   r   encoding_image_processorprocessed_pointsprocessed_labelsprocessed_boxespoints_max_dimslabels_max_dimspadded_pointsfinal_pointspadded_labelsfinal_labelsfinal_boxesr:   s                       @r   __call__Sam2Processor.__call__?   s   V '+';';("3-( 	($ '.%,,77!/!3!3!5!<!<!>'46F5Wes't$OPP 22BC#n"5":s>?RVYZ`Va?a N 
 #|'?;CZ#:: # ]$%  ;    $:: # J	  ;   #99 " K$% : O  +"&"="=>N"OPRQR"S+"&"="=>N"OPRQR"S*!%!<!<_!Mbq!Q  +0@0L"o5$| 
 *s?/Cq/H[?[[[$R   + $ 5 56F[\Z]H] ^$||MO22<bf2g(//0NO+ $ 5 56F X$||MM(//0NO*#ll?%--P22;`d2e(//0LM''r!   coordsztorch.Tensorc                     Uu  pVXp[        U5      R                  5       nU(       a  UR                  SSS5      nUS   X-  -  US'   US   Xu-  -  US'   U(       a  UR                  SS5      nU$ )a  
Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.

Args:
    target_size (`int`):
        The target size of the image.
    coords (`torch.Tensor`):
        The coordinates to be normalized.
    original_size (`tuple`):
        The original size of the image.
    is_bounding_box (`bool`, *optional*, defaults to `False`):
        Whether the coordinates are bounding boxes.
r/   ).r   ).r,   r-   )r   floatreshape)	r   r   r\   original_sizer@   old_hold_wnew_hnew_ws	            r   _normalize_coordinates$Sam2Processor._normalize_coordinates   sx      %"u&!'')^^B1-F5=9v5=9v^^B*Fr!   c           	         Uc  g[        U[        R                  5      (       ad  X2S-
  :X  d  [        UR                  5      S::  a  UR                  5       R                  5       $ U Vs/ s H  o@R                  XBUS-   5      PM     sn$ [        U[        R                  5      (       aV  X2S-
  :X  d  [        UR                  5      S::  a  UR                  5       $ U Vs/ s H  o@R                  XBUS-   5      PM     sn$ [        U[        5      (       a,  X2:X  a  U$ U Vs/ s H  o@R                  XBUS-   5      PM     sn$ [        U[        [        45      (       a  U$ [        S[        U5       35      es  snf s  snf s  snf )a  
Recursively convert various input formats (tensors, numpy arrays, lists) to nested lists.

Args:
    data: Input data in any format
    expected_depth: Expected nesting depth
    current_depth: Current depth in recursion

Returns:
    Nested list representation of the data
Nr/   r,   zUnsupported data type: )rA   r   rB   r7   shapenumpyrD   _convert_to_nested_listnpndarraylistintr_   rE   type)r   datar0   current_depthitems        r   rk   %Sam2Processor._convert_to_nested_list   s[    < dELL)) 22c$**o6Jzz|**,,jnojnbf44T=[\K\]jnoobjj)) 22c$**o6J{{}$jnojnbf44T=[\K\]jnood##. koojnbf44T=[\K\]jnoosEl++K6tDzlCDD! p
 p ps   'E2E7E<c                    Uc  / n[        U[        5      (       d  U$ [        U5      S:X  a  UR                  [        U5      5        O[	        US   [        U5      5      US'   [        U5      S:  a  U Hz  n[        U[        5      (       d  M  U R                  U5      n[        U5       H@  u  pVUS-   [        U5      :  a  UR                  U5        M*  [	        X%S-      U5      X%S-   '   MB     M|     U$ )a  
Get the maximum dimensions at each level of nesting.

Args:
    nested_list (`list`):
        Nested list structure.
    max_dims (`list`, *optional*):
        Current maximum dimensions (for recursion).

Returns:
    `list`: A list of maximum dimensions for each nesting level.
r   r,   )rA   rn   r7   appendmaxrG   	enumerate)r   nested_listmax_dimsrs   sub_dimsidims          r   rG   $Sam2Processor._get_nested_dimensions  s     H+t,,Ox=AOOC,-hqk3{+;<HQK{a#dD))#::4@H"+H"5q5CM1$OOC0.1(q5/3.GHUO	 #6	 $ r!   c                 :   Uc  U R                   nU[        U5      :  a  U$ [        U[        5      (       d  U/n[        U5      nX#   nU[        U5      S-
  :X  a  UR	                  U/Xe-
  -  5        OUS:  am  U[        U5      S-
  :  a  X#S-   S nU R                  Xt5      nOU/X#S-      -  nUR	                  [        Xe-
  5       V	s/ s H  n	[        U5      PM     sn	5        OLX#S-   S nU R                  Xt5      nUR	                  [        U5       V	s/ s H  n	[        U5      PM     sn	5        U[        U5      S-
  :  aN  [        [        U5      5       H6  n
[        X   [        5      (       d  M  U R                  X   X#S-   U5      X'   M8     U$ s  sn	f s  sn	f )a  
Recursively pad a nested list to match target dimensions.

Args:
    nested_list (`list`):
        Nested list to pad.
    target_dims (`list`):
        Target dimensions for each level.
    current_level (`int`, *optional*, defaults to 0):
        Current nesting level.
    pad_value (`int`, *optional*):
        Value to use for padding.

Returns:
    `list`: The padded nested list.
Nr,   r   r/   )	r   r7   rA   rn   extend_create_empty_nested_structureranger   rI   )r   ry   target_dimscurrent_level	pad_valuecurrent_sizer   template_dimstemplate_r|   s              r   rI   Sam2Processor._pad_nested_list,  s   " ,,IC,, +t,,&-K ;'!0 C,q00	{k.HIJ a 3{#3a#77$/0A0C$DM#BB=\H !*{[9J-KKH""kF`@a#b@a1HX$6@a#bc !,A,=,? @>>}X""k@R#S@R1HX$6@R#ST 3{+a//3{+,knd33%)%:%:;>;hiXikt%uKN -  $c
 $Ts   FFc                     [        U5      S:X  a	  U/US   -  $ [        US   5       Vs/ s H  o0R                  USS U5      PM     sn$ s  snf )z
Create an empty nested structure with given dimensions filled with pad_value.

Args:
    dims (`list`):
        The dimensions of the nested structure.
    pad_value (`int`):
        The value to fill the structure with.
r,   r   N)r7   r   r   )r   dimsr   r   s       r   r   ,Sam2Processor._create_empty_nested_structurej  sX     t9>;a((V[\`ab\cVdeVdQR77QR)LVdeees   A
c                    [        U[        5      (       a'  [        U5      S:X  a  gSU R                  US   5      -   $ [        U[        R
                  [        R                  45      (       a  [        UR                  5      $ g)zz
Get the nesting level of a list structure.

Args:
    input_list (`list`):
        The list to get the nesting level of.
r   r,   )	rA   rn   r7   _get_nesting_levelrl   rm   r   rB   ri   )r   
input_lists     r   r    Sam2Processor._get_nesting_levely  sh     j$'':!#t..z!}===
RZZ$>??z''((r!   rq   r0   r1   r2   r3   c                    Uc  g[        U[        R                  [        R                  45      (       a{  UR
                  U:w  a"  [        SU SU SU SUR
                   S3	5      eUb5  UR                  S   U:w  a"  [        SU SU SUR                  S    S	35      eU R                  X5      $ [        U[        5      (       a?  U R                  U5      nXb:w  a  [        SU S
U SU SU S3	5      eU R                  X5      $ g)ae  
        Validate a single input by ensuring proper nesting and raising an error if the input is not valid.

        Args:
            data (`torch.Tensor`, `np.ndarray`, or `list`):
                Input data to process.
            expected_depth (`int`):
                Expected nesting depth.
            input_name (`str`):
                Name of the input for error messages.
            expected_format (`str`):
                The expected format of the input.
            expected_coord_size (`int`, *optional*):
                Expected coordinate size (2 for points, 4 for boxes, None for labels).
.
NzInput z must be a tensor/array with z, dimensions. The expected nesting format is z. Got z dimensions.r^   z as the last dimension, got .z must be a nested list with z( levels. The expected nesting format is z levels.)rA   r   rB   rl   rm   ndimrE   ri   rk   rn   r   )r   rq   r0   r1   r2   r3   rr   s          r   rF   $Sam2Processor._validate_single_input  s   0 < dU\\2::677yyN* ZL(EnEU  VB  CR  BS  SY  Z^  Zc  Zc  Yd  dp  q  %0::b>%88$ ,IJ]I^^z{  |F  |F  GI  |J  {K  KL  M  //EE dD!! 33D9M. ZL(D^DTT|  ~M  }N  NT  Ub  Tc  ck  l  //EE "r!   c                    U(       a  XR                   :g  nUR                  SSS9n[        [        U5      5       H  nXqR                  S   :  d  M  U[        U5      :  a  X'   OUS   nU R                  U R                  X   XS9n	U(       a4  WU   n
[        R                  " U
R                  X   5      XU   5      X'   M  XU'   M     g)a  
Helper method to normalize coordinates in a tensor across multiple images.

Args:
    tensor (`torch.Tensor`):
        Input tensor with coordinates.
    original_sizes (`list`):
        Original image sizes.
    is_bounding_box (`bool`, *optional*, defaults to `False`):
        Whether coordinates are bounding boxes.
    preserve_padding (`bool`, *optional*, defaults to `False`):
        Whether to preserve padding values (for points).
r^   T)r}   keepdimr   r?   N)
r   allr   r7   ri   rf   r   r   where	expand_as)r   rJ   r'   r@   r>   mask
coord_maskimg_idxra   normalized_coordsimg_masks              r   rL   +Sam2Processor._normalize_tensor_coordinates  s     111Db$7JS01Ga(;BSEX;X 7^lmn^o$($?$?$$fo} %@ %! $)'2H&+kk **6?;=NW^P_'FO '87O 2r!   c           	      H    U R                   R                  " UUUUUUU40 UD6$ )a  
Remove padding and upscale masks to the original image size.

Args:
    masks (`Union[List[torch.Tensor], List[np.ndarray]]`):
        Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
    original_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
        The original sizes of each image before it was resized to the model's expected input shape, in (height,
        width) format.
    mask_threshold (`float`, *optional*, defaults to 0.0):
        Threshold for binarization and post-processing operations.
    binarize (`bool`, *optional*, defaults to `True`):
        Whether to binarize the masks.
    max_hole_area (`float`, *optional*, defaults to 0.0):
        The maximum area of a hole to fill.
    max_sprinkle_area (`float`, *optional*, defaults to 0.0):
        The maximum area of a sprinkle to fill.
    apply_non_overlapping_constraints (`bool`, *optional*, defaults to `False`):
        Whether to apply non-overlapping constraints to the masks.

Returns:
    (`torch.Tensor`): Batched masks in batch_size, num_channels, height, width) format, where (height, width)
    is given by original_size.
)r   post_process_masks)	r   masksr'   mask_thresholdbinarizemax_hole_areamax_sprinkle_area!apply_non_overlapping_constraintsr   s	            r   r    Sam2Processor.post_process_masks  s=    F ##66-	
 	
 		
r!   )r   r   )Ni)NNNNNNN)F)r   )N)r   N)FF)        Tr   r   F)!__name__
__module____qualname____firstlineno____doc__
attributesimage_processor_classr   ro   r   r   r   rn   r_   r   rB   strr
   r	   rZ   rf   rk   rG   rI   r   r   rl   rm   rF   rL   r   __static_attributes____classcell__)r   s   @r   r   r   %   s     $$J4kXc] k\_ k k (,26UYMQNRKO;?B($B( $J/B( uT$tDK/@*A%BELL%PQR	B(
 uT$tCy/%:ELL%HIJB( eDd5k):$;U\\$IJKB( !tDK'8%,,'F!GHB( !sJ!78B( 
B(J X](6	<$EL#J<|f. .20FELL"**d230F 0F 	0F
 0F &c]0F 
0Fd!8N */,
 ,
r!   r   )r   copyr   typingr   r   rj   rl   image_utilsr   processing_utilsr   tokenization_utils_baser	   utilsr
   r   r   utils.import_utilsr   
get_loggerr   loggerr   r   __all__ r!   r   <module>r      ss     "  % . 4 < < * 
		H	% 
:e
N e
 e
P 
r!   