
    h6                        S SK r S SKrS SKJr  S SKJrJr  S SKrS SK	r	S SK
Jr  S SKJr  S rS rS\	R                  4S	 jrS
 r " S S5      rSSS\	R&                  " S5      4S jrSSS\	R&                  " S5      4S jrSS\	R&                  " S5      4S jrS rS rS\\R2                  \	R                  4   4S jrS\\R2                  \	R                  4   S\\\\\4   4S jrg)    N)Thread)TupleUnion)Image)tqdmc                  >   [         R                  R                  5       (       a  [         R                  R                  S5      R                  S:  n [         R                  R                  S5      R                  S:  nU(       d  [
        R                  " S[        SS9  [        S [         R                  R                  S5      S S  5       5      nUS	:  a,  [
        R                  " S
[         R                   S3[        SS9  US	:  =(       d    U(       + nOSn SnSnXU4$ )Nr         zSFlash Attention is disabled as it requires a GPU with Ampere (8.0) CUDA capability.   category
stacklevelc              3   8   #    U  H  n[        U5      v   M     g 7fN)int).0vs     Y/home/james-whalen/.local/lib/python3.13/site-packages/torchao/_models/sam2/utils/misc.py	<genexpr>$get_sdpa_settings.<locals>.<genexpr>   s     Q0P1A0Ps   .)r   r   zYou are using PyTorch zw without Flash Attention v2 support. Consider upgrading to PyTorch 2.2+ for Flash Attention v2 (which could be faster).TF)torchcudais_availableget_device_propertiesmajorwarningswarnUserWarningtuple__version__split)old_gpuuse_flash_attnpytorch_versionmath_kernel_ons       r   get_sdpa_settingsr'      s    zz  **2215;;a?99!<BBaGMMe$  Q0A0A0G0G0LRa0PQQV#MM():):(; <e e$	 )61G5GN22    c                     SSK Jn  UR                  U R                  [        R
                  5      R                  5       5      $ )a  
Get the connected components (8-connectivity) of binary masks of shape (N, 1, H, W).

Inputs:
- mask: A binary mask tensor of shape (N, 1, H, W), where 1 is foreground and 0 is
        background.

Outputs:
- labels: A tensor of shape (N, 1, H, W) containing the connected component labels
          for foreground pixels and 0 for background pixels.
- counts: A tensor of shape (N, 1, H, W) containing the area of the connected
          components for foreground pixels and 0 for background pixels.
r   )_C)sam2r*   get_connected_componnetstor   uint8
contiguous)maskr*   s     r   get_connected_componentsr1   0   s.     &&twwu{{';'F'F'HIIr(   masksc                 T   U R                   u  pp4U R                  n[        R                  " XE[        R                  S9n[        R                  " X5[        R                  S9n[        R
                  " XgSS9u  pUS   R                  USX45      nU	S   R                  USX45      n	[        R                  " [        R                  " XU5      R                  S5      SS9u  p[        R                  " [        R                  " XS5      R                  S5      SS9u  p[        R                  " [        R                  " X	U5      R                  S5      SS9u  p[        R                  " [        R                  " X	S5      R                  S5      SS9u  p[        R                  " XX4SS9nU$ )	z
compute bounding box given an input mask

Inputs:
- masks: [B, 1, H, W] masks, dtype=torch.Tensor

Returns:
- box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
)devicedtypexy)indexing)NN.   dim)shaper4   r   arangeint32meshgridexpandminwhereflattenmaxstack)r2   B_hwr4   xsysgrid_xsgrid_ysmin_xsmax_xsmin_ysmax_ysbbox_coordss                  r   mask_to_boxrT   C   sB    JA!\\F	aekk	:B	aekk	:B~~bt<Go&--aA9Go&--aA9G		%++ea8@@D"MIF		%++eb9AA"E2NIF		%++ea8@@D"MIF		%++eb9AA"E2NIF++vv>BGKr(   c                    [         R                  " U 5      n[        R                  " UR	                  S5      R                  X45      5      nUR                  [        R                  :X  a  US-  nO[        SUR                   SU  35      e[        R                  " U5      R                  SSS5      nUR                  u  pVXFU4$ )NRGB     o@zUnknown image dtype: z on r   r   r8   )r   opennparrayconvertresizer5   r.   RuntimeErrorr   
from_numpypermutesize)img_path
image_sizeimg_pilimg_npimgvideo_widthvideo_heights          r   _load_img_as_tensorrh   ]   s    jj"GXXgooe,33Z4LMNF||rxx%26<<.XJOPP


6
"
*
*1a
3C 'Kk))r(   c                   *    \ rS rSrSrS rS rS rSrg)AsyncVideoFrameLoaderi   zR
A list of video frames to be load asynchronously without blocking session start.
c                 ,  ^  UT l         UT l        UT l        UT l        UT l        S /[        U5      -  T l        S T l        S T l        S T l	        UT l
        T R                  S5        U 4S jn[        USS9T l        T R                  R                  5         g )Nr   c                     >  [        [        [        TR                  5      5      SS9 H  n TR	                  U 5        M     g ! [
         a  nUTl         S nAg S nAff = f)Nframe loading (JPEG)desc)r   rangelenimages__getitem__	Exception	exception)neselfs     r   _load_frames4AsyncVideoFrameLoader.__init__.<locals>._load_frames   sP    #eC$45<RSA$$Q' T #!"#s   >A 
AAAT)targetdaemon)	img_pathsrb   offload_video_to_cpuimg_meanimg_stdrr   rs   rv   rg   rf   compute_devicert   r   threadstart)ry   r~   rb   r   r   r   r   rz   s   `       r   __init__AsyncVideoFrameLoader.__init__n   s     #$$8! fs9~- , 		# L>r(   c                    U R                   b  [        S5      U R                   eU R                  U   nUb  U$ [        U R                  U   U R
                  5      u  p#nX0l        X@l        X R                  -  nX R                  -  nU R                  (       d  UR                  U R                  SS9nX R                  U'   U$ )NzFailure in frame loading threadT)non_blocking)rv   r]   rs   rh   r~   rb   rg   rf   r   r   r   r-   r   )ry   indexre   rg   rf   s        r   rt   !AsyncVideoFrameLoader.__getitem__   s    >>%@At~~Ukk% ?J)<NN5!4??*
&; )&}}||((&&,,4&@C E
r(   c                 ,    [        U R                  5      $ r   )rr   rs   )ry   s    r   __len__AsyncVideoFrameLoader.__len__   s    4;;r(   )r   rv   rb   rs   r   r~   r   r   r   rg   rf   N)	__name__
__module____qualname____firstlineno____doc__r   rt   r   __static_attributes__ r(   r   rj   rj   i   s    $L* r(   rj   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?Fr   c           
      `   [        U [        5      n[        U [        5      nU=(       a%    [        R                  R                  U 5      S   S;   n	U(       d  U	(       a  [        U UUUUUS9$ U(       a3  [        R                  R                  U 5      (       a  [        U UUUUUUS9$ [        S5      e)z
Load the video frames from video_path. The frames are resized to image_size as in
the model and are loaded to GPU if offload_video_to_cpu=False. This is used by the demo.
r:   )z.mp4z.MP4)
video_pathrb   r   r   r   r   )r   rb   r   r   r   async_loading_framesr   z;Only MP4 video and JPEG folder are supported at this moment)

isinstancebytesstrospathsplitext!load_video_frames_from_video_fileisdir!load_video_frames_from_jpg_imagesNotImplementedError)
r   rb   r   r   r   r   r   is_bytesis_stris_mp4_paths
             r   load_video_framesr      s     *e,H
C(FQRWW--j9"=AQQK;0!!!5)
 	
 
BGGMM*--0!!!5!5)
 	
 "I
 	
r(   c                 <   [        U [        5      (       a'  [        R                  R	                  U 5      (       a  U nO[        S5      e[        R                  " U5       Vs/ s H-  n[        R                  R                  U5      S   S;   d  M+  UPM/     n	nU	R                  S S9  [        U	5      n
U
S:X  a  [        SU 35      eU	 Vs/ s H"  n[        R                  R                  X{5      PM$     nn[        R                  " U[        R                  S9S	S	2S	S	4   n[        R                  " U[        R                  S9S	S	2S	S	4   nU(       a(  [        UUUUUU5      nXR                   UR"                  4$ [        R$                  " U
S
X[        R                  S9n['        [)        USS95       H  u  nn[+        UU5      u  X'   nnM     U(       d3  UR-                  U5      nUR-                  U5      nUR-                  U5      nX-  nX-  nUWW4$ s  snf s  snf )aD  
Load the video frames from a directory of JPEG files ("<frame_index>.jpg" format).

The frames are resized to image_size x image_size and are loaded to GPU if
`offload_video_to_cpu` is `False` and to CPU if `offload_video_to_cpu` is `True`.

You can load a frame asynchronously by setting `async_loading_frames` to `True`.
ak  Only JPEG frames are supported at this moment. For video files, you may use ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG files, such as 
```
ffmpeg -i <your_video>.mp4 -q:v 2 -start_number 0 <output_dir>/'%05d.jpg'
```
where `-q:v` generates high-quality JPEG frames and `-start_number 0` asks ffmpeg to start the JPEG file from 00000.jpg.r:   )z.jpgz.jpegz.JPGz.JPEGc                 X    [        [        R                  R                  U 5      S   5      $ )Nr   )r   r   r   r   )ps    r   <lambda>3load_video_frames_from_jpg_images.<locals>.<lambda>   s    3rww'7'7':1'=#>r(   )keyr   zno images found in r5   N   rn   ro   )r   r   r   r   r   r   listdirr   sortrr   r]   joinr   tensorfloat32rj   rg   rf   zeros	enumerater   rh   r-   )r   rb   r   r   r   r   r   
jpg_folderr   frame_names
num_frames
frame_namer~   lazy_imagesrs   rw   ra   rg   rf   s                      r   r   r      s   " *c""rww}}Z'@'@
!<
 	
 J''A77Ar"&HH 	
'  
 >?[!JQ0=>>HST*j5IT||HEMM:1dD=IHll7%--8D$GG+ 
 44k6M6MMM[[Q
emmTF i6L!MN8/B8Z/X,	< O>*;;~.**^,
F
F<,,E Us    *HH	)Hc                    SSK n[        R                  " U[        R                  S9SS2SS4   n[        R                  " U[        R                  S9SS2SS4   nUR                  R                  S5        UR                  U 5      R                  5       R                  u  pxn	/ n
UR                  XUS9 H%  nU
R                  UR                  SSS5      5        M'     [        R                  " U
SS9R                  5       S	-  n
U(       d3  U
R                  U5      n
UR                  U5      nUR                  U5      nX-  n
X-  n
XU4$ )
z(Load the video frames from a video file.r   Nr   r   )widthheightr   r8   r;   rW   )decordr   r   r   bridge
set_bridgeVideoReadernextr=   appendr_   rF   floatr-   )r   rb   r   r   r   r   r   rg   rf   rH   rs   frames               r   r   r     s    ||HEMM:1dD=IHll7%--8D$GG
MMW%#)#5#5j#A#F#F#H#N#N LqF##J#TemmAq!,- U [[Q'--/%7F>*;;~.**^,
F
F,,r(   c                     US:  d   S5       eU n [        U S:*  5      u  p4US:  XA:*  -  n[        R                  " USU 5      n U $ ! [         a*  n[        R
                  " U S3[        SS9  Un  SnAU $ SnAff = f)zQ
A post processor to fill small holes in mask scores with area under `max_area`.
r   zmax_area must be positiveg?a*  

Skipping the post-processing step due to the error above. You can still use SAM 2 and it's OK to ignore the error above, although some post-processing functionality may be limited (which doesn't affect the results in most cases; see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).r   r   N)r1   r   rC   ru   r   r   r   )r0   max_area
input_masklabelsareasis_holerx   s          r   fill_holes_in_mask_scoresr   9  s     a<444<J0;A:%"34{{7C. K  
c N N !	
 K
s   2A 
A9A44A9c                 ~    U c  XpCO4[         R                  " U S   U/SS9n[         R                  " U S   U/SS9nX4S.$ )zDAdd new points and labels to previous point inputs (add at the end).point_coordsr8   r;   point_labels)r   r   )r   cat)old_point_inputs
new_points
new_labelspointsr   s        r   concat_pointsr   V  sL    #,^<jIqQ,^<jIqQ";;r(   imagec                 :   [        U [        R                  5      (       a  U R                  S S $ [        U [        R
                  5      (       a  U R                  u  pnX#4$ [        U [        5      (       a  U R                  u  p2X#4$ [        S[        U 5       35      e)Nr   z;Only support np.ndarray, torch.Tensoror PIL Image, but got )
r   rY   ndarrayr=   r   Tensorr   r`   r   type)r   rH   rI   rJ   s       r   get_image_sizer   a  s    %$${{2A	E5<<	(	(++av	E5	!	!zzv!I$u+W
 	
r(   crop_boxc                     Uu  p#pE[        U [        R                  5      (       a  XU2X$2S S 24   $ [        U [        R                  5      (       a  U S S 2X52X$24   $ [        S[        U 5       35      e)NzAExpected image to be of type np.ndarray or torch.Tensor, but got )r   rY   r   r   r   
ValueErrorr   )r   r   x0y0x1y1s         r   
crop_imager   p  sy     NBB%$$UBE1_%%	E5<<	(	(Qru_%%%%)%[M3
 	
r(   )r   r   	threadingr   typingr   r   numpyrY   r   PILr   r   r'   r1   r   rT   rh   rj   r4   r   r   r   r   r   r   r   r   r   r   r(   r   <module>r      s    
       3<J&u|| 4	*A  A P #!<<'&
Z #!<<'@-N #!<<'-@:<
%

ELL 89 

U\\)*
6;Cc3<N6O
r(   