
    h~#                     z    S SK r S SKJr  S SKJr  S SKJrJr  S SKJ	r	  S SK
Jr  S SKJr  SS	KJr   " S
 S\5      rg)    N)Image)SegmentationPredictor)DEFAULT_CFGchecks)box_iou)scale_masks)
TORCH_1_10   )adjust_bboxes_to_image_borderc                   X   ^  \ rS rSrSr\SS4U 4S jjrU 4S jrS
S jrS r	S r
S	rU =r$ )FastSAMPredictor   a  
FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks.

This class extends the SegmentationPredictor, customizing the prediction pipeline specifically for fast SAM. It
adjusts post-processing steps to incorporate mask prediction and non-maximum suppression while optimizing for
single-class segmentation.

Attributes:
    prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
    device (torch.device): Device on which model and tensors are processed.
    clip_model (Any, optional): CLIP model for text-based prompting, loaded on demand.
    clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.

Methods:
    postprocess: Apply postprocessing to FastSAM predictions and handle prompts.
    prompt: Perform image segmentation inference based on various prompt types.
    set_prompts: Set prompts to be used during inference.
Nc                 4   > [         TU ]  XU5        0 U l        g)a  
Initialize the FastSAMPredictor with configuration and callbacks.

This initializes a predictor specialized for Fast SAM (Segment Anything Model) segmentation tasks. The predictor
extends SegmentationPredictor with custom post-processing for mask prediction and non-maximum suppression
optimized for single-class segmentation.

Args:
    cfg (dict): Configuration for the predictor.
    overrides (dict, optional): Configuration overrides.
    _callbacks (list, optional): List of callback functions.
N)super__init__prompts)selfcfg	overrides
_callbacks	__class__s       \/home/james-whalen/.local/lib/python3.13/site-packages/ultralytics/models/fastsam/predict.pyr   FastSAMPredictor.__init__#   s     	4    c                   > U R                   R                  SS5      nU R                   R                  SS5      nU R                   R                  SS5      nU R                   R                  SS5      n[        TU ]  XU5      nU H  n	[        R
                  " SSU	R                  S   U	R                  S   /US   R                  [        R                  S9n
[        U	R                  R                  U	R                  5      n[        R                  " [        U
S   U5      S	:  5      R                  5       nUR                  5       S:w  d  M  XR                  R                  U'   M     U R!                  XXVUS
9$ )a]  
Apply postprocessing to FastSAM predictions and handle prompts.

Args:
    preds (list[torch.Tensor]): Raw predictions from the model.
    img (torch.Tensor): Input image tensor that was fed to the model.
    orig_imgs (list[np.ndarray]): Original images before preprocessing.

Returns:
    (list[Results]): Processed results with prompts applied.
bboxesNpointslabelstextsr   r
   )devicedtypeg?)r   r   r   r   )r   popr   postprocesstorchtensor
orig_shaper    float32r   boxesxyxynonzeror   flattennumelprompt)r   predsimg	orig_imgsr   r   r   r   resultsresultfull_boxr(   idxr   s                r   r#   FastSAMPredictor.postprocess3   s9    !!(D1!!(D1!!(D1  $/'%e)<F||Av((+V->->q-AB5QR8??bgboboH 2&,,2C2CVEVEVWE-- > DEMMOCyy{a)1!!#&  {{7&W\{]]r   c                  
   Uc  Uc  Uc  U$ / n[        U[        5      (       d  U/nU GH  n[        U5      S:X  a  UR                  U5        M&  UR                  R
                  nUR                  SS UR                  :w  a  [        US   UR                  5      S   n[        R                  " [        U5      [        R                  U R                  S9n	Ub  [        R                  " U[        R                  U R                  S9nUR                  S:X  a  US   OUnUSS2S4   USS2S4   -
  USS2S4   USS2S4   -
  -  n
[        R                   " U Vs/ s H)  oSS2US   US   2US   US   24   R#                  SS9PM+     sn5      n[        R"                  " USS9nU
SS2S4   U-   U-
  nS	U	[        R$                  " X-  SS9'   UGb  [        R                  " U[        R                  U R                  S9nUR                  S:X  a  US   OUnUc#  [        R&                  " UR                  S   5      n[        R                  " U[        R                  U R                  S9n[        U5      [        U5      :X  d   S
[        U5       S[        U5       35       eUR#                  5       S:X  a7  [        R&                  " [        U5      [        R                  U R                  S9O6[        R                  " [        U5      [        R                  U R                  S9n[)        X45       H8  u  nn[        U5      U[        R*                  " USS2US   US   4   S	S9S   '   M:     X-  n	UGbp  [        U[,        5      (       a  U/n/ / nn[/        UR0                  R2                  R5                  5       5       H  u  nnS U 5       u  nnnn[6        (       a  UU   R#                  5       O!UU   R#                  S5      R#                  5       S::  a  UR                  U5        Mk  UR                  [8        R:                  " UR<                  UU2UU2SSS24   5      5        M     U R?                  UU5      n[        R$                  " USS9n[        U5      (       a@  U[        R@                  " UU R                  S9S   [C        U5      :*  R#                  S5      -  nS	U	U'   UR                  Xy   5        GM     U$ s  snf )a  
Perform image segmentation inference based on cues like bounding boxes, points, and text prompts.

Args:
    results (Results | list[Results]): Original inference results from FastSAM models without any prompts.
    bboxes (np.ndarray | list, optional): Bounding boxes with shape (N, 4), in XYXY format.
    points (np.ndarray | list, optional): Points indicating object locations with shape (N, 2), in pixels.
    labels (np.ndarray | list, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
    texts (str | list[str], optional): Textual prompts, a list containing string objects.

Returns:
    (list[Results]): Output results filtered and determined by the provided prompts.
Nr   r
   )r!   r          )r
   r8   )dimTz5Expected `labels` with same size as `point`, but got z and )as_tuplec              3   8   #    U  H  n[        U5      v   M     g 7f)N)int).0xs     r   	<genexpr>*FastSAMPredictor.prompt.<locals>.<genexpr>   s     %8ac!ffas   d   r    )"
isinstancelistlenappendmasksdatashaper&   r   r$   zerosboolr    	as_tensorint32ndimstacksumargmaxoneszipr*   str	enumerater(   r)   tolistr	   r   	fromarrayorig_img_clip_inferencer%   r<   )r   r1   r   r   r   r   prompt_resultsr2   rH   r4   
bbox_areasb
mask_areasfull_mask_areasunion	point_idxpointlabelcrop_ims
filter_idxix1y1x2y2
similaritytext_idxs                              r   r-   FastSAMPredictor.promptO   sY    >fnN'4((iGF6{a%%f-LL%%E{{12&"3"33#E$K1B1BCAF++c&kDKKPC!u{{4;;W)/)9v$QTlVAqD\9fQTlVTUWXTX\>YZ
"[[fl)mflab1Q4!A$;!qt0K*L*P*PU[*P*\fl)mn
"'))Ev">"1d7+o=
J?CELL!3;<!u{{4;;W)/)9v>"ZZQ8Fu{{4;;W6{c&k1 KCPVK=X]^abh^i]jk1
 zz|q( JJs6{%**T[[QS[

4;;W 
 %($7LE5_cdi_jIemmE!U1XuQx2G,HSWXYZ[\ %8  eS))"GE')2*%fll&7&7&>&>&@ADAq%8a%8NBB*4*a%(,,q/:M:M:OTWW"))!, OOEOOFOOBrE2b5RVTVRVDV4W$XY B "11(EB
 <<
;z??j!Md!SWZ[cWd!d i ijk llH $H!!&+.g j Q *ns   0S;
c                     SSK n[	        U S5      (       a  [	        U S5      (       d'  UR                  SU R                  S9u  U l        U l        [        R                  " U Vs/ s H,  o@R                  U5      R                  U R                  5      PM.     sn5      nUR                  U5      R                  U R                  5      nU R                  R                  U5      nU R                  R                  U5      nXfR                  SS	S
9-  nXwR                  SS	S
9-  nXgSS2S4   -  R!                  S5      $ ! [         a    [        R                  " S5        SSK n GN\f = fs  snf )am  
Perform CLIP inference to calculate similarity between images and text prompts.

Args:
    images (list[PIL.Image]): List of source images, each should be PIL.Image with RGB channel order.
    texts (list[str]): List of prompt texts, each should be a string object.

Returns:
    (torch.Tensor): Similarity matrix between given images and texts with shape (M, N).
r   Nz+git+https://github.com/ultralytics/CLIP.git
clip_modelclip_preprocesszViT-B/32rC   rB   T)r9   keepdim)clipImportErrorr   check_requirementshasattrloadr    ro   rp   r$   rP   totokenizeencode_imageencode_textnormrQ   )r   imagesr   rr   imagetokenized_textimage_featurestext_featuress           r   rZ    FastSAMPredictor._clip_inference   s;   	 l++WTCT5U5U48IIjQUQ\Q\I4]1DOT1W]^W]e2259<<T[[IW]^_u-00=55f=33NC--"d-CC++D+AAq$w!77<<R@@  	%%&ST	
 _s   D; #3E&;$E#"E#c                     Xl         g)z(Set prompts to be used during inference.N)r   )r   r   s     r   set_promptsFastSAMPredictor.set_prompts   s    r   )ro   rp   r   )NNNN)__name__
__module____qualname____firstlineno____doc__r   r   r#   r-   rZ   r   __static_attributes____classcell__)r   s   @r   r   r      s3    & '$4  ^8HTA4 r   r   )r$   PILr   ultralytics.models.yolo.segmentr   ultralytics.utilsr   r   ultralytics.utils.metricsr   ultralytics.utils.opsr   ultralytics.utils.torch_utilsr	   utilsr   r    r   r   <module>r      s.      A 1 - - 4 0f, fr   