
    h                     f    S SK rS SKrS SKJr  S SKJr  S SKJr   " S S\5      r	 " S S\	\5      r
g)	    N)LoadVisualPrompt)DetectionPredictor)SegmentationPredictorc                   l   ^  \ rS rSrSrSS\4U 4S jjjrS rU 4S jrSU 4S jjr	U 4S jr
S	 rS
rU =r$ )YOLOEVPDetectPredictor   a  
A mixin class for YOLO-EVP (Enhanced Visual Prompting) predictors.

This mixin provides common functionality for YOLO models that use visual prompting, including
model setup, prompt handling, and preprocessing transformations.

Attributes:
    model (torch.nn.Module): The YOLO model for inference.
    device (torch.device): Device to run the model on (CPU or CUDA).
    prompts (dict | torch.Tensor): Visual prompts containing class indices and bounding boxes or masks.

Methods:
    setup_model: Initialize the YOLO model and set it to evaluation mode.
    set_prompts: Set the visual prompts for the model.
    pre_transform: Preprocess images and prompts before inference.
    inference: Run inference with visual prompts.
    get_vpe: Process source to get visual prompt embeddings.
verbosec                 .   > [         TU ]  XS9  SU l        g)z
Set up the model for prediction.

Args:
    model (torch.nn.Module): Model to load or use.
    verbose (bool, optional): If True, provides detailed logging.
)r	   TN)supersetup_modeldone_warmup)selfmodelr	   	__class__s      _/home/james-whalen/.local/lib/python3.13/site-packages/ultralytics/models/yolo/yoloe/predict.pyr   "YOLOEVPDetectPredictor.setup_model   s     	E3    c                     Xl         g)z
Set the visual prompts for the model.

Args:
    prompts (dict): Dictionary containing class indices and bounding boxes or masks.
        Must include a 'cls' key with class indices.
N)prompts)r   r   s     r   set_prompts"YOLOEVPDetectPredictor.set_prompts*   s	     r   c           
        > [         T	U ]  U5      nU R                  R                  SS5      nU R                  R                  SS5      nU R                  S   n[	        U5      S:X  a`  U R                  US   R                  SS US   R                  SS XSU5      nUR                  S5      R                  U R                  5      nGOuUc   SU S	35       e[        U[        5      (       a  [        S
 U 5       5      (       d   SU S	35       e[        U[        5      (       a  [        S U 5       5      (       d   SU S	35       e[	        U5      [	        U5      s=:X  a  [	        U5      :X  d.  O   S[	        U5       S[	        U5       S[	        U5       S	35       e[        [	        U5      5       Vs/ s H9  nU R                  X(   R                  SS X   R                  SS XX   X8   5      PM;     nn[        R                  R                   R"                  R%                  USS9R                  U R                  5      nU R&                  R(                  (       a  UR+                  5       U l        U$ UR-                  5       U l        U$ s  snf )a  
Preprocess images and prompts before inference.

This method applies letterboxing to the input image and transforms the visual prompts
(bounding boxes or masks) accordingly.

Args:
    im (list): List containing a single input image.

Returns:
    (list): Preprocessed image ready for model inference.

Raises:
    ValueError: If neither valid bounding boxes nor masks are provided in the prompts.
bboxesNmaskscls   r      zExpected bboxes, but got !c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7fN
isinstancenpndarray.0bs     r   	<genexpr>7YOLOEVPDetectPredictor.pre_transform.<locals>.<genexpr>O   s!     3^W]RSJq"**4M4MW]   ')z#Expected list[np.ndarray], but got c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7fr    r!   r%   s     r   r(   r)   R   s!     5bYaTUjBJJ6O6OYar*   z-Expected same length for all inputs, but got vsT)batch_first)r   pre_transformr   poplen_process_single_imageshape	unsqueezetodevicer"   listallrangetorchnnutilsrnnpad_sequencer   fp16halffloat)
r   imimgr   r   categoryvisualsr   ir   s
            r   r.   $YOLOEVPDetectPredictor.pre_transform4   sT     g#B'!!(D1  $/<<&s8q=00Qbq1A2a5;;rPQ?T\fklG''*--dkk:G %L)B6(!'LL%fd++3^W]3^0^0^ 5fXQ?^ h--#5bYa5b2b2b 5hZqAb r7c(m:s6{: ?By3x=/Y[\_`f\g[hhij:
 s3x(A **36<<+;RU[[!_hk[a[de(   hhnn((55g45PSSTXT_T_`G)-w||~
 ?Fmmo
s   /A Jc                 F  > Ub  [        U5      (       a  [        R                  " U[        R                  S9nUR                  S:X  a	  USSS24   n[        US   US   -  US   US   -  5      nXF-  nUSSSS24==   [        US   US   U-  -
  S-  S-
  5      -  ss'   USSSS24==   [        US   US   U-  -
  S-  S-
  5      -  ss'   O;Ub-  [        TU ]!  U5      n[        R                  " U5      nSXUS:H  '   O[        S	5      e[        5       R                  X1XE5      $ )
af  
Process a single image by resizing bounding boxes or masks and generating visuals.

Args:
    dst_shape (tuple): The target shape (height, width) of the image.
    src_shape (tuple): The original shape (height, width) of the image.
    category (str): The category of the image for visual prompts.
    bboxes (list | np.ndarray, optional): A list of bounding boxes in the format [x1, y1, x2, y2].
    masks (np.ndarray, optional): A list of masks corresponding to the image.

Returns:
    (torch.Tensor): The processed visuals for the image.

Raises:
    ValueError: If neither `bboxes` nor `masks` are provided.
N)dtyper   r   .r   g?r   z$Please provide valid bboxes or masks)r0   r#   arrayfloat32ndimminroundr   r.   stack
ValueErrorr   get_visuals)	r   	dst_shape	src_shaperC   r   r   gainresized_masksr   s	           r   r1   ,YOLOEVPDetectPredictor._process_single_image`   s8   " #f++XXfBJJ7F{{aay|il2IaL9Q<4OPDNF319	!y|d7J(Ja'ORU'U!VV319	!y|d7J(Ja'ORU'U!VV!G1%8MHH]+E"#E3,CDD  !--h6QQr   c                 D   > [         TU ]  " U/UQ7SU R                  0UD6$ )z
Run inference with visual prompts.

Args:
    im (torch.Tensor): Input image tensor.
    *args (Any): Variable length argument list.
    **kwargs (Any): Arbitrary keyword arguments.

Returns:
    (torch.Tensor): Model prediction results.
vpe)r   	inferencer   )r   rA   argskwargsr   s       r   rY    YOLOEVPDetectPredictor.inference   s(     w GGGGGr   c                     U R                  U5        [        U R                  5      S:X  d   S5       eU R                   H1  u  p#nU R                  U5      nU R	                  X@R
                  SS9s  $    g)a  
Process the source to get the visual prompt embeddings (VPE).

Args:
    source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
        of the image to make predictions on. Accepts various types including file paths, URLs, PIL
        images, numpy arrays, and torch tensors.

Returns:
    (torch.Tensor): The visual prompt embeddings (VPE) from the model.
r   z get_vpe only supports one image!T)rX   
return_vpeN)setup_sourcer0   dataset
preprocessr   r   )r   source_im0srA   s        r   get_vpeYOLOEVPDetectPredictor.get_vpe   sf     	&!4<< A%I'II%,,JAQ&B::bllt:DD 'r   )r   r   )T)NN)__name__
__module____qualname____firstlineno____doc__boolr   r   r.   r1   rY   re   __static_attributes____classcell__)r   s   @r   r   r      s=    &	 $ 	  	 *X#RJHE Er   r   c                       \ rS rSrSrSrg)YOLOEVPSegPredictor   z\Predictor for YOLO-EVP segmentation tasks combining detection and segmentation capabilities. N)rg   rh   ri   rj   rk   rm   rr   r   r   rp   rp      s    fr   rp   )numpyr#   r9   ultralytics.data.augmentr   ultralytics.models.yolo.detectr   ultralytics.models.yolo.segmentr   r   rp   rr   r   r   <module>rw      s8      5 = AXE/ XEv	02G 	r   