
    cCi)                        S SK JrJrJrJr  SSKJrJrJrJ	r	J
r
  SSKJrJr  \" 5       (       a  S SKJr  SSKJrJr  \" 5       (       a  S SKrS S	KJr  SS
KJr  \	R0                  " \5      r\" \" SS95       " S S\5      5       rg)    )AnyOptionalUnionoverload   )add_end_docstringsis_torch_availableis_vision_availableloggingrequires_backends   )ChunkPipelinebuild_pipeline_init_args)Image)
load_imagevalid_imagesN)BaseModelOutput)2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMEST)has_image_processorc                     ^  \ rS rSrSrSrSrSrSrU 4S jr	\
S\\S4   S\\\\   4   S	\S
\\\\4      4S j5       r\
S\\\\4      S	\S
\\\\\4         4S j5       r SS\\S\\\\4      4   S\\\\\   4      S	\S
\\\\\4      \\\\\4         4   4U 4S jjjrS rSS jrS rSS jrSSS
\\\4   4S jrSrU =r$ )ZeroShotObjectDetectionPipeline   aN  
Zero shot object detection pipeline using `OwlViTForObjectDetection`. This pipeline predicts bounding boxes of
objects when you provide an image and a set of `candidate_labels`.

Example:

```python
>>> from transformers import pipeline

>>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
>>> detector(
...     "http://images.cocodataset.org/val2017/000000039769.jpg",
...     candidate_labels=["cat", "couch"],
... )
[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]

>>> detector(
...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
...     candidate_labels=["head", "bird"],
... )
[{'score': 0.119, 'label': 'bird', 'box': {'xmin': 71, 'ymin': 170, 'xmax': 410, 'ymax': 508}}]
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-object-detection"`.

See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-object-detection).
FTc                    > [         TU ]  " S0 UD6  U R                  S:X  a  [        SU R                   S35      e[        U S5        U R                  [        5        g )NtfzThe z is only available in PyTorch.vision )super__init__	framework
ValueError	__class__r   check_model_typer   )selfkwargsr!   s     k/home/james-whalen/.local/lib/python3.13/site-packages/transformers/pipelines/zero_shot_object_detection.pyr   (ZeroShotObjectDetectionPipeline.__init__=   sR    "6">>T!tDNN#33QRSS$)PQ    imagezImage.Imagecandidate_labelsr$   returnc                     g Nr   )r#   r(   r)   r$   s       r%   __call__(ZeroShotObjectDetectionPipeline.__call__F   s      #r'   c                     g r,   r   )r#   r(   r$   s      r%   r-   r.   K   s    ber'   c           	      \  > SU;   a  UR                  S5      n[        U[        [        R                  45      (       a  XS.nOX[        U[        [
        45      (       a:  [        U5      (       a*  [	        [        TU ]   " S [        X5       5       40 UD65      $  Un[        TU ]   " U40 UD6nU$ )a$  
Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

Args:
    image (`str`, `PIL.Image` or `list[dict[str, Any]]`):
        The pipeline handles three types of images:

        - A string containing an http url pointing to an image
        - A string containing a local path to an image
        - An image loaded in PIL directly

        You can use this parameter to send directly a list of images, or a dataset or a generator like so:

        ```python
        >>> from transformers import pipeline

        >>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
        >>> detector(
        ...     [
        ...         {
        ...             "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
        ...             "candidate_labels": ["cat", "couch"],
        ...         },
        ...         {
        ...             "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
        ...             "candidate_labels": ["cat", "couch"],
        ...         },
        ...     ]
        ... )
        [[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.25, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}], [{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]]
        ```


    candidate_labels (`str` or `list[str]` or `list[list[str]]`):
        What the model should recognize in the image.

    threshold (`float`, *optional*, defaults to 0.1):
        The probability necessary to make a prediction.

    top_k (`int`, *optional*, defaults to None):
        The number of top predictions that will be returned by the pipeline. If the provided number is `None`
        or higher than the number of predictions available, it will default to the number of predictions.

    timeout (`float`, *optional*, defaults to None):
        The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
        the call may block forever.


Return:
    A list of lists containing prediction results, one list per input image. Each list contains dictionaries
    with the following keys:

    - **label** (`str`) -- Text query corresponding to the found object.
    - **score** (`float`) -- Score corresponding to the object (between 0 and 1).
    - **box** (`dict[str,int]`) -- Bounding box of the detected object in image's original size. It is a
      dictionary with `x_min`, `x_max`, `y_min`, `y_max` keys.
text_queriesr(   r)   c              3   .   #    U  H  u  pXS .v   M     g7f)r2   Nr   ).0imglabelss      r%   	<genexpr>;ZeroShotObjectDetectionPipeline.__call__.<locals>.<genexpr>   s     pSoKCs?Sos   )
pop
isinstancestrr   listtupler   r   r-   zip)r#   r(   r)   r$   inputsresultsr!   s         r%   r-   r.   N   s    ~ V#%zz.9ec5;;/00$KFe}--,u2E2E pSVW\Sop  F'"64V4r'   c                 h    0 nSU;   a  US   US'   0 nSU;   a  US   US'   SU;   a  US   US'   U0 U4$ )Ntimeout	thresholdtop_kr   )r#   r$   preprocess_paramspostprocess_paramss       r%   _sanitize_parameters4ZeroShotObjectDetectionPipeline._sanitize_parameters   sc    +1)+<i(& .4[.A{+f*0/w' "&888r'   c              #     #    [        US   US9nUS   n[        U[        5      (       a  UR                  S5      n[        R
                  " UR                  UR                  //[        R                  S9n[        U5       H}  u  pgU R                  XpR                  S9nU R                  X0R                  S9n	U R                  S:X  a  U	R                  U R                  5      n	U[        U5      S-
  :H  UUS	.UEU	Ev   M     g 7f)
Nr(   )rB   r)   ,)dtype)return_tensorsptr   )is_lasttarget_sizecandidate_label)r   r:   r;   splittorchtensorheightwidthint32	enumerate	tokenizerr   image_processortorK   len)
r#   r?   rB   r(   r)   rO   irP   text_inputsimage_featuress
             r%   
preprocess*ZeroShotObjectDetectionPipeline.preprocess   s     6'?G<!"45&,,/55c:llU\\5;;$?#@T"+,<"=A...XK!11%1WN~~%!/!2!24::!>$4 5 99*#2 	
 !  #>s   D Dc                     UR                  S5      nUR                  S5      nUR                  S5      nU R                  " S0 UD6nX#US.UEnU$ )NrO   rP   rN   )rO   rP   rN   r   )r9   model)r#   model_inputsrO   rP   rN   outputsmodel_outputss          r%   _forward(ZeroShotObjectDetectionPipeline._forward   s[    "&&}5&**+<=""9-**,|,(3dkwovwr'   c                 l   / nU H  nUS   n[        U5      nU R                  R                  XRUS   S9S   nUS   R                  5        HI  nUS   U   R	                  5       n	U R                  US   U   S   5      n
XU
S.nUR                  U5        MK     M     [        US S	S
9nU(       a  US U nU$ )NrP   rO   )rd   rC   target_sizesr   scoresboxes)scorelabelboxc                     U S   $ )Nrl   r   )xs    r%   <lambda>=ZeroShotObjectDetectionPipeline.postprocess.<locals>.<lambda>   s    '
r'   T)keyreverse)r   rY   post_process_object_detectionnonzeroitem_get_bounding_boxappendsorted)r#   re   rC   rD   r@   model_outputrm   rd   indexrl   rn   results               r%   postprocess+ZeroShotObjectDetectionPipeline.postprocess   s    )L !23E*<8L**HH$UbHc I G !*224)%0557,,WW-=e-DQ-GH#(Ev& 5 * &:DIfuoGr'   rn   ztorch.Tensorc                     U R                   S:w  a  [        S5      eUR                  5       R                  5       u  p#pEUUUUS.nU$ )z
Turns list [xmin, xmax, ymin, ymax] into dict { "xmin": xmin, ... }

Args:
    box (`torch.Tensor`): Tensor containing the coordinates in corners format.

Returns:
    bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
rM   zAThe ZeroShotObjectDetectionPipeline is only available in PyTorch.)xminyminxmaxymax)r   r    inttolist)r#   rn   r   r   r   r   bboxs          r%   rx   1ZeroShotObjectDetectionPipeline._get_bounding_box   sO     >>T!`aa!$!1!1!3D	
 r'   r   r,   )g?N)__name__
__module____qualname____firstlineno____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r   r   r;   r<   r   dictr-   r   rG   r_   rf   r~   r   rx   __static_attributes____classcell__)r!   s   @r%   r   r      s   @ O #OR #3-.#BGTRUYBW#cf#	d38n	# # ed4S>2eced4PTUXZ]U]P^K_F`e e
 =AVS-d38n)==>V #5d3i#89V 	V
 
tDcN#T$tCH~*>%??	@V Vp	9(,^ S#X  r'   r   )typingr   r   r   r   utilsr   r	   r
   r   r   baser   r   PILr   image_utilsr   r   rR   transformers.modeling_outputsr   models.auto.modeling_autor   
get_loggerr   loggerr   r   r'   r%   <module>r      sq    1 1 k k 9 6=^			H	% ,FGam a Har'   