
    cCi                         S r SSKJrJr  SSKrSSKJr  SSKJ	r	  SSK
JrJrJrJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJr   " S S\5      r " S S\5      rS/rg)z(Fast Video processor class for InternVL.    )OptionalUnionN)
functional   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDPILImageResamplingSizeDict)UnpackVideosKwargs)
TensorType)BaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videosc                   ,    \ rS rSr% \\\\4   \S'   Sr	g) InternVLVideoProcessorInitKwargs   initial_shift N)
__name__
__module____qualname____firstlineno__r   boolfloatint__annotations____static_attributes__r       p/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/internvl/video_processing_internvl.pyr   r      s    uc)**r!   r   c                   ^  ^  \ rS rSr\R
                  r\r\	r
SSS.rSrSrSrSrSrSr\rS/rS\\   4U 4S jjr   S S	\S
\\   S\\\\4      S\\\\\4      4S jjr S!S\S   S\S\S\S\S   S\S\S\S\S\S\\\\\   4      S\\\\\   4      S\\\ \!4      S\"4S jjr#Sr$U =r%$ )"InternVLVideoProcessor"   i  )heightwidthTFpixel_values_videoskwargsc                 &   > [         TU ]  " S0 UD6  g )Nr   )super__init__)selfr)   	__class__s     r"   r,   InternVLVideoProcessor.__init__0   s    "6"r!   metadata
num_framesfpsr   c                 l   Ub  UOU R                   nUb  UOU R                  nUR                  nUc8  Ub5  Ub  UR                  c  [	        S5      e[        XaR                  -  U-  5      nUSL a  Xb-  S-  nX&:  a  [	        SU SU S35      e[        R                  " XFXb-  5      R                  5       nU$ )a  
Default sampling function which uniformly samples the desired number of frames between 0 and total number of frames.
If `fps` is passed along with metadata, `fps` frames per second are sampled uniformty. Arguments `num_frames`
and `fps` are mutually exclusive.

Args:
    metadata (`VideoMetadata`):
        Metadata of the video containing information about total duration, fps and total number of frames.
    num_frames (`int`, *optional*):
        Maximum number of frames to sample. Defaults to `self.num_frames`.
    fps (`int` or `float`, *optional*):
        Target frames to sample per second. Defaults to `self.fps`.
    initial_shift (`bool`, `float` or `int`, defaults to `self.initial_shift`):
        The initial shift to apply when sampling frames. If `True`, the shift is set so that frames are sampled from the middle of the video.

Returns:
    np.ndarray:
        Indices to sample video frames.
zAsked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. Please pass in `VideoMetadata` object or use a fixed `num_frames` per input videoT   z(Video can't be sampled. The `num_frames=z` exceeds `total_num_frames=z`. )r1   r   total_num_framesr2   
ValueErrorr   torcharange)r-   r0   r1   r2   r   r)   r5   indicess           r"   sample_frames$InternVLVideoProcessor.sample_frames3   s    6 $.#9Zt
)6)BHZHZ#44 #/8<<#7 h  -<sBCJD ,9A=M(::,Fbcsbttwx  ,,}@P@]^bbdr!   videosztorch.Tensordo_convert_rgb	do_resizesizeinterpolationzF.InterpolationModedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stdreturn_tensorsreturnc           	         [        U5      u  nn0 nUR                  5        H:  u  nnU(       a  U R                  U5      nU(       a  U R                  UXES9nUUU'   M<     [	        UU5      n[        U5      u  nn0 nUR                  5        H8  u  nnU(       a  U R                  UU5      nU R                  UXXU5      nUUU'   M:     [	        UU5      nU(       a  [        R                  " USS9OUn[        SU0US9$ )N)r?   r@   r   )dimr(   )datatensor_type)
r   itemsconvert_to_rgbresizer   center_croprescale_and_normalizer7   stackr   )r-   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   r)   grouped_videosgrouped_videos_indexresized_videos_groupedshapestacked_videosresized_videosprocessed_videos_groupedprocessed_videoss                          r"   _preprocess"InternVLVideoProcessor._preprocessf   s    $ 0EV/L,,!#%3%9%9%;!E>!%!4!4^!D!%^$!d,:"5) &< ((>@TU 0E^/T,,#% %3%9%9%;!E>!%!1!1.)!L!77
LV_N /=$U+ &< **BDXYCQ5;;'7Q?Wg"79I!JXfggr!   r   )NNN)N)&r   r   r   r   r
   BICUBICresampler   rF   r	   rG   r?   r>   rC   rE   r=   r   do_sample_framesr   valid_kwargsmodel_input_namesr   r,   r   r   r   r   r   r   r:   listr   strr   r   r\   r    __classcell__)r.   s   @r"   r$   r$   "   s   !))H!JIC(DIJLNM3L./#(H!I # %)+/;?11 SM1 eCJ'(	1
  dE3&6 781B <@,h^$,h ,h 	,h
 ,h   56,h ,h ,h ,h ,h ,h U5$u+#567,h E%e"456,h !sJ!78,h  
!,h ,hr!   r$   )__doc__typingr   r   r7   torchvision.transforms.v2r   Fimage_processing_utilsr   image_utilsr   r	   r
   r   processing_utilsr   r   utilsr   video_processing_utilsr   video_utilsr   r   r   r   r$   __all__r   r!   r"   <module>rq      sS    / "  5 2 Z Z 4  8 O O+| +ph/ phf $
$r!   