
    oi                        S S/r / SQqSSKrSSKJr  SSKrSSKJr  SSKrSSK	r	SSK
r
SSKrSSKJr  SSKrSSKrSSKJr  SS	KJrJrJrJrJr  SS
KJr   SSKJrJr  SSKJr  SrSSKJr  SSK J!r!J"r"  \RF                  RI                  SS5      S:H  r%Sr&Sr'Sr(Sr)Sr*Sr+\," \-" \RF                  RI                  SS5      5      5      r.\%(       a  \R^                  " S\. 35        Sr0Sr1S r2S!r3S"\,S#\,S$\,4S% jr4 S"\,S#\,S$\,4S& jr5 S"\,S#\,S$\,4S' jr6 \&\'\(4S(\,S)\,S#\,S*\,S+\,S$\7\,\,4   4S, jjr8 \&4S-\9S.\,S$\R                  4S/ jjr: S-\9S0\,S1\\,\-4   S$\,4S2 jr;S-\9S$\7\Rx                  \-4   4S3 jr=S$\>4S4 jr?S-\9S0\,S1\-S$\7\,\,\,4   4S5 jr@S-\9S$\7\Rx                  \-4   4S6 jrAS$\>4S7 jrBS-\9S$\7\Rx                  \-4   4S8 jrC\A\=\CS9.rD\R                  " S:S5      rF\" SS;9S$\G4S< j5       rH\&S4S-\9S=\,S>\>S$\\Rx                  \I\R                     4   4S? jjrJSIS@ jrKSA\\\   \\\      4   S$\\   4SB jrL \&S4SA\\\   \\\      4   S.\,SC\>S$\\\\R                     S4   \\\\Rx                  \\R                     4      S4   4   4SD jjrM SE rN SF rO SSKPr\R                  R                  R                  rRSSGKSJTrU   " SH S5      rVg!   Sr GNn= f)Jprocess_vision_infoUnslothVisionDataCollator)z	<|image|>z<|vision_start|>z<|vision_end|>z<|vision_pad|>z<|image_pad|>z<|video_pad|>z<image>z[IMG]z[IMG_BREAK]z	[IMG_END]z<image_soft_token>z<start_of_image>z<end_of_image>z<|START_OF_IMG|>z<|END_OF_IMG|>z<|IMG_LINE_BREAK|>z<|IMG_PATCH|>    N)Image)BytesIO)	lru_cache)version)UnionTupleListDictSequence)	takewhile)io
transforms)InterpolationModeTF   )logger)dtype_from_configHAS_TORCH_DTYPEUNSLOTH_ENABLE_LOGGING01   i@  i      i  i 0	 VIDEO_MAX_PIXELSg    Az!Unsloth: set VIDEO_TOTAL_PIXELS:           @   i   numberfactorreturnc                 "    [        X-  5      U-  $ )zFReturns the closest integer to 'number' that is divisible by 'factor'.)roundr   r    s     R/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/vision_utils.pyround_by_factorr&   f   s    !F**    c                 8    [         R                  " X-  5      U-  $ )z]Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.)mathceilr$   s     r%   ceil_by_factorr+   k   s    99V_%..r'   c                 8    [         R                  " X-  5      U-  $ )zYReturns the largest integer less than or equal to 'number' that is divisible by 'factor'.)r)   floorr$   s     r%   floor_by_factorr.   p   s    ::fo&//r'   heightwidth
min_pixels
max_pixelsc           
         [        X5      [        X5      -  [        :  a*  [        S[         S[        X5      [        X5      -   35      e[        U[	        X5      5      n[        U[	        X5      5      nXV-  U:  a:  [
        R                  " X-  U-  5      n[        X-  U5      n[        X-  U5      nXV4$ XV-  U:  a7  [
        R                  " X0U-  -  5      n[        X-  U5      n[        X-  U5      nXV4$ )a  
Rescales the image so that the following conditions are met:

1. Both dimensions (height and width) are divisible by 'factor'.

2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].

3. The aspect ratio of the image is maintained as closely as possible.
z+absolute aspect ratio must be smaller than z, got )	maxmin	MAX_RATIO
ValueErrorr&   r)   sqrtr.   r+   )r/   r0   r    r1   r2   h_barw_barbetas           r%   smart_resizer<   v   s     6C..:9)F3vK]`cdj`rKrJst
 	
 78E67E}z!yy&.J67v6f5
 <	 
	#yy~67v}f5u|V4<r'   elesize_factorc                 <   SU ;   a  U S   nO%U S   n[        U[        5      (       a  SU;   a  US   n S n[        U[        R                  5      (       a  UnGO[        U[        5      (       Ga  UR	                  S5      (       d  UR	                  S5      (       a5  [        R
                  " [        R                  " USS9R                  5      nGOUR	                  S5      (       a  [        R
                  " US	S  5      nGOkUR	                  S
5      (       aQ  SU;   aI  UR                  SS5      u  pE[        R                  " U5      n[        R
                  " [        U5      5      nGO[        R
                  " U5      nO[        U[        5      (       a   [        R
                  " [        U5      5      nO[        U[        5      (       a  SU;   a-  US   (       a#  [        R
                  " [        US   5      5      nOpSU;   a$  US   (       a  [        R
                  " US   5      nOFSU;   a@  US   (       a6  [        R
                  " [        R                  " US   SS9R                  5      nUc"  [        S[        U5      R                    S35      eUR#                  S5      nSU ;   a  SU ;   a  [%        U S   U S   US9u  pxOIUR&                  u  pU R                  S[(        5      nU R                  S[*        5      n[%        U
U	UUUS9u  pxUR-                  X45      nU$ )Nimage	image_urlurlhttp://https://T)streamfile://   z
data:imagezbase64,r   bytespathzyUnrecognized image input. We support local path, http url, base64 and PIL.Image, bytes and dict formats. Instead we got ``RGBresized_heightresized_widthr    r1   r2   r    r1   r2   )
isinstancedictr   str
startswithopenrequestsgetrawsplitbase64	b64decoder   rH   r7   type__name__convertr<   size
MIN_PIXELS
MAX_PIXELSresize)r=   r>   r@   	image_obj_base64_datadatarL   rM   r0   r/   r1   r2   s                r%   fetch_imagerf      s    #~GK eT""u~%LEI%%%		E3		I&&%*:*::*F*F

8<<d#C#G#GHIi((

59-Il++E!!&Y!:''4!JJwt}5	

5)I	E5	!	!JJwu~.		E4	 	 eg

75>#:;Iu_v

5=1Ie^e

8<<eT#J#N#NOI  U  VZ  [`  Va  Vj  Vj  Uk  kl  m  n  	ne$E3?c#9(4 ! )
% 

WW\:6
WW\:6
(4!!)
% LL-89ELr'   total_frames	video_fpsc           	      F   SU ;   a  SU ;   a   S5       eSU ;   a  [        U S   [        5      nOU R                  S[        5      n[	        U R                  S[
        5      [        5      n[        U R                  S[        [        U5      5      [        5      nX-  U-  nX1:  a(  [        (       a  [        R                  " SU SU S35        [        [        [        X55      U5      U5      n[        U[        5      n[        U::  a  X1::  d  [        S	[         S
U SU S35      eU$ )a)  calculate the number of frames for video used for model inputs.

Args:
    ele (dict): a dict contains the configuration of video.
        support either `fps` or `nframes`:
            - nframes: the number of frames to extract for model inputs.
            - fps: the fps to extract frames for model inputs.
                - min_frames: the minimum number of frames of the video, only used when fps is provided.
                - max_frames: the maximum number of frames of the video, only used when fps is provided.
    total_frames (int): the original total number of frames of the video.
    video_fps (int | float): the original fps of the video.

Raises:
    ValueError: nframes should in interval [FRAME_FACTOR, total_frames].

Returns:
    int: the number of frames for video used for model inputs.
fpsnframesz%Only accept either `fps` or `nframes`
min_frames
max_framesz Unsloth: smart_nframes: nframes[z] > total_frames[]znframes should in interval [z, z], but got .)r&   FRAME_FACTORrV   FPSr+   FPS_MIN_FRAMESr.   r5   FPS_MAX_FRAMESr   r   warningr4   r7   )r=   rg   rh   rk   rj   rl   rm   s          r%   smart_nframesru      s   . c!1[4[[2C!#i.,?ggeS!#CGGL.$I<X
$SWW\3~|;\%]_kl
*S0!%%!A'J[\h[iijklc#g2J?N!'<8G#(?7~R~U`ah`iijkllNr'   c                 <   U S   n[         R                  " [        R                  5      [         R                  " S5      :  a-  SU;   d  SU;   a  [        R
                  " S5        SU;   a  USS n[        R                  " 5       n[        R                  " UU R                  S	S
5      U R                  SS5      SSS9u  p4n US   nUR                  S5      Uph[        (       a=  [        R                  " SU< SU< SU< S[        R                  " 5       U-
  S S3	5        [!        XUS9n	["        R$                  " SUS-
  U	5      R'                  5       R)                  5       n
U	[+        US5      -  U-  nX:   nX;4$ ! [         a  n[        SU5        Sn SnANSnAff = f)a~  read video using torchvision.io.read_video

Args:
    ele (dict): a dict contains the configuration of video.
    support keys:
        - video: the path of video. support "file://", "http://", "https://" and local path.
        - video_start: the start time of video.
        - video_end: the end time of video.
Returns:
    torch.Tensor: the video tensor with shape (T, C, H, W).
videoz0.19.0rC   rD   zVtorchvision < 0.19.0 does not support http/https video path, please upgrade to 0.19.0.rF   rG   Nvideo_start        	video_endsecTCHW)	start_ptsend_ptspts_unitoutput_formatrh   z6error getting video_fps there is probably a path issuer   r   z"Unsloth: torchvision:  video_path=, total_frames=, video_fps=, time=.3fsrg   rh   r   ư>)r   parsetorchvision__version__warningswarntimer   
read_videorV   	Exceptionprintr^   r   r   inforu   torchlinspacer#   longr4   )r=   
video_pathstrw   audior   rh   erg   rk   idx
sample_fpss               r%   _read_video_torchvisionr      s    WJ}}[,,-h0GG
"jJ&>MMrs
"#ABJ	B''--T*E$%	 $jjmY)9j]:J\O=i\Y`aeajajaloqaqru`vvwxyCiPG
..L1,g
6
<
<
>
C
C
EC3|T22Y>JJE  FJ	s    E9 9
FFFc                  D    SS K n U R                  R                  S5      S L$ )Nr   decord)importlib.utilutil	find_spec)	importlibs    r%   is_decord_availabler   &  s    >>##H-T99r'   c                    US::  a  [        S5      eUS::  a  [        S5      eU R                  SS5      nU R                  SS5      nUc  Uc  SUS-
  U4$ X-  nUb.  [        S[        X55      5      n[        R
                  " Xb-  5      nOSnUb<  [        S[        XE5      5      n[        R                  " X-  5      n	[        XS-
  5      n	OUS-
  n	Xy:  a,  [        S	U S
Ub  WOS SU	 S
Ub  WOU SUS SU SU S35      e[        (       a.  [        R                  " SU< SU	< SU< SU< SU< SUS 35        XyX-
  S-   4$ )a  
Calculate the start and end frame indices based on the given time range.

Args:
    ele (dict): A dictionary containing optional 'video_start' and 'video_end' keys (in seconds).
    total_frames (int): Total number of frames in the video.
    video_fps (float): Frames per second of the video.

Returns:
    tuple: A tuple containing (start_frame, end_frame, frame_count).

Raises:
    ValueError: If input parameters are invalid or the time range is inconsistent.
r   z#video_fps must be a positive numberz'total_frames must be a positive integerrx   Nrz   r   ry   z Invalid time range: Start frame z (at zs) exceeds end frame zs). Video duration: z.2fzs (z
 frames @ zfps)z2Unsloth: calculate video frame range: start_frame=z, end_frame=r   z from video_start=z, video_end=r   r   )
r7   rV   r4   r5   r)   r*   r-   r   r   r   )
r=   rg   rh   rx   rz   max_durationvideo_start_clampedstart_framevideo_end_clamped	end_frames
             r%   calculate_video_frame_ranger   ,  s   ( A~>??qBCC ''-.KT*Iy0,"L00+L!#s;'EFii 3 ?@S%ABJJ0<=		!#34	 1$	 .{m5XcXoATuv@w x!!*5iF[1Bam0n o+C0L>I;VZ\
 	
 I[N-YLXh[gZii|p{o}  ~K  AJ  @L  LY  OX  Z]  N^  _  	`9#:Q#>>>r'   c                    SSK nU S   n[        R                  " 5       nUR                  U5      n[        U5      UR	                  5       pe[        U UU5      u  pxn[        XUS9n	[        R                  " XxU	5      R                  5       R                  5       R                  5       n
UR                  U
5      R                  5       n[        R                  " U5      R                  SSSS5      n[         (       a=  ["        R$                  " SU< S	U< S
U< S[        R                  " 5       U-
  S S3	5        U	['        US5      -  U-  nX4$ )aw  read video using decord.VideoReader

Args:
    ele (dict): a dict contains the configuration of video.
    support keys:
        - video: the path of video. support "file://", "http://", "https://" and local path.
        - video_start: the start time of video.
        - video_end: the end time of video.
Returns:
    torch.Tensor: the video tensor with shape (T, C, H, W).
r   Nrw   r      r   r   zUnsloth: decord:  video_path=r   r   r   r   r   r   )r   r   VideoReaderlenget_avg_fpsr   ru   r   r   r#   r   tolist	get_batchasnumpytensorpermuter   r   r   r4   )r=   r   r   r   vrrg   rh   r   r   rk   r   rw   r   s                r%   _read_video_decordr   g  s(    WJ	B			J	'B!"gr~~'7)+F,(KL
 CiPG
..
9
?
?
A
F
F
H
O
O
QCLL%%'ELL''1a3E45EmT[\`\e\e\gjl\lmp[qqrst3|T22Y>Jr'   c                       SSK n U R                  R                  S5      c  gSSKJn  g! [
        [        [        4 a     gf = f)z8Check if torchcodec is available and properly installed.r   N
torchcodecFVideoDecoderT)r   r   r   torchcodec.decodersr   ImportErrorAttributeErrorr   )r   r   s     r%   is_torchcodec_availabler     sA    >>##L1943 s    * * AAc                    SSK Jn  [        [        R                  R                  SS5      5      n[        (       a  [        R                  " SU 35        U S   n[        U[        5      (       a  UR                  S5      (       a  USS	 n[        R                  " 5       nU" X2S
9nUR                  R                  nUR                  R                  n[!        U UU5      u  pn[#        XUS9n
[$        R&                  " XU
5      R)                  5       R+                  5       R-                  5       nU
[/        US5      -  U-  nUR1                  US9R2                  n[5        US5      (       aX  UR6                  S:X  aH  UR8                  S   S;   a5  UR8                  S   S;  a"  UR;                  SSSS5      R=                  5       n[        (       a=  [        R                  " SU< SU< SU< S[        R                  " 5       U-
  S S3	5        X4$ )a  read video using torchcodec.decoders.VideoDecoder

Args:
    ele (dict): a dict contains the configuration of video.
    support keys:
        - video: the path of video. support "file://", "http://", "https://" and local path.
        - video_start: the start time of video.
        - video_end: the end time of video.
Returns:
    torch.Tensor: the video tensor with shape (T, C, H, W).
r   r   TORCHCODEC_NUM_THREADS   z%Unsloth: set TORCHCODEC_NUM_THREADS: rw   rF   rG   N)num_ffmpeg_threadsr   r   )indicesndimr   )r   r   r   r   r   r   z!Unsloth: torchcodec:  video_path=r   r   r   r   r   )r   r   intosenvironrV   r   r   r   rP   rR   rS   r   metadataaverage_fps
num_framesr   ru   r   r   r#   r   r   r4   get_frames_atre   hasattrr   shaper   
contiguous)r=   r   r   r   r   decoderrh   rg   r   r   rk   r   r   rw   s                 r%   _read_video_torchcodecr     s    1 0H!!LM;<R;STUWJ*c""z'<'<Y'G'G^
	B:QG  ,,I##..L+F,(KL
 CiPG
..
9
?
?
A
F
F
H
O
O
QC3|T22Y>J!!#!.33Euf%**/;;r?i'EKKN),KMM!Q1-88:E8ZM9IL?-YLX_`d`i`i`knp`pqt_uuvwxr'   )r   r   r   FORCE_UNSLOTH_VIDEO_READER)maxsizec                      [         b  [         n O=[        5       (       a  Sn O+[        5       (       a  Sn O[        (       a  Sn O[	        S5      e[
        (       a  [        R                  " SU  S35        U $ )Nr   r   r   zwUnsloth: No video reader backend available, please install decord or torchvision or torchcodec to process video inputs.z(Unsloth: unsloth_zoo/vision_utils using z to read video.)r   r   r   HAS_TORCHVISIONr7   r   r   r   )video_reader_backends    r%   get_video_reader_backendr     sq    !-9			'	 	"	"+	,  S  T  	T>?S>TTcder'   image_factorreturn_video_sample_fpsc                    [        U S   [        5      (       Ga=  [        5       n [        U   " U 5      u  pEUR                  u  pxpU R                  S[        5      nU R                  S[        5      n[        [        [        X-  [        -  5      [!        US-  5      5      nU R                  SU5      nX:  a(  [
        (       a  [        R                  " S	U S
U S35        [        X5      nSU ;   a  SU ;   a  [#        U S   U S   US9u  nnO[#        U	U
UUUS9u  nn[$        R&                  R)                  UUU/[*        R,                  SS9nU(       a  XE4$ U$ [        U S   [.        [0        45      (       d   eU R3                  5       nUR5                  SS 5        UR5                  SS 5        U S    Vs/ s H  n[7        SU0UEUS9PM     nn[9        [;        U5      [        5      n[;        U5      U:  a$  UR=                  US   /U[;        U5      -
  -  5        U(       a  UUR5                  SS5      4$ U$ ! [         aC  n[
        (       a  [        R                  " SU SU 35        [        S   " U 5      u  pE S nAGNGS nAff = fs  snf )Nrw   zUnsloth: video_reader_backend z) error, use torchvision as default, msg: r   r1   total_pixelsg?r2   zUnsloth: The given max_pixels[z] exceeds limit[z].rL   rM   rN   rO   T)interpolation	antialiasr[   r@   r>   r   rj   r   )rP   rR   r   VIDEO_READER_BACKENDSr   r   r   rt   r   rV   VIDEO_MIN_PIXELSVIDEO_TOTAL_PIXELSr4   r5   r   rp   r   r<   r   
functionalra   r   BICUBIClisttuplecopypoprf   r+   r   extend)r=   r   r   r   rw   r   r   rk   rc   r/   r0   r1   r   r2   max_pixels_supposedrL   rM   process_infovideo_elementimagess                       r%   fetch_videor     s   #g,$$79	J 56J KC PE %*KK!FWW\+;<
ww~/AB-|/E/TUWZ[ehl[lWmn
!gglJ?+%%!?@S?TTdeodpprst,9
s"#'=,8$%O$#-)NM -9#%%-)NM %%,,]++33	 - 
 #$$#g,u6666xxz&$' "%W!- -@<@l[!- 	  !Vl;v; MM6":,'CK*?@A"<++E3777m  	J%%!?@T?UU~  @A  B   C  D 5m DS IE:	JZs   H5 2J5
J?8I==Jc                 
  ^^ U c  g[        U [        [        45      (       a  [        U 5      $ U  Vs/ s H  n[        U5      PM     nnU(       d  gUS   m[        UU4S jUSS  5       5      (       a  [        T5      $ U$ s  snf )zTReturn a single float if all fps equal (within tol), else a list; pass None through.Nr   c              3   R   >#    U  H  n[         R                  " UTTTS 9v   M     g7f))rel_tolabs_tolN)r)   isclose).0vf0tols     r%   	<genexpr>collapse_fps.<locals>.<genexpr>#  s"     \S[aDLLBSIS[s   $'r   )rP   r   floatall)rj   r   r   valsr   s    `  @r%   collapse_fpsr     s|    
{#U|$$Sz!"cE!HcD"	aB\SWXYXZS[\\\59fbff	 #s   B conversationsc                     / n[        U S   [        5      (       a  U /n U  HN  nU HE  n[        US   [        5      (       d  M  US    H  nUS   S;   d  M  UR                  U5        M!     MG     MP     U$ )Nr   contentr[   )r@   rA   rw   )rP   rQ   r   append)r   vision_infosconversationmessager=   s        r%   extract_vision_infor  &  sx    L-"D))&%#G'),d33"9-C6{&EE$++C0 . $ & r'   return_video_kwargsc                 d   [        U 5      n/ n/ n/ nU Hh  nSU;   d  SU;   a  UR                  [        XqS95        M)  SU;   a0  [        XqSS9u  pUR                  U	5        UR                  U5        M_  [	        S5      e   [        U5      S:X  a  S n[        U5      S:X  a  S nU(       a  XES	U04$ XE4$ )
Nr@   rA   r   rw   T)r   r   z,image, image_url or video should in content.r   rj   )r  r   rf   r   r7   r   )
r   r>   r  r   image_inputsvideo_inputsvideo_sample_fps_listvision_infovideo_inputvideo_sample_fpss
             r%   r   r   4  s     '}5L LL#k![K%?K QR#,7w{,|)K!(()9:,KLL $ <A
<AE3H+III%%r'   c                    [        U S5      (       a  U R                  OU n [        n[        U S5      (       a  [        U R                  /-   n U R	                  U5      n[        U S5      (       a  UR                  U R                  5         [        S U 5       5      n[        [        U5      5      n[        R                  " U5      nU$ )N	tokenizerimage_tokenpad_token_idc              3   .   #    U  H  oc  M  Uv   M     g 7fN )r   xs     r%   r   )get_padding_tokens_ids.<locals>.<genexpr>b  s     K(91QQ(9s   	)r   r  IMAGE_TOKENSr  convert_tokens_to_idsr   r  r   setr   	IntTensor)r  image_tokenspadding_token_idss      r%   get_padding_tokens_idsr  T  s     (/y+'F'F	##IILy-((#y'<'<&==!77Ey.))  !7!78K(9KKS!234(9:r'   c                 `   S[         R                  [         R                  [         R                  S[         R                  [         R                  [         R                  S[         R                  [         R                  [         R                  0nU b  U S :X  a  g X;   a  X   $ [	        SU  S35        g )Nfloat32float16bfloat16	Unsloth: z, is not recognized, so we'll default to None)r   r  r  r  r   )dtype__DTYPE_MAPs     r%   
_get_dtyper#  i  s}    5==u}}5==u}}ENNK 
%4-		k&88	% LMNr'   )train_on_responses_onlyc                      \ rS rSrSr             SS jr S r S rS rS r	SS	 jr
S
 rS rS rS S jrS\4S jrS\4S jr\R(                  " 5         S!S\R*                  S\R*                  S\S\S\\R*                     S-  S\\\-  \-     S-  S\\R*                  \R*                  \\R*                  S4   4   4S jj5       rS"S jrS#S jrS rS rSr g)$r   i~  )r  r!  ignore_index	processorformatting_func
image_sizemax_seq_length
truncationr$  num_procassistant_single_content
patch_sizeresize_dimensionsnap_to_patch_sizecompletion_only_losspad_to_multiple_of	size_funcNc           
      *  ^ [        US5      (       d  [        S5      e[        U5      U l        [	        [
        (       a  [        UR                  5      O#UR                  5       R                  R                  5      U l
        X`l        X l        X@l        Xl        Xl        Xl         UR                  R"                  R$                  U l        US	:X  a'   UR                  R"                  R,                  U l        OUS:X  a  S U l        O[1        U[2        [4        45      (       aT  [7        U5      S:X  d   e[1        US   [8        5      (       a  [1        US   [8        5      (       d   e[3        U5      U l        O$[;        U5      [8        L a  XPl        O[        S5      e TS;  a  [        S5      eTS;   a  U4S jU l        O)TS:X  a	  S U l        OTS	:X  a	  S U l        O[        S5      eTU l        Uc  [        US5      (       a  UR@                  n[;        U5      [8        L a  [C        US5      OS U l         U R@                  S LU l"        U(       aA  [1        U[F        5      (       a  [1        U	[F        5      (       d   e[I        S UU	U
USUS9U l%        OS U l%         URM                  SSS0SSS./S.S SS!S./S./5        S"U l'        g !   [        UR                  S5      (       az  [        UR                  R"                  S5      (       aU  UR                  R"                  R&                  R)                  5       nSU;   d  SU;   a
  SU l         GN|[*        S-  U l         GN[*        S-  U l         GN= f!   [/        S
5        SU l         GN= f! [         ak     URM                  SSS0SSS./S.S S!S./5        SU l'        [/        S#URP                  RR                   S$35         g ! [T         a  n[W        U5      eS nAff = f[T         a  n[W        U5      eS nAff = f)%Nimage_processorz<Unsloth: UnslothVisionDataCollator is only for image models!vision_config
model_typegemma3npixtral   r   r5   z=Unsloth: Model does not have a default image size - using 512i   r4   r   r   zUnsloth: resize accepts 'min', 'max', a tuple of 2 numbers or 1 number
For example (224, 224) or just 224. The default is 'min' which auto resizes images!)r   r   r4   r5   zUnsloth: resize_dimension accepts 0, 1, 'max' or 'min'
For example 0 resizes the first dimension, 1 the second, 'max' resizes based on the max of height width, 'min' the min size)r   r   c                 "   > U R                   T   $ r  )r^   )r  r/  s    r%   <lambda>4UnslothVisionDataCollator.__init__.<locals>.<lambda>  s    qvv.>'?r'   c                 N    [        U R                  S   U R                  S   5      $ Nr   r   )r4   r^   r  s    r%   r<  r=        s166!9affQi'@r'   c                 N    [        U R                  S   U R                  S   5      $ r?  )r5   r^   r@  s    r%   r<  r=    rA  r'   r*  T)instruction_partresponse_partforce_matchr  return_functionr,  userr[   r@   textzHello!r[   rH  )roler   	assistantzHow can I help you?Fr   za only accepts 1 text field for assistant roles!
We will auto fix the data collator to support it!),r   	TypeErrorr  r  r#  r   r   configget_input_embeddingsweightr!  r&  r'  r(  r1  r2  r0  r6  r.  r7  lowerIMAGE_FACTORr)  r   rP   r   r   r   r   r[   r3  r/  r*  r4   r+  rR   _train_on_responses_onlyr$  apply_chat_templater-  	__class__r\   r   RuntimeError)selfmodelr'  r*  r(  ra   r&  r$  rC  rD  rE  r,  r1  r2  r/  r0  
lower_namer   s                 `   r%   __init__"UnslothVisionDataCollator.__init__  s	   ( y"344Z[[!7	!B ell+&&(//55


 )".$8!"4"4
	4#ll88CCDO U?&"',,"<"<"G"G
 u_"DO..v;!#$#fQi--*VAY2L2LML#FmDO&\S $Of  	#77N  '?DN&@DN&@DNN  !1 !u.//%BVBV8<^8LPS8Sc.!4Y]--T9 #.44MSV9W9WXW+C#3#0#.#,#'#+,D( ,0D(	"))W%#X6-8 9 %#-BC2E F	+  -2D)& 	I	4u||_55'%,,B\B\^j:k:k"\\77BBHHJ

*i:.E&(DO&2a&7DO".!"3&UV"%D  	&&--#)!':1< = )5JK	/  15-	 3 3 < <= >H H 		  &"1o%& 	"q/!	"s\   %%K  %M+ +N  BM(M(M(+N
PA
O
O6&O11O66PPPc           	         U R                   b   U Vs/ s H  o R                  U5      PM     nnSUS   ;   a  SUS   ;   a  U R                  U5      $ / n/ n/ nS/ 0nU GH  nU R                  U5      n[        U5      S:w  aD  U R	                  U5      nU R
                  (       a  U R                  U5      nU R                  U5      n U R                  R                  USSS9nUR                  U5        U R                  X'5      u  pnU R                  U	5      n	[        U	5      S:  a  UR                  U	5        [        U
5      S:  d  M  UR                  U
5        Uc  S/ 0nUS   R                  US   5        GM      [        USU R                  U R                   SSS	9nU(       a  [        U5      S:  a  XLS
'   U(       aA  [        U5      S:  a2  X\S'   [#        US   5      US'   UR%                  5        H	  u  pXU'   M     U R&                  b  U R&                  US'   U R                  " S0 UD6nSU;   a  U R)                  U5      nSU;   a  U R)                  US5      nUS   R+                  5       nU R,                  U[.        R0                  " UU R2                  5      '   UUS'   U R4                  (       a  U R5                  U5      S   US'   U$ s  snf )Npromptr   
completionrj   F)tokenizeadd_generation_promptTpt)rH  paddingr+  
max_lengthreturn_tensorsadd_special_tokensr   videosr2  pixel_valuespixel_values_videos	input_idslabelsr  )r(  _collate_prompt_completion_select_messages_or_rawr   %_validate_and_normalize_first_messager-  _collapse_assistant_content_clean_none_keysr'  rS  r   "_extract_images_videos_for_example_resize_images_inplacer   rQ   r+  r*  r   itemsr2   _cast_pixel_values_dtype_inplacecloner&  r   isinr  r$  )rV  examplesexampletextsr   re  video_kwargsmessagesr  r@   rw   video_kwargproc_kwargskr   batchri  s                    r%   __call__"UnslothVisionDataCollator.__call__  s    +EMNX',,W5XHNx{"|x{'B228<<r{G33G<H 8}!EEhO 00#??IH00:nn88 (- 9 G
 LL!(,(O(OPW(b%E+//6E5zA~e$5zA~e$&#("+KU#**;u+=>=  > 	 **$
 c&kAo$*!c&kAo$*!".|E/B"CL$**,!"A -"".040G0GK,--- U"99%@E E)99%AVWE {#))+=A=N=Nuzz&$"8"89: h''"::5A(KE(OW Os   Kc                 2    SU;   a  US   $ SU;   a  US   $ U$ )Nry  r   r  )rV  rv  s     r%   rk  1UnslothVisionDataCollator._select_messages_or_rawh  s/     :&&'?++ Nr'   c                 V   [        U5      S:X  a  g US   n[        U[        5      (       d   eSU;  a  SU;  a  [        S5      eUR	                  S5      n[        U[
        5      (       a  SUS./US'   U$ [        U[        [        45      (       a  US   nSU;   d   e U$ [        S5      e)	Nr   rJ  r   zXUnsloth: Failed to use vision data collator!
Maybe use `standardize_data_formats` first!rH  rI  r[   zUnsloth: Failed to use vision data collator!
Your messages must be like:
[{'role':'user', 'content':[{'type':'text', 'text':'Hello!'}]}])r   rP   rQ   rL  rV   rR   r   r   )rV  ry  r  r   parts        r%   rl  ?UnslothVisionDataCollator._validate_and_normalize_first_messageq  s    x=A1+'4(((( Yg%=>  ++i(gs##+17"C!DGI  $//1:DT>!>  R r'   c                 |    U H5  nUS   S:X  d  M  [        US   =n[        5      (       d  M*  US   S   US'   M7     U$ )NrJ  rK  r   r   rH  )rP   r   )rV  ry  r  r   s       r%   rm  5UnslothVisionDataCollator._collapse_assistant_content  sI    Gv+-);;gTBB)0F);GI&   r'   c                 N    U R                   R                  X=(       d    / -   SX4S9$ )NF)r^  r_  continue_final_message)r'  rS  )rV  prompt_messagescompletion_messagesr_  r  s        r%   _render_chat&UnslothVisionDataCollator._render_chat  s.    ~~118b9Eav 2 
 	
r'   c                     SU;   a  [        US   5      n/ nS nO%[        UU R                  S-  SS9u  p4nUc  / nUc  / n X4U4$ )Nr   r   Tr>   r  )r   r   r.  )rV  rv  ry  r@   rw   rz  s         r%   ro  <UnslothVisionDataCollator._extract_images_videos_for_example  sf    w*+EEK(; OOA-$()%E+
 }be}be[((r'   c                    S nS nS n U=(       d    / U=(       d    / -   nU(       a&  [        UU R                  S-  SS9u  pEnUc  / nUc  / nOZSU;   aT  [        [        US   5      5       Vs/ s H  nSUS   U   0PM     n	n[        U	U R                  S-  SS9u  pEnUc  / nUc  / nXEU4$ s  snf ! [         a    / n/ n Nf = f)Nr   Tr  r   r@   )r   r.  ranger   r   )
rV  rv  p_msgsc_msgsimgsvids
vids_kwargmsg_listir   s
             r%   _extract_images_for_pc0UnslothVisionDataCollator._extract_images_for_pc  s   
	"26H)< $ 1(,*&J
 <<w&MRSVW^_gWhSiMj#kMjWgh.?.B$CMjL#k-@$$(OOA$5,0.*D

 |BT|BT
 :%% $l  	DD	s*   AB/ B/ +B*?'B/ *B/ /C ?C c                   ^	^
 U	U
4S jnUbK  U R                   b>  [        U R                   [        [        45      (       a$  [	        U R                   5      S:X  a  U=(       d    / $ U R                   m
T
Gb  [        U5       GH
  u  p4[        T
5      [        L a  UR                  T
[        5      X'   M2  U R                  U5      T
:  d  MI  [        US5      (       d  M\  UR                  u  pVUT
-  U R                  U5      S-  -   U R                  U5      -  nUT
-  U R                  U5      S-  -   U R                  U5      -  nU R                  (       a  U R                  S-  m	U" U5      U" U5      pUR                  Xx4[        5      X'   GM     U$ )Nc           
         > [        T[        TU TS-
  :  a#  [        R                  " U T-  5      -  5      5      $ [        R                  " U T-  S-   5      -  5      5      $ )Nr   g      ?)r4   r   r)   r*   r-   )r  r    r)  s    r%   quantize_to_factorLUnslothVisionDataCollator._resize_images_inplace.<locals>.quantize_to_factor  sf    vs6'(J,='=		!F(#$    CG::aPVhWZlC[$    r'   r   ra   r   )r)  rP   r   r   r   	enumerater[   ra   LANCZOSr3  r   r^   r0  r.  )rV  r@   r  r  imgwhnew_wnew_hr    r)  s            @@r%   rp  0UnslothVisionDataCollator._resize_images_inplace  sX   	
 =DOO3
4??UZ\`Ta8b8bgjkokzkzg{  @A  hA;B__
!#E*
#u,"zz*g>EH^^C(:5'#x:P:P88DA^dnnS.AQ.FF4>>Z]K^^E^dnnS.AQ.FF4>>Z]K^^E..!%1!4'9%'@BTUZB[u"zz5.'BEH + r'   c                 n   X   n[        U5      [        L a~  [        U5       Hg  u  pE[        U5      [        L a3  [        U5       H"  u  pgUR                  U R                  5      XV'   M$     MJ  UR                  U R                  5      X4'   Mi      X1U'    U$ X   R                  U R                  5      X'    U$ r  )r[   r   r  tor!  )rV  r}  keyrf  jpixel_value_jr|  pixel_value_ks           r%   rr  :UnslothVisionDataCollator._cast_pixel_values_dtype_inplace  s    z%$-l$; &$.,5m,D(+8+;+;DJJ+G( -E '4&6&6tzz&BLO %< %#J 	 tzz2EJr'   r!   c                 r    [        U R                  SU R                  5      n[        USS5      nUS:X  a  S$ S$ )Nr  padding_siderightleft)getattrr'  )rV  toksides      r%   _tokenizer_padding_side1UnslothVisionDataCollator._tokenizer_padding_side  s7    dnnk4>>BsNG4v4W4r'   c                 ~    [        U R                  SU R                  5      n[        USS 5      nUc  [        S5      eU$ )Nr  r  uG   Tokenizer must define `pad_token_id` for prompt–completion collation.)r  r'  r7   )rV  r  pad_ids      r%   _pad_token_id_or_fail/UnslothVisionDataCollator._pad_token_id_or_fail  s<    dnnk4>>Bnd3>fggr'   attention_maskrh  r  r  extra_tensorsextra_pad_values.c                    UR                   u  pxUR                  n	UR                  U	[        R                  S9n
U
R                  SS9nU
R                  U	[        R                  S9R                  SS9S-
  nUS:X  a  X-
  R                  S5      U-   nOUS:X  a  UnO[        S5      eUR                  Xx4U5      nUR                  Xx45      nU
R                  SS9u  nnUUU4   R                  [        R                  5      nUUU4   UUU4'   UR                  [        R                  :X  a  SUUU4'   OSUUU4'   / nUb  Uc  S	/[        U5      -  n[        U5      [        U5      :X  d   S
5       e[!        XV5       H?  u  nnUR                  UR                   U5      nUUU4   UUU4'   UR#                  U5        MA     [%        UR'                  5       R)                  5       5      nS	Us=:  a  U:  aW  O  OTUS:X  a  [+        UU-
  U5      nO[+        S	U5      nUS S 2U4   nUS S 2U4   nU(       a  U Vs/ s H  nUS S 2U4   PM     nnX[-        U5      4$ s  snf )N)devicer!  r   dimr  r  zside must be 'left' or 'right'T)as_tupler   z)extra_pad_values must match extra_tensors)r   r  r  r   boolsumint64cumsum	unsqueezer7   new_full	new_zerosnonzeror   r!  r   zipr   r   r4   itemslicer   )rV  r  rh  r  r  r  r  BLr  keepr|  rankdstnew_idsnew_attnridxcsrccdst
new_extrasmpadvoutmax_kslr   s                             r%   _flush_to_side(UnslothVisionDataCollator._flush_to_side  s\    !!  ejj AxxAxwwfEKKw8??A?FJ6>5##A&-CW_C=>>%%qfl;!++QF3\\4\0
d4:!!%**-(t4t>>UZZ'#'HT4Z #$HT4Z  *,
$'$%3]);#; '(C,>>k@kk>}?4jj$/"#D$J-D$J!!#& @
 AEEGLLN#u=q=v~1u9a(1e_q2vG2H0:;
1a2h

;%
"333 <s   5Ic                     UR                   u  pxX::  a  XU/Ub  U/-   $ / -   $ US:X  a  [        U* S 5      O[        SU5      n	US S 2U	4   nUS S 2U	4   nUS S 2U	4   nUb	  US S 2U	4   nXU/Ub  U/-   $ / -   $ )Nr  r   )r   r  )
rV  rh  r  completion_maskr  max_lentoken_type_idsrc   r  r  s
             r%   _truncate_by_side+UnslothVisionDataCollator._truncate_by_side:  s    <?WeWqNCSzzwyzz&*fnUG8T"%7:K#ArE*	(B/)!R%0%+ArE2N?;SaSm?Ovvsuvvr'   c	                 b   UR                   u  pX-   S-
  U-  U-  nX:X  a  XU/Ub  U/-   $ / -   $ X-
  n[        R                  " X4XQR                  UR                  S9n[        R
                  " X4UR                  UR                  S9n[        R
                  " X4UR                  UR                  S9nUb+  [        R                  " X4XR                  UR                  S9nUS:X  a]  [        R                  " X4SS9n[        R                  " X4SS9n[        R                  " X4SS9nUb  [        R                  " WU4SS9nO\[        R                  " X4SS9n[        R                  " X.4SS9n[        R                  " X?4SS9nUb  [        R                  " UW4SS9nXU/Ub  U/-   $ / -   $ )Nr   )r!  r  r  r  )r   r   fullr!  r  zeroscat)rV  rh  r  r  r  r  multipler  token_type_pad_idr  r  L2pad_lenpad_ids	zeros_att
zeros_comppad_token_type_idss                    r%   _pad_to_multiple*UnslothVisionDataCollator._pad_to_multipleH  s   |aH,87?WeWqNCSzzwyzz&**a\6QZQaQabKKN4H4HQ_QfQfg	[[!_5J5JSbSiSij
%!&QL:KSgSgp~  qF  qF  "G6>		7"6A>I"YY	'BJN#ii(E1MO)!&,>+OUV!W		9"6A>I"YY'BJN#ii(E1MO)!&N<N+OUV!W?;SaSm?Ovvsuvvr'   c                    / / / / 4u  p#pEU GH_  nUS   US   p[        U[        5      =(       a(    [        U5      S:H  =(       d    [        US   [        5      n	[        U[        5      =(       a(    [        U5      S:H  =(       d    [        US   [        5      n
U	(       aV  U R	                  U5        U R
                  (       a  U R                  U5        U R                  U5      nU R                  USSS9nO[        U5      nU
(       a  U R	                  U5        U R
                  (       a  U R                  U5        U R                  U5      nU R                  XxS9nSR                  S	 [        S
 [        X5      5       5       5      nU[        U5      S  nO[        U5      nU R                  Xi(       a  UOS U
(       a  UOS 5      u  pnU R                  U5      nUR                  U5        UR                  U5        U(       a   [        U5      S:  a  UR                  U5        U(       d  GM  [        U5      S:  d  GM0  UR                  U5        Uc  S/ 0nUS   R!                  US   5        GMb     [        SSSSS9n[        SSSSS9n[        U5      S:  a  UUS'   [        U5      S:  a5  UUS'   [#        WS   5      US'   UR%                  5        H  u  nnUUU'   M     U R&                  " SSU0UD6nU R&                  " SSU0UD6nUS   US   nnUS   US   nnUR)                  SS 5      UR)                  SS 5      nn[*        R,                  " UU4SS9n[*        R,                  " UU4SS9n[*        R,                  " [*        R.                  " U5      U4SS9nUb  Ub  [*        R,                  " UU4SS9n OS n U R1                  5       n!U R3                  5       n"U b  U R5                  UUU"U!UU 45      u  nnu  nn U R6                  b#  U R9                  UUUU"U R6                  U S9u  nnnn U R:                  (       a4  U R:                  S:  a$  U R=                  UUUU"U!U R:                  U S9u  nnnn OU R5                  UUU"U!U45      u  nnu  nU R6                  b#  U R9                  UUUU"U R6                  5      u  nnnU R:                  (       a4  U R:                  S:  a$  U R=                  UUUU"U!U R:                  5      u  nnnUR?                  5       n#U R@                  U#US:H  '   U R@                  U#[*        RB                  " U#U RD                  5      '   U RF                  (       a  U R@                  U#US:H  '   [        U5      n$UU$S'   UU$S'   U#U$S'   U b  U U$S'   SU$;   a  U RI                  U$5      n$SU$;   a  U RI                  U$S5      n$U$$ )Nr\  r]  r   TF)r_  r  )r  r   c              3   *   #    U  H	  u  pUv   M     g 7fr  r  )r   r  rc   s      r%   r   GUnslothVisionDataCollator._collate_prompt_completion.<locals>.<genexpr>  s     d.cda.cs   c                     U S   U S   :H  $ r?  r  r@  s    r%   r<  FUnslothVisionDataCollator._collate_prompt_completion.<locals>.<lambda>  s    !A$!A$,r'   rj   r  r`  )ra  r  rc  rd  r  r   re  rH  rh  r  r  r   r  )r  ri  rf  rg  r  )%rP   r   r   rQ   rl  r-  rm  rn  r  rR   joinr   r  r  rp  r   r   r   rq  r'  rV   r   r  
zeros_liker  r  r  r*  r  r2  r  rs  r&  rt  r  r1  rr  )%rV  ru  prompt_textscompletion_textsr   re  expc	is_p_msgs	is_c_msgsp_txtpc_txtc_txtr  r  r  prompt_kwargscompletion_kwargsr|  r   proc_promptsproc_completionsp_idsc_idsp_mc_mp_ttc_ttrh  r  r  r  r  
flush_sideri  r  s%                                        r%   rj  4UnslothVisionDataCollator._collate_prompt_completiond  s   9;RR6Bh<L!1q #1d+WQ11V
1Q4QU@VI"1d+WQ11V
1Q4QU@VI::1=0044Q7))!,))!4`e)fA::1=0044Q7))!,**1*T di8NPSTYPb.cdds5z{+A &*%@%@)QY]dm_`sw%x"D
..t4D&##E*D	Ad#tD	Ad#%"'J5!((E):;U X $	
 ! $	
 v;?&,M(#v;?&,M(# ,Z-> ?Ju"((*1#$a  + ~~I<I=I>>U/?UCTU#K02B;2Ou 013CDT3US!%%&6=?O?S?STdfj?kdIIuen!4	C:15))U%5%5c%:C$@aH 0"YYd|;N!N ++-113
%KOK^K^	:vQ_?`LHNI'H
 "".MQMcMc~
DL_L_p~ Nd NJ	>?N
 &&4+B+BQ+FMQMbMb~
FTXTkTk  }K Nc NJ	>?N =A<O<O	:v?Q=9NI'9
 "".=A=S=S~
DL_L_>:	>? &&4+B+BQ+F=A=R=R~
FTXTkTk>:	>?
 "&*&7&7~"#=A=N=Nuzz&$"8"89:$$+/+<+<F?a'( < $K .H%$2C !S 77<C C'77=RSC
r'   c                    U H  nUR                  S5      n[        U[        5      (       d  M+  U HN  n[        U[        5      (       d  M  UR	                  5        VVs/ s H  u  pVUb  M
  UPM     nnnU H  nXE	 M     MP     M     U$ s  snnf )z4Remove None-valued keys added by Arrow serializationr   )rV   rP   r   rQ   rq  )rV  ry  r  r   r  r|  r   keys_to_removes           r%   rn  *UnslothVisionDataCollator._clean_none_keys  sx    Gkk),G'4((#D!$--8<

)R!)R!/A $ "0 $    *Ss   	B
*B
)r-  r1  r!  r(  r&  r)  r*  r2  r  r.  r'  r/  r3  r0  r$  r+  )NNr5   iFNNTNTNr   F)NFF)rf  )NNr  )Nr   )!r\   
__module____qualname____firstlineno__	__slots__rY  r~  rk  rl  rm  r  ro  r  rp  rr  rR   r  r   r  r   no_gradTensorr   r   r  r
   r   r  r  r  rj  rn  __static_attributes__r  r'   r%   r   r   ~  sV   I  "'#!"%IT 	Pb 	0

) &>:"5 5
s  ]]_ 8<@D9494 ||94 	94
 94  -494 #3;#56=94 
u||U\\5s1B+CC	D94 94vww8IV
r'   )g-C6?)W__all__r  r   PILr   rY   r   r   r)   r   r   r   	functoolsr   rU   r   	packagingr   typingr	   r
   r   r   r   	itertoolsr   r   torchvision.transformsr   r   logr   hf_utilsr   r   r   rV   r   rQ  r_   r`   r6   r   r   r   r   r   r   rp   rq   rr   rs   r&   r+   r.   r   r<   rQ   rf   ru   r  r   r  r   r   r   r   r   r   getenvr   rR   r   r   r   r   r  r   r  r#  	PIL.Image
Resamplingr  dataset_utilsr$  rR  r   r  r'   r%   <module>r)     ss  @ (        	     5 5 *8O  8**..)ACHCO 

	    rzz~~.@BXYZ[ 
KK34F3GHI	+C + + + /3 / / / 0C 0 0 0  ,8:is%(EHcf
38_6 
 $;	;; [[;x &	&& S%Z & 		&R(	(
5<<(V:T :8?	8?8? 8? 3S=	8?v	
5<<D	 	*	*
5<<*\ !*(   YY'CTJ  1 #      0<]b ;T ; ;VZ ;glmrmymy{  AF  AL  AL  |M  nM  hN ;z
g
uT$Zd4j9I-I'J 
tTXz 
 
 $ %&dT$t*%556&& & 5ekk"D()5eELL$u{{J[<[6\1]_c1c+dde	&: $ 	  
))


&
& N{	 {	x [%Os   K K