
    h                         S r SSKrSSKrSSKrSSKJrJr  SSKJr  SSK	r	SSK
r
SSKJr  SSKJr  SSKJr  SSKJrJrJrJr  SS	KJrJrJrJrJrJr  \R:                  R=                  S
S5        S\S\S\4S jr S\!\\4   S\S\!\\"\!\\4      4   4S jr#S\"\!\\4      S\!\\
RH                  4   4S jr%\ " S S\5      5       r&\'S:X  Ga  \" \&\\45      r(\(RS                  5       u  r*r+r,\!" SS9\+l-        S\+l.        \" \*R^                  \*R`                  SS9r1\,Rd                  S;   a  \,Rd                  O\3" \
\,Rd                  5      r2\" SSS\
Rh                  S9r5\!" \,Rl                  \,Rn                  \2\" 5       \5S9r8\Rr                  " \,Rt                  40 \8D6r;\" S S!S!S"S#/ S$QS%9r<\+Rz                  (       a1  \;R}                  5         S\;R~                  l@        \;R                  5         \Rr                  " \,Rt                  \,Rn                  S&9rB\1 V s/ s H  n \#" U \*R                  5      PM     sn rD\" \;\+\D\%\<\BS'9rE\ER                  5         \ER                  \+R                  5        \+R                  (       aO  \ER                  \*R^                  S(9  \ER                  R                  (       a  \BR                  \+R                  5        C;CE\
R                  R                  5         ggs  sn f ))a  
Example usage:
accelerate launch     --config_file examples/accelerate_configs/deepspeed_zero2.yaml     examples/scripts/sft_video_llm.py     --dataset_name mfarre/simplevideoshorts     --video_cache_dir "/optional/path/to/cache/"     --model_name_or_path Qwen/Qwen2-VL-7B-Instruct     --per_device_train_batch_size 1     --output_dir video-llm-output     --tf32 True     --gradient_accumulation_steps 4     --num_train_epochs 4     --optim adamw_torch_fused     --log_level debug     --log_level_replica debug     --save_strategy steps     --save_steps 300     --learning_rate 8e-5     --max_grad_norm 0.3     --warmup_ratio 0.1     --lr_scheduler_type cosine     --push_to_hub False     --dtype bfloat16     --gradient_checkpointing True
    N)	dataclassfield)Any)load_dataset)
LoraConfig)process_vision_info)AutoModelForImageTextToTextAutoProcessorBitsAndBytesConfigQwen2VLProcessor)ModelConfigScriptArguments	SFTConfig
SFTTrainer	TrlParserget_kbit_device_mapTRACKIO_SPACE_IDztrl-trackiourl	cache_dirreturnc                 f   [         R                  " USS9  U R                  S5      S   n[         R                  R	                  X5      n[         R                  R                  U5      (       a  U$  [        R                  " U SS9 nUR                  5         [        US5       nUR                  SS9 H  nU(       d  M  UR                  U5        M     S	S	S	5        S	S	S	5        U$ ! , (       d  f       N= f! , (       d  f       U$ = f! [        R                   a  n[        S
U 35      UeS	nAff = f)z.Download video if not already present locally.T)exist_ok/)streamwbi    )
chunk_sizeNzFailed to download video: )osmakedirssplitpathjoinexistsrequestsgetraise_for_statusopeniter_contentwriteRequestException	Exception)r   r   filename
local_pathrfchunkes           X/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/sft_video_llm.pydownload_videor3   J   s    KK	D)yy~b!Hi2J	ww~~j!!	A\\#d+q j$'1^^t^<Eu = ( , 	 (' ,+ $$ A4QC89q@AsT   0D C6"C%>C%C6	D %
C3	/C66
D D D D0D++D0examplec                     U S   nU S   n[         R                  " U S   5      nSnSU S3n[        R                  " US5      S   nS	S
US./S.SS[	        X!5      SSS.S
U SUS    3S./S.SS
US   S./S./nSU0$ )z%Prepare dataset example for training.	video_urltimecoded_ccqaz.You are an expert in movie narrative analysis.zCAnalyze the video and consider the following timecoded subtitles:

zC

Based on this information, please answer the following questions:   r   systemtext)typer;   )rolecontentuservideoiN g      ?)r<   r@   
max_pixelsfpsz

Question: question	assistantanswermessages)jsonloadsrandomsampler3   )	r4   r   r6   r7   qa_pairssystem_messagebase_promptselected_qarF   s	            r2   prepare_datasetrO   _   s    $I>*Lzz'$-(HEN BEK --!,Q/K 'O&PQ >)+O_hqtuK={S]G^F_)`a	
 6;xCX*Y)Z[
H !!    examplesc                    / n/ n[        U 5       H  u  p4 [        S US    5       5      n[        S[        R                  R                  U5       35        UR                  [        R                  US   SS95        [        US   5      S   S   nUR                  U5        M     [        XSSS9nUS   R                  5       n	SX[        R                  R                  :H  '   [        [        [         5      (       a  / SQO-[        R                  R#                  [        R$                  5      /n
U
 H
  nSXU:H  '   M     XS'   U$ ! [         a  n[        SU S	U 35      UeS
nAff = f)z'Collate batch of examples for training.c              3   t   #    U  H.  nUS      H!  nUR                  S5      S:X  d  M  US   v   M#     M0     g7f)r>   r<   r@   N)r%   ).0messager>   s      r2   	<genexpr>collate_fn.<locals>.<genexpr>   sA      2G&y1G;;v&'1 ! 1 !2s   "88rF   zProcessing video: F)tokenizer9   r   zFailed to process example z: NptT)r;   videosreturn_tensorspadding	input_idsi)idP ieP ihP labels)	enumeratenextprintr   r!   basenameappend	processorapply_chat_templater   r+   
ValueErrorclone	tokenizerpad_token_id
isinstancer   convert_tokens_to_idsimage_token)rQ   textsvideo_inputsir4   
video_pathvideo_inputr1   inputsr^   visual_tokensvisual_token_ids               r2   
collate_fnru   }   su   EL)
	K &z2 J &rww'7'7
'C&DEFLL66wz7JUZ6[\-gj.AB1EaHK, *  EtUYZFK &&(F9=FY((5556
 i!122 	!!!77	8M8MNO  ),0() ) 8M'  	K9!BqcBCJ	Ks   BE
E$EE$c                   6    \ rS rSr% Sr\" SSS0S9r\\S'   Sr	g	)
CustomScriptArguments   z
Arguments for the script.

Args:
    video_cache_dir (`str`, *optional*, defaults to `"/tmp/videos/"`):
        Video cache directory.
z/tmp/videos/helpzVideo cache directory.)defaultmetadatavideo_cache_dir N)
__name__
__module____qualname____firstlineno____doc__r   r|   str__annotations____static_attributes__r}   rP   r2   rw   rw      s      !6KcBdeOSerP   rw   __main__F)use_reentranttrain)namer    )autoNTnf4)load_in_4bitbnb_4bit_use_double_quantbnb_4bit_quant_typebnb_4bit_compute_dtype)revisiontrust_remote_codedtype
device_mapquantization_config	CAUSAL_LM   g?none)q_projk_projv_projo_proj)	task_typer.   
lora_alphalora_dropoutbiastarget_modules)r   )modelargstrain_datasetdata_collatorpeft_configprocessing_class)dataset_name)Or   rG   r   rI   dataclassesr   r   typingr   r$   torchdatasetsr   peftr   qwen_vl_utilsr   transformersr	   r
   r   r   trlr   r   r   r   r   r   environ
setdefaultr   r3   dictlistrO   Tensorru   rw   r~   parserparse_args_and_configscript_argstraining_args
model_argsgradient_checkpointing_kwargsremove_unused_columnsr   dataset_configdatasetr   getattrbfloat16
bnb_configmodel_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr   r   gradient_checkpointinggradient_checkpointing_enableconfigr   enable_input_require_gradsrd   r|   prepared_datasettrainerr   
save_model
output_dirpush_to_hubacceleratoris_main_processhub_model_idcudaempty_cache)r4   s   0r2   <module>r      s$  66  	  (    !  - i i c c 

  (- 8A A A A*"T#s(^ " "S$tTWY\T\~J^E^@_ "<%d38n- %$sELL7H2I %P 	fO 	f 	f z-y+FGF-3-I-I-K*K
 37U2KM/*/M' ;33+:T:T\cdG !+ 0 0N BJPUWaWgWgHhE $"&!$~~	J **$66&(&L (77
8U8UfYefE
?K ++++-%*"((*--%%9U9UI
 ^ee]dRY1L1LM]de & "G MMO }//0  )A)AB..!!-"<"<= 		JJ_ l fs   K)