
    h              	       x   S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  \R                   R#                  SS5        \S:X  Gam  \" \	\
\45      r\R)                  5       u  rrr\" S	S
9\l        S\l        \R6                  S;   a  \R6                  O\" \\R6                  5      r\" \R:                  \R<                  \S9r\" \5      r \ b  \" 5       \S'   \ \S'   \RB                  " \RD                  4S\RF                  0\D6r$\" \RJ                  \RL                  S9r'\" \$\\'\RP                     \RR                  S:w  a  \'\RT                     OS\" \5      S9r+\+RY                  5         \+R[                  \R\                  5        \R^                  (       a  \+R_                  \RJ                  S9  ggg)a  
pip install pillow

# Tested on 8x H100 GPUs
accelerate launch     --config_file examples/accelerate_configs/deepspeed_zero3.yaml     examples/scripts/sft_vlm.py     --dataset_name HuggingFaceH4/llava-instruct-mix-vsft     --model_name_or_path llava-hf/llava-1.5-7b-hf     --gradient_accumulation_steps 8     --output_dir LLaVA-1.5-7B-SFT     --dtype bfloat16

For LLaVA-NeXT, use:
    --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf

For meta-llama/Llama-3.2-11B-Vision-Instruct, use:
    --model_name_or_path meta-llama/Llama-3.2-11B-Vision-Instruct

accelerate launch     --config_file examples/accelerate_configs/deepspeed_zero3.yaml     examples/scripts/sft_vlm.py     --dataset_name HuggingFaceH4/llava-instruct-mix-vsft     --model_name_or_path HuggingFaceTB/SmolVLM-Instruct     --per_device_train_batch_size 1     --gradient_accumulation_steps 1     --output_dir SmolVLM-SFT     --dtype bfloat16     --use_peft     --lora_target_modules down_proj, o_proj, k_proj, q_proj, gate_proj, up_proj, v_proj
    N)load_dataset)AutoModelForImageTextToText)ModelConfigScriptArguments	SFTConfig
SFTTrainer	TrlParserget_kbit_device_mapget_peft_configget_quantization_configTRACKIO_SPACE_IDztrl-trackio__main__F)use_reentrant)autoN)revisionattn_implementationdtype
device_mapquantization_configtrust_remote_code)nameno)modelargstrain_dataseteval_datasetpeft_config)dataset_name)0__doc__ostorchdatasetsr   transformersr   trlr   r   r   r   r	   r
   r   r   environ
setdefault__name__parserparse_args_and_configscript_argstraining_args
model_argsdictgradient_checkpointing_kwargs
max_lengthr   getattrmodel_revisionr   model_kwargsr   from_pretrainedmodel_name_or_pathr   r   r   dataset_configdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub     R/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/sft_vlm.py<module>rB      s  2@ 
  ! 4	 	 	 

  (- 8zK@AF-3-I-I-K*K
26U2KM/#M
 !+ 0 0N BJPUWaWgWgHhE**&::L
 2*=&%8%:\".A*+'77%%9C9U9UYeE ;33+:T:TUG
 k==>@M@[@[_c@cW[;;<im#J/G MMO }//0  )A)AB ![ r@   