
    h              
          S r SSKrSSKrSSKJr  SSKJrJr  SSKJ	r	J
r
JrJrJrJrJrJr  \R"                  R%                  SS5        \S:X  Ga  \" \\	\45      r\R+                  5       u  rrr\R2                  S	;   a  \R2                  O\" \\R2                  5      r\" \R8                  \R:                  \S
9r\" \5      r\b  \" 5       \S'   \\S'   \R@                  " \RB                  4S\RD                  0\D6r#\" \5      r$\$c*  \R@                  " \RB                  4S\RD                  0\D6r%OSr%\R@                  " \RB                  \RD                  SS9r&\&RN                  r'\#RP                  RR                  S:X  a  OC\#RP                  RR                  S:X  a  S\&l*        O!\#RP                  RR                  S:X  a  S\&l*        \'RV                  c  \'RX                  \'l+        \RZ                  (       aF  \#R]                  5        V Vs/ s H#  u  pUR2                  \R^                  :X  d  M!  U PM%     snn \#l0        \" \Rb                  \Rd                  \Rf                  S9r4\
" \#\%\\4\Rj                     \Rl                  S:w  a  \4\Rn                     OS\&\$S9r8\8Rs                  5         \8Ru                  \Rv                  5        \Rx                  (       a  \8Ry                  \Rb                  S9  gggs  snn f )a{  
Without dataset streaming:

```
accelerate launch examples/scripts/dpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 2     --gradient_accumulation_steps 32     --dataset_num_proc 32     --output_dir dpo_idefics_rlaif-v     --dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules all-linear
```

With dataset streaming:

```
accelerate launch examples/scripts/dpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --dataset_streaming     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 2     --max_steps 100     --gradient_accumulation_steps 32     --dataset_num_proc 32     --output_dir dpo_idefics_rlaif-v     --dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules all-linear
```
    N)load_dataset)AutoModelForImageTextToTextAutoProcessor)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_configTRACKIO_SPACE_IDztrl-trackio__main__)autoN)revisionattn_implementationdtype
device_mapquantization_configtrust_remote_codeF)r   do_image_splittingidefics2	paligemmaa  {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] if item['type'] == 'text' %}{{ item['text'] }}<|im_end|>{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}llavaa  {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %})name	streamingno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)=__doc__ostorchdatasetsr   transformersr   r   trlr   r   r   r	   r
   r   r   r   environ
setdefault__name__parserparse_args_and_configscript_argstraining_args
model_argsr   getattrdictmodel_revisionr   model_kwargsr   from_pretrainedmodel_name_or_pathr   modelr"   	ref_model	processor	tokenizerconfig
model_typechat_template	pad_token	eos_tokenignore_bias_buffersnamed_buffersbool!_ddp_params_and_buffers_to_ignorer#   dataset_configdataset_streamingdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub)r   buffers   00R/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/dpo_vlm.py<module>rR      s  4"H 
  ! C	 	 	 

  (- 8zK@AF-3-I-I-K*K

 !+ 0 0N BJPUWaWgWgHhE**&::L
 2*=&%8%:\".A*+'77%%$66 E
 "*-K/??))
(::
 
	 	--%%9U9UjoI ##I ||*,		 	 K	/ #d			 	 G	+ #G	"'11	&& &+%8%8%:3
%:\Tfllejj>XD%:3
/   ''//G k==>@M@[@[_c@cW[;;<im"G MMO }//0  )A)AB !c f3
s    K/*K/