
    h              
          S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	  SSK
JrJrJrJrJrJrJrJr  \R&                  R)                  SS5        \S	:X  Ga  \" \\\45      r\R/                  5       u  rrr\R6                  S
;   a  \R6                  O\" \\R6                  5      r\" \R<                  \R>                  \R@                  \S9r!\" \5      r"\"b  \" 5       \!S'   \"\!S'   \RF                  " \RH                  40 \!D6r%\" \5      r&\&c  \RF                  " \RH                  40 \!D6r'OSr'\	RF                  " \RH                  \R<                  S9r(\" \RR                  \RT                  \RV                  S9r,\,\RZ                     r.\R^                  S:w  a  \,\R`                     OSr1S r2\.Rg                  \2\Rh                  S9r.\1b  \1Rg                  \2\Rh                  S9r1\" \%\'\\.\1\(\&S9r5\5Rm                  5         \5Ro                  \Rp                  5        \Rr                  (       a  \5Rs                  \RR                  S9  ggg)a+  
python examples/scripts/mpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 4     --per_device_eval_batch_size 4     --num_train_epochs 1     --gradient_accumulation_steps 8     --dataset_num_proc 1     --output_dir dpo_idefics_rlaif-v     --dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules down_proj, o_proj, k_proj, q_proj, gate_proj, up_proj, v_proj     --loss_type sigmoid bco_pair sft     --loss_weights 0.8 0.2 1.0
    N)load_dataset)Image)AutoModelForImageTextToTextAutoProcessor)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_configTRACKIO_SPACE_IDztrl-trackio__main__)autoN)trust_remote_coderevisionattn_implementationdtype
device_mapquantization_config)r   )name	streamingnoc                     U S   S   n[        U[        R                  5      (       a'  UR                  S:w  a  UR                  S5      nU/U S'   U $ )Nimagesr   RGB)
isinstancer   modeconvert)exampleimages     R/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/mpo_vlm.py
ensure_rgbr$   s   sM    !!$eU[[))zzU"e,!&GH    )num_proc)model	ref_modelargstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name):__doc__ostorchdatasetsr   PILr   transformersr   r   trlr   r   r	   r
   r   r   r   r   environ
setdefault__name__parserparse_args_and_configscript_argstraining_args
model_argsr   getattrdictr   model_revisionr   model_kwargsr   from_pretrainedmodel_name_or_pathr'   r-   r(   	processorr.   dataset_configdataset_streamingdatasetdataset_train_splitr*   eval_strategydataset_test_splittest_datasetr$   mapdataset_num_proctrainertrain
save_model
output_dirpush_to_hub r%   r#   <module>rT      su  4$ 
  !  C	 	 	 

  (- 8 zK@AF-3-I-I-K*K

 !+ 0 0N BJPUWaWgWgHhE$66**&::	L 2*=&%8%:\".A*+'77%%
E "*-K/??))

	
 	--%%9U9UI   ''//G
 K;;<M>K>Y>Y]a>a7;99:gkL "%%j=;Y;Y%ZM#''
]=[=['\
 #!"G MMO }//0  )A)AB !_ r%   