
    h              
       2   S r SSKrSSKJr  SSKJrJrJr  SSKJ	r	J
r
JrJrJr  SSKJr  \R                   R#                  SS5        \S	:X  GaJ  \" \\
\	45      r\R)                  5       u  rrr\R0                  " \R2                  \R4                  S
9r\R0                  " \R2                  \R4                  S
9r\R:                  c  \R<                  \l        \" \R>                  \R@                  S9r!\RD                  c  \\l"        \" \\\!\RF                     \RH                  S:w  a  \!\RJ                     OS\\" \5      S9r&\&RO                  5         \&RQ                  \RR                  5        \RT                  (       a  \&RU                  \R>                  S9  ggg)a5  
Run the ORPO training script with the following command with some example arguments.
In general, the optimal configuration for ORPO will be similar to that of DPO without the need for a reference model:

# regular:
python examples/scripts/orpo.py     --dataset_name trl-internal-testing/hh-rlhf-helpful-base-trl-style     --model_name_or_path gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-6     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir "gpt2-aligned-orpo"     --warmup_steps 150     --logging_first_step     --no_remove_unused_columns

# peft:
python examples/scripts/orpo.py     --dataset_name trl-internal-testing/hh-rlhf-helpful-base-trl-style     --model_name_or_path gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-5     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir "gpt2-lora-aligned-orpo"     --optim rmsprop     --warmup_steps 150     --logging_first_step     --no_remove_unused_columns     --use_peft     --lora_r 16     --lora_alpha 16
    N)load_dataset)AutoModelForCausalLMAutoTokenizerHfArgumentParser)ModelConfig
ORPOConfigORPOTrainerScriptArgumentsget_peft_config)SIMPLE_CHAT_TEMPLATETRACKIO_SPACE_IDztrl-trackio__main__)trust_remote_code)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)+__doc__osdatasetsr   transformersr   r   r   trlr   r   r	   r
   r   trl.trainer.utilsr   environ
setdefault__name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsfrom_pretrainedmodel_name_or_pathr   model	tokenizer	pad_token	eos_tokenr   dataset_configdatasetchat_templatedataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub     O/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/orpo.py<module>r:      s  0#J 
 ! N N V V 2 

  (- 8 z
KHIF-3-O-O-Q*K

 !00%%9U9UE --%%9U9UI "'11	
 ;33+:T:TUG&"6	
 k==>@M@[@[_c@cW[;;<im"#J/G MMO }//0  )A)AB !Q r8   