
    hK              
       2   S r SSKrSSKJr  SSKJrJrJr  SSKJ	r	J
r
JrJrJr  SSKJr  \R                   R#                  SS5        \S	:X  GaJ  \" \\	\45      r\R)                  5       u  rrr\R0                  " \R2                  \R4                  S
9r\R0                  " \R2                  \R4                  S
9r\R:                  c  \R<                  \l        \" \R>                  \R@                  S9r!\RD                  c  \\l"        \
" \\\!\RF                     \RH                  S:w  a  \!\RJ                     OS\\" \5      S9r&\&RO                  5         \&RQ                  \RR                  5        \RT                  (       a  \&RU                  \R>                  S9  ggg)a  
Run the CPO training script with the following command with some example arguments.
In general, the optimal configuration for CPO will be similar to that of DPO:

# Full training:
python examples/scripts/cpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-6     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir "gpt2-aligned-cpo"     --warmup_steps 150     --logging_first_step     --no_remove_unused_columns

# QLoRA:
python examples/scripts/cpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-5     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir "gpt2-lora-aligned-cpo"     --optim rmsprop     --warmup_steps 150     --logging_first_step     --no_remove_unused_columns     --use_peft     --lora_r 16     --lora_alpha 16
    N)load_dataset)AutoModelForCausalLMAutoTokenizerHfArgumentParser)	CPOConfig
CPOTrainerModelConfigScriptArgumentsget_peft_config)SIMPLE_CHAT_TEMPLATETRACKIO_SPACE_IDztrl-trackio__main__)trust_remote_code)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)+__doc__osdatasetsr   transformersr   r   r   trlr   r   r	   r
   r   trl.trainer.utilsr   environ
setdefault__name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsfrom_pretrainedmodel_name_or_pathr   model	tokenizer	pad_token	eos_tokenr   dataset_configdatasetchat_templatedataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub     N/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/cpo.py<module>r:      s  0#J 
 ! N N T T 2 

  (- 8z	;GHF-3-O-O-Q*K

 !00%%9U9UE --%%9U9UI "'11	
 ;33+:T:TUG&"6	
 k==>@M@[@[_c@cW[;;<im"#J/G MMO }//0  )A)AB !Q r8   