
    h"                     J   S r SSKrSSKJr  SSKJrJrJr  SSKJ	r	J
r
JrJrJr  \R                  R                  SS5        \S:X  Ga\  \" \\	\45      r\R%                  5       u  rrr\R,                  " \R.                  \R0                  S	9r\R,                  " \R.                  \R0                  S	9r\R,                  " \R.                  \R0                  S	9r\R8                  c  \R:                  \l        \" \R<                  \R>                  S
9r \
" \\\\ \RB                     \RD                  S:w  a  \ \RF                     OS\\" \5      S9r$\$RK                  5         \$RM                  \RN                  5        \RP                  (       a  \$RQ                  \R<                  S9  ggg)a  
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to that of DPO.

# Full training:
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 16     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir kto-aligned-model     --warmup_ratio 0.1     --logging_first_step

# QLoRA:
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 8     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir kto-aligned-model-lora     --warmup_ratio 0.1     --logging_first_step     --use_peft     --load_in_4bit     --lora_target_modules all-linear     --lora_r 16     --lora_alpha 16
    N)load_dataset)AutoModelForCausalLMAutoTokenizerHfArgumentParser)	KTOConfig
KTOTrainerModelConfigScriptArgumentsget_peft_configTRACKIO_SPACE_IDztrl-trackio__main__)trust_remote_code)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name))__doc__osdatasetsr   transformersr   r   r   trlr   r   r	   r
   r   environ
setdefault__name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsfrom_pretrainedmodel_name_or_pathr   model	ref_model	tokenizer	pad_token	eos_tokenr   dataset_configdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub     N/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/kto.py<module>r8      s  0#J 
 ! N N T T 

  (- 8 z	;GHF-3-O-O-Q*K
 !00%%9U9UE %44%%9U9UI --%%9U9UI "'11	 ;33+:T:TUG k==>@M@[@[_c@cW[;;<im"#J/G MMO }//0  )A)AB !K r6   