
    h=                     L   S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	  SSK
JrJrJrJrJrJrJrJr  \R&                  " \5      r\R,                  R/                  SS5        S	 rSS
\R2                  4S jjr\S:X  a(  \" 5       r\R9                  SS9u  rrrr r!\" \\\\ 5        gg)a  
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to
that of DPO.

# Full training:
```bash
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 16     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model     --warmup_ratio 0.1     --logging_first_step
```

# QLoRA:
```bash
# QLoRA:
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 8     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model-lora     --warmup_ratio 0.1     --logging_first_step     --use_peft     --load_in_4bit     --lora_target_modules=all-linear     --lora_r=16     --lora_alpha=16
```
    N)logging)load_dataset)AutoModelForCausalLMAutoTokenizer)DatasetMixtureConfig	KTOConfig
KTOTrainerModelConfigScriptArguments	TrlParserget_datasetget_peft_configTRACKIO_SPACE_IDztrl-trackioc                    [         R                  " UR                  UR                  S9n[         R                  " UR                  UR                  S9n[        R                  " UR                  UR                  S9nUR
                  c  UR                  Ul        UR                  (       a'  U R                  (       a  [        R                  S5        OUR                  (       a  U R                  (       d  [        U5      nOWUR                  (       d;  U R                  (       a*  [        U R                  U R                  U R                  S9nO[        S5      e[!        UUUWU R"                     UR$                  S:w  a  XpR&                     OS U[)        U5      S9nUR+                  5         UR-                  UR.                  5        UR0                  (       a  UR1                  U R                  S9  g g )N)trust_remote_codezBoth `datasets` and `dataset_name` are provided. The `datasets` argument will be used to load the dataset and `dataset_name` will be ignored.)name	streamingz5Either `datasets` or `dataset_name` must be provided.no)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)r   from_pretrainedmodel_name_or_pathr   r   	pad_token	eos_tokendatasetsr   loggerwarningr   r   dataset_configdataset_streaming
ValueErrorr	   dataset_train_spliteval_strategydataset_test_splitr   train
save_model
output_dirpush_to_hub)	script_argstraining_args
model_argsdataset_argsmodel	ref_model	tokenizerdatasettrainers	            I/home/james-whalen/.local/lib/python3.13/site-packages/trl/scripts/kto.pymainr6   \   s    00%%9U9UE %44%%9U9UI --%%9U9UI "'11	 !9!9:	
 
		{'?'?l+""{'?'?$$;+E+EQ\QnQn
 PQQ k==>@M@[@[_c@cW;;<im"#J/G MMO }//0  )A)AB !    
subparsersc                 t    [         [        [        [        4nU b  U R	                  SSUS9nU$ [        U5      nU$ )NktozRun the KTO training script)helpdataclass_types)r   r   r
   r   
add_parserr   )r8   r<   parsers      r5   make_parserr?      sH    &	;@TUO&&u3Pbq&r M ?+Mr7   __main__T)return_remaining_strings)N)"__doc__argparseos
accelerater   r   r   transformersr   r   trlr   r   r	   r
   r   r   r   r   
get_logger__name__r    environ
setdefaultr6   _SubParsersActionr?   r>   parse_args_and_configr,   r-   r.   r/   _ r7   r5   <module>rP      s   0)V  	  ! <	 	 	 
		H	% 

  (- 8/CdH66  z]F ?E>Z>Z!% ?[ ?;K
L! 	mZ> r7   