
    h'                     h   S r SSKrSSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJrJrJrJrJrJrJrJrJr  SSKJr  \R0                  " \5      r\R6                  R9                  SS	5        S
 rSS\R<                  4S jjr\S:X  a(  \" 5       r \ RC                  SS9u  r"r#r$r%r&\" \"\#\$\%5        gg)a  
# Full training
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-7     --num_train_epochs 1     --per_device_train_batch_size 2     --max_steps 1000     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns
```

# LoRA:
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-6     --num_train_epochs 1     --per_device_train_batch_size 2     --max_steps 1000     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns     --use_peft     --lora_r 32     --lora_alpha 16
```
    N)logging)load_dataset)AutoModelForCausalLMAutoTokenizer)
DatasetMixtureConfig	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_datasetget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATETRACKIO_SPACE_IDztrl-trackioc           
      d   UR                   S;   a  UR                   O[        [        UR                   5      n[        UR                  UR
                  US9n[        U5      nUb  [        5       US'   XeS'   [        R                  " UR                  4SUR                  0UD6n[        U5      nUc.  [        R                  " UR                  4SUR                  0UD6n	OS n	[        R                  " UR                  UR                  S9n
U
R                  c  U
R                  U
l        U
R                   c  ["        U
l        U R$                  (       aJ  UR'                  5        VVs/ s H'  u  pUR                   [        R(                  :X  d  M%  UPM)     snnUl        UR,                  (       a'  U R.                  (       a  [0        R3                  S5        OUR,                  (       a  U R.                  (       d  [5        U5      nOWUR,                  (       d;  U R.                  (       a*  [7        U R.                  U R8                  U R:                  S9nO[=        S	5      e[?        UU	UWU R@                     URB                  S
:w  a  XRD                     OS U
US9nURG                  5         URB                  S
:w  a4  URI                  5       nURK                  SU5        URM                  SU5        URO                  URP                  5        URR                  (       a  URS                  U R.                  S9  g g s  snnf )N)autoN)revisionattn_implementationdtype
device_mapquantization_configtrust_remote_code)r   zBoth `datasets` and `dataset_name` are provided. The `datasets` argument will be used to load the dataset and `dataset_name` will be ignored.)name	streamingz5Either `datasets` or `dataset_name` must be provided.no)argstrain_dataseteval_datasetprocessing_classpeft_configeval)dataset_name)*r   getattrtorchdictmodel_revisionr   r   r   r   from_pretrainedmodel_name_or_pathr   r   r   	pad_token	eos_tokenchat_templater   ignore_bias_buffersnamed_buffersbool!_ddp_params_and_buffers_to_ignoredatasetsr$   loggerwarningr   r   dataset_configdataset_streaming
ValueErrorr	   dataset_train_spliteval_strategydataset_test_splittrainevaluatelog_metricssave_metrics
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsdataset_argsr   model_kwargsr   modelr"   	ref_model	tokenizerr   bufferdatasettrainermetricss                   I/home/james-whalen/.local/lib/python3.13/site-packages/trl/scripts/dpo.pymainrO   \   s    !+ 0 0N BJPUWaWgWgHhE**&::L
 2*=&%8%:\".A*+ 00%%9C9U9UYeE "*-K(88))
=G=Y=Y
]i
	 	--%%9U9UI "'11	&"6	&& &+%8%8%:3
%:\Tfllejj>XD%:3
/
 !9!9:	
 
		{'?'?l+""{'?'?$$;+E+EQ\QnQn
 PQQ k==>@M@[@[_c@cW;;<im"G MMO""d*""$FG,VW- }//0  )A)AB !Q3
s   #$L,L,
subparsersc                 t    [         [        [        [        4nU b  U R	                  SSUS9nU$ [        U5      nU$ )NdpozRun the DPO training script)helpdataclass_types)r   r   r
   r   
add_parserr   )rP   rT   parsers      rN   make_parserrW      sH    &	;@TUO&&u3Pbq&r M ?+M    __main__T)return_remaining_strings)N)'__doc__argparseosr&   
accelerater   r2   r   transformersr   r   trlr   r   r	   r
   r   r   r   r   r   r   trl.trainer.utilsr   
get_logger__name__r3   environ
setdefaultrO   _SubParsersActionrW   rV   parse_args_and_configrB   rC   rD   rE   _ rX   rN   <module>rj      s   0%N  	   ! <   3 
		H	% 

  (- 8LC^H66  z]F ?E>Z>Z!% ?[ ?;K
L! 	mZ> rX   