
    h                     6   S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
Jr  S SKJrJrJrJrJrJrJr  S SKJr  \ R,                  R/                  SS5         \S	:X  Ga  \" \\\45      r\R5                  5       u  rrr\R<                  " \R>                  S
S9  \R@                  S;   a  \R@                  O\!" \\R@                  5      r \"" \RF                  \RH                  \ S9r%\" \5      r&\&b  \" 5       \%S'   \&\%S'   \
RN                  " \RP                  S\RR                  S9r*\*RW                  SS05        \*RX                  c  \\*l,        \	RN                  " \RZ                  \RR                  SS9r.\	RN                  " \RZ                  \RR                  SS9r/\RN                  " \R`                  \RR                  S9r1\" \5      r2\2c&  \RN                  " \R`                  \RR                  S9r3OSr3\" \Rh                  \Rj                  \Rl                  S9r7Sr8\7Rs                  \:" \;" \75      \8-
  5      5      r<\7Rs                  \:" \;" \75      \8-
  \;" \75      5      5      r=Sr>S r?\" 5       R                  5          \?" \<\*5      r<\?" \=\*5      r=SSS5        \" \\*\1\3\/\.\<\=\2S9	rA\AR                  5         \AR                  \R>                  5        \R                  (       a  \AR                  \Rh                  S9  \AR                  5         gg! , (       d  f       N= f)    N)PartialState)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfig	PPOConfig
PPOTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATETRACKIO_SPACE_IDztrl-trackio__main__T)ignore_errors)autoN)revisionattn_implementationdtype
device_mapquantization_configleft)padding_sidetrust_remote_code	pad_tokenz[PAD]   )r   
num_labels)r   )namesplitd   promptc                 d   ^ U4S jnU R                  USU R                  [        R                  S9$ )zFpre-tokenize the dataset before training; only collate during trainingc                 .   > T" U [            SS9nSUS   0$ )NF)padding	input_ids)dataset_text_field)elementoutputs	tokenizers     R/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/ppo/ppo.pytokenize!prepare_dataset.<locals>.tokenize   s+    *+G  !566    T)batchedremove_columnsnum_proc)mapcolumn_namestraining_argsdataset_num_proc)datasetr+   r-   s    ` r,   prepare_datasetr8      s8    	7 {{"//"33	  
 	
r/   )	argsprocessing_classmodel	ref_modelreward_modelvalue_modeltrain_dataseteval_datasetpeft_config)dataset_name)Fosshutiltorch
accelerater   datasetsr   transformersr   r   r   r   trlr	   r
   r   r   r   r   r   trl.trainer.utilsr   environ
setdefault__name__parserparse_args_into_dataclassesscript_argsr5   
model_argsrmtree
output_dirr   getattrdictmodel_revisionr   model_kwargsr   from_pretrainedmodel_name_or_pathr   r+   add_special_tokenschat_templatereward_model_pathr>   r=   sft_model_pathpolicyrA   
ref_policyrB   dataset_configdataset_train_splitr7   eval_samplesselectrangelenr?   r@   r(   r8   local_main_process_firsttrainertrain
save_modelpush_to_hubgenerate_completions r/   r,   <module>rm      s"  0 
   # !    3 

  (- 8> z	;GHF-3-O-O-Q*K

MM-**$?
 !+ 0 0N BJPUWaWgWgHhE**&::L
 2*=&%8%:\".A*+--%%FjNjNjI   +w!78&"6	4DD'':;W;WdeK 6EE'':;W;WdeL "11$$
8T8TF "*-K)99((J<X<X

 

   {'A'AIhIhG LNN5W)D#EFM>>%G|(CS\"RSL!
& 
	0	0	2'yA&|Y? 
3 "!#!
G MMO }//0  )A)AB  "I V 
3	2s   :L


L