
    h                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
Jr  S SKJrJrJrJrJrJrJr  S SKJr  \ R,                  R/                  SS5         \S	:X  Ga  \" \\\45      r\R5                  5       u  rrr\R<                  " \R>                  S
S9  \R@                  S;   a  \R@                  O\!" \\R@                  5      r \"" \RF                  \RH                  \ S9r%\" \5      r&\&b  \" 5       \%S'   \&\%S'   \
RN                  " \RP                  S\RR                  S9r*\*RW                  SS05        \*RX                  c  \\*l,        \	RN                  " \RZ                  \RR                  SS9r.\	RN                  " \RZ                  \RR                  SS9r/\RN                  " \R`                  \RR                  S9r1\" \5      r2\2c&  \RN                  " \R`                  \RR                  S9r3OSr3\" \Rh                  \Rj                  S9r6\6\Rn                     r8\Rr                  S:w  a  \6\Rt                     OSr;S r<\" 5       R{                  5          \<" \8\*5      r8\;b	  \<" \;\*5      r;\8R}                  S \R~                  S9r8\;b  \;R}                  S \R~                  S9r;SSS5        \8S    S   S   \*R                  :w  d   S5       e\" \\*\1\3\/\.\8\;\2S 9	rA\AR                  5         \AR                  \R>                  5        \R                  (       a  \AR                  \Rh                  S!9  \AR                  5         gg! , (       d  f       N= f)"    N)PartialState)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfig	PPOConfig
PPOTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATETRACKIO_SPACE_IDztrl-trackio__main__T)ignore_errors)autoN)revisionattn_implementationdtype
device_mapquantization_configleft)padding_sidetrust_remote_code	pad_tokenz[PAD]   )r   
num_labels)r   )namenoc                 b   ^ U4S jnU R                  UU R                  [        R                  S9$ )zFpre-tokenize the dataset before training; only collate during trainingc                 N   > TR                  U S   S S SSS9nU[        U5      S.$ )Nmessagesr   FT)paddingadd_generation_prompt)	input_idslengths)apply_chat_templatelen)elementr'   	tokenizers     W/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/ppo/ppo_tldr.pytokenize!prepare_dataset.<locals>.tokenize   s>    !55
#BQ'&* 6 I
 "+s9~FF    )remove_columnsnum_proc)mapcolumn_namestraining_argsdataset_num_proc)datasetr,   r.   s    ` r-   prepare_datasetr8      s6    	G {{"//"33  
 	
r0   c                     U S   S:*  $ Nr(   i    xs    r-   <lambda>r>      s    q|s7Jr0   )r2   c                     U S   S:*  $ r:   r;   r<   s    r-   r>   r>      s    99Lr0   r'   z)The last token should not be an EOS token)	argsprocessing_classmodel	ref_modelreward_modelvalue_modeltrain_dataseteval_datasetpeft_config)dataset_name)Fosshutiltorch
accelerater   datasetsr   transformersr   r   r   r   trlr	   r
   r   r   r   r   r   trl.trainer.utilsr   environ
setdefault__name__parserparse_args_into_dataclassesscript_argsr5   
model_argsrmtree
output_dirr   getattrdictmodel_revisionr   model_kwargsr   from_pretrainedmodel_name_or_pathr   r,   add_special_tokenschat_templatereward_model_pathrF   rE   sft_model_pathpolicyrI   
ref_policyrJ   dataset_configr7   dataset_train_splitrG   eval_strategydataset_test_splitrH   r8   local_main_process_firstfilterr6   eos_token_idtrainertrain
save_modelpush_to_hubgenerate_completionsr;   r0   r-   <module>rt      so  0 
   # !    3 

  (- 8#L z	;GHF-3-O-O-Q*K

MM-**$?
 !+ 0 0N BJPUWaWgWgHhE**&::L
 2*=&%8%:\".A*+--%%FjNjNjI   +w!78&"6	4DD'':;W;WdeK 6EE'':;W;WdeL "11$$
8T8TF "*-K)99((J<X<X

 

 ;33+:T:TUGK;;<M>K>Y>Y]a>a7;99:gkL
& 
	0	0	2'yA#*<CL%,,-JUbUsUs,t#'../LWdWuWu.vL 
3 K(,	0F0FFsHssF
 "!#!
G MMO }//0  )A)AB  "O N 
3	2s   AM  
M