
    hZ              
       6   S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	J
r
  SSKJrJrJrJrJrJrJr  \R&                  " \5      r\R,                  R/                  SS5        \S	:X  Ga2  \
" \\\45      r\R3                  5       u  rrr\" S
S9\l        \R>                  S;   a  \R>                  O\ " \\R>                  5      r\" \RB                  \RD                  (       a  S
OSS9r#\" \5      r$\$b  \" 5       \#S'   \$\#S'   \	RJ                  " \RL                  \RN                  SS9r(\RJ                  " \RL                  4S\RN                  S.\#D6r)\(RT                  \)RV                  l*        \RX                  (       a!  \RZ                  S:w  a  \R]                  S5        \" \R^                  \R`                  S9r1\1Re                  S 5      r1\" \)\(\\1\Rf                     \1\Rh                     \" \5      S9r5\5Rm                  5         \5Ro                  \Rp                  5        \5Rs                  5       r:\5Rw                  S\:5        \5Ry                  S\:5        \5Ro                  \Rp                  5        \Rz                  (       a  \5R{                  \R^                  S9  ggg)a  
Full training:
python examples/scripts/prm.py     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --dataset_name trl-lib/prm800k     --output_dir Qwen2-0.5B-Reward     --per_device_train_batch_size 8     --num_train_epochs 1     --gradient_checkpointing True     --learning_rate 1.0e-5     --eval_strategy steps     --eval_steps 50

LoRA:
python examples/scripts/prm.py     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --dataset_name trl-lib/prm800k     --output_dir Qwen2-0.5B-Reward-LoRA     --per_device_train_batch_size 8     --num_train_epochs 1     --gradient_checkpointing True     --learning_rate 1.0e-4     --eval_strategy steps     --eval_steps 50
    --use_peft     --lora_r 32     --lora_alpha 16
    N)logging)load_dataset)AutoModelForTokenClassificationAutoTokenizerHfArgumentParser)ModelConfig	PRMConfig
PRMTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_configTRACKIO_SPACE_IDztrl-trackio__main__F)use_reentrant)autoNT)revision	use_cache
device_mapquantization_config)trust_remote_codeuse_fast   )
num_labelsr   	TOKEN_CLSzYou are using a `task_type` that is different than `TOKEN_CLS` for PEFT. This will lead to silent bugs Make sure to pass --lora_task_type TOKEN_CLS when using this script with PEFT.)namec                 $    [        U S   5      S:  $ )Ncompletionsr   )len)xs    N/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/prm.py<lambda>r"   t   s    s1]+;'<q'@    )modelprocessing_classargstrain_dataseteval_datasetpeft_configeval)dataset_name)>__doc__ostorch
accelerater   datasetsr   transformersr   r   r   trlr   r	   r
   r   r   r   r   
get_logger__name__loggerenviron
setdefaultparserparse_args_into_dataclassesscript_argstraining_args
model_argsdictgradient_checkpointing_kwargsdtypegetattrmodel_revisiongradient_checkpointingmodel_kwargsr   from_pretrainedmodel_name_or_pathr   	tokenizerr$   pad_token_idconfiguse_peftlora_task_typewarningr+   dataset_configdatasetfilterdataset_train_splitdataset_test_splittrainertrain
save_model
output_direvaluatemetricslog_metricssave_metricspush_to_hub r#   r!   <module>r[      s  .: 
   ! Y Y   
		H	% 

  (- 8 z	;GHF-3-O-O-Q*K
26U2KM/
 !+ 0 0N BJPUWaWgWgHhE**(??%TL 2*=&%8%:\".A*+--%%9U9U`dI ,;;%%23zGcGcgsE !* 6 6ELLz88KG^	
 ;33+:T:TUGnn@AG
 "k==>[;;<#J/G MMO
 }//0 G() }//0  )A)AB !A r#   