
    h                     f   S SK r S SKrS SKrS SKrS SKJrJr  S SKJr  S SK	J
r
  S SKJr  S SKJrJrJrJrJrJrJrJr  S SKJrJr  \
R2                  " \5      r\\S.r\ " S	 S
\5      5       rS rSS\ R>                  4S jjr \S:X  a(  \ " 5       r!\!RE                  SS9u  r#r$r%r&r'\" \#\$\%\&5        gg)    N)	dataclassfield)Optional)logging)load_dataset)DatasetMixtureConfigModelConfig
RLOOConfigRLOOTrainerScriptArguments	TrlParserget_datasetget_peft_config)get_soft_overlong_punishmentthink_format_reward)r   r   c                   d    \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS0S9r
\\\      \	S	'   S
rg)RLOOScriptArguments5   aH  
Script arguments for the RLOO training script.

Args:
    reward_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
        Reward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a
        directory containing model weights saved using [`~transformers.PreTrainedModel.save_pretrained`].
    reward_funcs (`list[str]` or `None`, *optional*, defaults to `None`):
        Reward functions to use. It can be either one of `"think_format_reward"`; or a dotted import path " (e.g.,
        `'my_lib.rewards.custom_reward'`).
NhelpzReward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a directory containing model weights saved using `PreTrainedModel.save_pretrained`.)defaultmetadatareward_model_name_or_pathzReward functions to use. It can be either one of  'think_format_reward'; or a dotted import path. (e.g., 'my_lib.rewards.custom_reward').reward_funcs )__name__
__module____qualname____firstlineno____doc__r   r   r   str__annotations__r   list__static_attributes__r       J/home/james-whalen/.local/lib/python3.13/site-packages/trl/scripts/rloo.pyr   r   5   s]    
 05 p
0x}  ). C
)L(49% r$   r   c           
         / nU R                   (       a  UR                  U R                   5        U R                  (       a  U R                   H  nU[        ;   a  UR                  [        U   5        M'  SU;   a{  UR	                  SS5      u  pe[
        R                  R                  S[        R                  " 5       5        [        R                  " U5      n[        Xu5      nUR                  U5        M  [        SU S[        [        R                  5       5       S35      e   UR                   (       a'  U R"                  (       a  [$        R'                  S5        OUR                   (       a  U R"                  (       d  [)        U5      n	OWUR                   (       d;  U R"                  (       a*  [+        U R"                  U R,                  U R.                  S9n	O[        S	5      e[1        UR2                  UUW	U R4                     UR6                  S
:w  a  XR8                     OS [;        U5      S9n
U
R=                  5         U
R?                  UR@                  5        URB                  (       a  U
RC                  U R"                  S9  g g )N.   r   z Could not load reward function 'z'. Expected one of z or a valid import path.zBoth `datasets` and `dataset_name` are provided. The `datasets` argument will be used to load the dataset and `dataset_name` will be ignored.)name	streamingz5Either `datasets` or `dataset_name` must be provided.no)modelr   argstrain_dataseteval_datasetpeft_config)dataset_name)"r   appendr   reward_funcs_registryrsplitsyspathinsertosgetcwd	importlibimport_modulegetattr
ValueErrorr"   keysdatasetsr1   loggerwarningr   r   dataset_configdataset_streamingr   model_name_or_pathdataset_train_spliteval_strategydataset_test_splitr   train
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsdataset_argsr   	func_namemodule_pathmodulereward_funcdatasettrainers              r%   mainrV   S   s   L,,KAAB$11I11##$9)$DE	!)2)9)9#q)A&299;/"00=%f8##K0 6ykAT16689::RT  2  !9!9:	
 
		{'?'?l+""{'?'?$$;+E+EQ\QnQn
 PQQ ++!k==>@M@[@[_c@cW;;<im#J/G MMO }//0  )A)AB !r$   
subparsersc                 t    [         [        [        [        4nU b  U R	                  SSUS9nU$ [        U5      nU$ )NrloozRun the RLL training script)r   dataclass_types)r   r
   r	   r   
add_parserr   )rW   rZ   parsers      r%   make_parserr]      sH    *JEYZO&&v4Qcr&s M ?+Mr$   __main__T)return_remaining_strings)N)(argparser:   r8   r5   dataclassesr   r   typingr   
accelerater   r?   r   trlr   r	   r
   r   r   r   r   r   trl.rewardsr   r   
get_loggerr   r@   r3   r   rV   _SubParsersActionr]   r\   parse_args_and_configrL   rM   rN   rO   _r   r$   r%   <module>rj      s   ,   	 
 (   !	 	 	 J 
		H	% /$@  /  :5CpH66  z]F ?E>Z>Z!% ?[ ?;K
L! 	mZ> r$   