
    hS                     Z   S SK r S SKrS SKJr  S SKJrJrJr  S SKrS SK	J
r
  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJrJrJrJr  S SKJr  S S	KJ r   S
SK!J"r"  SSK#J$r$  SSK%J&r&J'r'  \ " 5       (       a  S SK(J)r)  \" 5       (       a  S SK*r*\
RV                  " \,5      r- " S S\5      r.g)    N)Path)CallableOptionalUnion)logging)Dataset)
DataLoader)AutoModelForCausalLMAutoTokenizerBaseImageProcessorDataCollatorDataCollatorForLanguageModelingDataCollatorForSeq2SeqFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainingArgumentsis_wandb_available)EvalLoopOutput)is_peft_available   )PPODecorators   )IterativeSFTConfig)generate_model_cardget_comet_experiment_url)	PeftModelc                     ^  \ rS rSrSrSS/r       S!S\\\4   S\	\\
\4      S\	\   S	\	\\\\\4   4      S
\	\\\\\4      S\\R*                  R,                  \R*                  R.                  R0                  4   S\	\\R4                  \R4                  /\R4                  4      S\	\\/\4      4U 4S jjjrS\S\
S\4S jrS\R4                  S\R4                  S\R4                  4S jr\S\ \RB                     S\ \RB                     S\ \RB                     S\ \   S\ \   4
S j5       r"\#RH                  " 5            S"S\	\ \RB                        S\	\ \RB                        S\	\ \RB                        S\	\ \      S\	\ \      4
S jj5       r%S r&U 4S jr'   S#S\	\   S\	\   S\\\ \   S4   4S jjr(S r)U =r*$ )$IterativeSFTTrainer9   a	  
The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization.

<Tip warning={true}>

The [`IterativeSFTTrainer`] is deprecated and will be removed in version 0.24.0. Please use the [`SFTTrainer`].

</Tip>

Args:
    model (`Union[str, PreTrainedModel]`):
        Model to be trained. Can be either:

        - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
          path to a *directory* containing model weights saved using
          [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
          using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
          `args.model_init_kwargs`.
        - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
    args ([`IterativeSFTConfig`], *optional*, defaults to `None`):
        Configuration for this trainer. If `None`, a default configuration is used.
    data_collator (`DataCollator`, *optional*):
        Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
        Will default to [`~transformers.default_data_collator`] if no `processing_class` is provided, an instance
        of [`~transformers.DataCollatorWithPadding`] otherwise if the processing_class is a feature extractor or
        tokenizer.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If `None`, the processing class is loaded from the model's name
        with [`~transformers.AutoTokenizer.from_pretrained`].
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
trlziterative-sftNmodelargsdata_collatoreval_datasetprocessing_class
optimizerspreprocess_logits_for_metricscompute_metricsc	                 |  > [         R                  " S[        5        [        U[        5      (       a  UOUR
                  R                  n	Uc#  U	R                  S5      S   n
[        U
 S35      nOe[        U[        5      (       aP  [        U[        5      (       d;  UR                  5       nUR                  US'   UR                  S5        [        S0 UD6nUc  [        R                  " U	5      nUR                  b*  [        U[        5      (       d  [         R#                  S5        [        U[        5      (       a  U R%                  X5      n['        5       (       a  [        U[(        5      (       a  SU l        OS	U l        XPl        [/        UR
                  S
S	5      U l        Uc<  U R0                  (       a  [3        USSS9U l        O [7        U R,                  S	S9U l        OX0l        UR8                  U l        UR:                  U l        UR<                  U l        [>        TU ]  UUU R4                  UUUUUS9  [C        U RD                  S5      (       a%  U RD                  RG                  U RH                  5        U RK                  U RL                  RN                  5        U RP                  RS                  U RD                  U RT                  U RV                  5      u  U l"        U l*        U l+        U R:                  S:X  a  SOSU R,                  l,        [C        U S5      (       d  [[        S5      eU R<                  [\        l        g )NzkThe `IterativeSFTTrainer` is deprecated and will be removed in version 0.24.0. Please use the `SFTTrainer`./z-IterativeSFT	hub_tokenpush_to_hub_tokenzYou passed model_init_kwargs to the `IterativeSFTConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.TFis_encoder_decoder   )label_pad_token_idpad_to_multiple_of)mlm)r$   r%   r&   r'   r(   r+   r)   r*   add_model_tagskeep_endleftrightacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`. )/warningswarnFutureWarning
isinstancestrconfig_name_or_pathsplitr   r   to_dictr/   popr   from_pretrainedmodel_init_kwargsloggerwarning_create_model_from_pathr   r   is_peft_modelr(   getattrr1   r   r&   r   
max_lengthtruncation_modeoptimize_device_cachesuper__init__hasattrr$   r7   
_tag_namescreate_optimizer_and_schedulerr%   	max_stepsr;   prepare	optimizerlr_schedulertruncation_sideAttributeErrorr   )selfr$   r%   r&   r'   r(   r)   r*   r+   model_id
model_name	dict_args	__class__s               [/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/iterative_sft_trainer.pyrR   IterativeSFTTrainer.__init__d   s     		
 'uc2258R8R<!,R0J%M&BCD/00DJ\9]9]I%)^^Ik"MM-.%2	2D #,<<XF !!-j6L6LNN; eS!!00=E :eY#?#?!%D!&D 0")%,,8Le"T &&%;$RS&" &ETEZEZ`e%f"!.//#33%)%?%?",,%-+!*G 	 		
 4::/00JJ%%doo6++DII,?,?@ 9=8H8H8P8PJJ(9(99
5
DND$5 ;?:N:NR\:\bi-t]++ j  /3.H.H+    
model_pathreturnc                 Z    UR                   =(       d    0 n[        R                  " U40 UD6$ )z0Creates a model from a path or model identifier.)rH   r
   rG   )r\   rd   r%   rH   s       ra   rK   +IterativeSFTTrainer._create_model_from_path   s*     228b#33JTBSTTrc   	input_idsattention_masklabelsc                    Uc&  U Vs/ s H  n[         R                  " U5      PM     nnU R                  (       a  U R                  [	        XU5       VVVs/ s H  u  pEnXEUS.PM     snnn5      R                  U R                  R                  5      nUR                  SS 5        SUS   US   U R                  R                  :H  '   OTU R                  [	        X5       VVs/ s H	  u  pEXES.PM     snn5      R                  U R                  R                  5      nU R                  b  U R                  S:X  a3  UR                  5        VV	s0 s H  u  pXS U R                   _M     nnn	U$ U R                  S:X  a4  UR                  5        VV	s0 s H  u  pXU R                  * S  _M     nnn	U$ [        SU R                   35      eU$ s  snf s  snnnf s  snnf s  sn	nf s  sn	nf )	Nrh   ri   rj   decoder_input_idsr2   rj   )rh   ri   
keep_startr8   zUnknown truncation mode: )torch	ones_liker1   r&   ziptor$   devicerF   r(   pad_token_idrN   rO   items
ValueError)
r\   rh   ri   rj   idsattlab
input_datakvs
             ra   prepare_model_inputs(IterativeSFTTrainer.prepare_model_inputs   s   !>GHiseooc2iNH""++ *-Y)O)O# #&L)O
 b""#  NN.5_cJx H!59N9N9[9[![\ ++KNyKijKixss:Kijb""# 
 ??&##|3BLBRBRBTUBT$!a#4T__!55BT
U  %%3CMCSCSCUVCU41aDOO#3#5!66CU
V  !#<T=Q=Q<R!STT9 I k VVs    GG"G
GG#textstexts_labelsc           
         UGc  Uc  [        SS/X/5       Hr  u  pV[        U[        5      (       d  [        U S[	        U5       35      e[        US   [
        R                  5      (       a  MW  [        SU S[	        US   5       35      e   GOF[        / SQXU/5       Hr  u  pV[        U[        5      (       d  [        U S[	        U5       35      e[        US   [
        R                  5      (       a  MW  [        SU S[	        US   5       35      e   O[        U[        5      (       d  [        S[	        U5       35      e[        US   [        5      (       d  [        S	[	        US   5       35      eUb^  [        U[        5      (       d  [        S
[	        U5       35      e[        US   [        5      (       d  [        S[	        US   5       35      eXX#U4$ )a  
Check if the input data is valid for training.

Args:
    input_ids (list[`torch.LongTensor`]):
        List of tensors containing the input_ids
    attention_mask (list[`torch.LongTensor`]):
        List of tensors containing the attention_mask
    labels (list[`torch.FloatTensor`]):
        List of tensors containing the labels
    texts (list[`str`]):
        List of string containing the text input.
    texts_labels (list[`str`]):
        List of string containing the text labels.

Returns:
    `tuple`: The input data.
rh   rj   z! must be a list of tensors - got r   zElements in z must be tensors - got rl   z''text' must be a list of strings - got z)Elements in 'text' must be strings - got z.'text_labels' must be a list of strings - got z0Elements in 'text_labels' must be strings - got )rq   r@   listrv   typero   TensorrA   )rh   ri   rj   r   r   nametensor_lists          ra   _step_safety_checker(IterativeSFTTrainer._step_safety_checker   s   4 =%),k8-DyFY)Z%D%k488(D61RSWXcSdRe)fgg%k!nellCC(<v=TUYZefgZhUiTj)kll	 *[ *-=	[a?b*%D &k488(D61RSWXcSdRe)fgg%k!nellCC(<v=TUYZefgZhUiTj)kll* eT** #J4PU;-!XYYeAh,, #LTRWXYRZ^L\!]^^'!,55$'UVZ[gVhUi%jkk!,q/377$'WX\]ijk]lXmWn%opp&EErc   c                   ^  T R                   R                  5         T R                  R                  S:X  aY  [        R
                  " S5      R                  T R                  R                  5      T l	        T R                  R                  T l
        Uc  Uc  [        S5      eUb  Ub  [        R                  S5        Uc  Uc  T R                  (       a  [        S5      eUb  USS OSnUb  USS OSnUb  USS OSnUb  USS OSnUb  USS OSnT R                  XX4U5      u  pp4nUb&  T R!                  UT R"                  SSSS	9nUS
   US   p!Ub   T R!                  UT R"                  SSSS	9S
   nUc  UnT R%                  XU5      n['        UR)                  5       5      n0 nUR+                  U5        U 4S jn	[,        R.                  " U5      n
U
R1                  S5        [3        U
T R                  R4                  SU	S9n[7        U5       GH  u  pT R8                  R;                  T R                   5         U Vs0 s H  oX   _M	     nnT R=                  T R                   U5      nT R                  R>                  S:  a  URA                  5       nURC                  5       nT R8                  RE                  U5        T R8                  RF                  (       a_  T R                  RH                  bH  T R8                  RK                  T R                   RM                  5       T R                  RH                  5        T RN                  RQ                  5         T RN                  RS                  5         T RT                  b  T RT                  RQ                  5         T R                  =R                  S-  sl        T =R                  U-  sl	        T RW                  5         SSS5        GM     gs  snf ! , (       d  f       GM  = f)aP  
Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and
text_labels.

Args:
    input_ids (list[`torch.LongTensor`]):
        List of tensors containing the input_ids (if not provided, text will be used)
    attention_mask (list[`torch.LongTensor`], , *optional*):
        List of tensors containing the attention_mask
    labels (list[`torch.FloatTensor`], *optional*):
        List of tensors containing the labels (if set to None, will default to input_ids)
    texts (list[`str`], *optional*):
        List of strings containing the text input (if not provided, input_ids will directly be used)
    texts_labels (list[`str`], *optional*):
        List of strings containing the text labels (if set to None, will default to text)

Returns:
    `dict[str, Any]`: A summary of the training statistics
r   g        Nz@Step should include `input_ids` or `texts` as keyword arguments.ztBoth `input_ids` and `texts` argument are provided. `input_ids` will be ignored. Please provide only one of the two.zNo 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed.Tpt)rN   
truncationpaddingreturn_tensorsrh   ri   c                    > [        5       nU S    HY  nUS;   d  M  [        R                  " U  Vs/ s H  o3U   PM	     sn5      R                  TR                  R
                  5      X'   M[     U$ s  snf )Nr   rl   )dictro   stackrr   r$   rs   )datareturn_dictkeydr\   s       ra   collator*IterativeSFTTrainer.step.<locals>.collators  sf    &KAwCC',{{D3IDqcFD3I'J'M'MdjjN_N_'`K$   4Js   A0
ro   )
batch_sizeshuffle
collate_fnr   ),r$   trainstateglobal_stepro   tensorrr   r%   rs   tr_loss_globalstep_last_loggedrv   rI   rJ   r1   r   r(   rN   r}   r   keysupdater   	from_dict
set_formatr	   per_device_train_batch_size	enumerater;   
accumulatecompute_lossn_gpumeandetachbackwardsync_gradientsmax_grad_normclip_grad_norm_
parametersrX   step	zero_gradrY   _maybe_log_save_evaluate)r\   rh   ri   rj   r   r   model_inputsmodel_inputs_names
batch_dictr   
batch_datastep_dataloader_batchr{   losstr_loss_steps   `                ra   r   IterativeSFTTrainer.step#  s   8 	

::!!Q& <<,//		0@0@ADL+/::+A+AD(_``"u'8NN6
 >l2t7N7N S 
 %.$9IaLt	.<.H*d$0d!-a4*6*B|AAEAZAZvlB
>	6, 00$//dDae 1 L )5[(A<P`Ca~#**$//dDae + F >F00FS!,"3"3"56
,'	 &&z2
g&$yy<<	
 "/2HA!!,,TZZ85GH5G585GH((\B99??Q&99;D#{{}  ))$/##22tyy7N7N7Z$$44

--/		//
 ##%((*$$0%%**,

&&!+& ,--/7 98 3H 98s   8O*=O%FO*%O**
O:	c                 \   U R                   R                  bf  U R                  R                  U R                   R                  -  S:X  a5  U R                  R                  S:w  a  U R	                  U R
                  5        U R                   R                  Gb  U R                  R                  U R                   R                  -  S:X  a  U R                  R                  S:w  a  0 nU R                  U R                  5      R                  5       R                  5       nU =R                  U R                  -  sl        [        X R                  R                  U R                  -
  -  S5      US'   U R                  5       US'   U R                  R                  U l        U R                  U5        g g g g )Nr      r   learning_rate)r%   
eval_stepsr   r   evaluater'   logging_steps_nested_gatherr   r   itemroundr   _get_learning_ratelog)r\   logstr_loss_scalars      ra   r   ,IterativeSFTTrainer._maybe_log_save_evaluate  sD   99+zz%%		(<(<<AdjjF\F\`aFad//0 99"".zz%%		(?(??1DI_I_cdId)+!%!4!4T\\!B!G!G!I!N!N!P ,$^zz7M7MPTPlPl7l%mopqV(,(?(?(A_%/3zz/E/E, JeD /rc   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )Nr-   r.   )r^   )	r%   hub_model_idr   
output_dirr   rD   create_model_cardrQ   _save_checkpoint)r\   r$   trialr^   r`   s       ra   r   $IterativeSFTTrainer._save_checkpoint  sj    99!!)dii22388J//55c:2>J*5 .rc   r^   dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        UUU R                   UU[#        5       (       a+  [$        R&                  b  [$        R&                  R(                  OS[+        5       SS9nUR-                  [        R
                  R/                  U R0                  R2                  S	5      5        g)
a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
NrC   unsloth_versionunslothJOB_IDhf_jobszIterative SFT)
base_modelr^   r   r   r   	wandb_url	comet_urltrainer_namez	README.md)is_world_process_zerorS   r$   rB   ospathisdirrC   setr@   rA   addenvironr   rT   r   r   r   wandbrunurlr   savejoinr%   r   )r\   r^   r   r   r   
model_cards         ra   r   %IterativeSFTTrainer.create_model_card  sN   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$(!!**%'9';';		@Ueiimm[_.0(	

 	TYY%9%9;GHrc   )r   r&   r1   rL   rY   rN   r$   rP   rX   r(   r   rO   )NNNN)NNNN)NNNNN)NNN)+__name__
__module____qualname____firstlineno____doc__rT   r   rA   r   r   r   r   r   r   r   r   r   r   r   tuplero   optim	OptimizerrY   LambdaLRr   r   r   rR   rK   r}   staticmethodr   
LongTensorr   r   empty_device_cacher   r   r   r   __static_attributes____classcell__)r`   s   @ra   r!   r!   9   s   &P )J
 HL04EI W
 imFJbIS/)*bI u/1BBCDbI  -	bI
 uWd3<.@%@ABbI #)+=?UWeef
bI %++//1I1I1R1RRSbI (0%,,9UW\WcWc9c0d'ebI "(N+;T+A"BCbI bIHU# U=O UTc U
ell ELL bgbnbn @ 3F(()3FU--.3F U%%&3F Cy	3F
 3i3F 3Fj %%' 7;;?37%),0|0D!1!123|0 !e&6&6!78|0 e../0	|0
 S	"|0 tCy)|0 (|0|0/ %)&*,0	4ISM4I sm4I CcD()	4I 4Irc   r!   )/r   r=   pathlibr   typingr   r   r   ro   
accelerater   datasetsr   torch.utils.datar	   transformersr
   r   r   r   r   r   r   r   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   corer   iterative_sft_configr   utilsr   r   peftr   r   
get_loggerr   rI   r!   r<   rc   ra   <module>r     s    
   , ,    '    6 0   4 @  			H	%}I' }Irc   