
    
:i              
       6   S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJ
r
JrJrJrJrJrJrJrJrJrJrJrJrJ r JrJ!r!  SSKrSSK7  SSK"J#r#J$r$  SS	K%J&r&  SSKrSSK'r(SS
K)J*r*  SSKJr  SSK+J,r,J-r.  SSK/J0r0  SSK1r1SSK2J3r3  S r4 SSSSSS.r5\Rl                  " SS\5S9S 5       r7S\R                  S\8S\8S\R                  4S jr9S\R                  S\R                  S\8S\8S\R                  4
S jr:S\R                  S\8S\R                  4S jr;\# " S  S!\5      5       r<  " S" S#\5      r= " S$ S%\=5      r> \?" \S&5      (       a3  SSKr " S' S(\R                  5      rA \R                  " \A" S)5      5        gg)*z;
2025.10.10
2025.10.9
4.56.2
0.23.0
__UNSLOTH_VERSIONING__
    )TensorN)
functional)AnyListOptionalTupleUnionDictSetCallable)AcceleratorAlignPropConfigAlignPropTrainerr   r   DDPOStableDiffusionPipeliner   PathProjectConfigurationPyTorchModelHubMixinr	   defaultdictgenerate_model_cardget_comet_experiment_urlis_wandb_availableloggerloggingosset_seedtextwraptorchwarnings)*)	dataclassfield)Version)nullcontext)DataCollatorForSeq2SeqDataCollatorForLanguageModeling)ParallelMode)
MethodTypec                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                 8  > [        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         T" U /UQ70 UD6n[        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         U$ )Nmodelfor_trainingfor_inference)hasattrr*   r+   r,   )selfargskwargsoutputfs       N/home/james-whalen/llama.cpp/unsloth_compiled_cache/UnslothAlignPropTrainer.pywrapper*prepare_for_training_mode.<locals>.wrapper0   sx     4!!gdjj.&I&IJJ##%4)$)&)4!!gdjj/&J&JJJ$$&    )	functoolswraps)r2   r4   s   ` r3   prepare_for_training_moder9   /   s%    __Q  Nr6   TF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)dynamic	fullgraphoptionsc                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )N   r   )chunksdim)rD   indexrD      )r   chunkreshapeshapeziptofloat32gather	unsqueezesqueeze	logsumexpappendconcat)
logitsrE   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexselected_logitslogsumexp_valuesper_token_logpss
             r3   chunked_selective_log_softmaxr]   E   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYr6   	input_idslogits_to_keeppad_token_idreturnc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zr
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens 
rG   z8logits_to_keep must be smaller than the sequence length.NrF   )rJ   
ValueErrorsum)r^   r_   r`   prompt_sectionpadding_maskpad_token_countss         r3   calculate_pad_tokens_in_promptrh   W   sX     ++STTq"2N?"223N"2L#''A'.r6   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
)devicer   rG   )rJ   rm   r   arangerO   )ri   rj   rk   r`   
batch_sizecompletion_lenrm   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               r3    create_completion_attention_maskrv   j   si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.Jr6   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
rG   T)rD   
descendingstable)r   argsortrN   )rw   rx   masksorted_indicespacked_tensors        r3   left_pack_paddingr      s8     D]]4Q4MNLLN;Mr6   c                      ^  \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS	0S9r
\\   \	S
'                                  SU 4S jjrSrU =r$ )UnslothAlignPropConfig   a  
    
Configuration class for the [`AlignPropTrainer`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    exp_name (`str`, *optional*, defaults to `os.path.basename(sys.argv[0])[: -len(".py")]`):
        Name of this experiment (defaults to the file name without the extension).
    run_name (`str`, *optional*, defaults to `""`):
        Name of this run.
    seed (`int`, *optional*, defaults to `0`):
        Random seed for reproducibility.
    log_with (`str` or `None`, *optional*, defaults to `None`):
        Log with either `"wandb"` or `"tensorboard"`. Check
        [tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details.
    log_image_freq (`int`, *optional*, defaults to `1`):
        Frequency for logging images.
    tracker_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
        Keyword arguments for the tracker (e.g., `wandb_project`).
    accelerator_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
        Keyword arguments for the accelerator.
    project_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
        Keyword arguments for the accelerator project config (e.g., `logging_dir`).
    tracker_project_name (`str`, *optional*, defaults to `"trl"`):
        Name of project to use for tracking.
    logdir (`str`, *optional*, defaults to `"logs"`):
        Top-level logging directory for checkpoint saving.
    num_epochs (`int`, *optional*, defaults to `100`):
        Number of epochs to train.
    save_freq (`int`, *optional*, defaults to `1`):
        Number of epochs between saving model checkpoints.
    num_checkpoint_limit (`int`, *optional*, defaults to `5`):
        Number of checkpoints to keep before overwriting old ones.
    mixed_precision (`str`, *optional*, defaults to `"fp16"`):
        Mixed precision training.
    allow_tf32 (`bool`, *optional*, defaults to `True`):
        Allow `tf32` on Ampere GPUs.
    resume_from (`str`, *optional*, defaults to `""`):
        Path to resume training from a checkpoint.
    sample_num_steps (`int`, *optional*, defaults to `50`):
        Number of sampler inference steps.
    sample_eta (`float`, *optional*, defaults to `1.0`):
        Eta parameter for the DDIM sampler.
    sample_guidance_scale (`float`, *optional*, defaults to `5.0`):
        Classifier-free guidance weight.
    train_batch_size (`int`, *optional*, defaults to `1`):
        Batch size for training.
    train_use_8bit_adam (`bool`, *optional*, defaults to `False`):
        Whether to use the 8bit Adam optimizer from `bitsandbytes`.
    train_learning_rate (`float`, *optional*, defaults to `1e-3`):
        Learning rate.
    train_adam_beta1 (`float`, *optional*, defaults to `0.9`):
        Beta1 for Adam optimizer.
    train_adam_beta2 (`float`, *optional*, defaults to `0.999`):
        Beta2 for Adam optimizer.
    train_adam_weight_decay (`float`, *optional*, defaults to `1e-4`):
        Weight decay for Adam optimizer.
    train_adam_epsilon (`float`, *optional*, defaults to `1e-8`):
        Epsilon value for Adam optimizer.
    train_gradient_accumulation_steps (`int`, *optional*, defaults to `1`):
        Number of gradient accumulation steps.
    train_max_grad_norm (`float`, *optional*, defaults to `1.0`):
        Maximum gradient norm for gradient clipping.
    negative_prompts (`str` or `None`, *optional*, defaults to `None`):
        Comma-separated list of prompts to use as negative examples.
    truncated_backprop_rand (`bool`, *optional*, defaults to `True`):
        If `True`, randomized truncation to different diffusion timesteps is used.
    truncated_backprop_timestep (`int`, *optional*, defaults to `49`):
        Absolute timestep to which the gradients are backpropagated. Used only if `truncated_backprop_rand=False`.
    truncated_rand_backprop_minmax (`tuple[int, int]`, *optional*, defaults to `(0, 50)`):
        Range of diffusion timesteps for randomized truncated backpropagation.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the final model to the Hub.

    NhelpzvLLM SamplingParams)defaultmetadatavllm_sampling_paramsrA   z8Chunk size to reduce memory usage. -1 is most efficient.unsloth_num_chunksc                  d  > [         S:  a  [        S[          S35        [         S:  a  [        S[          S35        [        T!U ]  " S#0 SU_SU_SU_S	U_S
U_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_S U_S!U_S"U_U D6  UU l        UU l        g )$NgHz>z Unsloth: Your learning rate of `zi` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!rG   za` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!exp_namerun_nameseedlog_withlog_image_freqtracker_project_namelogdir
num_epochs	save_freqnum_checkpoint_limitmixed_precision
allow_tf32resume_fromsample_num_steps
sample_etasample_guidance_scaletrain_batch_sizetrain_use_8bit_adamtrain_learning_ratetrain_adam_beta1train_adam_beta2train_adam_weight_decaytrain_adam_epsilon!train_gradient_accumulation_stepstrain_max_grad_normnegative_promptstruncated_backprop_randtruncated_backprop_timesteppush_to_hub )learning_rateprintsuper__init__r   r   )"r.   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r0   	__class__s"                                    r3   r   UnslothAlignPropConfig.__init__   s   H 4)I-  YB  (C  "D1e&F}o  Vw  %x  y 	0	0	0 	0  		0
 ,	0 $8	0 	0 $	0 "	0 $8	0 .	0 $	0 &	0  0	0 $	0  %:!	0"  0#	0$ #6%	0& #6'	0(  0)	0*  0+	0, '>-	0. "4/	00 1R1	02 #63	04  05	06 '>7	08 +F9	0: &;	0< %9!"4r6   )r   r   )export_gguf iO  NrG   trllogsd   rG      fp16Tr   2         ?g      @rG   Fg-C6
?g?g+?g{Gz?g:0yE>   r   NT1   FNrA   )__name__
__module____qualname____firstlineno____doc__r!   r   r   r   __annotations__r   intr   __static_attributes____classcell__r   s   @r3   r   r      s    M\ +012+(3-  */VW*#  !$   ### "&",-!"&&(#AF5 F5r6   r   c                     ^  \ rS rSrSrSS/r S"S\S\\R                  \
\   \
\   /\R                  4   S\/ \
\\4   4   S	\S
\\\\\/\4      4
S jjrS rS\S\4S jrS rS\R                  S\S\R                  4S jrS rS rS rS#S jrS"S\\   4S jjrS rU 4S jr   S$S\\   S\\   S\\\\   S4   4S  jjrS!r U =r!$ )%_UnslothAlignPropTraineri1  aN  
The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is
heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based
pipelines are supported

Args:
    config (`AlignPropConfig`):
        Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details.
    reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
        Reward function to be used
    prompt_function (`Callable[[], tuple[str, Any]]`):
        Function to generate prompts to guide model
    sd_pipeline (`DDPOStableDiffusionPipeline`):
        Stable Diffusion pipeline to be used for training.
    image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
        Hook to be called to log images
r   	alignpropNconfigreward_functionprompt_functionsd_pipelineimage_samples_hookc           
         [         R                  " S[        5        Uc  [        R                  " S5        X0l        X l        Xl        XPl        [        S0 U R                  R                  D6nU R                  R                  (       Ga  [        R                  R                  [        R                  R                  U R                  R                  5      5      U R                  l        S[        R                  R!                  U R                  R                  5      ;  a  [#        [%        S [        R&                  " U R                  R                  5      5      5      n[)        U5      S:X  a"  [+        SU R                  R                   35      e[-        U Vs/ s H   n[/        UR1                  S5      S   5      PM"     sn5      n	[        R                  R3                  U R                  R                  SU	S    35      U R                  l        U	S   S	-   Ul        [7        SU R                  R8                  U R                  R:                  UU R                  R<                  S
.U R                  R>                  D6U l         UR8                  S L=(       a    UR8                  S:H  n
U R@                  RB                  (       ao  U R@                  RE                  U R                  RF                  U
(       d  [I        URK                  5       S9OURK                  5       U R                  RL                  S9  [        RN                  " SU 35        [Q        U R                  RR                  SS9  X@l*        U RT                  RW                  S	U R@                  RX                  (       + SSSS9  U R@                  R:                  S:X  a  [Z        R\                  nO;U R@                  R:                  S:X  a  [Z        R^                  nO[Z        R`                  nU RT                  Rb                  Re                  U R@                  Rf                  US9  U RT                  Rh                  Re                  U R@                  Rf                  US9  U RT                  Rj                  Re                  U R@                  Rf                  US9  U RT                  Rm                  5       nU R@                  Ro                  U Rp                  5        U R@                  Rs                  U Rt                  5        U R                  Rv                  (       aL  [Z        Rx                  R{                  5       (       a)  S[Z        R|                  Rx                  R~                  l;        U R                  [        U["        5      (       d  UR                  5       OU5      U lC        U RT                  Ri                  U RT                  R                  U R                  R                  c  S/OU R                  R                  SSSU RT                  R                  R                  S9R                  Re                  U R@                  Rf                  5      5      S   U lH        U RT                  R                  =(       d    U R@                  R                  U lI        [        U RT                  S5      (       aq  U RT                  R                  (       aV  U R@                  R                  XR                  5      u  olC        [#        [%        S UR                  5       5      5      U lM        O2U R@                  R                  XR                  5      u  U lM        U lC        UR                  (       ax  [        RN                  " SUR                   35        U R@                  R                  UR                  5        [/        UR                  R1                  S5      S   5      S	-   U lO        g SU lO        g s  snf )NzEAlignPropTrainer is deprecated and will be removed in version 0.23.0.z8No image_samples_hook provided; no images will be loggedcheckpoint_c                     SU ;   $ )Nr   r   )xs    r3   <lambda>3_UnslothAlignPropTrainer.__init__.<locals>.<lambda>b  s	    -1"4r6   r   zNo checkpoints found in _rA   rG   )r   r   project_configgradient_accumulation_stepstensorboard)alignprop_trainer_config)r   init_kwargs
T)device_specificFTimestep)positiondisableleavedescdynamic_ncolsr   bf16)dtyper   pt
max_lengthreturn_tensorspadding
truncationr   use_lorac                     U R                   $ N)requires_grad)ps    r3   r   r     s    !//r6   zResuming from r   )Pr   warnDeprecationWarningr   warning	prompt_fn	reward_fnr   image_samples_callbackr   project_kwargsr   r   pathnormpath
expanduserbasenamelistfilterlistdirlenrc   sortedr   splitjoin	iterationr   r   r   r   accelerator_kwargsacceleratoris_main_processinit_trackersr   dictto_dicttracker_kwargsinfor   r   r   set_progress_bar_configis_local_main_processr   float16bfloat16rM   vaerL   rm   text_encoderunetget_trainable_layersregister_save_state_pre_hook_save_model_hookregister_load_state_pre_hook_load_model_hookr   cudais_availablebackendsmatmul_setup_optimizer
isinstance
parameters	optimizer	tokenizerr   model_max_lengthr^   neg_prompt_embedautocastr-   r   preparetrainable_layers
load_statefirst_epoch)r.   r   r   r   r   r   accelerator_project_configcheckpointsr   checkpoint_numbersis_using_tensorboardinference_dtyper   r  s                 r3   r   !_UnslothAlignPropTrainer.__init__F  s    	S	
 %NNUV((&8#%9%WDKK<V<V%W";;"""&(gg&6&6rww7I7I$++JaJa7b&cDKK#BGG$4$4T[[5L5L$MM"4

4;;#:#:; {#q($'?@W@W?X%YZZ%+K,XKqSb1A-BK,X%Y"*,'',,KK++!"4R"8!9:+'
 8J"7MPQ7Q*4& 	
[[)) KK775 )-(U(U	
 kk,,	
  &d:_vR_?_++**00+ V^^5EF^^% KK66 +  	bM"!!48&00((>>> 	1 	
 ++v5#mmO--7#nnO#mmO 0 0 7 7O%%(()9)9)@)@(X  !1!1!8!8 P++@@B55d6K6KL55d6K6KL ;;!!ejj&=&=&?&?48ENN&&1..1;<Ld1S1S'')Yi
 !% 0 0 = =&&44<$++B^B^#$++55FF '  i4++223!
 ! ((11NT5E5E5N5N4##Z00T5E5E5N5N#'#3#3#;#;<Lnn#] D.$(0I4??K\)]$^D!484D4D4L4LM]_m_m4n1D!4>KK.););(<=>''(:(:;"6#5#5#;#;C#@#DEID DA -Ys   '_2c                 B    U R                  US   US   US   5      u  p#U$ )Nimagespromptsprompt_metadata)r   )r.   prompt_image_pairsrewardreward_metadatas       r3   compute_rewards(_UnslothAlignPropTrainer.compute_rewards  s3    "&..x(*<Y*GI[\mIn#
 r6   epochglobal_stepc           
         [        [        5      nU R                  R                  R	                  5         [        U R                  R                  5       GHD  nU R                  R                  U R                  R                  5         U R                  5          [        R                  " 5          U R                  U R                  R                  S9nU R                  U5      nXeS'   U R                  R!                  U5      R#                  5       R%                  5       R'                  5       nU R)                  U5      nU R                  R+                  U5        U R                  R,                  (       as  U R                  R/                  [1        U R2                  [        5      (       d  U R2                  R5                  5       OU R2                  U R                  R6                  5        U R8                  R;                  5         U R8                  R=                  5         SSS5        SSS5        SSS5        US   R?                  WRA                  5       5        US   R?                  URC                  5       5        US   R?                  WRE                  5       5        GMG     U R                  R,                  (       a  URG                  5        V	V
s0 s H0  u  pU	[        R@                  " [        RH                  " U
5      5      _M2     nn	n
U R                  RK                  USS9nURM                  S	U05        U R                  RO                  X2S
9  US-  n[        [        5      nO[Q        S5      eU RR                  bE  X R                  RT                  -  S:X  a)  U RS                  WX R                  RV                  S   5        US:w  aQ  XR                  RX                  -  S:X  a5  U R                  RZ                  (       a  U R                  R]                  5         U$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= fs  sn
n	f )a  
Perform a single step of training.

Args:
    epoch (int): The current epoch.
    global_step (int): The current global step.

Side Effects:
    - Model weights are updated
    - Logs the statistics to the accelerator trackers.
    - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step,
      and the accelerator tracker.

Returns:
    global_step (int): The updated global step.
)ro   rewardsNreward_mean
reward_stdlossmean)	reductionr2  )steprG   zsOptimization step should have been performed by this point. Please check calculated gradient accumulation settings.r   )/r   r   r   r  trainranger   r   r   
accumulater  r   enable_grad_generate_samplesr   r0  rN   detachcpunumpycalculate_lossbackwardsync_gradientsclip_grad_norm_r  r   r  r   r  r;  	zero_gradrR   r9  stditemitemsrw   reduceupdatelogrc   r   r   trackersr   r  
save_state)r.   r2  r3  r  r   r-  r5  rewards_visr8  kvs              r3   r;  _UnslothAlignPropTrainer.step  s8   " 4 ##%t{{DDEA!!,,T-=-=-B-BCT]]_V[VgVgVi%)%;%;#{{;; &< &" ../AB079-"..55g>EEGKKMSSU**73  ))$/##22$$44)$*?*?FF --88:!2277	 ##%((*1 Wj_C4 &&{'7'7'9:%%koo&78L		,; F@ **?Czz|L|tqAuzz%,,q/22|DL##**46*BDKK%()   81Kt$D F  &&2{[[E_E_7_cd7d''(:KIYIYIbIbcdIefA:%++"7"771<AQAQAaAa'')e WjVi__CCB MsC   P9P'-E,P	P'!P9
7Q
P$P''
P61P99
Q	c                 ,    SUR                  5       -
  nU$ )z
Calculate the loss for a batch of an unpacked sample

Args:
    rewards (torch.Tensor):
        Differentiable reward scalars for each generated image, shape: [batch_size]

Returns:
    loss (torch.Tensor) (all of these are of shape (1,))
g      $@)r9  )r.   r5  r8  s      r3   rD  '_UnslothAlignPropTrainer.calculate_loss  s     wnn&&r6   
advantages
clip_rangeratioc                     U* U-  nU* [         R                  " USU-
  SU-   5      -  n[         R                  " [         R                  " XE5      5      $ )Nr   )r   clampr9  maximum)r.   rW  rX  rY  unclipped_lossclipped_losss         r3   r8  _UnslothAlignPropTrainer.loss)  sT     %u,"{U[[**&
 

 zz%--EFFr6   c                    U R                   R                  (       a  SS KnUR                  R                  nO[
        R                  R                  nU" UU R                   R                  U R                   R                  U R                   R                  4U R                   R                  U R                   R                  S9$ )Nr   )lrbetasweight_decayeps)r   r   bitsandbytesoptim	AdamW8bitr   AdamWr   r   r   r   r   )r.   trainable_layers_parametersre  optimizer_clss       r3   r  )_UnslothAlignPropTrainer._setup_optimizer7  s    ;;**(..88M!KK--M'{{..;;//1M1MN<<..
 	
r6   c                 \    U R                   R                  XU5        UR                  5         g r   )r   save_checkpointpop)r.   modelsweights
output_dirs       r3   r  )_UnslothAlignPropTrainer._save_model_hookG  s!    ((*Er6   c                 Z    U R                   R                  X5        UR                  5         g r   )r   load_checkpointrn  )r.   ro  	input_dirs      r3   r  )_UnslothAlignPropTrainer._load_model_hookK  s    ((;

r6   c                    0 nU R                   R                  USS5      nUc2  [        [        U5       Vs/ s H  o`R	                  5       PM     sn6 u  p7O[        U5       Vs/ s H  n0 PM     nnU R
                  R                  USSSU R
                  R                  R                  S9R                  R                  U R                  R                  5      nU R
                  R                  U5      S   n	U(       a  U R
                  R                  U	UU R                  R                  U R                  R                   U R                  R"                  U R                  R$                  U R                  R&                  U R                  R(                  SS9	n
OPU R                  U	UU R                  R                  U R                  R                   U R                  R"                  SS9n
U
R*                  nXS	'   X4S
'   XtS'   U$ s  snf s  snf )aE  
Generate samples from the model

Args:
    batch_size (int): Batch size to use for sampling
    with_grad (bool): Whether the generated RGBs should have gradients attached to it.
    prompts (list[str], *optional*): If provided, use these prompts instead of generating new ones.

Returns:
    prompt_image_pairs (dict[Any])
rG   r   r   Tr   r   )	prompt_embedsnegative_prompt_embedsnum_inference_stepsguidance_scaleetar   r   truncated_rand_backprop_minmaxoutput_type)rx  ry  rz  r{  r|  r~  r*  r+  r,  )r  repeatrK   r=  r   r   r  r  r^   rL   r   rm   r  rgb_with_gradr   r   r   r   r   r   r}  r*  )r.   ro   	with_gradr+  r-  sample_neg_prompt_embedsr   r,  
prompt_idsrx  	sd_outputr*  s               r3   r@  *_UnslothAlignPropTrainer._generate_samplesO  s     #'#8#8#?#?
Aq#Q ?'*uZGX,YGX!^^-=GX,Y'Z$G_+0+<=+<ar+<O=%%// ''11BB 0 
 )BBt''../ 	 ((55jA!D((66+'?$(KK$@$@#{{@@KK**(,(K(K,0KK,S,S/3{{/Y/Y  7 
I ((+'?$(KK$@$@#{{@@KK**  ) I !!'-8$(/9%0?,-!!U -Z=s   H"Hepochsc                     SnUc  U R                   R                  n[        U R                  U5       H  nU R	                  X25      nM     g)z.
Train the model for a given number of epochs
r   N)r   r   r=  r"  r;  )r.   r  r3  r2  s       r3   r<  _UnslothAlignPropTrainer.train  sB     >[[++F4++V4E))E7K 5r6   c                 Z    U R                   R                  U5        U R                  5         g r   )r   save_pretrainedcreate_model_card)r.   save_directorys     r3   _save_pretrained)_UnslothAlignPropTrainer._save_pretrained  s"    ((8 r6   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )N/rA   )
model_name)	r/   hub_model_idr   rq  namer   r  r   _save_checkpoint)r.   r*   trialr  r   s       r3   r  )_UnslothAlignPropTrainer._save_checkpoint  sj    99!!)dii22388J//55c:2>J*5 .r6   r  dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsaS          @article{prabhudesai2024aligning,
            title        = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
            author       = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki},
            year         = 2024,
            eprint       = {arXiv:2310.03739}
        }	AlignPropzCAligning Text-to-Image Diffusion Models with Reward Backpropagationz
2310.03739)
base_modelr  r  r  r  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror-   r*   r   r   r   isdirr  setr  straddenvironrM  
_tag_namesr   dedentr   r  r   wandbrunurlr   saver   r/   rq  )r.   r  r  r  r  citation
model_cards          r3   r  *_UnslothAlignPropTrainer.create_model_card  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0$%]!

 	TYY%9%9;GHr6   )r   r  r   r"  r   r  r  r   r   r   r   r   )TN)NNN)"r   r   r   r   r   r  r   r   r   r   tupler  r   r   r   r   r0  r   r;  rD  floatr8  r  r  r  r@  r<  r  r  r	   r   r  r   r   r   s   @r3   r   r   1  so   $ %J HLB!B! "5<<sU3Z"H%,,"VWB! ""eCHo"56	B!
 1B! %XsCos.B%CDB!HH# HC HTGLLG G ||	G
 ;"z8HSM 8!
/ %)&*,0	@ISM@I sm@I CcD()	@I @Ir6   r   c                   2   ^  \ rS rSrSr SU 4S jjrSrU =r$ )UnslothAlignPropTraineri  aX  
    
The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is
heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based
pipelines are supported

Args:
    config (`AlignPropConfig`):
        Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details.
    reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
        Reward function to be used
    prompt_function (`Callable[[], tuple[str, Any]]`):
        Function to generate prompts to guide model
    sd_pipeline (`DDPOStableDiffusionPipeline`):
        Stable Diffusion pipeline to be used for training.
    image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
        Hook to be called to log images

    c           	        > Wc
  [        5       n/ nSSKJn	  U	" SU5        [        USS 5      [        R
                  :X  a(  UR                  S:  a  [        USS5      S:w  a  SUl        S[        5       ;   a)  [        [        S5      (       a  [        R                  5         [        T
U ]4  " SUUUUUS	.UD6  S[        5       ;   a+  [        [        S
5      (       a  [        R                  5         g g g )Nr   )PatchRLStatisticsalignprop_trainerparallel_moderG   _n_gpur*   r+   )r   r   r   r   r   r,   r   )r   unsloth_zoo.logging_utilsr  getattrr&   NOT_DISTRIBUTEDn_gpur  localsr-   r*   r+   r   r   r,   )r.   r   r   r   r   r   r0   r/   other_metricsr  r   s             r3   r    UnslothAlignPropTrainer.__init__  s     < 6 8?-}= 4$/<3O3OOTXT^T^abTbtXq)Q.fh75.#A#A  	>--%!3	>
 7=	> fh75/#B#B! $Cr6   r   r   )r   r   r   r   r   r   r   r   r   s   @r3   r  r    s    2 "" "r6   r  	addFilterc                        \ rS rSrS rS rSrg)HideLoggingMessagei  c                     Xl         g r   text)r.   r  s     r3   r   HideLoggingMessage.__init__  s    d)r6   c                 <    U R                   UR                  5       ;  $ r   )r  
getMessage)r.   r   s     r3   r   HideLoggingMessage.filter  s    alln)DEr6   r  N)r   r   r   r   r   r   r   r   r6   r3   r  r    s    2Er6   r  z`use_cache=True`)Cr   r   r   torch.nnnnr   Ftypingr   r   r   r   r	   r
   r   r   trl.trainer.alignprop_trainerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   dataclassesr    r!   packaging.versionr"   rC  np
contextlibr#   transformersr$   r%   +TransformersDataCollatorForLanguageModelingtransformers.training_argsr&   r7   typesr'   r9   torch_compile_optionscompiler]   r   rh   rv   r   r   r   r  r-   Filterr  r  r   r6   r3   <module>r     s  0    $ I I I T  T  T  T  T  T 
  ( %   " $  3      4;PR S"||  \\	&,, %  	
 \\6ell C ELL  ^5_ ^5 ^5@ rI3 rIf1"6 1"f  6;FW^^ F 	
'(:;<  r6   