
    hp                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJr  S SK	J
r
JrJrJr  S SKrS SKJrJr  S SKJrJr  S SKJr  S S	KJr  S
SKJr  SSKJr  SSKJrJrJ r   \" 5       (       a  S SK!r!\RD                  " \#5      r$ " S S\5      r%g)    N)defaultdict)futures)Path)AnyCallableOptionalUnion)Acceleratorlogging)ProjectConfigurationset_seed)PyTorchModelHubMixin)is_wandb_available   )DDPOStableDiffusionPipeline   )
DDPOConfig)PerPromptStatTrackergenerate_model_cardget_comet_experiment_urlc                     ^  \ rS rSrSrSS/r S%S\S\\R                  \
\   \
\   /\R                  4   S\/ \
\\4   4   S	\S
\\\\\/\4      4
S jjrS&S jrS\S\4S jrS rS\R                  S\S\R                  4S jrS rS rS rS rS rS\
\\4   4S jrS%S\\   4S jjrS rU 4S jr   S'S \\   S!\\   S"\ \\!\   S4   4S# jjr"S$r#U =r$$ )(DDPOTrainer)   a/  
The DDPOTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is heavily
inspired by the work here: https://github.com/kvablack/ddpo-pytorch As of now only Stable Diffusion based pipelines
are supported

Args:
    config ([`DDPOConfig`]):
        Configuration object for DDPOTrainer. Check the documentation of [`PPOConfig`] for more details.
    reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
        Reward function to be used.
    prompt_function (`Callable[[], tuple[str, Any]]`): Function to generate prompts to guide model
    sd_pipeline ([`DDPOStableDiffusionPipeline`]): Stable Diffusion pipeline to be used for training.
    image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`): Hook to be called to log images.
trlddpoNconfigreward_functionprompt_functionsd_pipelineimage_samples_hookc           
         [         R                  " S[        5        Uc  [        R	                  S5        X0l        X l        Xl        XPl        [        S0 U R                  R                  D6nU R                  R                  (       Ga  [        R                  R                  [        R                  R                  U R                  R                  5      5      U R                  l        S[        R                  R!                  U R                  R                  5      ;  a  [#        [%        S [        R&                  " U R                  R                  5      5      5      n[)        U5      S:X  a"  [+        SU R                  R                   35      e[-        U Vs/ s H   n[/        UR1                  S5      S   5      PM"     sn5      n	[        R                  R3                  U R                  R                  SU	S    35      U R                  l        U	S   S	-   Ul        [/        U R                  R6                  U R                  R8                  -  5      U l        [=        SU R                  R>                  U R                  R@                  UU R                  RB                  U R:                  -  S
.U R                  RD                  D6U l#        U RI                  5       u  pU
(       d  [+        U5      eUR>                  S L=(       a    UR>                  S:H  nU RF                  RJ                  (       ao  U RF                  RM                  U R                  RN                  U(       d  [Q        URS                  5       S9OURS                  5       U R                  RT                  S9  [        RW                  SU 35        [Y        U R                  RZ                  SS9  X@l.        U R\                  R_                  S	U RF                  R`                  (       + SSSS9  U RF                  R@                  S:X  a  [b        Rd                  nO;U RF                  R@                  S:X  a  [b        Rf                  nO[b        Rh                  nU R\                  Rj                  Rm                  U RF                  Rn                  US9  U R\                  Rp                  Rm                  U RF                  Rn                  US9  U R\                  Rr                  Rm                  U RF                  Rn                  US9  U R\                  Ru                  5       nU RF                  Rw                  U Rx                  5        U RF                  R{                  U R|                  5        U R                  R~                  (       aL  [b        R                  R                  5       (       a)  S[b        R                  R                  R                  l?        U R                  [        U["        5      (       d  UR                  5       OU5      U lG        U R\                  Rq                  U R\                  R                  U R                  R                  c  S/OU R                  R                  SSSU R\                  R                  R                  S9R                  Rm                  U RF                  Rn                  5      5      S   U lL        UR                  (       a%  [        UR                  UR                  5      U lQ        U R\                  R                  =(       d    U RF                  R                  U lR        [        U R\                  S5      (       aq  U R\                  R                  (       aV  U RF                  R                  XR                  5      u  olG        [#        [%        S UR                  5       5      5      U lV        O2U RF                  R                  XR                  5      u  U lV        U lG        U R                  R                  (       a#  [        R                  " UR                  S9U l[        UR                  (       aw  [        RW                  SUR                   35        U RF                  R                  UR                  5        [/        UR                  R1                  S5      S   5      S	-   U l]        g SU l]        g s  snf ) Nz@DDPOTrainer is deprecated and will be removed in version 0.23.0.z8No image_samples_hook provided; no images will be loggedcheckpoint_c                     SU ;   $ )Nr"    )xs    R/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/ddpo_trainer.py<lambda>&DDPOTrainer.__init__.<locals>.<lambda>W   s	    -1"4    r   zNo checkpoints found in _r   )log_withmixed_precisionproject_configgradient_accumulation_stepstensorboard)ddpo_trainer_config)r   init_kwargs
T)device_specificFTimestep)positiondisableleavedescdynamic_ncolsfp16bf16)dtype pt
max_lengthreturn_tensorspadding
truncationr@   use_lorac                     U R                   $ N)requires_grad)ps    r&   r'   r(      s    !//r)   )max_workerszResuming from r$   )^warningswarnDeprecationWarningloggerwarning	prompt_fn	reward_fnr   image_samples_callbackr   project_kwargsresume_fromospathnormpath
expanduserbasenamelistfilterlistdirlen
ValueErrorsortedintsplitjoin	iterationsample_num_stepstrain_timestep_fractionnum_train_timestepsr
   r,   r-   !train_gradient_accumulation_stepsaccelerator_kwargsaccelerator_config_checkis_main_processinit_trackerstracker_project_namedictto_dicttracker_kwargsinfor   seedr   set_progress_bar_configis_local_main_processtorchfloat16bfloat16float32vaetodevicetext_encoderunetget_trainable_layersregister_save_state_pre_hook_save_model_hookregister_load_state_pre_hook_load_model_hook
allow_tf32cudais_availablebackendsmatmul_setup_optimizer
isinstance
parameters	optimizer	tokenizernegative_promptsmodel_max_length	input_idsneg_prompt_embedper_prompt_stat_trackingr   $per_prompt_stat_tracking_buffer_size"per_prompt_stat_tracking_min_countstat_trackerautocasthasattrrE   preparetrainable_layersasync_reward_computationr   ThreadPoolExecutorrJ   executor
load_statefirst_epoch)selfr   r   r   r   r    accelerator_project_configcheckpointsr%   checkpoint_numbersis_okaymessageis_using_tensorboardinference_dtyper   r}   s                   r&   __init__DDPOTrainer.__init__;   s    	N	
 %NNUV((&8#%9%WDKK<V<V%W";;"""&(gg&6&6rww7I7I$++JaJa7b&cDKK#BGG$4$4T[[5L5L$MM"4

4;;#:#:; {#q($'?@W@W?X%YZZ%+K,XKqSb1A-BK,X%Y"*,'',,KK++!"4R"8!9:+'
 8J"7MPQ7Q*4 $'t{{'C'CdkkFiFi'i#j & 	
[[)) KK775 )-(U(UX\XpXp(p	
 kk,,	
  --/W%%%d:_vR_?_++**00I]t0@Acicqcqcs KK66 +  	bM"!!48&00((>>> 	1 	
 ++v5#mmO--7#nnO#mmO 0 0 7 7O%%(()9)9)@)@(X  !1!1!8!8 P++@@B55d6K6KL55d6K6KL ;;!!ejj&=&=&?&?48ENN&&1..1;<Ld1S1S'')Yi
 !% 0 0 = =&&44<$++B^B^#$++55FF '  i4++223!
 ! ** 4;;99!D ((11NT5E5E5N5N4##Z00T5E5E5N5N#'#3#3#;#;<Lnn#] D.$(0I4??K\)]$^D!484D4D4L4LM]_m_m4n1D!4>;;//#666CUCUVDMKK.););(<=>''(:(:;"6#5#5#;#;C#@#DEID D] -Ys   'cc           	        ^  U(       d]  / nU HT  u  pEnT R                  XEU5      u  pxUR                  [        R                  " UT R                  R
                  S9U45        MV     O{T R                  R                  U 4S jU5      nU VVs/ s HL  u  px[        R                  " UR                  5       T R                  R
                  S9UR                  5       4PMN     nnn[        U6 $ s  snnf )Nr{   c                 "   > TR                   " U 6 $ rG   )rQ   )r%   r   s    r&   r'   -DDPOTrainer.compute_rewards.<locals>.<lambda>   s    $..!2Dr)   )
rQ   appendru   	as_tensorri   r{   r   mapresultzip)	r   prompt_image_pairsis_asyncrewardsimagespromptsprompt_metadatarewardreward_metadatas	   `        r&   compute_rewardsDDPOTrainer.compute_rewards   s    G4F0*.../*Z't7G7G7N7NO' 5G mm''(DFXYG 07/6+F 9I9I9P9PQSbSiSiSkl/6  
 G}s   AC)epochglobal_stepc                 
   U R                  U R                  R                  U R                  R                  S9u  p4US   R	                  5        VVs0 s H,  oU[
        R                  " U Vs/ s H  ofU   PM	     sn5      _M.     nnnU R                  X@R                  R                  S9u  px[        U5       H  u  pU
R                  Xy   X   /5        M     U R                  b)  U R                  XBU R                  R                  S   5        [
        R                  " U5      nU R                  R                  U5      R                  5       R!                  5       nU R                  R#                  UUUR%                  5       UR'                  5       S.US9  U R                  R(                  (       az  U R                  R                  US   5      R                  5       R!                  5       nU R*                  R,                  R/                  USS	9nU R0                  R3                  X5      nO&XwR%                  5       -
  UR'                  5       S
-   -  n[
        R4                  " U5      R7                  U R                  R8                  S5      U R                  R:                     R=                  U R                  R>                  5      US'   US	 US   R@                  u  p[C        U R                  RD                  5       GH  n[
        RF                  " XR                  R>                  S9nURI                  5        VVs0 s H  u  nnUUU   _M     nnn[
        RJ                  " [C        U5       Vs/ s H+  n[
        RF                  " XR                  R>                  S9PM-     sn5      nS H=  nUU   [
        RL                  " XR                  R>                  S9SS2S4   U4   UU'   M?     UR	                  5       nURO                  5       nU Vs/ s H8  nUR6                  " SU R                  RP                  /UR@                  SS Q76 PM:     nn[S        U6 nU Vs/ s H  n[U        [S        UU5      5      PM     nnU R*                  RV                  RY                  5         U R[                  UXU5      nU R                  R\                  (       a  GM  [_        S5      e   US:w  aQ  XR                  R`                  -  S:X  a5  U R                  Rb                  (       a  U R                  Re                  5         U$ s  snf s  snnf s  snnf s  snf s  snf s  snf )a  
Perform a single step of training.

Args:
    epoch (int): The current epoch.
    global_step (int): The current global step.

Side Effects:
    - Model weights are updated
    - Logs the statistics to the accelerator trackers.
    - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step,
      and the accelerator tracker.

Returns:
    global_step (int): The updated global step.

)
iterations
batch_sizer   )r   N)r   r   reward_mean
reward_stdstep
prompt_idsT)skip_special_tokensg:0yE>r+   
advantages	timestepsr   )r   latentsnext_latents	log_probsr   zsOptimization step should have been performed by this point. Please check calculated gradient accumulation settings.)3_generate_samplesr   sample_num_batches_per_epochsample_batch_sizekeysru   catr   r   	enumerateextendrR   ri   trackersgathercpunumpylogmeanstdr   r   r   batch_decoder   updater   reshapenum_processesprocess_indexrz   r{   shaperangetrain_num_inner_epochsrandpermitemsstackarangevaluestrain_batch_sizer   rn   r}   train_train_batched_samplessync_gradientsr^   	save_freqrk   
save_state)r   r   r   samplesprompt_image_dataksr   rewards_metadatai
image_datar   r   r   total_batch_sizenum_timestepsinner_epochpermvr*   permskeyoriginal_keysoriginal_valuesreshaped_valuestransposed_values
row_valuessamples_batcheds                               r&   r   DDPOTrainer.step   s   $ &*%;%;{{??{{44 &< &
" CJ!*//BSTBSQeiiw 7w!1w 788BST$($8$8(L(L %9 %
! ''89MAwz+;+>?@ : &&2''(9HXHXHaHabcHde))G$""))'2668>>@!&||~%kkm	  	 	
 ;;//))001FGKKMSSUJ&&00==j^b=cG**11'CJ!LLN2w{{}t7KLJ OOJ'WT%%33R89I9I9W9WYR  ''( 	 L!*1+*>*D*D' !C!CDK>>"2;K;K;R;RSD.5mmo>odaq!D'zoG> KKX]^nXopXoST6F6F6M6MNXopE M&s|LL!1:J:J:Q:QRSTVZSZ[  M $LLNM%nn.Obqrbq]^qyyT[[-I-IXAGGTUTVKXbqOr !$_ 5VghVg
tCz$BCVgOh!!'')55k5WfgK##222  J ? EF A:%++"7"771<AQAQAaAa'')c !8T\ ?
 q s
 is0   T:+T59T:9U ,2U
?UU5T:c           	      N   U R                  5          U R                  R                  (       a  U R                  R	                  [
        R                  " U/S-  5      [
        R                  " U/S-  5      U5      R                  nUR                  S5      u  pXR                  R                  X-
  -  -   nO'U R                  R	                  UUU5      R                  nU R                  R                  UUUU R                  R                  US9n
U
R                  nSSS5        [
        R                  " UU R                  R                  * U R                  R                  5      n[
        R                  " WU-
  5      nU R!                  XPR                  R"                  U5      nS[
        R$                  " X-
  S-  5      -  n[
        R$                  " [
        R&                  " US-
  5      U R                  R"                  :  R)                  5       5      nXU4$ ! , (       d  f       GN= f)a  
Calculate the loss for a batch of an unpacked sample

Args:
    latents (torch.Tensor):
        The latents sampled from the diffusion model, shape: [batch_size, num_channels_latents, height, width]
    timesteps (torch.Tensor):
        The timesteps sampled from the diffusion model, shape: [batch_size]
    next_latents (torch.Tensor):
        The next latents sampled from the diffusion model, shape: [batch_size, num_channels_latents, height,
        width]
    log_probs (torch.Tensor):
        The log probabilities of the latents, shape: [batch_size]
    advantages (torch.Tensor):
        The advantages of the latents, shape: [batch_size]
    embeds (torch.Tensor):
        The embeddings of the prompts, shape: [2*batch_size or batch_size, ...] Note: the "or" is because if
        train_cfg is True, the expectation is that negative prompts are concatenated to the embeds

Returns:
    loss (torch.Tensor), approx_kl (torch.Tensor), clipfrac (torch.Tensor) (all of these are of shape (1,))
r   )etaprev_sampleNg      ?      ?)r   r   	train_cfgr   r}   ru   r   samplechunksample_guidance_scalescheduler_step
sample_etar   clamptrain_adv_clip_maxexplosstrain_clip_ranger   absfloat)r   r   r   r   r   r   embeds
noise_prednoise_pred_uncondnoise_pred_textscheduler_step_outputlog_probratior  	approx_klclipfracs                   r&   calculate_lossDDPOTrainer.calculate_lossL  s   . ]]_{{$$!--22IIwi!m,IIykAo. &	 
 6@5E5Ea5H2!.1R1R#72 
 "--22 &	  %)$4$4$C$CKK**( %D %! -66H7 : [[[[+++KK**

 		(Y./yy[[%A%A5I%**h&:q%@AA	::uyy58T8TT[[]^((W _s   DH
H$r   
clip_ranger  c                     U* U-  nU* [         R                  " USU-
  SU-   5      -  n[         R                  " [         R                  " XE5      5      $ )Nr  )ru   r  r   maximum)r   r   r  r  unclipped_lossclipped_losss         r&   r  DDPOTrainer.loss  sT     %u,"{U[[**&
 

 zz%--EFFr)   c                    U R                   R                  (       a  SS KnUR                  R                  nO[
        R                  R                  nU" UU R                   R                  U R                   R                  U R                   R                  4U R                   R                  U R                   R                  S9$ )Nr   )lrbetasweight_decayeps)r   train_use_8bit_adambitsandbytesoptim	AdamW8bitru   AdamWtrain_learning_ratetrain_adam_beta1train_adam_beta2train_adam_weight_decaytrain_adam_epsilon)r   trainable_layers_parametersr)  optimizer_clss       r&   r   DDPOTrainer._setup_optimizer  s    ;;**(..88M!KK--M'{{..;;//1M1MN<<..
 	
r)   c                 \    U R                   R                  XU5        UR                  5         g rG   )r   save_checkpointpop)r   modelsweights
output_dirs       r&   r   DDPOTrainer._save_model_hook  s!    ((*Er)   c                 Z    U R                   R                  X5        UR                  5         g rG   )r   load_checkpointr7  )r   r8  	input_dirs      r&   r   DDPOTrainer._load_model_hook  s    ((;

r)   c                    / n/ nU R                   R                  R                  5         U R                  R	                  USS5      n[        U5       GH  n[        [        U5       Vs/ s H  o`R                  5       PM     sn6 u  pxU R                   R                  USSSU R                   R                  R                  S9R                  R                  U R                  R                  5      n	U R                   R                  U	5      S   n
U R                  5          U R                  U
UU R                   R"                  U R                   R$                  U R                   R&                  SS9nUR(                  nUR*                  nUR,                  nSSS5        [.        R0                  " WSS	9n[.        R0                  " WSS	9nU R                   R2                  R4                  R	                  US5      nUR7                  U	U
UUSS2SS
24   USS2SS24   UUS.5        UR7                  WXx/5        GM     X44$ s  snf ! , (       d  f       N= f)z
Generate samples from the model

Args:
    iterations (int): Number of iterations to generate samples for
    batch_size (int): Batch size to use for sampling

Returns:
    samples (list[dict[str, torch.Tensor]]), prompt_image_pairs (list[list[Any]])
r   r?   r@   TrA   r   )prompt_embedsnegative_prompt_embedsnum_inference_stepsguidance_scaler  output_typeN)dimr+   )r   rA  r   r   r   r   rB  )r   r}   evalr   repeatr   r   rP   r   r   r   rz   ri   r{   r|   r   r   rd   r  r
  r   r   r   ru   r   	schedulerr   r   )r   r   r   r   r   sample_neg_prompt_embedsr*   r   r   r   rA  	sd_outputr   r   r   r   s                   r&   r   DDPOTrainer._generate_samples  s    ""$#'#8#8#?#?
Aq#Q z"A'*uZGX,YGX!^^-=GX,Y'Z$G))33#$++55FF 4  i4++223  !,,99*EaHM ,,"/+C(,(D(D#';;#D#D.. $ - 	 #))#++%//	 ! kk'q1GI15I((22<<CCJPQRINN",%2!*&q#2#v$+AqrEN!*.F
 %%vw&HIS #V **U -Z !s   'H9
A5H>>
I	c                    [        [        5      n[        U5       GH  u  pgU R                  R                  (       a  [
        R                  " US   US   /5      nOUS   n[        U R                  5       GH^  n	U R                  R                  U R                  R                  5         U R                  US   SS2U	4   US   SS2U	4   US   SS2U	4   US   SS2U	4   US   U5      u  pnUS	   R                  U5        US
   R                  U5        US   R                  U
5        U R                  R                  U
5        U R                  R                   (       as  U R                  R#                  [%        U R&                  [        5      (       d  U R&                  R)                  5       OU R&                  U R                  R*                  5        U R,                  R/                  5         U R,                  R1                  5         SSS5        U R                  R                   (       d  GM  UR3                  5        VVs0 s H0  u  pU[
        R4                  " [
        R6                  " U5      5      _M2     nnnU R                  R9                  USS9nUR;                  X!S.5        U R                  R=                  XSS9  US-  n[        [        5      nGMa     GM     U$ ! , (       d  f       N= fs  snnf )a  
Train on a batch of samples. Main training segment

Args:
    inner_epoch (int): The current inner epoch
    epoch (int): The current epoch
    global_step (int): The current global step
    batched_samples (list[dict[str, torch.Tensor]]): The batched samples to train on

Side Effects:
    - Model weights are updated
    - Logs the statistics to the accelerator trackers.

Returns:
    global_step (int): The updated global step
rB  rA  r   Nr   r   r   r   r  r  r  r   )	reduction)r   r   r   r   )r   rZ   r   r   r  ru   r   r   rf   ri   
accumulater   r}   r  r   backwardr   clip_grad_norm_r   r   r   train_max_grad_normr   r   	zero_gradr   r   r   reducer   r   )r   r   r   r   batched_samplesrq   _ir  r  jr  r  r  r   r   s                  r&   r   "DDPOTrainer._train_batched_samples  sx   " 4 #O4JB{{$$F+C$Df_F]#^_04334%%001A1A1F1FG040C0Cy)!Q$/{+AqD1~.q!t4{+AqD1|,1-DX %,,Y7$++H5L''-$$--d3''66((88#-d.C.CT#J#J !11<<>!%!6!6 KK;;	 NN'')NN,,.- H2 ##222FJjjlSldaAuzz%++a.99lDS++22462JDKK% LM$$(((@1$K&t,DC 5 5R C HG6 Ts   (EK?7K0

K-returnc                    U R                   R                  U R                  R                  -  U R                   R                  -  nU R                   R
                  U R                  R                  -  U R                   R                  -  nU R                   R                  U R                   R
                  :  d3  SSU R                   R                   SU R                   R
                   S34$ U R                   R                  U R                   R
                  -  S:X  d3  SSU R                   R                   SU R                   R
                   S34$ X-  S:X  d  SSU SU S34$ g	)
NFzSample batch size (z9) must be greater than or equal to the train batch size ()r   z-) must be divisible by the train batch size (zNumber of samples per epoch (z3) must be divisible by the total train batch size ()Tr>   )r   r   ri   r   r   r   rg   )r   samples_per_epochtotal_train_batch_sizes      r&   rj   DDPOTrainer._config_check1  s   KK))D,<,<,J,JJT[[MuMuu 	 KK((,,-kk;;< 	 {{,,0L0LL%dkk&C&C%DD}  C  J  J  [  [  ~\  \]  ^  {{,,t{{/K/KKqP%dkk&C&C%DDqrvr}r}  sO  sO  rP  PQ  R  !9Q>/0A/BBu  wM  vN  NO  P  r)   epochsc                     SnUc  U R                   R                  n[        U R                  U5       H  nU R	                  X25      nM     g)z.
Train the model for a given number of epochs
r   N)r   
num_epochsr   r   r   )r   r_  r   r   s       r&   r   DDPOTrainer.trainL  sB     >[[++F4++V4E))E7K 5r)   c                 Z    U R                   R                  U5        U R                  5         g rG   )r   save_pretrainedcreate_model_card)r   save_directorys     r&   _save_pretrainedDDPOTrainer._save_pretrainedV  s"    ((8 r)   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )N/r+   )
model_name)	argshub_model_idr   r:  namera   re  super_save_checkpoint)r   modeltrialrk  	__class__s       r&   rp  DDPOTrainer._save_checkpoint[  sj    99!!)dii22388J//55c:2>J*5 .r)   rk  dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsa          @inproceedings{black2024training,
            title        = {{Training Diffusion Models with Reinforcement Learning}},
            author       = {Kevin Black and Michael Janner and Yilun Du and Ilya Kostrikov and Sergey Levine},
            year         = 2024,
            booktitle    = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
            publisher    = {OpenReview.net},
            url          = {https://openreview.net/forum?id=YCWjhGrJFD},
        }DDPOz5Training Diffusion Models with Reinforcement Learningz
2305.13301)
base_modelrk  rm  ru  rv  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   rq  r   rU   rV   isdirrx  setr   straddenvironr   
_tag_namestextwrapdedentr   rm  r   wandbrunurlr   saverb   rl  r:  )r   rk  ru  rv  r~  citation
model_cards          r&   re  DDPOTrainer.create_model_cardc  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0%O!

 	TYY%9%9;GHr)   )ri   r   r   r   r   rR   r   rf   r   rP   rQ   r   r   r   rG   )F)NNN)%__name__
__module____qualname____firstlineno____doc__r  r   r   ru   Tensortupler  r   r   r   r   r   r`   r   r  r  r  r   r   r   r   r   boolrj   r   rg  rp  r	   rZ   re  __static_attributes____classcell__)rs  s   @r&   r   r   )   s    J HLP!P! "5<<sU3Z"H%,,"VWP! ""eCHo"56	P!
 1P! %XsCos.B%CDP!d(i# iC iVB)HGLLG G ||	G
 <+|;zuT3Y/ 68HSM 8!
/ %)&*,0	BISMBI smBI CcD()	BI BIr)   r   )&rU   r  rK   collectionsr   
concurrentr   pathlibr   typingr   r   r   r	   ru   
accelerater
   r   accelerate.utilsr   r   huggingface_hubr   transformersr   r8  r   ddpo_configr   utilsr   r   r   r  
get_loggerr  rN   r   r$   r)   r&   <module>r     sk    
   #   1 1  + ; 0 + 0 # V V  
		H	%|	I& |	Ir)   