
    hK                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
Jr  S SKrS SKJrJr  S SKJrJr  S SKJr  S SKJr  S	S
KJr  SSKJr  SSKJrJr  \" 5       (       a  S SKr\R>                  " \ 5      r! " S S\5      r"g)    N)defaultdict)Path)AnyCallableOptionalUnion)Acceleratorlogging)ProjectConfigurationset_seed)PyTorchModelHubMixin)is_wandb_available   )DDPOStableDiffusionPipeline   )AlignPropConfig)generate_model_cardget_comet_experiment_urlc                     ^  \ rS rSrSrSS/r S"S\S\\R                  \
\   \
\   /\R                  4   S\/ \
\\4   4   S	\S
\\\\\/\4      4
S jjrS rS\S\4S jrS rS\R                  S\S\R                  4S jrS rS rS rS#S jrS"S\\   4S jjrS rU 4S jr   S$S\\   S\\   S\\\\   S4   4S  jjrS!r U =r!$ )%AlignPropTrainer'   aN  
The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is
heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based
pipelines are supported

Args:
    config (`AlignPropConfig`):
        Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details.
    reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
        Reward function to be used
    prompt_function (`Callable[[], tuple[str, Any]]`):
        Function to generate prompts to guide model
    sd_pipeline (`DDPOStableDiffusionPipeline`):
        Stable Diffusion pipeline to be used for training.
    image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
        Hook to be called to log images
trl	alignpropNconfigreward_functionprompt_functionsd_pipelineimage_samples_hookc           
         [         R                  " S[        5        Uc  [        R	                  S5        X0l        X l        Xl        XPl        [        S0 U R                  R                  D6nU R                  R                  (       Ga  [        R                  R                  [        R                  R                  U R                  R                  5      5      U R                  l        S[        R                  R!                  U R                  R                  5      ;  a  [#        [%        S [        R&                  " U R                  R                  5      5      5      n[)        U5      S:X  a"  [+        SU R                  R                   35      e[-        U Vs/ s H   n[/        UR1                  S5      S   5      PM"     sn5      n	[        R                  R3                  U R                  R                  SU	S    35      U R                  l        U	S   S	-   Ul        [7        SU R                  R8                  U R                  R:                  UU R                  R<                  S
.U R                  R>                  D6U l         UR8                  S L=(       a    UR8                  S:H  n
U R@                  RB                  (       ao  U R@                  RE                  U R                  RF                  U
(       d  [I        URK                  5       S9OURK                  5       U R                  RL                  S9  [        RO                  SU 35        [Q        U R                  RR                  SS9  X@l*        U RT                  RW                  S	U R@                  RX                  (       + SSSS9  U R@                  R:                  S:X  a  [Z        R\                  nO;U R@                  R:                  S:X  a  [Z        R^                  nO[Z        R`                  nU RT                  Rb                  Re                  U R@                  Rf                  US9  U RT                  Rh                  Re                  U R@                  Rf                  US9  U RT                  Rj                  Re                  U R@                  Rf                  US9  U RT                  Rm                  5       nU R@                  Ro                  U Rp                  5        U R@                  Rs                  U Rt                  5        U R                  Rv                  (       aL  [Z        Rx                  R{                  5       (       a)  S[Z        R|                  Rx                  R~                  l;        U R                  [        U["        5      (       d  UR                  5       OU5      U lC        U RT                  Ri                  U RT                  R                  U R                  R                  c  S/OU R                  R                  SSSU RT                  R                  R                  S9R                  Re                  U R@                  Rf                  5      5      S   U lH        U RT                  R                  =(       d    U R@                  R                  U lI        [        U RT                  S5      (       aq  U RT                  R                  (       aV  U R@                  R                  XR                  5      u  olC        [#        [%        S UR                  5       5      5      U lM        O2U R@                  R                  XR                  5      u  U lM        U lC        UR                  (       aw  [        RO                  SUR                   35        U R@                  R                  UR                  5        [/        UR                  R1                  S5      S   5      S	-   U lO        g SU lO        g s  snf )NzEAlignPropTrainer is deprecated and will be removed in version 0.23.0.z8No image_samples_hook provided; no images will be loggedcheckpoint_c                     SU ;   $ )Nr     )xs    W/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/alignprop_trainer.py<lambda>+AlignPropTrainer.__init__.<locals>.<lambda>X   s	    -1"4    r   zNo checkpoints found in _r   )log_withmixed_precisionproject_configgradient_accumulation_stepstensorboard)alignprop_trainer_config)r   init_kwargs
T)device_specificFTimestep)positiondisableleavedescdynamic_ncolsfp16bf16)dtype pt
max_lengthreturn_tensorspadding
truncationr>   use_lorac                     U R                   $ N)requires_grad)ps    r$   r%   r&      s    !//r'   zResuming from r"   )PwarningswarnDeprecationWarningloggerwarning	prompt_fn	reward_fnr   image_samples_callbackr   project_kwargsresume_fromospathnormpath
expanduserbasenamelistfilterlistdirlen
ValueErrorsortedintsplitjoin	iterationr	   r*   r+   !train_gradient_accumulation_stepsaccelerator_kwargsacceleratoris_main_processinit_trackerstracker_project_namedictto_dicttracker_kwargsinfor   seedr   set_progress_bar_configis_local_main_processtorchfloat16bfloat16float32vaetodevicetext_encoderunetget_trainable_layersregister_save_state_pre_hook_save_model_hookregister_load_state_pre_hook_load_model_hook
allow_tf32cudais_availablebackendsmatmul_setup_optimizer
isinstance
parameters	optimizer	tokenizernegative_promptsmodel_max_length	input_idsneg_prompt_embedautocasthasattrrC   preparetrainable_layers
load_statefirst_epoch)selfr   r   r   r   r   accelerator_project_configcheckpointsr#   checkpoint_numbersis_using_tensorboardinference_dtyper   rv   s                 r$   __init__AlignPropTrainer.__init__<   s    	S	
 %NNUV((&8#%9%WDKK<V<V%W";;"""&(gg&6&6rww7I7I$++JaJa7b&cDKK#BGG$4$4T[[5L5L$MM"4

4;;#:#:; {#q($'?@W@W?X%YZZ%+K,XKqSb1A-BK,X%Y"*,'',,KK++!"4R"8!9:+'
 8J"7MPQ7Q*4& 	
[[)) KK775 )-(U(U	
 kk,,	
  &d:_vR_?_++**00+ V^^5EF^^% KK66 +  	bM"!!48&00((>>> 	1 	
 ++v5#mmO--7#nnO#mmO 0 0 7 7O%%(()9)9)@)@(X  !1!1!8!8 P++@@B55d6K6KL55d6K6KL ;;!!ejj&=&=&?&?48ENN&&1..1;<Ld1S1S'')Yi
 !% 0 0 = =&&44<$++B^B^#$++55FF '  i4++223!
 ! ((11NT5E5E5N5N4##Z00T5E5E5N5N#'#3#3#;#;<Lnn#] D.$(0I4??K\)]$^D!484D4D4L4LM]_m_m4n1D!4>KK.););(<=>''(:(:;"6#5#5#;#;C#@#DEID DA -Ys   '_/c                 B    U R                  US   US   US   5      u  p#U$ )Nimagespromptsprompt_metadata)rN   )r   prompt_image_pairsrewardreward_metadatas       r$   compute_rewards AlignPropTrainer.compute_rewards   s3    "&..x(*<Y*GI[\mIn#
 r'   epochglobal_stepc           
         [        [        5      nU R                  R                  R	                  5         [        U R                  R                  5       GHD  nU R                  R                  U R                  R                  5         U R                  5          [        R                  " 5          U R                  U R                  R                  S9nU R                  U5      nXeS'   U R                  R!                  U5      R#                  5       R%                  5       R'                  5       nU R)                  U5      nU R                  R+                  U5        U R                  R,                  (       as  U R                  R/                  [1        U R2                  [        5      (       d  U R2                  R5                  5       OU R2                  U R                  R6                  5        U R8                  R;                  5         U R8                  R=                  5         SSS5        SSS5        SSS5        US   R?                  WRA                  5       5        US   R?                  URC                  5       5        US   R?                  WRE                  5       5        GMG     U R                  R,                  (       a  URG                  5        V	V
s0 s H0  u  pU	[        R@                  " [        RH                  " U
5      5      _M2     nn	n
U R                  RK                  USS9nURM                  S	U05        U R                  RO                  X2S
9  US-  n[        [        5      nO[Q        S5      eU RR                  bE  X R                  RT                  -  S:X  a)  U RS                  WX R                  RV                  S   5        US:w  aQ  XR                  RX                  -  S:X  a5  U R                  RZ                  (       a  U R                  R]                  5         U$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= fs  sn
n	f )a  
Perform a single step of training.

Args:
    epoch (int): The current epoch.
    global_step (int): The current global step.

Side Effects:
    - Model weights are updated
    - Logs the statistics to the accelerator trackers.
    - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step,
      and the accelerator tracker.

Returns:
    global_step (int): The updated global step.
)
batch_sizerewardsNreward_mean
reward_stdlossmean)	reductionr   )stepr   zsOptimization step should have been performed by this point. Please check calculated gradient accumulation settings.r   )/r   rW   r   rv   trainranger   ra   rc   
accumulater   rn   enable_grad_generate_samplestrain_batch_sizer   gatherdetachcpunumpycalculate_lossbackwardsync_gradientsclip_grad_norm_r   r   r   train_max_grad_normr   r   	zero_gradappendr   stditemitemstensorreduceupdatelogr[   rO   log_image_freqtrackers	save_freqrd   
save_state)r   r   r   rj   r(   r   r   rewards_visr   kvs              r$   r   AlignPropTrainer.step   s8   " 4 ##%t{{DDEA!!,,T-=-=-B-BCT]]_V[VgVgVi%)%;%;#{{;; &< &" ../AB079-"..55g>EEGKKMSSU**73  ))$/##22$$44)$*?*?FF --88:!2277	 ##%((*1 Wj_C4 &&{'7'7'9:%%koo&78L		,; F@ **?Czz|L|tqAuzz%,,q/22|DL##**46*BDKK%()   81Kt$D F  &&2{[[E_E_7_cd7d''(:KIYIYIbIbcdIefA:%++"7"771<AQAQAaAa'')e WjVi__CCB MsC   P9P'-E,P	P'!P9
7Q
P$P''
P61P99
Q	c                 ,    SUR                  5       -
  nU$ )z
Calculate the loss for a batch of an unpacked sample

Args:
    rewards (torch.Tensor):
        Differentiable reward scalars for each generated image, shape: [batch_size]

Returns:
    loss (torch.Tensor) (all of these are of shape (1,))
g      $@)r   )r   r   r   s      r$   r   AlignPropTrainer.calculate_loss  s     wnn&&r'   
advantages
clip_rangeratioc                     U* U-  nU* [         R                  " USU-
  SU-   5      -  n[         R                  " [         R                  " XE5      5      $ )Ng      ?)rn   clampr   maximum)r   r   r   r   unclipped_lossclipped_losss         r$   r   AlignPropTrainer.loss  sT     %u,"{U[[**&
 

 zz%--EFFr'   c                    U R                   R                  (       a  SS KnUR                  R                  nO[
        R                  R                  nU" UU R                   R                  U R                   R                  U R                   R                  4U R                   R                  U R                   R                  S9$ )Nr   )lrbetasweight_decayeps)r   train_use_8bit_adambitsandbytesoptim	AdamW8bitrn   AdamWtrain_learning_ratetrain_adam_beta1train_adam_beta2train_adam_weight_decaytrain_adam_epsilon)r   trainable_layers_parametersr   optimizer_clss       r$   r   !AlignPropTrainer._setup_optimizer-  s    ;;**(..88M!KK--M'{{..;;//1M1MN<<..
 	
r'   c                 \    U R                   R                  XU5        UR                  5         g rE   )r   save_checkpointpop)r   modelsweights
output_dirs       r$   ry   !AlignPropTrainer._save_model_hook=  s!    ((*Er'   c                 Z    U R                   R                  X5        UR                  5         g rE   )r   load_checkpointr   )r   r   	input_dirs      r$   r{   !AlignPropTrainer._load_model_hookA  s    ((;

r'   c                    0 nU R                   R                  USS5      nUc2  [        [        U5       Vs/ s H  o`R	                  5       PM     sn6 u  p7O[        U5       Vs/ s H  n0 PM     nnU R
                  R                  USSSU R
                  R                  R                  S9R                  R                  U R                  R                  5      nU R
                  R                  U5      S   n	U(       a  U R
                  R                  U	UU R                  R                  U R                  R                   U R                  R"                  U R                  R$                  U R                  R&                  U R                  R(                  SS9	n
OPU R                  U	UU R                  R                  U R                  R                   U R                  R"                  SS9n
U
R*                  nXS	'   X4S
'   XtS'   U$ s  snf s  snf )aE  
Generate samples from the model

Args:
    batch_size (int): Batch size to use for sampling
    with_grad (bool): Whether the generated RGBs should have gradients attached to it.
    prompts (list[str], *optional*): If provided, use these prompts instead of generating new ones.

Returns:
    prompt_image_pairs (dict[Any])
r   r=   r>   Tr?   r   )	prompt_embedsnegative_prompt_embedsnum_inference_stepsguidance_scaleetatruncated_backprop_randtruncated_backprop_timesteptruncated_rand_backprop_minmaxoutput_type)r   r   r   r   r   r   r   r   r   )r   repeatzipr   rM   r   r   r   r   rs   rc   rt   ru   rgb_with_gradr   sample_num_stepssample_guidance_scale
sample_etar   r   r   r   )r   r   	with_gradr   r   sample_neg_prompt_embedsr(   r   
prompt_idsr   	sd_outputr   s               r$   r   "AlignPropTrainer._generate_samplesE  s     #'#8#8#?#?
Aq#Q ?'*uZGX,YGX!^^-=GX,Y'Z$G_+0+<=+<ar+<O=%%// ''11BB 0 
 )BBt''../ 	 ((55jA!D((66+'?$(KK$@$@#{{@@KK**(,(K(K,0KK,S,S/3{{/Y/Y  7 
I ((+'?$(KK$@$@#{{@@KK**  ) I !!'-8$(/9%0?,-!!U -Z=s   H"Hepochsc                     SnUc  U R                   R                  n[        U R                  U5       H  nU R	                  X25      nM     g)z.
Train the model for a given number of epochs
r   N)r   
num_epochsr   r   r   )r   r	  r   r   s       r$   r   AlignPropTrainer.train  sB     >[[++F4++V4E))E7K 5r'   c                 Z    U R                   R                  U5        U R                  5         g rE   )r   save_pretrainedcreate_model_card)r   save_directorys     r$   _save_pretrained!AlignPropTrainer._save_pretrained  s"    ((8 r'   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )N/r)   )
model_name)	argshub_model_idr   r   namer^   r  super_save_checkpoint)r   modeltrialr  	__class__s       r$   r  !AlignPropTrainer._save_checkpoint  sj    99!!)dii22388J//55c:2>J*5 .r'   r  dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsaS          @article{prabhudesai2024aligning,
            title        = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
            author       = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki},
            year         = 2024,
            eprint       = {arXiv:2310.03739}
        }	AlignPropzCAligning Text-to-Image Diffusion Models with Reward Backpropagationz
2310.03739)
base_modelr  r  r  r   	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   r  r   rR   rS   isdirr"  setr   straddenvironr   
_tag_namestextwrapdedentr   r  r   wandbrunurlr   saver_   r  r   )r   r  r  r   r(  citation
model_cards          r$   r  "AlignPropTrainer.create_model_card  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0$%]!

 	TYY%9%9;GHr'   )rc   r   r   r   rO   r   r   rM   rN   r   r   rE   )TN)NNN)"__name__
__module____qualname____firstlineno____doc__r5  r   r   rn   Tensortupler2  r   r   r   r   r   r]   r   r   floatr   r   ry   r{   r   r   r  r  r   rW   r  __static_attributes____classcell__)r  s   @r$   r   r   '   so   $ %J HLB!B! "5<<sU3Z"H%,,"VWB! ""eCHo"56	B!
 1B! %XsCos.B%CDB!HH# HC HTGLLG G ||	G
 ;"z8HSM 8!
/ %)&*,0	@ISM@I sm@I CcD()	@I @Ir'   r   )#rR   r6  rH   collectionsr   pathlibr   typingr   r   r   r   rn   
accelerater	   r
   accelerate.utilsr   r   huggingface_hubr   transformersr   r   r   alignprop_configr   utilsr   r   r8  
get_loggerr?  rK   r   r"   r'   r$   <module>rS     sc    
   #  1 1  + ; 0 + 0 - @ 			H	%rI+ rIr'   