
"""
Run the BCO training script with the commands below. In general, the optimal configuration for BCO will be similar to that of KTO.

# Full training:
python examples/scripts/bco.py \
    --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct \
    --trust_remote_code \
    --dataset_name trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 32 \
    --num_train_epochs 1 \
    --learning_rate 1e-6 \
    --gradient_checkpointing \
    --gradient_accumulation_steps 1 \
    --eval_steps 0.2 \
    --save_strategy no \
    --output_dir bco-aligned-model \
    --logging_first_step \
    --max_length 2048 \
    --max_prompt_length 1536 \
    --max_completion_length 1024 \
    --no_remove_unused_columns \
    --warmup_ratio 0.1

# QLoRA:
python examples/scripts/bco.py \
    --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct \
    --trust_remote_code \
    --dataset_name trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 32 \
    --num_train_epochs 1 \
    --learning_rate 1e-6 \
    --gradient_checkpointing \
    --gradient_accumulation_steps 1 \
    --eval_steps 0.2 \
    --save_strategy no \
    --output_dir bco-aligned-model-lora \
    --logging_first_step \
    --warmup_ratio 0.1 \
    --max_length 2048 \
    --max_prompt_length 1536 \
    --max_completion_length 1024 \
    --no_remove_unused_columns \
    --use_peft \
    --load_in_4bit \
    --lora_target_modules all-linear \
    --lora_r 16 \
    --lora_alpha 16
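
Note: besides the policy and reference models, the script loads an embedding model
(nomic-ai/nomic-embed-text-v1.5) together with a bert-base-uncased tokenizer. Prompt
embeddings produced by `embed_prompt` below are passed to `BCOTrainer` through
`embedding_func` / `embedding_tokenizer`, which BCO uses for its underlying distribution
matching (UDM) of prompts.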
    N)partial)Accelerator)load_dataset)	AutoModelAutoModelForCausalLMAutoTokenizerHfArgumentParserPreTrainedModel)	BCOConfig
BCOTrainerModelConfigScriptArgumentsget_peft_configTRACKIO_SPACE_IDztrl-trackio	input_idsattention_maskmodelc                    S n[         R                  " 5          U" XS9nU" XA5      nSSS5        Sn[        R                  " WSSS9n[        R                  " XUR
                  S   4S9nUSS2SU24   nU$ ! , (       d  f       NU= f)	zR
Borrowed from https://huggingface.co/nomic-ai/nomic-embed-text-v1.5#transformers
c                     U S   nUR                  S5      R                  UR                  5       5      R                  5       n[        R
                  " X#-  S5      [        R                  " UR                  S5      SS9-  $ )Nr      g&.>)min)	unsqueezeexpandsizefloattorchsumclamp)model_outputr   token_embeddingsinput_mask_expandeds       N/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/bco.pymean_pooling"embed_prompt.<locals>.mean_poolingd   sn    '?,66r:AABRBWBWBYZ``byy)?CekkReRiRijkRlrvFwww    )r   r   Ni      r   )pdim)normalized_shape)r   no_gradF	normalize
layer_normshape)r   r   r   r$   r    
embeddingsmatryoshka_dims          r#   embed_promptr2   _   s    
x
 
yP!,?
 
 NZ1!4Jj<L<LQ<O;QRJA./J 
s   A77
B__main__use_reentrantT)trust_remote_code)nameznomic-ai/nomic-embed-text-v1.5auto)r5   safe_serializationdtype
device_mapzbert-base-uncased)r   no)argstrain_dataseteval_datasetprocessing_classpeft_configembedding_funcembedding_tokenizer)dataset_name)=__doc__os	functoolsr   r   torch.nn.functionalnn
functionalr,   
accelerater   datasetsr   transformersr   r   r   r	   r
   trlr   r   r   r   r   environ
setdefault
LongTensorr2   __name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsgradient_checkpointing_kwargsfrom_pretrainedmodel_name_or_pathr5   r   	ref_model	tokenizer	pad_token	eos_tokenrC   dataset_configdatasetacceleratorbfloat16embedding_modelprepare_modelrB   rA   dataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub r&   r#   <module>rm      sU  62h 
     " ! j j T T 

  (- 8E,, e>N>N Wf . z	;GHF-3-O-O-Q*K
3BD2IM/ !00%%9U9UE %44%%9U9UI --%%9U9UI "'11	;33+:T:TUG-K//($66nnO "//@O'77z/K/K N k==>@M@[@[_c@cW[;;<im"#J/%/
G MMO }//0  )A)AB !s r&   