
    hu                         S r SSKrSSKJr  SSKJr  SSKJrJr  \R                  R                  SS5        S r\S	:X  a  \" 5         gg)
z
Train Gemma-3 on the Codeforces COTS dataset.

accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml examples/scripts/sft_gemma3.py
    N)load_dataset)AutoModelForImageTextToText)	SFTConfig
SFTTrainerTRACKIO_SPACE_IDztrl-trackioc                      [        SSS9n U R                  S5      n Sn[        R                  " USS9n[	        U S3S	S	S	S
S0SSSSSS9
n[        UUU S9nUR                  5         UR                  SS9  g )Nzopen-r1/codeforces-cotstrain)splitpromptzgoogle/gemma-3-12b-iteager)attn_implementationz-codeforces-SFTTuse_reentrantFi              )

output_dirbf16use_liger_kernelgradient_checkpointinggradient_checkpointing_kwargs
max_lengthper_device_train_batch_sizegradient_accumulation_stepsdataset_num_procnum_train_epochs)argsmodeltrain_dataset)dataset_name)r   remove_columnsr   from_pretrainedr   r   r	   push_to_hub)r   model_idr   training_argstrainers        U/home/james-whalen/.local/lib/python3.13/site-packages/examples/scripts/sft_gemma3.pymainr'   *   s     !:'JM!00:M 'H'77V]^E Z/#'6&>$%$%M #G
 MMO %>?    __main__)__doc__osdatasetsr   transformersr   trlr   r   environ
setdefaultr'   __name__ r(   r&   <module>r3      sJ   0 
 ! 4 % 

  (- 8@D zF r(   