
    h#,                     	   S SK r S SKJrJr  S SK Jr  S SKJrJrJrJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>  Sr?Sr@\" 5       rASIS jrBS rCS	\\S4S
\\S4S\\S4S\\S4S\\S4S\\S4S\\S4S\\S4S\\S4S\\S4S\$\%S4S\$\%S4S\$\%S4S\)\*S4S\)\*S4S\+\,S4S\.\/S4S \2\3S!4S"\2\3S#4S$\7\8S44 HY  u  rDrErFrG\DS$:X  a  S%OS&rH\R                  " \D\HS'9rJ\E" \K" \JR                  5      S(S)S*S*S+S,9rM\F" \M5      rN\C" \N5        \B" \N\JS-\G5        M[     S.\:\;S4S/\\S44 HR  u  rDrErFrG\R                  " \D5      rJ\E" \K" \JR                  5      S(S)S*S*S+S)S*S09rM\F" \M5      rN\C" \N5        \B" \N\JS-\G5        MT     \R                  " S 5      rJ\2" \K" \JR                  5      S1S)S*S*S+S,9rM\3" \M5      rN\B" \N\JS2S!5        \R                  " S35      rJ\7" \K" \JR                  5      S1S)S*S*S+S,9rM\8" \M5      rN\B" \N\JS25        S\$\&S4S \2\4S!4S3\7\9S44 HI  u  rDrErFrG\R                  " \D5      rJ\E" \K" \JR                  5      S(S)S*S*S+S4S59rM\F" \M5      rN\B" \N\JS-\G5        MK     S6\	\
S4S7\=\>S44 HL  u  rDrErFrG\R                  " \D5      rJ\E" \K" \JR                  5      S8S*S*S*S9S:S(S S;S<9
rM\F" \M5      rN\B" \N\JS-\G5        MN     S=\4S>\-4S?\!4S@\"4SA\<4SB\'4SC\(4SD\#4SE\64SF\144
 GHc  u  rDrF\R                  " \D5      rO\R                  " \D5      rMS*\MR                  lQ        S8\MR                  lR        S)\MR                  lS        S*\MR                  lT        S*\MR                  lQ        S8\MR                  lR        S)\MR                  lS        S*\MR                  lT        \V" \M\55      (       a  S*\MR                  lW        \V" \M\5\045      (       a  S*/\MR                  R                  SG'   \V" \M\05      (       a  S8\MR                  lY        \V" \M\ 5      (       a  S8\MR                  lR        \F" \M5      R                  \ R                  SH9rN\B" \N\OS-5        GMf     g)J    N)HfApi	ModelCard)nn)9
AutoConfigAutoProcessorAutoTokenizer
BartConfig	BartModelBloomConfigBloomForCausalLMCohereConfigCohereForCausalLM
DbrxConfigDbrxForCausalLMDeepseekV3ConfigDeepseekV3ForCausalLMFalconMambaConfigFalconMambaForCausalLMGemma2ConfigGemma2ForCausalLMGemma3ForConditionalGenerationGemmaConfigGemmaForCausalLM
GPT2ConfigGPT2LMHeadModelGPTNeoXConfigGPTNeoXForCausalLMGptOssConfigGptOssForCausalLMIdefics2Config Idefics2ForConditionalGeneration Idefics3ForConditionalGeneration InternVLForConditionalGenerationLlamaConfigLlamaForCausalLMLlamaForSequenceClassificationLlavaForConditionalGeneration!LlavaNextForConditionalGenerationMistralConfigMistralForCausalLM	OPTConfigOPTForCausalLM!PaliGemmaForConditionalGeneration
Phi3ConfigPhi3ForCausalLMQwen2_5_VLConfig"Qwen2_5_VLForConditionalGenerationQwen2ConfigQwen2ForCausalLMQwen2ForSequenceClassificationQwen2VLConfigQwen2VLForConditionalGenerationQwen3ConfigQwen3ForCausalLMQwen3ForSequenceClassificationQwen3MoeConfigQwen3MoeForCausalLMSmolVLMForConditionalGenerationT5ConfigT5ForConditionalGenerationztrl-internal-testingz
---
library_name: transformers
tags: [trl]
---

# Tiny {model_class_name}

This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
c                 t   U R                   R                  n[        R                  US9n[	        U5      nUb  U SU 3n[
         SU 3nUb  USU 3-  n[        R                  U5      (       a  U(       d  [        SU S35        g U R                  U5        UR                  U5        UR                  U5        g )N)model_class_name-/zModel z already exists, skipping)
	__class____name__
MODEL_CARDformatr   ORGANIZATIONapirepo_existsprintpush_to_hub)	model	tokenizerprefixsuffixforcer@   content
model_cardrepo_ids	            V/home/james-whalen/.local/lib/python3.13/site-packages/scripts/generate_tiny_models.pyrK   rK   d   s    //1ABG7#J$XQ'7&89a 012GQvh<
wwi89:'"g&w'    c                    U R                  5        GH  n[        U[        R                  5      (       aa  UR                  b)  [        R
                  R                  UR                  5        [        R
                  R                  UR                  5        M  [        U[        R                  5      (       a+  [        R
                  R                  UR                  SSS9  M  [        U[        R                  5      (       ae  [        R
                  R                  UR                  5        UR                  b,  [        R
                  R                  UR                  5        GMO  GMR  [        U[        R                  5      (       d  GMt  UR                  b)  [        R
                  R                  UR                  5        [        R
                  R                  UR                  5        GM     g)a  
Initialize tiny test models to avoid NaNs from uninitialized weights.

Uses safe defaults:
  - Linear/Conv1d: Xavier uniform (weights), zero (biases)
  - Embedding: Normal(0, 0.02)
  - LayerNorm: Ones (weights), zero (biases)

Args:
    model: PyTorch model (modified in-place)
Ng        g{Gz?)meanstd)modules
isinstancer   Linearbiasinitzeros_xavier_uniform_weight	Embeddingnormal_	LayerNormones_Conv1d)rL   modules     rT   init_weights_tiny_modelrg   v   s    --/fbii(({{&v{{+GG##FMM2--GGOOFMMO>--GGMM&--({{&v{{+ ' 		**{{&v{{+GG##FMM2+ "rU   zbigscience/bloomz-560mzCohereForAI/aya-expanse-8bzdatabricks/dbrx-instructzdeepseek-ai/DeepSeek-R1zdeepseek-ai/DeepSeek-R1-05280528ztiiuae/falcon-7b-instructzgoogle/gemma-2-2b-itzgoogle/gemma-7b-itzopenai-community/gpt2zEleutherAI/pythia-14mz#meta-llama/Meta-Llama-3-8B-Instruct3z meta-llama/Llama-3.1-8B-Instructz3.1z meta-llama/Llama-3.2-1B-Instructz3.2z"mistralai/Mistral-7B-Instruct-v0.1z0.1z"mistralai/Mistral-7B-Instruct-v0.2z0.2zfacebook/opt-1.3bzmicrosoft/Phi-3.5-mini-instructzQwen/Qwen2.5-32B-Instructz2.5zQwen/Qwen2.5-Coder-0.5Bz	2.5-CoderzQwen/Qwen3-8Bz
refs/pr/14main)revision             )
vocab_sizehidden_sizenum_attention_headsnum_key_value_headsnum_hidden_layersintermediate_sizetinyzQwen/Qwen3-30B-A3Bzopenai/gpt-oss-20b)rp   rq   rr   rs   rt   ru   num_expertsnum_experts_per_tok   smallzQwen/Qwen3-4B   )rp   rq   rr   rs   rt   ru   
num_labelszfacebook/bart-basezgoogle/flan-t5-small   @      T)
rp   d_modelencoder_layersdecoder_layersd_kvd_ff
num_layers	num_headsdecoder_start_token_idis_encoder_decoderzgoogle/gemma-3-4b-itzgoogle/paligemma-3b-pt-224zHuggingFaceM4/idefics2-8bz HuggingFaceM4/Idefics3-8B-Llama3z$HuggingFaceTB/SmolVLM2-2.2B-Instructzllava-hf/llava-1.5-7b-hfz!llava-hf/llava-v1.6-mistral-7b-hfzOpenGVLab/InternVL3-8B-hfzQwen/Qwen2-VL-2B-InstructzQwen/Qwen2.5-VL-3B-Instructmrope_section)dtype)NNF)]torchhuggingface_hubr   r   r   transformersr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   rG   rE   rH   rK   rg   model_idconfig_classmodel_classrO   rk   from_pretrainedrM   lenvocabconfigrL   	processortext_configrt   rq   rr   rs   vision_configrZ   depthrope_scalingout_hidden_sizeperceiver_configtobfloat16 rU   rT   <module>r      s  &  , : : : : : : : : : : : : : : :z &	
 g($!3L {,<dC!<1BDI_dC 02GN#%57LfU "35KTR\+<dC;(8$?j/4@m-?F*K9I3O'6FN'6FN)=:LeT)=:LeT)^T:&
OTJ +/?G-={Kk#3T:+4/HlK.  (?:|H--hJIy'F EE"y&&1G4N >+>E<):DA4/HlK --h7Iy'	F EE"y&&1#4* ))*EF		9??#
 	  E9gu -))/:		9??#
 	  E9g & (6TV[\ +/MuUk#A4H4/HlK
 --h7Iy'F Ey&&1!4* :y$7X'A4H4/HlK --h7Iy' F Ey&&1%4. ;<!#DE "BC')IJ+-LM!>?(*KL "BC "AB"$FGHk --h7I''1F+,F(%'F"-.F*-.F*-.F*')F$/0F,/0F,&=**%&"&=*:;<<<=3''8&+--/1,&.)).0+"""8Ey&)OrU   