
    >Ci                    R    S SK Jr  S SKJr  SSKJrJrJrJr   " S S5      r	S	S jr
g)
    )annotations)Sequence   )
MODEL_ARCHMODEL_TENSORMODEL_TENSORSTENSOR_NAMESc                     \ rS rSr% \R
                  S\R                  S\R                  S\R                  S\R                  S\R                  S\R                  S\R                  S	\R                  S	\R                  S
0
rS\S'   0 \R"                  S_\R$                  S_\R&                  S_\R(                  S_\R*                  S_\R,                  S_\R.                  S_\R0                  S_\R2                  S_\R4                  S_\R6                  S_\R8                  S_\R:                  S_\R<                  S_\R>                  S_\R@                  S_\RB                  S_0 \RD                  S_\RF                  S_\RH                  S _\RJ                  S!_\RL                  S"_\RN                  S#_\RP                  S$_\RR                  S%_\RT                  S&_\RV                  S'_\RX                  S(_\R                  S)_\RZ                  S*_\R\                  S+_\R^                  S,_\R`                  S-_\Rb                  S._E0 \Rd                  S/_\Rf                  S0_\Rh                  S1_\Rj                  S2_\Rl                  S3_\Rn                  S4_\Rp                  S5_\Rr                  S6_\Rt                  S7_\Rv                  S8_\Rx                  S9_\Rz                  S:_\R|                  S;_\R~                  S<_\R                  S=_\R                  S>_\R                  S?_E0 \R                  S@_\R                  SA_\R                  SB_\R                  SC_\R                  SD_\R                  SE_\R                  SF_\R                  SG_\R                  SH_\R                  SI_\R                  SJ_\R                  SK_\R                  SL_\R                  SM_\R                  SN_\R                  SO_\R                  SP_E0 \R                  SQ_\R                  SR_\R                  SS_\R                  ST_\R                  SU_\R                  SV_\R                  SW_\R                  SX_\R                  SY_\R                  SZ_\R                  S[_\R                  S\_\R                  S]_\R                  S^_\R                  S__\R                  S`_\R                  Sa_E0 \R                  Sb_\R                  Sc_\R                  Sd_\R                  Se_\R                  Sf_\R                  Sg_\R                  Sh_\R                  Si_\R                  Sj_\R                  Sk_\R                  Sl_\R                  Sm_\R                  Sn_\R                  So_\R                  Sp_\R                  Sq_\R                  Sr_E0 \R                  Ss_\R                  St_\R                  Su_\R                  Sv_\R                  Sw_\R                  Sx_\R                  Sy_\R                  Sz_\R                  S{_\R                  S|_\GR                   S}_\GR                  S~_\GR                  S_\GR                  S_\GR                  S_\GR
                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                   S_\GR"                  S_\GR$                  S_\GR&                  S_\GR(                  S_\GR*                  S_\GR,                  S_\GR.                  S_E0 \GR0                  S_\GR2                  S_\GR4                  S_\GR6                  S_\GR8                  S_\GR:                  S_\GR<                  S_\GR>                  S_\GR@                  S_\GRB                  S_\GRD                  S_\GRF                  S_\GRH                  S_\GRJ                  S_\GRL                  S_\GRN                  S_\GRP                  S_E0 \GRR                  S_\GRT                  S_\GRV                  S_\GRX                  S_\GRZ                  S_\GR\                  S_\GR^                  S_\GR`                  S_\GRb                  S_\GRd                  S_\GRf                  S_\GRh                  S_\GRj                  S_\GRl                  S_\GRn                  S	_\GRp                  S_\GRr                  S_E\GRt                  S\GRv                  S\GRx                  S\GRz                  S\GR|                  S\GR~                  S\GR                  S	\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S0ErS\S'   \GR                  \R6                  S\GR                  S00rS\S'   S\S'   SS jrSSS jjrSSS jjrSSS jjrSS jrSS jrSS jrSrg)TensorNameMap   )zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsword_embeddingszmodel.embed_tokenstok_embeddingszembeddings.word_embeddingsz(language_model.embedding.word_embeddingswteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingssharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsz!language_model.model.embed_tokensencoder)z embeddings.token_type_embeddings)	word_embeddings_layernormzembeddings.LayerNormemb_lntransformer.normrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.norm)ztransformer.wpezembeddings.position_embeddingswpe)		embed_outlm_headoutputword_embeddings_for_headzlm_head.linearoutput_layerheadzhead.outr   )zgpt_neox.final_layer_normztransformer.ln_f
model.normnormztransformer.norm_fln_fz&language_model.encoder.final_layernormzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r   zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr   )z
rope.freqszrotary_pos_emb.inv_freq )zbackbone.embedz#dict[MODEL_TENSOR, tuple[str, ...]]mappings_cfg)z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlp"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normz3language_model.encoder.layers.{bid}.input_layernormmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r#   r"   z(transformer_encoder.{bid}.attention_norm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2model.layers.{bid}.ln2)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuezBlanguage_model.encoder.layers.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkv)#model.layers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr%   )#model.layers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr&   )#model.layers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr'   )z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.dense#model.layers.{bid}.self_attn.o_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densez)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz8language_model.encoder.layers.{bid}.self_attention.densez"model.layers.{bid}.self_attn.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr(   ztransformer_encoder.{bid}.wo)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z.transformer.blocks.{bid}.norm_attn_norm.norm_2)+model.layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernorm)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r)   zlayers.{bid}.ffn_normz<language_model.encoder.layers.{bid}.post_attention_layernormr$   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2z-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normr)   z"transformer_encoder.{bid}.ffn_norm)z,model.layers.{bid}.pre_feedforward_layernorm)z-model.layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernorm)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layer)z)model.layers.{bid}.mlp.shared_expert_gate)z.model.layers.{bid}.mlp.gate.e_score_correction)z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densez transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z5language_model.encoder.layers.{bid}.mlp.dense_h_to_4hz$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1z'model.layers.{bid}.feed_forward.up_projz!transformer_encoder.{bid}.ffn.w12)z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1)z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_proj)z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_proj)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_proj)z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densez transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz5language_model.encoder.layers.{bid}.mlp.dense_4h_to_hz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projz)model.layers.{bid}.feed_forward.down_projz transformer_encoder.{bid}.ffn.w3)z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2)z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linear)z>language_model.encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz#model.layers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_norm)z>language_model.encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz#model.layers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_norm)zFlanguage_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_proj)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_proj)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_log)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.D)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr&   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer'   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer%   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr(   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_proj)z%model.layers.{bid}.self_attn.q_b_proj)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_proj)z%model.layers.{bid}.self_attn.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernorm)z+model.layers.{bid}.self_attn.kv_a_layernorm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)zencoder.final_layer_norm
layer_norm)
classifierzclassifier.densepre_classifierdense)zclassifier.out_proj)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)z"multi_modal_projector.linear_{bid}zvisual.merger.mlp.{bid})z(model.connector.modality_projection.proj)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingzvision_model.class_embedding)z4vision_tower.vision_model.embeddings.patch_embeddingzvpm.embeddings.patch_embeddingz-model.vision_model.embeddings.patch_embeddingzvision_tower.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.proj)z7vision_tower.vision_model.embeddings.position_embeddingz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz%vision_model.positional_embedding_vlm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projzvisual.blocks.{bid}.attn.q)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projzvisual.blocks.{bid}.attn.k)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projzvisual.blocks.{bid}.attn.v)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1)zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projzvisual.blocks.{bid}.attn.proj)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z:vision_tower.transformer.layers.{bid}.feed_forward.up_projz'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_proj)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz!visual.blocks.{bid}.mlp.gate_proj)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z<vision_tower.transformer.layers.{bid}.feed_forward.down_projz'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_proj)z2vision_tower.vision_model.encoder.layers.{bid}.ls1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_model.layernorm_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_q)z)multi_modal_projector.mm_input_projection)zmulti_modal_projector.norm)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layer)zaudio_tower.embed_positions)zaudio_tower.conv{bid})zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_proj)z)audio_tower.layers.{bid}.self_attn.k_proj)z)audio_tower.layers.{bid}.self_attn.v_proj)z-audio_tower.layers.{bid}.self_attn_layer_norm)z+audio_tower.layers.{bid}.self_attn.out_proj)z)audio_tower.layers.{bid}.final_layer_norm)zaudio_tower.layers.{bid}.fc1)zaudio_tower.layers.{bid}.fc2)z(audio.multi_modal_projector.linear_{bid})z"audio.multi_modal_projector.linearzaudio_tower.proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_midblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r)   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]arch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]mappingc                l   0 U l         U R                  R                  5        HF  u  p4U[        U   ;  a  M  [        U   nX54U R                   U'   U H  nX54U R                   U'   M     MH     XR
                  ;   a(  U R                  R                  U R
                  U   5        [        U5       H  nU R                  R                  5        Hb  u  p4U[        U   ;  a  M  [        U   R                  US9nX54U R                   U'   U H"  nUR                  US9nX54U R                   U'   M$     Md     M     g )N)bid)
r0   r!   itemsr   r	   r/   r.   updaterangeformat)selfarchn_blockstensorkeystensor_namekeyr2   s           M/home/james-whalen/.local/lib/python3.13/site-packages/gguf/tensor_mapping.py__init__TensorNameMap.__init__  s%    --335LF]400&v.K)/(=DLL%%+$9S!  6 ///##**4+G+G+MN?C $ 7 7 = = ?t!44*62999D-3,A[)C**3*/C)/(=DLL%   !@ #    c                    U R                   R                  U5      nUb  U$ U HU  nUR                  U5      (       d  M  U R                   R                  US [        U5      *  5      nUc  MH  US   US   U-   4s  $    g )Nr   r   )r0   getendswithlen)r7   r=   try_suffixesresultsuffixs        r>   get_type_and_nameTensorNameMap.get_type_and_name  s~    !!#&M"F||F##))#mF|*<=%!!9fQi&&888	 #
 rA   c                2    U R                  XS9nUc  g US   $ )NrF   r   rI   r7   r=   rF   rG   s       r>   get_nameTensorNameMap.get_name  '    '''I>ayrA   c                2    U R                  XS9nUc  g US   $ )NrL   r   rM   rN   s       r>   get_typeTensorNameMap.get_type  rQ   rA   c                Z     U R                   U   S   $ ! [         a    [        U5      ef = f)Nr   )r0   KeyErrorr7   r=   s     r>   __getitem__TensorNameMap.__getitem__  s4    	 <<$Q'' 	 3-	 s    *c                    XR                   ;   $ Nr0   rW   s     r>   __contains__TensorNameMap.__contains__  s    ll""rA   c                ,    [        U R                  5      $ r[   )reprr0   )r7   s    r>   __repr__TensorNameMap.__repr__  s    DLL!!rA   r\   N)r8   r   r9   int)r    )r=   strrF   Sequence[str]returnztuple[MODEL_TENSOR, str] | None)r=   rd   rF   re   rf   z
str | None)r=   rd   rF   re   rf   zMODEL_TENSOR | None)r=   rd   rf   rd   )r=   rd   rf   bool)rf   rd   )__name__
__module____qualname____firstlineno__r   
TOKEN_EMBDTOKEN_TYPESTOKEN_EMBD_NORMPOS_EMBDOUTPUTOUTPUT_NORM
ROPE_FREQSROPE_FACTORS_LONGROPE_FACTORS_SHORTCONV1Dr!   __annotations__	ATTN_NORMATTN_NORM_2ATTN_QKVATTN_QATTN_KATTN_VATTN_OUTATTN_OUT_NORMATTN_POST_NORMATTN_ROT_EMBDFFN_NORMFFN_PRE_NORMFFN_POST_NORMFFN_GATE_INPFFN_GATE_INP_SHEXPFFN_EXP_PROBS_BFFN_UP
FFN_UP_EXPFFN_UP_SHEXPFFN_ACTFFN_GATEFFN_GATE_EXPFFN_GATE_SHEXPFFN_DOWNFFN_DOWN_EXPFFN_DOWN_SHEXPATTN_Q_NORMATTN_K_NORMLAYER_OUT_NORMSSM_IN
SSM_CONV1DSSM_XSSM_DTSSM_ASSM_DSSM_OUTTIME_MIX_W0TIME_MIX_W1TIME_MIX_W2TIME_MIX_A0TIME_MIX_A1TIME_MIX_A2TIME_MIX_V0TIME_MIX_V1TIME_MIX_V2TIME_MIX_G1TIME_MIX_G2TIME_MIX_K_KTIME_MIX_K_ATIME_MIX_R_KTIME_MIX_LERP_XTIME_MIX_LERP_KTIME_MIX_LERP_VTIME_MIX_LERP_RTIME_MIX_LERP_GTIME_MIX_LERP_WTIME_MIX_FIRSTTIME_MIX_DECAYTIME_MIX_DECAY_W1TIME_MIX_DECAY_W2TIME_MIX_KEYTIME_MIX_VALUETIME_MIX_RECEPTANCETIME_MIX_GATETIME_MIX_LNTIME_MIX_OUTPUTCHANNEL_MIX_LERP_KCHANNEL_MIX_LERP_RCHANNEL_MIX_KEYCHANNEL_MIX_RECEPTANCECHANNEL_MIX_VALUEATTN_Q_AATTN_Q_BATTN_KV_A_MQA	ATTN_KV_BATTN_K_BATTN_V_BATTN_Q_A_NORMATTN_KV_A_NORMATTN_SUB_NORMFFN_SUB_NORMDEC_ATTN_NORM
DEC_ATTN_Q
DEC_ATTN_K
DEC_ATTN_VDEC_ATTN_OUTDEC_ATTN_REL_BDEC_CROSS_ATTN_NORMDEC_CROSS_ATTN_QDEC_CROSS_ATTN_KDEC_CROSS_ATTN_VDEC_CROSS_ATTN_OUTDEC_CROSS_ATTN_REL_BDEC_FFN_NORMDEC_FFN_GATE
DEC_FFN_UPDEC_FFN_DOWNDEC_OUTPUT_NORMENC_ATTN_NORM
ENC_ATTN_Q
ENC_ATTN_K
ENC_ATTN_VENC_ATTN_OUTENC_ATTN_REL_BENC_FFN_NORMENC_FFN_GATE
ENC_FFN_UPENC_FFN_DOWNENC_OUTPUT_NORMCLSCLS_OUTCONVNEXT_DWCONVNEXT_NORMCONVNEXT_PW1CONVNEXT_PW2CONVNEXT_GAMMAPOSNET_CONV1POSNET_CONV2POSNET_NORMPOSNET_NORM1POSNET_NORM2POSNET_ATTN_NORMPOSNET_ATTN_QPOSNET_ATTN_KPOSNET_ATTN_VPOSNET_ATTN_OUTV_MMPROJV_MMPROJ_FCV_MMPROJ_MLPV_MMPROJ_PEGV_ENC_EMBD_CLSV_ENC_EMBD_PATCHV_ENC_EMBD_POSV_ENC_ATTN_QV_ENC_ATTN_Q_NORMV_ENC_ATTN_KV_ENC_ATTN_K_NORMV_ENC_ATTN_VV_ENC_INPUT_NORMV_ENC_ATTN_OV_ENC_POST_ATTN_NORMV_ENC_FFN_UPV_ENC_FFN_GATEV_ENC_FFN_DOWNV_LAYER_SCALE_1V_LAYER_SCALE_2
V_PRE_NORMV_POST_NORMV_MM_INP_PROJV_MM_INP_NORMV_MM_SOFT_EMB_NORMV_RESMPL_POS_EMBD_KV_RESMPL_ATTN_QV_RESMPL_ATTN_KV_RESMPL_ATTN_VV_RESMPL_ATTN_OUTV_RESMPL_KVV_RESMPL_POST_NORMV_RESMPL_KV_NORMV_RESMPL_Q_NORMV_RESMPL_PROJV_RESMPL_QUERYV_TOK_EMBD_IMG_BREAKV_MM_PATCH_MERGERA_ENC_EMBD_POSA_ENC_CONV1D
A_PRE_NORMA_POST_NORMA_ENC_ATTN_QA_ENC_ATTN_KA_ENC_ATTN_VA_ENC_INPUT_NORMA_ENC_OUTPUTA_ENC_OUTPUT_NORMA_ENC_FFN_UPA_ENC_FFN_GATEA_ENC_FFN_DOWNA_MMPROJA_MMPROJ_FCA_MM_NORM_PREA_MM_NORM_MIDr.   r   ARCTICFFN_NORM_EXPr/   r?   rI   rO   rS   rX   r]   ra   __static_attributes__r    rA   r>   r   r      s    	 "
6 	   #

 	$$ 
'
 	  
 	 

 	   #
. 	 "

 	&&'' 
Ee9L5 eNE? !
E?< 	   #
=E?L 	  
ME?t 	 
uE?R 	 
SE?r 	 
sE?P 	  
QE?L 	"" %
ME?\ 	## &
]E?h 	"" %
iE?x 	  
yE?^ 	!! $
_E?h 	"" %
iE?r 	!! 	$
sE?H 	'' *
IE?P 	$$ '
QE?Z 	  
[E?^ 	 "
_E?r 	!! $
sE?@ 	 
AE?J 	  
KE?h 	!! $
iE?z 	## &
{E?H	 	  
I	E?D
 	!! 	$
E
E?Z
 	## &
[
E?h
 	   #
i
E?z
 	   #
{
E?L 	 "
ME?T 	## &
UE?f 	 
gE?p 	 "
qE?z 	 
{E?D 	 
EE?N 	 
OE?X 	 
YE?b 	 
cE?l 	   #
mE?t 	   #
uE?@ 	   #
AE?L 	   #
ME?T 	   #
UE?\ 	   #
]E?d 	   #
eE?l 	   #
mE?t 	   #
uE?| 	   #
}E?D 	   #
EE?L 	!! $
ME?T 	!! $
UE?\ 	!! $
]E?d 	$$ '
eE?n 	$$ '
oE?x 	$$ '
yE?B 	$$ '
CE?L 	$$ '
ME?V 	$$ '
WE?` 	## &
aE?h 	## &
iE?r 	&& )
sE?| 	&& )
}E?F 	!! $
GE?T 	## &
UE?b 	(( +
cE?p 	"" %
qE?z 	   #
{E?D 	$$ '
EE?R 	'' *
SE?\ 	'' *
]E?d 	$$ '
eE?n 	++ .
oE?v 	&& )
wE?@ 	  
AE?H 	  
IE?P 	"" %
QE?X 	 !
YE?` 	  
aE?h 	  
iE?p 	"" %
qE?x 	## &
yE?@ 	"" %
AE?H 	!! $
IE?P 	"" %
QE?X 	 "
YE?` 	 "
aE?h 	 "
iE?p 	!! $
qE?x 	## &
yE?@ 	(( +
AE?H 	%% (
IE?P 	%% (
QE?X 	%% (
YE?` 	'' *
aE?h 	)) ,
iE?p 	!! $
qE?x 	!! $
yE?@ 	 "
AE?J 	!! $
KE?R 	$$ '
SE?Z 	"" %
[E?b 	 "
cE?j 	 "
kE?r 	 "
sE?z 	!! $
{E?B 	## &
CE?J 	!! $
KE?R 	!! $
SE?Z 	 "
[E?d 	!! $
eE?p 	$$ '
qE?z 	 
{E?H 	 
IE?R 	   #
SE?Z 	"" %
[E?b 	!! $
cE?j 	!! $
kE?r 	## &
sE?z 	!! $
{E?B 	!! $
CE?J 	   #
KE?R 	!! $
SE?Z 	!! $
[E?b 	%% (
cE?j 	"" %
kE?r 	"" %
sE?z 	"" %
{E?B 	$$ '
CE?P 	  
QE?Z 	   #
[E?b 	!! $
cE?n 	!! $
oE?v 	## &
wE?@ 	%% (
AE?R 	## &
SE?` 	!! $
aE?r 	&& )
sE?z 	!! $
{E?L 	&& )
ME?T 	!! $
UE?f 	%% (
gE?z 	!! $
{E?N 	)) ,
OE?b 	!! $
cE?v 	## &
wE?@ 	## &
AE?T 	$$ '
UE?\ 	$$ '
]E?d 	 "
eE?p 	   #
qE?~ 	"" %
E?F 	"" %
GE?N 	'' *
OE?V 	(( +
WE?^ 	$$ '
_E?f 	$$ '
gE?n 	$$ '
oE?v 	&& )
wE?~ 	   #
E?F 	'' *
GE?N 	%% (
OE?V 	$$ '
WE?^ 	"" %
_E?f 	## &
gE?n 	)) ,
oE?v 	&& )
wE?B  	## &
C E?J  	!! $
K E?R  	S E?V  	   #
W E?`  	!! $
a E?h  	!! $
 	!! $
 	%% (
 	!! $
 	&& )
 	!! $
 	##R## &
 	  
 	   #

 	"" %
 	"" %
E"E?; ER" 	!! $ %% (	
	VR 	 10>,	 #"rA   r   c                    [        X5      $ r[   )r   )r8   r9   s     r>   get_tensor_name_mapr0    s    ((rA   N)r8   r   r9   rc   rf   r   )
__future__r   typingr   	constantsr   r   r   r	   r   r0  r    rA   r>   <module>r4     s#    "  L Lt" t"n')rA   