
    5h                   R    S SK Jr  S SKJr  SSKJrJrJrJr   " S S5      r	S	S jr
g)
    )annotations)Sequence   )
MODEL_ARCHMODEL_TENSORMODEL_TENSORSTENSOR_NAMESc                     \ rS rSr% \R
                  S\R                  S\R                  S\R                  S\R                  S\R                  S\R                  S\R                  S	\R                  S
\R                  S\R                  S\R                   S0rS\S'   0 \R&                  S_\R(                  S_\R*                  S_\R,                  S_\R.                  S_\R0                  S_\R2                  S_\R4                  S_\R6                  S_\R8                  S_\R:                  S_\R<                  S_\R>                  S_\R@                  S_\RB                  S_\RD                  S_\RF                  S_0 \RH                  S _\RJ                  S!_\RL                  S"_\RN                  S#_\RP                  S$_\RR                  S%_\RT                  S&_\RV                  S'_\RX                  S(_\RZ                  S)_\R\                  S*_\R^                  S+_\R`                  S,_\Rb                  S-_\Rd                  S._\R                  S/_\Rf                  S0_E0 \Rh                  S1_\Rj                  S2_\Rl                  S3_\Rn                  S4_\Rp                  S5_\Rr                  S6_\Rt                  S7_\Rv                  S8_\Rx                  S9_\Rz                  S:_\R|                  S;_\R~                  S<_\R                  S=_\R                  S>_\R                  S?_\R                  S@_\R                  SA_E0 \R                  SB_\R                  SC_\R                  SD_\R                  SE_\R                  SF_\R                  SG_\R                  SH_\R                  SI_\R                  SJ_\R                  SK_\R                  SL_\R                  SM_\R                  SN_\R                  SO_\R                  SP_\R                  SQ_\R                  SR_E0 \R                  SS_\R                  ST_\R                  SU_\R                  SV_\R                  SW_\R                  SX_\R                  SY_\R                  SZ_\R                  S[_\R                  S\_\R                  S]_\R                  S^_\R                  S__\R                  S`_\R                  Sa_\R                  Sb_\R                  Sc_E0 \R                  Sd_\R                  Se_\R                  Sf_\R                  Sg_\R                  Sh_\R                  Si_\R                  Sj_\R                  Sk_\R                  Sl_\R                  Sm_\R                  Sn_\R                  So_\R                  Sp_\R                  Sq_\R                  Sr_\R                  Ss_\R                  St_E0 \R                  Su_\R                  Sv_\R                  Sw_\R                  Sx_\R                  Sy_\R                  Sz_\R                  S{_\R                  S|_\GR                   S}_\GR                  S~_\GR                  S_\GR                  S_\GR                  S_\GR
                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                   S_\GR"                  S_\GR$                  S_\GR&                  S_\GR(                  S_\GR*                  S_\GR,                  S_\GR.                  S_\GR0                  S_\GR2                  S_E0 \GR4                  S_\GR6                  S_\GR8                  S_\GR:                  S_\GR<                  S_\GR>                  S_\GR@                  S_\GRB                  S_\GRD                  S_\GRF                  S_\GRH                  S_\GRJ                  S_\GRL                  S_\GRN                  S_\GRP                  S_\GRR                  S_\GRT                  S_E0 \GRV                  S_\GRX                  S_\GRZ                  S_\GR\                  S_\GR^                  S_\GR`                  S_\GRb                  S_\GRd                  S_\GRf                  S_\GRh                  S_\GRj                  S_\GRl                  S_\GRn                  S_\GRp                  S_\GRr                  S_\GRt                  S_\GRv                  S_E0 \GRx                  S_\GRz                  S_\GR|                  S_\GR~                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S\GR                  S0ErS\S'   \GR                  \R<                  S\GR                  S00rS\S'   S\S'   SS jrSSS jjrSSS jjrSSS jjrSS jrSS jrSS jrSrg)TensorNameMap   )zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsword_embeddingszmodel.embed_tokensembed_tokenstok_embeddingszembeddings.word_embeddingsz(language_model.embedding.word_embeddingswteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingssharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsz!language_model.model.embed_tokensencoderzmodel.transformer.wter   )z embeddings.token_type_embeddings)
word_embeddings_layernormzembeddings.LayerNormemb_lntransformer.normrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.normzmodel.embedding_norm)ztransformer.wpezembeddings.position_embeddingswpe)
	embed_outlm_headoutputword_embeddings_for_headzlm_head.linearoutput_layerheadzhead.outr   zmodel.transformer.ff_out)dense_2_out)dense_3_out)zgpt_neox.final_layer_normztransformer.ln_f
model.normnormztransformer.norm_fln_fz&language_model.encoder.final_layernormzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r    zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr    zmodel.transformer.ln_f)z
rope.freqszrotary_pos_emb.inv_freq )zbackbone.embedz#dict[MODEL_TENSOR, tuple[str, ...]]mappings_cfg)z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlp"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normz3language_model.encoder.layers.{bid}.input_layernormmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz(model.layers.layers.{bid}.pre_mixer_normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz model.layers.{bid}.pre_attn_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r&   r%   layers.{bid}.input_layernormz(transformer_encoder.{bid}.attention_normz model.layers.{bid}.operator_normz(model.transformer.blocks.{bid}.attn_normr'   z&model.layers.{bid}.attention_layernorm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2model.layers.{bid}.ln2)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuezBlanguage_model.encoder.layers.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz(model.layers.layers.{bid}.mixer.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkv)#model.layers.{bid}.self_attn.q_projlayers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr)   z%model.transformer.blocks.{bid}.q_projr*   z"backbone.layers.{bid}.mixer.q_proj)#model.layers.{bid}.self_attn.k_projlayers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr+   z%model.transformer.blocks.{bid}.k_projr,   z"backbone.layers.{bid}.mixer.k_proj)#model.layers.{bid}.self_attn.v_projlayers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr-   z%model.transformer.blocks.{bid}.v_projr.   z"backbone.layers.{bid}.mixer.v_proj) z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.dense#model.layers.{bid}.self_attn.o_projlayers.{bid}.self_attn.o_projz%model.layers.{bid}.self_attn.out_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densez)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz8language_model.encoder.layers.{bid}.self_attention.densez"model.layers.{bid}.self_attn.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projz&model.layers.layers.{bid}.mixer.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr/   ztransformer_encoder.{bid}.woz'model.transformer.blocks.{bid}.attn_outr0   z"backbone.layers.{bid}.mixer.o_proj)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z!model.layers.{bid}.post_attn_normz.transformer.blocks.{bid}.norm_attn_norm.norm_2)+model.layers.{bid}.post_attention_layernorm%layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernormz0model.layers.layers.{bid}.post_mixer_norm.weight)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z"model.layers.{bid}.self_attn.sinks)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r1   zlayers.{bid}.ffn_normz<language_model.encoder.layers.{bid}.post_attention_layernormr(   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2zmodel.layers.{bid}.pre_moe_normz-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normz#model.layers.{bid}.pre_ff_layernormz$model.layers.{bid}.pre_moe_layernormr1   z"transformer_encoder.{bid}.ffn_normz&model.layers.layers.{bid}.pre_mlp_normz&model.transformer.blocks.{bid}.ff_normr2   z(model.layers.{bid}.feedforward_layernorm)z,model.layers.{bid}.pre_feedforward_layernormz&layers.{bid}.pre_feedforward_layernormz*model.layers.{bid}.pre_ff_layernorm.weight)z-model.layers.{bid}.post_feedforward_layernormz'layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernormz.model.layers.layers.{bid}.post_mlp_norm.weight'model.layers.{bid}.feed_forward.up_projz model.layers.{bid}.post_moe_norm)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layerzmodel.layers.{bid}.mlp.routerzmodel.layers.{bid}.mlp.gate.wgz2model.layers.{bid}.block_sparse_moe.primary_routerz$model.layers.{bid}.feed_forward.gate)z)model.layers.{bid}.mlp.shared_expert_gate)z.model.layers.{bid}.mlp.gate.e_score_correctionz5model.layers.{bid}.mlp.moe_statics.e_score_correctionz+model.layers.{bid}.feed_forward.expert_bias)%z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projlayers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densez transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z5language_model.encoder.layers.{bid}.mlp.dense_h_to_4hz$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz*model.layers.layers.{bid}.mlp.gate_up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1r3   z!transformer_encoder.{bid}.ffn.w12z&model.layers.{bid}.block_sparse_moe.upz&model.transformer.blocks.{bid}.up_projr4   z#backbone.layers.{bid}.mixer.up_proj)z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1z.model.layers.{bid}.block_sparse_moe.experts.up)z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)model.layers.{bid}.feed_forward.down_projz)model.layers.{bid}.mlp.shared_mlp.up_proj)z,model.layers.{bid}.mlp.chunk_experts.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projlayers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_projz&model.transformer.blocks.{bid}.ff_projr6   )z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_projz0model.layers.{bid}.block_sparse_moe.experts.gate)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_projz+model.layers.{bid}.mlp.shared_mlp.gate_proj)z.model.layers.{bid}.mlp.chunk_experts.gate_proj) z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projlayers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densez transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz5language_model.encoder.layers.{bid}.mlp.dense_4h_to_hz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projr5   z transformer_encoder.{bid}.ffn.w3z(model.layers.{bid}.block_sparse_moe.downz%model.transformer.blocks.{bid}.ff_outr7   z%backbone.layers.{bid}.mixer.down_proj)	z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2z0model.layers.{bid}.block_sparse_moe.experts.down)z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linearz+model.layers.{bid}.mlp.shared_mlp.down_proj)z.model.layers.{bid}.mlp.chunk_experts.down_proj)z>language_model.encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz,model.layers.{bid}.self_attn.query_layernormz#model.layers.{bid}.self_attn.q_normlayers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_normz!model.layers.layers.{bid}.mixer.qr8   z,model.layers.{bid}.attention.query_layernorm)z>language_model.encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz*model.layers.{bid}.self_attn.key_layernormz#model.layers.{bid}.self_attn.k_normlayers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_normz!model.layers.layers.{bid}.mixer.kr9   z*model.layers.{bid}.attention.key_layernorm)zFlanguage_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2)zmodel.embed_tokens_per_layer)z model.per_layer_model_projection)zmodel.per_layer_projection_norm)zmodel.altup_projections)zmodel.altup_unembed_projections)z'model.layers.{bid}.per_layer_input_gate)z'model.layers.{bid}.per_layer_projection)z,model.layers.{bid}.post_per_layer_input_norm)z)model.layers.{bid}.altup.correction_coefs)z-model.layers.{bid}.altup.correct_output_scale)z)model.layers.{bid}.altup.prediction_coefs)z(model.layers.{bid}.altup.modality_router)z$model.layers.{bid}.altup.router_norm)z%model.layers.{bid}.laurel.linear_left)z&model.layers.{bid}.laurel.linear_right)z*model.layers.{bid}.laurel.post_laurel_norm)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_projz model.layers.{bid}.mamba.in_projz'model.layers.layers.{bid}.mixer.in_proj)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1dzmodel.layers.{bid}.mamba.conv1dz&model.layers.layers.{bid}.mixer.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_projzmodel.layers.{bid}.mamba.x_projz)model.layers.layers.{bid}.mixer.bcdt_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_projz model.layers.{bid}.mamba.dt_projz'model.layers.layers.{bid}.mixer.dt_proj)z.model.layers.layers.{bid}.mixer.dt_norm.weightz%model.layers.{bid}.mamba.dt_layernorm)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_logzmodel.layers.{bid}.mamba.A_logz%model.layers.layers.{bid}.mixer.A_log)z$model.layers.{bid}.mamba.b_layernormz$model.layers.{bid}.mamba.B_layernormz-model.layers.layers.{bid}.mixer.B_norm.weight)z$model.layers.{bid}.mamba.c_layernormz$model.layers.{bid}.mamba.C_layernormz-model.layers.layers.{bid}.mixer.C_norm.weight)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.Dzmodel.layers.{bid}.mamba.Dz!model.layers.layers.{bid}.mixer.D)zmodel.layers.{bid}.mamba.normz backbone.layers.{bid}.mixer.norm)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_projz!model.layers.{bid}.mamba.out_projz(model.layers.layers.{bid}.mixer.out_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr+   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer-   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer)   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr/   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_proj)z%model.layers.{bid}.self_attn.q_b_proj)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_proj)z%model.layers.{bid}.self_attn.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernorm)z+model.layers.{bid}.self_attn.kv_a_layernorm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)zencoder.final_layer_norm
layer_norm)
classifierzclassifier.densepre_classifierdense)zclassifier.out_proj)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)zmodel.layers.{bid}.conv.conv)zmodel.layers.{bid}.conv.in_proj)z model.layers.{bid}.conv.out_proj)z"multi_modal_projector.linear_{bid}zvisual.merger.mlp.{bid})z(model.connector.modality_projection.proj)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingz'model.vision_tower.embeddings.cls_tokenzvision_model.class_embedding)	z4vision_tower.vision_model.embeddings.patch_embeddingz9model.vision_tower.embeddings.patch_embeddings.projectionzvpm.embeddings.patch_embeddingz-model.vision_model.embeddings.patch_embeddingzvision_tower.patch_convzvision_encoder.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.projzvision_tower.patch_embed.proj)z7vision_tower.vision_model.embeddings.position_embeddingz1model.vision_tower.embeddings.position_embeddingsz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz%vision_model.positional_embedding_vlmz vision_tower.patch_embed.pos_emb)	z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz7model.vision_tower.encoder.layer.{bid}.attention.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projz4vision_encoder.transformer.layers.{bid}.attention.wqzvisual.blocks.{bid}.attn.qz$vision_tower.encoder.blocks.{bid}.wq)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_normz7model.vision_tower.encoder.layer.{bid}.attention.q_norm)	z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz7model.vision_tower.encoder.layer.{bid}.attention.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projz4vision_encoder.transformer.layers.{bid}.attention.wkzvisual.blocks.{bid}.attn.kz$vision_tower.encoder.blocks.{bid}.wk)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_normz7model.vision_tower.encoder.layer.{bid}.attention.k_norm)	z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz7model.vision_tower.encoder.layer.{bid}.attention.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projz4vision_encoder.transformer.layers.{bid}.attention.wvzvisual.blocks.{bid}.attn.vz$vision_tower.encoder.blocks.{bid}.wv)
z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z7model.vision_tower.encoder.layer.{bid}.layernorm_beforez$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.transformer.layers.{bid}.attention_normz6vision_encoder.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1z'vision_tower.encoder.blocks.{bid}.norm0)
zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projzAmodel.vision_tower.encoder.layer.{bid}.attention.projection_layerz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projz4vision_encoder.transformer.layers.{bid}.attention.wozvisual.blocks.{bid}.attn.projz$vision_tower.encoder.blocks.{bid}.wo)
z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z6model.vision_tower.encoder.layer.{bid}.layernorm_afterz$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normz0vision_encoder.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2z'vision_tower.encoder.blocks.{bid}.norm1)
z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z.model.vision_tower.encoder.layer.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z:vision_tower.transformer.layers.{bid}.feed_forward.up_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w3z'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_projz)vision_tower.encoder.blocks.{bid}.mlp.fc0)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w1z!visual.blocks.{bid}.mlp.gate_proj)
z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z.model.vision_tower.encoder.layer.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z<vision_tower.transformer.layers.{bid}.feed_forward.down_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w2z'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_projz)vision_tower.encoder.blocks.{bid}.mlp.fc1)z2vision_tower.vision_model.encoder.layers.{bid}.ls1z/model.vision_tower.encoder.layer.{bid}.lambda_1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2z/model.vision_tower.encoder.layer.{bid}.lambda_2)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_encoder.ln_prezvision_model.layernorm_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_qz$vision_tower.encoder.final_layernorm)z)multi_modal_projector.mm_input_projection)zmulti_modal_projector.normz multi_modal_projector.layer_normzmulti_modal_projector.pre_normpre_mm_projector_norm)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layerzpatch_merger.merging_layer)zaudio_tower.embed_positions)zaudio_tower.conv{bid})zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_proj)z)audio_tower.layers.{bid}.self_attn.k_proj)z)audio_tower.layers.{bid}.self_attn.v_proj)z-audio_tower.layers.{bid}.self_attn_layer_norm)z+audio_tower.layers.{bid}.self_attn.out_proj)z)audio_tower.layers.{bid}.final_layer_norm)zaudio_tower.layers.{bid}.fc1)zaudio_tower.layers.{bid}.fc2)z(audio.multi_modal_projector.linear_{bid})z"audio.multi_modal_projector.linearzaudio_tower.proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_mid)zmodel.layers.{bid}.eh_proj)zmodel.layers.{bid}.embed_tokens)zmodel.layers.{bid}.enorm)zmodel.layers.{bid}.hnorm)z#model.layers.{bid}.shared_head.head)z#model.layers.{bid}.shared_head.normblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r1   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]arch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]mappingc                l   0 U l         U R                  R                  5        HF  u  p4U[        U   ;  a  M  [        U   nX54U R                   U'   U H  nX54U R                   U'   M     MH     XR
                  ;   a(  U R                  R                  U R
                  U   5        [        U5       H  nU R                  R                  5        Hb  u  p4U[        U   ;  a  M  [        U   R                  US9nX54U R                   U'   U H"  nUR                  US9nX54U R                   U'   M$     Md     M     g )N)bid)
rA   r$   itemsr   r	   r@   r?   updaterangeformat)selfarchn_blockstensorkeystensor_namekeyrC   s           ;/home/james-whalen/llama.cpp/gguf-py/gguf/tensor_mapping.py__init__TensorNameMap.__init__  s%    --335LF]400&v.K)/(=DLL%%+$9S!  6 ///##**4+G+G+MN?C $ 7 7 = = ?t!44*62999D-3,A[)C**3*/C)/(=DLL%   !@ #    c                    U R                   R                  U5      nUb  U$ U HU  nUR                  U5      (       d  M  U R                   R                  US [        U5      *  5      nUc  MH  US   US   U-   4s  $    g )Nr   r   )rA   getendswithlen)rH   rN   try_suffixesresultsuffixs        rO   get_type_and_nameTensorNameMap.get_type_and_name  s~    !!#&M"F||F##))#mF|*<=%!!9fQi&&888	 #
 rR   c                2    U R                  XS9nUc  g US   $ )NrW   r   rZ   rH   rN   rW   rX   s       rO   get_nameTensorNameMap.get_name  '    '''I>ayrR   c                2    U R                  XS9nUc  g US   $ )Nr]   r   r^   r_   s       rO   get_typeTensorNameMap.get_type  rb   rR   c                Z     U R                   U   S   $ ! [         a    [        U5      ef = f)Nr   )rA   KeyErrorrH   rN   s     rO   __getitem__TensorNameMap.__getitem__  s4    	 <<$Q'' 	 3-	 s    *c                    XR                   ;   $ NrA   rh   s     rO   __contains__TensorNameMap.__contains__  s    ll""rR   c                ,    [        U R                  5      $ rl   )reprrA   )rH   s    rO   __repr__TensorNameMap.__repr__  s    DLL!!rR   rm   N)rI   r   rJ   int)r#   )rN   strrW   Sequence[str]returnztuple[MODEL_TENSOR, str] | None)rN   ru   rW   rv   rw   z
str | None)rN   ru   rW   rv   rw   zMODEL_TENSOR | None)rN   ru   rw   ru   )rN   ru   rw   bool)rw   ru   )__name__
__module____qualname____firstlineno__r   
TOKEN_EMBDTOKEN_TYPESTOKEN_EMBD_NORMPOS_EMBDOUTPUTDENSE_2_OUTDENSE_3_OUTOUTPUT_NORM
ROPE_FREQSROPE_FACTORS_LONGROPE_FACTORS_SHORTCONV1Dr$   __annotations__	ATTN_NORMATTN_NORM_2ATTN_QKVATTN_QATTN_KATTN_VATTN_OUTATTN_OUT_NORMATTN_POST_NORMATTN_ROT_EMBD
ATTN_SINKSFFN_NORMFFN_PRE_NORMFFN_POST_NORMFFN_GATE_INPFFN_GATE_INP_SHEXPFFN_EXP_PROBS_BFFN_UP
FFN_UP_EXPFFN_UP_SHEXPFFN_UP_CHEXPFFN_ACTFFN_GATEFFN_GATE_EXPFFN_GATE_SHEXPFFN_GATE_CHEXPFFN_DOWNFFN_DOWN_EXPFFN_DOWN_SHEXPFFN_DOWN_CHEXPATTN_Q_NORMATTN_K_NORMLAYER_OUT_NORMPER_LAYER_TOKEN_EMBDPER_LAYER_MODEL_PROJPER_LAYER_PROJ_NORM
ALTUP_PROJALTUP_UNEMBD_PROJPER_LAYER_INP_GATEPER_LAYER_PROJPER_LAYER_POST_NORMALTUP_CORRECT_COEFALTUP_CORRECT_SCALEALTUP_PREDICT_COEFALTUP_ROUTERALTUP_ROUTER_NORMLAUREL_LLAUREL_RLAUREL_POST_NORMSSM_IN
SSM_CONV1DSSM_XSSM_DTSSM_DT_NORMSSM_A
SSM_B_NORM
SSM_C_NORMSSM_DSSM_NORMSSM_OUTTIME_MIX_W0TIME_MIX_W1TIME_MIX_W2TIME_MIX_A0TIME_MIX_A1TIME_MIX_A2TIME_MIX_V0TIME_MIX_V1TIME_MIX_V2TIME_MIX_G1TIME_MIX_G2TIME_MIX_K_KTIME_MIX_K_ATIME_MIX_R_KTIME_MIX_LERP_XTIME_MIX_LERP_KTIME_MIX_LERP_VTIME_MIX_LERP_RTIME_MIX_LERP_GTIME_MIX_LERP_WTIME_MIX_FIRSTTIME_MIX_DECAYTIME_MIX_DECAY_W1TIME_MIX_DECAY_W2TIME_MIX_KEYTIME_MIX_VALUETIME_MIX_RECEPTANCETIME_MIX_GATETIME_MIX_LNTIME_MIX_OUTPUTCHANNEL_MIX_LERP_KCHANNEL_MIX_LERP_RCHANNEL_MIX_KEYCHANNEL_MIX_RECEPTANCECHANNEL_MIX_VALUEATTN_Q_AATTN_Q_BATTN_KV_A_MQA	ATTN_KV_BATTN_K_BATTN_V_BATTN_Q_A_NORMATTN_KV_A_NORMATTN_SUB_NORMFFN_SUB_NORMDEC_ATTN_NORM
DEC_ATTN_Q
DEC_ATTN_K
DEC_ATTN_VDEC_ATTN_OUTDEC_ATTN_REL_BDEC_CROSS_ATTN_NORMDEC_CROSS_ATTN_QDEC_CROSS_ATTN_KDEC_CROSS_ATTN_VDEC_CROSS_ATTN_OUTDEC_CROSS_ATTN_REL_BDEC_FFN_NORMDEC_FFN_GATE
DEC_FFN_UPDEC_FFN_DOWNDEC_OUTPUT_NORMENC_ATTN_NORM
ENC_ATTN_Q
ENC_ATTN_K
ENC_ATTN_VENC_ATTN_OUTENC_ATTN_REL_BENC_FFN_NORMENC_FFN_GATE
ENC_FFN_UPENC_FFN_DOWNENC_OUTPUT_NORMCLSCLS_OUTCONVNEXT_DWCONVNEXT_NORMCONVNEXT_PW1CONVNEXT_PW2CONVNEXT_GAMMAPOSNET_CONV1POSNET_CONV2POSNET_NORMPOSNET_NORM1POSNET_NORM2POSNET_ATTN_NORMPOSNET_ATTN_QPOSNET_ATTN_KPOSNET_ATTN_VPOSNET_ATTN_OUTSHORTCONV_CONVSHORTCONV_INPROJSHORTCONV_OUTPROJV_MMPROJV_MMPROJ_FCV_MMPROJ_MLPV_MMPROJ_PEGV_ENC_EMBD_CLSV_ENC_EMBD_PATCHV_ENC_EMBD_POSV_ENC_ATTN_QV_ENC_ATTN_Q_NORMV_ENC_ATTN_KV_ENC_ATTN_K_NORMV_ENC_ATTN_VV_ENC_INPUT_NORMV_ENC_ATTN_OV_ENC_POST_ATTN_NORMV_ENC_FFN_UPV_ENC_FFN_GATEV_ENC_FFN_DOWNV_LAYER_SCALE_1V_LAYER_SCALE_2
V_PRE_NORMV_POST_NORMV_MM_INP_PROJV_MM_INP_NORMV_MM_SOFT_EMB_NORMV_RESMPL_POS_EMBD_KV_RESMPL_ATTN_QV_RESMPL_ATTN_KV_RESMPL_ATTN_VV_RESMPL_ATTN_OUTV_RESMPL_KVV_RESMPL_POST_NORMV_RESMPL_KV_NORMV_RESMPL_Q_NORMV_RESMPL_PROJV_RESMPL_QUERYV_TOK_EMBD_IMG_BREAKV_MM_PATCH_MERGERA_ENC_EMBD_POSA_ENC_CONV1D
A_PRE_NORMA_POST_NORMA_ENC_ATTN_QA_ENC_ATTN_KA_ENC_ATTN_VA_ENC_INPUT_NORMA_ENC_OUTPUTA_ENC_OUTPUT_NORMA_ENC_FFN_UPA_ENC_FFN_GATEA_ENC_FFN_DOWNA_MMPROJA_MMPROJ_FCA_MM_NORM_PREA_MM_NORM_MIDNEXTN_EH_PROJNEXTN_EMBED_TOKENSNEXTN_ENORMNEXTN_HNORMNEXTN_SHARED_HEAD_HEADNEXTN_SHARED_HEAD_NORMr?   r   ARCTICFFN_NORM_EXPr@   rP   rZ   r`   rd   ri   rn   rr   __static_attributes__r#   rR   rO   r   r      s    	 "
< 	   #

 	$$ '
 	  
 	 
 	   #
 	   #
 	   #
0 	 "

 	&&'' 
[p9L5 pdV?  !
V?J 	   #
KV?Z 	  
[V?D 	 
EV?j 	 
kV?R 	 
SV?x 	 ! 
yV?@ 	"" %
AV?R 	## &
SV?b 	"" %
cV?p 	 "
qV?z 	  
{V?n 	!! $
oV?| 	"" %
}V?N 	!! $
OV?l 	'' *
mV?t 	$$ '
uV?B 	 &
CV?R	 	 	"
S	V?h	 	!! $
i	V?x	 	!! $
y	V?B
 	 
C
V?L
 	  
M
V?p
 	!! $
q
V?D 	## &
EV?R 	## &
SV?\ 	 ! 
]V?b 	!! 
$
cV?z 	## &
{V?J 	## &
KV?R 	   #
SV?n 	   #
oV?J 	 "
KV?R 	## &
SV?d 	)) ,
eV?l 	)) ,
mV?t 	(( +
uV?| 	 "
}V?D 	&& )
EV?L 	'' *
MV?T 	## &
UV?\ 	(( +
]V?d 	'' *
eV?l 	(( +
mV?t 	'' *
uV?| 	!! $
}V?D 	&& )
EV?L 	  
MV?T 	  
UV?\ 	%% (
]V?d 	 
eV?r 	 "
sV?@ 	 
AV?N 	 
OV?\ 	   #
]V?f 	 
gV?t 	 "
uV?@ 	 "
AV?L 	 
MV?Z 	  
[V?d 	 
eV?r 	   #
sV?z 	   #
{V?F 	   #
GV?R 	   #
SV?Z 	   #
[V?b 	   #
cV?j 	   #
kV?r 	   #
sV?z 	   #
{V?B 	   #
CV?J 	   #
KV?R 	!! $
SV?Z 	!! $
[V?b 	!! $
cV?j 	$$ '
kV?t 	$$ '
uV?~ 	$$ '
V?H 	$$ '
IV?R 	$$ '
SV?\ 	$$ '
]V?f 	## &
gV?n 	## &
oV?x 	&& )
yV?B 	&& )
CV?L 	!! $
MV?Z 	## &
[V?h 	(( +
iV?v 	"" %
wV?@ 	   #
AV?J 	$$ '
KV?X 	'' *
YV?b 	'' *
cV?j 	$$ '
kV?t 	++ .
uV?| 	&& )
}V?F 	  
GV?N 	  
OV?V 	"" %
WV?^ 	 !
_V?f 	  
gV?n 	  
oV?v 	"" %
wV?~ 	## &
V?F 	"" %
GV?N 	!! $
OV?V 	"" %
WV?^ 	 "
_V?f 	 "
gV?n 	 "
oV?v 	!! $
wV?~ 	## &
V?F 	(( +
GV?N 	%% (
OV?V 	%% (
WV?^ 	%% (
_V?f 	'' *
gV?n 	)) ,
oV?v 	!! $
wV?~ 	!! $
V?F 	 "
GV?P 	!! $
QV?X 	$$ '
YV?` 	"" %
aV?h 	 "
iV?p 	 "
qV?x 	 "
yV?@ 	!! $
AV?H 	## &
IV?P 	!! $
QV?X 	!! $
YV?` 	 "
aV?j 	!! $
kV?v 	$$ '
wV?@ 	 
AV?N 	 
OV?X 	   #
YV?` 	"" %
aV?h 	!! $
iV?p 	!! $
qV?x 	## &
yV?@ 	!! $
AV?H 	!! $
IV?P 	   #
QV?X 	!! $
YV?` 	!! $
aV?h 	%% (
iV?p 	"" %
qV?x 	"" %
yV?@ 	"" %
AV?H 	$$ '
IV?P 	## &
QV?X 	%% (
YV?` 	&& )
aV?n 	  
oV?x 	   #
yV?@  	!! $
A V?L  	!! $
M V?T  	## &
U V?`  	%% 
(
a V?x  	## &
y V?J! 	!! 
$
K!V?b! 	&& )
c!V?l! 	!! 
$
m!V?D" 	&& )
E"V?N" 	!! 
$
O"V?f" 	%% (
g"V?@# 	!! $
A#V?Z# 	)) ,
[#V?t# 	!! $
u#V?N$ 	## &
O$V?Z$ 	## &
[$V?t$ 	$$ '
u$V?~$ 	$$ '
$V?H% 	 "
I%V?V% 	   #
W%V?f% 	"" %
g%V?n% 	"" %
o%V?|% 	'' *
}%V?D& 	(( +
E&V?L& 	$$ '
M&V?T& 	$$ '
U&V?\& 	$$ '
]&V?d& 	&& )
e&V?l& 	   #
m&V?t& 	'' *
u&V?|& 	%% (
}&V?D' 	$$ '
E'V?L' 	"" %
M'V?T' 	## &
U'V?\' 	)) ,
]'V?d' 	&& )
e'V?r' 	## &
s'V?z' 	!! $
{'V?B( 	C(V?F( 	   #
G(V?P( 	!! $
Q(V?X( 	!! $
Y(V?`( 	!! $
a(V?h( 	%% (
i(V?p( 	!! $
q(V?x( 	&& )
y(V?@) 	!! $
A)V?H) 	##RI)V?L) 	## &
 	  
 	   #

 	"" %
 	"" %

 	"" %
 	'' *
 	   #
 	   #
 	++ .
 	++ .
g*V?; Vt* 	!! $ %% (	
	VR 	 10>,	 #"rR   r   c                    [        X5      $ rl   )r   )rI   rJ   s     rO   get_tensor_name_maprd    s    ((rR   N)rI   r   rJ   rt   rw   r   )
__future__r   typingr   	constantsr   r   r   r	   r   rd  r#   rR   rO   <module>rh     s#    "  L LP" P"f0)rR   