
    oi                        / S Qr SSKrSSKrSSKrSSKJr  SSKJr  SSKJ	r	  SSK
JrJr  S rSS	 jrS
 rS rS r \R$                  \R&                  4S j5       rS r \R$                  \R&                  4S j5       r\R$                  \R&                  S4S j5       r\R$                  S 5       r SS jrS rS rS\4S jrS rg))create_empty_modelset_additional_modulesextract_vision_layersget_model_layer_configcompare_attributescopy_attributes    N)deepcopy   )get_quant_type)logger)HAS_TORCH_DTYPEdtype_from_configc                     SSK Jn  [        U [        [        [
        [        [        [        [        S 5      [        R                  U4	5      $ )Nr   )Enum)enumr   
isinstanceintfloatboolstrlisttupletypetorchdtype)valr   s     Q/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/empty_model.pyis_comparabler   "   s.    cCc4T
EKKY]^__    c                 \   [        U R                  5       5      [        UR                  5       5      -  n[        U5       H  nU R                  US 5      nUR                  US 5      nU(       a  U SU 3OUn[	        U[
        5      (       a!  [	        U[
        5      (       a  [        XVUS9  Mm  [        U5      (       a-  [        U5      (       a  XV:w  a  [        SU SU SU 35        M  M  [        U5      [        U5      :w  d  M  [        SU S[        U5       S[        U5       35        M     g )N.prefixz	Dict key z mismatch: original z != new model z type mismatch: original )
setkeyssortedgetr   dictcompare_dictsr   printr   )	orig_dictnew_dictr#   all_keyskeyorig_valnew_valkey_paths           r   r)   r)   '   s   9>>#$s8==?';;Hh==d+,,sD)(.fXQse$Ch%%*Wd*C*C(H=8$$w)?)?"	(+?zX_W`ab #(^tG},IhZ'@h@PP^_cdk_l^mno  r   c                     SSK Jn  Un[        S5        / n/ n/ n[	        U S5      (       a  [        U R                  5      O	[        5       nUS1-  n[        Ub  UR                  5       O/ U b  U R                  5       O/ 5       GH  u  u  pu  p[        U5       Vs1 s H  oR                  S5      (       a  M  UiM     nn[        U	5       Vs1 s H  oR                  S5      (       a  M  UiM     nnUR                  SS9 VVs1 s H  u  pUiM	     nnnX-
  nUS	S
1-
  nU(       a(  [        U5       H  nUR                  W SU 35        M     X-
  nU(       a  [        S[        U5      S S  S35        X-  nUU-  n[        U5       GHy  nUR                  S5      (       a  M   [!        X5      n[!        X5      n[%        U5      n[%        U5      n['        U5      ['        U5      :w  aR  U(       d  U(       aB  UR                  W SU S['        U5      R(                   S['        U5      R(                   35        M   [+        U[,        5      (       a+  [+        U[,        5      (       a  X;   a  [/        UUW SU 3S9  O0U(       a)  U(       a"  UU:w  a  UR                  W SU SU SU 35         [+        UU5      (       aC  [+        UU5      (       a/  [/        UR3                  5       UR3                  5       W SU 3S9  GMv  GMy  GM|     GM     U(       a/  [        S[5        U5       S35        U H  n[        SU 35        M     U(       a/  [        S[5        U5       S35        U H  n[        SU 35        M     U(       a/  [        S[5        U5       S35        U H  n[        SU 35        M     U(       d  U(       d  U(       d  [        S5        g g g g !   SSK Jn   GN= fs  snf s  snf s  snnf ! ["         a     GMu  f = f! ["         a-  nUR                  W SU S[1        U5       35         S nAGNS nAff = f! ["         a.  nUR                  W SU S[1        U5       35         S nAGM  S nAff = f)Nr   )PreTrainedConfigPretrainedConfigz#=== ATTRIBUTE COMPARISON REPORT ===config_Frecursehf_device_map
source_clsr!   z"Found some extra attributes like:    z...z: original z != new r"   z: comparison failed - u   
🚨 MISSING ATTRIBUTES (z):z  - u   
⚠️  TYPE MISMATCHES (u   
📝 VALUE MISMATCHES (u4   
✅ No missing attributes or type mismatches found!) transformers.configuration_utilsr3   r5   r*   hasattr_extract_all_config_keysr6   r$   zipnamed_modulesdir
startswithnamed_buffersr&   appendr   getattr	Exceptionr   r   __name__r   r(   r)   r   to_dictlen)original_model	new_modelr3   r5   missing_attrstype_mismatchesvalue_mismatchesconfig_keysnamemodule	orig_nameoriginal_moduleattr
orig_attrs	new_attrsr7   buffer_namesmissing_in_newextra_in_newcommon_attrscommon_buffersoriginal_valr0   original_comparablenew_comparableemismatchs                              r   r   r   5   s}   FE+ 

/0MO FM^]eEfEf*>+@+@AlolqK
*K8;%.%:	!*8*D$$&"944 (+?';X';t??SVCWd';
X&)&kNkd9MTk	N+:+H+HQV+H+WX+W+WX $/'?L*II~.$$vQtf%56 / !-6tL7I"17M6NcRS
 "-#l2<(Ds##&=!&/ #0"=*73N L!T']2&.#**dV1TF+d<FXFaFaEbbjkopwkx  lB  lB  kC  ,D  E	WlD11j$6O6O*%lGtfAdVDTU(^#w.(//4&${<.X`ah`i0jkWl,<==*WVfBgBg!,"6"6"8'//:KW[V\\]^b]cTde Ch== )59| +C,>+?rBC!DD-  " +C,@+ADE'HD
#$ ( )#.>*?)@CD(HD
#$ ) 9IEF :J=sFEE YNX4  (  W&&$q6LSQRVH'UVVW  W&&$q6LSQRVH'UVVWsr   O O 8O O%*O%O*&O0=?P=0P.AP<O0
O?>O?
P9"P44P9<
Q4"Q//Q4c                 @   ^^ [        5       mSUU4S jjmT" U 5        T$ )z1Extract all keys from config at any nesting levelc                 t  > [        U S5      (       a  U R                  5       n [        U [        5      (       a  U R	                  5        Hl  u  p#TR                  U5        [        U[        5      (       a  T" X1(       a  U SU 3OU5        MC  [        US5      (       d  MV  T" X1(       a  U SU 3OU5        Mn     g g )NrI   r!   )r>   rI   r   r(   itemsadd)objr#   r.   value_extract_keysr%   s       r   rh   /_extract_all_config_keys.<locals>._extract_keys   s    3	""++-Cc4  !iik
eT**!%fF81SE):#NUI..!%fF81SE):#N * !r    )r$   )r6   rh   r%   s    @@r   r?   r?      s%    5D
O 
O &Kr   c           
      0   SSK Jn  U b  Uc  [        S5        g [        U S5      (       a  [	        U R
                  5      O	[        5       nUS1-  nS1nSnSn/ nSnSn	[        UR                  5       U R                  5       5       GHZ  u  u  pu  pUR                  SS9 V
Vs/ s H  u  pU
PM	     nn
n[        U5       GH  nUR                  S5      (       a  M   [        X5      nX;   a'  [        XUR                  UR                  5      5        MT  [!        U5      (       a  [        XU5        US	-  nMw  [#        U[$        5      (       aF  X;   a!  [        X['        U5      5        US	-  nUS	-  nM  US	-  nUR)                  U S
35        U	S	-  n	M  [#        UU5      (       a  [        X['        U5      5        US	-  nM  X;   a  [        X[        X5      5        GM  GM     GM]     [*        R,                  R/                  SS5      S:X  ak  [        SU SU S35        U	S:  a  [        SU	 S35        US:  a=  [        SU S35        US::  a  [        SU 35        g [        SUS S  SUS-
   S35        g g g s  snn
f !   US	-  nUR)                  U5         GM  = f)Nr   r4   z1Cannot copy attributes: one of the models is Noner6   hf_quantizerFr8   r7   r
   z (dict not in config)UNSLOTH_ENABLE_LOGGING01u   ✅ Copied z attributes (including z config-related dicts)u   📋 Skipped z non-config dictionariesu   ⏭️ Skipped z< total attributes (tensors, modules, non-config dicts, etc.)
   z    Skipped: z    Sample: r<   z... and z more)r=   r5   r*   r>   r?   r6   r$   r@   rA   rD   rB   rC   rF   setattrtodevicer   r   r(   r	   rE   osenvironr'   )rK   rL   r5   rP   extra_attrscopied_countskipped_countskipped_attrsdict_copied_countdict_skipped_countrQ   rR   r7   rT   rX   rU   r]   s                    r   r   r      s   A!2AB FM^]eEfEf*>+@+@AlolqK
*K!$KLMM03I4K4K4M~OkOkOm0n,,+:+H+HQV+H+WX+W+WX(Ds##+&=' F,//):J:J*KL"<00F,7 A%Ld33*h|.DE$))Q.)%*%,,v5J-KL*a/*.>??F(<*@A A%L(F'/*HI )9 ) 1oH 
zz~~.4;L>)@AR@SSijk!M"4!55MNO1OM?2nop"m_56]2A%6$7xa?PPUVW 	 <G Y>+"$$T**s0   !I35I9!I949I9/I9+I9<I99Jc                 x   SSK Jn  SSKJn  [        R
                  R                  SS5      nS[        R
                  S'   [        U SS 5      n[        (       a  SOS	[        U 5      0nS nS nU" S
S9   Ub   UR                  " U40 UD6nUc   UR                  U 5      nS S S 5         U[        R
                  S'   Ub  Uc  [        SU 35        S n[        U 5      n
SU
l        SU
l        SU
l        SU
l        SU
l        SU
l        SU
l        [        U SU R"                  U R$                  -  5      nU
R/                  SU05        UR                  U
SS9nXU R0                  4$ ! [         a  n	[        U	5      nS n S n	A	NS n	A	ff = f! [         a  n	[        U	5      nS n S n	A	GNS n	A	ff = f! , (       d  f       GN= f)Nr   )AutoModelForCausalLMinit_empty_weightsUNSLOTH_WARN_UNINITIALIZEDrp   ro   
model_nametorch_dtyper   F)include_bufferszEFailed to create original_meta_model for AutoModelForCausalLM. Error r
   head_dimeager)attn_implementation)transformersr~   
accelerater   ru   rv   r'   rF   r   r   from_pretrainedrG   r   from_configr*   r	   intermediate_sizehidden_sizenum_attention_headsnum_key_value_headsr   
vocab_sizepad_token_idupdatenum_hidden_layers)r6   r   r~   r   old_warnr   kwargsoriginal_meta_modelerrorr`   
new_configr   rL   s                r   create_empty_causal_lmr      s    2-zz~~:C@H/2BJJ+,t4J.mG>OPV>WXFE 
e	4!+&:&J&J:&`Y_&`# &+&:&F&Fv&N# 
5 	/7BJJ+,08UV[U\]^"&!J#$J J%&J"%&J"JJJ vz6+=+=A[A[+[\HzH-.$00% 1 I
 6+C+CCCG  +A&*#+  +A&*#+ 
5	4sZ   1F*6E!	F*F!
F+E=8F*=FF*
F'F"F*"F''F**
F9c                 r    UR                  5        H#  u  p#[        X5      (       d  M  [        XU5        M%     g)zCHelper to set multiple attributes on a config object if they exist.N)rd   r>   rr   )
config_objattrs_to_setrU   rg   s       r   _set_config_attrsr   &  s,    #))+:$$Je, ,r   c                    [        U 5      nSSKJn  [        US5      (       d  UR                  Ul        S nXCl        SS Kn[        XPR                  S   5      n SSK	J
n  U" 5          U" U 5      nS S S 5        [        US5      (       a  UR
                  Ul        U?[!        U 5      n[#        UR$                  SSSS	SSSS
.5        [#        UR&                  SSSSSSSSS.5        U R$                  R(                  n[        U R&                  SS 5      =(       d    [        U R&                  SS5      nUS:X  a  SUR&                  l        OUS:X  a  SUR&                  l        [-        X5      nU" U5      nUWU4$ ! , (       d  f       GN= f! [         a<  n	[        SUR                   SU	 35        SS Kn
U
R                  5         S n S n	A	GNQS n	A	ff = f)Nr   )SiglipVisionModel_original_initialize_weightsc                     g N )selfrR   s     r   _init_weights0create_empty_vision_model.<locals>._init_weights9  s    r   r   z)Failed to create original_meta_model for z. Error r
      )r   r   r   r   r   r   r   )r   r   
patch_size
image_sizevision_output_dim	num_headsattention_headsr   r   depth
qwen2_5_vlqwen3_vl)get_model_type*transformers.models.siglip.modeling_siglipr   r>   r   r   r   rF   architecturesr   r   rG   r*   rH   	traceback	print_excr	   r   text_configvision_configr   out_hidden_sizemax)r6   r   
model_typer   r   r   	model_clsr   r   r`   r   r   text_layersvision_layers
num_layersrL   s                   r   create_empty_vision_modelr   .  s     'JL $&DEE9J9X9X6	*7'&:&:1&=>I#1  !"+F"3 "  "@AA*;*X*X': &!J j,,  /  j.. 
1 
 $$66KF002EtLyPWX^XlXlnuwxPyM \!34
  0	z	!34
  0 [0J*%I):55k "! #9):L:L9MXVWUXYZ"	#s6   F '	E60F 6
F F F 
G1G		GFc                     U(       a  [        X5      u  p4nO[        X5      u  p4n[        SS9n[        UR	                  5       / 5      nX4XW4$ )NF)return_non_layered)r   r   r   sumvalues)r6   r   is_vision_modelrL   r   r   layer_templateslayer_namess           r   r   r     sQ     5Nv5]2	
5KF5Z2	
 -FOo,,.3K:BBr   c                   ^^^ [        U S5      (       a  U R                  nSnOSnU R                  nU S3m[        USS 5      nUc  [        USS 5      nUb  [        USS 5      nUb3  UTT   R                  S   :  d   SU S	TT   R                  S    35       eSU4S jjnU" UR
                  TU5        ST;   a'  U" U R                  R                  R                  SS S
S9  U S3mTT   n[        R                  R                  US
S9nXR                  l        [        USU5      n[        USS
5      (       a  U S3n	OSn	U	T;   a  TU	   n
SSKJn  U" SSU
R                  S
S9nU
R                  S   Ul        U
R                  S   Ul        [        R                  R                  U
S
S9Ul        [        U S5      (       a  Xl        O[        US5      (       a  Xl        OXl        [        USS
5      (       aC  [        U S5      (       a  U R'                  5         O![        US5      (       a  UR'                  5         [)        UU4S jTR+                  5        5       5      n[-        SU< 35        U H`  nS HW  n TU   n[/        U[        R0                  5      (       a  [        R                  R                  US
S9n[3        U U S35          M^     Mb     g !    Me  = f)Nlanguage_modelzmodel.language_modelmodelz.embed_tokens.weightr   r   r   zPad token id z out of bounds for vocab size Fc                    > TU   R                   u  pETU   n[        U[        R                  5      (       a  [        R                  R                  XcS9nX`l        X l        X@l        XPl	        g )Nrequires_grad)
shaper   r   Tensornn	Parameterweightpadding_idxnum_embeddingsembedding_dim)rR   embed_tokens_keyr   r   r   r   
embeddingsquant_state_dicts          r   set_embedding-set_additional_modules.<locals>.set_embedding  sc    (89I(J(P(P%%&67
j%,,// ++J+VJ") .,r   zmodel.visual.pos_embed.weightr   z.norm.weighttie_word_embeddingszlm_head.weight)Linear)rt   biasr
   lm_headtie_weightsc              3   r   >^#    U  H+  m[        U4S  jSSTTSSSS4 5       5      (       a  M'  Tv   M-     g7f)c              3   ,   >#    U  H	  oT;   v   M     g 7fr   r   ).0substrxs     r   	<genexpr>3set_additional_modules.<locals>.<genexpr>.<genexpr>  s       C  .B6Q;  .Bs   layersblocksr   mlplinearr   N)any)r   r   r   norm_keys    @r   r   )set_additional_modules.<locals>.<genexpr>  sO      *a  ChBRT\^ginpx  {A  .B  C  C 	
*s   &7	7z,Performing substitution for additional_keys=)new_z
new_model.z = val)F)r>   r   r   rF   r   embed_tokensvisual	pos_embedr   r   r   normr   torch.nnr   rt   in_featuresout_featuresr   r   r$   r%   r*   r   r   exec)rL   r   r6   r   language_model_prefixr   r   r   r   
lmhead_keyr   r   layeradditional_keysr.   r#   r   r   r   s    `               @@r   r   r     s5   y*++"11 6 '" 000DE6>48LfmT:"";EL7GHX7Y7_7_`a7b(b  "Nfs  uA  tB  B`  aq  rB  aC  aI  aI  JK  aL  `M  eN  "N(b
- .--/?N&*::ioo,,668WY]mrs ((5HH%D88dE:D!%
 &-8K{1599-..BC
%
 %%!*-# q!FMM>"LLO#\\!_xx))&)F 9i(( % ~y11).&$)!60%88y-00%%'77**,
  #((* O 
9(:
;<,F&s+c5<<00((,,Su,ECxuF+, -  	s   :AKKc           
          1 Sk1 Sk1 Sk1 Sk1 SkS.nUR                  5        VVs0 s H)  u  p#US:w  d	  U (       d  M  U[        [        U5      5      _M+     snn$ s  snnf )z
Returns a unified layer configuration containing the union of layer names
from all supported vision models. Serves as a fallback.

Returns:
    dict: Dictionary containing layer templates for different components.
>   model.layers.{kk}.mlp.up_projmodel.layers.{kk}.mlp.down_projmodel.layers.{kk}.mlp.gate_proj"model.layers.{kk}.self_attn.k_proj"model.layers.{kk}.self_attn.o_proj"model.layers.{kk}.self_attn.q_proj"model.layers.{kk}.self_attn.v_proj,model.language_model.layers.{kk}.mlp.up_proj.model.language_model.layers.{kk}.mlp.down_proj.model.language_model.layers.{kk}.mlp.gate_proj1model.language_model.layers.{kk}.self_attn.k_proj1model.language_model.layers.{kk}.self_attn.o_proj1model.language_model.layers.{kk}.self_attn.q_proj1model.language_model.layers.{kk}.self_attn.v_proj>   model.visual.blocks.{kk}.norm1model.visual.blocks.{kk}.norm2!model.layers.{kk}.input_layernorm"model.layers.{kk}.self_attn.k_norm"model.layers.{kk}.self_attn.q_norm*model.layers.{kk}.post_attention_layernorm+model.layers.{kk}.pre_feedforward_layernorm,model.layers.{kk}.post_feedforward_layernorm,model.visual.deepstack_merger_list.{kk}.norm0model.language_model.layers.{kk}.input_layernorm1model.language_model.layers.{kk}.self_attn.k_norm1model.language_model.layers.{kk}.self_attn.q_norm2model.language_model.layers.{kk}.cross_attn.k_norm2model.language_model.layers.{kk}.cross_attn.q_norm3model.vision_tower.transformer.layers.{kk}.ffn_norm9model.language_model.layers.{kk}.post_attention_layernorm9model.vision_tower.transformer.layers.{kk}.attention_norm:model.language_model.layers.{kk}.pre_feedforward_layernorm;model.language_model.layers.{kk}.post_feedforward_layernorm?model.vision_tower.vision_model.encoder.layers.{kk}.layer_norm1?model.vision_tower.vision_model.encoder.layers.{kk}.layer_norm2Bmodel.vision_tower.vision_model.encoder.layers.{kk}.post_layernorm>2   !model.visual.blocks.{kk}.attn.qkv"model.visual.blocks.{kk}.attn.proj$model.visual.blocks.{kk}.mlp.up_proj&model.visual.blocks.{kk}.mlp.down_proj&model.visual.blocks.{kk}.mlp.gate_proj'model.visual.blocks.{kk}.mlp.linear_fc1'model.visual.blocks.{kk}.mlp.linear_fc2)model.visual.blocks.{kk}.mlp.gate_up_proj2model.language_model.layers.{kk}.cross_attn.k_proj2model.language_model.layers.{kk}.cross_attn.o_proj2model.language_model.layers.{kk}.cross_attn.q_proj2model.language_model.layers.{kk}.cross_attn.v_proj2model.vision_model.transformer.layers.{kk}.mlp.fc12model.vision_model.transformer.layers.{kk}.mlp.fc24model.language_model.layers.{kk}.cross_attn.qkv_proj9model.vision_model.global_transformer.layers.{kk}.mlp.fc19model.vision_model.global_transformer.layers.{kk}.mlp.fc2:model.vision_model.transformer.layers.{kk}.input_layernorm;model.language_model.layers.{kk}.cross_attn_input_layernorm;model.vision_model.global_transformer.layers.{kk}.gate_attn;model.vision_model.transformer.layers.{kk}.self_attn.k_proj;model.vision_model.transformer.layers.{kk}.self_attn.o_proj;model.vision_model.transformer.layers.{kk}.self_attn.q_proj;model.vision_model.transformer.layers.{kk}.self_attn.v_proj;model.vision_tower.transformer.layers.{kk}.attention.k_proj;model.vision_tower.transformer.layers.{kk}.attention.o_proj;model.vision_tower.transformer.layers.{kk}.attention.q_proj;model.vision_tower.transformer.layers.{kk}.attention.v_proj;model.vision_tower.vision_model.encoder.layers.{kk}.mlp.fc1;model.vision_tower.vision_model.encoder.layers.{kk}.mlp.fc2=model.vision_model.transformer.layers.{kk}.self_attn.qkv_proj=model.vision_tower.transformer.layers.{kk}.attention.qkv_proj?model.vision_tower.transformer.layers.{kk}.feed_forward.up_projAmodel.vision_model.global_transformer.layers.{kk}.input_layernormAmodel.vision_tower.transformer.layers.{kk}.feed_forward.down_projAmodel.vision_tower.transformer.layers.{kk}.feed_forward.gate_projBmodel.vision_model.global_transformer.layers.{kk}.self_attn.k_projBmodel.vision_model.global_transformer.layers.{kk}.self_attn.o_projBmodel.vision_model.global_transformer.layers.{kk}.self_attn.q_projBmodel.vision_model.global_transformer.layers.{kk}.self_attn.v_projCmodel.vision_model.transformer.layers.{kk}.post_attention_layernormDmodel.language_model.layers.{kk}.cross_attn_post_attention_layernormDmodel.vision_model.global_transformer.layers.{kk}.self_attn.qkv_projDmodel.vision_tower.transformer.layers.{kk}.feed_forward.gate_up_projDmodel.vision_tower.vision_model.encoder.layers.{kk}.self_attn.k_projDmodel.vision_tower.vision_model.encoder.layers.{kk}.self_attn.q_projDmodel.vision_tower.vision_model.encoder.layers.{kk}.self_attn.v_projFmodel.vision_tower.vision_model.encoder.layers.{kk}.self_attn.out_projFmodel.vision_tower.vision_model.encoder.layers.{kk}.self_attn.qkv_projJmodel.vision_model.global_transformer.layers.{kk}.post_attention_layernorm>	   model.visual.merger.mlp.{kk}!model.visual.merger.linear_fc{kk}'model.multi_modal_projector.linear_{kk}2model.visual.deepstack_merger_list.{kk}.linear_fc12model.visual.deepstack_merger_list.{kk}.linear_fc26model.multi_modal_projector.patch_merger.merging_layer:model.language_model.model.layers.{kk}.cross_attn_mlp_gate:model.vision_model.global_transformer.layers.{kk}.gate_ffn;model.language_model.model.layers.{kk}.cross_attn_attn_gate>   model.visual.normmodel.visual.pos_embedmodel.visual.merger.ln_qmodel.visual.merger.normmodel.language_model.normmodel.vision_tower.ln_premodel.multi_modal_projectormodel.vision_tower.patch_convmodel.visual.patch_embed.proj model.vision_model.layernorm_pre!model.vision_model.layernorm_post"model.vision_model.class_embedding,model.multi_modal_projector.mm_soft_emb_norm-model.vision_model.gated_positional_embedding-model.vision_tower.patch_positional_embedding.model.vision_tower.vision_model.post_layernorm5model.vision_model.pre_tile_positional_embedding.gate6model.multi_modal_projector.mm_input_projection_weight6model.vision_model.post_tile_positional_embedding.gate:model.vision_model.pre_tile_positional_embedding.embedding:model.vision_tower.vision_model.embeddings.patch_embedding;model.vision_model.post_tile_positional_embedding.embedding<model.vision_model.gated_positional_embedding.tile_embedding=model.vision_tower.vision_model.embeddings.position_embedding)standard_layers
layernormsr   additional_layersnon_layered_componentsrt  )rd   r&   r   )r   r   r.   rg   s       r   r   r     s|    
"
8C
H
* "
OhOV 8G7L7L7N  G7NRUWoRo  tF$CU$$7N  G  G  Gs
   AAc                 p    [        U SS5      n[        U S5      (       a  [        U R                  SU5      nU$ )Nr   	causal_lmr   )rF   r>   r   r6   r   s     r   r   r     s9    {;Jv'' V11<L
r   c                 
   [        U 5      nUS:X  aE  [        U R                  SS5      [        U R                  SS5      [        U R                  SS5      S.$ US:X  a%  [        U SS5      [        U R                  SS5      S	.$ US
:X  a;  [        U SS5      [        U R                  SS5      [        U R                  SS5      S.$ US:X  a/  [        U R                  SS5      [        U R                  SS5      S	.$ [        U SS5      $ )z
Returns layer counts for different model types.

Args:
    config: Model configuration

Returns:
    int or dict: Number of layers (int for causal_lm, dict for VL models)
mllamar       num_global_layersr   )r   r   global_layersr   r   )r   r   r   $      deepstack_depth   )r   r   deepstack_layersgemma3)r   rF   r   r   rw  s     r   get_model_layer_countsr    s"     'JX"6#5#57JBO$V%9%9;NPRS$V%9%9;NPQR
 	

 
|	#"6+>C$V%9%97BG
 	
 
z	!"6+>C$V%9%97BG '(<(<>OQR S
 	

 
x	"6#5#57JBO$V%9%9;NPRS
 	
 v2B77r   	attr_pathc                 
   UR                  S5      nUS   S:X  a  [        U S5      (       d  USS  nU n U H3  nUR                  5       (       a  U[        U5         nM(  [	        X45      nM5     U$ ! [
        [        4 a     g f = f)Nr!   r   r   r
   )splitr>   isdigitr   rF   AttributeError
IndexError)rf   r  partscurparts        r   _get_nested_attrr    s    OOC EQx773#8#8ab	
CD||~~#d)nc(	 
 
J' s   :A/ /BBc           
      J	   [        U R                  5      n[        5       nUR                  S/ 5      UR                  S/ 5      -   UR                  S/ 5      -   n[	        U R                  5      n[        U[        5      (       a  [        UR                  5       5      OUn[        U5       GH  n	U GH  n
U
R                  U	S9n[        X5      nSU;   a  UR                  SS5      nUc  M;  SU;   ad  US	;   a  U" US
XSS9  MR  U" UR                  SS5       S
X5        U" UR                  SS5       SX5        U" UR                  SS5       SX5        M  SU;   a8  U" UR                  SS5       S
X5        U" UR                  SS5       SX5        M  SU;   d  SU;   a  U" US
X5        M  [        U[        R                  R                  5      (       a!  [!        US5      (       a  U" US
X5        GMB  GME  [        U[        R                  R"                  5      (       a  UR$                  X '   X    X+ '   GM  ['        SU S[)        U5       35        GM     GM     UR                  S/ 5      nU GHF  n[        X5      nUc  M  [!        US5      (       a  U" US
X5        M1  [        U[        R                  R"                  5      (       a  UR$                  X'   UR$                  X.'   Mx  [        U[        R                  R                  5      (       a  UR+                  5        Ht  u  nnUR                  SS5      U;   a  M  U SU 3n[!        US5      (       a  U" US
UU5        MC  [!        US5      (       d  MV  UR$                  UU'   UR$                  UU'   Mv     GM,  ['        S U S[)        U5       35        GMI     S!n[        U U5      nUbK  UR,                  R.                  nUR1                  UR2                  S
   S"S#S#5      UU S3'   UU S3   UU S3'   S$nU R                  R4                  n[        U U5      nUbh  UR.                  nUR1                  UR6                  UR8                  UR:                  UR<                  UR<                  5      UU S3'   UU S3   UU S3'   gg)%z
Extracts vision layers for any supported vision model by dynamically using
a model-specific configuration. This approach is more robust and avoids
failures by correctly identifying layer paths and parameters.
r   rr  rs  )kkzlanguage_model.modelr   Nqkv)r   r   r   F)slice_weightsqkv_projq_projk_projr
   v_proj   gate_up_proj	gate_projup_projfcprojr   zUnsloth: Skipping layer 'z' of unexpected type: rt  z.weightrk   r!   dataz)Unsloth: Skipping non-layered component 'z"model.vision_model.patch_embeddingr     ra  )r   r6   r   r'   r  r   r(   r   r   rangeformatr  replacer   r   Moduler>   r   r  r*   r   named_parameters_linearr   reshaper   r   r   in_channelstemporal_patch_sizer   )vllm_internals
state_dictr   get_state_dictr   layer_configall_layered_templateslayer_countsnum_layers_to_iterater  layer_template
layer_pathlayer_modulert  component_path	component
param_nameparamfull_param_pathpathr   r   s                         r   r   r     s      5 56J)+L 	"-r*	+,b1	2  *.*?*?@L:D\SW:X:XC 3 3 56^j )*3N'..".5J+NGL%3'//0FHXY
 'J&!%?? 'z1j^cd':+=+=j(+S*TWXZds':+=+=j(+S*TWXZds':+=+=j(+S*TWXZds#z1 #j&8&8&T%UXY[et"j&8&8	&R%SVWYcrZ'6Z+?":q*K!,@@"<::*:q*S ;#L%((2D2DEE6B6G6G
\3<F<W(<9 9*E[\`am\n[opqE 4 +L *--.FK0$^D	 y(++~q*HIuxx'9'9::-6^^
*3<>> 0Iuxx77)2)C)C)E%J!)))R8<RRT\)7(8*&FOuh//&:uM //6;jj
?3<AJJ(9 *F A.AQQghlmvhwgxyz+ 12 0D 6I""))'-~~fll1oq"b'Q
dV7#$-74&8H-ID6)* +D"))77M 6I!!'-~~m6O6OQ^QjQjly  mN  mN  P]  Ph  Ph  jw  jB  jB  (C
dV7#$-74&8H-ID6)* r   rj   )T)__all__r   reru   copyr	   utilsr   logr   hf_utilsr   r   r   r)   r   r?   r   inference_modefloat16r   r   r   r   r   r   r   r  r   r  r   r   r   r   <module>r     s  "  	 	  !  8`
p^G@&>X~  +0== 9D 9Dv-
  .3mm O6 O6d ',}} C C l	 l	Z sGj$8NS "bJr   