
    C1iF                    4
   S SK r S SKrS SKrS SKrS SKrS SKJrJr  S SKr	 S SK
r
S SKrS SKrS SKJr  S SKJr  S SKJrJrJrJrJrJrJr  \R4                  R6                  \R4                  R8                  \R4                  R8                  \R4                  R6                  \R4                  R8                  \R4                  R:                  \R4                  R<                  \R4                  R>                  \R4                  R>                  S.	r \RB                  RD                  \RB                  RF                  \RB                  RH                  \RB                  RF                  S.r%\RL                  RN                  \RL                  RP                  S.r)0 r*S	 r+ " S
 S\5      r, " S S\ RZ                  5      r.\+" S5       " S S\.5      5       r/\+" S5       " S S\/5      5       r0\+" S5       " S S\/5      5       r1\+" S5       " S S\/5      5       r2\+" S5       " S S\/5      5       r3\+" S5       " S S\/5      5       r4\+" S 5       " S! S"\.5      5       r5\+" S#5       " S$ S%\.5      5       r6\+" S&5       " S' S(\.5      5       r7\+" S)5       " S* S+\.5      5       r8\+" S,5       " S- S.\.5      5       r9\+" S/5       " S0 S1\/5      5       r:\+" S25       " S3 S4\/5      5       r;\+" S55       " S6 S7\/5      5       r<\+" S85       " S9 S:\.5      5       r=\+" S;5       " S< S=\=5      5       r>\+" S>5       " S? S@\.5      5       r?\+" SA5       " SB SC\.5      5       r@\+" SD5       " SE SF\.5      5       rA\+" SG5       " SH SI\.5      5       rB\+" SJ5       " SK SL\.5      5       rC\+" SM5      \+" SN5       " SO SP\.5      5       5       rD\+" SQ5       " SR SS\.5      5       rE\+" ST5       " SU SV\.5      5       rF\+" SW5       " SX SY\.5      5       rG\+" SZ5       " S[ S\\.5      5       rH\+" S]5       " S^ S_\.5      5       rI\+" S`5       " Sa Sb\.5      5       rJ\+" Sc5       " Sd Se\.5      5       rK\+" Sf5       " Sg Sh\K5      5       rL\+" Si5       " Sj Sk\.5      5       rM\+" Sl5       " Sm Sn\.5      5       rN\+" So5       " Sp Sq\.5      5       rO\+" Sr5       " Ss St\.5      5       rP\+" Su5       " Sv Sw\.5      5       rQSx rR\SSy:X  a  \R" 5         / SzQ/ S{Q/ S|Q/ S}Q/ S~Q/ SQ/ SQ/ SQ/ SQ/ SQ/ SQS.rT\+" S5       " S S\.5      5       rUg! \ a     GNf = f)    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwav2vec2bert_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3longrope)gemmgemvc                    ^  U 4S jnU$ )z5Registers a model loader for this configuration name.c                 $   > U " 5       [         T'   U $ N)_MODEL_LOADERS)clsconfig_names    ]/home/james-whalen/.local/lib/python3.13/site-packages/ctranslate2/converters/transformers.py	decorator"register_loader.<locals>.decorator<   s    &)e{#
     )r"   r$   s   ` r#   register_loaderr(   9   s     r&   c                   ~    \ rS rSrSr      SS\S\\   S\\\      S\S\\   S	\S
\4S jjr	S r
S rS rS rSrg)TransformersConverterC   z/Converts models from Hugging Face Transformers.Nmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 X    Xl         X l        X0l        X@l        XPl        X`l        Xpl        g)a  Initializes the converter.

Arguments:
  model_name_or_path: Name of the pretrained model to download, or path to the
    directory containing the pretrained model.
  activation_scales: Path to the pre-computed activation scales. Models may
    use them to rescale some weights to smooth the intermediate activations
    and improve the quantization accuracy. See
    https://github.com/mit-han-lab/smoothquant.
  copy_files: List of filenames to copy from the Hugging Face model to the
    converted model directory.
  load_as_float16: Load the model weights as float16. More precisely, the model
    will be loaded with ``from_pretrained(..., dtype=torch.float16)``.
  revision: Revision of the model to download from the Hugging Face Hub.
  low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
    with ``from_pretrained``.
  trust_remote_code: Allow converting models using custom code.
N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr,   r-   r.   r/   r0   r1   r2   s           r#   __init__TransformersConverter.__init__F   s-    8 $6 "3% /!"3"3r&   c                    [         R                  " 5          [        R                  R	                  U R
                  U R                  S9nUR                  R                  n[        R                  U5      nUc>  [        SU< SSR                  [        [        R                  5       5      5      < S35      e[        [        UR                   5      n[        R"                  nSU R$                  (       a  [         R&                  O [        USS 5      =(       d    [        USS 5      0nU R(                  (       a  U R(                  US'   U R*                  (       a  U R*                  US	'   U R                  (       a  U R                  US
'   U R,                  " X@R
                  40 UD6n0 nU R                  (       a  U R                  US
'   U R.                  " XPR
                  40 UD6n	U" Xy5      n
U R0                  (       a0  [         R2                  " U R0                  SS9nUR5                  X5        U R6                  (       a3  U R6                   H#  nU
R9                  U R;                  U5      5        M%     U
sS S S 5        $ ! , (       d  f       g = f)N)r2   z8No conversion is registered for the model configuration z  (supported configurations are: , )dtypetorch_dtyper0   r1   r2   cpu)map_location)torchno_gradtransformers
AutoConfigfrom_pretrainedr4   r:   	__class____name__r    get
ValueErrorjoinsortedkeysgetattrarchitecture_nameAutoTokenizerr7   float16r8   r9   
load_modelload_tokenizerr5   loadsmooth_activationr6   register_fileget_model_file)r;   configr"   loadermodel_classtokenizer_classkwargsmodeltokenizer_kwargs	tokenizerspecr-   filenames                r#   _loadTransformersConverter._loadj   s   ]]_!,,<<((D<S<S = F !**33K#''4F~  #DIIf^5H5H5J.K$LN  ",0H0HIK*88O ,, MM $7 <v}d;F ~~%)^^z"&&.2.E.E*+&&.2.E.E*+OOK1I1ITVTE!&&8<8O8O !45++!9!9=MI %+D&&$)JJ++%%! ((A $ 0 0H&&t':':8'DE !1 q __s   II77
Jc                 (    UR                   " U40 UD6$ r   rI   )r;   r]   r,   r_   s       r#   rU    TransformersConverter.load_model   s    **+=HHHr&   c                 (    UR                   " U40 UD6$ r   rh   )r;   r^   r,   r_   s       r#   rV   $TransformersConverter.load_tokenizer   s    ../ALVLLr&   c                    [         R                  R                  U R                  5      (       a+  [         R                  R	                  U R                  U5      nO  [
        R                  " U R                  US9nUb$  [         R                  R                  U5      (       d  [        SU< SU R                  < 35      eU$ ! [
        R                  R                   a    S n Nhf = f)N)repo_idrd   zFile z does not exist in model )ospathisdirr4   rN   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfilerM   )r;   rd   ro   s      r#   rZ   $TransformersConverter.get_model_file   s    77==112277<< 8 8(CD&66 44x <rww~~d33T557 
  #((;; s   C    C#"C#)r5   r6   r7   r9   r4   r8   r:   )NNFNFF)rK   
__module____qualname____firstlineno____doc__strr   r   boolr<   re   rU   rV   rZ   __static_attributes__r'   r&   r#   r*   r*   C   s    9
 ,0*. %"&"'"'"4"4 $C="4 T#Y'	"4
 "4 3-"4  "4  "4H9vIMr&   r*   c                       \ rS rSrSr\S 5       r\R                  S 5       r	S r
S rS rS rS	 r\R                   R"                  4S
 jrS rS rS rS rSrg)ModelLoader   zRBase class for loading Transformers models into a CTranslate2 model specification.c                     g r   r'   r;   s    r#   rR   ModelLoader.architecture_name   s    r&   c                     [        5       er   NotImplementedErrorr;   r`   s     r#   get_model_specModelLoader.get_model_spec   s    !##r&   c                     U R                  U5      nU R                  UR                  X5        U R                  X5      nU R	                  X45        U$ r   )r   
set_configr[   get_vocabularyset_vocabulary)r;   r`   rb   rc   tokenss        r#   __call__ModelLoader.__call__   sG    ""5)U6$$U6D)r&   c                     [        UR                  5       R                  5       S S9 VVs/ s H  u  p4UPM	     snn$ s  snnf )Nc                     U S   $ N   r'   )items    r#   <lambda>,ModelLoader.get_vocabulary.<locals>.<lambda>   s    Qr&   )key)rO   	get_vocabitems)r;   r`   rb   token_s        r#   r   ModelLoader.get_vocabulary   sL     ###%++-3G
 
 	
 
s   =c                     g r   r'   r;   rc   r   s      r#   r   ModelLoader.set_vocabulary       r&   c                     g r   r'   r;   r[   r`   rb   s       r#   r   ModelLoader.set_config   r   r&   c                 H    UR                   Ul        UR                  Ul        g r   weightgammabiasbetar;   rc   modules      r#   set_layer_normModelLoader.set_layer_norm   s    ]]
KK	r&   c                    U[         R                  R                  :X  a  UR                  Ul        O3UR                  Ul        UR
                  Ul        UR                  Ul        [        U[        R                  5      (       a!  UR                  R                  SS5      Ul        [        US5      (       a   UR                  b  UR                  Ul        g g g )Nr   r   r   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancerG   Conv1D	transposehasattrr   )r;   rc   r   
quant_types       r#   
set_linearModelLoader.set_linear   s    11555 --DK ..DK &D%}}Dfl1122++//15DK66""v{{'>DI (?"r&   c                 &    UR                   Ul         g r   )r   r   s      r#   set_embeddingsModelLoader.set_embeddings   s    mmr&   c                 v    UR                   Ul        [        USS5      nUS:  a  UR                  US  Ul        g g )Noffsetr   r   	encodingsrQ   r;   rc   r   r   s       r#   set_position_encodings"ModelLoader.set_position_encodings   s9    1-A:!^^FG4DN r&   c                     [        S5      e)Nz7No activation smoothing logic is defined for this modelr   )r;   rc   r-   s      r#   rX   ModelLoader.smooth_activation   s    !E
 	
r&   c           	         [        USS 5      nU(       a  UR                  S5      =(       d    UR                  S5      nUS:X  a  S nOL[        R                  U5      nUc4  [        SU< SSR	                  [        R                  5       5      < 35      eUR                  SS	5      nUR                  S
U5      nOS nS	n[        US
U5      nXVU4$ )Nrope_scalingtype	rope_typedefaultRoPE scaling type 'T' is not yet implemented. The following RoPE scaling types are currently supported: r?   factorr   
rope_theta)rQ   rL   _SUPPORTED_ROPE_SCALINGr   rN   rP   )r;   r[   default_rope_thetar   r   rotary_scaling_typerotary_scaling_factorr   s           r#   get_rotary_paramsModelLoader.get_rotary_params   s    v~t<$((0QL4D4D[4QII%&*#&=&A&A)&L#&.- %dii0G0L0L0N&OQ 
 %1$4$4Xq$A!%)),8JKJ"&$%! 7IJJ":EEr&   r'   N)rK   rv   rw   rx   ry   propertyrR   abcabstractmethodr   r   r   r   r   r   r   r   r   r   r   r   rX   r   r|   r'   r&   r#   r~   r~      su    \  	$ $
  3>2J2J2N2N $$5

Fr&   r~   
BartConfigc                   f   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S rSS	 jrS
 rSrU =r$ )
BartLoaderi  c                     g)NBartForConditionalGenerationr'   r   s    r#   rR   BartLoader.architecture_name  s    -r&   c                 "   [         R                  R                  UR                  R                  UR                  R
                  4UR                  R                  UR                  R                  [        UR                  R                     [        UR                  SS5      S9nU R                  UR                  UR                  R                  5        U R                  UR                  UR                  R                  5        U R!                  UR                  R"                  UR$                  5        [        USS 5      nUbK  UR'                  5       R)                  5       S:w  a)  UR+                  5       UR                  R"                  l        U$ )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r
   TransformerSpecfrom_configr[   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrQ   set_encoderencoderr`   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r;   r`   rc   r   s       r#   r   BartLoader.get_model_spec  s!   //;;\\((%,,*E*EFLL00\\22-ell.N.NO '6KT R < 
 	u{{':':;u{{':':;//?#E+>E(->-F-F-H-N-N-PTU-U+<+D+D+FDLL##(r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      :  a  US UR                  R                   nU$ r   )superr   r[   
vocab_sizelenr;   r`   rb   r   rJ   s       r#   r   BartLoader.get_vocabulary2  sD    '9<<""S[05ell556Fr&   c                 H    UR                  U5        UR                  U5        g r   register_source_vocabularyregister_target_vocabularyr   s      r#   r   BartLoader.set_vocabulary8      ''/''/r&   c                     UR                   Ul         UR                  Ul        UR                  Ul        UR                  UR                  R
                  5      Ul        g r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensr[   decoder_start_token_iddecoder_start_tokenr   s       r#   r   BartLoader.set_config<  sL    $..$..$..%.%D%DLL//&
"r&   c                 B   U R                  X5        [        UR                  UR                  5       H  u  p4U R	                  UR
                  UR                  SS9  U R                  UR
                  R                  UR                  5        U R                  UR                  R                  UR                  5        U R                  UR                  R                  UR                  5        U R                  UR                  R                  UR                   5        M     g NTself_attention)set_common_layersziplayerlayersset_attentionr  	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r;   rc   r   
layer_specr  s        r#   r   BartLoader.set_encoderD  s    t-!$TZZ!@J))#  
 ))44**
 OOJNN33UYY?OOJNN33UYY?
 9 95;Q;QR "Ar&   c                    U R                  X5        [        UR                  UR                  5       GHQ  u  p4U R	                  UR
                  UR                  SS9  U R                  UR
                  R                  UR                  5        [        US5      (       aU  U R	                  UR                  UR                  SS9  U R                  UR                  R                  UR                  5        U R                  UR                  R                   UR"                  5        U R                  UR                  R$                  UR&                  5        U R                  UR                  R                  UR(                  5        GMT     g )NTr  encoder_attnF)r  r  r  r  r  r  r  r   r  r  r   	attentionr#  encoder_attn_layer_normr   r  r  r  r  r  r  )r;   rc   r   r   r  s        r#   r   BartLoader.set_decoderV  s4   t-!$TZZ!@J))#  
 ))44**
 un--""((&&#( # 
 ##((3311
 OOJNN33UYY?OOJNN33UYY?
 9 95;Q;QR1 "Ar&   c                 j   [        S5       Vs/ s H  n[        R                  " 5       PM     nnU R                  US   UR                  5        U R                  US   UR
                  5        U R                  US   UR                  5        U(       a%  [        R                  " UR                  S   U5        ON[        R                  " UR                  S   US S 5        [        R                  " UR                  S   USS  5        U R                  UR                  S   UR                  5        g s  snf )N   r   r      )ranger   
LinearSpecr   q_projk_projv_projr   fuse_linearr   out_projr;   rc   r$  r  r   split_layerss         r#   r  BartLoader.set_attentions  s    :?(C(Q..0(CQ)9)9:Q)9)9:Q)9)9:dkk!nl;dkk!nl2A.>?dkk!nl12.>?B););< Ds   D0c                    SS K n[        US5      (       dC  UR                  R                  (       a%  UR	                  UR                  R
                  5      OSnOUR                  nXAl        U R                  UR                  UR                  5        U R                  [        UR                  [        5      (       a  UR                  S   OUR                  UR                  5        [        US5      (       a&  U R!                  UR"                  UR"                  5        [        US5      (       a'  U R!                  UR$                  UR$                  5        g g )Nr   embed_scale      ?r  r   )mathr   r[   scale_embeddingsqrtd_modelr6  scale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r  r   )r;   rc   r   r8  r6  s        r#   r  BartLoader.set_common_layers  s   v}-- ==00 		&--//0  !,,K +##D$;$;V=S=ST doot44 "__	
 6<((1B1BC6011 8 8&:T:TU 2r&   r'   F)rK   rv   rw   rx   r   rR   r   r   r   r   r   r   r  r  r|   __classcell__rJ   s   @r#   r   r     sF    . .&0
S$S:=V Vr&   r   MarianConfigc                   \   ^  \ rS rSr\S 5       rU 4S jrS rU 4S jrU 4S jr	S r
SrU =r$ )	MarianMTLoaderi  c                     g)NMarianMTModelr'   r   s    r#   rR    MarianMTLoader.architecture_name  s    r&   c                    > SUR                   l        SUR                   l        [        TU ]  U5      nU R                  U5        U$ NF)r[   r   r   r   r   _remove_pad_weights)r;   r`   rc   rJ   s      r#   r   MarianMTLoader.get_model_spec  s=    (-%+0(w%e,  &r&   c                 j    UR                   Ul         UR                  Ul        UR                   Ul        g r   )r  r	  r  r   s       r#   r   MarianMTLoader.set_config  s.    $..$.. &/%8%8"r&   c                 2   > SUl         [        TU ]	  X5        g NT)start_from_zero_embeddingr   r   r;   rc   r   rJ   s      r#   r   MarianMTLoader.set_decoder  s    )-&D*r&   c                 X   > [         TU ]  X5      nUS   S:X  a  UR                  5         U$ )Nr*  z<pad>)r   r   popr   s       r#   r   MarianMTLoader.get_vocabulary  s.     '9": JJLr&   c                     UR                   R                  S   UR                  R                  UR                  R                  /nUS   R                  R
                  S   S-
  nU H  nUR                  R
                  S   US-   :X  a  UR                  S S Ul        [        U[        R                  5      (       d  MX  UR                  5       (       d  Mo  UR                  R
                  S   US-   :X  d  M  UR                  S S Ul
        M     g )Nr   r   r*  )r   r?  r   r   r   shaper   r   r,  has_biasr   )r;   rc   vocab_specsnew_vocab_size
vocab_specs        r#   rN  "MarianMTLoader._remove_pad_weights  s    LL##A&LL##LL##
 %Q..44Q7!;%J  &&q)^a-??$.$5$5cr$:
!:{'='=>>''))OO))!,0BB",//#2"6
 &r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   rN  r|   rD  rE  s   @r#   rH  rH    s3     9+7 7r&   rH  M2M100Configc                   J   ^  \ rS rSr\S 5       rU 4S jrS rU 4S jrSr	U =r
$ )M2M100Loaderi  c                     g)NM2M100ForConditionalGenerationr'   r   s    r#   rR   M2M100Loader.architecture_name  s    /r&   c                 f   > SUR                   l        SUR                   l        [        TU ]  U5      $ )NTF)r[   r   r   r   r   )r;   r`   rJ   s     r#   r   M2M100Loader.get_model_spec  s+    (,%+0(w%e,,r&   c                 @    UR                   UR                  S  Ul        g r   )weightsr   r   r   s      r#   r   #M2M100Loader.set_position_encodings  s    8r&   c                   > [         TU ]  X5      nUS   UR                  :X  a*  UR                  UR                  UR                  5       5        UR                  R                  S/ 5       H  nXC;  d  M
  UR                  U5        M     [        USUR                  R                  [        U5      -
  5      nUS:  a"  U[        U5       Vs/ s H  nSU-  PM
     sn-  nU$ s  snf )Nr*  additional_special_tokensnum_madeup_wordsr   zmadeupword%d)r   r   r	  insertunk_token_idrX  special_tokens_maprL   appendrQ   r[   r   r   r+  )r;   r`   rb   r   r   rn  irJ   s          r#   r   M2M100Loader.get_vocabulary  s    '9 ":,,,MM)00&**,?11556QSUVE"e$ W #)5<<+B+BS[+P
 a59I3JK3Ja~)3JKKF Ls   
C r'   )rK   rv   rw   rx   r   rR   r   r   r   r|   rD  rE  s   @r#   rc  rc    s)    0 0-
9 r&   rc  MBartConfigc                   *    \ rS rSr\S 5       rS rSrg)MBartLoaderi  c                     g)NMBartForConditionalGenerationr'   r   s    r#   rR   MBartLoader.architecture_name  s    .r&   c                     UR                   Ul         UR                  Ul        UR                  Ul        [        UR                  SS 5      S;   a  S Ul        g UR                  Ul        g )Nr^   )MBartTokenizerN)r  r  r	  rQ   r[   r  r   s       r#   r   MBartLoader.set_config  s[    $..$..$.. 5<<!2D9=UU)-F&)2)<)<F&r&   r'   NrK   rv   rw   rx   r   rR   r   r|   r'   r&   r#   rw  rw    s    / /	=r&   rw  PegasusConfigc                   *    \ rS rSr\S 5       rS rSrg)PegasusLoaderi	  c                     g)NPegasusForConditionalGenerationr'   r   s    r#   rR   PegasusLoader.architecture_name      0r&   c                     UR                   Ul        UR                  Ul        UR                  Ul        UR                   Ul        g r   )	pad_tokenr  r  r	  r  r   s       r#   r   PegasusLoader.set_config  s:    $..$..$..%.%8%8"r&   r'   Nr~  r'   r&   r#   r  r  	  s    1 19r&   r  	OPTConfigc                   b   ^  \ rS rSr\S 5       rS rS rS rS r	U 4S jr
S rU 4S	 jrS
rU =r$ )	OPTLoaderi  c                     g)NOPTForCausalLMr'   r   s    r#   rR   OPTLoader.architecture_name      r&   c                    [         R                  R                  UR                  R                  UR                  R
                  UR                  R                  [        UR                  R                     UR                  R                  UR                  R                  :g  S9nU R                  UR                  UR                  R                  5        U R                  UR                  R                  UR                   5        U$ )N)r   r   project_in_out)r
   TransformerDecoderModelSpecr   r[   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   r`   r   r   r   r;   r`   rc   s      r#   r   OPTLoader.get_model_spec  s    ;;GGLL**LL,,\\66-ell.N.NO <<;;u||?W?WW H 
 	u{{':':;//?r&   c                 |   [        UR                  R                  5       H  u  p4SU-  n[        R                  " UR
                  R                  UR
                  R                  S   USU-     5        [        R                  " UR                  R                  UR                  R                  USU-     5        M     g )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r  r   rX   r  r  r   r  r  )r;   rc   r-   rs  r  layer_scopes         r#   rX   OPTLoader.smooth_activation)  s    !$,,"4"45HA3a7K##$$//$$++A.!"7+"EF ##		$$		""!(["89 6r&   c                 &    UR                  U5        g r   register_vocabularyr   s      r#   r   OPTLoader.set_vocabulary9        (r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r  r	  r   s       r#   r   OPTLoader.set_config<  ,    $..$..$..r&   c                 X  > [         TU ]  X5        UR                  b&  U R                  UR                  UR                  5        UR                  b&  U R                  UR                  UR                  5        UR
                  b'  U R                  UR                  UR
                  5        g g r   )r   r   
project_inr   project_outr  r   r  rU  s      r#   r   OPTLoader.set_decoderA  s    D*)OODOOW-?-?@*OOD,,g.A.AB##/1I1IJ 0r&   c                     SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        g rM  )r<  r   r=  r>  r   r?  rA  r   s      r#   r  OPTLoader.set_common_layersK  s@     %##D$;$;V=S=STDOOV-@-@Ar&   c                    > [         TU ]  X5      nSn[        U5      S-  S:w  a@  SR                  U5      nXS;  a  UR	                  U5        US-  n[        U5      S-  S:w  a  M@  U$ )Nr      zmadeupword{:04d}r   )r   r   r   formatrr  )r;   r`   rb   r   rs  symbolrJ   s         r#   r   OPTLoader.get_vocabularyP  sk    '9&kAo"'..q1F#f%FA	 &kAo" r&   r'   )rK   rv   rw   rx   r   rR   r   rX   r   r   r   r  r   r|   rD  rE  s   @r#   r  r    s?        )/
KB

 
r&   r  GPTBigCodeConfigc                   P   ^  \ rS rSr\S 5       rS rS rU 4S jrS r	S r
SrU =r$ )	GPTBigCodeMHALoaderi]  c                     g)NGPTBigCodeForCausalLMr'   r   s    r#   rR   %GPTBigCodeMHALoader.architecture_name_  s    &r&   c                 z   [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     SS9nU R                  UR                  UR                  5        U R                  UR                  R                  UR                  5        U$ )NT)r   r   multi_query_attentionr
   r  r   r[   n_layern_headr   r   r   r   transformerr   r   r   r  s      r#   r   "GPTBigCodeMHALoader.get_model_specc  s    ;;GGLL  LL-ell.N.NO"& H 
 	u'8'89//?r&   c                 &    UR                  U5        g r   r  r   s      r#   r   "GPTBigCodeMHALoader.set_vocabularyp  r  r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ Nz<extra_id_%d>r   r   r[   r   r   r+  rr  r;   r`   rb   r   	extra_idsrs  rJ   s         r#   r   "GPTBigCodeMHALoader.get_vocabularys  Q    '9LL++c&k9	y!AMM/A-. " r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   GPTBigCodeMHALoader.set_config|  r  r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        U R                  UR                  UR                  5        [        UR                  UR                  5       GHT  u  p4U R                  UR                  R                  UR                  5        U R                  UR                  R                   S   UR"                  R$                  5        U R                  UR                  R                   S   UR"                  R&                  5        U R                  UR(                  R                  UR*                  5        U R                  UR(                  R,                  UR.                  R0                  5        U R                  UR(                  R2                  UR.                  R&                  5        GMW     g NFr   r   r<  r   r?  wter   r=  wper   r  ln_fr  r  hr  ln_1r   r   attnc_attnc_projr  ln_2r  mlpc_fcr  r;   rc   r   r   r  s        r#   r   GPTBigCodeMHALoader.set_decoder  B    %DOOVZZ8##D$;$;VZZHDOOV[[9!$TZZ!:J
 9 9 D DejjQOOJ55<<Q?ARARSOOJ55<<Q?ARARS
 9 95::FOOJNN33UYY^^DOOJNN33UYY5E5EF ";r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r   r|   rD  rE  s   @r#   r  r  ]  s5    ' ')/
G Gr&   r  
GPT2Configc                   <    \ rS rSr\S 5       rS rS rS rS r	Sr
g)	
GPT2Loaderi  c                     g)NGPT2LMHeadModelr'   r   s    r#   rR   GPT2Loader.architecture_name       r&   c                 x   [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     S9nU R                  UR                  UR                  5        U R                  UR                  R                  UR                  5        U$ )NT)r   r   r  r  s      r#   r   GPT2Loader.get_model_spec  s    ;;GGLL  LL-ell.N.NO	 H 
 	u'8'89//?r&   c                 &    UR                  U5        g r   r  r   s      r#   r   GPT2Loader.set_vocabulary  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   GPT2Loader.set_config  r  r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        U R                  UR                  UR                  5        [        UR                  UR                  5       GHT  u  p4U R                  UR                  R                  UR                  5        U R                  UR                  R                   S   UR"                  R$                  5        U R                  UR                  R                   S   UR"                  R&                  5        U R                  UR(                  R                  UR*                  5        U R                  UR(                  R,                  UR.                  R0                  5        U R                  UR(                  R2                  UR.                  R&                  5        GMW     g r  r  r  s        r#   r   GPT2Loader.set_decoder  r  r&   r'   NrK   rv   rw   rx   r   rR   r   r   r   r   r|   r'   r&   r#   r  r    s)    ! !
)/
Gr&   r  
GPTJConfigc                   <    \ rS rSr\S 5       rS rS rS rS r	Sr
g)	
GPTJLoaderi  c                     g)NGPTJForCausalLMr'   r   s    r#   rR   GPTJLoader.architecture_name  r  r&   c                    [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     UR                  R                  SSSS9nU R                  UR                  UR                  UR                  R                  UR                  R
                  5        U R                  UR                  R                  UR                  5        U$ NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r
   r  r   r[   r  r  r   r   r  r   r   r  r   r   r   r  s      r#   r   GPTJLoader.get_model_spec  s    ;;GGLL  LL-ell.N.NO||..#"" H 	
 	LLLL##LL		
 	//?r&   c                 &    UR                  U5        g r   r  r   s      r#   r   GPTJLoader.set_vocabulary  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   GPTJLoader.set_config  r  r&   c                 2   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  pVU R	                  UR                  UR                  5        UR                  R                  R                  nUR                  R                  R                  nUR                  R                   R                  n	["        R$                  " XtU5      n["        R$                  " XU5      n[&        R(                  " XxU	45      UR*                  R,                  S   l        U R/                  UR*                  R,                  S   UR                  R0                  5        U R/                  UR2                  R4                  UR6                  R8                  5        U R/                  UR2                  R:                  UR6                  R<                  5        GM     g r  )r<  r   r?  r  r   r  r  r  r  r  r  r  r  r-  r   r.  r/  r   permute_for_sliced_rotaryrE   catr  r   r   r1  r  r  r  fc_inr  fc_out)
r;   rc   r   r  	num_headsr   r  qwkwvws
             r#   r   GPTJLoader.set_decoder  sd    %DOOVZZ8DOOV[[9!$TZZ!:J
 < <ejjI""))B""))B""))B00
KB00
KB9>BB<9PJ%%,,Q/6OOJ55<<Q?ATATUOOJNN33UYY__EOOJNN33UYY5E5EF ";r&   r'   Nr  r'   r&   r#   r  r    s)    ! !*)/
Gr&   r  CodeGenConfigc                   P   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
SrU =r$ )	CodeGenLoaderi  c                     g)NCodeGenForCausalLMr'   r   s    r#   rR   CodeGenLoader.architecture_name      #r&   c                    [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     UR                  R                  SSSS9nSn[        UR                  S5      (       a  UR                  R                  S;   a  SnU R                  UR                  UR                  UR                  R                  UR                  R
                  UR                  R                  US9  U R                  UR                  R                   UR"                  5        U$ )	NTFr     head_dim)      r  )mp_num)r
   r  r   r[   r  r  r   r   r  r   r  r   r   r  n_embdr   r   r   )r;   r`   rc   r  s       r#   r   CodeGenLoader.get_model_spec  s   ;;GGLL  LL-ell.N.NO||..#"" H 	
 5<<,,1F1F*1T FLLLL##LLLL 	 	
 	//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   CodeGenLoader.get_vocabulary  sQ    '9LL++c&k9	y!AMM/A-. " r&   c                 &    UR                  U5        g r   r  r   s      r#   r   CodeGenLoader.set_vocabulary   r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   CodeGenLoader.set_config#  r  r&   c           
      &   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        R                  " SUS-  5      R                  SS5      R                  R                  5       R                  5       nXV-  n[        R                  " U V	s/ s H"  n	[        R                  " X-  U	S-   U-  5      PM$     sn	5      n
[        UR                   UR"                  5       GH|  u  pU R	                  UR$                  UR&                  5        UR(                  R*                  R,                  nXS S 24   nUR/                  SSS9u  nnn[0        R2                  " XU5      n[0        R2                  " UXC5      n[        R                  " UUU45      UR4                  R6                  S   l        U R9                  UR4                  R6                  S   UR(                  R:                  5        U R9                  UR<                  R>                  UR@                  RB                  5        U R9                  UR<                  RD                  UR@                  RF                  5        GM     g s  sn	f )NFr   r(  r*  r   dim)$r<  r   r?  r  r   r  r  nparangereshapeTflattentolistrE   r  r  r  r  r  r  r  qkv_projr   chunkr   r  r  r   r   r1  r  r  r  r   r  r  )r;   rc   r   r  r  	embed_dimr  base_permutation	local_dimrs  permutationr   r  r%  new_qkv_projr  r  r  s                     r#   r   CodeGenLoader.set_decoder(  s    %DOOVZZ8DOOV[[999Q
3;;BBDDLLNUUW'	iiGWXGW!U\\!-!a%9)<=GWX
 "%TZZ!:J
 < <ejjI zz**11H $N3L%++A1+5JBB 00
KB00YKB9>BB<9PJ%%,,Q/6OOJ55<<Q?ATATUOOJNN33UYY__EOOJNN33UYY5E5EF- "; Ys   <)Jr'   rK   rv   rw   rx   r   rR   r   r   r   r   r   r|   rD  rE  s   @r#   r	  r	    s5    $ $<)/
!G !Gr&   r	  GPTNeoXConfigc                   P   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
SrU =r$ )	GPTNeoXLoaderiL  c                     g)NGPTNeoXForCausalLMr'   r   s    r#   rR   GPTNeoXLoader.architecture_nameN  r  r&   c                 h   [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     [        UR                  R                  UR                  R                  UR                  R
                  -  -  5      SUR                  R                  SS9nU R                  UR                  UR                  UR                  R
                  5        U R                  UR                  R                   UR"                  5        U$ r  )r
   r  r   r[   r  r  r   
hidden_actint
rotary_pctr  use_parallel_residualr   r   gpt_neoxr   r   	embed_outr  s      r#   r   GPTNeoXLoader.get_model_specR  s    ;;GGLL**LL,,-ell.E.EF''<<++u||/O/OOQ $#ll@@# H 
 	u~~u||7W7WX//Ar&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   GPTNeoXLoader.get_vocabularye  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   GPTNeoXLoader.set_vocabularyn  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   GPTNeoXLoader.set_configq  r  r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GHo  u  pE[        US5      (       aM  U R	                  UR                  UR                  5        U R	                  UR                  UR                  5        O`U R	                  UR                  R
                  UR                  5        U R	                  UR                   R
                  UR                  5        UR"                  R$                  R&                  nUR"                  R$                  R(                  nUR+                  USSUR,                  S   5      R/                  SS5      R+                  SUR,                  S   5      nUR+                  USS5      R/                  SS5      R+                  S5      nXdR                  R0                  S   l        XtR                  R0                  S   l        U R3                  UR                  R0                  S   UR"                  R4                  5        U R3                  UR                   R6                  UR8                  R:                  5        U R3                  UR                   R<                  UR8                  R>                  5        GMr     g )NFinput_layer_normr(  r*  r   r   ) r<  r   r?  embed_inr   r  r  r  r  r  r   rC  input_layernormpost_attention_layer_normpost_attention_layernormr  r  r$  query_key_valuer   r   r!  r[  swapaxesr   r   denser  r  dense_h_to_4hr  dense_4h_to_h)r;   rc   r   r  r   r  qkv_wqkv_bs           r#   r   GPTNeoXLoader.set_decoderv  s!    %DOOV__=DOOV-D-DE!$TZZ!?Jz#566##J$?$?AVAVW##88%:X:X ##--88%:O:O ##NN--u/M/M OO33::EOO3388E iBB@!QU[[_- 
 MM)Q3<<QBJJ2NE9>%%,,Q/67<%%,,Q/4OOJ55<<Q?AVAVWOOJNN33UYY5L5LMOOJNN33UYY5L5LM= "@r&   r'   r-  rE  s   @r#   r0  r0  L  s5    $ $&)/
#N #Nr&   r0  WhisperConfigc                   t   ^  \ rS rSr\S 5       rS rS rS rU 4S jr	S r
U 4S jrU 4S	 jrS
 rS rSrU =r$ )WhisperLoaderi  c                     g)NWhisperForConditionalGenerationr'   r   s    r#   rR   WhisperLoader.architecture_name  r  r&   c                    [         R                  " UR                  R                  UR                  R                  UR                  R
                  UR                  R                  5      nU R                  UR                  UR                  R                  5        U R                  UR                  UR                  R                  5        U R                  UR                  R                  UR                  5        U$ r   )r   WhisperSpecr[   r   r   r   decoder_attention_headsr   r   r`   r   r   r   r   proj_outr  s      r#   r   WhisperLoader.get_model_spec  s    ''LL''LL00LL''LL00	
 	u{{':':;u{{':':;//@r&   c                     / SQn[        US/ 5      nU(       d  / $ U Vs/ s H  nXB;  d  M
  UR                  U5      PM     sn$ s  snf )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>rm  )rQ   convert_tokens_to_ids)r;   rb   non_lang_special_tokensadditional_tokensr   s        r#   _get_lang_ids_from_tokenizer*WhisperLoader._get_lang_ids_from_tokenizer  s^    	#
 $I/JBO I +
*3 3I++E2*
 	
 
s
   	AAc                    [        USS 5      nUb~  UR                  Ul        UR                  Ul        [        US5      (       a  UR                  Ul        [        US5      (       a(  [        UR                  R                  5       5      Ul
        OZUR                  R                  Ul        UR                  R                  Ul        [        R                  UR                  5      Ul        [        USS 5      c  U R                  U5      Ul
        UR                  ch  UR                  R                   nUR                  R"                  n[%        [&        R(                  " [+        US-  U5      [+        U5      5      5      Ul        g g )Ngeneration_configalignment_heads
lang_to_idlang_idsr)  )rQ   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginr   rc  rO   rd  valuesre  r[   _WHISPER_ALIGNMENT_HEADSrL   name_or_pathr_  r   rX  r@  	itertoolsproductr+  )r;   r[   r`   rb   
gen_config
num_layersr  s          r#   r   WhisperLoader.set_config  s6   U$7>
!","<"<F(2(H(HF%z#455)3)C)C&z<00"()>)>)E)E)G"H"',,">">F(-(J(JF%%=%A%A%BTBT%UF"6:t,4"??	JFO!!)44J<<I%)!!*/:6)$&F"	 *r&   c           	         > [         TU ]  X5      nUR                  S [        UR                  R
                  [        U5      -
  5       5       5        U$ )Nc              3   2   #    U  H  nS US-  -  v   M     g7f)z<|%.2f|>g{Gz?Nr'   ).0rs  s     r#   	<genexpr>/WhisperLoader.get_vocabulary.<locals>.<genexpr>  s       
A !d(#As   )r   r   extendr+  r[   r   r   r   s       r#   r   WhisperLoader.get_vocabulary  sL    '9 	 
5<<22S[@A
 	

 r&   c                 &    UR                  U5        g r   r  r   s      r#   r   WhisperLoader.set_vocabulary  r  r&   c                    > U R                  UR                  UR                  5        U R                  UR                  UR                  5        [        TU ]  X5        g r   )
set_conv1dconv1conv2r   r   )r;   rc   r   rJ   s      r#   r   WhisperLoader.set_encoder  s<    

GMM2

GMM2D*r&   c                 p   > U R                  UR                  UR                  5        [        TU ]  X5        g r   )r   r?  rA  r   r   rU  s      r#   r   WhisperLoader.set_decoder  s*    DOOW-A-ABD*r&   c                     U R                  UR                  UR                  5        U R                  UR                  UR                  5        g r   )r   r=  r>  r   r  r   s      r#   r  WhisperLoader.set_common_layers  s8    ##D$;$;V=S=STDOOV->->?r&   c                 H    UR                   Ul         UR                  Ul        g r   r   r   r   s      r#   r|  WhisperLoader.set_conv1d  s    mmKK	r&   r'   )rK   rv   rw   rx   r   rR   r   r_  r   r   r   r   r   r  r|  r|   rD  rE  s   @r#   rR  rR    sH    1 1
,:	)+
+@   r&   rR  Wav2Vec2Configc                   h   ^  \ rS rSr\S 5       rS rS rS rS r	S r
S rS	 rU 4S
 jrS rSrU =r$ )Wav2Vec2Loaderi  c                     g)NWav2Vec2ForCTCr'   r   s    r#   rR    Wav2Vec2Loader.architecture_name  r  r&   c                 .   [        UR                  R                  SS5      n[        R                  " UR                  R                  R
                  UR                  R                  R                  R                  UR                  R                  R                  R                  UR                  R                  R                  S   U5      nUR                  R                  R                   Hv  nUR                  Ul        UR                  Ul        UR"                  R$                  Ul        UR"                  R(                  Ul        UR"                  R,                  Ul        Mx     U R1                  UR                  XR                  R                  5        U$ Nreturn_hiddenFr   )rQ   wav2vec2r[   r   Wav2Vec2Specnum_feat_extract_layersr   r  r  r   r   r[  r  r$  r  r  r  feed_forwardintermediate_act_fnactivation_fnintermediate_denser  output_denser  r   )r;   r`   r  rc   r  s        r#   r   Wav2Vec2Loader.get_model_spec	  s    5 5N))NN!!99NN""));;NN""))==MM  &&q)
 ^^++22E#ooEO).)9)9E&"'"4"4"H"HE**==EI**77EI 3 	unn.C.CDr&   c                     g r   r'   r   s       r#   r   Wav2Vec2Loader.set_config      r&   c                 "    UR                  5       $ r   r   r;   r`   rb   s      r#   r   Wav2Vec2Loader.get_vocabulary!      ""$$r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Wav2Vec2Loader.set_vocabulary$  r  r&   c                    UR                   S   R                  R                  UR                  R                  l        UR                   S   R                  R                  UR                  R                  l        U R                  UR                  R                  UR                   S   R                  5        [        UR                  UR                   SS  5       Hu  u  p4UR                  R                  UR                  l        UR                  R                  UR                  l        U R                  UR                  UR                  5        Mw     g )Nr   r   )	conv_layersconvr   feat_layer0r   r   r  r  
feat_layer)r;   rc   feature_extractor
spec_layermodule_layers        r#   set_feature_extractor$Wav2Vec2Loader.set_feature_extractor'  s   '8'D'DQ'G'L'L'S'S$%6%B%B1%E%J%J%O%O"''):)F)Fq)I)T)T	
 ),OO.::12>)
$J &2%6%6%=%=JOO"#/#4#4#9#9JOO 
 5 5|7N7NO)
r&   c                     U R                  UR                  UR                  5        U R                  UR                  UR
                  5        g r   r   fp_layer_normr  r   fp_projectionr   r;   rc   feature_projections      r#   set_feature_projection%Wav2Vec2Loader.set_feature_projection4  :    D..0B0M0MN**,>,I,IJr&   c                 $   UR                   R                  R                  R                  R	                  5       UR                   R                  R                  l        UR                   R                  R
                  R	                  5       UR                   R                  R
                  l        UR                   R                  5        H"  nUR                  R	                  5       Ul        M$     UR                  [        R                  " SSUR                  45      5        UR                   R                  R                  UR                   R                  l        UR                   R                  R
                  UR                   R                  l        g r   )
pos_conv_embedr  r   datafloatr   
parametersrE   randnr  )r;   rc   r   r[   params        r#   set_pos_conv_embed!Wav2Vec2Loader.set_pos_conv_embed8  s    ""''..3399; 	##**/ 180F0F0K0K0P0P0V0V0X##((-++668E))+EJ 9u{{Aq&2D2D+EFG*1*@*@*E*E*L*L  '(/(>(>(C(C(H(H  %r&   c                   > U R                  XR                  R                  5        U R                  XR                  R                  5        U R                  XR                  R                  U5        [        TU ]!  XR                  R                  5        [        UR                  R                  SS5      nU(       d'  U R                  UR                  UR                  5        g g Nr  F)r  r  r  r  r  r  r   r   r   rQ   r[   r   r   )r;   rc   r`   r[   r  rJ   s        r#   r   Wav2Vec2Loader.set_encoderE  s    ""4)I)IJ##D..*K*KLnn&<&<fED.."8"89 5 5NOODLL%--8 r&   c                 P    U R                  UR                  UR                  5        g r   )r   r  r   s      r#   r   Wav2Vec2Loader.set_common_layersN  s    DOOV->->?r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r  r  r  r   r  r|   rD  rE  s   @r#   r  r    sL       *%)PKI9@ @r&   r  Wav2Vec2BertConfigc                   l    \ rS rSr\S 5       rS rS rS rS r	S r
 SS	 jrS
 rS rS rS rS rSrg)Wav2Vec2BertLoaderiR  c                     g)NWav2Vec2BertForCTCr'   r   s    r#   rR   $Wav2Vec2BertLoader.architecture_nameT  r  r&   c                 l   [        UR                  R                  SS5      n[        R                  " UR                  R                  R
                  UR                  R                  R                  UR                  R                  R                  S   U5      nU R                  UR                  U5        U$ r  )rQ   wav2vec2_bertr[   r   Wav2Vec2BertSpecnum_adapter_layersr  r   r   r[  r   r   )r;   r`   r  rc   s       r#   r   !Wav2Vec2BertLoader.get_model_specX  s     3 3 : :OUS 11&&99&&88MM  &&q)	
 	u-r&   c                     g r   r'   r   s       r#   r   Wav2Vec2BertLoader.set_configc  r  r&   c                 "    UR                  5       $ r   r  r  s      r#   r   !Wav2Vec2BertLoader.get_vocabularyf  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   !Wav2Vec2BertLoader.set_vocabularyi  r  r&   c                     U R                  UR                  UR                  5        U R                  UR                  UR
                  5        g r   r  r  s      r#   r  )Wav2Vec2BertLoader.set_feature_projectionl  r  r&   Nc                    [        S5       Vs/ s H  n[        R                  " 5       PM     nnU R                  US   UR                  5        U R                  US   UR
                  5        U R                  US   UR                  5        [        R                  " UR                  S   U5        U R                  UR                  S   UR                  5        U(       d  U(       ap  UR                  R                  Ul        [        R                  " S5      R!                  U5      Ul        [        R                  " S5      R!                  U5      Ul        g g s  snf )Nr(  r   r   r)  r*  int32)r+  r   r,  r   linear_qlinear_klinear_vr   r0  r   
linear_outdistance_embeddingr   !relative_asymmetric_position_keysr  rA   r   relative_left_max_positionrelative_right_max_position)r;   rc   r$  left_max_positionright_max_positionr   r3  s          r#   r   Wav2Vec2BertLoader.set_attentionp  s    ;@(C(Q..0(CQ););<Q););<Q););<$++a.,7B)=)=> 25>5Q5Q5X5XD2.0hhw.?.D.DEV.WD+/1xx/@/E/E"0D, !3 Ds   Ec                 ^   [        X5       GH  u  pVU R                  UR                  UR                  5        U R	                  UR
                  R                  UR                  R                  5        U R	                  UR
                  R                  UR                  R                  5        U R                  UR                  UR                  X45        U R                  UR                  UR                  5        U R                  UR                   UR"                  R$                  5        U R'                  UR(                  UR"                  R*                  5        U R'                  UR,                  UR"                  R.                  5        U R                  UR0                  UR"                  R2                  5        U R'                  UR4                  UR"                  R6                  5        U R                  UR8                  UR:                  5        U R	                  UR<                  R                  UR>                  R                  5        U R	                  UR<                  R                  UR>                  R                  5        U R                  UR@                  URB                  5        GM     g r   )"r  r   enc_ffn1_layer_normffn1_layer_normr   enc_ffn1r  ffn1r  r  r  r  enc_attnr  enc_attn_layer_normr  enc_conv_layer_normconv_moduler  r|  enc_conv_pointwise_conv1pointwise_conv1enc_conv_depthwise_convdepthwise_convenc_conv_depthwise_layer_normdepthwise_layer_normenc_conv_pointwise_conv2pointwise_conv2enc_ffn2_layer_normffn2_layer_normenc_ffn2ffn2enc_final_layer_normr  )r;   spec_layersr  r  r  slayerr  s          r#   set_wav2vec2bert_encoder+Wav2Vec2BertLoader.set_wav2vec2bert_encoder  s    !5MF : :E<Q<QROOFOO44ejj6S6STOOFOO44ejj6M6MN2C  : :E<V<VW**E,=,=,H,H OO//1B1B1R1R OO..0A0A0P0P 44!!66 OO//1B1B1R1R  : :E<Q<QROOFOO44ejj6S6STOOFOO44ejj6M6MN ; ;U=S=ST7 6r&   c                    [        X5       GH^  u  p4U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        U R                  UR                  UR                  5        U R	                  UR                  UR                  5        U R                  UR                  UR                  5        U R                  UR                  UR                  5        U R!                  UR"                  R$                  UR&                  R(                  5        U R!                  UR"                  R*                  UR&                  R,                  5        GMa     g r   )r  r   adpt_residual_layer_normresidual_layer_normr|  adpt_residual_convresidual_convadpt_attn_layer_normr  adpt_attn_convself_attn_convr  adpt_attn_layerr  adpt_ffn_layer_normffn_layer_normr   adpt_ffnr  r  r  r  r  )r;   r  r  r  r  s        r#   set_wav2vec2bert_adapter+Wav2Vec2BertLoader.set_wav2vec2bert_adapter  s     5MF//1J1J OOF55u7J7JK ; ;U=W=WXOOF1153G3GHv55uG : :E<P<PQOOFOO44eii6R6RSOOFOO44eii6L6LM 6r&   c                 P   U R                  XR                  R                  5        U R                  UR                  UR                  R
                  R                  UR                  R                  R                  UR                  R                  R                  5        U R                  UR                  UR                  R                  R                  5        [        UR                  R                  SS5      nU(       d'  U R                  UR                  UR                  5        g g r  )r  r  r  r  r   r   r  r[   left_max_position_embeddingsright_max_position_embeddingsr
  adapter_layersadapterrQ   r   r   )r;   rc   r`   r  s       r#   r   Wav2Vec2BertLoader.set_encoder  s    ##D*=*=*P*PQ%%''..&&CC&&DD		
 	%%!4!4!<!<!C!C	
   3 3 : :OUSOODLL%--8 r&   c                 d    UR                   Ul         UR                  b  UR                  Ul        g g r   r  r   s      r#   r|  Wav2Vec2BertLoader.set_conv1d  s'    mm;;"DI #r&   c                 d    UR                   Ul        UR                  b  UR                  Ul        g g r   r   r   s      r#   r   !Wav2Vec2BertLoader.set_layer_norm  s'    ]]
;;"DI #r&   r'   )NN)rK   rv   rw   rx   r   rR   r   r   r   r   r  r  r  r
  r   r|  r   r|   r'   r&   r#   r  r  R  sV    $ $	%)K
 KO U@N9$
$r&   r  T5Configc                   v   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	SS jr
S rS	 rS
 rSS jrS rSrU =r$ )T5Loaderi  c                     g)NT5ForConditionalGenerationr'   r   s    r#   rR   T5Loader.architecture_name  s    +r&   c           
         [         R                  R                  UR                  R                  UR                  R
                  4UR                  R                  S[        UR                  R                     UR                  R                  SSS9nU R                  UR                  UR                  5        U R                  UR                  UR                  SS9  U R                  UR                  R                  UR                  5        UR                  R                   (       a(  UR                  R"                  S-  UR                  l        U$ )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r
   r   r   r[   rp  num_decoder_layersr  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsr;  scale_outputsr  s      r#   r   T5Loader.get_model_spec  s    //;;\\$$ell&E&EFLL""-ell.G.GHLL--$( < 
 	t||U]]3t||U]]tD//?<<++).)=)=t)CDLL&r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   T5Loader.get_vocabulary  r  r&   c                 H    UR                  U5        UR                  U5        g r   r  r   s      r#   r   T5Loader.set_vocabulary  r  r&   c                    UR                   Ul        UR                  Ul        UR                  Ul        [	        UR
                  S5      (       a+  UR                  UR
                  R                  5      Ul        g UR                   Ul        g )Nr  )	r  r  r  r	  r   r[   r
  r  r  r   s       r#   r   T5Loader.set_config  sn    $..$..$..5<<!9::)2)H)H33*F& *3)<)<F&r&   c                    U R                  UR                  UR                  5        U R                  [	        UR
                  [        5      (       a  UR
                  S   OUR
                  UR                  5        SUl        [        [        UR                  UR                  5      5       H  u  nu  pVU R                  UR                  UR                  S   5        US:  aO  UR                  S   R                  nUR                  UR                  l        UR                   UR                  l        U(       a)  U R#                  UR$                  UR                  S   5        U R'                  UR(                  UR                  S   5        M     g )Nr   Fr   r*  )r   r  r  r   r   r?  r@  rA  r<  r  r  r  blockset_self_attentionr  r  relative_attention_max_distanceset_cross_attentionr$  set_ffnr  )r;   rc   r   r   rs  r   r/  first_self_attentions           r#   r$  T5Loader.set_stack  s2   DOOV-D-DE doot44 "__	
 !&&/DJJ0M&N"A"
##J$=$=u{{1~N1u'+zz!}'C'C$(@@ ))A )HH ))I (()=)=u{{1~NLLR9! 'Or&   c                    [        US5      (       aa  U R                  UR                  UR                  R                  5        U R                  UR
                  UR                  R                  5        O0U R                  UR                  UR                  R                  5        U R                  UR                  UR                  R                  5        U R                  UR                  UR                  5        g )Nlinear_0_noact)r   r   r  DenseReluDensewi_0r7  wi_1wir  wor   r  r   s      r#   r3  T5Loader.set_ffn  s    4)**OODMM6+@+@+E+EFOOD//1F1F1K1KLOODMM6+@+@+C+CDv'<'<'?'?@DOOV->->?r&   c                     U R                  XR                  SS9  U R                  UR                  UR                  5        g r  )r  SelfAttentionr   r  r   s      r#   r0  T5Loader.set_self_attention"  s5    4!5!5dKDOOV->->?r&   c                     U R                  XR                  5        U R                  UR                  UR                  5        g r   )r  EncDecAttentionr   r  r   s      r#   r2  T5Loader.set_cross_attention&  s0    4!7!78DOOV->->?r&   c                 :   SUl         [        S5       Vs/ s H  n[        R                  " 5       PM     nnU R	                  US   UR
                  5        U R	                  US   UR                  5        U R	                  US   UR                  5        U(       a%  [        R                  " UR                  S   U5        ON[        R                  " UR                  S   US S 5        [        R                  " UR                  S   USS  5        U R	                  UR                  S   UR                  5        UR                  (       aP  UR                  R                  Ul        [        R                   " S5      R#                  UR$                  5      Ul        g g s  snf )Nr7  r(  r   r   r)  r*  r  )queries_scaler+  r   r,  r   qkvr   r0  r   ohas_relative_attention_biasr  r   r  rA   r   r1  r2  s         r#   r  T5Loader.set_attention*  s-    :?(C(Q..0(CQ5Q5Q5dkk!nl;dkk!nl2A.>?dkk!nl12.>?B500+4+L+L+S+SD(3588G3D3I3I994D0 1 Ds   Fc                 &    UR                   Ul        g r   r   r   r;   rc   r  s      r#   r   T5Loader.set_layer_norm@      &&
r&   r'   rC  )rK   rv   rw   rx   r   rR   r   r   r   r   r$  r3  r0  r2  r  r   r|   rD  rE  s   @r#   r  r    sO    , ,(0	=:>@@@,' 'r&   r  	MT5Configc                   $    \ rS rSr\S 5       rSrg)	MT5LoaderiD  c                     g)NMT5ForConditionalGenerationr'   r   s    r#   rR   MT5Loader.architecture_nameF  s    ,r&   r'   N)rK   rv   rw   rx   r   rR   r|   r'   r&   r#   rS  rS  D  s    - -r&   rS  BloomConfigc                   V   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S rS	rU =r$ )
BloomLoaderiK  c                     g)NBloomForCausalLMr'   r   s    r#   rR   BloomLoader.architecture_nameM      !r&   c           
      x   [         R                  R                  UR                  R                  UR                  R
                  S[        R                  R                  SSSS9nU R                  UR                  UR                  5        U R                  UR                  R                  UR                  5        U$ )NT)r   r   r   alibialibi_use_positive_positions)r
   r  r   r[   r  r  r   
ActivationGELUTanhr   r   r  r   r   r   r  s      r#   r   BloomLoader.get_model_specQ  s    ;;GGLL  LL"--66 $)- H 
 	u'8'89//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   BloomLoader.get_vocabulary`  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   BloomLoader.set_vocabularyi  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   BloomLoader.set_configl  r  r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        U R	                  UR                  UR                  5        [        UR                  UR                  5       GHi  u  p4U R	                  UR                  R                  UR                  5        U R                  UR                  R                  S   UR                  R                   UR                  R"                  5        U R%                  UR                  R                  S   UR                  R&                  5        U R	                  UR(                  R                  UR*                  5        U R%                  UR(                  R,                  UR.                  R0                  5        U R%                  UR(                  R2                  UR.                  R4                  5        GMl     g r  )r<  r   r?  word_embeddingsr   r   word_embeddings_layernormr  r  r  r  r  r  rE  set_qkv_linearr   rH  r  r   rJ  r  rG  r  r  rK  r  rL  r  s        r#   r   BloomLoader.set_decoderq  sv    %DOOV-C-CDD44f6V6VWDOOV[[9!$TZZ!:J))44e6K6K ))003$$44$$..
 OO))003U5I5I5O5O ))5+I+I OOJNN33UYY5L5LMOOJNN33UYY5L5LM# ";r&   c                 \   UR                   nUR                  USSUR                  S   5      nUR                  SS5      nUR                  SUR                  S   5      nUR                  nUR                  USS5      nUR                  SS5      nUR                  S5      nXAl         XQl        g )Nr(  r*  r   r   )r   r!  r[  r   r   )r;   rc   r   r  r   r   s         r#   rm  BloomLoader.set_qkv_linear  s    	1b&,,r2BC!!!Q'FLL$45{{||Iq"-~~a#||B	r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r   rm  r|   rD  rE  s   @r#   rY  rY  K  s9    " ")/
N2 r&   rY  	MPTConfigc                   V   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S rS	rU =r$ )
	MPTLoaderi  c                     gNAutoModelForCausalLMr'   r   s    r#   rR   MPTLoader.architecture_name      %r&   c                    [         R                  R                  UR                  R                  UR                  R
                  S[        R                  R                  SS9nU R                  UR                  UR                  5        U$ )NT)r   r   r_  )r
   r  r   r[   n_layersn_headsr   ra  GELUr   r   r  r  s      r#   r   MPTLoader.get_model_spec  sn    ;;GGLL!!LL  "--22 H 
 	u'8'89r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   MPTLoader.get_vocabulary  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   MPTLoader.set_vocabulary  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   MPTLoader.set_config  r  r&   c                    U R                  UR                  UR                  5        U R                  UR                  UR
                  5        SUl        UR                  R                  UR                  l        [        UR                  UR                  5       GHT  u  p4U R                  UR                  R                  UR                  5        U R                  UR                  R                  S   UR                   R"                  5        U R                  UR                  R                  S   UR                   R$                  5        U R                  UR&                  R                  UR(                  5        U R                  UR&                  R*                  UR&                  R,                  5        U R                  UR&                  R.                  UR&                  R0                  5        GMW     g r  )r   r?  r  r   r  norm_fr<  r   r   r  r  blocksr  norm_1r   r   r  Wqkvr1  r  norm_2r  up_projr  	down_projr  s        r#   r   MPTLoader.set_decoder  s?   DOOVZZ8DOOV]]; %!%!7!7!$TZZ!?J
 9 9 D DellSOOJ55<<Q?QOOJ55<<Q?ATATU
 9 95<<HOOJNN33UYY5F5FGOOJNN33UYY5H5HI "@r&   c                 p    UR                   Ul        [        R                  " UR                  5      Ul        g r   )r   r   rE   
zeros_liker   r   s      r#   r   MPTLoader.set_layer_norm  s#    ]]
$$TZZ0	r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r   r   r|   rD  rE  s   @r#   rs  rs    s9    & &
)/
J 1 1r&   rs  GemmaConfigc                   V   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S rS	rU =r$ )
GemmaLoaderi  c                     g)NGemmaForCausalLMr'   r   s    r#   rR   GemmaLoader.architecture_name  r]  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S n[        UR                   SS5      n[        R
                  R                  UUUS:X  a  [        R                  R                  O[        R                  R                  SSSSS[        UR                   SS	5      UUR                   R                  S
9nU R                  UR                  UR                  5        U R                  UR                  R                   UR"                  5        UR                   R$                  S-  UR                  R&                  l        U$ )Nnum_key_value_headshidden_activationr   r   Tr   Fr   '  )	r   r   r  r  r  r  rotary_basenum_heads_kvr        ?r[   r  r  rQ   r
   r  r   r   ra  r|  rb  r  r   r   r`   r   r   r   r  r?  multiply_by_sqrt_depthr;   r`   rp  r  r  activation_configrc   s          r#   r   GemmaLoader.get_model_spec  s0   \\33
LL44	u||-BIN$L#LL-/B
  ;;GG %. &&++ ++44#lEB%\\** H 
$ 	u{{3//?9>9Q9QSV9V6r&   c                 .  > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     UR                  R                  [	        U5      :  a  US UR                  R                   nU$ r  r  r  s         r#   r   GemmaLoader.get_vocabulary  |    '9LL++c&k9	y!AMM/A-. "<<""S[05ell556Fr&   c                 &    UR                  U5        g r   r  r   s      r#   r   GemmaLoader.set_vocabulary  r  r&   c                     UR                   Ul         UR                  Ul        UR                  Ul        UR                  R                  Ul        g r   r  r  r	  r[   rms_norm_epslayer_norm_epsilonr   s       r#   r   GemmaLoader.set_config  >    $..$..$..$)LL$=$=!r&   c                 4    UR                   Ul        SUl        g rS  r   r   layer_norm_use_residualrN  s      r#   r   GemmaLoader.set_layer_norm      &&
'+$r&   c                 &   SUl         SUl        U R                  UR                  UR                  5        U R                  UR                  UR                  5        [        UR                  UR                  5       GH  u  p4U R                  UR                  R                  UR                  5        U R                  UR                  R                  UR                  5        UR                  R                   R"                  nUR                  R$                  R"                  nUR                  R&                  R"                  nUR                  R(                  R"                  n[*        R,                  " XVU/5      UR                  R.                  S   l        XR                  R.                  S   l        U R1                  UR                  R2                  UR4                  R6                  5        U R1                  UR                  R8                  UR4                  R:                  5        U R1                  UR                  R<                  UR4                  R>                  5        [A        US5        [A        US5        [B        RD                  " 5         GM     g NTFr   r   r  r  )#r<  rT  r   r?  rA  r   r  normr  r  r  r  rE  r  rG  r  r-  r   r.  r/  o_projrE   r  r   r   r  r  	gate_projr7  r  r  r  delattrgccollect	r;   rc   r   r   r  wqwkwvr<  s	            r#   r   GemmaLoader.set_decoder  s    $).&DOOV-@-@ADOOV[[9!$TZZ!?J))44e6K6K ))5+I+I ''..B''..B''..B''..B9>BB<9PJ%%,,Q/69;%%,,Q/6OOJNN33UYY5H5HIOOJNN99599;L;LMOOJNN33UYY5H5HIE;'E5!JJL- "@r&   r'   rK   rv   rw   rx   r   rR   r   r   r   r   r   r   r|   rD  rE  s   @r#   r  r    s9    " "!F	)>, r&   r  Gemma2Configc                   V   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S rS	rU =r$ )
Gemma2Loaderi1  c                     g)NGemma2ForCausalLMr'   r   s    r#   rR   Gemma2Loader.architecture_name3      "r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S n[        UR                   SS5      n[        R
                  R                  UUUS:X  a  [        R                  R                  O[        R                  R                  SSSSS[        UR                   SS	5      UUR                   R                  SS
9nU R                  UR                  UR                  5        U R                  UR                  R                   UR"                  5        UR                   R$                  S-  UR                  R&                  l        U$ )Nr  r  r   r   Tr   Fr   r  )
r   r   r  r  r  r  r  r  r  pre_post_layer_normr  r  r  s          r#   r   Gemma2Loader.get_model_spec7  s3   \\33
LL44	u||-BIN$L#LL-/B
  ;;GG %. &&++ ++44#lEB%\\** $! H 
& 	u{{3//?9>9Q9QSV9V6r&   c                 .  > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     UR                  R                  [	        U5      :  a  US UR                  R                   nU$ r  r  r  s         r#   r   Gemma2Loader.get_vocabulary[  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Gemma2Loader.set_vocabularyf  r  r&   c                     UR                   Ul         UR                  Ul        UR                  Ul        UR                  R                  Ul        g r   r  r   s       r#   r   Gemma2Loader.set_configi  r  r&   c                 4    UR                   Ul        SUl        g rS  r  rN  s      r#   r   Gemma2Loader.set_layer_normo  r  r&   c                    SUl         SUl        U R                  UR                  UR                  5        U R                  UR                  UR                  5        [        UR                  UR                  5       GHJ  u  p4U R                  UR                  UR                  5        U R                  UR                  UR                  5        U R                  UR                  UR                   5        U R                  UR"                  UR$                  5        UR&                  R(                  R*                  nUR&                  R,                  R*                  nUR&                  R.                  R*                  nUR&                  R0                  R*                  n[2        R4                  " XVU/5      UR6                  R8                  S   l        XR6                  R8                  S   l        U R;                  UR<                  R>                  UR@                  RB                  5        U R;                  UR<                  RD                  UR@                  RF                  5        U R;                  UR<                  RH                  UR@                  RJ                  5        [M        US5        [M        US5        [N        RP                  " 5         GMM     g r  ))r<  rT  r   r?  rA  r   r  r  r  r  r  rC  rE  rF  rG  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr  r-  r   r.  r/  r  rE   r  r  r   r   r  r  r  r  r7  r  r  r  r  r  r  r  s	            r#   r   Gemma2Loader.set_decoders  s    $).&DOOV-@-@ADOOV[[9!$TZZ!?J
 ; ;U=R=RS44e6T6T 55u7V7V 668X8X ''..B''..B''..B''..B9>BB<9PJ%%,,Q/69;%%,,Q/6OOJNN33UYY5H5HIOOJNN99599;L;LMOOJNN33UYY5H5HIE;'E5!JJL; "@r&   r'   r  rE  s   @r#   r  r  1  s9    # #"H	)>,# #r&   r  LlamaConfigc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
\R                  R                  4S jrS	rU =r$ )
LlamaLoaderi  c                     g)NLlamaForCausalLMr'   r   s    r#   rR   LlamaLoader.architecture_name  r]  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S nU R	                  UR                   S5      u  pVn[        UR                   SS 5      nU(       a  S n	UR
                  S:X  a  [        R                  UR                  5      n	U	c>  [        SUR
                  < SSR                  [        R                  5       5      < 35      eUR                  n
UR                  nO[        R                  R                   n	S n
S n["        R$                  R'                  UU[        R(                  R*                  SSSS	S
UUUUU	U
US9nU R-                  UR.                  UR0                  U	5        U R3                  UR.                  R4                  UR6                  5        [        UR                   SS 5      nU[8        R:                  R<                  :X  aE  UR.                  R>                   H+  nUS   UR@                  l!        US   UR@                  l"        M-     U$ )Nr  r  quantization_configawqQuantization type 'T' is not yet implemented. The following Quantization types are currently supported: r?   Tr   Fr   r   r  r  r  r  r   r   r  r  r   quant_group_size
quant_bitsr   low_freq_factorhigh_freq_factor)#r[   r  r  rQ   r   quant_method_SUPPORTED_QUANTIZATIONrL   versionr   rN   rP   
group_sizebitsr   r   r   r
   r  r   ra  SWISHr   r   r`   r   r   r   r   RotaryScalingTypeLlama3r  r  rotary_low_freq_factorrotary_high_freq_factor)r;   r`   rp  r  r  r   r   r   r  r   r  r  rc   r   r  s                  r#   r   LlamaLoader.get_model_spec  s   \\33
LL44	u||-BIN$LAEAWAWLL&B
>J &ell4I4PJ"//584889L9T9TU
!) ,88		"9">">"@A	   3==,11J$1155J#J;;GG"--33# 3"7"%!-! H 
$ 	u{{J?//? u||^TB."B"B"I"II++>J%?$$; @L&@$$<	 , r&   c                 .  > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     UR                  R                  [	        U5      :  a  US UR                  R                   nU$ r  r  r  s         r#   r   LlamaLoader.get_vocabulary  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   LlamaLoader.set_vocabulary  r  r&   c                     UR                   Ul         UR                  Ul        UR                  b  UR                  OSUl        UR                  R                  Ul        g N r  r   s       r#   r   LlamaLoader.set_config  sP    $..$..#,#6#6#BI 	 %*LL$=$=!r&   c                 &    UR                   Ul        g r   rM  rN  s      r#   r   LlamaLoader.set_layer_norm  rP  r&   c                 x   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  pEU R	                  UR                  R
                  UR                  5        U R	                  UR                  R
                  UR                  5        [        S5       Vs/ s H  n[        R                   " 5       PM     nnU R#                  US   UR$                  R&                  US9  U R#                  US   UR$                  R(                  US9  U R#                  US   UR$                  R*                  US9  U[        R,                  R.                  :X  a/  [0        R2                  " UR                  R4                  S   U5        OPU[        R,                  R6                  :X  a  SOSn[0        R8                  " UR                  R4                  S   Xx5        U R#                  UR                  R4                  S   UR$                  R:                  US9  U R#                  UR                  R<                  UR>                  R@                  US9  U R#                  UR                  RB                  UR>                  RD                  US9  U R#                  UR                  RF                  UR>                  RH                  US9  [K        US5        [K        US5        [L        RN                  " 5         GM     g s  snf 	NFr(  r   r   r   r)  r  r  (r<  r   r?  rA  r   r  r  r  r  r  r  rE  r  rG  r+  r   r,  r   r  r-  r.  r/  r   r   r   r0  r   AWQ_GEMMfuse_linear_prequantr  r  r  r  r7  r  r  r  r  r  r  	r;   rc   r   r   r   r  r   r3  cc_dims	            r#   r   LlamaLoader.set_decoder  s    %DOOV-@-@ADOOV[[9!$TZZ!?J))44e6K6K ))5+I+I ?DAhGhK224hLGOOQ!7!7J   OOQ!7!7J   OOQ!7!7J   [55999!!*";";"B"B1"E|T(K,D,D,M,MMST**--44Q7 OO))003&&%   OO'')<)<   OO--uyy/@/@Z   OO'')<)<   E;'E5!JJLY "@ H   'L7r'   rK   rv   rw   rx   r   rR   r   r   r   r   r   r   r   r   r   r|   rD  rE  s   @r#   r  r    sK    " "@D	)>' 4?3K3K3O3O 1 1r&   r  Gemma3TextConfigGemma3Configc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
\R                  R                  4S jrS	rU =r$ )
Gemma3Loaderi.  c                     g)NGemma3ForCausalLMr'   r   s    r#   rR   Gemma3Loader.architecture_name1  r  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S nUR                   R                  n[        UR                   SS5      n[        UR                   SS5      n[        UR                   SS5      n[        UR                   SS	5      n	[        UR                   S
S 5      n
[        UR                   SS 5      nU(       ac  UR
                  S:X  a  [        R                  UR                  5      nWc  [        SUR
                  -  5      eUR                  nUR                  nO[        R                  R                  nS nS n[        R                   R#                  UUUS:X  a  [        R$                  R&                  O[        R$                  R(                  SSSUSUUUU	SUUUSS9nXl        [-        U
5       GH   u  nnUR.                  R0                  U   nUS:X  aj  [2        R4                  " S5      R7                  U5      UR8                  l        [2        R4                  " S5      R7                  S5      UR8                  l        M  US:X  d  M  [2        R4                  " S5      R7                  U5      UR8                  l        [2        R4                  " S5      R7                  U	5      UR8                  l        GM     U R?                  UR.                  UR@                  U5        U RC                  UR.                  RD                  URF                  5        U$ )Nr  r  r   r   @B rope_local_base_freqr  sliding_windowi   layer_typesr  r  z.Quantization type '%s' is not yet implemented.r   TF)r   r   r  r  r  r  r  r  r  r  r  r   r  r  qk_normfull_attentionfloat32r  r   sliding_attention)$r[   r  r  rQ   r  r  r  rL   r  r   r  r  r   r   r   r
   r  r   ra  r|  rb  _layer_typesr  r   r  r  rA   r   r  r  r  r   r`   r   r   r   )r;   r`   rp  r  r  r  r  r   r  r  r  r  r   r  r  rc   rs  
layer_typer  s                      r#   r   Gemma3Loader.get_model_spec5  s   \\33
LL44	u||-BIN$L<<((#LL-/B

 U\\<C
&LL0& 

 !/?FellM4@%ell4I4P"//584889L9T9TU
!)D)667   3==,11J$1155J#J  ;;GG %. &&++ ++44#,%) $!-!+ H 
2 ( '{3MAzLL&&q)E--3588I3F3K3KJ3W$$068hhw6G6L6LQ6O$$3223588I3F3K3K(4$$0 79hhw6G6L6L"7$$3 4 	u{{J?//?r&   c                 .  > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     UR                  R                  [	        U5      :  a  US UR                  R                   nU$ r  r  r  s         r#   r   Gemma3Loader.get_vocabulary  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Gemma3Loader.set_vocabulary  r  r&   c                    UR                   Ul         UR                  Ul        [        US5      (       aF  [        UR                  [
        5      (       a'  UR                  R                  5       (       a  SUl        g UR                  Ul        g )Nchat_templatez<end_of_turn>)r  r	  r   r   r   rz   stripr  r   s       r#   r   Gemma3Loader.set_config  sk    $..$.. I//922C88''--//.F(22Fr&   c                 4    UR                   Ul        SUl        g rS  r  rN  s      r#   r   Gemma3Loader.set_layer_norm  r  r&   c                    SUl         SUl        U R                  UR                  UR                  5        U R                  UR                  UR                  5        [        UR                  UR                  5       GHi  u  pEU R                  UR                  UR                  5        U R                  UR                  UR                  5        U R                  UR                  UR                   5        U R                  UR"                  UR$                  5        U R                  UR&                  R(                  UR*                  R(                  5        U R                  UR&                  R,                  UR*                  R,                  5        [/        S5       Vs/ s H  n[0        R2                  " 5       PM     nnU R5                  US   UR*                  R6                  US9  U R5                  US   UR*                  R8                  US9  U R5                  US   UR*                  R:                  US9  U[0        R<                  R>                  :X  a/  [@        RB                  " UR&                  RD                  S   U5        OPU[0        R<                  RF                  :X  a  SOSn[@        RH                  " UR&                  RD                  S   Xx5        U R5                  UR&                  RD                  S   UR*                  RJ                  US9  U R5                  URL                  RN                  URP                  RR                  US9  U R5                  URL                  RT                  URP                  RV                  US9  U R5                  URL                  RX                  URP                  RZ                  US9  []        US5        []        US	5        [^        R`                  " 5         GMl     g s  snf )
NTFr(  r   r  r   r)  r  r  )1r<  rT  r   r?  rA  r   r  r  r  r  r  rC  rE  rF  rG  r  r  r  r  r  q_normr  k_normr+  r   r,  r   r-  r.  r/  r   r   r   r0  r   r  r  r  r  r  r  r  r7  r  r  r  r  r  r  r  s	            r#   r   Gemma3Loader.set_decoder  s+    $).&DOOV-@-@ADOOV[[9!$TZZ!?J
 ; ;U=R=RS44e6T6T 55u7V7V 668X8X
 ))00%//2H2H ))00%//2H2H
 ?DAhGhK224hLGOOQ!7!7J   OOQ!7!7J   OOQ!7!7J   [55999!!*";";"B"B1"E|T(K,D,D,M,MMST**--44Q7 OO))003&&%   OO'')<)<   OO--uyy/@/@Z   OO'')<)<   E;'E5!JJL} "@0 Hs   O*)r  r  rE  s   @r#   r  r  .  sO     # #Sj	)3, 4?3K3K3O3O D Dr&   r  MistralConfigc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
\R                  R                  4S jrS	rU =r$ )
MistralLoaderi  c                     g)NMistralForCausalLMr'   r   s    r#   rR   MistralLoader.architecture_name  r  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S n[        UR                   SS5      nU R	                  UR                   S5      u  pgn[        UR                   SS 5      n	U	(       a  U	R
                  S:X  a  [        R                  U	R                  5      n
W
c>  [        SU	R
                  < SS	R                  [        R                  5       5      < 35      eU	R                  nU	R                  nO[        R                  R                   n
S nS n["        R$                  R'                  UU[        R(                  R*                  S
S
S
SSUUUUUU
UUUR                   R,                  S9nU R/                  UR0                  UR2                  U
S9  U R5                  UR0                  R6                  UR8                  5        U$ )Nr  r  r   r  r  r  r  r  r?   TF)r   r   r  r  r  r  r   r   r  r  r  r   r  r  r  r  )r[   r  r  rQ   r   r  r  rL   r  r   rN   rP   r  r  r   r   r   r
   r  r   ra  r  r  r   r   r`   r   r   r   )r;   r`   rp  r  r  r  r   r   r   r  r   r  r  rc   s                 r#   r   MistralLoader.get_model_spec  s   \\33
LL44	u||-BIN$L /?CAEAWAWLL&B
>J &ell4I4P"//584889L9T9TU
!) ,88		"9">">"@A	   3==,11J$1155J#J;;GG"--33# 3"7"%)!-!\\**# H 
( 	u{{zJ//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   MistralLoader.get_vocabulary0  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   MistralLoader.set_vocabulary9  r  r&   c                     UR                   Ul         UR                  Ul        UR                  Ul        UR                  R                  Ul        g r   r  r   s       r#   r   MistralLoader.set_config<  r  r&   c                 &    UR                   Ul        g r   rM  rN  s      r#   r   MistralLoader.set_layer_normB  rP  r&   c                 x   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  pEU R	                  UR                  R
                  UR                  5        U R	                  UR                  R
                  UR                  5        [        S5       Vs/ s H  n[        R                   " 5       PM     nnU R#                  US   UR$                  R&                  US9  U R#                  US   UR$                  R(                  US9  U R#                  US   UR$                  R*                  US9  U[        R,                  R.                  :X  a/  [0        R2                  " UR                  R4                  S   U5        OPU[        R,                  R6                  :X  a  SOSn[0        R8                  " UR                  R4                  S   Xx5        U R#                  UR                  R4                  S   UR$                  R:                  US9  U R#                  UR                  R<                  UR>                  R@                  US9  U R#                  UR                  RB                  UR>                  RD                  US9  U R#                  UR                  RF                  UR>                  RH                  US9  [K        US5        [K        US5        [L        RN                  " 5         GM     g s  snf r  r   r  s	            r#   r   MistralLoader.set_decoderE  s    %DOOV-@-@ADOOV[[9!$TZZ!?J))44e6K6K ))5+I+I ?DAhGhK224hLGOOQ!7!7J   OOQ!7!7J   OOQ!7!7J   [55999!!*";";"B"B1"E|T(K,D,D,M,MMST**--44Q7 OO))003&&%   OO'')<)<   OO--uyy/@/@Z   OO'')<)<   E;'E5!JJLW "@ Hr  r'   r  rE  s   @r#   r+  r+    sJ    $ $8t)>' 4?3K3K3O3O 0 0r&   r+  Qwen2Configc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
\R                  R                  4S jrS	rU =r$ )
Qwen2Loaderix  c                     g)NQwen2ForCausalLMr'   r   s    r#   rR   Qwen2Loader.architecture_namez  r]  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S nU R	                  UR                   S5      u  pVn[        UR                   SS 5      nU(       a  S n	UR
                  S:X  a  [        R                  UR                  5      n	U	c>  [        SUR
                  < SSR                  [        R                  5       5      < 35      eUR                  n
UR                  nO[        R                  R                   n	S n
S n["        R$                  R'                  UU[        R(                  R*                  SSSS	S
UUUUU	U
US9nU R-                  UR.                  UR0                  U	5        U R3                  UR.                  R4                  UR6                  5        U$ )Nr  r  r  r  r  r  r?   Tr   Fr  )r[   r  r  rQ   r   r  r  rL   r  r   rN   rP   r  r  r   r   r   r
   r  r   ra  r  r   r   r`   r   r   r   )r;   r`   rp  r  r  r   r   r   r  r   r  r  rc   s                r#   r   Qwen2Loader.get_model_spec~  s   \\33
LL44	u||-BIN$LAEAWAWLL&B
>J
 &ell4I4PJ"//584889L9T9TU
!) ,88		"9">">"@A	   3==,11J$1155J#J;;GG"--33# 3"7"%!-! H 
$ 	u{{J?//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   Qwen2Loader.get_vocabulary  sO    '9LL++c&k9	y!AMM/A-. "r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Qwen2Loader.set_vocabulary  r  r&   c                     UR                   b  UR                   OUR                  Ul         UR                  Ul        UR                  b  UR                  OSUl        UR                  R
                  Ul        g r  r  r  r  r	  r[   r  r  r   s       r#   r   Qwen2Loader.set_config  n     "". $$ 	
 %..#,#6#6#BI 	 %*LL$=$=!r&   c                 &    UR                   Ul        g r   rM  rN  s      r#   r   Qwen2Loader.set_layer_norm  rP  r&   c                 x   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  pEU R	                  UR                  R
                  UR                  5        U R	                  UR                  R
                  UR                  5        [        S5       Vs/ s H  n[        R                   " 5       PM     nnU R#                  US   UR$                  R&                  US9  U R#                  US   UR$                  R(                  US9  U R#                  US   UR$                  R*                  US9  U[        R,                  R.                  :X  a/  [0        R2                  " UR                  R4                  S   U5        OPU[        R,                  R6                  :X  a  SOSn[0        R8                  " UR                  R4                  S   Xx5        U R#                  UR                  R4                  S   UR$                  R:                  US9  U R#                  UR                  R<                  UR>                  R@                  US9  U R#                  UR                  RB                  UR>                  RD                  US9  U R#                  UR                  RF                  UR>                  RH                  US9  [K        US5        [K        US5        [L        RN                  " 5         GM     g s  snf r  r   r  s	            r#   r   Qwen2Loader.set_decoder  s    %DOOV-@-@ADOOV[[9!$TZZ!?J))44e6K6K ))5+I+I ?DAhGhK224hLGOOQ!7!7J   OOQ!7!7J   OOQ!7!7J   [55999!!*";";"B"B1"E|T(K,D,D,M,MMST**--44Q7 OO))003&&%   OO'')<)<   OO--uyy/@/@Z   OO'')<)<   E;'E5!JJL[ "@ Hr  r'   r  rE  s   @r#   r=  r=  x  sJ    " "6p)
>' 4?3K3K3O3O 2 2r&   r=  Qwen3Configc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
\R                  R                  4S jrS	rU =r$ )
Qwen3Loaderi	  c                     g)NQwen3ForCausalLMr'   r   s    r#   rR   Qwen3Loader.architecture_name	  r]  r&   c                 "   UR                   R                  nUR                   R                  n[        UR                   SU5      n[        UR                   SUR                   R                  U-  5      nXC:X  a  S nU R                  UR                   S5      u  pgn[        UR                   SS 5      n	U	(       a  S n
U	R                  S:X  a  [        R                  U	R                  5      n
U
c>  [        SU	R                  < SSR                  [        R                  5       5      < 35      eU	R                  nU	R                  nO[        R                   R"                  n
S nS n[$        R&                  R)                  UU[        R*                  R,                  S	S	S	UR                   R.                  S
UUUUUS	U
UUS9nU R1                  UR2                  UR4                  U
5        U R7                  UR2                  R8                  UR:                  5        U$ )Nr  r  r  r  r  r  r  r?   TF)r   r   r  r  r  r  r   r   r  r  r  r  r   r  r  )r[   r  r  rQ   r  r   r  r  rL   r  r   rN   rP   r  r  r   r   r   r
   r  r   ra  r  r  r   r   r`   r   r   r   )r;   r`   rp  r  r  r  r   r   r   r  r   r  r  rc   s                 r#   r   Qwen3Loader.get_model_spec	  s   \\33
LL44	u||-BINLL*ell&>&>)&K
 $LAEAWAWLL)B
>J &ell4I4PJ"//584889L9T9TU
!) ,88		"9">">"@A	   3==,11J$1155J#J;;GG"--33||,,# 3"7"%!-!# H 
( 	u{{J?//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   Qwen3Loader.get_vocabularyG	  sO    '9LL++c&k9	y!AMM/A-. "r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Qwen3Loader.set_vocabularyN	  r  r&   c                     UR                   b  UR                   OUR                  Ul         UR                  Ul        UR                  b  UR                  OSUl        UR                  R
                  Ul        g r  rH  r   s       r#   r   Qwen3Loader.set_configQ	  rJ  r&   c                 &    UR                   Ul        g r   rM  rN  s      r#   r   Qwen3Loader.set_layer_norm]	  rP  r&   c                 x   SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        [        UR                  UR                  5      5       GH4  u  nu  pVU R	                  UR                  R
                  UR                  5        U R	                  UR                  R
                  UR                  5        U R	                  UR                  R                  UR                   R                  5        U R	                  UR                  R"                  UR                   R"                  5        [%        S5       Vs/ s H  n[&        R(                  " 5       PM     nnU R+                  US   UR                   R,                  US9  U R+                  US   UR                   R.                  US9  U R+                  US   UR                   R0                  US9  U[&        R2                  R4                  :X  a/  [6        R8                  " UR                  R:                  S   U5        OPU[&        R2                  R<                  :X  a  SOSn	[6        R>                  " UR                  R:                  S   X5        U R+                  UR                  R:                  S   UR                   R@                  US9  U R+                  UR                  RB                  URD                  RF                  US9  U R+                  UR                  RH                  URD                  RJ                  US9  U R+                  UR                  RL                  URD                  RN                  US9  [Q        US5        [Q        US5        [R        RT                  " 5         GM7     g s  snf r  )+r<  r   r?  rA  r   r  r  r  r  r  r  r  rE  r  rG  r&  r  r'  r+  r   r,  r   r-  r.  r/  r   r   r   r0  r   r  r  r  r  r  r  r7  r  r  r  r  r  r  )
r;   rc   r   r   	layer_idxr   r  r   r3  r  s
             r#   r   Qwen3Loader.set_decoder`	  s    %DOOV-@-@ADOOV[[9.7DJJ8V.W*I*
))44e6K6K ))5+I+I ))00%//2H2H ))00%//2H2H ?DAhGhK224hLGOOQ!7!7J   OOQ!7!7J   OOQ!7!7J   [55999!!*";";"B"B1"E|T(K,D,D,M,MMST**--44Q7 OO))003&&%   OO'')<)<   OO--uyy/@/@Z   OO'')<)<   E;'E5!JJLi /X Hs   'N7r'   r  rE  s   @r#   rQ  rQ  	  sJ    " ":x)
>' 4?3K3K3O3O 9 9r&   rQ  MixFormerSequentialConfigc                   P   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
SrU =r$ )	MixFormerSequentialLoaderi	  c                     gru  r'   r   s    r#   rR   +MixFormerSequentialLoader.architecture_name	  rx  r&   c                    [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     UR                  R                  SSSS9nU R                  UR                  UR                  5        U R                  UR                  R                  UR                  S   R                  5        U$ )NTFrp  r  r   r   r  r  r  r  r*  )r
   r  r   r[   r  r  r   r   r  r   r   r  r   r   r   r  s      r#   r   (MixFormerSequentialLoader.get_model_spec	  s    ;;GG||++ll))-ell.N.NO||..#"" H 	
 	u||4//b1A1H1HIr&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   (MixFormerSequentialLoader.get_vocabulary	  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   (MixFormerSequentialLoader.set_vocabulary	  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   $MixFormerSequentialLoader.set_config	  r  r&   c                 &   SUl         U R                  UR                  US   R                  5        U R	                  UR
                  US   R                  5        [        UR                  USS 5       GH  u  p4U R	                  UR                  UR                  5        U R                  UR                  R                  S   UR                  R                  5        U R                  UR                  R                  S   UR                  R                  5        U R                  UR                   R"                  UR$                  R&                  5        U R                  UR                   R(                  UR$                  R*                  5        GM     g )NFr   r*  r   )r<  r   r?  r  r   r  lnr  r  r  r   r  r   mixerr  r1  r  r  r  r  r  r  r  s        r#   r   %MixFormerSequentialLoader.set_decoder	  s    %DOOVAY]];DOOVBZ]];!$TZZ"!>J
 < <ehhGOOJ55<<Q?AQAQROOJ55<<Q?AUAUVOOJNN33UYY]]COOJNN33UYY]]C "?r&   r'   r-  rE  s   @r#   rd  rd  	  s5    & & )/

D 
Dr&   rd  	PhiConfigc                   P   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
SrU =r$ )		PhiLoaderi	  c                     gru  r'   r   s    r#   rR   PhiLoader.architecture_name	  rx  r&   c                 0   [         R                  R                  UR                  R                  UR                  R
                  S[        UR                  R                     UR                  R                  SSSS9nU R                  UR                  UR                  5        U R                  UR                  R                  UR                  R                  5        U R!                  UR                  R"                  UR                  R$                  5        U$ )NTFrh  )r
   r  r   r[   r  r  r   r   r  r   r   r  r   r   r   r   r   r  rq  r  s      r#   r   PhiLoader.get_model_spec	  s    ;;GG||++ll))-ell.N.NO||..#"" H 	
 	u'8'89//1E1EFDLL33U]]5E5EFr&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   PhiLoader.get_vocabulary	  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   PhiLoader.set_vocabulary	  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   PhiLoader.set_config	  r  r&   c                    SUl         U R                  UR                  UR                  R                  5        [        UR                  UR                  5       GH  u  p4U R                  UR                  UR                  5        U R                  UR                  R                  S   UR                  R                  5        U R                  UR                  R                  S   UR                  R                   5        U R                  UR"                  R$                  UR&                  R(                  5        U R                  UR"                  R*                  UR&                  R,                  5        GM     g r  )r<  r   r?  embdr  r  r  r  r   r  rq  r   r  r   rr  r  r1  r  r  r  r  r  r  r  s        r#   r   PhiLoader.set_decoder	  s     %DOOV[[__=!$TZZ!:J
 < <ehhGOOJ55<<Q?AQAQROOJ55<<Q?AUAUVOOJNN33UYY]]COOJNN33UYY]]C ";r&   r'   r-  rE  s   @r#   rv  rv  	  s5    & &")/
	D 	Dr&   rv  
Phi3Configc                      ^  \ rS rSr\S 5       rS rU 4S jrS rS r	S r
S r\R                  R                  4S	 jrS
rU =r$ )
Phi3Loaderi
  c                     gru  r'   r   s    r#   rR   Phi3Loader.architecture_name
  rx  r&   c                    UR                   R                  nUR                   R                  n[        UR                   SU5      nXC:X  a  S n[        UR                   SS5      n[        UR                   SS5      n[        UR                   SS 5      nU(       ae  [        R                  US   5      nUR                  SS5      n	Uc7  [        S	US   < S
SR                  [        R                  5       5      < 35      eOS nSn	[        UR                   SS 5      n
U
(       a  S nU
R                  S:X  a  [        R                  U
R                  5      nUc>  [        SU
R                  < SSR                  [        R                  5       5      < 35      eU
R                  nU
R                  nO[        R                  R                   nS nS n["        R$                  R'                  UU[        R(                  R*                  SSSSSUU	[        UR                   SS5      UUUUUUS9nU R-                  UR.                  UR0                  U5        U R3                  UR.                  R4                  UR6                  5        U$ )Nr   original_max_position_embeddingsr   max_position_embeddingsr   r   r   r   r   r   r?   r  r  r  r  TFr   r  )r   r   r  r  r  r  r   r   r  r  r  r  r   r  r  )r[   r  r  rQ   r   rL   r   rN   rP   r  r  r  r  r  r   r   r   r
   r  r   ra  r  r   r   r`   r   r   r   )r;   r`   rp  r  r  r  r  r   r   r   r  r   r  r  rc   s                  r#   r   Phi3Loader.get_model_spec

  sK   \\33
LL44	u||-BIN$L+2LL<a,
( #*%,,8QST"Uu||^TB"9"="=l6>R"S$0$4$4Xq$A!"*) $F+TYY7N7S7S7U-VX  + #'$%! &ell4I4PJ"//584889L9T9TU
!) ,88		"9">">"@A	   3==,11J$1155J#J;;GG"--33# 3"7lEB-M$;%!-!# H 
( 	u{{J?//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   Phi3Loader.get_vocabularyS
  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   Phi3Loader.set_vocabulary\
  r  r&   c                 j    UR                   Ul         UR                  Ul        UR                  Ul        g r   r  r   s       r#   r   Phi3Loader.set_config_
  r  r&   c                 &    UR                   Ul        g r   rM  rN  s      r#   r   Phi3Loader.set_layer_normd
  rP  r&   c                     [         R                  " U[         R                  S9Ul        [         R                  " U[         R                  S9Ul        g )N)rA   )rE   tensorr  rotary_scaling_long_factorrotary_scaling_short_factor)r;   rc   r  r  s       r#   set_rotary_embeddings Phi3Loader.set_rotary_embeddingsg
  s:     +0,,&emm+
' ,1<<'u}},
(r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  pEU R	                  UR                  R
                  UR                  5        U R	                  UR                  R
                  UR                  5        U R                  UR                  R                  S   UR                   R"                  US9  U R                  UR                  R                  S   UR                   R$                  US9  UR                   R&                  R(                  bz  UR                   R&                  R*                  bY  U R-                  UR                  UR                   R&                  R(                  UR                   R&                  R*                  5        U[.        R0                  R2                  :X  af  UR4                  R6                  R8                  R;                  SSS9u  pgXdR                  R<                  l        XtR                  R>                  l        GO,UR4                  R6                  R@                  R;                  SSS9u  pUR4                  R6                  RB                  R;                  SSS9u  pUR4                  R6                  RD                  R;                  SSS9u  pXR                  R<                  l        XR                  R<                  l#        XR                  R<                  l$        XR                  R>                  l        XR                  R>                  l#        XR                  R>                  l$        U R                  UR                  RJ                  UR4                  RL                  US9  [O        US5        [O        US5        [P        RR                  " 5         GM     g )	NFr   r  r   r)  r  r  r  )*r<  r   r?  rA  r   r  r  r  r  r  r  rE  r  rG  r   r   r  r%  r  
rotary_emblong_factorshort_factorr  r   r   r   r  gate_up_projr   r&  r  r7  r   r   r   r   r   r  r  r  r  r  )r;   rc   r   r   r   r  r  r  gate_qweight
up_qweightgate_scales	up_scalesgate_qzeros	up_qzeross                 r#   r   Phi3Loader.set_decoderq
  s    %DOOV-@-@ADOOV[[9!$TZZ!?J))44e6K6K ))5+I+I OO))003((%  
 OO))003&&%   **66BOO..;;G**--OO..::OO..;; [55999%*YY%;%;%B%B%H%HPQ%H%R"	1:''.7>--4 ,199+A+A+I+I+O+O1 ,P ,( */)?)?)F)F)L)LQTU)L)V&).)?)?)F)F)L)LQTU)L)V&1=''.7B''46A''37A--4=F--:<E--9OO'')<)<   E;'E5!JJLo "@r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r   r  r   r   r   r   r|   rD  rE  s   @r#   r  r  
  sP    & &GR)/
'
 4?3K3K3O3O < <r&   r  RWConfigc                   `   ^  \ rS rSr\S 5       rS rS rU 4S jrS r	S r
S rSS	 jrS
rU =r$ )RWLoaderi
  c                     gru  r'   r   s    r#   rR   RWLoader.architecture_name
  rx  r&   c                     UR                   R                  U l        UR                   R                  U l        [        UR                   SS 5      U l        SU l        g )N	n_head_kvnum_kv)r[   r  _num_layersr  
_num_headsrQ   _num_heads_kv_num_kv_attrr   s     r#   get_falcon_specRWLoader.get_falcon_spec
  sB     <<//,,--$U\\;E$r&   c                 b   U R                  U5        [        UR                  SS5      (       a  SnOU R                  n[        R
                  R                  U R                  U R                  S[        R                  R                  UR                  R                  SSUR                  R                  (       a  SOS SUR                  R                  US:H  US9nU R                  UR                   UR"                  5        U R%                  UR                   R&                  UR(                  5        U$ )Nmulti_queryFr   Tr   )
r   r   r_  r`  scale_alibir  r  r  r  r  )r  rQ   r[   r  r
   r  r   r  r  r   ra  r|  r_  rotaryparallel_attnr   r   r  r   r   r   )r;   r`   r  rc   s       r#   r   RWLoader.get_model_spec
  s    U#5<<66L--L;;GGOO"--22,,$$)-!LL//qT##ll88*a/% H 
 	u'8'89//?r&   c                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   RWLoader.get_vocabulary
  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   RWLoader.set_vocabulary
  r  r&   c                 j    UR                   Ul        UR                   Ul         UR                   Ul        g r   )r  r  r	  r   s       r#   r   RWLoader.set_config
  r  r&   c                    SUl         U R                  UR                  UR                  5        U R	                  UR
                  UR                  5        [        UR                  UR                  5       GH  u  p4[        US5      (       aM  U R	                  UR                  UR                  5        U R	                  UR                  UR                  5        O[        US5      (       a'  U R	                  UR                  UR                   5        O`U R	                  UR"                  R
                  UR                   5        U R	                  UR$                  R
                  UR&                  5        [)        UR"                  U R*                  5      nUS:X  a>  U R-                  UR"                  R.                  S   UR"                  R0                  5        OnU R3                  UR"                  R.                  S   UR"                  R0                  UR"                  R4                  XTR"                  R4                  :  a  UOS 5        U R-                  UR"                  R.                  S   UR"                  R6                  5        U R-                  UR$                  R8                  UR:                  R<                  5        U R-                  UR$                  R>                  UR:                  R@                  5        GM     g )NFln_attnr  r   r   )!r<  r   r?  rk  r   r  r  r  r  r  r   rC  r  rF  ln_mlpr  rE  r  r  rG  rQ   r  r   r   rH  rm  r  rJ  r  r  rK  r  rL  )r;   rc   r   r   r  r  s         r#   r   RWLoader.set_decoder
  s,    %DOOV-C-CDDOOV[[9!$TZZ!:Jui((##J$?$?O##J$H$H%,,W%899##J$@$@%BWBWX##--88%:O:O ##NN--u/M/M U1143D3DEF{--44Q7((88
 ##--44Q7((88((22$';';'E'EEF4	 OO))003U5I5I5O5O OOJNN33UYY5L5LMOOJNN33UYY5L5LMC ";r&   c                    UR                   nUcS  UR                  USSUR                  S   5      nUR                  SS5      nUR                  SUR                  S   5      nOUR                  S   X4S-  -   -  nUR                  SX4-  S-   XeR                  S   5      nUR	                  X4-  SS/SS9u  pxn	[
        R                  " UR                  X6-  S5      UR                  XF-  S5      U	R                  XF-  S5      /5      nXQl         UR                  b  UR                  n
Uc7  U
R                  USS5      n
U
R                  SS5      n
U
R                  S5      n
O|U
R                  SX4-  S-   W5      n
U
R	                  X4-  SS/SS9u  pxn	[
        R                  " UR                  X6-  5      UR                  XF-  5      U	R                  XF-  5      /5      n
Xl        g g )Nr(  r*  r   r   r)  r  )r   r!  r[  r   splitrE   r  r   )r;   rc   r   r  r  r   r  rF  rG  rH  r   s              r#   rm  RWLoader.set_qkv_linear  s   >^^Iq"fll26FGF%%a+F^^BR(89F||A9z+ABH^^I'!+X||B7GF llI$7A#>AlFGA!YYIIi2B7IIf/4IIf/4F ;;";;D~||Iq"5~~a+||B'||B	(;a(?J**i&91a%@a*Hayy		)"67		&"34		&"34 I% #r&   r  r  r  r  r   )rK   rv   rw   rx   r   rR   r  r   r   r   r   r   rm  r|   rD  rE  s   @r#   r  r  
  s?    & &%6)/
&NP) )r&   r  FalconConfigc                       \ rS rSrS rSrg)FalconLoaderi<  c                     UR                   R                  U l        UR                   R                  U l        [        UR                   SS 5      U l        SU l        g )Nnum_kv_heads)r[   r  r  r  r  rQ   r  r  r   s     r#   r  FalconLoader.get_falcon_spec>  sB     <<99,,::$U\\>4H*r&   r  N)rK   rv   rw   rx   r  r|   r'   r&   r#   r  r  <  s    +r&   r  DistilBertConfigc                   6    \ rS rSr\S 5       rS rS rS rSr	g)DistilBertLoaderiE  c                     g)NDistilBertModelr'   r   s    r#   rR   "DistilBertLoader.architecture_nameG  r  r&   c                    [         R                  " UR                  R                  UR                  R                  S[
        UR                  R                     SS9n[         R                  " U5      nSUR                  l	        U R                  UR                  R                  S   UR                  R                  5        U R                  UR                  R                  UR                  R                  5        U R!                  UR                  R"                  UR                  R$                  5        ['        UR                  R(                  UR*                  R(                  5       GH  u  pE[-        S5       Vs/ s H  n[.        R0                  " 5       PM     nnU R3                  US   UR4                  R6                  5        U R3                  US   UR4                  R8                  5        U R3                  US   UR4                  R:                  5        [<        R>                  " UR@                  RB                  S   U5        U R3                  UR@                  RB                  S   UR4                  RD                  5        U R!                  UR@                  RF                  URH                  5        U R3                  URJ                  RL                  URJ                  RN                  5        U R3                  URJ                  RP                  URJ                  RR                  5        U R!                  URJ                  RF                  URT                  5        GM     U$ s  snf )NFTr   r   r(  r   r)  )+r
   TransformerEncoderSpecr[   rz  r{  r   r   TransformerEncoderModelSpecr   r<  r   r?  rk  r   r=  position_embeddingsr   r   	LayerNormr  r  r  r+  r   r,  r   r$  q_link_linv_linr   r0  r  r   out_linr  sa_layer_normr  r  lin1r  lin2output_layer_normr;   r`   encoder_specrc   r   r  r   r3  s           r#   r   DistilBertLoader.get_model_specK  sp   '>>LL!!LL  -ell.E.EF $
  ;;
 ).%LL##A&(8(8(H(H	
 	##LL++U-=-=-Q-Q	
 	LL,,e.>.>.H.H	
 "%T\\%7%79J9J9P9P!QJ>CAhGhK224hLGOOLOU__-B-BCOOLOU__-B-BCOOLOU__-B-BCj77>>qA<POO))003U__5L5L ))44e6I6I OOJNN33UYY^^DOOJNN33UYY^^D
 9 95;R;RS! "R$ # Hs   ;Mc                 &    UR                  U5        g r   r  r   s      r#   r   DistilBertLoader.set_vocabularyw  r  r&   c                 4    UR                   Ul         SUl        g )Ng-q=)r	  r  r   s       r#   r   DistilBertLoader.set_configz  s    $..$)!r&   r'   N)
rK   rv   rw   rx   r   rR   r   r   r   r|   r'   r&   r#   r  r  E  s$    ! !*X)*r&   r  
BertConfigc                   J   ^  \ rS rSr\S 5       rS rU 4S jrS rS r	Sr
U =r$ )
BertLoaderi  c                     g)N	BertModelr'   r   s    r#   rR   BertLoader.architecture_name  s    r&   c           
         UR                   R                  S:X  d   e[        R                  " UR                   R                  UR                   R
                  S[        UR                   R                     SS[        R                  R                  S9n[        R                  " US[        R                  R                  S9nSUR                  l        U R!                  UR                  R"                  S   UR"                  R$                  5        U R!                  UR                  R"                  S   UR"                  R&                  5        U R)                  UR                  R*                  UR"                  R,                  5        U R/                  UR                  R0                  UR"                  R2                  5        U R5                  UR6                  UR8                  R:                  5        [=        UR                  R>                  UR                  R>                  5       GH4  u  pE[A        S	5       Vs/ s H  n[        RB                  " 5       PM     nnU R5                  US   URD                  RF                  RH                  5        U R5                  US   URD                  RF                  RJ                  5        U R5                  US   URD                  RF                  RL                  5        [N        RP                  " URR                  RT                  S   U5        U R5                  URR                  RT                  S   URD                  RV                  R:                  5        U R/                  URR                  RX                  URD                  RV                  R2                  5        U R5                  URZ                  R\                  UR^                  R:                  5        U R5                  URZ                  R`                  URV                  R:                  5        U R/                  URZ                  RX                  URV                  R2                  5        GM7     U$ s  snf 
NabsoluteFTr)  )r   r   r   num_source_embeddingsembeddings_merge)pooling_layerpooling_activationr   r   r(  )1r[   position_embedding_typer
   r  r  r  r   r5  r   EmbeddingsMergeADDr  ra  Tanhr   r<  r   r?  rk  token_type_embeddingsr   r=  r  r   r   r  r   pooler_densepoolerrJ  r  r  r+  r,  r$  r;   queryr   valuer   r0  r  r   outputr  r  r  intermediater  r  s           r#   r   BertLoader.get_model_spec  s$   ||33zAAA'>>LL**LL,,-ell.E.EF $"#(88<<
  ;;*55::
 ).%LL##A&(8(8(H(H	
 	LL##A&(8(8(N(N	
 	##LL++U-=-=-Q-Q	
 	LL,,e.>.>.H.H	
 	))5<<+=+=>!$T\\%7%79L9L!MJ>CAhGhK224hLGOOLOU__-A-A-G-GHOOLOU__-A-A-E-EFOOLOU__-A-A-G-GHj77>>qA<POO))003U__5K5K5Q5Q ))44eoo6L6L6V6V OOJNN33U5G5G5M5MNOOJNN33U\\5G5GH
 9 95<<;Q;QR! "N$ # Hs   6Qc                    > [         TU ]  X5      nUR                  R                  [	        U5      -
  n[        U5       H  nUR                  SU-  5        M     U$ r  r  r  s         r#   r   BertLoader.get_vocabulary  r  r&   c                 &    UR                  U5        g r   r  r   s      r#   r   BertLoader.set_vocabulary  r  r&   c                 \    UR                   Ul         UR                  R                  Ul        g r   r	  r[   layer_norm_epsr  r   s       r#   r   BertLoader.set_config  "    $..$)LL$?$?!r&   r'   )rK   rv   rw   rx   r   rR   r   r   r   r   r|   rD  rE  s   @r#   r  r    s1     6p)@ @r&   r  XLMRobertaConfigc                   <    \ rS rSr\S 5       rS rS rS rS r	Sr
g)	XLMRobertaLoaderi  c                     g)N#XLMRobertaForSequenceClassificationr'   r   s    r#   rR   "XLMRobertaLoader.architecture_name  s    4r&   c           
      	   UR                   R                  S:X  d   e[        R                  " UR                   R                  UR                   R
                  S[        UR                   R                     SS[        R                  R                  S9nUR                  R                  c  SnOSn[        R                  " UU[        R                  R                  S9nSUR                   l        U R%                  UR                   R&                  S   UR                  R&                  R(                  5        U R%                  UR                   R&                  S   UR                  R&                  R*                  5        U R-                  UR                   R.                  UR                  R&                  R0                  5        U R3                  UR                   R4                  UR                  R&                  R6                  5        U(       a:  U R9                  UR:                  UR                  R                  R<                  5        [?        UR                   R@                  UR                  R                   R@                  5       GH4  u  pV[C        S	5       Vs/ s H  n[        RD                  " 5       PM     nnU R9                  US   URF                  RH                  RJ                  5        U R9                  US   URF                  RH                  RL                  5        U R9                  US   URF                  RH                  RN                  5        [P        RR                  " URT                  RV                  S   U5        U R9                  URT                  RV                  S   URF                  RX                  R<                  5        U R3                  URT                  RZ                  URF                  RX                  R6                  5        U R9                  UR\                  R^                  UR`                  R<                  5        U R9                  UR\                  Rb                  URX                  R<                  5        U R3                  UR\                  RZ                  URX                  R6                  5        GM7     U$ s  snf r  )2r[   r  r
   r  r  r  r   r5  r   r  r  robertar  r  ra  r  r   r<  r   r?  rk  r  r   r=  r  r   r   r  r   r  rJ  r  r  r+  r,  r$  r;   r  r   r  r   r0  r  r   r  r  r  r  r   r  	r;   r`   r  r  rc   r   r  r   r3  s	            r#   r   XLMRobertaLoader.get_model_spec  s^   ||33zAAA'>>LL**LL,,-ell.E.EF $"#(88<<
 =='!M M;;'*55::
 ).%LL##A&(@(@(P(P	
 	LL##A&(@(@(V(V	
 	##LL++MM$$88	
 	LL,,emm.F.F.P.P	
 OOD--u}}/C/C/I/IJ!$T\\%7%79N9N9T9T!UJ>CAhGhK224hLGOOLOU__-A-A-G-GHOOLOU__-A-A-E-EFOOLOU__-A-A-G-GHj77>>qA<POO))003U__5K5K5Q5Q ))44eoo6L6L6V6V OOJNN33U5G5G5M5MNOOJNN33U\\5G5GH
 9 95<<;Q;QR! "V$ # Hs   R=c                 &    UR                  U5        g r   r  r   s      r#   r   XLMRobertaLoader.set_vocabulary  r  r&   c                 \    UR                   Ul         UR                  R                  Ul        g r   r  r   s       r#   r   XLMRobertaLoader.set_config  r
  r&   c                 |    UR                   Ul        [        USS5      nUS:  a  UR                  US-   S  Ul        g g Npadding_idxr   r   r   r   s       r#   r   'XLMRobertaLoader.set_position_encodings  =    2A:!^^FQJL9DN r&   r'   NrK   rv   rw   rx   r   rR   r   r   r   r   r|   r'   r&   r#   r  r    s*    5 5<|)@:r&   r  RobertaConfigc                   <    \ rS rSr\S 5       rS rS rS rS r	Sr
g)	RobertaLoaderi   c                     g)NRobertaModelr'   r   s    r#   rR   RobertaLoader.architecture_name"  s    r&   c           
         UR                   R                  S:X  d   e[        R                  " UR                   R                  UR                   R
                  S[        UR                   R                     SS[        R                  R                  S9nUR                  c  SnOSn[        R                  " UU[        R                  R                  S9nSUR                  l        U R#                  UR                  R$                  S   UR$                  R&                  5        U R#                  UR                  R$                  S   UR$                  R(                  5        U R+                  UR                  R,                  UR$                  R.                  5        U R1                  UR                  R2                  UR$                  R4                  5        U(       a0  U R7                  UR8                  UR                  R:                  5        [=        UR                  R>                  UR                  R>                  5       GH4  u  pV[A        S	5       Vs/ s H  n[        RB                  " 5       PM     nnU R7                  US   URD                  RF                  RH                  5        U R7                  US   URD                  RF                  RJ                  5        U R7                  US   URD                  RF                  RL                  5        [N        RP                  " URR                  RT                  S   U5        U R7                  URR                  RT                  S   URD                  RV                  R:                  5        U R1                  URR                  RX                  URD                  RV                  R4                  5        U R7                  URZ                  R\                  UR^                  R:                  5        U R7                  URZ                  R`                  URV                  R:                  5        U R1                  URZ                  RX                  URV                  R4                  5        GM7     U$ s  snf r  1r[   r  r
   r  r  r  r   r5  r   r  r  r  r  ra  r  r   r<  r   r?  rk  r  r   r=  r  r   r   r  r   r  rJ  r  r  r+  r,  r$  r;   r  r   r  r   r0  r  r   r  r  r  r  r   r  r  s	            r#   r   RobertaLoader.get_model_spec&  <   ||33zAAA'>>LL**LL,,-ell.E.EF $"#(88<<
 <<!M M;;'*55::
 ).%LL##A&(8(8(H(H	
 	LL##A&(8(8(N(N	
 	##LL++00	
 	LL,,e.>.>.H.H	
 OOD--u||/A/AB!$T\\%7%79L9L!MJ>CAhGhK224hLGOOLOU__-A-A-G-GHOOLOU__-A-A-E-EFOOLOU__-A-A-G-GHj77>>qA<POO))003U__5K5K5Q5Q ))44eoo6L6L6V6V OOJNN33U5G5G5M5MNOOJNN33U\\5G5GH
 9 95<<;Q;QR! "N$ # H   Q7c                 &    UR                  U5        g r   r  r   s      r#   r   RobertaLoader.set_vocabularyd  r  r&   c                 \    UR                   Ul         UR                  R                  Ul        g r   r  r   s       r#   r   RobertaLoader.set_configg  r
  r&   c                 |    UR                   Ul        [        USS5      nUS:  a  UR                  US-   S  Ul        g g r  r   r   s       r#   r   $RobertaLoader.set_position_encodingsk  r  r&   r'   Nr  r'   r&   r#   r!  r!     s*     <|)@:r&   r!  CamembertConfigc                   <    \ rS rSr\S 5       rS rS rS rS r	Sr
g)	CamembertLoaderir  c                     g)NCamembertModelr'   r   s    r#   rR   !CamembertLoader.architecture_namet  r  r&   c           
         UR                   R                  S:X  d   e[        R                  " UR                   R                  UR                   R
                  S[        UR                   R                     SS[        R                  R                  S9nUR                  c  SnOSn[        R                  " UU[        R                  R                  S9nSUR                  l        U R#                  UR                  R$                  S   UR$                  R&                  5        U R#                  UR                  R$                  S   UR$                  R(                  5        U R+                  UR                  R,                  UR$                  R.                  5        U R1                  UR                  R2                  UR$                  R4                  5        U(       a0  U R7                  UR8                  UR                  R:                  5        [=        UR                  R>                  UR                  R>                  5       GH4  u  pV[A        S	5       Vs/ s H  n[        RB                  " 5       PM     nnU R7                  US   URD                  RF                  RH                  5        U R7                  US   URD                  RF                  RJ                  5        U R7                  US   URD                  RF                  RL                  5        [N        RP                  " URR                  RT                  S   U5        U R7                  URR                  RT                  S   URD                  RV                  R:                  5        U R1                  URR                  RX                  URD                  RV                  R4                  5        U R7                  URZ                  R\                  UR^                  R:                  5        U R7                  URZ                  R`                  URV                  R:                  5        U R1                  URZ                  RX                  URV                  R4                  5        GM7     U$ s  snf r  r&  r  s	            r#   r   CamembertLoader.get_model_specx  r(  r)  c                 &    UR                  U5        g r   r  r   s      r#   r   CamembertLoader.set_vocabulary  r  r&   c                 \    UR                   Ul         UR                  R                  Ul        g r   r  r   s       r#   r   CamembertLoader.set_config  r
  r&   c                 |    UR                   Ul        [        USS5      nUS:  a  UR                  US-   S  Ul        g g r  r   r   s       r#   r   &CamembertLoader.set_position_encodings  r  r&   r'   Nr  r'   r&   r#   r2  r2  r  s*       <|)@:r&   r2  c            
      .   [         R                  " [         R                  S9n U R                  SSSS9  U R                  SSS9  U R                  S	S
SS9  U R                  SSS9  U R                  SSSS9  U R                  SSSS9  [        R
                  " U 5        U R                  5       n[        UR                  UR                  UR                  UR                  S;   UR                  UR                  UR                  S9nUR                  U5        g )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)rA  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsrA  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionrA  z--trust_remote_codez*Allow converting models using custom code.)rT   int8_float16)r-   r.   r/   r0   r1   r2   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr*   r`   r-   r.   quantizationr0   r1   r2   convert_from_args)parserargs	converters      r#   mainrR    sG   $$ >>F F	   :   	   K   ]  
 9   'D%

00??))-HH0000I %r&   __main__))r   r   )r)  r   )r)     r(  r   r(  r   r(  r)  r(  r(  r(  r  ))r)  r)  rU  rW  rX  rY  )r(  rT  )rX  r     rT  r   )rT  rT  )rT  r[  )rV  )r  r)  )r  r(  rZ  r\  )rT  r)  )rT  r  )rT     ))r]  r]  r[  r   )r[  r(  )r[  r  )r  r)  )r  rT  r  r[  	   r   )ra  r  )ra  r  )ra  
   )rb  r   )rb  r   )rb  r)  )rb  r(  )rb  r]  )rb     rc  r)  rc  r  )
)rT  r(  )rT  ra  )r  r   )r  r  r_  )r  r  r`  )ra  r[  )ra  ra  )rb  rT  )re  )   r   )rf     )rf  rf     r  )   r   )rj  r  )rj  ra  )   rg  rk  rf  )   r[  )rm  rb  )rm  ri     r   )ro  r(  )ro  ra  )ro  rf  )   rg  ))   ri  rh  )ri  ri  rj  r   rn  )   r  )	)ra     rd  re  )rc  rk  )   r[  )ru  rc  )ru  rk  )rs  r)  )rs  ri  ))rb  rg  )rq  rk  )rj  rc  )rj  rg  )rj  rq  )rk  ri  )rk  rj  )rm  r  )rm  rc  )rm  rt  rt  rc  )rp  r)  )rp  r(  )ru  r(  )ru  ra  )ru  rg  )rs  rT  )rs  r[  )rs  rq  )   rT  )   r   )rx  rg  )   ri  )
r^  )rb  rk  )rg  rm  )rq  rg  rr  rl  rv  )rp  r  )   r   )rw  r]  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3T5GemmaConfigc                       \ rS rSr\S 5       rS rS rS rS r	\
R                  R                  4S jr\
R                  R                  4S jrS	rg
)T5GemmaLoaderi  c                     g)NT5GemmaForConditionalGenerationr'   r   s    r#   rR   T5GemmaLoader.architecture_name  r  r&   c                 @    UR                   R                  S-   Ul        g )Nr7  )r   r  r   rN  s      r#   r   T5GemmaLoader.set_layer_norm  s    &&++c1
r&   c                 J   UR                   R                  nUR                   R                  n[        UR                   SS5      nUR                  n[        USU5      nXe:X  a  S n[
        R                  " UR                  UR                  S[        UR                     SSUR                  S[        USS5      USUUR                  S9nUR                  n[        USU5      n	X:X  a  S n	[
        R                  " UR                  UR                  S[        UR                     SSSUR                  S[        USS5      USSU	UR                  S	9n
[
        R                  " Xz5      nU R                  UR                  UR                  R                  U5        U R                  UR                  UR                  R                  U[         R"                  R$                  5        U R'                  UR                  R(                  UR                  R                  R*                  5        U$ )
Nr  i   r  TFr   r  )r   r   r  r  r  r  r  r  r  r  r  )r   r   r  r  with_encoder_attentionr  r  r  r  r   external_pre_post_encoder_layersr  r  )r[   r   r   rQ   r  r
   r  r  r   r  r  TransformerDecoderSpecr   r   r`   r   r   r   r   r   r   rA  )r;   r`   encoder_configdecoder_configr  encoder_num_headsencoder_num_heads_kvr   decoder_num_headsdecoder_num_heads_kvr   rc   s               r#   r   T5GemmaLoader.get_model_spec  s   ---- /?F*>>&13D 
  4#' "99,,..-n.N.NO%..#eD) $-#,,
  +>>&13D 
  4#' "99,,..-n.N.NO#'%..#eD) $-1-#,,
$  //Au{{':':NKLLKK$$((		
 	//1D1D1Q1QRr&   c                 H    UR                  U5        UR                  U5        g r   r  r   s      r#   r   T5GemmaLoader.set_vocabulary  r  r&   c                    UR                   Ul         UR                  Ul        UR                  Ul        [        UR                  S5      (       a&  UR                  R
                  R                  Ul        O>[        UR                  S5      (       a  UR                  R                  Ul        OSUl        UR                   Ul        g )Nr   r  gư>)	r  r  r	  r   r[   r   r  r  r  r   s       r#   r   T5GemmaLoader.set_config  s    $..$..$..5<<++(-(<(<(I(IF%U\\>22(-(A(AF%(,F%%.%8%8"r&   c                    SUl         [        UR                  [        5      (       a  UR                  S   OUR                  nU R	                  XRR
                  5        UR                  S-  Ul        U R                  UR                  UR                  5        Un[        [        UR                  UR                  5      5       GH  u  nu  pU R                  UR                  U	R                   5        U R                  UR"                  U	R$                  5        ['        S5       V
s/ s H  n
[(        R*                  " 5       PM     nn
U R-                  US   U	R.                  R0                  US9  U R-                  US   U	R.                  R2                  US9  U R-                  US   U	R.                  R4                  US9  [6        R8                  " UR:                  R<                  S   U5        U R-                  UR:                  R<                  S   U	R.                  R>                  US9  U R                  UR@                  U	RB                  5        U R                  URD                  U	RF                  5        U R-                  URH                  RJ                  U	RL                  RN                  US9  U R-                  URH                  RP                  U	RL                  RR                  US9  U R-                  URH                  RT                  U	RL                  RV                  US9  [Y        U	S5        [Y        U	S	5        [Z        R\                  " 5         GM     g s  sn
f )
NTr   r  r(  r  r   r)  r  r  )/r<  r   r?  r@  r   rA  r  r  r   r  r  r  r  r  r  rC  pre_self_attn_layernormrF  post_self_attn_layernormr+  r   r,  r   r  r-  r.  r/  r   r0  r  r   r  r  r  r  r  r  r  r  r  r7  r  r  r  r  r  r  )r;   rc   r   r  r   encoder_emb_specr   rs  r   r  r   qkv_split_layerss               r#   r   T5GemmaLoader.set_encoder  s    !% #-T__d"C"CDOOA 	 	,.B.BC2@2L2Lc2Q/DOOW\\:&/DJJ0N&O"A"
++U-J-J 44e6T6T
 CH(K(Q 6 6 8(KOO #U__%;%;
   OO #U__%;%;
   OO #U__%;%;
   j77>>qACSTOO))003&&%   55u7V7V 668X8X
 OO'')<)<   OO--uyy/@/@Z   OO'')<)<  
 E;'E5!JJLc 'P  Ls   $Mc                 n	   SUl         SUl        U R                  UR                  UR                  5        UR
                  S-  UR                  l        U R                  UR                  UR                  5        [        [        UR                  UR                  5      5       GH  u  nu  pgU R                  UR                  UR                  5        U R                  UR                   UR"                  5        [%        S5       Vs/ s H  n[&        R(                  " 5       PM     n	nU R+                  U	S   UR,                  R.                  US9  U R+                  U	S   UR,                  R0                  US9  U R+                  U	S   UR,                  R2                  US9  [4        R6                  " UR8                  R:                  S   U	5        U R+                  UR8                  R:                  S   UR,                  R<                  US9  U R                  UR>                  UR@                  5        U R                  URB                  URD                  5        U R+                  URF                  R:                  S   URH                  R.                  US9  [%        S5       Vs/ s H  n[&        R(                  " 5       PM     n
nU R+                  U
S   URH                  R0                  US9  U R+                  U
S   URH                  R2                  US9  [4        R6                  " URF                  R:                  S   U
5        U R+                  URF                  R:                  S   URH                  R<                  US9  U R                  URJ                  URL                  5        U R                  URN                  URP                  5        U R+                  URR                  RT                  URV                  RX                  US9  U R+                  URR                  RZ                  URV                  R\                  US9  U R+                  URR                  R^                  URV                  R`                  US9  [c        US	5        [c        US
5        [c        US5        [d        Rf                  " 5         GM     g s  snf s  snf )NTFr  r(  r   r  r   r)  r  
cross_attnr  )4r<  rT  r   r?  rA  r  r  r   r  r  r  r  r  r  rC  r  rF  r  r+  r   r,  r   r  r-  r.  r/  r   r0  r  r   r  )external_pre_encoder_attention_layer_normpre_cross_attn_layernorm*external_post_encoder_attention_layer_normpost_cross_attn_layernormr$  r  r  r  r  r  r  r  r  r  r7  r  r  r  r  r  r  )r;   rc   r   r  r   rs  r   r  r   r  kv_split_layerss              r#   r   T5GemmaLoader.set_decoder&  s    !%).&DOOV-@-@A1?1K1KS1P.DOOV[[9&/DJJ0N&O"A"
++U-J-J 44e6T6T
 CH(K(Q 6 6 8(KOO #U__%;%;
   OO #U__%;%;
   OO #U__%;%;
   j77>>qACSTOO))003&&%   DD..
 EE// OO$$++A.  ''%   BGqJA{557OJOO"  ''%  
 OO"  ''%  
 j2299!<oN OO$$++A.  ''%   55u7V7V 668X8X
 OO'')<)<   OO--uyy/@/@Z   OO'')<)<  
 E;'E<(E5!JJLs 'P  LH Ks   R-R2r'   N)rK   rv   rw   rx   r   rR   r   r   r   r   r   r   r   r   r   r|   r'   r&   r#   r}  r}    sZ    1 12BH09 9D8P8P8T8T?D 8C7O7O7S7Scr&   r}  )Vr   rG  r  rm  rn   typingr   r   numpyr  rq   rE   rG   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r	   r
   r   r   r   ra  r|  rb  GELUSigmoidRELUr  r   r  LinearSur  r   r   r  AWQ_GEMVr  r    r(   r*   ABCr~   r   rH  rc  rw  r  r  r  r  r  r	  r0  rR  r  r  r  rS  rY  rs  r  r  r  r  r+  r=  rQ  rd  rv  r  r  r  r  r  r  r!  r2  rR  rK   rk  r}  r'   r&   r#   <module>r     s   
  	  	 ! 	 ) 6   ""''''00&&//))..$//88((44""''""((##))
  ..55

*
*
-
-..550033	  $$--$$-- 
 yI yxWF#'' WFt @V @V @VF  47Z 47 !47n  :  !B =* =  =" !	9J 	9 "	9 C
 C CL #$/G+ /G %/Gd %G %G %GP 5G 5G 5Gp !VGK VG "VGr !LNK LN "LN^ !c J c  "c L !"K@Z K@ #K@\ %&q$ q$ 'q$h y'{ y' y'x - - - J+ J  JZ 41 41 41n \+ \  \~  d; d !dN Q+ Q  Qh #$ }; } ! %}@ !DK D "DN I+ I  IX S+ S  Sl ,-0D 0D .0Df 0D 0D 0Df h h hV H{ H HV  +8 + !+ #$6*{ 6* %6*r K@ K@ K@\ #$N:{ N: %N:b !N:K N: "N:b "#N:k N: $N:b7&t zF	 LF	 *!( V
 2 c} B !BK B "Bqk  		s   T TT