
    oiE             *          S SK Jr  S SKJr  S SKJrJr  S SKJ	r	J
r
JrJrJrJr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJr  S SKrS SKrS SKrS SKrS SKrS SK r S SK!r!S S	K"J#r#  S
SK$J%r%J&r&J'r'  S SK(r(S SK)r)S SK*r*S S	K"J#r#  S
SK+J,r,  S
SK-J.r.  S
SK/J0r0  S
SK1J2r2J3r3  S SK4J5r5  S SK6J7r7   S SK6J8r8  S SK;J<r<  S SK=J>r>J?r?  / SQr@/ SQrASSR                  \R                  R                  5       5      -   rES\E;   rFS\E;   rGSrHCESrISrJ0 SS_SS _S!S"_S#S$_S%S&_S'S(_S)S*_S+S,_S-S._S/S0_S1S2_S3S4_S5S6_S7S8_S9S:_S;S<_S=S>_S?S@SASBSCSDSE.ErKSF rL\L" 5       (       a  SGOSHrMSI rN SSJ jrOSK rPSL rQSM rR\R                  SNSOSSPS\R                  SQSPSSPSSRSSOSSSSSTSU4SV\\U\R                  4   SW\USX\WSY\\\U\W4      SZ\WS[\\X   S\\S]\\Y\U4   S^\WS_\\U   S`\WSa\\W   Sb\\U   Sc\\W   Sd\WSe\USf\USg\\U   Sh\USi\Z4(Sj jj5       r[Sk r\Sl r]/ 4Sm jr^SSn jr_SSo jr`SSp jraSq rb      SSr\USs\USt\USu\WSv\USw\USx\WSy\W4Sz jjrcSS{SOSSPS\R                  SQSPSSPSSTS|4SV\\U\R                  4   SW\USX\WSY\\\U\W4      SZ\WS[\\X   S\\S]\\Y\U4   S^\WS_\\U   S`\WSg\\U   Sh\USi\Z4S} jjrd              SS~\USW\USa\\W   Sb\\U   Sc\\W   SY\\W\US4   S]\\Y\US4   Sd\WS^\WSe\USf\USg\\\U      Sh\USi\Z4S jjreSrfS rg  SS jrh     SS jriS rjS rkS\USr\US\US\U4S jrlS\USr\US\U4S jrmS\USr\US\U4S jrnSSSSOSSSPS\R                  SQSPSSPSSTS4SV\\U\R                  4   Sw\USX\WSY\\\U\W4      Sc\\W   SZ\WS[\\X   S\\S]\\Y\U4   S^\WS_\\U   S`\WSg\\U   Sh\USi\Z4S jjro               SS~\USw\USa\\W   Sb\\U   Sc\\W   SY\\W\US4   S]\\Y\US4   Sd\WS^\WSe\USf\USg\\\U      Sh\USi\Z4S jjrpS rq         SS~\USa\\W   Sb\\U   Sc\\W   SY\\W\US4   Sd\WSe\USf\USh\USi\Z4S jjrr  SSV\USh\USi\Z4S jjrsS
SK-J.r.  S SKtJuruJvrv  S SKJrJ	rw  \R                      SS j5       rx\R                  SSNSOSSPS\R                  SQSPSSPSSRSSOSSSSSTSU4SV\\U\R                  4   SW\USX\WSY\\\U\W4      SZ\WS[\\X   S\\S]\\Y\U4   S^\WS_\\U   S`\WSa\\W   Sb\\U   Sc\\W   Sd\WSe\USf\USg\\U   Sh\USi\Z4(S jj5       rySS{SOSSPS\R                  SQSPSSPSSTS|4SV\\U\R                  4   SW\USX\WSY\\\U\W4      SZ\WS[\\X   S\\S]\\Y\U4   S^\WS_\\U   S`\WSg\\U   Sh\USi\Z4S jjrz              SS~\USW\USa\\W   Sb\\U   Sc\\W   SY\\W\US4   S]\\Y\US4   Sd\WS^\WSe\USf\USg\\\U      Sh\USi\Z4S jjr{  SSV\\U\R                  4   SX\WSY\\\U\W4      4S jjr|  SSV\\U\R                  4   SX\WSY\\\U\W4      4S jjr}    SSV\\U\R                  4   SX\WSY\\\U\W4      4S jjr~S rSS jrg!    S SK9J8r8   GNK!   S SK:J8r8    GNW= f= f)    )Version)version)dtype_from_configHAS_TORCH_DTYPE)convert_to_ggufquantize_ggufuse_local_ggufinstall_llama_cppcheck_llama_cpp_download_convert_hf_to_gguf)
Linear4bit)Linear)OptionalCallableUnionListN)logger   )fast_dequantizeQUANT_STATEget_lora_parameters_bias)fix_sentencepiece_gguf)get_model_name)_convert_torchao_model)OLLAMA_TEMPLATESMODEL_TO_OLLAMA_TEMPLATE_MAPPER)ProcessorMixinHfApi)	get_token)Path)PeftModelForCausalLM	PeftModel)print_quantization_methodsunsloth_save_modelsave_to_ggufpatch_saving_functionscreate_huggingface_repo)zllama-quantizez	llama-clizllama-server
z
COLAB_z
KAGGLE_z/tmp)zself_attn.q_projzself_attn.k_projzself_attn.v_projzself_attn.o_projzmlp.gate_projzmlp.up_projzmlp.down_proj)input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernormzself_attn.q_normzself_attn.k_normnot_quantizedz8Recommended. Fast conversion. Slow inference, big files.fast_quantizedz9Recommended. Fast conversion. OK inference, OK file size.	quantizedz:Recommended. Slow conversion. Fast inference, small files.f32zINot recommended. Retains 100% accuracy, but super slow and memory hungry.bf16zNBfloat16 - Fastest conversion + retains 100% accuracy. Slow and memory hungry.f16zNFloat16  - Fastest conversion + retains 100% accuracy. Slow and memory hungry.q8_0z=Fast conversion. High resource use, but generally acceptable.q4_k_mzZRecommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_Kq5_k_mzZRecommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_Kq2_kzWUses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.q3_k_lzTUses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_Kq3_k_mzTUses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_Kq3_k_szUses Q3_K for all tensorsq4_0zOriginal quant method, 4-bit.q4_1z`Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.q4_k_szUses Q4_K for all tensorsq4_kzalias for q4_k_mzalias for q5_k_mz<Higher accuracy, higher resource usage and slower inference.z:Even higher accuracy, resource usage and slower inference.zUses Q5_K for all tensorszUses Q8_K for all tensorsz3-bit extra small quantization)q5_kq5_0q5_1q5_k_sq6_kq3_k_xsc                  2    [         R                  " S5      S L$ )Ncurl)shutilwhich     F/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/save.pyhas_curlrL      s    <<t++rJ   z-DLLAMA_CURL=ONz-DLLAMA_CURL=OFFc                  `    [         R                  5        H  u  p[        SU  SU 35        M     g )N"z"  ==> )ALLOWED_QUANTSitemsprint)keyvalues     rK   r$   r$      s,    $**,
#geW%& -rJ   c                    [        U S5      (       d  gU R                  nSn[        R                  R	                  XR
                  5      nSn[        R                  R                  U5      (       d  Sn[        R                  " U5        UR                  U5        [        R                  R                  U S35      (       a  SnU(       a  [        R                  " USS9  U$ )N_saved_temp_tokenizerFTz/tokenizer.modelignore_errors)hasattrrU   ospathjoinname_or_pathexistsmakedirssave_pretrainedisfilerG   rmtree)modeltemporary_locationtemp_tokenizersentencepiece_modelfile_locationcreated_folders         rK   check_if_sentencepiece_modelrh      s     512200NGGLL!35P5PQMN77>>-((
M"""=1	ww~~'7899"mT:rJ   c                 x   SSK Jn  [        U" 5       R                  5      nU H  nUR                  U R
                  R                  :X  d  M)  [        UR                  5      S   R                  nU" 5       R                  U5      n[        R                  " SUR                  -   5        UR                  5         M     g )Nr   )scan_cache_dirz-Unsloth: Will remove a cached repo with size )huggingface_hubrj   listreposrepo_idconfig_name_or_path	revisionscommit_hashdelete_revisionsr   warning_onceexpected_freed_size_strexecute)rb   rj   cached_reposcached_reporemove_cache_commitdelete_strategys         rK   _free_cached_modelr{      s    .(../L $%,,"<"<<"&{'<'<"=a"@"L"L,.??#O ?!99:
 ##% $rJ   c                 z   [        U SS 5      n[        U [        [        [        45      (       Ga  [        U 5      u  p4pVprUb/  [        U5      [        La  UR                  OUS   n[        X45      nOUR                  nUR                  [        R                  5      R                  5       nUb  UR                  UR                  5       R                  [        R                  5      UR                  5       R                  [        R                  5      US9  [        R                  " UR!                  5       R#                  5       UR                  5       5      n	[        R$                  " U	5      R'                  5       (       d  [)        SU S35      eUR                  5       R                  U5      nX24$ U R*                  nX24$ )Nbias   )alphazUnsloth: Merge failed.
z has some elements = infinity.)getattr
isinstanceBnb_Linear4bitPeft_Linear4bitPeft_Linearr   typerl   dtyper   totorchfloat32taddmm_maxminabsisfiniteitem
ValueErrorweight)
layernamer}   Wquant_stateABsr   maximum_elements
             rK   _merge_lorar      sS   5&$'D%./;GHH(@(G%a"%)+%6d%B!!TU   /AGGEDD!!# = HHQSSUXXemm,accehhu}}.EqHQ $iiquuw?O>>/27799 .tf4RS  CCEHHUO 7N LL7NrJ   c                 P    [        SU S35        [        R                  " U U5        g )NzUnsloth: Saving ...)rQ   r   save)shardr   s     rK   fast_save_pickler      s+    	TF#
&'	JJ rJ   loraFT5GBTrained with Unsloth+Upload model trained with Unsloth 2x faster _unsloth_temporary_saved_buffersg?save_directorysave_methodpush_to_hubtokenis_main_process
state_dictsave_functionmax_shard_sizesafe_serializationvariantsave_peft_formatuse_temp_dircommit_messageprivate	create_prrevisioncommit_descriptiontagsrc   maximum_memory_usagec                    Uc
  [        5       nUc  SnSU;  a  US-  nUR                  5       nUc  SnOSU;  a  US-  nUS:X  a  [        S5      eUS	:X  a  Sn[        [	        5       5      nS
 H  nUU	 M     U(       a  SSKJn   U" US9S   nUS:  a  US::  d   e[        S5       H6  n[        R                  R                  5         [        R                  " 5         M8     UR                  5       R                  SS5      nUS:w  a  US:w  a  US:w  a  [        S5      eUS:X  aB  [        S5        [        S5        [!        U S5      (       a  U R#                  5       n [        S5        Ub-  [%        U[&        [(        45      (       d   e['        U5      S/-   nOS/nUUS'   US:X  d  US:X  a  U(       a  Uc  [        S5      eUS:X  a  [        S5        OUS:X  a  [        S5        [+        U UUS S!S S US"9n[-        U S#U R.                  5      " UUUUUU	UU
UUUS$9  Ub?  UR0                  nS%Ul        [-        US#UR.                  5      " UUUUUU	UU
UUUS$9  UUl        [!        U S&5      (       a  [        S'U S(3U-   5        US 4$ US)   S S US*   US+   US,   S-.nSS.KJn  [%        U U5      (       a  U R6                  nOU nUS:X  d3  US:X  d-  [!        U S/5      (       a  [!        UR6                  S05      (       Gd  US*   S1L a  S2OS3nU H  nUU	 M     [!        U S45      (       a  U R9                  S/5        U(       a  [+        U US)   US S!S S US"9nUbB  [        S5SS69  UR0                  nS%Ul        UR:                  " Su0 UD6  UUl        [        S75        O
[        5         [        S8SS69  US:w  a
  [        S9SS69  US:X  a  S US:'   U R:                  " Su0 UD6  U(       a"  [!        U S&5      (       a  [        S;US)   -   5        [        S75        US 4$ S nU(       a  S<U;   a  Un U S U R=                  S<5       nU U R=                  S<5      S=-   S  n [>        (       aV  [@        RB                  RE                  [F        U U R=                  S<5      S=-   S  5      n [H        RJ                  " S>U S?U  35        O[H        RJ                  " S@U SAU S?U  35        U US)'   U US)'   U n[        SB5        [L        RN                  " 5       RP                  n!SCn"[S        U	5      [T        L a  [V        RX                  " SDU	[V        RZ                  SE9n#[V        RX                  " SFU	[V        RZ                  SE9n$U#(       a$  []        U#R_                  S=5      5      SG-  SG-  SG-  n"O<U$(       a   []        U$R_                  S=5      5      SG-  SG-  n"O[S        U	5      [\        L a  U	n"[L        R`                  " S1SH9n%U%c  [L        R`                  " 5       n%U%c  S=n%U
c  SIn
U
USJ'   O9U
(       a2  U%SK::  a,  [H        RJ                  " SLU% SM35        S1n
[b        nU
USJ'   UUSN'   U
(       a  U!U"-  n!OU!U"SO-  -  n![]        [e        SU!5      U-  5      n![        SP[g        U!SG-  SG-  SG-  SK5       SQ[g        [L        RN                  " 5       Rh                  SG-  SG-  SG-  SK5       SR35        [>        (       a$  [@        RB                  RE                  [F        U5      n[@        RB                  Rk                  U5      (       d  [@        Rl                  " U5        [>        (       d  [n        (       a!  [H        RJ                  " SS5        [q        U5        SSTK9J:n&  U&" 5       n[w        URx                  5      n'[S        U'5      [T        L a-  U'SU:X  a  [        Rz                  n'OU'SV:X  a  [        R|                  n'UR6                  R~                  R                  R                  R                  U'5      USW'   []        [        R                  R                  S5      R                  U-  5      n([        SX5        SSYKEJEn)  [        U)" UR6                  R                  5      5       GH"  u  n*n+[         H  n,[        SZU, 35      n-S[U* S\U, S]3n.[        U-U.5      u  n/n0U0b  U0US[U* S\U, S^3'   [        R                  R                  5       U/R                  -   U(:  a  U/UU.'   Mn  [H        RJ                  " S_5        [@        RB                  RE                  UU. S`35      n1[        R                  " U/U1[        [        R                  Sa9  [        R                  " U1SbSIS1Sc9UU.'   M     [         H  n, [        SZU, Sd35      US[U* S\U, S]3'   M     GM%     UR6                  R                  R                  R                  USe'   UR6                  R~                  R                  R                  5       UR                  R                  R                  5       :w  a2  UR                  R                  R                  R                  U'5      USf'   UR                  5        Hl  u  n2n3[!        U3Sg5      (       a  U3R                  =UU2'   n3[S        U35      [        R                  Ld  MF  [H        RJ                  " ShU2 Si[S        U35       S\35        Mn     UUSj'   U(       d  S2OS3nU H  nUU	 M     [!        U S45      (       a  U R9                  S/5        U(       a  [+        U US)   US S!S UUS"9nUS)   nUS*   (       a%  [        UUU5      u  n n4Ub  SSKJn  U" US9S   n5OUn5US*   (       a  UW5:w  a  [        SkW  35        S1US*'   U US)'   UbB  [        S5SS69  UR0                  nS%Ul        UR:                  " Su0 UD6  UUl        [        S75        O
[        5         U Rx                  n6U Rx                  R                  5       n7SlU7;   a  U7Sl	 U n8[S        U Rx                  5      R                  U75      n7[!        U8S/5      (       a&  U8R6                  n8U7U8l<        [!        U8S/5      (       a  M&  U7U l<        US*   (       ar  UW5:w  al  [        SkW  35        U US)'   S1US*'   UR:                  " Su0 UD6  [@        R                  " U 5      n9[        US,   S9n:[        Sm5        U:R                  U S\U S/SnSoSp9  OUR:                  " Su0 UD6  U n8[!        U8S/5      (       a&  U8R6                  n8U6U8l<        [!        U8S/5      (       a  M&  U6U l<        [        S5        U(       aC  [!        U S&5      (       a2  [        SqU S<UR                  S<5      R                  S<5      Sr    35        S USj'   [        UR                  5       5       HL  u  n*u  n2n3S UU2'   U*Ss-  S:X  d  M  [        R                  R                  5         [        R                  " 5         MN     S nA[        R                  R                  5         [        R                  " 5         SS K^n;U;R                  " USISt9  [        S5       H6  n[        R                  R                  5         [        R                  " 5         M8     UU4$ !   [        S5      e= f!    GM+  = f)vN Unslothz (Trained with Unsloth)r   zUnsloth 2x fasterz! (Trained with Unsloth 2x faster)merged_4bit  Unsloth: Merging into 4bit will cause your model to lose accuracy if you plan
to merge to GGUF or others later on. I suggest you to do this as a final step
if you're planning to do multiple saves.
If you are certain, change `save_method` to `merged_4bit_forced`.merged_4bit_forced)rb   	tokenizerr   rc   r   r   whoamir   r   zLUnsloth: Please supply a token!
Go to https://huggingface.co/settings/tokensgffffff?    _r   merged_16bitaC  Unsloth: You must select one of 3 options when saving models:
"lora"         ==> This is the fastest and easiet. Just saves LoRA modules.
"merged_16bit" ==> This merges LoRA weights and saves to float16. Needed for llama.cpp / GGUF.
"merged_4bit"  ==> This merges LoRA weights and saves to 4bit. Useful for DPO / inference.z1Unsloth: Merging 4bit and LoRA weights to 4bit...zThis might take 5 minutes...merge_and_unloadzDone.unslothr   zoUnsloth: Pushing to HF requires a token. Pass `token = 'hf_....'`
Go to https://huggingface.co/settings/tokens.z-Unsloth: Saving LoRA adapters. Please wait...z7Unsloth: Saving 4bit Bitsandbytes model. Please wait...	finetunedtrl)rf   old_usernamer   original_push_to_hub)rn   r   r   r   r   r   r   r   r   r   r   leftro   zSaved z! model to https://huggingface.co/r   r   r   r   )r   legacy_formatfilename_prefixr   r   r   )r"   rb   layersF)r   r   r   r   r   r   )r   r   r   r   r   add_model_tagszUnsloth: Saving tokenizer...endz Done.zUnsloth: Saving model...z+ This might take 10 minutes for Llama-7b...selected_adaptersz Saved to https://huggingface.co//r   zUUnsloth: You are pushing to hub in Kaggle environment.
To save memory, we shall move  to zCUnsloth: You are pushing to hub, but you passed your HF username = z.
We shall truncate z2Unsloth: Merging 4bit and LoRA weights to 16bit...l        z([0-9]{1,})[\s]{0,}GB)flagsz([0-9]{1,})[\s]{0,}MBi   )logicalTr   r~   zUnsloth: You have z CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.r   g      ?zUnsloth: Will use up to z out of z RAM for saving.zUnsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.)OrderedDictfloat16bfloat16zmodel.embed_tokens.weightz6Unsloth: Saving model... This might take 5 minutes ...)tqdmzlayer.zmodel.layers..z.weightz.biasz&
We will save to Disk and not RAM now.z.pt)pickle_modulepickle_protocolcpu)map_locationmmapweights_onlyz.weight.datazmodel.norm.weightzlm_head.weightdata	Unsloth: z is not a Tensor but a r   z-Unsloth: Saving to organization with address quantization_configz.Unsloth: Uploading all files... Please wait...(Trained with Unsloth)z*.md)folder_pathpath_in_reporn   	repo_typer   ignore_patternsz-Saved merged model to https://huggingface.co/
   rV   rI   )`r    lstripRuntimeErrordictlocalsrk   r   ranger   cudaempty_cachegccollectlowerreplacerQ   rX   r   r   rl   tupleupload_to_huggingfacer   r   padding_sidepeftr"   rb   r   r_   findIS_KAGGLE_ENVIRONMENTrY   rZ   r[   
KAGGLE_TMPr   rt   psutilvirtual_memory	availabler   strrematch
IGNORECASEintgroup	cpu_countr   r   roundtotalr]   r^   IS_COLAB_ENVIRONMENTr{   collectionsr   r   ro   r   r   embed_tokensr   r   r   get_device_propertiestotal_memoryr   	enumerater   LLAMA_WEIGHTSevalr   memory_allocatednbytesr   pickleHIGHEST_PROTOCOLloadLLAMA_LAYERNORMSnormdata_ptrlm_headrP   Tensor_determine_usernameto_dict	from_dictlistdirr   upload_foldersplitrG   ra   )<rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   save_pretrained_settingsdeletionr   usernamer   old_padding_sidetokenizer_save_settingsr"   internal_modelwhat_to_deletenew_save_directorymax_ramsharded_ram_usagegb_foundmb_foundn_cpusr   torch_dtypemax_vramProgressBarjr   r   projr   r   r}   filenamerR   rS   new_usernameactual_username
old_config
new_configoriginal_model	filenameshf_apirG   s<                                                               rK   r%   r%      sY   6 }&33#**,N!J	$6	6AAm#P
 	
 
,	,##FH~ %X. *	e,V4H  !#(<(DDD 1X

 


  ##%--c37Kv>)=(i
 	
 m#AB,- 5,--**,Eg$u....Dz
 

 
 (,V$		K=$@{=@ 
 & ABM)KL " 	
 	-u/@/@A$'++!!3!3	
  (55%+I"I5y7L7LM(+!/!!/%%7#%7 &6I"5(##%FG.X t## 33CD/>+I6)'2 *%-.. 
	%6!w''w~7K7KX/V/V (6%? 	$ 'H(2 '5*++   %()9: $#!	A  0;  )55%+I"%%@(?@ &6I"(OG(3& ?rJ & <@$%899 8975(332*+;<=
 	ht## Hsn,+%&D(:(?(?(DE/0B0G0G0Lq0P0RS  !#./A/F/Fs/Ka/O/QR" 11?0@EWDXZ
 UV^U_ `%%3$4D9K8LN
 6H !124F 01+	
>? ##%//G.Ns"88$nbmm
 88$nbmm
  #HNN1$5 6 = Dt K #HNN1$5 6 = D	n		$* .F~!!#~!!9K !56	1  )G H	

 #(9K !564A 1 $$$t++#a/$889G	
"d"4'+
,H&&(..t3D8=qA
BBR	T WW\\*6HI 77>>,--
&'  4 4`	
 	>* (J#N$9$9:KKC)#--KJ&..K 	))005588E *+ 

((+88;OOH 

BC(k.*>*>*E*EFG5!D&(D"1#QtfG4D!$-GAt >B
]1#QtfE:;

++-8HD#$
4  ##$MN77<<(:tfCLI

$*&,&=&=	 $)::U4PU$
4 ; "@ %D@DTF,/A
]1#QtfG<= %C HT '5&:&:&?&?&F&F&K&KJ"#
 	))0099;!!((113	4 (6'='='D'D'I'I'L'L(

#$
 !&&(
U5&!!&+jj0JsOe;ell*)C50GU}TU VW	 ) .8\* 	

 $ #$X. #u&''	
 !$%56 #	
 ..>?N.+>He,
(L .$U3F;O&O  .H4O=>P=QRS16.4F 01 ,B7 %11!'	!!<$;< "2	h J%%'J
*,-Nell#--j9J
.'
*
*'-- * .'
*
* EL  .H4O=>P=QRS 6H !1227 /&&B)AB JJ12	7@A>?,(5$ 	 	
 	&&B)AB N
.'
*
*'-- * .'
*
* EL	'Nwuh//;H:Q~G\G\]`GaGgGghkGlmoGpFqr	
 .2\*$Z%5%5%78<C
3r6Q;JJ""$JJL	 9
 J	JJJJL 
MM$d;1X

 


  8##Y	? js   	y1  z1y>zc                  r    / SQn [         R                  " U [         R                  [         R                  S9nU$ )N)gitclonez--recursivez&https://github.com/ggerganov/llama.cppstdoutstderr
subprocessPopenDEVNULLSTDOUT)full_commandrun_installers     rK   $install_llama_cpp_clone_non_blockingrK  \  s6    L $$z11J<M<MM rJ   c            	      @   [         R                  " S5      n SnU S:X  aJ  [        [        [        R
                  " 5       =(       d    SS-  5      S5      nSSS[        U5      -   S	S
/nSnO[        [        [        R
                  " 5       =(       d    S5      S5      n[         R                  " S[         35      n U S:w  a  [        SU  S35      eSSSSSS[        U5      -   SS/[        -   nSn[        R                  " U[        R                  [        R                  S9nXA4$ )Nmake clean -C llama.cppFr   r   g      ?makeallz-jz-C	llama.cppKcmake llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF zH*** Unsloth: Failed compiling llama.cpp using os.system(...) with error . Please report this ASAP!cmakez--buildzllama.cpp/buildz--configReleasez--clean-firstz--targetTrA  )rY   systemr   r  r   r	  r  	CURL_FLAGr   LLAMA_CPP_TARGETSrE  rF  rG  rH  )checkIS_CMAKEn_jobsrI  rJ  s        rK   #install_llama_cpp_make_non_blockingr[  i  s,   
 II/0EHzS&**,1S891=tc&k'94M S))+0q115		YZcYde
 A:Z[`Zaa{| 
 3v;	
 	  $$z11J<M<MM ""rJ   c                 x    SS/U -   n[         R                  " U[         R                  [         R                  S9nU$ )NpipinstallrA  rD  )packagesrI  rJ  s      rK   install_python_non_blockingr`    s<    9%0L$$z11J<M<MM rJ   c           	      T   U  GH  n[         R                  " US[         R                  [         R                  SS9 nUR                   Hn  nUR                  SSS9nSU;   a  [        SU S	35      eS
U;   a    S S S 5          gSU;   a  [        SU S	35      eSU;   a  [        SU S	35      e[        USSS9  Mp     U(       aH  UR                  b;  UR                  S:w  a+  [         R                  " UR                  UR                  5      eS S S 5        GM     g ! , (       d  f       GM$  = f)NTr   )shellrB  rC  bufsizeutf-8r   )errorszundefined referencez-*** Unsloth: Failed compiling llama.cpp with rR  
deprecatedCMAKEzUnknown argumentz***r   )flushr   r   )rE  rF  PIPErH  rB  decoder   rQ   
returncodeCalledProcessErrorargs)commandsforce_completecommandsplines        rK   try_executers    s5   __&&
 		{{7Y{?(D0&GvMgh  "T)"
 
 (4/&GvMgh  d]&GvMgh  dD3! "" "--";QR@R 33BMM277KK3
 
 6 5
 
s   <DBD
D'	c                    [         R                  " / SQ5      nUR                  S5      R                  SS5      R	                  S5      n[        U5       H  u  p#SU;  d  M    O   US W nUS   nX   R	                  S5      S   n [        R                  R                  S	5      (       aY  [        S
5        SS K
n[        S5       H&  n[        SSU-
   S35        UR                  S5        M(     SS KnUR                  " S	SS9  SSU  S3/n[        U5        SS[         R"                  " 5       =(       d    SS-   S3/n[        U5      S:X  aP  S[$         3S[         R"                  " 5       =(       d    SS-   SSR'                  [(        5       3SS/n[        U5        [        R                  R                  S5      (       d  [        R                  R                  S5      (       d  [        R                  R                  S 5      (       dz  [        R                  R                  S!5      (       dU  [        R                  R                  S"5      (       d0  [        R                  R                  S#5      (       d  [+        S$5      eg g g g g g )%N)r?  z	ls-remotez--tagsz*https://github.com/ggerganov/llama.cpp.gitrd  	r   r)   zrefs/tags/br   r   rP  z**[WARNING]** You have a llama.cpp directory which is broken.
Unsloth will DELETE the broken directory and install a new one.
Press CTRL + C / cancel this if this is wrong. We shall wait 30 seconds.
   z.**[WARNING]** Deleting llama.cpp directory... z seconds left.r   TrV   <git clone --recursive https://github.com/ggerganov/llama.cppz!cd llama.cpp && git reset --hard z && git clean -dfrM  make all -jr~    -C llama.cpprg  rQ  1cmake --build llama.cpp/build --config Release -j --clean-first --target (cp llama.cpp/build/bin/llama-* llama.cpprm -rf llama.cpp/buildzllama.cpp/llama-quantize.exezllama.cpp/llama-quantizezllama.cpp/quantize.exezllama.cpp/quantizez"llama.cpp/build/bin/llama-quantizezllama.cpp/build/bin/quantizea.  Unsloth: The file 'llama.cpp/llama-quantize' or `llama.cpp/quantize` does not exist.
We've also double checked the building directory under 'llama.cpp/build/bin/'.
But we expect this file to exist! Check if the file exists under llama.cpp and investigate the building process of llama.cpp (make/cmake)!)rE  check_outputrj  r   r#  r  rY   rZ   r]   rQ   timer   sleeprG   ra   rs  r   r	  rV  r[   rW  r   )r   releasesixlatestr  rG   rn  s           rK   install_llama_cpp_oldr    sh    &&TH w'//c:@@FH(#! $ |Hb\F%%c*1-G 
ww~~k""Y	

 	rAB2a4&WXJJqM  	k48
 	G
+G94EFH  	"
v'').Q12-@H 8' ZZcYde?AQAQASAXWXZ[@[?\\tuxu}u}  P  vQ  uR  S6$	
 	H 	56677>>45577>>23377>>.//77>>>??77>>899Y
 	
 : @ 0 4 6 	7rJ   c                 z   SS/n[         R                  R                  S5      (       a  g [        U5        SS[        R
                  " 5       =(       d    SS-   S3/n[        U5      S	:X  aQ  S
[         3S[        R
                  " 5       =(       d    SS-   SSR                  [        5       3SS/n[        U5        g g )Nrw  zpip install gguf protobufrP  rM  rx  r   r~   ry  rg  rQ  rz  r{  r   r|  r}  )	rY   rZ   r]   rs  r   r	  rV  r[   rW  )use_cudarn  s     rK   install_llama_cpp_blockingr    s     	G#H 
ww~~k"" 	" v'').Q12-@H 8' ZZcYde?AQAQASAXWXZ[@[?\\tuxu}u}  P  vQ  uR  S6$	
 	H (rJ   c                    [         R                  R                  S5      R                  [         R                  5      nU H  nU  Hx  n[         R
                  R                  X#5      n[         R
                  R                  U5      (       d  MH  [         R                  " U[         R                  5      (       d  Mt  Us  s  $    M     g )NPATH)
rY   environgetr#  pathseprZ   r[   r]   accessX_OK)executablessystem_directories	directory
executablerZ   s        rK   get_executabler  !  s{    /55bjjA'	%J77<<	6Dww~~d##		$(@(@	 & ( rJ   
model_name
model_typemodel_dtypeis_sentencepiecemodel_directoryfirst_conversionis_vlm
is_gpt_ossc	                 
   [         R                  R                  SS5      S:X  a  Sn	OSn	US:X  d  US:X  d   eUS:X  a  SOS	n[        U[        5      (       a  OE[        U[
        5      (       a  U/nO,[        U[        5      (       a  [	        U5      nO[        S
5      eUS	:X  a;  [        R                  R                  5       (       d  [        R                  " S5        SnUc  UnU H$  n
U
R                  S5      (       d  M  [        S5      e   / nU H  n
U
S:X  a  Un
OU
S:X  a  Sn
OU
S:X  a  Sn
OU
c  Sn
U
[        R!                  5       ;  a:  SU
 S3n[        R#                  5        H  u  pUSU SU S3-  nM     [        U5      eUR%                  U
5        M     UnU(       a  ['        S5        SnS/nOUc  [)        U5      S:X  a  US   S:X  a  SnOdSnU H?  n
U
S:X  a  [+        US5      nM  U
S:X  a  [+        US5      nM+  U
S	:X  d  M3  [+        US5      nMA     US:  a  SnOUS:  a  SnOUS:  a  S	nOS	nUS	:X  a;  [        R                  R                  5       (       d  [        R                  " S 5        SnUS:X  a  S!OUnS"[-        S#5       [-        S#5       S$[-        S#5       S%[-        S#5       S&U S'[-        S#5       S(U S)U S*3n['        U5         [/        5       u  nn['        S+5        ['        S.5        [5        5          [7        5       u  nnn['        S/U S035        ['        S15        [9        U UUUUUUUUS2U	S39u  nnSSS5        WnW HP  n[         R:                  R=                  U5      (       a  M)  [0        (       a  [        S4U S535      e[        S4U S635      e   ['        S7U 35        UR?                  5       n[@        RB                  " 5       nUc  SnUS-  nU(       d  US   nSnU HS  n
X:w  d  M
  ['        S8U S9U
 S:35        U  S;U
RE                  5        S<3n [G        UUU
UU	S=9nUR%                  U5        SnMU     ['        SA5        U(       a:  URK                  U5        [M        U5      RO                  5         URQ                  5         O['        SB5        U(       a  Sn!OU[S        U5      ;   n!['        SC5        ['        SDU 35        UU!U4$ !   ['        S,5        [0        (       a  [3        SU	S-9u  nn GN[3        SU	S-9u  nn GN= f! , (       d  f       GN= f! [H         a.  n [0        (       a  [        S>U S?35      e[        S>U S@35      eSn A ff = f)Ezh
Orchestrates the complete GGUF conversion process.
Handles installation, conversion, and quantization.
UNSLOTH_ENABLE_LOGGING01TFr   r   r3   r2   FUnsloth: quantization_method can only be a string or a list of stringszlUnsloth: Cannot convert to bf16 GGUF since your computer doesn't support it.
We shall switch instead to f16.Niq2zGUnsloth: Currently iq2 type quantizations aren't supported yet - sorry!r.   r/   r4   r0   r5   Unsloth: Quant method = [$] not supported. Choose from below:
[] => r)   zCUnsloth: GPT-OSS model detected - using special conversion settingsNoner   r   r1   r   r~   z:Unsloth: Switching bf16 to f16 due to hardware limitationsr   zA==((====))==  Unsloth: Conversion from HF to GGUF information
   \   z=   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ z_/ z    [1] Converting HF to GGUF z might take 3 minutes.
z!        /    [2] Converting GGUF r   z` might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.
z>Unsloth: llama.cpp found in the system. Skipping installation.z;Unsloth: Installing llama.cpp. This might take 3 minutes...)gpu_supportprint_outputz&Unsloth: Preparing converter script...z#Unsloth: [1] Converting model into z GGUF format.zThis might take 3 minutes...50GB)r  input_folderr  quantization_typeconverter_locationsupported_text_archssupported_vision_archsr  r  r   r  zUnsloth: Conversion failed for z
You are in a Kaggle environment with limited disk space (20GB).
Try saving to /tmp for more space or use a smaller model.
Alternatively, save the 16bit model first, then convert manually.z'
Please check disk space and try again.z.Unsloth: Initial conversion completed! Files: zUnsloth: [2] Converting GGUF z into z. This might take 10 minutes...r   z.gguf)
input_ggufoutput_gguf
quant_typequantizer_locationr  z!Unsloth: Quantization failed for a  
You are in a Kaggle environment, which might be the reason this is failing.
Kaggle only provides 20GB of disk space in the working directory.
Merging to 16bit for 7b models use 16GB of space.
This means using `model.{save_pretrained/push_to_hub}_merged` works, but
`model.{save_pretrained/push_to_hub}_gguf will use too much disk space.
You can try saving it to the `/tmp` directory for larger disk space.
I suggest you to save the 16bit model first, then use manual llama.cpp conversion.
Error: {e}a|  
You might have to compile llama.cpp yourself, then run this again.
You do not need to close this Python program. Run the following commands in a new terminal:
You must run this in the same folder as you're saving your model.
git clone --recursive https://github.com/ggerganov/llama.cpp
cd llama.cpp && make clean && make all -j
Once that's done, redo the quantization.
Error: {e}zUnsloth: Model files cleanup...z:Unsloth: GPT-OSS model - skipping additional quantizationsz5Unsloth: All GGUF conversions completed successfully!zGenerated files: )*rY   r  r  r   rl   r  r   	TypeErrorr   r   is_bf16_supportedr   warning
startswithr   rO   keysrP   appendrQ   lenr   chrr   r   r
   r	   r   r   rZ   r]   copyr   r	  upperr   	Exceptionremover!   unlinkreverse	frozenset)"r  r  r  r  r  quantization_methodr  r  r  r  quant_methodnew_quantization_methodserrorrR   rS   strengthfirst_conversion_dtype
print_infor  r  converter_pathr  r  initial_filesis_vlm_updatefileall_saved_locationsr0  	base_ggufquants_createdoutput_locationquantized_fileewant_full_precisions"                                     rK   r&   r&   .  sD     
zz~~.4; )#{j'@@@&)3%K %t,,	'	-	-
 
'	/	/"#67T
 	

 fUZZ%A%A%C%C.	
  & ,""5))Y  ,  "+?*&L--!L[(#L!!L ~2244/~=bcE,224
1SEugR00 5u%% ''5# ,$ 3 ST!%h#&'1,1DQ1G61Q#)  $7L#u,#&x#3%.#&x#3%/#&x#3 %8 q=',$]',$]'-$'-$ 6!%***F*F*H*HST #3v#=RCS"gYs2wi  ByCG9$BCYBZZrr7)45K4LDQdPe fO	P  
*1@1B..NO 

23		(* 	E,.D
 	12H1IW	
 	,.'6#*% 0!/#7%;##'(
$} 
2 Fww~~d##$$"5dV <X X  #5dV <= =   
:=/
JK (,,. F~
aKF!!$	/L/34J3K6R^Q__~ &0L,2D2D2F1Gu"M"%2%.&5%1-?'3&N (..~>%)N# 0T 	/0&&y1O""$  '')JK".)<O2PP	AC	12
34 3V;;GKL  5F#L62 2 6G#+62 2 
	T ! ,,*??P Q) )
 
 +??P Q) )	 	s6   S ?T! T %T;T
T 
U*)UUr         ?c                     Uc  [         R                  " S5        [        [        5       5      nU US'   US	 [	        S0 UD6  [        S5       H  n[        R                  " 5         M     g)aw  
Same as .save_pretrained(...) except 4bit weights are auto
converted to float16 with as few overhead as possible.

Choose for `save_method` to be either:
1. `16bit`: Merge LoRA into float16 weights. Useful for GGUF / llama.cpp.
2.  `4bit`: Merge LoRA into int4 weights. Useful for DPO / HF inference.
3.  `lora`: Save LoRA adapters with no merging. Useful for HF inference.
NmUnsloth: You're not saving a tokenizer as well?
You can do it separately via `tokenizer.save_pretrained(...)`rb   selfr   rI   r   rt   r   r   r%   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   	argumentsr   s                     rK   unsloth_save_pretrained_mergedr  9  s_    6 L	

 VXIIg&##1X


 rJ   rn   c                     Uc  [         R                  " S5        [        [        5       5      nU US'   UUS'   SUS'   US	 US	 [	        S
0 UD6  [        S	5       H  n[        R                  " 5         M     gs  
Same as .push_to_hub(...) except 4bit weights are auto
converted to float16 with as few overhead as possible.

Choose for `save_method` to be either:
1. `16bit`: Merge LoRA into float16 weights. Useful for GGUF / llama.cpp.
2.  `4bit`: Merge LoRA into int4 weights. Useful for DPO / HF inference.
3.  `lora`: Save LoRA adapters with no merging. Useful for HF inference.
NziUnsloth: You're not saving a tokenizer as well?
You can do it separately via `tokenizer.push_to_hub(...)`rb   r   Tr   r  rn   r   rI   r  r  rn   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   r  r   s                     rK   unsloth_push_to_hub_mergedr  b  s|    6 H	

 VXIIg")I#Im&)##1X


 rJ   a  ---
base_model: {base_model}
tags:
- text-generation-inference
- transformers
- unsloth
- {model_type}
- {extra}
license: apache-2.0
language:
- en
---

# Uploaded {method} model

- **Developed by:** {username}
- **License:** apache-2.0
- **Finetuned from model :** {base_model}

This {model_type} model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth)

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
c                     SnU R                  S5      n SU ;  a3  SSKJn   U" US9S   n[        U5      [        L a  X1:w  a  UnU SU  3n X4$ U R                  S5      S   nX4$ !   [        SU  S	35      e= f)
Nr   z./r   r   r   r   r   r   z  is not a Huggingface directory.)r   rk   r   r   r  r   r#  )r   r   r   r&  r   s        rK   r  r    s    H#**40N
. *	e,V4HL!S(X-E' (z>*:;N ## "'',Q/##	N++KL s   )A$ $A5c                 :   Uc
  [        5       n[        USU5      u  pSSKJn   U" UUSSUS9  SSKJn  [
        R                  UU R                  R                  U R                  R                  SSS	9nU" U5      nUR                  XS
9  [        US
9n	X4$ !    N= f)Nr   r   create_reporb   Frn   r   r   exist_okr   	ModelCardr   r&  
base_modelr  methodextrar   )r    r  rk   r  r  
MODEL_CARDformatro   rp   r  r   r   )
rb   r   r   r   r&  r  r  contentcardr=  s
             rK   r(   r(     s     }2>2uMN+$	
 	.##3300 $ 
 !7 5!F!!s   A&B Bc	           	      F   [        XU5      u  pSSKJn
   U
" UUSSUS9  SSKJn  [        R                  U	U R                  R                  U R                  R                  SUS9nU" U5      nUR                  XS	9  UGb  [        US	9nS
U;   a  XUR                  S
5      S-   S  nOUnSS KnUR                  SSS9n[        U5      S:  a;  [        SUS-   5        U H&  nUR                  UUR!                  US5      USSS9  M(     UR                  UUUSSS9  U(       ah  SS Kn[%        SSSS9 nUR'                  SU R                  R                  0USS9  S S S 5        UR                  SSUSSS9  [(        R*                  " S5        U	$ !    GN= f! , (       d  f       NA= f)Nr   r  rb   Fr  r  r   r  r   r   r   z*out.tfevents*T)	recursivez6Unsloth: Uploading tensorboard files... Please wait...r   )path_or_fileobjr   rn   r   r   z_temporary_unsloth_config.jsonwrd  encodingr     )indentconfig.json)r  rk   r  r  r  r  ro   rp   r  r   r   rfindglobr  rQ   upload_filer   jsonopendumprY   r  )rb   r   r   r  r  rf   r   r   create_configr&  r  r  r  r  r=  uploaded_locationr  ftevent_filesftevent_filer  r  s                        rK   r   r     s     3>QVWN+$	
 	.##3300 $ 
 !7  u%- -.A.A#.F.J.L M - 			"2	E}!H 00 !.""&2#/#7#7r#J, '%= #  !. 	+,$5 	 	
 6PTX		<)@)@A4RS	T Q"B,(#!9   II67OiR QPs   A&F
 /(F
F
F c                    Sn[        U SS 5      nU " S5      R                  S   [        U SS 5      :X  a  Ub  U R                  U;   d,  SUR                  SS5      ;   d  S	UR                  SS5      ;   aN  S
n[        R
                  " S5        [        R                  " SSU5      n[        R                  " SSU5      nX0l        X4$ )NFchat_templater   r   bos_token_idz{bos_token}r   r   z{bos_token+TzUnsloth: ##### The current model auto adds a BOS token.
Unsloth: ##### Your chat template has a BOS token. We shall remove it temporarily.z2\{[\s]{0,}\{[\s]{0,}bos\_token[\s]{0,}\}[\s]{0,}\}z0\{[\s]{0,}\{[\s]{0,}bos\_token[\s]{0,}\+[\s]{0,})	r   	input_ids	bos_tokenr   r   r  r  subr  )r   fix_bos_tokenr  new_chat_templates       rK   fix_tokenizer_bos_tokenr  =  s    MI=M~"gi&NN$=0 5 5c2 >> 5 5c2 >> MNNe !#Er=! !#C!! '8#''rJ   c                 d   [         R                  " U5      nU(       d  [        SU S35        g[        R                  " U5      nU(       d  [        SU S35        gUU l        UnSnSnSnSn	UR                  SU5      R                  S	U5      R                  S
U5      R                  SU	5      nUR                  US5      R                  US	5      nSU;   a  UR                  UU R                  S9nOUR                  US9nUR                  SS
5      R                  SS5      R                  5       nU$ )zc
Creates an Ollama Modelfile.
Use ollama.create(model = "new_ollama_model", modelfile = modelfile)
z5Unsloth: No Ollama template mapping found for model 'z'. Skipping Ollama ModelfileNu)   ⚫@✅#🦥__FILE_LOCATION__⚡@🦥#⛵u%   ⚫@✅#🦥__EOS_TOKEN__⚡@🦥#⛵u   ⚫@✅#🦥u   ⚡@🦥#⛵z{__FILE_LOCATION__}z{__EOS_TOKEN__}{}__EOS_TOKEN__)__FILE_LOCATION__r  )r  )	r   r  rQ   r   _ollama_modelfiler   r  	eos_tokenrstrip)
r   base_model_namemodel_locationollama_template_nameollama_modelfile	modelfileFILE_LOCATION_REPLACEREOS_TOKEN_REPLACERLEFT_BRACKET_REPLACERRIGHT_BRACKET_REPLACERs
             rK   create_ollama_modelfiler   ^  sl   
 ;>>OCOCTTpq	
 '++,@ACOCTTpq	
   !IH@*+
 	/1GH	"$6	7	+	,	,	-	  !! 5g "34  )#$$ .%// % 
	
 $$ . % 
	 !!.#6>>~sSZZ\IrJ   r&  tagmodelfile_pathc           	      p    [         R                  " SS/SSSS9nUR                  S:X  a$  [        UR                  R                  5       5        O[        S5         [         R                  " S	S
U  SU SU 3SU /[         R                  [         R                  SSSS9n[        UR                  R                  S5       H+  n[        USS9  [        R                  R                  5         M-     UR                  5       nUS:w  a  [        SU 35        g [        S5        g ! [         R                   a     gf = f)NrF   http://localhost:11434Tr   capture_outputtexttimeoutr   Ollama Server is not RunningOllama Request Timeoutollamacreater   :z-fr   rB  rC  r'  rc  universal_newlinesr   r   z'
MODEL CREATED FAILED WITH RETURN CODE z
MODEL CREATED SUCCESSFULLYrE  runrk  rQ   rB  stripTimeoutExpiredrF  ri  rH  iterreadlinesysrh  wait)r&  r  r!  r"  
init_checkprocessrr  return_codes           rK   create_ollama_modelr;    s+   (^^-.!	

   A%*##))+,01 j*Qse,	
 ""!G W^^,,b1d"

 2 ,,.Ka8FG,-7 $$ ('(s   AD D D54D5c           	      j    [         R                  " SS/SSSS9nUR                  S:X  a$  [        UR                  R                  5       5        O[        S5         [         R                  " S	S
U  SU SU 3/[         R                  [         R                  SSSS9n[        UR                  R                  S5       H+  n[        USS9  [        R                  R                  5         M-     UR                  5       nUS:w  a  [        SU 35        g [        S5        g ! [         R                   a     gf = f)NrF   r$  Tr   r%  r   r)  r*  r+  pushr   r-  r   r.  r   r   z)
MODEL PUBLISHED FAILED WITH RETURN CODE z
MODEL PUBLISHED SUCCESSFULLYr0  )r&  r  r!  r8  r9  rr  r:  s          rK   push_to_ollama_hubr>    s   (^^-.!	

   A%*##))+,01 	6hZqAcU;<""!G W^^,,b1d"

 2 ,,.Ka:;-HI./+ $$ ('(s   AD D D21D2c                     [        XS9n[        SU 3SSS9 nUR                  U5        UR                  5         S S S 5        [	        UUUSU 3S9  [        X#US9  [        S5        g ! , (       d  f       N3= f)	N)r   gguf_location
Modelfile_r  rd  r  )r&  r  r!  r"  )r&  r  r!  zSuccessfully pushed to ollama)r   r   writecloser;  r>  rQ   )r   r@  r&  r  r!  
model_filefs          rK   push_to_ollamarF    s}    (J 

:,'	AQ	
		 
B %j\2	 (3O	
)* 
B	As   "A''
A5333333?c                    Uc  [        S5      e [        U R                  R                  SS9nUR	                  S5      S   nU(       a  [        S5      eSn[        U S5      (       a`  [        U R                  S	5      (       aE  [        S
 U R                  R                   5       5      nU=(       d    [        U R                  S5      nU=(       a    [        U[        5      n[        U R                  S	5      (       a  U R                  R                  S:X  d5  [        U R                  S5      (       a  U R                  R                  S;   a  SOSn[        [        5       5      nU US'   UUS'   SUS'   U(       a  SUS'   OSUS'   US	 US	 US	 US	 US	 US	 US	 US	 U(       a  [        UR                  5      u  nnO[        U5      u  nn[        SU(       a  SOS S 35         [!        SG0 UD6  U(       a  UR                  nU(       a  UUl        [)        S"5       H[  nS#SKnUR,                  " 5         [.        R0                  R3                  5       (       d  M=  [.        R0                  R5                  5         M]      [7        U R                  5      nU R                  R                  n[9        U5      [:        L a  US$:X  d  US%:X  d   eO9U[.        R<                  :X  a  S$nO"U[.        R>                  :X  a  S%nO[A        S&5      e[        S)5        / nUb  [        U[B        5      (       a  OE[        U[:        5      (       a  U/nO,[        U[D        5      (       a  [C        U5      nO[A        S*5      e[G        U5       HU  u  n n!U!RI                  5       n!U!S+:X  a  S,n!OU!S-:X  a  S.n!OU!S/:X  a  S0n!OU!c  S.n!URK                  U!RI                  5       5        MW      [M        UWUSUUUUUS19	u  n"n#n$Sn%Sn&U"(       a   U$(       a  [Q        UUS45      n'O[Q        UUU"S#   5      n'U'b  U$(       a!  [R        RT                  RW                  US55      n%O3[R        RT                  RW                  [R        RX                  " 5       S55      n%[[        U%S6S7S89 n(U(R]                  U'5        SSS5        Sn&U(       a  [^        R`                  " S:5        U$(       a9  [        S;5        [        S<U"S#    S=U"S    35        [        S>5        [        S?5        O[        S@U"S#    SA35        U&(       a   U$(       a  [        SBU% 35        [        SC5        U&(       a  U$(       d  [        SD5        [        SE5        UU"U%U#U$USF.$ !   U R                  R                  nUR	                  S5      S   n GNH= f! ["         a  n[%        S!U 35      eSnAff = f! ["         a  n[        S'U S(35        S$n SnAGNSnAff = f! ["         a,  n[N        (       a  [%        S2U 35      e[%        S3U 35      eSnAff = f! , (       d  f       GN= f! ["         a  n[        S9U 35         SnAGNSnAff = f)HaE  
Same as .save_pretrained(...) except 4bit weights are auto
converted to float16 then converted to GGUF / llama.cpp format.

Choose for `quantization_method` to be:
"not_quantized"  : "Recommended. Fast conversion. Slow inference, big files.",
"fast_quantized" : "Recommended. Fast conversion. OK inference, OK file size.",
"quantized"      : "Recommended. Slow conversion. Fast inference, small files.",
"f32"     : "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
"f16"     : "Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
"q8_0"    : "Fast conversion. High resource use, but generally acceptable.",
"q4_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
"q5_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
"q2_k"    : "Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.",
"q3_k_l"  : "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
"q3_k_m"  : "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
"q3_k_s"  : "Uses Q3_K for all tensors",
"q4_0"    : "Original quant method, 4-bit.",
"q4_1"    : "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
"q4_k_s"  : "Uses Q4_K for all tensors",
"q4_k"    : "alias for q4_k_m",
"q5_k"    : "alias for q5_k_m",
"q5_0"    : "Higher accuracy, higher resource usage and slower inference.",
"q5_1"    : "Even higher accuracy, resource usage and slower inference.",
"q5_k_s"  : "Uses Q5_K for all tensors",
"q6_k"    : "Uses Q8_K for all tensors",
"iq2_xxs" : "2.06 bpw quantization",
"iq2_xs"  : "2.31 bpw quantization",
"iq3_xxs" : "3.06 bpw quantization",
"q3_k_xs" : "3-bit extra small quantization",
N.Unsloth: Saving to GGUF must have a tokenizer.F)load_in_4bitr   r   z`Unsloth: Please use .push_to_hub_gguf() instead of .save_pretrained_gguf() with push_to_hub=Truero   architecturesc              3   D   #    U  H  nUR                  S 5      v   M     g7f)ForConditionalGenerationForVisionText2TextNendswith.0r  s     rK   	<genexpr>/unsloth_save_pretrained_gguf.<locals>.<genexpr>?  s$      
. JJIJJ.    vision_configGptOssForCausalLMr  )zgpt-ossgpt_ossTrb   r   r   mxfp4r   r   r  r  r  r  r  r  r  is_processorz"Unsloth: Merging model weights to z16-bitz
 format...zFailed to save/merge model: r   r   r   r   z4Unsloth: Model dtype can only be float16 or bfloat16z$Unsloth: Could not determine dtype (z), defaulting to float16z%Unsloth: Converting to GGUF format...r  r.   r3   r/   r4   r0   r5   )	r  r  r  r  r  r  r  r  r  zUnsloth: GGUF conversion failed in Kaggle environment.
This is likely due to the 20GB disk space limit.
Try saving to /tmp directory or use a smaller model.
Error: z!Unsloth: GGUF conversion failed: r   	Modelfiler  rd  r  z,Warning: Could not create Ollama modelfile: zuUnsloth: ##### The current model auto adds a BOS token.
Unsloth: ##### We removed it in GGUF's chat template for you.r)   z>Unsloth: example usage for Multimodal LLMs: llama-mtmd-cli -m z
 --mmproj zBUnsloth: load image inside llama.cpp runner: /image test_image.jpgz+Unsloth: Prompt model to describe the imagez=Unsloth: example usage for text only LLMs: llama-cli --model z -p "why is the sky blue?"z#Unsloth: Saved Ollama Modelfile to zuUnsloth: convert model to ollama format by running - ollama create model_name -f ./Modelfile - inside save directory.z4Unsloth: Saved Ollama Modelfile to current directoryzxUnsloth: convert model to ollama format by running - ollama create model_name -f ./Modelfile - inside current directory.)r   
gguf_filesmodelfile_locationr  r  r  rI   )1r   r   ro   rp   r#  rX   anyrK  r   r   r  r   r   r  r   rQ   unsloth_generic_saver  r   r  r   r   r   r   r   is_availabler   r   r   r  r   r   r  rl   r   r  r   r  r&   r   r   rY   rZ   r[   getcwdr   rB  r   r  ))r  r   r   r  r  r   r   r   r   r   r   r   r   r   r   r   rc   r   r  r  r  r[  r  r  r  old_chat_templater  r   r   r  r  quantization_methodsr  r  all_file_locationsr  r  r^  ollama_successr  r  s)                                            rK   unsloth_save_pretrained_ggufrg    sl   f IJJ4()B)BSXY$**3/3
 n
 	

 FtX74;;#H#H 
[[..
 
 @74;;@CjNCL
 DKK11))-@@ DKK..&&*@@ 	   VXIIg&Ik$Im#*	- #1	- &'($%(,,#$.! +B9CVCV+W((+B9+M(( 

,
W,QQ[\?)y)
 ''	 "3	 1X


::""$$JJ""$  '4[[++
#)+{j/HHHEMM)#KENN*$KRSS 

12 &)400+S11## +U33"&':";X   ))<=OA|'--/L.$!11%,'%% ''(:(:(<=  >HAM##%$,"6/#
B
>/. N	F3IPST	30B10E	 $ )+nk)R&)+biik;)O&,cgF$JJy) G!%
 L	

 dLM_`aMbLccm  oA  BD  oE  nF  G	
 	RS;<KL^_`LaKbb|}	
 -34F3GHI D	
 mDE G	
 )(02& a4++33$**3/3
@  ?9!=>>?<   4QC7OPQ b  	H     !B1#FGG	H6 GF  	F@DEE	Fs   2U !V 8AV- V- V- 2V- W .BX! <X
X! ,V

V*V%%V*-
W7WW
X 'XX
XX! !
Y+X??Yc                 b
   Uc  [        S5      eSU;   a  UR                  S5      S   OUnU(       d  Uc  SSKnUR                  SS9nUnSnOUnS	n[	        S
5         [        U UUUUS	SU	UUUS9nUS   nUS   nUS   nUS   nUS   nUS   n[	        S5         SSK
Jn  U" US9n SU;  a  U R                  5       S   n!U! SU 3n"OUn"U R                  U"SUSS9  U H  n#[        R                  R!                  U#5      n$U(       a+  SU$;   a%  SU$;   a  U$R                  SS5      S   OU$n%U SU% 3n&O/U$R#                  [        R                  R!                  U5      U5      n&[	        SU& S35        U R%                  U#U&U"SUUU
US9  M     [        R                  R'                  US5      n'[        R                  R)                  U'5      (       a#  [	        S5        U R%                  U'SU"SU S 3U
US!9  U(       aG  [        R                  R)                  U5      (       a#  [	        S"5        U R%                  US#U"SU S$3U
US!9  S%U(       a  S&OS' S(UR                  S5      S    S)U S*U S+3	n(U H  n)[        R                  R!                  U)5      n$U(       a+  SU$;   a%  SU$;   a  U$R                  SS5      S   OU$n%U SU% 3n&O/U$R#                  [        R                  R!                  U5      U5      n&U(S,U& S-3-  n(M     U(       a+  U(       a$  U(S.-  n(U(S/-  n(U(S0-  n(U(S1-  n(U(S2-  n(U(S3-  n(U(S4-  n(OU(       a
  U(S5-  n(U(S6-  n(U(       a
  U(S7-  n(U(S8-  n(U(S9-  n([        R                  R'                  US:5      n*[+        U*S;5       n+U+R-                  U(5        SSS5        U R%                  U*S:U"SS<U
US!9  [	        S=U" 35        Uc  / nUR/                  / S>Q5        U(       a  UR1                  S?5         U R3                  U"USS@9  U(       aH  [        R                  R)                  U5      (       a$  [	        SB5        SSKn UR                  " U5        U"$ U"$ ! [         a9  nU(       a  SSKn UR                  " U5        O!    O= f[        SU 35      eSnAff = f! , (       d  f       GN= f!    N= f! [         a  n[        SAU 35      eSnAff = f!    U"$ = f! U(       aO  [        R                  R)                  U5      (       a*  [	        SB5        SSKn UR                  " U5        f !    f = ff f = f)Cad  
Same as .push_to_hub(...) except 4bit weights are auto
converted to float16 then converted to GGUF / llama.cpp format.

Choose for `quantization_method` to be:
"not_quantized"  : "Recommended. Fast conversion. Slow inference, big files.",
"fast_quantized" : "Recommended. Fast conversion. OK inference, OK file size.",
"quantized"      : "Recommended. Slow conversion. Fast inference, small files.",
"f32"     : "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
"f16"     : "Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
"q8_0"    : "Fast conversion. High resource use, but generally acceptable.",
"q4_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
"q5_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
"q2_k"    : "Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.",
"q3_k_l"  : "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
"q3_k_m"  : "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
"q3_k_s"  : "Uses Q3_K for all tensors",
"q4_0"    : "Original quant method, 4-bit.",
"q4_1"    : "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
"q4_k_s"  : "Uses Q4_K for all tensors",
"q5_0"    : "Higher accuracy, higher resource usage and slower inference.",
"q5_1"    : "Even higher accuracy, resource usage and slower inference.",
"q5_k_s"  : "Uses Q5_K for all tensors",
"q6_k"    : "Uses Q8_K for all tensors",
NrI  r   r   r   unsloth_gguf_)prefixTFz+Unsloth: Converting model to GGUF format...)r  r   r   r  r  r   r   r   r   rc   r   r]  r^  r  r  r  r   z!Failed to convert model to GGUF: z-Unsloth: Uploading GGUF to Huggingface Hub...r   r   r   rb   )rn   r   r   r  r   r   z
Uploading r   )r  r   rn   r   r   r   r   r   r  zUploading config.json...z	 - config)r  r   rn   r   r   r   r   zUploading Ollama Modelfile...r\  z - Ollama Modelfilez'---
tags:
- gguf
- llama.cpp
- unsloth
z- vision-language-modelr   z
---

# z : GGUF

This model was finetuned and converted to GGUF format using [Unsloth](https://github.com/unslothai/unsloth).

**Example usage**:
- For text only LLMs:    `./llama.cpp/llama-cli -hf zC --jinja`
- For multimodal models: `./llama.cpp/llama-mtmd-cli -hf z% --jinja`

## Available Model files:
z- `z`
u)   
## ⚠️ Ollama Note for Vision Models
z[**Important:** Ollama currently does not support separate mmproj files for vision models.

z2To create an Ollama model from this vision model:
zR1. Place the `Modelfile` in the same directory as the finetuned bf16 merged model
z23. Run: `ollama create model_name -f ./Modelfile`
z2   (Replace `model_name` with your desired name)

z;This will create a unified bf16 model that Ollama can use.
z
## Ollama
z5An Ollama Modelfile is included for easy deployment.
z	
## Note
zDThe model's BOS token behavior was adjusted for GGUF compatibility.
zThis was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth)
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
z	README.mdr  z
Add READMEz>Unsloth: Successfully uploaded GGUF to https://huggingface.co/)ggufz	llama-cppr   zvision-language-model)rn   r   r   z&Failed to upload to Hugging Face Hub: z'Unsloth: Cleaning up temporary files...)r   r#  tempfilemkdtemprQ   rg  r  rG   ra   r   rk   r   r   r  rY   rZ   basenamer   r  r[   r]   r   rB  extendr  add_tags),r  rn   r   r  r  r   r   r   r   r   r   r   r   r   r   rc   r   r  rl  temp_dirr   cleanup_tempresultre  r^  r  r  r  actual_save_directoryr  rG   r   apir&  full_repo_idrf   original_namequant_suffixproper_nameconfig_pathreadme_contentr  readme_pathrE  s,                                               rK   unsloth_push_to_hub_ggufr}    s"   X IJJ ,/'>s#B'wJ|+##_#=!# 
79 D-+!"5/+!3!3#7
 $L1#$89$%:;!/ &'7 8 

9:x)E" gzz|F+H&Zq	2L"L 	"	 	 	
 0MGG,,];M= @ m+ "''Q/2& 
 ",Al^<+33GG$$^4j J{m3/0OO"/*&#!/%7%#  	% 0< ggll#8-H77>>+&&,-OO"-,&#$2#39!=%#   "''..1C"D"D12OO"4*&#$2#33F!G%#  
 %" - . ==b 5
 6=I >::A C$ 'DGG,,T2M= @ m+ "''Q/2& 
 ",Al^<+33GG$$^4j K=44N '" (KKN}}NSSNssNSSNTTNNN  o-NVVNm+NWN 	r	

 ggll#8+F+s#qGGN# $ 	)&")! 	 	
 	L\N[	

 <D45KK/0	LL&#   BGGNN>::;<n- <O  Dn->qcBCCDp $#<	 ICA3GHHI BGGNN>::;<n- ;<s   1Q K%R. ?RAR. -R' 9S 
RR%Q87R8Q<:RR
R$R. 'R+)R. *S +R. .
S8SSS S;T.T%$T.%T)'T.c                 H    [         R                  " USS9  [        U UUSSS9  g )NT)r  r   F)r   r   r   )rY   r^   r%   )rb   r   r   s      rK   save_lora_to_custom_dirr  .	  s)    KK40 'rJ   c           
      *   [         R                  R                  S5      (       dt  [        (       a(  [	        S/5      nUR                  5         [        SS9  S nOC[        5       n[	        S/5      nUR                  5         [        5       nUR                  5         OS n[        S5       H  n[        R                  " 5         M     Sn[        XU5        U R                  R                  n[         R                  R                  US5      n[!        SU S	35        [!        S
U 35        SU SU S3n ["        R$                  " US["        R&                  ["        R&                  SSS9 nUR(                   H  n[!        USSS9  M     UR*                   H  n[!        USSS9  M     UR                  5         UR,                  S:w  a!  ["        R.                  " UR,                  U5      e S S S 5        [!        SU 35        [!        S5        [1        U UUSSUS U5      nUR3                  S5       n[!        S5        [!        SU 35        [!        S5        g ! , (       d  f       Np= f! ["        R.                   a"  n[!        SUR,                   35         S nAg S nAff = f)NrP  protobufFr  r   zlora-to-ggml-pushggml-adapter-model.bin0Unsloth: Converting auto-saved LoRA adapters at  to GGML format.The output file will be *python3 llama.cpp/convert-lora-to-ggml.py r    llamaTr   rb  rB  rC  rc  r/  r   r   rh  r   *Error: Conversion failed with return code ,Unsloth: Conversion completed! Output file: z3Unsloth: Uploading GGML file to Hugging Face Hub...zGGML converted LoRAggmlr   Unsloth: Done.z>Converted LoRA to GGML and uploaded to https://huggingface.co/
This GGML making function was made by Maheswar. Ping him @Maheswar on the Unsloth Discord or on HuggingFace (@mahiatlinux) if you like this!)rY   rZ   r]   r   r`  r7  r  rK  r[  r   r   r   r  ro   r  r[   rQ   rE  rF  ri  rB  rC  rk  rl  r   r   )r  r   rn   r   r   r   r   r   r   r   rc   r   python_installmakefile	git_cloner   lora_directory_pushr  output_filerp  rq  rr  r  r&  links                            rK   ,unsloth_convert_lora_to_ggml_and_push_to_hubr  =	  sW    77>>+&&  8*FN!&%8H<>I8*FNNN:<H!1X


  .D-@A''J'',,24LMK	
:;N:OO_` 
$[M
23:;N:OqQ\P]]cdG____!%
 		d"d3 "		d"d3 "GGI}}! 33BMM7KK "
& 
8
FG	
?@$	H nnS!"D	
	J4&
QR	 	YG
 
 (( :1<<.IJs7   -5I "A>I!I 
II I J0JJc           	         [         R                  R                  S5      (       dt  [        (       a(  [	        S/5      nUR                  5         [        SS9  S nOC[        5       n[	        S/5      nUR                  5         [        5       nUR                  5         OS n[        S5       H  n[        R                  " 5         M     [        XU5        U R                  R                  n	[         R                  R                  US5      n
[!        SU S35        [!        S	U
 35        S
U SU
 S3n ["        R$                  " US["        R&                  ["        R&                  SSS9 nUR(                   H  n[!        USSS9  M     UR*                   H  n[!        USSS9  M     UR                  5         UR,                  S:w  a!  ["        R.                  " UR,                  U5      e S S S 5        [!        S5        [!        SU
 35        [!        S5        g ! , (       d  f       N3= f! ["        R.                   a"  n[!        SUR,                   35         S nAg S nAff = f)NrP  r  Fr  r   r  r  r  r  r  r   r  Tr   r  r   r  r   r  r  r  r  )rY   rZ   r]   r   r`  r7  r  rK  r[  r   r   r   r  ro   r  r[   rQ   rE  rF  ri  rB  rC  rk  rl  )r  r   r   rc   r   r  r  r  r   r  r  rp  rq  rr  r  s                  rK   -unsloth_convert_lora_to_ggml_and_save_locallyr  	  s    77>>+&&  8*FN!&%8H<>I8*FNNN:<H!1X


  D^<''J'',,~/GHK	
:>:JJZ[ 
$[M
23:>:J!K=X^_G____!%
 		d"d3 "		d"d3 "GGI}}! 33BMM7KK "
$ 

	8
FG	 	Y)
 
 (( :1<<.IJs7   +5H  A>HH 
HH H I1II)merge_and_overwrite_loraprepare_saving)r
   r   c           
         Uc  Ub
  [        5       nUb  Uc  [        S5      e[        R                  R	                  [        R                  R                  SS5      5      (       d	  [        SS9  / nUb  [        U[        5      (       a  OE[        U[        5      (       a  U/nO,[        U[        5      (       a  [        U5      nO[        S5      e[        U5       HT  u  pU	R                  5       n	U	S:X  a  Sn	OU	S	:X  a  S
n	OU	S:X  a  Sn	OU	c  S
n	UR                  U	R                  5       5        MV     OUR                  UR                  5       5        U HU  n	U	[        R!                  5       ;  d  M  SU	 S3n
[        R#                  5        H  u  pU
SU SU S3-  n
M     [        U
5      e   U H@  n[%        USUS9nUc  M  ['        U USSSUS9  SSKJn  U" US9nUR-                  UUSS/S9  MB     W$ )N.Unsloth: Please specify a token for uploading!rP  zunsloth_convert_hf_to_gguf.pyT)just_clone_repor  r.   r3   r/   r4   r0   r5   r  r  r  r  r)   )r  r  r  )r   r   r   r   r   r   r   rb   z*.gguf)r   rn   r   allow_patterns)r    r   rY   rZ   r]   r[   r
   r   rl   r  r   r  r  r   r  rO   r  rP   _convert_to_ggufr  rk   r   r"  )rb   r   r   r  r  rn   r   r  r  r  r  rR   rS   metadatar   ru  s                   rK   save_to_gguf_genericr  	  s    },u}KLL77>>"'',,{4STUUD1  "&)400+S11## +U33"&':";X   ))<=OA'--/L.$!11%,'%%$++L,>,>,@A  > 	!''(9(?(?(AB0~2244/~=bcE,224
1SEugR00 5u%% 1 6# 1

 "!' .&C,!#"*	  ' 62 OrJ   unsloth_finetuned_mergec                     Uc  U(       a
  [        5       nUS:X  a  [        S5      eUS:X  a  Sn[        [        U UUUUUUS SSS9  g )Nr   r   r   TF)
rb   r   r   r   r   r   r   output_dtypelow_disk_space_usageuse_temp_file)r    r   r  r   )rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   s                         rK   r`  r`  3
  sk    6 }m#P
 	
 
,	,#'!!# rJ   c                     Uc  [         R                  " S5        [        [        5       5      nU US'   US	 [	        S0 UD6  [        S5       H  n[        R                  " 5         M     g)r  Nr  rb   r  r   rI   r   rt   r   r   r`  r   r   r   r  s                     rK   &unsloth_generic_save_pretrained_mergedr  k
  s_    6 L	

 VXIIg&%9%1X


 rJ   c                     Uc  [         R                  " S5        [        [        5       5      nU US'   UUS'   SUS'   US	 US	 [	        S
0 UD6  [        S	5       H  n[        R                  " 5         M     gr  r  r  s                     rK   "unsloth_generic_push_to_hub_mergedr  
  s|    6 H	

 VXIIg")I#Im&)%9%1X


 rJ   c           	         [        U 5        [        U [        5      (       a#  [        U UUU R                  R
                  UUS9  gSnU(       a   U R                  XUS9  UR                  XS9  gU R                  XS9  UR                  U5        g)zXSave a QAT-trained model by converting fake-quantized weights to real quantized weights.rb   r   r   torchao_configr   r   NFr   r   r   r   )r   r   r"   '_unsloth_save_torchao_with_given_configro   r   r   r_   )rb   r   r   r   r   r   s         rK   *_unsloth_save_torchao_with_attached_configr  
  s     5!%-../+!"\\==%	
 	 U 	 	
 	n<nV!!.1rJ   c                    U(       a
  Uc   S5       eUc   S5       e[        [        5       5      nSUS'   SUS'   US	 [        U [        5      (       d8  [        U [        5      (       d#  U R                  U5        UR                  U5        O[        S0 UD6  [        S	5       H  n[        R                  " 5         M     S
SK
JnJn	Jn
JnJn  S
SKJn  [        X:5      (       a  UnOU
" US9nSn[%        U S5      (       a`  [%        U R&                  S5      (       aE  [)        S U R&                  R*                   5       5      nU=(       d    [%        U R&                  S5      nU(       a  UOUnU(       a  UOU	nUR-                  U5      n[.        (       a  S[0        R2                  0nOS[0        R2                  0nUR,                  " U4SUS.UD6nUS-   n[5        [7        S5      5      [5        S5      :  nSnU(       a"  UR9                  UUUS9  UR9                  UUS9  O!UR                  UUS9  UR                  U5        [:        R<                  R?                  U5      (       a   [@        RB                  " U5        gg!    g= f)a  Quantizes the model with torchao and saves a torchao quantized checkpoint

Args
  `save_directory`: local folder path or huggingface hub ID when `push_to_hub` is set to True, e.g. `my_model`
  `torchao_config` (TorchAOBaseConfig): configuration for torchao quantization, full list: https://docs.pytorch.org/ao/main/api_ref_quantization.html#inference-apis-for-quantize
  `push_to_hub` (bool): whether to push the checkpoint to huggingface hub or save locally
Nr  zHUnsloth: Please specify a torchao_config for post-training quantization!Fr   r   r   r  r   r   )AutoModelForCausalLMAutoTokenizerTorchAoConfigAutoModelForImageTextToTextAutoProcessor)	quantize_)r  ro   rK  c              3   D   #    U  H  nUR                  S 5      v   M     g7frM  rP  rR  s     rK   rT  :_unsloth_save_torchao_with_given_config.<locals>.<genexpr>  s$      
/ JJIJJ/rV  rW  r1  r   auto)
device_mapr   z-torchaotorchaoz0.14.0r  r   r  rI   )"r   r   r   r"   r#   r_   r`  r   r   r   transformersr  r  r  r  r  r  r  rX   ro   r_  rK  from_pretrainedr   r   r   r   importlib_versionr   rY   rZ   r]   rG   ra   )rb   r   r   r  r   r   r  r   r  r  r  r  r  r  r   r  
auto_modelauto_processorkwargsquantized_modeltorchao_save_directoryr   s                         rK   r  r  
  sq      R"RR  	"RQR" VXI$Im-Im"#e122:eY;W;Wn-!!.1)y)1X


   ".00,+H FuhGELL/$J$J 
\\//
 
 A75<<A06,<PJ&,]-N..~>I 05>>* !001 	O ,j8 !!29!=>ARR##"9KUZ 	$ 	
 	4eD''"9K 	( 	
 	!!"89 
ww~~n%%	MM.) &	s   I6 6I:c           	      2   Uc  U(       a
  [        5       n[        U S5      =(       a    U R                  SLnUb  U(       a   S5       e[        U UUUUUS9  OU(       d   S5       e[	        U UUUUS9  [        S5       H  n[        R                  " 5         M     g)a  Saves a torchao quantized model checkpoint.

This function handles two mutually exclusive workflows:

1. **QAT (Quantization-Aware Training)**: If the model was trained with `qat_scheme`
   parameter, do NOT pass `torchao_config`. The function will convert the QAT
   fake-quantized weights to real quantized weights and save directly.

2. **PTQ (Post-Training Quantization)**: If you want to apply quantization to a
   regular model, pass a `torchao_config`. The model must NOT have been trained
   with `qat_scheme`.

Args:
  `save_directory`: local folder path or huggingface hub ID when `push_to_hub` is True
  `tokenizer`: the tokenizer to save alongside the model
  `torchao_config` (TorchAOBaseConfig): configuration for torchao quantization.
      Required for PTQ, must be None for QAT models.
      Options: https://docs.pytorch.org/ao/main/api_ref_quantization.html#inference-apis-for-quantize
  `push_to_hub` (bool): whether to push to huggingface hub or save locally
  `token`: HuggingFace token for pushing to hub
N_torchao_configzUnsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. For QAT models, do not pass `torchao_config` - the quantization config is already attached to the model from training.r  zUnsloth: No `torchao_config` provided and model was not trained with `qat_scheme`. Either train with `qat_scheme` parameter, or provide a `torchao_config` for post-training quantization.)rb   r   r   r   r   r   )r    rX   r  r  r  r   r   r   )r  r   r   r  r   r   has_qat_configr   s           rK   unsloth_save_pretrained_torchaor  K  s    : } 	'(MT-A-A-M  !! 	
3	
!
 	0+!+%	
  	
*	
~
 	3+!%	
 1X


 rJ   c                      [        S5      e)NzAUnsloth: Sorry GGUF is currently not supported for vision models!)NotImplementedError)rm  r  s     rK   not_implemented_saver    s    
K rJ   c                    SS K nSS KnSSKJnJnJnJn  U R                  R                  S:X  a  U R                  nOU R                  n[        UR                  U5      5      R                  SS5      n	U	SS  n	[        R                  " SSU	5      n	UR                  R!                  S	5      R#                  S	5      n
S
U	 SU
 S3n[%        U['        5       5        U n [)        US5      (       ai  UR                  R                  S:w  aO  UR                  Ul	        UR+                  [,        U5      Ul        [)        US5      (       a  UR/                  S/5        [)        US5      (       a  UR0                  nOOM  U(       d  [)        U S5      (       a  UR+                  [2        U 5      U l        UR+                  [6        U 5      U l        UR+                  [:        U 5      U l        UR+                  [>        U 5      U l         UR+                  [B        U 5      U l"        UR+                  [F        U 5      U l$        UR+                  [J        U 5      U l&        U $ UR+                  [2        U 5      U l        UR+                  [6        U 5      U l        UR+                  [:        U 5      U l        UR+                  [>        U 5      U l         UR+                  [B        U 5      U l"        U $ )Nr   )r   r   r   r   unsloth_push_to_hubNoneTyper  r   z<function save at .+?>z
torch.saverd  zdef unsloth_push_to_hub(self, z:
    """
    a  
    """
    arguments = dict(locals())
    del arguments["self"]
    if "tags" in arguments and arguments["tags"] is not None:
        assert(isinstance(arguments["tags"], (list, tuple)))
        arguments["tags"] = list(arguments["tags"]) + ["unsloth",]
    elif "tags" in arguments:
        arguments["tags"] = ["unsloth",]
    elif hasattr(self, "add_model_tags"):
        self.add_model_tags(["unsloth",])

    if "commit_message" in arguments:
        commit_message = arguments["commit_message"]
        if commit_message is not None:
            if not commit_message.endswith(" "): commit_message += " "
            if "Unsloth" not in commit_message:
                commit_message += "(Trained with Unsloth)"
        else:
            commit_message = "Upload model trained with Unsloth"
        arguments["commit_message"] = commit_message

    if "commit_description" in arguments:
        commit_description = arguments["commit_description"]
        if commit_description is not None:
            if not commit_description.endswith(" "): commit_description += " "
            if "Unsloth" not in commit_description:
                commit_description += "(Trained with Unsloth 2x faster)"
        else:
            commit_description = "Upload model trained with Unsloth 2x faster"
        arguments["commit_description"] = commit_description

    # Update model tag
    if hasattr(self, "config"):
        _ = upload_to_huggingface(
            self, arguments["repo_id"], arguments["token"],
            "finetuned", "trl", file_location = None,
            old_username = None, private = arguments["private"],
        )
    pass

    try:
        self.original_push_to_hub(**arguments)
    except:
        del arguments["tags"]
        self.original_push_to_hub(**arguments)
    pass

    if hasattr(self, "config"):
        print("Saved model to https://huggingface.co/" + arguments["repo_id"])
    pass
    r   r   r   rb   ro   )'inspecttypestypingr   r   r   r   r   __name__r   r  	signaturer   r  r  __doc__encoderj  execglobalsrX   
MethodTyper  r   rb   r  push_to_hub_mergedr  save_pretrained_mergedr}  push_to_hub_ggufrg  save_pretrained_ggufr  save_pretrained_torchaor  push_to_hub_ggmlr  save_pretrained_ggml)rb   visionr  r  r   r   r   r   r   r  docspush_to_hub_textr;  s                rK   r'   r'     s   66 !!%::$99$00G%%&:;<DDZQWXI!"I/yII''..w7>>wGD9) E	F 35l 		79%N
 NM22**337LL2@2L2LN/).)9)9#^*N& ~'788--! >7+++11N) . 5(##','7'72E(E$ ,1+;+;6,E( &+%5%56NPU%VE").)9)9,e*E& -2,<,</-E) &+%5%5<e&E" */)9)9=u*E&$ L $)#3#3.$
  (-'7'72E(
$ "'!1!12JE!R%*%5%5(%&
" ).(8(8+U)
% LrJ   )_unsloth_sentencepiece_temp)F)i)Funsloth_finetuned_modelr/   NFF)Nr   Nr   NNr   FTNr   Nr   r  )NF)r   NNNT)Nr/   NNr   NNr   FTNr   Nr   rG  )	Nz#Converted LoRA to GGML with UnslothNNFNz)Convert LoRA to GGML format using Unslothr   rG  )r   rG  )NQ8_0NN)FN)NNFN)unsloth_zoo.utilsr   importlib.metadatar   r  unsloth_zoo.hf_utilsr   r   unsloth_zoo.llama_cppr   r   r	   r
   r   r   bitsandbytes.nnr   r   peft.tuners.lorar   r   r   r  r   r   r   r   r6  requestsr   rY   rG   r  r   (transformers.models.llama.modeling_llamar   kernelsr   r   r   rE  r   r  tokenizer_utilsr   models.loader_utilsr   models._utilsr   ollama_template_mappersr   r   r  r   rk   r   r    huggingface_hub.utilshuggingface_hub.utils._tokenpathlibr!   r   r"   r#   __all__rW  r[   r  r  keynamesr  r   r   r  r  rO   rL   rV  r$   rh   r{   r   r   inference_moder   r  PathLikeboolr   r  floatr%   rK  r[  r`  rs  r  r  r  r&   r  r  r  r  r(   r   r  r   r;  r>  rF  rg  r}  r  r  r  unsloth_zoo.saving_utilsr  r  r  r  r`  r  r  r  r  r  r  r'   rI   rJ   rK   <module>r     s   & ; C  9 : 2 2 2 
   	   	 ; K K   	 ; 3 / 1 V ' !;)  0  $))BJJOO-..!X- #x/ 
 OQ M 
V	
 \ 
[ K j j e d d ) + n  )!" #$ JH)' 05<, "*1C	' !>*&,@
 
 (, !%#jj&+#!!#'$:"K@"%1n	$ #r{{*+n	$ 	n	$
 n	$ E#t)$%n	$ n	$ n	$ n	$ #s(On	$ n	$ c]n	$ n	$ 4.n	$  SM!n	$" d^#n	$$ %n	$& 'n	$( )n	$* s)+n	$. /n	$0  1n	$ n	$b
)#X ,. >C
L>
" #4* H<H<H< H< 	H<
 H< H< H< H<\ %(, !%#jj&+#!!@"&!&#r{{*+& 	&
 & E#t)$%& & & & #s(O& & c]& & s)& &   !&X %#'$:"$(,1#K $@"&!)) 	)
 4.) SM) d^) sD!) #sD.)) ) ) ) ) 49
) )   !)X
2$. 	$"X Xv(B5p'.# '.3 '.S '.RU '.T!0 !0# !0C !0H+s + +RU +0 * (," !%#jj&+#!!@"&%P#r{{*+P
 P P E#t)$%P d^P P P P #s(OP P c]P P  s)!P" #P$  %Pl * #'$:"$(,1#K $@"&#^^
 ^ 4.^ SM^ d^^ sD!^ #sD.)^ ^ ^ ^ ^ 49
^  !^"  #^D	& $($I"$(I@"&S S 4.	S
 SMS d^S sD!S S S S S  St A"&== 	=
  =@ 0 
 Q Qh  /H(, !%#jj&+#!!#'$:"K@"%14 #r{{*+4 	4
 4 E#t)$%4 4 4 4 #s(O4 4 c]4 4 4.4  SM!4" d^#4$ %4& '4( )4* s)+4. /40  14 4t %(, !%#jj&+#!!@"&!&#r{{*+& 	&
 & E#t)$%& & & & #s(O& & c]& & s)& &   !&X %#'$:"$(,1#K $@"&!)) 	)
 4.) SM) d^) sD!) #sD.)) ) ) ) ) 49
) )   !)` (, 2#r{{*+ 2 	 2
 E#t)$% 2P (,e#r{{*+e
 e E#t)$%eV (,C#r{{*+C
 C E#t)$%CLEG[;;33;::s$   (W- -X0W99XXX