
    oi/>                        S SK Jr  SSKrSSKrSSKrSSKrSSKrSSKJr  SSK	J
r
JrJrJrJr  SSKJr  SSKJr  SS	KJr  SSKr\" \5      r\\" S
5      :  rSrSrSR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       SR5                  5       0rS rS rS rS r       S+S jr S r!S,S jr"S\#4S jr$S\#S\#S\#4S  jr%S!\RL                  RN                  S\#4S" jr(S#\\)\#4   S$\)S%\)S&\)S'\)S(\)S)\)S\#4S* jr*g)-   )DEVICE_TYPE_TORCH    N)Union   )INT_TO_FLOAT_MAPPERFLOAT_TO_INT_MAPPERMAP_TO_UNSLOTH_16bitFLOAT_TO_FP8_BLOCK_MAPPERFLOAT_TO_FP8_ROW_MAPPER)__version__)TorchAOConfig)Versionz4.37)
LOCAL_RANKRANK)
WORLD_SIZEz"unsloth/Qwen3-32B-unsloth-bnb-4bitzunsloth/Qwen3-32B-bnb-4bitz&unsloth/Qwen3-30B-A3B-unsloth-bnb-4bitzunsloth/Qwen3-30B-A3Bzunsloth/Qwen3-30B-A3B-bnb-4bitz+unsloth/Qwen3-30B-A3B-Base-unsloth-bnb-4bitzunsloth/Qwen3-30B-A3B-Basez#unsloth/Qwen3-30B-A3B-Base-bnb-4bitc                     U  H3  n[         R                  R                  U5      nUc  M'   [        U5      s  $    g ! [         a     ME  f = fN)osenvirongetint
ValueError)keyskeyvalues      U/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/models/loader_utils.py_get_env_intr   3   sN    

s#=	u:    		s   
;
A	A	c                  d   [         R                  R                  5       (       a`  [         R                  R                  5       (       a=   [         R                  R	                  5       [         R                  R                  5       4$ [        [        5      [        [        5      4$ ! [         a     N*f = fr   )
torchdistributedis_availableis_initializedget_rankget_world_size	Exceptionr   LOCAL_RANK_KEYSWORLD_SIZE_KEYS     r   _infer_distributed_ranksr*   ?   s    %%''E,=,=,L,L,N,N	$$--/1B1B1Q1Q1SSS (,*GGG  		s   ;B" "
B/.B/c                  d    [        5       u  pU=(       d    SS:  =(       d    U S L=(       a    U S:  $ )Nr   r   )r*   )rank
world_sizes     r   is_distributedr.   H   s/    /1DO!q CT%5%B$(Cr)   c                     [        5       u  pU=(       d    SS:  =(       d    U S L=(       a    U S:  nU(       d  gU c  SOU nS[         SU 30n [        S:X  a#  [        R                  R	                  U5        US4$ [        S:X  a4  [        [        S5      (       a  [        R                  R	                  U5        US4$ ! [         a     US4$ f = f)	Nr   r   )NF :cudaxpuT)r*   r   r   r2   
set_devicehasattrr3   r%   )r,   r-   r    
local_rank
device_maps        r   prepare_device_mapr8   M   s    /1D?a'JD,<,IKlJ*+1ZL9:J&JJ!!*-
 t	 %'GE5,A,AII  , t  ts   )B> <>B> >
CCc                    [        U 5      n U R                  5       nUS;   d   eUS:w  aH  US:X  a8  [        R                  R	                  SS5      S:X  a  X;   a  Xx   $ X;   a  Xh   $  g X;   a  Xh   $ g [
        (       d!  X;   a  X(   n [        S[         SU  S	35        U $ U(       d  X;   a  X(   n	U	$ U(       d  X;   a  XH   n	U	$ U(       a.  [
        (       a#  X;   a  UR                  S
5      (       a  U$ X8   n	U	$ g )NTFblockFTUNSLOTH_HAS_FBGEMM01z&Unsloth: Your transformers version of z does not support native 4bit loading.
The minimum required version is 4.37.
Try `pip install --upgrade "transformers>=4.37"`
to obtain the latest transformers build, then restart this session.
For now, we shall load `z0` instead (still 4bit, just slower downloading).z	-bnb-4bit)	strlowerr   r   r   SUPPORTS_FOURBITprinttransformers_versionendswith)

model_nameload_in_4bitr   r   r	   load_in_fp8r
   r   lower_model_namenew_model_names
             r   __get_model_namerJ   _   s=    ZJ!'')0000e$BJJNN3G$MQT$T:.@@!>0BB ?   <0BB"2"I(:
45I4J K' (2l2b	d	
 .E,>
 .F-?	**/?/V $$[11##,>
 r)   c                  h    SS K n SnU R                  USS9 nUR                  nS S S 5        XR                  S5      S  nUR	                  SS5      R	                  SS	5      R	                  S
S5      n[        U[        5       5        [        [        [        4$ ! , (       d  f       Nx= f!   0 0 0 4s $ = f)Nr   zQhttps://raw.githubusercontent.com/unslothai/unsloth/main/unsloth/models/mapper.py   )timeout__INT_TO_FLOAT_MAPPERr   NEW_INT_TO_FLOAT_MAPPERr   NEW_FLOAT_TO_INT_MAPPERr	   NEW_MAP_TO_UNSLOTH_16bit)
requestsr   textfindreplaceexecglobalsrO   rP   rQ   )rR   
new_mappers     r   _get_new_mapperrY      s    h
\\*\2j#J 30G H JK
46OPW*,EFW+-GH 	 	Z###$
 	
 322rzs"   B( BA1B( 
B%!B( (B1c                    US;   d   e[        U U[        [        [        U[        [
        S9nUbA  [        U5      [        L a/  UR                  5       [        ;   a  [        UR                  5          nUcd  U R                  S5      S:X  aO  U S   R                  5       (       a7  [        5       u  pEn[        U UUUUU[        [
        S9nUb  [        SU  S35      eUS:w  a  U$ Ub  U$ U $ )	Nr:   )rE   rF   r   r   r	   rG   r
   r   /r   r   z	Unsloth: a5   is not supported in your current Unsloth version! Please update Unsloth via:

pip uninstall unsloth unsloth_zoo -y
pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --upgrade --no-cache-dir "git+https://github.com/unslothai/unsloth-zoo.git"
F)rJ   r   r   r	   r
   r   typer?   r@   BAD_MAPPINGScountisalnumrY   NotImplementedError)rE   rF   rG   rI   rO   rP   rQ   upgraded_model_names           r   get_model_namerb      s   0000%#113!$="9	N 	" C'  "l2%n&:&:&<= 	S!Q&qM!!##  	S:R /#'"9"9#;%(A&=	
 *%J< (l l  e+7>GZGr)   fp8_modec                     SSK JnJnJn  U S:X  a  U" 5       nO&U S:X  a  U" SS/5      U" SS/5      4nO[	        S5      eU" USS	9$ )
zn
Return a `torchao.quantization.Float8DynamicActivationFloat8WeightConfig`
to be used for `load_in_fp8=True`.
r   ))Float8DynamicActivationFloat8WeightConfigPerBlockPerRowrowr;   r      z5Unsloth: `load_in_fp8` supports only 'row' or 'block'g-q=)granularityactivation_value_lb)torchao.quantizationre   rf   rg   r   )rc   re   rf   rg   rj   s        r   _get_torchao_fp8_configrm      s`    
  5h	W	C)8S#J+?@PQQ4!# r)   rE   returnc                    [         R                  " 5       nU R                  S5      S   S-   U-   n[        R                  R                  X#5      n[        SU  SU S35        [        R                  R                  U5      (       Gd  SSKJ	nJ
nJnJnJnJn	  [        U5      n
U" U
5      n
U	R!                  U 5      n[#        S	 UR$                   5       5      nU=(       d    ['        US
5      nU(       a  UOUnU(       a  UOUnUR!                  U SSU
S9nUR!                  U 5      nUR)                  USS9  A[+        S5       H6  n[,        R.                  R1                  5         [2        R4                  " 5         M8     UR)                  U5        U$ )a  
Quantizes the model to fp8 using torchao and saving the quantized model to a
temporary location. Return the path to the quantized model.

Note: Once on-the-fly quantization is added in vllm in
https://github.com/vllm-project/vllm/pull/26327, we should
dynamically quantize the model there instead:

  llm = LLM(
    ...
    hf_overrides={"quantization_config_file": "torchao_config.json"},
  )
r[   z-fp8-zUnsloth: Quantizing 'z' to fp8, using model_name='z	' insteadr   )AutoModelForCausalLMAutoModelForImageTextToTextAutoTokenizerAutoProcessorTorchAoConfig
AutoConfigc              3   D   #    U  H  nUR                  S 5      v   M     g7f))ForConditionalGenerationForVisionText2TextN)rD   ).0xs     r   	<genexpr>+_offline_quantize_to_fp8.<locals>.<genexpr>(  s$      
) JJIJJ)s    vision_configauto)torch_dtyper7   quantization_configF)safe_serializationr   )tempfile
gettempdirsplitr   pathjoinrB   isdirtransformersrq   rr   rs   rt   ru   rv   rm   from_pretrainedanyarchitecturesr5   save_pretrainedranger   r2   empty_cachegccollect)rE   rc   temp_dirrI   rq   rr   rs   rt   ru   rv   qconfigconfigis_vlm
auto_modelauto_processormodel	tokenizer_s                     r   _offline_quantize_to_fp8r     so    ""$H%%c*2.88CNWW\\(;N	

|+GGWW`a 77==((	
 	
 *(3(++J7 
))
 
 ;76?;4:0@T
*0m** ")	 + 
 #22:>	n5IqAJJ""$JJL  	!!.1r)   r   c                 N     [        U5      n[        SUS4/S9U l        g!    g= f)zX
Tag a model with a `TorchAOConfig` so downstream callers will know what to do with it.
N)
qat_schemebase_config_and_filter_fns)rm   r   torchao_config)r   rc   base_configs      r   "_tag_model_with_fp8_torchao_configr   ?  s6    -h7,+6*=)> 
s     $rG   fast_inferencefull_finetuningrF   load_in_8bitload_in_16bituse_exact_model_namec                 R   U SLd   eU SL a  SnOU nUS;  a  [        SU S35      eU(       d  [        S5      eU(       a  [        S5      eU(       d  U(       d  U(       a  [        S	5      eU(       a  [        S
5      e[        R                  R                  5       (       aA  [        R                  R                  (       a"  [        R                  R                  5       S:  d  [        S5      e[        [        R                  5      [        S5      :  a  [        S5      e[        R                  R                  S5      c  [        S5      eSSKnSUR                   S3n	[        UR                  5      [        S5      :  a  [        U	5      e[        R                  R                  S5      b~  [        R                  R                  S5      b^  SSKn
[        U
R                  5      [        S5      :  a8  S[        R                  S'   SSKJn  UR#                  SU
R                   S35        U$ )a3  
Assuming `load_in_fp8` is enabled, raise appropriate errors on incompatible settings
and environment. Currently this feature requires:

1. H100 GPUs or after
2. torchao 0.15.0+ (or nightly)
3. torch 2.9.0+
4. If fbgemm_gpu_genai is installed, require 1.4.1+

Returns the fp8 mode, one of "row" or "block".
FTrh   )rh   r;   z:Unsloth: `load_in_fp8` can only be 'row' or 'block', got ''zEUnsloth: `load_in_fp8` is only supported for `fast_inference` for nowz=Unsloth: `load_in_fp8` is not compatible with full finetuningz_Unsloth: `load_in_fp8` is not compatible with `load_in_4bit`, `load_in_8bit` or `load_in_16bit`z<Unsloth: `load_in_fp8` requires `use_exact_model_name=False`)	   r   z^Unsloth: On the fly `load_in_fp8` requires H100 GPUs or after. Try `unsloth/Qwen3-8B` instead.z2.9.0zXUnsloth: On the fly `load_in_fp8` requires torch 2.9.0+. Try `unsloth/Qwen3-8B` instead.torchaoNz^Unsloth: Please install torchao for on the fly float8 to work! Try `unsloth/Qwen3-8B` instead.r   zWUnsloth: `load_in_fp8` requires torchao 0.15.0+ (or nightly).
You have torchao version=z6
Use `pip install --upgrade --force-reinstall torchao`z0.15.0
fbgemm_gpuzfbgemm_gpu.experimentalz1.4.1r=   r<   )loggerzUnsloth: fbgemm_gpu_genai==z6 is old for FP8 loading. Using Triton kernels instead.)r   r   r2   r!   versionget_device_capabilityr   r   	importlibutil	find_specr   fbgemm_gpu.experimental.gen_air   r   unsloth_zoo.logr   info)rG   r   r   rF   r   r   r   rc   r   error_messager   r   s               r    _get_fp8_mode_and_check_settingsr   M  s   ( e###d ''H
RST
 	
 S
 	
 K
 	
 |}m
 	
 WXX 	

!!MMJJ,,.&8l
 	

 u  !GG$44f
 	
 ~~	*2l
 	
 	$$+$7$7#8 9@	@ 
 w""#gh&77'' 	  .:NN$$%>?K-:))*WW-==/2BJJ+,.KK-j.D.D-E F0 1 Or)   )TNNNFNN)TF)+device_typer   r   r   r   rer   typingr   mapperr   r   r	   r
   r   r   r   rC   unsloth.models._utilsr   unsloth_zoo.utilsr   r   rA   r&   r'   r@   r]   r   r*   r.   r8   rJ   rY   rb   r?   rm   r   nnModuler   boolr   r(   r)   r   <module>r      s   ,  	  	    = / % 	34 '76?: (! )..02N2T2T2V,2246M6S6S6U$**,.E.K.K.M1779;W;];];_)//13O3U3U3W	HD
(  $"?D02Hjc 06 6 6 6rehhoo  ^tSy!^^ ^ 	^
 ^ ^ ^ 	^r)   