
    oie             	        )   S r / SQrSSKrSSKJrJrJrJrJrJ	r	J
r
  SSKJr  \" 5       rSSKrSSKrSSKrSSKJrJr  SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKJrJ r   SSK!J"r#  S	S
K$J%r%J&r&J'r'J(r(J)r)J*r*  SSK+J,r,  SSK-J.r/  SSK0J1r1J2r2J3r3J4r4  SSK5J6r6J7r7J8r8J9r9J:r:  SSK;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrDJErE  SSKFJGrGJHrHJIrIJJrJ  SSKKJLrL  SSKMJNrNJOrP  SSKQJRrR  SSKSJTrT  \T H
  rU\U" 5         M     \R                  " S\WSS9  \R                  " S\XSS9  \R                  " S\WSS9  \R                  " S\XSS9  \R                  " S\WSS9  \R                  " S\XSS9  \R                  " S\XSS9  \R                  " S\YSS9  \R                  " S\WSS9  \R                  " S\XSS9  \R                  " S\YSS9  \R                  " S\YSS9  \R                  " S\WS S9  \R                  " S\WS!S9  \R                  " S"5      R                  \R                  S#-   5        S$r] " S% S&\R                  5      r_\R                  R                  S'S(5      S):w  Ga   SSKbJ,rc  \cR                  " \_" S*5      5        Cc SSKeJ,rf  \fR                  " \_" S*5      5        Cf SSKgJ,rh  \hR                  " \_" S+5      5        \hR                  " \_" S,5      5        \hR                  " \_" S-5      5        Ch SSKiJ,rj  \jR                  " \_" S.5      5        Cj SSKkJ,rl  \lR                  " \_" S.5      5        Cl SSKmJ,rn  \nR                  " \_" S/5      5        Cn SSKoJ,rp  \pR                  " \_" S05      5        CpSSKqJ,rr  \rR                  " \_" S15      5        \rR                  " \_" S25      5        \rR                  " \_" S35      5        CrSSKsJ,rt  \tR                  " \_" S45      5        \tR                  " \_" S55      5        Ct SSKuJ,rv  \vR                  " \_" S65      5        Cv SSKwJ,rx  \xR                  " \_" S75      5        Cx SSKyJ,rz  \zR                  " \_" S85      5        \zR                  " \_" S95      5        Cz SSK{J,r|  \|R                  " \_" S:5      5        C| SSK}J,r~  \~R                  " \_" S;5      5        C~ SSKJ,r  \R                  " \_" S<5      5        C SSKJ,r  \R                  " \_" S=5      5        C \R                  " SS>\WS?S@9   \R                  " SSA\WS?S@9   SSKJ,r  \R                  " \_" SB5      5        C SSKJ,r  \R                  " \_" SB5      5        C SSKsJ,r  \R                  " \_" SC5      5        C SSKJ,r  \R                  " \_" SC5      5        C SSKuJ,r  \R                  " \_" SD5      5        CSSKuJ,r   " SE SF\GR                  5      r " SG SH5      rSSIKJr  SJ rSSL jrSSKr\\GR$                  l        SSKsr\\GR&                  l        SM r SSNKJr  SSOKJ r   SSPKJr  / SRQr\ H  rSS\ ST\ 3rSS\ SU\ 3r\GR=                  5       GR?                  SVSW5       SX3r \" SY\ SZ\ 3\" 5       5         \GRF                  " \" \5      5      rS[\;   a   \" SY\ S\3\" 5       5        S]\;   a  M  \GRL                  " S^S_\5      r\S`:X  a  \" \5      \" Sa5      ::  a  \" \5      r\" \\" 5       5        \" Sb\ 3\" 5       5        \" \ Sc\ Sd\ 3\" 5       5        M     \R                   r\'Se;   a  \" \5      \" Sf5      :  aG  \GRP                  GRR                  GRT                  r\GRP                  GRR                  GRX                  rO\GRR                  GRU                  SgSh9r\GRR                  GRY                  SgSh9rOV\'Si:X  aP  \" \5      \" Sj5      :  a  \" Sk5      e\GRR                  GRU                  SiSh9r\GRR                  GRY                  SiSh9rSSlKJr  \" 5       (       a   SSmKJr  SSKrSSpKJr  SSqKJr  SKrSKrSKr\'Sg:X  a  \GRP                  GR}                  5       u  rr\GR                  " \GRP                  GR|                  5      \GRP                  l        \Sr:  aC  S?r\" Ss5      (       a1    SStKJr  S?rSSOKJ r  \" \5      \" Sv5      :  r\(       d  \" Sw5        OUSKrORSKrOO\'S{:X  aA  S?r\" Ss5      (       a1    SStKJr  S?rSSOKJ r  \" \5      \" Sv5      :  r\(       d  \" Sw5        O	O\'Si:X  a  S?r \R                  " S5      r\R                  \GR                  5        C SSOKJ r  \GRP                  GR}                  5       u  rr\ Sc\ 3S~;   a  \" \5      \" S5      4;   a  \" S5      e \" \5      \" S5      :  a$  \" \5      \" S5      :  a  \" S\ S\ S\ Sc35      e\" \5      \" S5      :  a$  \" \5      \" S5      :  a  \" S\ S\ S\ Sc35      e\" \5      \" Sf5      :  a$  \" \5      \" S5      :  a  \" S\ S\ S\ Sc35      eSSKJr   \" 5         SSKJs  Jr  \GR                  rSSOKJ r   SSK{r\" \GR                  GR                  S5      (       a  \" \GR                  GR                  GR                  5      \L aZ  S\GR                  GR                  GR                  ;  a3  \GR                  GR                  GR                  GR                  S5        \R                  R                  SS(5      S):H  r\R                  R                  SS(5      S):H  r\R                  R                  SS)5      S):H  rSSKJr  \GR                  " S5      S\4S j5       rSSKr\\GR                  GRj                  l        \8" \\\S9  S?S?S?\SKS.rSSKrS r\\GRj                  R$                  GR                  l        \\GRj                  GR                  l        \\GR                  GR                  l        CS r  SS\S\S\\   S\4S jjrSSOKJ r  SSKJr  \" \5      \" S5      :  Ga@  SSKJr   \GRF                  " \GR                  5      rSr\GR                  \5      G\ " \5      -   Gr\GR                  SG\5      Gr\GR                  " S\\GR                  S9S   Gr\GR?                  \G\G\ G\5      rG\ " \GR                  " S\5      GR                  S5      5      Gr\GR                  S5      Gr	SGR                  S G\	 5       5      r\GRL                  " SS\5      r\GR?                  SS5      r\" \\" 5       5        SSKJr  G\\l        SSGKJr  G\\l        SSGKGr\R                  R                  SS(5      S):H  GqG[        (       a'  G\GR                   GR#                  S5      c  \" S5      eSSGKGr\GR                  " S#5      SS j5       GrSSKrSS jGrSS jGrSSGKGJGrGJGr  \GRF                  " G\GR2                  5      Gr\GRL                  " SSWG\\GR                  S9GrG\GR                  S5      GrG\ " \GR                  " SG\S   5      GR                  S5      5      GrSGR                  S G\ 5       5      GrG\GR?                  SS5      Gr\" G\\" 5       5        \)S#:X  aP  SSGKGJGr  S GrSSGKrG\\GR@                  GRB                  Gl        S \GR                  GRD                  Gl#        S Gr$SSGKrG\%\GRj                  GRL                  GR.                  Gl        SSGK'Gr' SS\4S jjGr( SS\4S jjGr) SS\4S jjGr*S Gr+S Gr,    SS jGr-      SS jGr.SS jGr/S Gr0S Gr1S Gr2S r.S Gr3                            SS jrOSGr4S Gr5S Gr6 " S S5      Gr7G\7" 5       Gr8G\9" \GRt                  5      Gr;G\<" G\;5       Hx  u  Gr=Gr>G\>GR                  S5      (       d  M"  G\>GR                  S5      (       d  M<  \" SG\= SG\> S3\" 5       G\A" 5       5         \" SG\> SG\= 3\" 5       G\A" 5       5        Mz     S GrBS GrCS GrDS GrE SSGKFGJGGrG   SSGKHGJIGrI  \ " S S5      5       GrJS\GR                  GR                  SS4S jGrMS\GR                  GR                  S\\GR                  GR                  \/\4   S\
\   4S jGrNS GrOS\GR                  GR                  S\\G\J4   S\GR                  GR                  4S jGrPS GrQG\Q" 5         S GrRS\GR                  GR                  4S jGrSSS\\   S\\   4S jjGrTS GrUg!    GN= f!    GNy= f!    GN1= f!    GN= f!    GN= f!    GN= f!    GN= f!    GN3= f!    GN= f!    GN= f!    GN= f!    GN= f!    GN= f!    GN= f!    GN= f!    GNs= f!    GN[= f!    GNC= f!    GN+= f!    GN= f!    GN= f!    GN= f!   SSQKJr   GN= f!    GM  = f!    GM  = f!    GM  = f!   \" Sn5        SSKrSo r\\GRj                  l         G
N= f!   SSuKJr   G
NN= f!   \" Sx5        SSKrSy \GRj                  GR                  l        SSKrSz \GRj                  l        SKr G
N= f!   SSuKJr   G
N[= f!   \" Sx5        SSKrS| \GRj                  GR                  l        SSKrS} \GRj                  l        SKr G
Nt= f!    G
NL= f! \ a  r\" S\" \5      -   5      eSrCff = f! \ a
    SrSrSr G	Nj\ aG  r\R                  R                  S'S(5      S(:w  a  \" S5        \" \" \5      5        SrSrSr SrCG	NSrCff = f!   \,GR                  " S5         GN= f!    GM  = f!   \" S5        SGrI GNf= f!   SGrGSGrI GNt= f)z2026.1.3)9SUPPORTS_BFLOAT16is_bfloat16_supportedis_vLLM_availableprepare_model_for_kbit_trainingxformersxformers_attentionxformers_version__version__importlib_versionHAS_FLASH_ATTENTIONHAS_FLASH_ATTENTION_SOFTCAPPINGUSE_MODELSCOPEplatform_systempatch_tokenizerget_statistics'Unsloth_Offloaded_Gradient_Checkpointeroffload_to_diskoffload_input_embeddingsoffload_output_embeddings%unsloth_offloaded_gradient_checkpointtorch_compile_optionspatch_linear_scalingpatch_llama_rope_scalingcreate_boolean_masktorch_amp_custom_fwdtorch_amp_custom_bwdpatch_gradient_accumulation_fixpatch_compiling_bitsandbytespatch_regional_compilationpatch_layernormpatch_torch_compilepatch_model_and_tokenizer$patch_unsloth_gradient_checkpointing&unpatch_unsloth_gradient_checkpointingpatch_gradient_checkpointingunpatch_gradient_checkpointingHAS_CUT_CROSS_ENTROPYEMPTY_LOGITSfused_linear_cross_entropyunsloth_fused_ce_loss*patch_unsloth_smart_gradient_checkpointing,unpatch_unsloth_smart_gradient_checkpointingpatch_compiled_autogradprocess_vision_infounsloth_compile_transformerspatch_fast_loravalidate_loftq_configRaiseUninitializedfast_inference_setuppatch_peft_fast_inferenceerror_out_no_vllmdequantize_module_weightpatch_hf_quantizer verify_fp8_support_if_applicable#_get_inference_mode_context_managerhf_loginmake_fast_generate_wrapper    N)UnionOptionalListAnyCallableTupleIterator)system)	dataclassfield)Versionget_quant_type)version   )is_hipget_device_typeDEVICE_TYPEDEVICE_TYPE_TORCHDEVICE_COUNTALLOW_PREQUANTIZED_MODELS)logger)r   )check_python_modulescreate_locked_down_functionexecute_with_time_limitBenchmarker)r   r   r    r!   r,   )
r   r   r"   r#   Unsloth_Gradient_Checkpointerunsloth_gradient_checkpointr$   r%   r*   r+   )r&   r(   _unsloth_get_batch_samplesr)   )r-   )get_transformers_model_typer.   prepare_model_for_training)TEMPORARY_PATCHESignoretorch)actioncategorymodulehuggingface_hubtrlr   
subprocesstransformers
acceleratemultiprocessingmultiprocesstritonbitsandbytesz$transformers.tokenization_utils_base   zCError: torchao not found, please install with `pip install torchao`c                   $    \ rS rSrSrS rS rSrg)HideLoggingMessage   textc                     Xl         g Nrn   )selfro   s     O/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/models/_utils.py__init__HideLoggingMessage.__init__   s    	    c                 <    U R                   UR                  5       ;  $ rq   )ro   
getMessage)rr   xs     rs   filterHideLoggingMessage.filter   s    II/0rv   N)__name__
__module____qualname____firstlineno__	__slots__rt   rz   __static_attributes__ rv   rs   rl   rl      s    I1rv   rl   UNSLOTH_ENABLE_LOGGING01zSleep mode freedzto fall asleepz
to wake upzExecutor is not sleepingzreset prefix cachez@Regarding multimodal models, vLLM currently only supports addingzCannot use FA versionzThe speedupsztorch.distributedaverage_tokens_across_deviceszNo label_nameszThe tokenizer has newForCausalLMLosszThe model weights are not tiedz(Setting `pad_token_id` to `eos_token_id`compile_configzfollowing generation flagszstrongly recommendedhf_xetzrequires tritonz.*quantization_config.*T)r^   messager_   appendz$.*Logical operators 'and' and 'or'.*z
`use_fast`z`use_cache=True`zanti-patternc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )_RaiseUninitializedi  c                 "   > [         TU ]  5         g rq   )superrt   )rr   	__class__s    rs   rt   _RaiseUninitialized.__init__  s    rv   c                     [        U5      R                  5       nSU;   aX  SU;  aQ  SU;  aJ  SU;  aC  SU;  a<  [        R                  R	                  SS5      S:X  a  [        S[        U5       35      eg g g g g g )	Nzsome weights ofzscore.weightzclassifier.weightzcls.predictionszpredictions.decoderUNSLOTH_WARN_UNINITIALIZEDr   zUnsloth: Critical error since some weights are not initialized.
Please try updating Unsloth, transformers and timm via:
`pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo transformers timm`
)strlowerosenvironget	Exception)rr   recordrecord_lowers      rs   emit_RaiseUninitialized.emit  s    6{((*,.|3$L8",6&l: <cBcI} v;-!  J ; 7 9 4 /rv   r   )r|   r}   r~   r   rt   r   r   __classcell__)r   s   @rs   r   r     s     rv   r   c                        \ rS rSrS rS rSrg)r1   i  c                 b    [        5       U l        [        R                  " U R                  5        g rq   )r   error_handlertransformers_logger
addHandlerrr   s    rs   rt   RaiseUninitialized.__init__  s!    02&&t'9'9:rv   c                 D    [         R                  " U R                  5        g rq   )r   removeHandlerr   r   s    rs   removeRaiseUninitialized.remove  s    ))$*<*<=rv   )r   N)r|   r}   r~   r   rt   r   r   r   rv   rs   r1   r1     s    ;>rv   r1   )is_deepspeed_zero3_enabledc                     SnU R                  5        HI  u  p#UR                  R                  S:X  a  USUR                  5       -  -  nM7  XR                  5       -  nMK     U$ )zd
Calculate quant model param count based on difference in param class. Returns int for param count.
r;   
Params4bitrI   )named_parametersr   r|   numel)modelcountnameps       rs   extract_quant_model_param_countr     sY     E))+;;</Q]"EWWYE	 ,
 Lrv   Fc                   ^^ [        5       (       a  S mOS m[        UU4S jU R                  5        5       5      nT(       d<  [        U S5      (       a+  [        U R                  S5      (       a  [        U 5      nUb  UnU$ )zf
Calculate model's total param count. If trainable_only is True then count only those requiring grads
c                 \    [        U S5      (       a  U R                  $ U R                  5       $ )Nds_numel)hasattrr   r   r   s    rs   r   $get_model_param_count.<locals>.numel  s#    !(J!7!71::FQWWYFrv   c                 "    U R                  5       $ rq   )r   r   s    rs   r   r     s    779rv   c              3   h   >#    U  H'  nT(       a  UR                   (       d  M  T" U5      v   M)     g 7frq   )requires_grad).0r   r   trainable_onlys     rs   	<genexpr>(get_model_param_count.<locals>.<genexpr>  s%      ,QNaooa,s   22configquantization_config)r   sum
parametersr   r   r   )r   r   sapproxr   s    `  @rs   get_model_param_countr     sy     "##	G	 	 **, 	A E8$$ELL"78807AHrv   c                     SU ;  a<  SnU R                  SU5      n SnU R                  SU5      n SnU R                  SU5      n U $ )Nz
head_dim (zIf it is not specified, will default to `8`.
        head_dim (`int`, *optional*, defaults to `hidden_size // num_attention_heads`):
            The attention head dimension.z,If it is not specified, will default to `8`.z-num_key_value_heads=8,
        head_dim=None,znum_key_value_heads=8,zlself.sliding_window = sliding_window
        self.head_dim = head_dim or hidden_size // num_attention_heads
z$self.sliding_window = sliding_window)replace)r   add_head_dims     rs   patch_mistral_nemo_configr     s`    6!8 	
 :L
 H 8,G H FUMrv   )layer_type_validation)r	   )PreTrainedConfig)PretrainedConfig)	llamamistralgemmagemma2qwen2graniteqwen3	qwen3_moe	falcon_h1transformers.models.z.configuration_
.modeling__ Configzfrom z import RopeParametersz import RopeParametersrope_scalingz*(\*\*kwargs)[\s]{0,}\,[\s]{0,}\)[\s]{0,}\:zTrope_scaling=None,\n        **kwargs):\n\n        self.rope_scaling = rope_scaling\nr   z4.42.4zimport .z = )cudahip2.4.0r   )device_typexpuz2.6.0z8torch.xpu currently only supports torch.version >= 2.6.0)is_openai_available)OpenAIz4Unsloth: OpenAI failed to import - ignoring for now.c                      gNFr   r   rv   rs   _is_openai_availabler     s    rv   )AutoTokenizer_is_package_available   
flash_attn)flash_attn_gpu)flash_attn_cudaz2.6.3a'  Unsloth: If you want to finetune Gemma 2, upgrade flash-attn to version 2.6.3 or higher!
Newer versions support faster and less memory usage kernels for Gemma 2's attention softcapping!
To update flash-attn, do the below:

pip install --no-deps --no-build-isolation --upgrade "flash-attn>=2.6.3"aL  Unsloth: Your Flash Attention 2 installation seems to be broken?
A possible explanation is you have a new CUDA version which isn't
yet compatible with FA2? Please file a ticket to Unsloth or FA2.
We shall now use Xformers instead, which does not have any performance hits!
We found this negligible impact by benchmarking on 1x A100.c                      gr   r   argskwargss     rs   <lambda>r         Erv   c                      gr   r   r   s     rs   r   r     r   rv   r   c                      gr   r   r   s     rs   r   r     s    rv   c                      gr   r   r   s     rs   r   r     s    SXrv   )z10.0z11.0z12.0z0.0.32.post2zUnsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet. Please build from source via
```
pip install ninja
pip install -v --no-build-isolation -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
```
z2.2.0z0.0.24zUnsloth: You have torch = z but xformers = z/.
Please install xformers < 0.0.24 for torch = z2.3.0z0.0.26z/.
Please install xformers < 0.0.26 for torch = z0.0.27z0.
Please install xformers <= 0.0.27 for torch = )_register_extensionszUnsloth: Xformers was not installed correctly.
Please install xformers separately first.
Then confirm if it's correctly installed by running:
python -m xformers.info

Longer error message:
zP========
Switching to PyTorch attention since your Xformers is broken.
========
ALL_CACHE_IMPLEMENTATIONSdynamicUNSLOTH_COMPILE_DEBUGUNSLOTH_COMPILE_MAXIMUMUNSLOTH_COMPILE_IGNORE_ERRORS)DevicePropertiesreturnc                 B   [         S:X  aB  [        R                  " [        U 5      [        L a  [
        R                  " SU 5      OU 5      nSnOA[        R                  " [        U 5      [        L a  [
        R                  " SU 5      OU 5      nSnUR                  nX2:  a  gg)Nr      r   P   FT)rL   r   createtypeintr]   devicemulti_processor_count)indexpropmin_sms	avail_smss       rs   
is_big_gpur    s    e&&*.u+*<ELL&%
 &&+/;#+=ELL'5
 **Irv   )debugO3ignore_errors)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphsc                  ,    [        S5        SS[        S.$ )NzUnsloth: Enabled auto compilingTF)r   	fullgraphoptions)printr   r   s     rs   torch_compile_kwargsr    s    	
+,( rv   c                  X  ^ [         R                  R                  R                  S:X  a  g [	        [         R
                  5      [	        S5      :  a  g [         R                  R                  mS[        R                  S'   U4S jn TR                  U l        U [         R                  l        g )NUnslothModuleListz2.5.0r   UNSLOTH_PATCHEDc                     > [        U 5      S:X  aZ  [        U5      S:X  aK  [        U S   5      [        L a6  T" U S    Vs/ s H  n[        R                  " US[
        SS9PM      sn5      /n T" U 0 UD6$ s  snf )Nrj   r;   TF)r   r  r  )lenr  listr]   compiler   )r   r   ry   old_module_lists      rs   r  5patch_regional_compilation.<locals>.UnslothModuleList  s    t9>c&kQ.4Q=D3H "&a ")A &*&;(-	 ")
D ///s   %A3)	r]   nn
ModuleListr|   rF   r	   r   r   __doc__)r  r!  s    @rs   r   r     s}    xx##'::u  !GG$44hh))O$'BJJ !0" !0 7 7+EHH
rv   r   use_gradient_checkpointinguse_reentrantc                 "    [        U UUSSSSSS9$ )NFT)r   r&  r'  full_finetuningtrain_layernormstrain_embeddingtrain_lm_headfloat32_mixed_precisionrY   )r   r&  r'  s      rs   r   r     s)    
 &%?% "&	 	rv   )r5   z0.12.0)	LoraLayerzif weight is not None:
zself.to(weight.device)z^([ ]{1,})breakflagsz[\s]{1,}
c              #   2   #    U  H  o[         S  v   M     g 7frq   )spacesr   ry   s     rs   r   r     s     5u!VW:u   z([^\.])nn\.z\1torch.nn.zdef update_layerzdef LoraLayer_update_layerzUnsloth unsuccessfully patched LoraLayer.update_layer. Please file a bug report.
Luckily, your training run will still work in the meantime!UNSLOTH_USE_MODELSCOPE
modelscopezZYou are using the modelscope hub, please install modelscope by `pip install modelscope -U`c                 D   [         R                  R                  SS5      S:X  a  g [        R                  " U5        [        R                  " [        R
                  [        R                  5      R                  X45        g! [        R                   a
  n S nAgS nAff = f)NTRANSFORMERS_OFFLINEr   r   FT)	r   r   r   socketsetdefaulttimeoutAF_INETSOCK_STREAMconnecterror)hostporttimeoutexs       rs   has_internetrD  =  so    	zz~~,c2c9  )fnnf&8&89AA4,O<< s   AB BBc                   ^ ^^	^
 [         R                  " SS9nSSR                  [        R                  R                  5       5      -   n[        R                  R                  SS5      S:H  qT c   SSKJ	m  T" S	5      R                  5       (       a  S
m O^T" S5      R                  5       (       a&  T" S5      R                  5       (       a  US:X  a  SOSm OT" S5      R                  5       (       a  Sm T cS  SU;   a  S
m OJSU;   a	  US:X  a  Sm O;SU;   a  Sm O2SU;   a  Sm O)SU;   a  Sm O SU;   a  Sm OSU;   a  Sm OU4S jn U" 5       m T b?  SS Km
SSKJm	  SSKJn  [#        5       (       a  U	U U
4S jnU" S 5      " U5      n U" 5         g g g ! [         a     Nf = f! [         a    Sm  Naf = f! [$         a    [%        S!5      e[         a
    U" 5          g f = f)"NF)logicalr1  r6  r   r   r;   )Pathz/kaggle/workingkagglez/contentz
/opt/colabrj   colabcolabproz/runpod-volumerunpodz
KAGGLE_z
COLAB_z
RUNPOD_z
AWS_awsz
AZURE_azurez
INVOCATION_IDlambdac                     > Sn U  HX  nT" U5      nUR                  5       (       d  M"  UR                  5       R                  5       nSU;   a    gSU;   a    gSU;   d  MX    g   g)	N)z!/sys/class/dmi/id/product_versionz/sys/class/dmi/id/bios_vendorz/sys/class/dmi/id/product_namez#/sys/class/dmi/id/chassis_asset_tagz/sys/class/dmi/id/sys_vendoramazonrL  zmicrosoft corporationrM  googlegcpother)is_file	read_textr   )vendor_filesvendor_filepathfile_contentrG  s       rs   try_vllm_check'_get_statistics.<locals>.try_vllm_checky  sf    $L (4#K0<<>>+/>>+;+A+A+CL'<7',!8L!H'.!)\!9', (4 #rv   rS  )snapshot_download)rS   c                  p   > TR                  SS9 n T" ST 3SU U S9  S S S 5        g ! , (       d  f       g = f)NT)ignore_cleanup_errorsz
unslothai/)force_download	cache_dir	local_dir)TemporaryDirectory)fr\  
statisticstempfiles    rs   stats_check$_get_statistics.<locals>.stats_check  s?    000NRS%$ZL1)-$%$%	 ONNs   '
5x   a|  Unsloth: HuggingFace seems to be down after trying for 120 seconds :(
Check https://status.huggingface.co/ for more details.
As a temporary measure, use modelscope with the same model name ie:
```
pip install modelscope
import os; os.environ['UNSLOTH_USE_MODELSCOPE'] = '1'
from unsloth import FastLanguageModel
model = FastLanguageModel.from_pretrained('unsloth/gpt-oss-20b')
```)psutil	cpu_countjoinr   r   keysr   r   pathlibrG  existsr   re  ra   r\  unsloth_zoo.rl_environmentsrS   rD  TimeoutError)rd  r_  n_cpuskeynamesrZ  rS   rf  time_limited_stats_checkrG  r\  re  s   `       @@@rs   _get_statisticsrt  L  s   
 .Fdii

 122H ZZ^^$<cBcIN
	$%&--//%
j!((**tL/A/H/H/J/J(.!W
&'..00%

 h&%
x'FaK$
x''
(%
X%"
x'$
"h.%
#*)!/!1J 5G>> (?s'CK'P$(*  c  		\ ! )!(J),   "
 
  sC   0#F A F F F F- 
FFF*)F*-%GGc                    SS K nSUR                  ;   d   UR                  R                  SS5      S:X  a  g U (       a  g SSKJnJnJn  SnU" 5       (       d	  U" 5         Sn[        S 5        [        S	SS
9  [        S:X  a)  [        R                  R                  S5      R                  O([        R                  R                  S5      R                  nUS-  S-  S-  nUS::  a  SnO8US::  a  SnO/US::  a  SnO&US::  a  SnOUS::  a  SnOUS::  a  SnOUS::  a  SnOSn[        SU 35        [        [        S::  a  [        OS 5        U(       a  U" 5         g g )Nr;   UNSLOTH_DISABLE_STATISTICSr6  r   r   )disable_progress_barsenable_progress_barsare_progress_bars_disabledFTrepeat)r_  r   i   r   r        (   0   r  `   zvram-	   )r   r   r   huggingface_hub.utilsrw  rx  ry  rt  rL   r]   r   get_device_propertiestotal_memoryr   rN   )local_files_onlyr   rw  rx  ry  disabledr  vrams           rs   r   r     sM     	%

2::>>2C8C?  H%''DHu5 % 			''*77ZZ--a0== 
 $%,Dqy						eD6N#|q'8|a@B rv   )BitsAndBytesConfigQuantizationMethodzif[\s]{1,}kwargs\:[\s]{1,}.+?\nc              #   2   #    U  H  o[         S  v   M     g 7frq   )length_spacesr4  s     rs   r   r     s      '9!mn9r5  rt   _BitsAndBytesConfig__init__)DistributedTypec                 &    S [         R                  4$ rq   r  NO)rr   r   r   s      rs   _prepare_backendr  
  s    _''''rv   c                  "    [         R                  $ rq   r  r   s     rs   r   r     s
     2 2rv   c                    [        U [        5      (       a  [        R                  " U 5      n OZ[        U [        5      (       a  [        R                  " U 5      n O.[        U [        R                  5      (       a  O[        SU  35      e/ nU HF  nUR                  U :w  a"  UR                  UR                  U 5      5        M5  UR                  U5        MH     [        U5      S:  a  [        U5      $ US   $ )a*  
Move multiple tensors to target device if they're not already there.

Args:
    target_device: The target device to move tensors to
    *tensors: Variable number of tensors to potentially move

Returns:
    tuple: The tensors on the target device (same objects if already on device, new if moved)
zInvalid target device: rj   r;   )

isinstancer  r]   r  r   
ValueErrorr   tor  tuple)target_devicetensorsmoved_tensorstensors       rs   move_to_devicer    s     -%%]3	M3	'	']3	M5<<	0	02=/BCCM==M)  =!9:  (	 
 $'}#5#95O}Q?OOrv   temporary_locationc                    [         R                  R                  X1R                  R                  5      n[         R                  R                  U5      (       d  [         R                  " U5        [         R                  R                  XB S35      n[        U S5      (       a  U R                  OU n [        R                  " U U[        [        R                  S9  [        R                  " USSSS9nXVl        U$ )Nz.ptweight)pickle_modulepickle_protocolcpuTF)map_locationmmapweights_only)r   rX  rk  r   _name_or_pathrn  makedirsr   r  r]   savepickleHIGHEST_PROTOCOLload_offloaded_file_location)Wr   r   r  file_locationfilenameoffloaded_Ws          rs   r   r   >  s     GGLL!3\\5O5OPM77>>-((
M"ww||MV3<8HAx((aA	JJ	 11	 **tEK ,4(rv   c                     [        U R                  5       U SU5      n[        R                  R                  R                  U5      nUR                  Ul        U R                  U5        g )Ninput_embeddings)r   get_input_embeddingsr]   r#  	Embeddingfrom_pretrainedr  set_input_embeddings)r   r  r  new_input_embeddingss       rs   r   r   U  s`     """$e-?ASK !88--==kJ4?4X4X1	34
rv   c                    [        U R                  5       U SU5      n[        R                  R	                  SSS S9nU?X#l        UR                  S   Ul        UR                  S   Ul        UR                  Ul	        U R                  U5        g )Noutput_embeddingsrj   )biasr;   )r   get_output_embeddingsr]   r#  Linearr  shapein_featuresout_featuresr  set_output_embeddings)r   r  r  new_output_embeddingss       rs   r   r   a  s     "##%u.ACUK "HHOOAqO>$#. (3(9(9!(<%)4):):1)=& 	,, 2 
 56
rv   c                      [         $ rq   )r   r   rv   rs   r   r   v  s    rv   c                      [        S5      $ )Nvllmr   r   rv   rs   r   r   z  s     ((rv   c                 <  ^ Ub  Uc   eUc   eUR                   nUR                   nSU  SU  3nSU SU R                  5        SU R                  5        S3n [        R                  " UR                  5      nUR                  S5      mUR                  S	5      nS	R                  U4S
 jU 5       5      nU R                  5        S3n	UR                  SSU	 35      nUR                  SSU R                  5        S35      nSn
U
R                  UR                   UR                   S9n
[        R                  " SU[        R                  [        R                  -  S9n[        U5      S:X  a
  S US-   U-   4$ US   nUR                  XS5      nUS-   U-   nX4$ !    g= f)Nr   r   Zimport torch.nn as nn
from typing import Union, Optional, List, Any, Callable, Tuple
from  import logger, Attention, r   NNdefr1  c              3   ,   >#    U  H	  oTS  v   M     g 7frq   r   r   ry   wheres     rs   r   'patch_linear_scaling.<locals>.<genexpr>       5Hq56H   Attention__init__def __init__def super().__init__()super(Attention, self).__init__()a  
    if getattr(self.config, "rope_scaling", None) is None:
        self.rotary_emb = {rope_function}(
            dim = self.head_dim,
            max_position_embeddings=self.max_position_embeddings,
            base=self.rope_theta,
        )
    else:
        scaling_type = self.config.rope_scaling["type"]
        scaling_factor = self.config.rope_scaling["factor"]
        if scaling_type == "linear":
            self.rotary_emb = {scaled_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                scaling_factor=scaling_factor,
                base=self.rope_theta,
            )
        else:
            raise ValueError(f"Unknown RoPE scaling type {{scaling_type}}")
    pass
    )rope_functionscaled_rope_functionself\.rotary\_emb \= .+?\)r/  r;   

rj   r|   titleinspect	getsourcert   findsplitrk  r   formatrefindallDOTALL	MULTILINEr  )
model_namerope_modulescaled_rope_moduleattention_module	rope_namescaled_rope_namemodel_filepath	exec_codefunction	init_namefix_rope_function
rotary_embr  s               @rs   r   r     s    "'9'EEE'''$$I)22+J<z*NN/
k**:*:*<)=V	E $$%5%>%>? MM% E~~d#Hyy5H55H##%&&78I$yk0BCH
!!#$$?@H* *00#,,1:: 1  %		BLL(J
 :!Y'(222AJ
qAH6!H,Has    F Fc                 l  ^ Ub  Ub  Uc   eUc   eUR                   nUR                   nSU  SU  3nSU SU R                  5        SU R                  5        S3n	 [        R                  " UR                  5      n
U
R                  S5      mU
R                  S	5      n
S	R                  U4S
 jU
 5       5      n
U R                  5        S3nU
R                  SSU 35      n
U
R                  SSU R                  5        S35      n
SnUR                  UR                   UR                   UR                   Ub  UOUR                   S9n[        R                  " SU
[        R                  [        R                  -  S9n[        U5      S:X  a  S U
4$ US   nU
R                  XS5      n
U	S-   U
-   n
X4$ !    g= f)Nr   r   r  r  r  r   r  r  r1  c              3   ,   >#    U  H	  oTS  v   M     g 7frq   r   r  s     rs   r   +patch_llama_rope_scaling.<locals>.<genexpr>  r  r  r  r  r  r  r  r  a  
    if getattr(self.config, "rope_scaling", None) is None:
        self.rotary_emb = {rope_function}(
            dim = self.head_dim,
            max_position_embeddings=self.max_position_embeddings,
            base=self.rope_theta,
        )
    else:
        scaling_type1 = self.config.rope_scaling.get("type", None)
        scaling_type2 = self.config.rope_scaling.get("rope_type", None)
        scaling_type = scaling_type1 if scaling_type1 is not None else scaling_type2
        scaling_factor = self.config.rope_scaling.get("factor")

        if scaling_type == "linear":
            self.rotary_emb = {scaled_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                scaling_factor=scaling_factor,
                base=self.rope_theta,
            )
        elif scaling_type == "llama3":
            self.rotary_emb = {extended_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                base=self.rope_theta,
            )
        elif scaling_type == "longrope":
            self.rotary_emb = {longrope_rope_function}(
                dim = self.head_dim,
                max_position_embeddings = self.max_position_embeddings,
                original_max_position_embeddings = self.config.original_max_position_embeddings,
                base = self.rope_theta,
                short_factor = self.config.rope_scaling['short_factor'],
                long_factor  = self.config.rope_scaling['long_factor' ],
            )
        else:
            raise ValueError(f"Unknown RoPE scaling type {{scaling_type}}")
    pass
    )r  r  extended_rope_functionlongrope_rope_functionr  r/  r;   rj   r  r  )r  r  r  extended_rope_moduler  longrope_moduler  r  r  r  r  r  r  r  r  s                 @rs   r   r     s    	* ,	- '''$$I)22+J<z*NN/
k**:*:*<)=V	E $$%5%>%>? MM% E~~d#Hyy5H55H##%&&78I$yk0BCH
!!#$$?@H&P *00#,,1::!5!>!>.:O
( 1  %		BLL(J
 :!X~AJ
qAH6!H,HMs    F/ /F3c                 F   [         R                  " X [         R                  S9nUS:X  a  [         R                  " USUS9$ [         R                  " USUS9  [         R                  " UR                  U* UR                  S9  UR                  n[         R
                  " X"S9  U$ )Ndtyper;   rj   )diagonalout)r  )r]   onesbooltriuTlogical_not)nsliding_windowmasks      rs   r   r   ,  sw    ::aEJJ/Dzz$1D99	JJt.	JJtvv>/@66D	d'Krv   c            	      V   SSK Jn   [        SS5       GH  n[        SS5       H  nU " SUS9R                  SUU[        R
                  S9R                  S5      R                  S5      nX3R                  5       :H  n[        XS	9n[        R                  " X4:H  5      (       a  M   e   U " SS S9R                  SUU[        R
                  S9R                  S5      R                  S5      nX3R                  5       :H  n[        USS	9n[        R                  " X4:H  5      (       a  GM   e   g )
Nr;   )AttentionMaskConverterrI      rj   T)	is_causalr  r  )r  r  )
%transformers.modeling_attn_mask_utilsr  rangeto_causal_4dr]   float16squeezeminr   all)r  r  r   correct_maskour_masks        rs   test_mask_creationr  8  s$   L1b\q"A& $%& !MM	     (+;+;+==L*qEH99\56666# & # !% \	   WQZWQZ 	 $'7'7'99&1qAyy12222G rv   c                    S nSU;   a$  US   nUc  UR                  S5        O
SU;  a  XRS'   Uc  [        [        U SU 5      SS5      S:w  al  Un[        US5      (       a  UR                  n[        US5      (       a  UR                  nUR
                  R                  n[        R                  " SU S35        U R                  " X/UQ70 UD6nU$ )	Nnum_items_in_batchr   gradient_accumulation_stepsrj   
base_modelr   zUnsloth: Not an error, but z does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient)
popgetattrr   r  r   r   r|   rP   warning_once_old_compute_loss)	rr   r   inputsr   r   r  inner_modelr   outputss	            rs   _unsloth_pre_compute_lossr  a  s    v%#$89%JJ+,!/+='( 	"GD&$/1NPQRVWW;--%00K;((%++K$$--)$ 0_ _	

 $$UDTDVDGNrv   c                 	  ^ SS K n[        U S5      (       Ga  U R                  R                  S:X  a  g UR                  " U R                  5      R                  5       R                  S5      (       d  [        S5      eU R                  R                  S:w  a  [        U l        [        U S5      (       Gd$  UR                  " U R                  5      nSU;   d  SU;   a  UR                  S	5      mUR                  S
5      nS
R                  U4S jU 5       5      nSS Kn[        UR                  5      n/ nU H  nXb;   d  M
  UR!                  U5        M     [#        SSR                  S U 5       5      -   S-   [%        5       5        [&        R(                  " SSU5      n[#        U[%        5       5        [        U l	        U R                  U l        [,        U l	        O[.        R0                  " S5        U R2                  R                  S:X  Gd^  SUR4                  " U R2                  5      R6                  ;  Gd3  UR                  " U R2                  5      nUR                  S	5      mUR                  S
5      nS
R                  U4S jU 5       5      nSS Kn[        UR                  5      n/ nU H  nXb;   d  M
  UR!                  U5        M     [#        SSR                  S U 5       5      -   S-   [%        5       5        UR9                  SS5      nUR9                  SSS5      nUR9                  SS5      n[&        R(                  " SSU5      n[#        U[%        5       5        [:        U l        U R<                  R                  S :w  a   UR                  " U R<                  5      nUb  [@        RB                  " U5      nSS Kn[        UR                  5      n/ nU H  nXg;   d  M
  UR!                  U5        M     [#        SSR                  S" U 5       5      -   S-   [%        5       5        UR9                  S#S$S5      n[&        R(                  " S%S&U5      n[#        U[%        5       5        [D        U l        g g g ! [>         a    S!n Nf = f)'Nr;   get_batch_samplesrW   z(return batch_samples, num_items_in_batchz1Unsloth: Please make a Github issue immediately!!r  zloss *=zloss*=r  r1  c              3   ,   >#    U  H	  oTS  v   M     g 7frq   r   r  s     rs   r   2patch_gradient_accumulation_fix.<locals>.<genexpr>  s     (EHq56Hr  z"from transformers.trainer import (z, c              3   $   #    U  H  ov   M     g 7frq   r   r4  s     rs   r   r"    s     #:z!Az   )zloss[\s]{0,}\*\=zloss = loss *a  Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`_unsloth_training_stepr  c              3   ,   >#    U  H	  oTS  v   M     g 7frq   r   r  s     rs   r   r"    s     91uvYr  c              3   $   #    U  H  ov   M     g 7frq   r   r4  s     rs   r   r"    s     .:a:r$  z-loss *= self.args.gradient_accumulation_stepszPif num_items_in_batch is not None: loss *= self.args.gradient_accumulation_stepszdef training_stepzdef _unsloth_training_steprj   z"if self.model_accepts_loss_kwargs:z	if False:zelse:\n([\s]{4,})self\.accelerator\.backward\(loss, \*\*kwargs\)\n(.+?)if num_items_in_batch is None\:\n(.+?)return loss\.detach\(\) \/ self\.args\.gradient_accumulation_stepszelse:
if num_items_in_batch is None:
loss = loss / self.args.gradient_accumulation_steps
self.accelerator.backward(loss, **kwargs)_unsloth___init__r   c              3   $   #    U  H  ov   M     g 7frq   r   r4  s     rs   r   r"    s     2z!Azr$  r  zdef _unsloth___init__zGif[\s]+hasattr\(\s*unwrapped_model\s*,\s*"accepts_loss_kwargs"\s*\)\s*:z=if hasattr(unwrapped_model, "accepts_loss_kwargs") and False:)#r  r   r   r|   r  stripendswithNotImplementedErrorrW   compute_lossr  r  rk  transformers.trainerdirtrainerr   execglobalsr  subr  r  rP   r  training_step	signaturer   r   r&  rt   r   textwrapdedentr)  )	Trainerr  r  rd   items_in_trainer
good_itemsiteminit_functionr  s	           @rs   r   r     sA    w+,,$$--1MM!!'";";<UWX@AB &C  ((115QQ,F) 7$788",,W-A-AB(H,@$MM%0E'~~d3H#yy(EH(EEH 0'*<+?+?'@$!#J 0+&--d3 !1 <))#:z#::;  		  "vv+'  H
 79-+7G(,3,@,@)'@$	
 	&&*BB  !6!67BBC $$W%:%:;e$>>$'99999 	$|334
$D!!$' % 	0ii.:../ I		
 ##;^
 ##!=q
 ##0
 66W: 

 	Xwy! 6
   $77	#--g.>.>?M $$OOM:M ("<#7#78J((%%d+ ) 4))2z223 		 *11 7M
 FFZOM
 	*0G; % 8  	M	s   Q0 0Q?>Q?c                 j    [        X5      u  pU b!  U R                  R                  S[        05        X4$ )Nunsloth_version)_patch_tokenizerr   updater	   )r   	tokenizers     rs   r   r   #  s6    '9E.<=rv   c                  r    SS K n [        U R                  R                  R                  R
                  l        g )Nr;   )peft.tuners.lora.bnbfast_lora_forwardtunerslorabnb
Linear4bitforward)pefts    rs   r/   r/   *  s#    .?DKK##+rv   c                    [        [        5      [        S5      :  a  [        SS[         S3-   5        g U(       a  US:X  a  [        S5        US4$ [        [	        5       R                  U5      R                  5       5      nU(       a  US4$ S/nU HX  n [        U 40 SU_S	U_S
U_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_S U_6  MZ     [         H
  n!U!" 5         M     UUS!   4$ )"Nr   z==============================zwUnsloth: Unfortunately Unsloth vision and other newer optimized models need Torch 2.4 or later.
You have Torch version z. Please upgrade your Torch version by visiting https://pytorch.org/
For now your models will not get optimized, but will still work for now!Fz`Unsloth: We can't trace models if `trust_remote_code = True`, so turning off some optimizations!Tsdpa_dynamic_masksdpa_bool_maskssdpa_gqa_replacesdpa_dynamic_compilecompile_attentiondisable_causal_maskscompile_torch_modulescompile_custom_modulescompile_function_callsfuse_lm_headgradient_checkpointingmanual_replacementsfast_lora_forwardsfast_residual_streamaccurate_accumulationr  r  r  
cudagraphsr  r  import_from_cachedisablereturn_logitssupports_sdpar;   )	rF   torch_versionr  r  dictfromkeysrl  _unsloth_compile_transformersr[   )"r  r  model_typestokenrevisiontrust_remote_coderM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r  r  r  r\  r  r  r]  r^  r_  unsloth_force_compiler`  
model_typetemporary_patchs"                                     rs   r.   r.   0  s   B } 00&&3_ 5WWW	
 	2e;1	
 E!!tv{388:;KE!!FM!
%	
 1	
 .	
  0		

 $8	
 !2	
 $8	
 %:	
 &<	
 &<	
 (	
 &<	
 #6	
 "4	
 $8	
  %:!	
" .#	
$ (%	
& *'	
( $)	
* +	
, "-	
. !2/	
0 1	
2 *3	
4 *5	
 "< - -a(((rv   a  Unsloth: Logits are empty from 2024.11 onwards. To get raw logits again, please set the environment variable `UNSLOTH_RETURN_LOGITS` to `"1" BEFORE starting to train ie before `trainer.train()`. For example:
```
import os
os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
trainer.train()
```
No need to restart your console - just add `os.environ['UNSLOTH_RETURN_LOGITS'] = '1'` before trainer.train() and re-run the cell!c                       [        [        5      erq   )r-  LOGITS_ERROR_STRINGr   s     rs   raise_logits_errorrn    s    
1
22rv   c                      g rq   r   r   s     rs   return_nonerp    s    rv   c                   4    \ rS rSrS rS r\r\rS r	S r
Srg)EmptyLogitsi  c                     g rq   r   r   s    rs   rt   EmptyLogits.__init__      rv   c                 &    US:X  a  [         $ [        $ )Nr  )rp  rn  )rr   attrs     rs   raise_getattr_errorEmptyLogits.raise_getattr_error  s    "dl{B0BBrv   c                     [         $ rq   rm  r   s    rs   __repr__EmptyLogits.__repr__      ""rv   c                     [         $ rq   r{  r   s    rs   __str__EmptyLogits.__str__  r~  rv   r   N)r|   r}   r~   r   rt   rx  rn  __getitem____getattr__r|  r  r   r   rv   rs   rr  rr    s#    C %K%K##rv   rr  __z
def raise_z(*args, **kwargs): print('z')zEMPTY_LOGITS.z	 = raise_c                 4   SSK Jn  U c  0 n [        [        R                  " U5      5      nSU;   nUS:w  a  [
        R                  " SU S35        US:w  a  [
        R                  " SU S35        [        U5      [        L d  US:X  d  US	:X  d  US
:X  d  [        S5      eUS	:X  as  U(       d  SS K n[        SUR                   S35      eU 0 :X  a#  SSK Jn	  [
        R                  " S5        U	" SSS9n [        UR                  S5      (       a  [        S5      eU $ )Nr;   )
LoraConfigloftq_configzMUnsloth: Dropout = 0 is supported for fast patching. You are using dropout = zW.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.nonezLUnsloth: bias = `none` is supported for fast patching. You are using bias = gaussianloftqcordazXUnsloth: `init_lora_weights` must be either [True, False, "gaussian", "loftq", "corda"].zUnsloth: Your PEFT version of z does not support LoftQ init.
Please install PEFT 0.7.2 or higher.
You can also install from source: `pip install git+https://github.com/huggingface/peft.git)LoftQConfigzUnsloth: init_lora_weights = `loftq` is set, but `loftq_config` is None.
We shall use `loftq_config = LoftQConfig(loftq_bits = 4, loftq_iter = 1)`.   rj   )
loftq_bits
loftq_iterr   zUnsloth: You are using `loftq` init, yet `load_in_4bit = True` was set.
Reload your model without any quantization by setting `load_in_4bit = False`.)rK  r  r   r  r6  rP   r  r  r  r  RuntimeErrorr	   r  r   r   )
r  lora_dropoutr  init_lora_weightsr   r  r6  SUPPORTS_LOFTQrK  r  s
             rs   r0   r0     sT   G%%j12I#y0Nq[\h[i jd e	

 v~Z[_Z` ad e	
 	4'
*''f
 	
 G#01A1A0B Cm m  2(] 'AAFL5<<!677` 
 rv   c                    Sn[        5       (       d  [        R                  " S5        SnSSKJnJn  U" 5         U R                  S5      (       a8  U" X5      (       d+  [        R                  " SU  35        U S [        S5      *  S-   n X 4$ )	NTz;Unsloth: vLLM is not installed! Will use Unsloth inference!Fr;   )
patch_vllmvllm_dynamic_quant_supportedzunsloth-bnb-4bitzmUnsloth: Switching from Unsloth dynamic quant to normal quant since
we do not yet support fast inference for zbnb-4bit)r   rP   r  unsloth_zoo.vllm_utilsr  r  r,  r  )r  model_configfast_inferencer  r  s        rs   r2   r2     s    NI	
 
 L-..+JEE<<F<I $$>s+='>&>?*LJ%%rv   c                 X   [        U R                  SS 5      nUb  U R                  R                  U l        U R                  R                  U l        U R                  R                  U l        SSKJnJn  [        R                  " X 5      U l        [        R                  " X05      U l        g g )Nvllm_enginer;   )	save_lora	load_lora)
r  r   r  fast_generatefast_generate_batchesr  r  r  	functoolspartial)r   r  r  r  s       rs   r3   r3     s    %++}d;K!KK33#kk77&+kk&G&G# 	@#++I=#++I= rv   c                      [        S5      e)NzdUnsloth: vLLM is not yet supported for fast inference for this model! Please use `.generate` instead)r-  r   s     rs   r4   r4     s    
n rv   )AOBaseConfigInt4WeightOnlyConfigzDUnsloth: TorchAO changed `torchao.quantization.Int4WeightOnlyConfig`c                       \ rS rSr% Sr\\   \S'   \" S S9r	\
\S\\\R                  R                  \/\4      4      \S'   Sr\\\R                  R                  /S4      \S	'   S
rg)TorchAOConfigi.  int4
qat_schemec                      [        SS9S 4/$ )N   
group_sizec                 x    [        U [        R                  R                  5      =(       a    [	        U SS5      S:  $ )Nr  r;   r  )r  r]   r#  r  r  mr   s     rs   r   (TorchAOConfig.<lambda>.<locals>.<lambda>9  s.    Z588??; 8A}a0C78rv   r  r   rv   rs   r   TorchAOConfig.<lambda>6  s    $#68#
rv   )default_factoryr  base_config_and_filter_fnsNprequantization_transformr   )r|   r}   r~   r   r  r=   r   __annotations__rE   r  r>   rA   r@   r]   r#  Moduler  r  r   r   rv   rs   r  r  .  s     &J&
 	
	 nhx#0F0L'MNNO! 
 NRx%((//1BD1H(IJQrv   r  c                    [        U R                  S5      (       a  SU R                  l        U R                  5       nU R	                  5       =(       d    [        U SS5      nUc  [        S5      eUR                  R                  UR                  R                  :X  d5   SUR                  R                   SUR                  R                   35       eUR                  R                  5       UR                  R                  5       :X  aj  [        R                  " 5          UR                  R                  5       R                  5       nSSS5        [        R                  R                  W5      Ul        S nUR!                  X R"                  5      U l        UR                  R                  5       UR                  R                  5       :w  d   S	5       eg! , (       d  f       N= f)
z
Utility to untie input/output embeddings in a HuggingFace model.
This is useful if we want to quantize the input/ouput embeddings differently.
Model is modified in-place.
tie_word_embeddingsFlm_headNz,Couldn't locate output projection (lm_head).zShape mismatch: out_proj z vs in_emb c                     g rq   r   r   s    rs   _no_tie/_untie_input_output_embeddings.<locals>._no_tie`  ru  rv   zEmbeddings still tied!)r   r   r  r  r  r  AttributeErrorr  r  data_ptrr]   no_graddetachcloner#  	Parameter__get__r   tie_weights)r   in_embout_projr  r  s        rs   _untie_input_output_embeddingsr  C  sn    u||233+0( '')F**,Oy$0OHKLL 	!4!44[	"8??#8#8"9V]]EXEXDYZ[4 !V]]%;%;%==]]_$$&,,.A ((,,Q/  ?E 	  "fmm&<&<&>>  > _s   )G
G!	filter_fnc              #   d   #    U R                  5        H  u  p#U" X25      (       d  M  Uv   M     g7f)zq
Given a model and a filter function (m, fqn) -> bool,
yield fully qualified names (FQNs) of modules that match.
N)named_modules)r   r  fqnr`   s       rs   _filter_fn_to_fqnsr  k  s,      **,V!!I -s   !0	0c           	      N   SSK Jn  SSKJnJn  SSKJn  SSKJn  0 nU R                  R                   HT  u  pxU" X" USS9US9  Uc  S	U;   a  [        S
5      eXvS	'   M+  [        X5       H  n	X;   a  [        SU	 35      eXvU	'   M     MV     U R                  5       n
U R                  5       =(       d    [        U SS 5      n0 n[!        U
R"                  U5      (       d  Ub%  [!        UR"                  U5      (       a
  SUS'   / US'   U" U5      nU" SSU0UD6nXR$                  l        g )Nr;   )TorchAoConfig)	quantize_ModuleFqnToConfig	QATConfig)TorchAOBaseTensorconvertstepr  _defaultz0Cannot use multiple default quantization configsz(Found multiple quantization configs for r  Tinclude_input_output_embeddingsmodules_to_not_convert
quant_typer   )rd   r  torchao.quantizationr  r  torchao.quantization.qatr  torchao.utilsr  _torchao_configr  r  r  r  r  r  r  r  r   r   )r   r  r  r  r  r  module_to_fqn_dictbase_configr  r  r  r  r   quant_configr   s                  rs   _convert_torchao_modelr  x  s/   *A2/"'"7"7"R"R%;yAyY // !STT-8z*)%;,$'OPSu%UVV*53' < #S '')F**,Oy$0OHF&--!233HOO=N!O!O4801+-'($%78L'L\LVL':LL$rv   r  c           
        ^  SSK JnJn  SSKJnJn  SSKJn  US:X  a;  [        U R                  5      n[        S U 5       5      nU(       a  [        S5        Sn[        U[        5      (       Gd*  S	n	US
:X  a#   SSK Jn
  SmU
" 5       nU4S jn[        UX4/S9n	OUS:X  a   SSK Jn  U" U" 5       S9n[        XS	4/S9n	OUS:X  aR   SSK JnJn  [        UU" [(        R*                  U" S5      S9S 4U" [(        R,                  U" S5      S9S	4/[.        S9n	OtUS:X  a"   SSK Jn  SmU" TS9nU4S jn[        UX4/S9n	OLUS:X  a8   SSK Jn  SSKJn  U" [(        R*                  U" S5      S9nS n[        UX4/S9n	O[3        SU 35      eU	c
   S U 35       eOUn	U n[5        US!5      (       a&  U	Ul        UR8                  n[5        US!5      (       a  M&  U	Ul        U	R:                  b  U	R;                  U 5        U	R<                   H  u  pU" X" US"S#9US$9  M     U $ ! [         a    [        [        5      ef = f! [         a    [        [        5      ef = f! [         a    [        [        5      ef = f! [         a    [        [        5      ef = f! [         a    [        [        5      ef = f! [         a    [        [        5      ef = f)%a  
Transform a model for Quantization-Aware Training (QAT) during fine-tuning.

On a high level, this means fake quantizing the base (frozen) model during training.
Fake quantization refers to simulating quantization numerics in high precision (e.g. bf16).
This helps mitigate quantization degradations when the model is quantized after training.

QAT can be optionally combined with LoRA fine-tuning to for additional throughput improvement.
For more details: https://dev-discuss.pytorch.org/t/speeding-up-qat-by-1-89x-with-lora/2700
r;   )PerRowr  )PerGroupPerAxisr  z	int8-int4c              3   D   #    U  H  nS U;   =(       d    SU;   v   M     g7f)gemma3gemma_3Nr   )r   mts     rs   r   )_prepare_model_for_qat.<locals>.<genexpr>  s      PKbB9)r/9Ks    zUnsloth: Gemma3 has a large vocabulary causing int8 embedding issues. Switching to int4 weight-only QAT for training stability.r  Nzfp8-int4)'Float8DynamicActivationInt4WeightConfigr  c                 x   > [        U [        R                  R                  5      =(       a    U R                  T:  $ rq   r  r]   r#  r  r  r  r   r  s     rs   r   (_prepare_model_for_qat.<locals>.<lambda>  *    Z588??; 0MMZ/0rv   )r  r  zfp8-fp8))Float8DynamicActivationFloat8WeightConfig)granularity)%Int8DynamicActivationIntxWeightConfigIntxWeightOnlyConfig)weight_dtyper  c                 J    [        U [        R                  R                  5      $ rq   )r  r]   r#  r  )r  r  s     rs   r   r    s    z!UXX5G5G'Hrv       )r  weight_granularity)r  r  r  r  r  c                 x   > [        U [        R                  R                  5      =(       a    U R                  T:  $ rq   r  r  s     rs   r   r    r  rv   int8)r  )r  c                 J    [        U [        R                  R                  5      $ rq   )r  r]   r#  r  r  s     rs   r   r  	  s    Z588??%Crv   zUnexpected QAT scheme zTorchAOConfig was not set for r   preparer  r  )r  r  r   torchao.quantization.granularityr  r  r  r  ImportErrorTORCHAO_MSGrX   r   anyr  r  r  r  r  r  r  r]   r  r  r  r  r  r   r  r   r  r  )r   r  r  r  r  r  r  re  	is_gemma3torchao_configr  r  r  r  r  r  r  r  r  s                     @rs   _prepare_model_for_qatr    sE   ':F6 [ 1%,,?PKPP	L  Jj-0026#/X JACK0  +'/:.F-GN 9$/
 D$hK +'UYGZF[N ;&/ +' -+0::WQZ I	 >+0::HUWL 	. -K!N$ 6!/E J.JGK0  +'/:.F-GN 6!/ED /$zz%ajK DI*'/:.F-GN
 5j\BCC)X-KJ<+XX)# K
+w
'
'&4#!'' +w
'
' #1K//;007"0"K"K%;yAyY #L Lq  '+&&'(  /!+../"  /!+../  /!+../.  /!+../   /!+../sG   H 9H: "I I4 J J. H7:II14JJ+.Kc                  f   S n  SSK Jn  [        U 5      Ul        [        U 5      Ul         SSKJ	n  [        U 5      Ul        [        U 5      Ul        g ! [
         a#  n[        R                  " SU 35         S nANPS nAff = f! [
         a#  n[        R                  " SU 35         S nAg S nAff = f)Nc                     gNTr   r   s    rs   make_trainable*patch_hf_quantizer.<locals>.make_trainable)	  s    rv   r;   )FineGrainedFP8HfQuantizerz1Failed to patch FineGrainedFP8HfQuantizer. Error )FbgemmFp8HfQuantizerz,Failed to patch FbgemmFp8HfQuantizer. Error )
1transformers.quantizers.quantizer_finegrained_fp8r  propertyis_trainableis_qat_trainabler   rP   warning,transformers.quantizers.quantizer_fbgemm_fp8r  )r
  r  er  s       rs   r6   r6   '	  s    P	
 2:.1I!.5=n5M!2KU,4^,D)080H-  PJ1#NOOP  KEaSIJJKs.   &A &B 
B A;;B 
B0B++B0c                    [        U 5      nUS;   a  [        S:w  a  [        S[         S35      e[        S:X  a  [        R                  R                  5       u  p#US:X  a1  US:  a+  [        S[        R                  R                  5        S35      eUS	:X  a8  US
-  U-   S:  a+  [        S[        R                  R                  5        S35      eg g g )N)
fbgemm_fp8fp8r   zHUnsloth: FP8 quantization is only supported on CUDA GPUs. You are using r   r  r  zoUnsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using zC. Refer to https://developer.nvidia.com/cuda-gpus for more details.r  
   Y   zwUnsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using )rG   rL   r  r]   r   get_device_capabilityget_device_name)r  quant_methodmajor_versionminor_versions       rs   r7   r7   B	  s1   !,/L,,1FVWbVccde
 	

 f',zz'G'G'I$<'MA,= B  CH  CM  CM  C]  C]  C_  B`  `c  d  5 ]R%7-%G"%L J  KP  KU  KU  Ke  Ke  Kg  Jh  hk  l  &M  rv   c                     [        U SS5      nUb"  UR                  c  [        R                  " 5       $ [        R                  " 5       $ )a  
If the state dict was quantized using torchao, we will run into
the following error when calling ops like aten.t() in inference mode.
This is a bug in PyTorch that affects all tensor subclasses.

    Cannot set version_counter for inference tensor

For now, we work around this issue by using `torch.no_grad()` in this case.
See https://github.com/pytorch/pytorch/issues/164872 for more details.
Otherwise, just return `torch.inference_mode()`.
r  N)r  r  r]   r  inference_mode)r   r  s     rs   r8   r8   X	  s?     U$4d;N!n&?&?&G}}##%%rv   rf  c                     U c   SSK Jn  U" 5       n U c  g   SSK Jn  U" U S9  U $ !    g = f! [         a$  n[        R
                  " SU 35         S nAU $ S nAff = f)Nr;   )	get_token)login)rf  z7Failed to login to huggingface using token with error: )ra   r"  r#  r   rP   info)rf  r"  r#  r  s       rs   r9   r9   k	  sr    }	1KE} S)e	  SMaSQRRLSs   & - *
AAAc                 F   ^  [         R                  " T 5      U 4S j5       nU$ )zo
Creates a wrapper around model.generate that checks for incorrect
vLLM-style usage when fast_inference=False.
c                  `  > SU;   a  [        S5      eSU;   a  [        S5      e[        U 5      S:  au  U S   nSn[        U[        5      (       a  SnOD[        U[        [
        45      (       a)  [        U5      S:  a  [        US   [        5      (       a  SnU(       a  [        S5      eT" U 0 UD6$ )	Nsampling_paramszUnsloth: `sampling_params` is only supported when `fast_inference=True` (vLLM). Since `fast_inference=False`, use HuggingFace generate arguments instead:
  model.fast_generate(**tokens.to('cuda'), max_new_tokens=64, temperature=1.0, top_p=0.95)lora_requestzUnsloth: `lora_request` is only supported when `fast_inference=True` (vLLM). Since `fast_inference=False`, LoRA weights are already merged into the model.r;   FTa  Unsloth: Passing text strings to `fast_generate` is only supported when `fast_inference=True` (vLLM). Since `fast_inference=False`, you must tokenize the input first:

  messages = tokenizer.apply_chat_template(
      [{"role": "user", "content": "Your prompt here"}],
      tokenize=True, add_generation_prompt=True,
      return_tensors="pt", return_dict=True
  )
  output = model.fast_generate(
      **messages.to('cuda'),
      max_new_tokens=64,
      temperature=1.0,
  ))r  r  r  r   r  r  )r   r   	first_argis_string_inputoriginal_generates       rs   _fast_generate_wrapper:make_fast_generate_wrapper.<locals>._fast_generate_wrapper	  s     &m  V#`  t9q=QI#O)S))"&Ie}55#i.1:LilC00&*O  " !$1&11rv   )r  wraps)r+  r,  s   ` rs   r:   r:   	  s*     __&',2 (,2\ "!rv   )F)TT)z8.8.8.85      r	  ) _unsloth_temporary_saved_buffers)r   NNN)r   NNNNN)i   i   )NNFTTTTTTTTTTTTTTTTFTFFTFFFFrq   (V  r	   __all__r]   typingr<   r=   r>   r?   r@   rA   rB   platformrC   r   numpynp
contextlibr  dataclassesrD   rE   r  r7  loggingwarningsrc   r  ri  r   mathunsloth_zoo.utilsrF   rG   importlib.metadatarH   r
   r   rJ   rK   rL   rM   rN   rO   unsloth_zoo.logrP   unsloth_zoo.tokenizer_utilsr   r@  ro  rQ   rR   rS   rT   unsloth_zoo.patching_utilsr   r   r    r!   r,   "unsloth_zoo.gradient_checkpointingr   r   r"   r#   rU   rV   r$   r%   r*   r+   unsloth_zoo.loss_utilsr&   r(   rW   r)   unsloth_zoo.vision_utilsr-   unsloth_zoo.compilerrX   r.   rd  unsloth_zoo.training_utilsrZ   unsloth_zoo.temporary_patchesr[   rk  filterwarningsUserWarningFutureWarningRuntimeWarning	getLoggersetLevelCRITICALr  Filterrl   r   r   vllm.worker.workervllm_worker_logger	addFiltervllm.v1.worker.gpu_workervllm_gpu_worker_loggervllm.executor.executor_basevllm_executor_logger$vllm.core.block.prefix_caching_blockvllm_prefix_caching_loggervllm.v1.core.block_poolvllm_block_pool_loggervllm.lora.modelsvllm_lora_model_loggervllm.attention.utils.fa_utils$vllm_attention_utils_fa_utils_loggertransformers.training_args!transformers_training_args_loggerr/  transformers_trainer_loggertransformers.modeling_utils"transformers_modeling_utils_loggeraccelerate.utils.modeling accelerate_utils_modeling_loggertransformers.generation.utils$transformers_generation_utils_logger+transformers.generation.configuration_utilsconfiguration_logger*transformers.models.gemma3.modeling_gemma3gemma3_loggerhuggingface_hub.file_download
hub_logger'transformers.quantizers.quantizer_mxfp4mxfp4_loggertransformers.processing_utilsprocessing_utils_logger.transformers.models.auto.image_processing_autotrainer_loggertransformers.utils.genericmodeling_utils_loggerr   Handlerr   r1   transformers.trainer_pt_utilsr   r   r   rd   trainer_pt_utilsr1  r    transformers.configuration_utilsr   transformers_versionr   r   model_architecturesr  config_filepathr  r  r   config_filenamer2  r3  r  evalr   r4  ra  r   amp
custom_fwdr   
custom_bwdr   r  transformers.utilsr   openair   r  r   utilsri   rH  r   transformers.utils.import_utilsr   r   r   r   r  r  r  cacheflash_attn.flash_attn_interfacer   r   r   flash_attn_versionimport_utilsis_flash_attn_2_available_xformers_loggerERRORr   r   r-  r  xformers._cpp_libr   r   r?  r   xformers.ops.fmhaopsfmhamemory_efficient_attentionr   ModuleNotFoundErrorr  rb   trl_versionr   
generationconfiguration_utilsr  r   r  r   r   r   r   torch._inductor.runtime.hintsr   	lru_cacher  r  torch._inductor.utils	_inductorr   re   r  TorchDynamoPlugin	to_kwargsacceleratorr   r   rK  peft_versionpeft.utils.integrationsr5   peft.tuners.lora.layerr.  update_layersourcero   r  r  startendr  r  r3  matchgroupr  linesrk  LoraLayer_update_layerpeft.tuners.lorar  	importlibr   util	find_specr:  rD  rt  r   &transformers.utils.quantization_configr  r  rt   BitsAndBytesConfig__init__r  accelerate.utils.dataclassesr  r  accelerate.statestatePartialStateAcceleratordistributed_typer  r  r   r  r   r   r   r   r   r   r   r   r  r  r   r/   rm  rn  rp  rr  r'   r0  Tensor	functions	enumeratejr  
startswithr,  localsr0   r2   r3   r4   torchao.core.configr  r  r  r  r#  r  r  r  r  r  r6   r7   r8   r9   r:   r   rv   rs   <module>r     s   <|  H H H .!#   	 (    6 6 6 5 ;  #      )O )
 	  kG T   mg V   kL] ^   -:K 	  kE R   me T   mj Y   n| \   kN [   ml [   .;L 	  n~ ^   kH U   kN [   8 9 B B7CSCSVWCW XS1 1 ::>>*C0C7C$$%78J%KLN(();<N)OP"N&&'9:J'KL&&'9,'GH&&'9:T'UV 	
 	#,,-?@T-UV&L(();<P)QR"
E((R	

 #
	
 	-6667	
 1
 S ! + +,>~,N O ! + +,>?R,S T ! + +67 & G  % %&89I&J K  % %&89P&Q R	X&001CDU1VW*
	T$..;< 	)
	 )22EF )223EFV3WX,
	 ""#56R#ST
	R./EFG
	B+H56
	N-.?@A
	,		9		O%%&8&FG
	 %%&8&FG
	=/0BCD
	C/0BCD
	K##$6~$FG F'// ,> > E
6 %6K   3 -B   *&	 G =.-
  &J,ZL
|TO+J<z*NN#))+33C;<FCOu_%Xo->?K""4#89 6!	5))?@')L VV5	8 	F Y'(GH,==.v6F7?#	$gi0OAo.c/1B	CWYOI &R !!/!} 00$zz~~88$zz~~88$yy33&3I$yy33&3IE} 00UVV$yy33%3H$yy33%3H6 3	F!  & A  "' &#(::#C#C#E M='0uzz7W7W'XEJJ$  ..*,PN '+# I29&3W%3&/ 7e: #( $E\**(	(LJ #' E.56H.IWN /+ 3a# +V E
	((4gmm,Q8
 $)::#C#C#E M=/=/*.FF !gn&=%??!
 	
  } 00W6		6 (7GHXGY Z<<I?!M
 	
 
	''"2	2w8		8 (7GHXGY Z<<I?!M
 	
 
	''"2	2w88 (7GHXGY Z==JO1N
 	

 7	
 )(!<<  + 	J 3
<""668STT\$$88RRS	 **>>XXY ##77QQXX 

'>DK **..)BCHCO JJNN2C8C?  ; T  $ #-     !	 1 *   <P
     . . 8/C
   " " ,5I
   ( ( 2N ,0$( ( D> 		( - <
<78,,0
""9#9#9:)D!CI-kk2E:.MaPuS 16:RXXk6288;<T"5u55? 24PQVWY 	5!7	.!7	   8#>#E>~~-5h
 	
  Q  hV4r 
 %../A/J/JK VV&LL	  8==dC BHH[*DQ*GHNNqQR!YY '9'   8??!   +1<<( 5EJ!!22 &&7P: .      & & 9 9 B  /Q(+0 &H	"	 &H"*) 	ET aH	&3R@_1D@  !!! !?U)vI 3# #" }	Y'KAx4  X%6%6t%<%<5hZrBGIvx	
	=
)A37FHM (8v&0>
 00$== R R R(% %((// % d % P
88??
#.45
 c]
 ;FI88??I(-c=.@(AI
XX__IXK0  ,&uxx &&HSM Xc] (5"qE:														l	
.--(	JFDE!	 2F.2POO ,R 7 2 ""//I * 2 ""< ',#LKK 	(N 3 . ++E &;XL8"'	v  
& ),E
	3
 	

  H 	zz~~.4;a	
 	c!fHp
J	
X	^$TU# Ls  AH 7AH
 AAH 'AH AH" 'AH* AH2 -AH: AI -7AI
 %AI AI %AI" AI* %AI2 9AI: AJ -AJ
 AJ -AJ AJ" AJ* AJ2 "AK 7AK	AK*AK ;AL (AL AM (AM+  /AN6 0C4AO %AN> ,AO D9AP; CAQC6	AQ2 D 	AQ HAHH
AHHAHHAHH"AH'H*AH/H2AH7H:AH?IAII
AIIAIIAII"AI'I*AI/I2AI7I:AI?JAJJ
AJJAJJAJJ"AJ'J*AJ/J2AJ=K AKK	AKKAKK#ALLALLAL LAAMMAM(M%AM+ M+AAN3N6AN;N>AOOAOOAOOAO OAP8O,AP8O1<AP3P3AP8P;AQQAQQAQ/Q,AQ2 Q/AQ2 Q2AQ=