
    oi1             /          / S Qr SSKJrJrJrJrJr  SSKrSSK	r	SSK
Jr  SSKrSSKJr  SSKrSSKrSSKrSSKrSSKrSSKrSSKJr  SSKrSSKrSSKrSSKrSSKrSSKJr  SS	KJrJ r J!r!  SS
K"7  SSK#J$r$J%r%J&r&  SSK'J(r(  SSK)J*r*J+r+  SSK,J-r-  SSK.J/r/  SSK0r0 " S S\0Rb                  5      r2 S r3S r4S r5SS jr6S r7 \Rp                  Rs                  S5      Gb  SSK:Jr;  \+(       d@   SSK<J-r=  \=R|                  " \2" S5      5        C= SSK?J-r=  \=R|                  " \2" S5      5        C= S\@4S jrA S rBS rC SSKDr:\E" \:R                  R                  R                  S5      (       a&  \B\:R                  R                  R                  lB        Oe\E" \:R                  R                  R                  R                  S5      (       a/  \C\:R                  R                  R                  R                  lC         SSKJr:\E" \:R                  R                  R                  R                  S5      (       dF   SS \R                  R                  S!\R                  S"\\R                     S#\R                  4S$ jjrQOz\:R                  R                  R                  R                  R                  rR SS \R                  R                  S!\R                  S"\\R                     S#\R                  4S% jjrQ  S& rS  " S' S(\:R                  R                  R                  R                  R                  5      rT \R                  4S) jrV S* rW S+ rX SS,KYJZr[  SS-K\J]r^J_r`  S. ra S/ rb S0 rcOS1 rS S2 rV S3 rW S4 rX S5 ra S6 rc  \Rp                  Rs                  S75      bf  SSKdrMSS8KeJfrfJgrg  \hS9\\@\4   S:\R                  S#S;4S< j5       rj SSKkrM " S= S>\MR                  R                  R                  5      rm S? rn S@ ro SA rpOSB rn SC ro SD rp  SE rq SF rr SSG jrs S#\t4SH jru SSJ jrvSSK jrw \R                  SL 5       ry \R                  \R                  SSI4SM j5       rz           SSN jr{ \R                  SO 5       r} SSP jr~ S#\t4SQ jr                         SST\@SU\SV\SW\GR                  SX\tSY\tSZ\S[\tS\\S]\S^\tS_\tS`\tSa\tSb\tSc\Sd\Se\Sf\tSg\tSh\tSi\tSj\4.Sk jjr SSl jr \R                  Sm 5       r \R                  Sn 5       r So r Sp r Sq r SSrKJr  \R                   SSs j5       r \R                  0 \GR                  4St j5       r \R                  SSu j5       r SSv jr SSw jr Sx r \R                  " 5       Sy 5       rSSz jr S#\@4S{ jr SS| jr \R                  SR\R                  S}S~SSSISISISISI4
S j5       r S rg!    GN= f!    GN= f!    GN = f))
patch_vllmvllm_dynamic_quant_supportedconvert_vllm_to_huggingfaceget_vllm_state_dictassert_same_state_dict	load_vllmcreate_batchesdelete_vllm	save_lora	load_loragenerate_batchesconvert_lora_modulesreturn_lora_modulesget_lora_supported_ranks    )OptionalListTupleDictAnyN)OrderedDictdeepcopy)__version__)partial   )
_get_dtypeget_quant_typeVersion)*)dtype_from_configadd_dtype_kwargsset_dtype_in_config)patch_model_and_tokenizer)get_torch_compile_optionsUNSLOTH_ENABLE_LOGGINGlogger)DEVICE_TYPEc                        \ rS rSrS rS rSrg)HideLoggingMessageG   c                     Xl         g Ntext)selfr/   s     P/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/vllm_utils.py__init__HideLoggingMessage.__init__H   s    $i    c                 <    U R                   UR                  5       ;  $ r-   )r/   
getMessage)r0   xs     r1   filterHideLoggingMessage.filterI   s    TYY!,,.%@Ar4   r.   N)__name__
__module____qualname____firstlineno__r2   r8   __static_attributes__ r4   r1   r*   r*   G   s    .Ar4   r*   c                      g r-   r?   )argskwargss     r1   _return_nothingrC   L   s    Tr4   c                     U $ r-   r?   r0   rA   rB   s      r1   _return_selfrF   M   s    r4   c                     U R                   $ r-   )	tokenizerrE   s      r1   _return_self_tokenizerrI   N   s    $..3Hr4   c                 z    [         S:X  a  [        R                  " SU 5      $ [        R                  " [         U 5      $ )Nhipcuda)r(   torchdevice)indexs    r1   get_target_devicerP   P   s,    e||FE**<<U++r4   c                      [         S:X  a#  [        R                  R                  5       u  pX4$ [        R                  R                  5       u  pX4$ )Nxpu)r(   rM   rR   mem_get_inforL   )free_memorytotal_memorys     r1   get_mem_inforV   U   sJ    e$)II$:$:$<! $$ %*JJ$;$;$=!$$r4   vllmzonly supports adding LoRAprefixc                    ^ U R                  S5      m[        U4S jU 5       5      n[        U4S j[        [	        T5      5       5       5      m[	        [        U5      T-  5      S:g  nU=(       d    U$ )N.c              3   .   >#    U  H
  nUT;   v   M     g 7fr-   r?   ).0module_name
componentss     r1   	<genexpr>'is_layer_skipped_bnb.<locals>.<genexpr>w   s      
4 :%4s   c              3   R   >#    U  H  nS R                  TSUS-    5      v   M     g7f)rZ   Nr   )join)r\   ir^   s     r1   r_   r`   }   s*     T=S*Tac"233=Ss   $'r   )splitanysetrangelen)rX   llm_int8_skip_modules
vllm_checkunsloth_checkr^   s       @r1   is_layer_skipped_bnbrl   s   sh    \\#&
 
4
 

 TU3z?=STT
C 56CDI*]*r4   c                     U $ r-   r?   )quant_statess    r1   dequantize_dqro          r4   c                     U$ r-   r?   )r0   rn   s     r1   _dequantize_dqrr      rp   r4   ro   rr   apply_bnb_4bitlayerr7   biasreturnc           	      h   SSK Jn  UR                  nUR                  nSnUR                  S:  a#  UR                  SUR                  S5      5      nSnUR                  nUR                  n	UR                  n
U	S   R                  nUR                  U5      nUR                  S   n[        U	R                  5        Vs/ s H  oS   R                  S   PM     sn5      n[        R                  " UUUUR                  S9nSn[!        [#        U	5      5       HL  nU	U   R                  S   nU" XU
U   U
US-       R%                  5       U	U   5      US S 2UUU-   24'   UU-  nMN     UR                  U5      nU(       a&  UR&                  " / US S QUR                  S5      P76 nUb  UU-  nU$ s  snf )	Nr   )matmul_4bitF   Tr   dtyperN   )bitsandbytesrx   r|   shapendimreshapesizeweightbnb_quant_statebnb_shard_offsetstosumitemsrM   emptyrN   rg   rh   tview)r0   rt   r7   ru   rx   original_typeoriginal_shapereshape_after_matmulqweightrn   offsetsinference_dtypebf_x	out_dim_0quant_state	out_dim_1outcurrent_indexrc   output_sizes                       r1   _apply_4bit_weightr      s    1GGMWWN#( vvzIIb!&&*-'+$llG"22L//G*1o33O44(D
I<H<N<N<PQ<P[Q%%a(<PQSI++i'$3%&XX/C
 M3|,-*1o33A6
 EP'!*WQU^<>>@,q/ESA}][%@@@A , . &&'C#hhBs 3BSXXb\BtJ5 Rs   >F/c                    UR                   nUR                  nSnUR                  S:  a#  UR                  SUR	                  S5      5      nSnUR
                  nUR                  nUR                  n	US   R                   n
UR                  U
5      nUR                  S   n[        UR                  5        Vs/ s H  oS   R                  S   PM     sn5      n[        R                  " UUU
UR                  S9n[        XX5        UR                  U5      nU(       a&  UR                  " / US S QUR	                  S5      P76 nUb  X-  nU$ s  snf )NFry   rz   Tr   r   r{   )r|   r~   r   r   r   r   r   r   r   r   r   rM   r   rN   rs   r   )r0   rt   r7   ru   r   r   r   r   rn   r   r   r   r   r   r   r   s                   r1   r   r      s:    GGMWWN#( vvzIIb!&&*-'+$llG"22L//G*1o33O44(D
I<H<N<N<PQ<P[Q%%a(<PQSI++i'$3%&XX/C 4'7&&'C#hhBs 3BSXXb\BJ Rs   8Ec                     SS K n [        U R                  R                  R                  R
                  l        [        U R                  R                  R                  R
                  R                  l         SSKJ	n  UR                  [        S5      5        UR                  [        S5      5        UR                  [        S5      5        UR                  [        S5      5        Ag !   SSK
J	n   Nt= f)Nr   r&   znot supportedzis not testedzis not fully optimizedznot set)4vllm.model_executor.layers.quantization.bitsandbytesrl   model_executorlayersquantizationr}   r   BitsAndBytesLinearMethodvllm.configr'   vllm.config.model	addFilterr*   )rW   vllm_config_loggers     r1   patch_vllm_bitsandbytesr      s    CTh""//<<Qk}""//<<UUh	G@ 	$$%7%HI$$%7%HI$$%78P%QR$$%7	%BC	GFs   6C& &C0c                   (   ^  \ rS rSrU 4S jrSrU =r$ )BitsAndBytesConfigi  c                    > [         R                  R                  SUS   5      nX2S'   [        SU 35        [        TU ]  " U0 UD6  g )NUNSLOTH_bnb_4bit_compute_dtypebnb_4bit_compute_dtypez1Unsloth: vLLM Bitsandbytes config using kwargs = )osenvirongetprintsuperr2   )r0   rA   rB   r|   	__class__s       r1   r2   BitsAndBytesConfig.__init__  sI    JJNN#CVLdEefE/4+,EfXNOGd-f-r4   r?   r:   r;   r<   r=   r2   r>   __classcell__r   s   @r1   r   r     s    	.
 	r4   r   c                 h   [         R                  R                  R                  R                  R
                  n[        U 5      n U R                  S5      (       a  U [        S5      S  n U [        R                  S'   [
        [         R                  R                  R                  R                  l        U$ Nztorch.r   )rW   r   r   r   r}   r   str
startswithrh   r   r   )r|   
old_configs     r1   patch_vllm_compute_dtyper     s     ((//<<II\\
E
H%%uS]^/Du7<

34Rd""//<<Or4   c                     SS K nXR                  R                  R                  R                  l        [        R                  S	 g )Nr   r   )r   r   r   r   r}   r   r   r   )r   rW   s     r1   unpatch_vllm_compute_dtyper   -  s0    CR\""//<<OJJ78r4   c                      SS K n [        U R                  R                  l        [        U R                  R                  l         SS Kn [        U R                  R                  R                  l        [        U R                  R                  R                  l         SS Kn [        U R                  R                  R                  l        [        U R                  R                  R                  l        g !    N\= f!    g = fNr   )!vllm.transformers_utils.tokenizerrC   transformers_utilsrH   get_lora_tokenizerget_lora_tokenizer_async7vllm.transformers_utils.tokenizer_group.tokenizer_grouptokenizer_group'vllm.transformers_utils.tokenizer_grouprI   TokenizerGrouprW   s    r1   patch_vllm_lora_tokenizerr   4  s    0?N))<ET))B	JYhD##33CCV_nD##33CC\	:XnD##33BBU^tD##33BB[		s   AC2 AC9 2C69C=LoRARequest)WorkerLoRAManagerLRUCacheWorkerLoRAManagerc                     SS K n [        U R                  R                  l        SS Kn [        U R                  R                  l        [        U R                  R                  l        [        U R                  R                  l
         SS Kn [        U R                  R                  R                  l
        [        R                   " SS5      S:X  a  g  SS Kn [        U R                  R$                  l
        g !    NE= f!    g = f)Nr   (UNSLOTH_DO_NOT_PATCH_V0_LRU_LORA_MANAGER01)vllm.lora.requestPatchedLoRARequestlorarequestr   vllm.lora.worker_managerworker_managerPatchedWorkerLoRAManagerr    PatchedLRUCacheWorkerLoRAManagerr   &vllm.v1.worker.lora_model_runner_mixinv1workerlora_model_runner_mixinr   getenvvllm.worker.model_runnermodel_runnerr   s    r1   patch_vllm_lora_load_tensorsr   M  s     (:		%'/A		  ,5M		  2=]		  :	9OoDGGNN22L 99?EL	+AaDKK$$>		s   -C4 #C; 4C8;C?c                 D    [        U[        5      (       a  SU S'   SU S'   g g )NFmax_autotuneTcoordinate_descent_tuning)
isinstanceint)configruntime_shapes     r1   set_inductor_configr   b  s+    mS)) &+F>"26F./	 *r4   c                  Z     SS K n [        U R                  R                  l        g !    g = fr   )#vllm.compilation.compiler_interfacer   compilationcompiler_interfacer   s    r1   patch_vllm_set_inductor_configr   j  s0    	6FYD//C 		s   #& *c                      g r-   r?   r?   r4   r1   r   r   s      r4   c                      g r-   r?   r?   r4   r1   r   r   w  r   r4   c                     g r-   r?   )r   s    r1   r   r   {  r   r4   c                      g r-   r?   r?   r4   r1   r   r     r   r4   c                      g r-   r?   r?   r4   r1   r   r     r   r4   c                      g r-   r?   r?   r4   r1   r   r     r   r4   r}   )pack_dict_to_tensorunpack_tensor_to_dictqs_dictrN   
QuantStatec                 B   UR                  5        VVs/ s H0  u  p4SU;   d  M  [        U[        R                  5      (       d  M.  UPM2     nnn[	        U5      (       d  SU;  a  [        S5      e[	        U5      S:w  d%  US   R                  S5      S   U R                  ;  a  [        SU R                   S	U S35      e[	        U5      S:X  a.  US   nUR                  [        UR                  U5      5      5        UR                  5        VVs0 s H  u  p4UR                  S5      S   U_M     nnn[        UR                  5       5      R                  U R                  5      (       d   eS
U;   a  [        R                  " SSS9   [        R                   " US   [        R"                  SS9nSSS5        U " US
   R%                  U5      US   US   R%                  U5      ['        [        US   5      S9nOSu  pxU " US   US   R%                  U5      US   US   R%                  U5      ['        [        [(        R*                  R-                  SUS   5      5      US   b  [        R.                  " US   5      OSWUS9n	U	$ s  snnf s  snnf ! , (       d  f       N= f)a'  
unpacks components of state_dict into QuantState
where necessary, convert into strings, torch.dtype, ints, etc.

qs_dict: based on state_dict, with only relevant keys, striped of prefixes.

item with key `quant_state.bitsandbytes__[nf4/fp4]` may contain minor and non-tensor quant state items.
r   
quant_typez<Expected packed or unpacked quant_state items, found neitherr   r   rZ   rz   z@There should be exactly one `quant_state` item with ending from z.
Detected nested_absmaxrL   F)device_typeenablednested_offsetr{   Nnested_blocksizenested_quant_mapnested_dtype)absmax	blocksizecoder|   NNr  r  	quant_mapr   r|   r~   )r   r  r  r  r|   r~   offsetstate2)r   r   rM   Tensorrh   
ValueErrorrd   valid_qs_type_keysupdater   poprf   keysissubsetvalid_qs_keysautocasttensorfloat32r   getattrr   r   r   Size)
clsr   rN   kvqs_keyfirst_qs_keyr  r  r   s
             r1   	from_dictr    si    !(f=A3E!*UVX]XdXdJe!f6{{|7:[\\[A!5b!9AWAW!WRSVSiSiRjjvw}v~~  A 
 v;!!!9LNN0\1JKL3:==?C?411773<#Q&?C7<<>"++C,=,=>>>>g% fFgo&>`fg G/226:!"45/033F;eW^%<=	F (NF|,8$''/k*%((0 %0PRYZaRb!cd29'2B2N%**WW-.TX
 U g D GFs"   JJJ!J
(J
Jc                   (   ^  \ rS rSrU 4S jrSrU =r$ )
Linear4biti  c                    > [         R                  R                  SS 5      nUb  [        [        U5      nX2S'   [
        TU ]  " U0 UD6  g )Nr   compute_dtype)r   r   r   r  rM   r   r2   )r0   rA   rB   r  r   s       r1   r2   Linear4bit.__init__  sD    JJNN+KTRM( '} =*7'Gd-f-r4   r?   r   r   s   @r1   r  r    s    	. 	r4   r  c                      [         [        R                  R                  l         [        [        R
                  R                  l        g r-   )r  r}   
functionalr   r  nnmodulesr?   r4   r1   patch_bitsandbytes_quant_stater$    s(    7@**4-7*r4   c                     [        U 5      n U R                  S5      (       a  U [        S5      S  n U [        R                  S'   g r   )r   r   rh   r   r   r|   s    r1    patch_bitsandbytes_compute_dtyper'    s=    E
H%%uS]^/Du7<

34r4   c                  &    [         R                  S	 g )Nr   )r   r   r?   r4   r1   "unpatch_bitsandbytes_compute_dtyper)    s    JJ78r4   c                      g r-   r?   r?   r4   r1   r$  r$    r   r4   c                     g r-   r?   r&  s    r1   r'  r'    r   r4   c                      g r-   r?   r?   r4   r1   r)  r)    r   r4   c                  V  ^^	^
^^^^^ SSK Jm
JmJmJmJm   SSKJm  SSK	J
n JnJnJm	  [        R                  " S5        UU	U
4S jn SSXU[         S4   [         4      S	S 4U
UUU4S
 jjjn SSU ["        [               S	S 4UUU4S jjjn S m S n S n U" [$        R&                  R(                  5      [$        R&                  l        U" [$        R*                  R(                  5      [$        R*                  l        UT
l        UT
l        UT
l        UT
l        g !   SSKJm   GN
= f)Nr   )CuMemAllocator	libcudartunmap_and_releasecreate_and_mapAllocationData)is_pin_memory_available)r   Unionr   r   z#Unsloth: Enabling vLLM standby modec                 B  > S H4  n[         R                  R                  US5      nSU;  a  M+   SU S35       e   0 U l        TR                  U l        0 U l        [        U S5      (       a  U R                  U l	        [        U S5      (       a  U R                  U l        g g )N)PYTORCH_CUDA_ALLOC_CONFPYTORCH_HIP_ALLOC_CONFPYTORCH_ALLOC_CONF zexpandable_segments:TruezXStandby mode is not supported with expandable segments.
Please set environment variable z% without `expandable_segments:True`.
_python_malloc_callback_python_free_callback)r   r   r   pointer_to_datadefault_tagcurrent_tagallocator_and_poolshasattrr:  python_malloc_callbackr;  python_free_callback)r0   checkconfr2  r   r.  s      r1   r2   .patch_vllm_enable_sleep_mode.<locals>.__init__  s     bE::>>%,D-T9 338'9_a9 b ;= . : :35 4233 +/*F*FD'4011(,(B(BD% 2r4   offload_tags.rv   c           	      x  > Uc  TR                   4nO[        U[        5      (       a  U4n[        U[        5      (       d   e[        R
                  " SU 35        [        U R                  R                  5        VVs/ s H  u  p#UR                  PM     snn5      n[        R
                  " SU S[        U R                  R                  5       5       35        U R                  5         SnSnSnU R                  R                  5        H  u  pUS-  nUR                  n	UR                  S:X  a  M(  UR                  U;   a\  U	S   n
[        R                  " U
[        R                  ST" 5       S	9nUR!                  5       nTR#                  XU
5        Xl        US-  n[        R
                  " S
UR                   SUR                  U;    35        T" U	5        US-  nM      [        R
                  " SU SU SU 35        [&        R(                  " 5         [        R*                  R-                  5         gs  snnf )a#  
Put the allocator in sleep mode.
All data in the memory allocation with the specified tag will be
offloaded to CPU memory, and others will be discarded.

:param offload_tags: The tags of the memory allocation that will be
    offloaded. The rest of the memory allocation will be discarded.
NzSleeping allocator with tags: zSet of tags z and len of data r   r   weightscpu)r|   rN   
pin_memoryzdata's tag is z and is offloaded to cpu? zCPU offloads z true offloads z total )r=  r   r   tupler'   debugrf   r<  r   tagrh   print_memory_summaryhandlerM   r   uint8data_ptr
cudaMemcpycpu_backup_tensorgccollectrL   empty_cache)r0   rF  _dataset_of_tagscpu_offloadstrue_offloadstotal_offloadsptrrO  size_in_bytesrS  cpu_ptrr.  r3  r/  r0  s                r1   sleep+patch_vllm_enable_sleep_mode.<locals>.sleep  s     +669Lc**(+L,....5l^DE43G3G3M3M3OP3O4883OPQ|K=0A#dFZFZF`F`FbBcAdef!!#--335ICaN[[Fxx9$xx<' &q	$)KK!++ 68	%:!
 ,446$$W=A):&!LL>$((3MdhhZfNfMghif%QM) 6* 	}\N/-PWXfWghi




 C Qs   =H6
tagsc                   > T	" 5         U R                   R                  5        H  u  p#UR                  S:X  a  M  Ub  UR                  U;   az  UR                  nT" U5        UR                  bY  UR                  nUbJ  UR                  5       UR                  5       -  nUR                  5       nT
R                  X'U5        SUl        M     g)aD  
Wake up the allocator from sleep mode.
All data that is previously offloaded will be loaded back to GPU
memory, and the rest of the data will have empty memory.

:param tags: The tags of the memory allocation that will be loaded
    back to GPU memory. If None, all memory allocation will be loaded
    back to GPU memory.
rH  N)	r<  r   rM  rO  rS  numelelement_sizerQ  rR  )r0   rb  r]  rX  rO  rS  r^  r_  r1  delete_memoryr/  s           r1   wake_up-patch_vllm_enable_sleep_mode.<locals>.wake_upS  s     	--335ICxx9$|txx4/v&))5(,(>(>%(4(9(?(? )-::<)="3"<"<">!,,S=I15. 6  	r4   c                  j    [         R                  R                  5         [        R                  " 5         g r-   )rM   rL   rV  rT  rU  r?   r4   r1   rf  3patch_vllm_enable_sleep_mode.<locals>.delete_memoryq  s    

 


r4   c                 d   SnSnSnSnU R                   R                  5        HH  nUR                  S   nUR                  S:X  a  US-  nX-  nM-  UR                  S:X  d  M?  X&-  nUS-  nMJ     [        R
                  " SUS-  S SU S	35        [        R
                  " S
US-  S SU S	35        g)zC
Print the total memory usage for weights and KVCache allocations.
r   r   rH  kv_cachezTotal weights memory: g    eAz.2fz GB for z itemszTotal KVCache memory: N)r<  valuesrO  rM  r'   rL  )r0   weights_totalkv_cache_totalkv_cache_countweights_countrX  r   s          r1   rN  :patch_vllm_enable_sleep_mode.<locals>.print_memory_summaryv  s     ((//1D;;q>Dxx9$"%Z'&!# 2 	-mc.A#-Fh}o]cde-ns.B3-GxP^O__efgr4   c                 P   ^ ^ S mSS K nUR                  " T 5      UU 4S j5       nU$ )Nc                     [        U SU 5      n[        US5      =(       a>    [        UR                  S5      =(       a!    [        UR                  R                  SS5      $ )N
llm_enginevllm_configmodel_configenable_sleep_modeF)r  r@  rv  rw  )r0   engines     r1   check_sleep_modeTpatch_vllm_enable_sleep_mode.<locals>.get_patched_generate.<locals>.check_sleep_mode  ss    T<6F6=1  kgf>P>PR`6a  kfmnt  oA  oA  oN  oN  Pc  ej  gk  kr4   r   c                 V   > T" U 5      (       a  U R                  5         T" U /UQ70 UD6$ r-   )rg  )r0   rA   rB   rz  original_generates      r1   new_generatePpatch_vllm_enable_sleep_mode.<locals>.get_patched_generate.<locals>.new_generate  s.      %%$T:4:6::r4   )	functoolswraps)r}  r  r~  rz  s   `  @r1   get_patched_generate:patch_vllm_enable_sleep_mode.<locals>.get_patched_generate  s2    	k
 		*	+	; 
,	;
 r4   r-   )vllm.device_allocator.cumemr.  r/  r0  r1  r2  
vllm.utilsr3  vllm.utils.platform_utilstypingr   r4  r   r   r'   infor   r   rW   LLMgenerateAsyncLLMEnginer2   r`  rg  rN  )r   r4  r   r2   r`  rg  rN  r  r2  r   r.  r1  rf  r3  r/  r0  s           @@@@@@@@r1   patch_vllm_enable_sleep_moder    s   xxF6 32
KK57C4 597!"sCx,/*0 $1 27! >B7! 7!p 	HT#Y/ 4  8 	 	h( 	 	,TXX->->?DHH#78K8K8T8T#UD &N N$N*>N'OFEEs   D D(c                  >  ^^^	^
^^ SSK Jn   SSKmSSKmSSKJn  U U4S j5       m  SSKJnJm  TR                  " S5        UR                  m
U" T
5      UUU
UU4S j5       n X2l
        [        [        R                  5      [        S
5      :  aD   SSKJnJm  TR                  " S5        UR                  m	U" T	5      UUU	UU4S j5       n Xel
        gg! [         a  n[        S	U 35         SnANSnAff = f! [         a  n[        SU 35         SnAgSnAff = f)z
Temporarily disable ``gc.collect`` to speed up CUDA graph capture.
This is a workaround to avoid the overhead of garbage collection
during the graph capture with torch.compile.
r   )contextmanagerN)r  c               3   f   >#    TR                   n S Tl          S v   U Tl         g ! U Tl         f = f7f)Nc                      g r-   r?   r?   r4   r1   <lambda>Gpatch_vllm_graph_capture.<locals>.suppress_gc_collect.<locals>.<lambda>  s    Tr4   )rU  )original_gc_collectrT  s    r1   suppress_gc_collect5patch_vllm_graph_capture.<locals>.suppress_gc_collect  s.      jj!
	-,BJ,BJs   1% 1	.1)GPUModelRunnerr'   z'Unsloth: Patching vLLM v1 graph capturec                 r  > TR                   " S5        TR                  5       nT
" 5          T	" U /UQ70 UD6nS S S 5        TR                  5       nTR                   " SXS-
  5        [        S5       H2  nTR                  " 5         [        R
                  R                  5         M4     W$ ! , (       d  f       Nv= f)Nz1Unsloth: Running patched vLLM v1 `capture_model`.z=Unsloth: Patched vLLM v1 graph capture finished in %.0f secs.ry   r  perf_counterrg   rU  rM   rL   rV  )r0   rA   rB   
start_timeresultend_timerW  rT  r'   original_capture_model_v1r  times          r1   capture_model_wrapper_v1:patch_vllm_graph_capture.<locals>.capture_model_wrapper_v1  s    KKKL**,J$&24I$I&I ' ((*HKKO% 1X



&&(  M '&   B((
B6z0Unsloth: Could not patch vLLM V1 graph capture: 0.11.0)GPUModelRunnerBaser'   z'Unsloth: Patching vLLM v0 graph capturec                 r  > TR                   " S5        TR                  5       nT
" 5          T	" U /UQ70 UD6nS S S 5        TR                  5       nTR                   " SXS-
  5        [        S5       H2  nTR                  " 5         [        R
                  R                  5         M4     W$ ! , (       d  f       Nv= f)Nz1Unsloth: Running patched vLLM v0 `capture_model`.z=Unsloth: Patched vLLM v0 graph capture finished in %.0f secs.ry   r  )r0   rA   rB   r  r  r  rW  rT  r'   original_capture_model_v0r  r  s          r1   capture_model_wrapper_v0:patch_vllm_graph_capture.<locals>.capture_model_wrapper_v0  s    OP!..0
(*6tMdMfMF +  ,,.S) qAJJLJJ**, "  +*r  z0Unsloth: Could not patch vLLM V0 graph capture: )
contextlibr  rT  r  r  r  vllm.v1.worker.gpu_model_runnerr  r'   r  capture_model	Exceptionr   r   rW   r   r   r  )r  r  r  r  er  r  rT  r'   r  r  r  r  s          @@@@@@r1   patch_vllm_graph_capturer    s(    *- - 	FJ=>$2$@$@!	(	)	 	 
*	  	'?$ t 78#44	JKKKAB(:(H(H%,-  .  /G,3 5  F@DEEF:  	JDQCHII	Js1   AC AC: 
C7C22C7:
DDDc                    [         R                  " S5        S[        R                  S'   U (       d  [        R                  " SS5      S:X  a  S[        R                  S'   [        5         [        5         [        5         [        5         [        5         [        R                  " SS5      S:X  a   [         R                  " S	5        [        5         [        5         S
qg )NzUnsloth: Patching vLLMr   VLLM_ENABLE_V1_MULTIPROCESSINGr%   r   INFOVLLM_LOGGING_LEVELUNSLOTH_VLLM_STANDBYz)Unsloth: Patching vLLM to enable standby.r   )r'   r  r   r   r   r   r$  r   r   r   r  r  LORA_REQUEST_ID)rL  s    r1   r   r     s     KK(*36BJJ/0		2C8C?+1

'("$"$ "	yy'-4?A$&Or4   c                    U R                  5       R                  S5      (       d  gSU;  a  gUR                  R                  S0 5      n/ nU H8  n[        R
                  " SU5      (       d  SU;  d  M'  UR                  U5        M:      [        U5      nSR                  S U 5       5      n[        R                  " U5      nU H  nUR                  U5      b  M    g	   g)
Nzunsloth-bnb-4bitTquantization_configri   z[\d]\.[^\.]{1,}$rZ   |c              3   N   #    U  H  n[         R                  " U5      v   M     g 7fr-   )reescape)r\   r7   s     r1   r_   /vllm_dynamic_quant_supported.<locals>.<genexpr>%  s     M0L1"))A,,0Ls   #%F)
lowerendswithr  r   r  searchappendrf   rb   compile)
model_namer   ri   parent_llm_int8_skip_modulesmodule
find_regexs         r1   r   r     s     &&'9::4F*4"66::;RTVW
 $& '99(&11S5F(//7 ( 	#&'C#D M0LMMJJ'J'V$,U ( r4   Fc                     [        U5      S:X  a  [        R                  " 5       nO[        R                  " 5       nU   [	        XX#5      sS S S 5        $ ! , (       d  f       g = f)Ntorchao)r   rM   no_gradinference_mode_get_vllm_state_dict)llmreturn_state_dictr   is_vision_modelctx_managers        r1   r   r   /  sA     f*mmo**,	#CFT 
s   A
A!c           
      P  ^-^.^/^0^1^2  [        U S[        U SU 5      5      n[        US5      (       a?  UR                  R                  R                  R                  R
                  R                  nO+UR                  R                  R
                  R                  n  Uc   e[        US
S5      n[        USU5      nUR                  m2[        5       n[        5       m0[         R"                  R%                  5       nUS   S-  US   -   m1 SSKJn  SSKJn  SU" U5      ;   m/Sm.Sm-T/(       a9   SSKJn  U" 5       m. [         R6                  R8                  R;                  T15      m- SAU-U.U/U0U1U24S jjn [        US5      (       a  UR                  nSnO5[        US5      (       a  SnUR<                  R                  nO[        S5      eUR>                  nU" U S3SUUSS9  [A        5       n/ n[C        [E        URF                  5      5       GH;  nURF                  U   n[        US5      (       a_  U S U S!3nURH                  RJ                  nURH                  RL                  nU" U S"3SUU5        U" U S#3SUU5        U" U S$3S%UU5        O[        US&5      (       a  U S U S'3nURN                  RJ                  nURN                  RL                  n[P        RR                  " S(S)URU                  SS*S5      S+-   5      n[W        S,U 35      n U RX                  S-   n!U RX                  S.   n"U" U S"3SUU!5        U" U S#3SUU"5        U" U S$3S%UU"5        U" W S/3SUW5        URZ                  R\                  n#U" U S U S03SUU#5        U" U S U S13SUU#5        URZ                  R^                  n#U" U S U S23SUU#5        US3    Vs/ s H  nURa                  US49PM     n$nU$ H\  n%U%RU                  S5U S53S6U S735      RU                  US85      n& [W        U&5      Rc                  5       S9   n'U% S:3n%U'UU%'   UU%   T0U%'   M^     GM>      [E        U5      S:w  a   [i        S;[k        [m        U5      5       35         U(       a  [o        X_T0U5        U S<3n(URp                  Rr                  Rt                  UU('   UU(   T0U('   [        US=S5      (       d;  URw                  5        VV)s/ s H  u  nn)S>U;   d  M  U)PM     n*nn)U" S>SUU*S   SS9  OU S?3n+U+U;   a  UU+   n,U,US@'   U,T0S@'   U(       d  S nUT04$ !    0 nU R                  S[        5       S9nU R                  S[        5       S9S   nXS      nUR                  5        H  u  n	u  pU
" U6 Xi'   M      [        S5      e! [         a  n[        S	[        U5       35      eS nAff = f= f! [         a&  n[.        R0                  " SU 35        Sm/ S nAGNS nAff = f! [         a$  n[.        R0                  " SU 35         S nAGNS nAff = f! [         a$  n[.        R0                  " SU 35         S nAGNS nAff = fs  snf ! [         a/  nURe                  U%Rg                  S55      S   5         S nAGM  S nAff = fs  sn)nf )BNru  ry  engine_corereport_device_id)rA   get_weight_ipc_handlesr   z5Unsloth: Currently vLLM RPC is not yet fully enabled!z6Unsloth: Cannot get internal vLLM states with error = 
model_type	causal_lmtext_config
   r   )#maybe_post_process_fp8_weight_block)	getsourcez&layer.weight_scale.data.T.contiguous()z*Unsloth: Could not import vLLM fp8_utils: F)is_deep_gemm_supportedz*Unsloth: Could not import vLLM deep_gemm: z<Unsloth: Could not import vLLM cutlass_block_fp8_supported: rz   c           
       	  > [        USU5      nUR                  n[        USS 5      nUb  [        R                  " S/U-   5      nOSUR                  S   /nUR
                  [        R                  :X  Gas  [        US5      (       a  UR                  n	O,[        US5      (       a  UR                  n	O[        SU  35      eS/UR                  -   n
[        R                  " U
5      n
SnU	R                  S:X  Ga  U	R                  S	   S	:  Ga  S
nUR                  S   nS[        [!        [        USS 5      5      5      ;   nU(       a  SnOT(       a  T=(       ad    [        US[        R"                  5      [        R"                  :H  =(       a1    UR                  S   S-  S:H  =(       a    UR                  S	   S-  S:H  nT!S:X  aM  T(       aF  U(       d?  U	R$                  n	[&        R(                  " SU  SUR                   SU	R                   35         UR                  u  nnU	R                  u  nnUU-  UR                  S   :X  a  UU-  UR                  S	   :X  d1   SU  SUR                   SU	R                   SUR                   35       eOUR$                  nS	nU
 Vs/ s H  nUU-  PM
     nnU(       a  U	UU   UUS	-       n	U(       a  XjU   XS	-       nOUnXX-   '   U	T X-   '   [        USS 5      nUb  UR*                  n
U(       aN  XjU   XS	-       nUU   T U S-   '   UU   R-                  SS9nUR/                  5        H  u  nnUX S-   U-   '   M     OkUnUS   T U S-   '   US   R-                  SS9nUR/                  5        H  u  nnUX S-   U-   '   M     O'UR1                  S5        U(       a  XhU   XS	-       nOUn[        UST"5      nU(       a$  SU ;   d  SU ;   a  UR                  S   U:  a  US U nUX S -   '   UT U S -   '   [        US!S 5      nUb^  UR1                  S5        U(       a  UX   XS	-       nOUnUb$  SU ;   d  SU ;   a  UR                  S   U:  a  US U nUX S"-   '   UT U S"-   '   g g s  snf )#N
base_layeroutput_sizesr   weight_scaleweight_scale_invz1Unsloth: Cannot find weight scale for FP8 weight .weight_scalery   r   .weight_scale_invCompressedTensorsquant_method
orig_dtype   Z   z%Unsloth: Transposed weight scale for z for weight shape z and scale shape zUnsloth: vLLM weight for z has unexpected weight shape z and scale z and block size r   .weight.quant_stateT)packedz.weight.Forg_vocab_sizeembed_tokenslm_head.weightru   .bias)r  r   npcumsumr~   r|   rM   float8_e4m3fnr@  r  r  r	  logical_widthsr   weight_block_sizer   typebfloat16Tr'   r  r   as_dictr   requires_grad_)#rX   kk
state_dictprojslice_weightsslice_indexr   r  dim_offsetsr  r   scale_suffix
block_sizeis_compressed_linearshould_use_deepgemmabpqr7   scale_offsetsr   rn   r   r  r  shrink_sizeru   bias_tensorcutlass_block_fp8_supportedr  needs_transpose_checkquant_state_dictsm_cap
vocab_sizes#                                r1   get_state_dict,_get_vllm_state_dict.<locals>.get_state_dict  s   t\40++ t^T:#))QC,$67KgmmA./K ==E///t^,,#00122#44 #TU[T\!]^^cD///Gii(G*L  A%%%a(1,
 $7L!%!7!7!:J+>#d7SWYgimKnFoBp+p(+ (7..D  /GQUWcejesesItx}  yG  yG  JG  /G  LS  LY  LY  Z[  L\  _b  Lb  fg  Lg  /G  ls  ly  ly  z{  l|  B  lB  FG  lG+!2:*ENa ,8>>L"KK*OPVxWijqjwjwix  yJ  KW  K]  K]  J^  )_  `"==DAq'--DAq6T%;%;A%>>16TMcMcdeMfCf  @  kD  EK  DL  Li  jq  jw  jw  ix  xC  DP  DV  DV  CW  Wg  hl  h~  h~  g  i@  @fCf &iiG!"J8? @1J @ #/b0AMRTWXRXDY#ZL wAv? 0<v,-6BV23 w(94@#//G wAv?COPRCS *?!?@*2.666E'--/DAq:;J
2Q67 0 !COPQ? *?!?@*1o55t5D'--/DAq:;J
2Q67 0 ""5) R;Av3FG 
 d#3Z@Nf4	V8K||A,-)/
I%&/5)+, tVT*&";?[a5HI" &Nf,D	U[H[$$Q'+5"-l{";K+6J'(1<Vg-. c !As    Q;modellanguage_modelzmodel.language_modelz)Unsloth: Cannot find vllm_internal_model!z.embed_tokens)r  	self_attnz.layers.z
.self_attnz.q_projz.k_projz.v_projry   
cross_attnz.cross_attnz	\.(\d+)\.[\1].language_model.modelz	.qkv_projzvllm_internals.q_proj_decoderkv_proj_encoderz.o_projz.mlp.gate_projz.mlp.up_projz.mlp.down_proj
layernormsr  rZ   [z].vllm_text_modelr   r  +Unsloth: Just some info: will skip parsing z.norm.weighttie_word_embeddingsr  z.embed_tokens.weightlm_head.weight)Trz   )<r  r@  r  r   driver_workerr   r  collective_rpcrK  r   NotImplementedErrorr  RuntimeErrorr   r
  r   rM   rL   get_device_capability7vllm.model_executor.layers.quantization.utils.fp8_utilsr  inspectr  r'   r  vllm.utils.deep_gemmr  ops_C$cutlass_scaled_mm_supports_block_fp8r  r  get_model_layer_configrg   rh   r   r  qkv_projo_projr  r  subreplaceevalr  mlpgate_up_proj	down_projformatr  r  rd   r   listrf   extract_vision_layersnormr   rX  named_modules)3r  r  r   r  ru  vllm_internalsvllm_state_dictgpu_idsrH  weight_name
to_cuda_fx	cuda_datar  r  r  r  
capabilityr  r  vllm_is_deep_gemm_supportedr  r  vllm_text_model_prefixr  layer_configskipped_layernormsr  rt   rX   r(  r)  namecross_attn_layerq_projkv_projr  layernorm_nameslayernorm_name	vllm_name	layernormnorm_prefixmodlm_layer	embed_key	lm_weightr  r  r  r  r	  r
  s3                                                @@@@@@r1   r  r  @  sI   bS,Xs0KL
:}--'33??NN\\iiooN (66DDQQWWN 	 {;J &-8K''JJ"}113J]R*Q-/F
&
 	p% HIVyLz z
 #"'	Jb%@%B"	\*/)),,*[*[\b*c' 	v= v=n 	 ~w''(..!(	!1	2	2!7(77==FHH"//L,-];Q
Lhmn *+L C../0&&r*5+&&./xt:FF//H__++FfXW-q*hGfXW-q*hGfXW-q*hGUL))./xt;GF''00H%%,,F66,&..AWXnpq2r  vA  3A  BD#odV$<=%**+;<F&++,=>GfXW-q*fEfXW-q*gFfXW-q*gF&)1j&Ayy%%01"^LaQ[]ab01"\JaQ[]abyy""01"^LaQ[]ab ;G|:TU:T$4;;";-:TU-N&..2$ayAbT*EMMNdfwxII O668B	$2#37!;-6
>*3=n3M 0 . 	W 1X 	
!#;DEWAX<Y;Z[\n:JN[ ,,L9K-2299>>J{$.{$;[! ; 5u==(6(D(D(F\(FHD)W[J[C(F\y!Z!ER ...BC	
""9-I+4J'(1:-.4j'''gb
	b O(();EG(LG(()A%'(RSTUGaj)G9@55j/99/E, :I%&]^^ 	b!WX[\]X^W_`aa	b:  &@DE %&  	JKKDQCHII	J
  	\KKVWXVYZ[[	\b V  I")).*>*>s*CB*GHHI* ]s   A'T2 0*T2 W +W? 9)X0 Y!.Y&#Z"4Z"2W	5A+V  
W*WWW	
W<W77W<?
X-	X((X-0
Y:YY&
Z0#ZZc                   ^ ^ TR                  5       T R                  5       -  nU[        S5      -  n[        U5      S:w  aa  TR                  5       T R                  5       -
  nT R                  5       TR                  5       -
  n[        SU SU 35        [	        SU 35      e 0 nT  H  n T U   nTU   nUR
                  UR
                  :w  d  UR                  5       S:  a>  UR                  [        R                  5      nUR                  [        R                  5      n[        R                  R                  XxSSS	S
9  M     [        U5      S:  a]  SR                  UR!                  5        VV	s/ s H  u  piSU S[#        U	5       3PM     sn	n5      n[	        S[        U5       SU 35      eg ! [         a  n	US:X  a  [        U 4S jUSS4 5       S 5      n
[        U4S jUSS4 5       S 5      nU
bd  Uba   [        R                  R                  T U
   R                  5       TU   R                  5       SS9   S n	A	GN! [         a    XU'     S n	A	GNf = fXU'    S n	A	GN$XU'    S n	A	GN.S n	A	ff = fs  sn	nf )N)zmodel.lm_head.weightz#model.language_model.lm_head.weightr  r   z;Unsloth: Failed comparing state_dict with Missing from hf: z
Missing from vllm: z*Unsloth: Failed comparing state_dict with ry   Fg-C6?gMbP?)check_strideatolrtolr  c              3   6   >#    U  H  oT;   d  M
  Uv   M     g 7fr-   r?   )r\   r  old_state_dicts     r1   r_   )assert_same_state_dict.<locals>.<genexpr>  &       O(v1  @N  {NQQ(v   		zmodel.embed_tokens.weightz(model.language_model.embed_tokens.weightc              3   6   >#    U  H  oT;   d  M
  Uv   M     g 7fr-   r?   )r\   r  new_state_dicts     r1   r_   rS    rT  rU  T)rN  
r  ]
z: )r  rf   rh   r   r  r|   re  r   rM   r  testingassert_closer  next
contiguousrb   r   r   )rR  rW  
differencemissing_from_hfmissing_from_vllmfailureskeyold_valnew_valerrorkey1key2error_messages   ``           r1   r   r   d  s     $$&)<)<)>>J#fggJ
:!(--/.2E2E2GG*//1N4G4G4IIKOK\\q  sD  rE  F  	GG
|TUUH	&$S)G$S)G}}-'2F2F2H12L!**U]]3!**U]]3MM&&wVZcg&h  	1 2 8}q		hnnN^"_N^
Qse3s5zl#;N^"_`GHVXYfXghii'  	&&&  O.IKu(v  O  QU  V  O.IKu(v  O  QU  V#(8.22>$3G3R3R3TVdeiVjVuVuVw  IM2  N$ .(-. %*SM %	&" #`sD   +BF22I7
2
I4<>I/;AIII/II/%I//I4c                    ^) [        X5        [        XU5      u  pVpxUR                  [        5       US9n[	        US0 5      n	[        U5      n
[        5       nUnU	0 :w  d  UGb7  U	0 :w  a  U
S:X  a#  U	S   US'   U	S   US'   [        U	S   5      US	'   GOU
S
:X  a  U	S   US'   U	S   US'    SSKJ	n  OU
S:X  a,  [        R                  " U	S   /[        5       S9US'    SSKJn  OU
S:X  ax  SUS'   SS/nU	R                  SS 5      nU(       a  UR                  SS 5      OS nU(       a  UR                  SS 5      OS nU(       a  UR                  SU5      OUnXS'    SSKJ	n  O:O9Ub6  UR                   US'   UR"                  US'   [        UR$                  5      US	'    SSKJnJn  SSKJn  / SQnS n / n[1        U5       GH  nU GH  m)T)R3                  US 9m)S!T);   a  T)R5                  S!S"5      m)S#nT)U ;   a  U T)   nS$nO=T) S%3U ;  a+  S&T);   a#  UR7                  T)R9                  S'5      S(   5        Mo   U T) S%3   nT) S)3U ;   a)  S#nU T) S)3   n[        R:                  R=                  US$S*9nOS$nS n S nT) S+3U ;   a	  U T) S+3   nOT) S,3U ;   a  U T) S,3   n Ub$  UR>                  S-;   d   S.UR>                   35       eT)U ;   aY  [@        RB                  " S/S0T)R5                  S1S2S35      5      n[        R:                  R=                  US$S*9n [E        S4U S535        GM[  UGbs  UR>                  S3:X  a  W" SSUUS69R                  [        5       5      n URF                  S3   U l$        URF                  S   U l%        [        R:                  R=                  US$S*9U l&        UU l'        US   U l(        [        R:                  R=                  US$S*9U l)        US   U RL                  l(        SU l*        GOUUR>                  S7:X  a  W" SSUX+S   [        5       US   S89n URF                  S3   U l$        URF                  S   U l%        [        R:                  R=                  US$S*9U l&        UU l'        [        R:                  R=                  US$S*9U l+        S
U l*        GOT) S93U ;   a  U T) S93   n!U" SP[        5       UUS:.UD6n U!RF                  S3   U l$        U!RF                  S   U l%        U" SQUS$S;.UD6U l&        U!U RL                  l,        UU l'        [[        UU 5      U l        [[        UU RL                  5      U RL                  l        GO[]        U)4S< jU 5       5      (       do  U" SS[        5       US=9n URF                  S3   U l$        URF                  S   U l%        [        R:                  R=                  [	        US>U5      S$S*9U l&        UU l'        Ox[        R:                  R=                  US$S*9n"[@        RB                  " S/S0T)5      n[E        S4U S?35        [E        S4U S@35        Ub  [E        S4U SA35        [E        S4U SB35        GM   [@        RB                  " SCSD T)5      m)[E        S4T) S535        GM     GM      [_        XPU5        Ub  [a        Xe5        [	        USEU5      n#[	        USFS 5      n$URc                  5        GH  n%[e        U%SG5      (       a'  U%Rf                  Ri                  U#[        5       SH9U%l3        [e        U%SI5      (       a]  U$c   SJ5       eU$Rj                  U$Rl                  -  n&U%Rn                  Ri                  U&S7-  5      R                  [        5       5      U%l7        [e        U%SK5      (       aM  [q        U#5      n'U#Rr                  U'l:        SLSM0U'l;        U%Rx                  Ri                  U'[        5       SH9U%l<        A'GM      [[        UU5      Ul        UR{                  5         [1        SN5       H6  n([|        R~                  " 5         [        R                  R                  5         M8     [        U5      S:w  a   [        SO[        [        U5      5       35        U$ !   [        S5      e= f!   [        S5      e= f!   [        S5      e= f)RN)rN   r|   r  r}   bnb_4bit_use_double_quantcompress_statisticsbnb_4bit_quant_typer   bnb_4bit_quant_storagequant_storagefp8activation_schemer  r  r   )	FP8LinearzrUnsloth: FP8 models need importing FP8Linear from `transformers.integrations.finegrained_fp8` but we don't see it.
fbgemm_fp8activation_scale_ubrN   input_scale_ub)FbgemmFp8LinearzsUnsloth: FP8 models need importing FbgemmFP8Linear from `transformers.integrations.fbgemm_fp8` but we don't see it.zcompressed-tensorsdynamicr  config_groupsr   )r  
Params4bit)Linear)input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernormq_normk_normlayer_norm1layer_norm2post_layernormmm_soft_emb_normnorm1norm2r3  c                 8    U R                   " U0 UD6$ !   U s $ = fr-   )r   rE   s      r1   _override_to1convert_vllm_to_huggingface.<locals>._override_to  s    GGT,V,,ts    r  r  r  TFr  r3  rZ   rz   r  )requires_gradr  r  )r   ry   zQwe only support row quantized (ndim=1) and block quantized(ndim=2) fp8 but found z\.([\d]{1,})\.r  zmodel.r9  r   z
new_model.z = layer)in_featuresout_featuresru   weight_dtypery   )r  r  ru   r|   r  rN   rp  r  )rN   ru   r  )rX  r  c              3   ,   >#    U  H	  oT;   v   M     g 7fr-   r?   )r\   r7   
layer_names     r1   r_   .convert_vllm_to_huggingface.<locals>.<genexpr>4  s     B/Qj/s   )rN   ru   rX  z.weight = Nonez.weight = weight_paramz.bias = Nonez.bias = biasz\.([\d]{1,})c                 ,    SU R                  S5       S3$ )Nr  r   ])group)r7   s    r1   r  -convert_vllm_to_huggingface.<locals>.<lambda>K  s    Qqwwqzl!;Lr4   r  vision_config
rotary_emb)r   rN   rotary_pos_embzHUnsloth: vision_config is required for models with vision rotary_pos_embrotary_emb_local	rope_typedefault   r  )r   r   r?   )Fr"   create_empty_modelr   rP   r  r   dictr   )transformers.integrations.finegrained_fp8rq  ImportErrorrM   r  $transformers.integrations.fbgemm_fp8rv  r   rj  rl  rm  bitsandbytes.nn.modulesr  ry  torch.nn.modulesrz  rg   r0  r+  r  rd   r"  	Parameterr   r  r*  execr~   r  r  r   ru   ru  r  r  r  r   r   re   set_additional_modulescopy_attributesr#  r@  r  r   hidden_size	num_headsr  r   rope_local_base_freq
rope_thetarope_scalingr  r,  rT  rU  rL   rV  rh   r   r1  rf   )*r  r   r|   
bnb_configr  	new_modeloriginal_meta_modellayer_countlayer_namesr  r  rB   r  rq  rv  r  rx  group_0rH  r  ry  rz  rD  r  r?  r  	is_weightr   has_biasru   fp8_weight_scalelayer_name_brrt   r   weight_paramr  r  r  head_dimlocal_rope_configrW  r  s*                                            @r1   r   r     s	    &?QRXap?q<IK&7&95II!&*?D!&)LVFMb J$:"$~-0CD_0`,-':;P'Q|$*45HIa5b*c'&.ABU.V*+':;N'O|$\S -+0<<9LMb9c8d  pA  pC  ,D'(]T !55.7*+!3Z
 3 7 7 N8E-++At449@'++h5dFMW[[zBS]
'1|$\S 6 #,6,P,PF()#-#A#AF< &01R1R&SF?#>'O$ 	K %J#***3J%3'//0FHXY
I--)*5!	 \)1AA+*11*2B2B32G2KL)ZL*@AU#'77':,e(<=xx))$)F   $]+/??#3zl-4P#Q <015EE#3zlBS4T#U +4D4I4IU4R  .@  Wh  iy  i~  i~  h  U@  .@4R-- "'8(JDVDVW_`bcdDe f**65*Iz-9:!-#((A-+!AV^otuxx  zK  zM  NE(.QE%)/aE&#(88#5#5fe#5#TEL!%EJ+12B+CE().););<L^c);)dE&289I2JELL/)5E&%**a/%AaPXbg  ~J  wK  Vg  Vi  E  FY  Z  [E(.QE%)/aE&#(88#5#5fe#5#TEL!%EJ-2XX-?-?@Pbg-?-hE*).E&<237GG.*=P/QR"  A2C2Ehhu  Ay  A%0%6%6q%9!%0%6%6q%9")YYRXY+6(!
 #<7"),"EB/BBBq!.?.A(S%+\\!_!%+\\!_"  %xx11'&&&2Qch1i!
  %xx11&1N "'8(J Oz-?@z-0FGH#:m_LAB:m_LAB 1LjYJ:j\23S &T 	W !X 	9?&+7 &-8KFOT:M##%6<(( & 1 1 ; ;$*, !< !F 6+,, !,x.xx,$00M4K4KKH$*$9$9$C$CHaK$P$S$STeTg$hF!6-.. !) 5+6+K+K(.99-E*&,&=&=&G&G**, 'H 'F# "3 &4 	 <3ILNN 1X




   !#;DEWAX<Y;Z[\y\%  '[  \  \
]%  '\  ]  ]\%  '[  \  \s$   (a  a0 b   a-0a= bc                    [        5       u  pXK-  nU R                  nU R                  nU R                  nU R                  nU R
                  n[        U SS5      n[        U SS5      nUU-  U-  nUU-   U-   U-   nUU-  nUU-  S-  nSU-  nX-  n[        U SS5      (       a  SOX-  nX-  S	-  nXnU-   U-   U-   -  nX-  S-  UU-  -   nUUU-   -  Xn-  -   nUU-   U-   U-   nUU-  nUU-  S-  nU(       d  SnUU-   nUS	-  n Sn!UU!-  UU-   U-   -  n"UU!-  S-  n#UU!-  UU-   -  n$UU-  n%U"U#-   U$-   U%-   n&U&S
-  S-  n&U	(       d  Sn&X-
  U&:  a  UU&-
  nX-  n'UU-   U-  S-  n(UU-   U-   S-  n)Sn*U(       a  Sn*O	U(       a  Sn*U(U*-  U)-   U-   n+U(       a  S
OSn,US-  U-  U,-  n-UU+-
  n.U.S::  a  Sn.[        SU.U--  -  5      n/U/S-  S-  n/[        U/U-  5      n0U.S-  S-  S-  n1U/U0U'U14$ )Nnum_key_value_headsr   num_attention_headsr  ry   r  Tr      g      ?g	@g?ffffff?      )rV   r
  r  max_position_embeddingsintermediate_sizenum_hidden_layersr  r   )2r   load_in_4bitload_in_8bitmax_seq_lengthgpu_memory_utilizationenable_loramax_lora_rank	max_lorasfloat8_kv_cacheaccount_for_gradientsparallel_sequencesrT   rU   r
  hdcontext_lengthmlp_sizen_layers
n_kv_headsn_headskv_sizeqkvor-  r  r  r  qkvo_Aqkvo_Bmlp_Amlp_Blora_elementsgradient_lora_elementsparameter_lora_elementsbszactivation_qkvresidual_memoryactivation_mlprH  maximum_activationactual_gpu_memory_utilizationtotal_quantizable_elementstotal_float16_elementsfactorbytes_for_modelfloat_byteskv_elementsmemory_left_for_kv_cachemax_num_batched_tokensapprox_max_num_seqsmemory_left_for_kv_cache_gbs2                                                     r1   approximate_vllm_memory_usager    s     !-K(6K""J			B33N''H''H!6:J!6:GGmj(G <'!B&D"9DMQCRJ?L6#8$??aZ_G !#F7lW4r9:F!#h&>>Eh12]5GGEVOe+e3M!I-M!(*Q.M ,m;+Ao C$s*b7lW.DEN%+Q.O$s*h.ABNmG(>9GC  -T1Q6 q"4!$66"%77$/$>! #'*h!6!:",|";g"Eq!HFdV	v"V+.DD}T  *$qKQ;)[8K*_<1$&> !'?+'M!NO4;sB4~EF #;T"AD"H4"O 	 3%'B	CCr4   c                      / SQn  SS K n[        UR                  R                  S5      (       a*  [	        UR                  R                  R
                  5      n Oe[        R                  " UR                  R                  5      nSnUR                  U5      nUS:w  a"  UR                  SU[        U5      -   5      nX$U n [        U 5      [        L a2  [        R                  " SU 5      n U  Vs/ s H  n[        U5      PM     n nU $ !    NJ= fs  snf )N)          @   r  r  i@  i   r   MaxLoRARankspossible_max_ranksrz   rX  z[\d]{1,})vllm.config.lorar@  r   r   r   r  r"  r  findrh   r  r  findallr   )r  rW   lora_configr/   lrr7   s          r1   r   r     s    <4;;##^44!$T[[%5%5%B%B!C!++DKK,<,<=K'D  &ABw$$T1s4y=9%0Q%7" 3&ZZ5GH.@A.@c!f.@A Bs   AD A%D +DDc                 X    [        5       nU H  nX :  d  M
  Us  $    [        SU  S35      e)zDvLLM doesn't allow any LoRA rank, so we need to get the next largestz-Unsloth: vLLM does not support LoRA ranks of z+.
Only `{possible_max_ranks}` is supported.)r   r  )	lora_rankr  r  s      r1   determine_max_lora_rankr  
  sB    13+%   , 
7	{ C4 	4 r4   c                     SSK Jn  [	        U SS5      =(       d    / n[        U[        5      (       a  U/nSnU H5  n[	        US0 5      R                  U5      nUc  M$   UR                  5       n  O   Uc  g[        R                  " U5      nUc  gS	[        4S
 jnU" U5      (       + $ ! [         a  n[        SW SU 35         SnAgSnAff = f! [         a  n[        SU SU 35         SnA  gSnAff = f)z
Approximately checks if a vLLM model supports FLASHINFER by checking
vLLM's ModelRegistry, then inspecting if an `if self.attn_backend not in { ... }`
guard excludes FLASHINFER.

For eg Qwen3-VL does not work with flashinfer.
r   )ModelRegistryz2Unsloth: Failed loading vLLM model class for arch z$ during `vllm_supports_flashinfer`.
NTarchitecturesmodelsrv   c                 $   [         R                  " S[         R                  5      n [        R                  " U 5      n[        UR                  U5      5      nU(       d  gU H  nUR                  S5      nSU;   d  M    g   g! [
         a     gf = f)Nz;if\s+self\.attn_backend\s+not\s+in\s*{\s*(?P<body>.*?)\s*}:Fbody
FLASHINFERT)	r  r  DOTALLr"  r  r  r1  finditerr  )r  ATTENTION_BACKEND_GUARD_REGEXsourcematchesmr  s         r1   _module_disallows_flashinfer>vllm_supports_flashinfer.<locals>._module_disallows_flashinferE  s    (*

JII)
%	&&v.F
 4==fEF A776?Dt#	  %  		s   B 
BB)#vllm.model_executor.models.registryr  r  r   r  r   r   r   load_model_clsr"  	getmodulebool)	r   r  r  archr  	model_cls
registeredr  r
  s	            r1   vllm_supports_flashinferr    s'   E FOT:@bM-%%& I]Hb9==dC
		"113I  y)F~ 6 ,F333  @ G334#7	
 *  	DTF K778c; 	s/   B) C)
C3C		C
C7C22C7.unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit      ?r  r  r  r|   trainingr  random_stater  r  r  	use_async
use_enginedisable_log_statsenforce_eagerenable_prefix_cachingcompilation_configconservativenessmax_logprobsuse_bitsandbytesunsloth_vllm_standbyr  return_argsmax_num_seqsc                 h   Uc   e[        U5      [        L d   eUS:  a  US::  d   eU=(       d    [        R                  " SS5      S:g  n[        R                  " SS5      S:g  n[	        5       u  nnUS-  S-  S-  nUS-  n[
        (       a  [        R                  " SU 35        US	:  a  S
nO&US:  a  SnOUS:  a  SnOUS:  a  SnOUS:  a  SnOSn[
        (       a  [        R                  " SU 35        [        [        5      [        S5      :  aJ  [
        (       a  [        R                  " S5        US-  n[
        (       a  [        R                  " SU 35        U(       a6  U(       d/  UU:  a  Un[        SU S35        OUU:  a  [        SU S35        Un[        S:X  aJ  [        R                  R                  5       u  nnUS:  a  [        S5      eU(       a  US:  a  [        S5      e[!        US 5      (       a  UR"                  n OUn [%        U	5      n!U!U	:w  a  [        S!U! S"U	 S#35        U!n	['        U5      n"U=(       d+    U R)                  5       R+                  S$5      =(       d    U"S%:H  nS&U R)                  5       ;   =(       d    U"S';   n#U(       a  U#(       a   S(5       e[-        U UU#UUUU	U
UUS)9
u  n$n%n&n'S*n(S+UR.                  ;   n)U)(       a  S,n(U(       a   S-5       eUS.:  d   S/5       eO%U$S0::  a  S1nS1n$U$U::  a  [        S2U S3U S35        U$n [        S:X  a  WS:  a  [        R0                  n*OF[        S4:X  a  [        R0                  n*O+[        S5:X  a  [        R0                  n*O[        R2                  n*U[        R0                  :X  a0  U*[        R2                  :X  a  [        S65        [        R2                  nOMUc  U*n[        S7U S#35        O8U[        R2                  :X  d  U[        R0                  :X  a  O[        S8U S935      e[	        5       u  nn[5        US-  S-  S-  S:5      n+[7        U5      n,[8        R:                  R=                  S;5      (       Gax  S<[        R>                  ;   a)  [        R>                  S<   S=:X  a  [        R>                  S<	 O[A        U5      (       dY  [        R>                  RC                  S<S=5      S>:X  a  [        S?U  S@35        S<[        R>                  ;   a  [        R>                  S<	 Ou[        R>                  RC                  S<S=5      S=:w  a  OPU(       d  WS:  a  S>[        R>                  S<'   O/[        [        5      [        S5      :  a  S>[        R>                  S<'   WS:  a  SA[        R>                  SB'   OC[        [        5      [        S5      :  a  SA[        R>                  SB'   OS[        R>                  SB'    [        S:X  aA  [        R                  R                  5       u  nnUS:  d  US:X  a  USC:  a  [        SD5        S,nO[        S4:X  a  S*nO[        S5:X  a  S*n S0SEK"J#n-J$n.J%n/J&n0J'n1   Un%SFn$U'S:::  a  SGu  n$n%OkU'SH::  a  SIu  n$n%O_U'S::  a  SJu  n$n%OSU'SK::  a  SLu  n$n%OGU'SM::  a  SNu  n$n%O;U'SO::  a  SPu  n$n%O/U'SQ::  a  SRu  n$n%O#U'SS::  a  STu  n$n%OU'SU::  a  SVu  n$n%OU'SU:  a  SWu  n$n%U(       a  [        SX5        SYn%[Q        S.U5      n$U(       a  [S        U%SZ-  5      n%SFn2U(       dC  U'S::  a  Sn2O8U'SK::  a  S[n2O/U'SM::  a  SFn2O&U'SO::  a  S\n2OU'SQ::  a  S]n2OU'SS::  a  S^n2OU'SU::  a  S.n2OS.n2Un2[S        U%U-  5      n%[Q        U%SY5      n%[T        RV                  " 5       n3U3RX                  S-  S-  S-  n4SHn5U4SH::  a  S0n5O/U4S::  a  S0n5O&U4SK::  a  S0n5OU4SM::  a  S0n5OU4SO::  a  S:n5OU4SS::  a  SHn5OS_n5[        S5:X  a3  S`n6[        RZ                  R]                  S05      R^                  n7U6 SaU7 3n8O-Sbn6[        R                  R                  5       u  nnU6 ScU SU 3n8 [        SdU  Se[5        U&Sf-  S:5       SgU8 ShU+ SiU SjU2 SkU% Sl[5        U'S:5       SmU5 Sn35        [a        5       n9USo:X  Ga   S0SpK1J2n:  [        [        5      [        S5      :  d  [        [f        5      [        Sq5      :  a  S,n;S*n<U+Sr::  a  S,n<S,n<OS*n;S,n<[i        SoSsSYS*S*S*[k        S*S,S*S,U;S,S*U<S*S*S*S,St9Su9n=[l        Rn                  " U:5      Rp                  Rs                  5       n>SvU>;   a   S0SwK1J:n?  U?Rv                  U=Sv'   U=Sx	 O[        Sz5        [{        U=Rs                  5       5      nAUA H  nBUBU>;  a  U=WB	 [        S{UB S|35        M      U:" S0 U=D6n [i        S0 S~U _SU&_SU_SU(       a  S%OS _SU(       a  S%OS_SU(       a  S&OS_SU_SU$_SU%_SU_SU_SU_SU	_SU
_SU_SU_SU(_SU_SU_SU5_SU9_SU_6nCU(       a  SYS0S.WCS'   [        S:X  aX  [        R                  R                  5       u  nnUS:  a1  [        [        5      [        S5      :  a  S,nDOS*nD[        S5        WDWCS'    [l        Rn                  " U(       a  U1OU05      Rp                  Rs                  5       n>[{        WCRs                  5       5      nAUA H  nBUBU>;  a  WCWB	 [        S{UB S35        M      U(       a  WC$ S0nE  U(       a  U/R                  U1" S0 WCD65      nFO'U(       a  U.R                  U0" S0 WCD65      nFOU-" S0 WCD6nF   U%WFlE        [        U,5        [        So5       H6  nH[        R                  " 5         [        R                  R                  5         M8     WF$ ! [x         a  n@[        Sy5         S n@A@GNXS n@A@ff = f! [x         a"  n@[        S}[}        U@5       35         S n@A@GNDS n@A@ff = f! [x         a  nGWESY-  nE[        So5       H6  nH[        R                  " 5         [        R                  R                  5         M8      [}        WG5      nGWES::  d  U(       a  [        WG5      eSWG;   d  SWG;   a9  [S        U%S-  5      n%U%WCS'   UCS==   S-  ss'   [        SU% SU$ SWG 35         S nGAGO[        WG5      eS nGAGff = f GM  )N        r  r  r   "UNSLOTH_VLLM_STANDBY_UTIL_OVERRIDEr  g?z10% of your GPU VRAM = g      @g?g      @g?g       @g      ?gffffff?g333333?皙?g      ?zstandby_target_gpu_util = r  z>Decreasing VRAM further since vLLM version >= 0.11.0 uses morer  z*Further reduced standby_target_gpu_util = zGUnsloth: Standby mode is enabled. Changing `gpu_memory_utilization` to rZ   zUnsloth: Standby mode is enabled. However your setting of `gpu_memory_utilization` will OOM.
Changing `gpu_memory_utilization` to rL      zUnsloth: Your GPU is too old!r  zBUnsloth: Your GPU is too old for float8 KV cache! Set it to False.r  z+Unsloth: Changing the maximum lora rank to z from z
 for vLLM.	-bnb-4bitr}   ro  )ro  rr  zm`load_in_4bit` and `load_in_8bit` should be set to false for loading FP8 quantized models with fast inference)	r  r  r  r  r  r  r  r  r  TmllamaFz9Unsloth: MLLama does not support LoRA with fast inference    zBUnsloth: MLLama requires max_seq_length >= 8192 for fast inferencer   r  z4Unsloth: Your GPU cannot handle sequence lengths of zk due to limited GPU memory.
Unsloth: Your GPU can only handle approximately the maximum sequence length of rK   rR   z\Unsloth: We switched to dtype = torch.float16 since your GPU does not support torch.bfloat16zUnsloth: Using dtype = z#Unsloth: We do not support dtype = z yet!ry   
flashinferVLLM_ATTENTION_BACKENDr9  r  z
Unsloth: `zD does not support `VLLM_ATTENTION_BACKEND==FLASHINFER`. Will disabler   VLLM_USE_FLASHINFER_SAMPLER   zAUnsloth: Your GPU does not support prefix caching - will disable!)r  	LLMEnginer  
EngineArgsAsyncEngineArgs   )r3  r  r  )r3  r  )   r     )r4  0   r  )   r     )r7  P   (   )r+  `   r6  )r+  p   r9  )r+  r  )r+  r  z@Unsloth: Vision model detected, setting approx_max_num_seqs to 1r   g?i   i   r4  i      z	Intel GPUz has eu:CUDAz compute capability zUnsloth: vLLM loading z with actual GPU utilization = d   z%
Unsloth: Your GPU has z with VRAM = z' GB.
Unsloth: Using conservativeness = z. Chunked prefill tokens = z. Num Sequences = z).
Unsloth: vLLM's KV Cache can use up to z GB. Also swap space = z GB.r  )CompilationConfigz2.9.0F   inductor)epilogue_fusionr   shape_paddingrL  
cudagraphsr   loggingcombo_kernelsgroup_fusionmemory_planninguse_block_ptrmulti_kernel)levelbackendcudagraph_num_of_warmupsfull_cuda_graphuse_cudagraphuse_inductorinductor_compile_configcudagraph_mode)CUDAGraphModerO  zfUnsloth: Failed getting `from vllm.config import CUDAGraphMode` and `CUDAGraphMode.FULL_AND_PIECEWISE`zOUnsloth: `cudagraph_mode` is not in `from vllm.config import CompilationConfig`zUnsloth: Not an error, but `z>` is not supported in vLLM.config.CompilationConfig. Skipping.z8Unsloth: FAILED getting compilation_config with error = r  r  max_model_lenr   load_formatautokv_cache_dtyper|   r  r#  r  seedr  r  r  r  r  enable_chunked_prefillr  r  
swap_spacerN   rx  )imagevideolimit_mm_per_prompt	   zSUnsloth: Disabling `disable_cascade_attn` in vLLM to allow for better on policy RL!disable_cascade_attnz%` is not supported in vLLM. Skipping.memoryz"Unsloth: Retrying vLLM to process z sequences and z tokens in tandem.
Error:
r?   )Gr  r  r   r   rV   r%   r'   logr   vllm_versionr   r(   rM   rL   r   r  r@  r  r  r   r  r  r  r  r  float16roundr   	importlibutil	find_specr   r  r   rW   r  r0  r  r1  r2  maxr   psutilvirtual_memory	availablerR   get_device_propertiesgpu_eu_countrP   r   r@  torch_versionr  r$   r"  	signature
parametersr  rT  FULL_AND_PIECEWISEr  r1  r   from_engine_argsrg   rT  rU  rV  r  r  r   )Ir  r   r  r  r|   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r  r"  r#  standby_util_overriderT   rU   total_gbten_percentstandby_target_gpu_utilmajor_versionminor_version
mem_confignew_max_lora_rankr  is_fp8r  r  r  r  rZ  	is_mllama_dtypetotal_memory_gbr   r  r0  r  r1  r2  chunked_prefill_tokensra  RAM_GBr[  platformrn  messagerN   r@  rE  rG  compile_flags	good_keysrT  r  old_keysrb  engine_argsr`  trialsr  re  rW  sI                                                                            r1   r   r   d  sb   8  !T)*)s"'73'>?>/bBII>TVY4Z^a4aII&JCPTWW ,K D %d*HS.K

,[M:;		u4		s4		u4		t4		s4$(
!

/0G/HIJ| 11!!JJWY4'!!JJCD[C\]^$9!$;;%<"[\r[sstuv#&==88O7PPQS &="f',zz'G'G'I$}1$78W$XX }q0%&jkkv}%%''

 0>M);<M;NfUbTccmno%M!&)L' U##K0U5A^5S  j&&((S\=R-RF V  _  1^  _, "''!7!%) (?/!#> "F---I!&[ [[%k'kk%
 "Q& N%("!^3F~FV Wbbpaqqrt 4N f!!3ennV				6U]]#:lm	'wj9:	%--	5ENN#:D!$Gwe"TUU ,KL4/$6=qAO 2%8 ~~--#rzz1bjjAY6Z^`6`

34)&11zz~~6;|K
:,.rst'2::5JJ78ZZ^^4b9R?!mq&83?BJJ/0\"gh&773?BJJ/0 A8;BJJ45\"gh&778;BJJ458;BJJ45 f',zz'G'G'I$}A=A#5-!:KUV$)!		 $		 $ QP& '!	$	*Za,W,BDW	$	*Zb,W,BDW	$	*Zb,W,BDW	$	*Zb,W,BDW	$	*Zb,W,BDW	$	*Zb,W,BDW	$	*Zb,W,BDW	$	*Zc,W,BDW	$	*Zc,W,BDW	$	*Zc,W,BDW 	PR "%T>!: c2E2L.M+ "(Q.0F(B.0F(B.0F(B.0F(B.0F(B.0F(B.0F'+$ "0 14DDE115 ""$F$t+d2FJ	1Az	1Az	2Az	2Az	2Az	2Az*eyy66q9FFJh|n5',zz'G'G'I$}J2=/=/R	
 ,KERopsRsuvLwKx y!!(	6G H--=,>>YZpYq  rD  EX  DY Y2278SUV2W1XXopzo{{	A  FQC	W5 |$wx'88GM<RU\]dUe<e"
 $"b($)M !&!
 % $
 ,-"& $#*C&*#($(!!+05"$1#'&*$(#(+M8  ))*;<GGLLNI  9,D96C6V6VM"23%&78 ghM..01Hi'%c*8={|}	  
 !2!C]!C 	 !+!> "0 4D	
 4D +:v "' "8 "5 ". ". "-  "/!" "+#& "3'( "7)* "8+0 "412 "/34 ",56 "(7< "6=K@  891-E)* f',zz'G'G'I$}1|$(99',$ (,$kl2FK./!!Y/JOZZ__aIK$$&'HiC 05Z[\	 
 	 ;& F
	*$55o6T6TU001Jk1JK(K(2 	1C 12 1X




   JQ ! D  C  D  DD  	WLSQRVHUVV	Wf  	*aKF1X



&&(  JE{2 #5))'50H4E&)*=*D&E#.AN+45=589L8M_]s\t u$g' 
 #5))-	*. 	C sd   B&n1 -n An1 ,Ao  
n.n)#n1 )n..n1 1
o;oo 
r-*B.r(r((r-c                 n   [        [        R                  " [        U 5      U-  5      5      n[        R
                  " S[        U 5      U5      nUS   [        U 5      :w  a!  [        R                  " U[        U 5      45      n[        [        U5      S-
  5       Vs/ s H  o@X4   X4S-       PM     nnU$ s  snf )Nr   rz   r   )r   mathceilrh   r  arangehstackrg   )requestsnum_sequencesn_splitsr   rc   batchess         r1   r   r   g  s     499S]]:;<Hii3x=-8Gr{c(m#))Wc(m45:?GQ:OP:OQ
7Q3<0:OGPN Qs   B2c                 *   U R                  5       nU R                  5       R                  R                  nUR	                  5        VVs0 s H$  u  pgSU;   d  SU;   d  M  XgR                  U5      _M&     nnnXCS'   U R                  " USU0UD6  g s  snnf )N.lora_A..lora_B.r  save_directory)r  get_input_embeddingsr   r|   r   r   save_pretrained)r  r  rA   rB   r  r|   r  r  s           r1   r
   r
   s  s     !!#J&&(//55E,6,<,<,>e,>DA*PQ/U_cdUd-!DDK-,>Je%<	DK>KFK fs   BBc                     [        [        R                  R                  U S5      SS9 n[        R
                  " U5      nS S S 5        U$ ! , (       d  f       W$ = f)Nzadapter_config.jsonutf-8encoding)openr   pathrb   jsonload)r  fr   s      r1   get_peft_configr    sF    	bggll>+@Ag	VZ[1 
WM 
W	VMs   A


Ac                    U R                   R                  R                  R                  R                  nUR
                  R                  R                  R                  nUR                  R                  R                  nUS   n[        X$R                  R                  R                  S   R                  5       5        [!        U5      S:g  $ r   )vllm_engineru  r   r  r   lora_manager_adapter_manager_active_adapterscacher  r   r   r  r(  lora_a_stackedrQ  rh   )r  r	  
lora_cacher   v_layers        r1   vllm_lora_already_loadedr    s     	$$33AANNA00AAGGJWW]]!!FQiG	*''00??BKKMNz?ar4   c                    [        U S5      (       d   e/ / p!/ / pCU R                  R                  R                  R                  R
                  R                  n[        UR                  R                  U R                  R                  R                  5       GH  u  pgUR                  UR                  R                  R                  R                  R                  5        UR                  UR                  R                  R                  R                  R                  5        UR                  UR                  R                   R                  R                  R                  5        UR                  UR                  R"                  R$                  S   5        UR                  UR                  R"                  R$                  S   5        UR                  UR                  R"                  R$                  S   5        UR                  R                  R&                  S   nUR                  R                  R&                  S   n	UR                  R                   R&                  S   n
US:X  a  S OUnU	S:X  a  S OU	n	U
S:X  a  S OU
n
UR                  UR                  R                  R(                  R                  R                  5        UR                  UR                  R                  R(                  R                  R                  5        UR                  UR                  R                   R(                  R                  R                  5        UR                  UR                  R"                  R*                  S   U45        UR                  UR                  R"                  R*                  S   U	45        UR                  UR                  R"                  R*                  S   U
45        UR                  R,                  R&                  S   nUS:X  a  S OUnUR                  UR                  R,                  R                  R                  R                  5        UR                  UR                  R,                  R$                  S   5        UR                  UR                  R,                  R(                  R                  R                  5        UR                  UR                  R,                  R*                  S   U45        UR                  UR.                  R0                  R                  R                  R                  5        UR                  UR.                  R0                  R                  R                  R                  5        UR                  UR.                  R2                  R$                  S   5        UR                  UR.                  R2                  R$                  S   5        UR.                  R0                  R&                  S   nUR.                  R4                  R&                  S   nUS:X  a  S OUnUS:X  a  S OUnUR                  UR.                  R0                  R(                  R                  R                  5        UR                  UR.                  R0                  R(                  R                  R                  5        UR                  UR.                  R2                  R*                  S   U45        UR                  UR.                  R2                  R*                  S   U45        UR.                  R6                  R&                  S   nUS:X  a  S OUnUR                  UR.                  R6                  R                  R                  R                  5        UR                  UR.                  R6                  R$                  S   5        UR                  UR.                  R6                  R(                  R                  R                  5        UR                  UR.                  R6                  R*                  S   U45        GM      [        X5       H>  u  nnUR9                  5       R:                  UR9                  5       R:                  :X  a  M>   e   [        X$5       HA  u  nu  nnUR9                  5       R:                  UR9                  5       R:                  :X  a  MA   e    Xl        X l        X0l         X@l!        g )Nr  r   r   ry   r  r  )"r@  r  ru  r   r  r   r  zipr   r  r  rB  lora_Ar  r   k_projv_projr(  r  scalinglora_Blora_b_stackedr)  r-  	gate_projr.  up_projr/  squeezer~   model_loras_Amodel_loras_Bvllm_loras_Avllm_loras_B)r  r  r  r  r  
vllm_modelr  m_layersqsksvsosgsusdmodel_lora_Avllm_lora_Amodel_lora_Bvllm_lora_Bss                       r1   prepare_vllm_lora_loadingr    s    5-(()( $&r=#%r<""--<<JJWW]]J  
 1 1 8 8%++:K:K:R:RSW..55<<DDKKLW..55<<DDKKLW..55<<DDKKLW..77FFqIJW..77FFqIJW..77FFqIJ%%--i8%%--i8%%--i83YTB3YTB3YTBg//66==EELLMg//66==EELLMg//66==EELLMg//88GGJBPQg//88GGJBPQg//88GGJBPQ%%--i83YTBW..55<<DDKKLW..55DDQGHg//66==EELLMg//66EEaH"NOW[[2299AAHHIW[[2299AAHHIW[[55DDQGHW[[55DDQGH[[""**95[[""**953YTB3YTBgkk33::BBIIJgkk33::BBIIJgkk66EEaH"NOgkk66EEaH"NO[[""**953YTBW[[2299AAHHIW[[22AA!DEgkk33::BBIIJgkk33BB1ErKL_ T` 	 &)%E!k##%++{/B/B/D/J/JJKJ &F+.}+K''{A##%++{/B/B/D/J/JJKJ ,L ('&&
r4   c                 L   U R                   nU R                  nU R                  nU R                  n[	        X5       H  u  pVUR                  USS9  M      [	        X$5       H!  u  nu  pUR                  USS9  U	c  M  X-  nM#      [        R                  R                  5         g )NTnon_blocking)	r  r  r  r  r  copy_rM   rL   synchronize)
r  r  r  r  r  r  r  r  r  r  s
             r1   load_lora_directlyr    s     ''M''M''L''L%(%E!,t< &F +.m*J&&{,t<=+*+ +K 		JJr4   )PeftTypec                 ^   [        Uc  [        U R                  5      OU5      n[        U S5      (       a  SU R                  ;   a  U R                  S   R
                  [        R                  :X  a  U R                  5       R                  5       nU VVs0 s HC  u  p4UR                  U:w  d  M  SU;   d  SU;   d  M%  X4R                  5       R                  5       _ME     nnn[        U5      S:X  a  0 $ U R                  5        H  u  pVUS-   U;   d  M  [        SU S35        M!      U$ 0 $ s  snnf )	Npeft_configr  z.lora_A.defaultz.lora_B.defaultr   .default.weight
module.to())r   r    r   r@  r  	peft_typer  LORAr  r   r|   detachclonerh   r4  r  )r  r|   r  r  r  r@  r  s          r1   r   r     s   
 %-(6USE}%%)u7H7H*Hy)33x}}D%%'--/
/9
/9tq5  #!Q&*;q*@ #A
  ""z 	 

 z?a!//1LD'':5z%*+ 2 	I
s   D)-
D);"D)c                    U0 :X  d  Uc  g [        Uc  [        U R                  5      OU5      n[        U S5      (       a  SU R                  ;   aq  U R                  S   R
                  [        R                  :X  aF  U R                  5        H0  u  p4US-   nUR                  US 5      nUc  M!  [        SU S35        M2      g g )Nr  r  r  r  r  )r   r    r   r@  r  r  r  r  r4  r   r  )r  r  r|   r@  r  old_name
old_weights          r1   r   r   	  s     R:-v%-(6USE}%%)u7H7H*Hy)33x}}D!//1LD//H#$7J%z%*+	 2 	
r4   c                    [         c  Sq [        R                  R                  U5      (       a
  [         S:X  aY  U(       a  U R                  S   R                  U5        O3[        R                  R                  U5      (       d  [        SU S35      e SSKJn  U(       az  [        U5      nU R                  5       nUR                  5       nU VVs0 s H&  u  pxSU;   d  SU;   d  M  UR                  S	S
5      U_M(     nnnU" [        [         5      [         XTS9n	OU" [        [         5      [         U5      n	 [         S-  q U	$ s  snnf )Nr   r  zUnsloth: LoRA filepath = z does not exist!r   r   r  r  z.defaultr9  )lora_tensorsr  )r  r   r  existsr  r  OSErrorr   r   r  r  r   r+  r   )
r  r  load_tensorsr   r  r  r   r  r  lora_requests
             r1   r   r   ,	  s     ! 77>>.))_-Ai(88H//5n5EEUVWW-%n5%%'
  ">CjedazUVZdhiZi1aii
B/1e
j #3#7Yc #3#7.Y qO ) ks   E %E c                 B   Uc  S[         R                  ;   a  [        [         R                  S   5      nO[        5       u  pg[	        US-  S-  S-  S5      nUS::  a  U R
                  S-  nO8US::  a  U R
                  S-  nO"US::  a  U R
                  S-  nOU R
                  n[        U5      [         R                  S'   X R
                  :w  a  [        S	U S
35          [        X5      n	X5S'   / n
U	 H'  nU R                  " U/UQ70 UD6nU
[        U5      -  n
M)      U
$ )NUNSLOTH_VLLM_BATCHESr  ry   r  r  r  r/  r8  zUnsloth: Will use z/ batches to reduce memory usage for generation!r  )r   r   r   rV   re  r  r   r   r   r  r1  )r  inputs	n_batchesr  rA   rB   rT   rU   r  r  output_listbatchoutputss                r1   r   r   d	  s+    !RZZ/BJJ'=>?I(4%K#L4$7$$>$EqIO Q&C4K4Kr4Q	 B&C4K4Kq4P	 B&C4K4Kq4P	!55)14YBJJ-.333*9+5def V/G)>K,,u6t6v6tG}$  	r4   c                    SSK JnJn  U" 5         U" 5         U b  U R                  ?A S n [
        R                  " [        5         [        R                  R                  5         S S S 5        [        R                  " 5         [        R                  R                  5          SS KnUR!                  5         U $ ! , (       d  f       NX= f!    U $ = f)Nr   )destroy_model_paralleldestroy_distributed_environment)vllm.distributed.parallel_stater  r  ru  r   r  suppressAssertionErrorrM   distributeddestroy_process_grouprT  rU  rL   rV  rayshutdown)r  r  r  r  s       r1   r	   r	   	  s    
 #%
NN)			^	,//1 
-JJL	JJ J 
-	,Js   B4C 4
CC
c                    SSK JnJn  SSKJn  U R
                  R                  U5      nUR
                  R                  U5      n[        R                  R                  U R
                  R                  R                  UR
                  R                  R                  5        [        R                  R                  Xg5        [        R                  " UR                  S   SS9nUR                  SUR                  S   45      nU R
                  R                  Xh5      n	UR
                  R                  R                  UR                   SS9n
U
" Xx5      n[        R                  R                  U	S   US   5        [        R                  R                  U	S   US   5        [#        [%        U R
                  R&                  UR
                  R&                  5      5       G	H(  u  nu  p[)        USS9  UnUn[        R                  R                  UR*                  R                  UR*                  R                  5        UR+                  U5      nUR+                  U5      nUR-                  UR/                  5       S U	S	9u  nnUR-                  UR/                  5       S US	9u  nn[        R                  R                  UUS
SS9  [        R                  R                  U" UR,                  R0                  5      U" UR,                  R0                  5      5        [        R                  R                  U" UR,                  R2                  5      U" UR,                  R2                  5      5        [        R                  R                  U" UR,                  R4                  5      U" UR,                  R4                  5      5        UR                  S S n/ UQSPUR,                  R6                  P7nUR,                  R1                  U5      R9                  U5      R;                  SS5      nUR,                  R3                  U5      R9                  U5      R;                  SS5      nUR,                  R5                  U5      R9                  U5      R;                  SS5      nUR                  S S n/ UQSPUR,                  R6                  P7nUR,                  R1                  U5      R9                  U5      R;                  SS5      nUR,                  R3                  U5      R9                  U5      R;                  SS5      nUR,                  R5                  U5      R9                  U5      R;                  SS5      n[        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  U" UU/U	Q76 u  nnU" UU/UQ76 u  nn[        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  XMR,                  R                   R<                     nU" UR,                  UUUS UR,                  R>                  (       d  SOUR,                  R@                  UR,                  RB                  S9u  nnXNR,                  R                   R<                     nU" UR,                  UUUS UR,                  R>                  (       d  SOUR,                  R@                  UR,                  RB                  S9u  n n[        R                  R                  UU 5        URD                  " / UQSP76 RG                  5       nUR,                  RI                  U5      nU RD                  " / UQSP76 RG                  5       nUR,                  RI                  U5      n[        R                  R                  XgS
SS9  [        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  UnUn[        R                  R                  URJ                  R                  URJ                  R                  5        URK                  U5      nURK                  U5      n[        R                  R                  XgS
SS9  URM                  UR/                  5       5      nURM                  UR/                  5       5      n[        R                  R                  UUS
SS9  URL                  RO                  U5      n!URL                  RO                  U5      n"[        R                  R                  U!U"S
SS9  URL                  RQ                  U5      n#URL                  RQ                  U5      n$[        R                  R                  U#U$S
SS9  URL                  RS                  U!5      U#-  nURL                  RS                  U"5      U$-  nURL                  RU                  U5      nURL                  RU                  U5      n[        R                  R                  XgS
SS9  [        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  [        R                  R                  UUS
SS9  X-   nUU-   n[        R                  R                  XgS
SS9  UR/                  5       nG	M+      U R
                  RW                  U5      nUR
                  RW                  U5      n[        R                  R                  Xg5         [Y        U S5      (       a  [Y        US5      (       av  U RZ                  R                  b_  URZ                  R                  bH  [        R                  R                  U RZ                  R                  URZ                  R                  5        [Y        U S5      (       aR  [Y        US5      (       aA  U R[                  U5      nUR[                  U5      n[        R                  R                  Xg5        g ! [\         a  n%[)        SU% 35         S n%A%g S n%A%ff = f)Nr   )apply_rotary_pos_embALL_ATTENTION_FUNCTIONS)dequantize_module_weightr   rL   rt  ,)end)attention_maskposition_embeddingsg{Gz?g{Gzt?)rP  rO  rz   ry   r%  )r  dropoutr  r  z%Unsloth: lm_head test failed. Error: )/(transformers.models.llama.modeling_llamar  r  peft.utils.integrationsr  r  r  rM   rZ  r[  r   r  r~   repeatr  r   r   	enumerater  r   r   r{  r  r  rB  r  r  r  r   	transpose_attn_implementationr  attention_dropoutr  r   r]  r)  r|  r-  r  r  act_fnr/  r3  r@  r  r  )&r  r  	input_idsr  r  dfABposition_idsrotary_A
new_rotaryrotary_Brc   oldnew	residualA	residualBAArW  BBinput_shapeAhidden_shapeAQAKAVAinput_shapeBhidden_shapeBQBKBVBr  
attentionA
attentionBgateAgateBupAupBr  s&                                         r1   _test_same_modelr#  	  s	    GKK$$Y/A$$Y/A	MMu{{77>>	@\@\@c@cd	MMq$<<	 2VDL&&9??1+='>?L;;))!:H++55i6F6FQW5XJ!*H	MMx{HQK8	MMx{HQK8"3u{{'9'99??;Q;Q#RS:Cas		""3#6#6#=#=s?R?R?Y?YZ""aggi$V^_Aaggi$V^_A""2r$u"E""2cmm&:&:#;R@T@T=UV""2cmm&:&:#;R@T@T=UV""2cmm&:&:#;R@T@T=UVwws|C,CCCMM,B,BC]]!!!$))-8BB1aH]]!!!$))-8BB1aH]]!!!$))-8BB1aHwws|C,CCCMM,B,BC]]!!!$))-8BB1aH]]!!!$))-8BB1aH]]!!!$))-8BB1aH""2r$u"E""2r$u"E""2r$u"E%b"8x8B%b"8x8B""2r$u"E""2r$u"E#MM$8$8$M$MN#--R!!$!7!7cS]]=\=\mm++

A
 $MM$8$8$M$MN#--R!!$!7!7cS]]=\=\mm++

A
 	"":z:11b1<<>MM  #11b1<<>MM  #""1U"C""2r$u"E""2qe"D""2qe"D		""3#?#?#F#FHdHdHkHkl((+((+""1U"CWWQWWYWWQWWY""2r$u"E!!!$!!!$""5%U"Kggooa ggooa ""3D"GGGNN5!C'GGNN5!C'GGa GGa ""1U"C""2r$u"E""2qe"D""2qe"DMM""1U"CGGIs Tt 	KKQAQA	MMq$;5)$$I)F)F}}##/I4E4E4L4L4X**5==+?+?ARARAYAYZ 5)$$I)F)Fa A!!!$AMM&&q,   ;5aS9::
;s    C;q 
q>&q99q>c                     [        S5        [        X5         [        U R                  5       UR                  5       5        [        S5        [        S5        g! [         a  n[        SU 35         SnAgSnAff = f)zl
Simplified model testing using clean comparison utilities.
Replaces the complex _test_same_model function.
z=== MODEL CONVERSION TEST ===u!   ✅ State dict comparison passed!u"   ❌ State dict comparison failed: NFu$   ✅ Model conversion test completed!T)r   compare_attributesr   r  r  )original_modelr  r  s      r1   test_model_conversionr'  *
  st     

)* ~1~88:I<P<P<RS12
 

01  21#67s   3A 
A9!A44A9c                    U R                   UR                   :X  d   eU R                  UR                  :X  d   eSSSS.SSS./S./nUR                  US	S
S9nUR                  US
S
S
SS9R                  U R                   U R                  S9n[        R
                  " 5          U " S0 UD6nU" S0 UD6n[        R                  R                  UR                  UR                  5        [        S5        S S S 5        US   US'   U " S0 UD6nU" S0 UD6n[        R                  R                  UR                  UR                  5        [        S5        U(       Ga  [        [        5      [        [        5      [        [        5      S.n	[        [        5      [        [        5      [        [        5      S.n
[        X	5        [        X5        SSKJn  U" US   5      US'   S
US   l        U " S0 UD6nU" S0 UD6n[        R                  R                  UR                  UR                  5        [        S5        UR                  R#                  5         UR                  R#                  5         / n/ nU	S   R%                  5        Hl  n[        R&                  " U	S   U   5      n[        R&                  " U
S   U   5      n [        R                  R                  UUSS9  UR)                  U5        Mn     [        S[-        U5       SU 35        [        S[-        U5       SU 35        g g ! , (       d  f       GN<= f! [*         a-  n[        SU SU 35        UR)                  U5         S nAM  S nAff = f) Nuserr\  zhttps://files.worldwildlife.org/wwfcmsprod/images/Sloth_Sitting_iStock_3_12_2014/story_full_width/8l7pbjmj29_iStock_000011145477Large_mini__1_.jpg)r  r\  r/   z(Which films does this animal feature in?)r  r/   rolecontentFT)tokenizeadd_generation_promptpt)r.  r-  return_dictreturn_tensorsr&  zForward pass logits match!r  labelszLosses match !)prepostbackwardr   r   zLosses match!r5  gư>)rO  zGradient mismatch in layer 'z': z'Backward gradient statistics match for z	 layers: z*Backward gradient statistics mismatch for r?   )rN   r|   apply_chat_templater   rM   r  rZ  r[  logitsr   lossdefaultdictr1  register_hookscopyr   r  r5  r  r  r  r  rh   )r  r  	processortest_backwardmessagesr/   r  original_outputsnew_outputsoriginal_model_statsnew_model_statsr   r  
mismatchesr  original_grads	new_gradsr  s                     r1   _test_is_same_vlmrF  @
  sK   <<9+++++;;)//)))   )}  ~(RT
 H ((5 ) D **t +  	bU[[b) 
 
 ?6?)&)""#3#:#:K<N<NO*,	 
 k*F8v%f%K	MM/44k6F6FG	
 t$%#D) 
 t$%#D)
 	u3y2 	"#F;$78x,0{) !?6?)&) 	""#3#8#8+:J:JKo 	&&(!!# 
.z:??AJ"\\*>z*J:*VWN_Z%@%LMI.**>94*Pz* B 	7G~YwiXY:3z?:K9U_T`aba  
p  .4ZLA3GH!!*--.s%   "AL'=0L9'
L69
M0"M++M0c                     Sn SSK JnJnJn  SSKJn   U" S5      =(       d    S HU  n[        XS5      (       d  M  UR                  n[        U5      S:  d  M2  [        USS  5      U :X  d  MF  UR                  S	S
9s  $    [        U[        R                  5       H8  nUR                  " U 6 nUR                  5       (       d  M)  UR                  S	S
9s  $    [!        S5      e! U a     Nif = f)N)unslothr   z	vision.pyr   )filesPackageNotFoundErrorPackagePath)PathrH  r?   r  r  r  z>Could not locate unsloth/models/vision.py without importing it)importlib.metadatarI  rJ  rK  pathlibrL  r   partsrh   rK  	read_textmapsysr  joinpathis_fileFileNotFoundError)	_VISION_TAILrI  rJ  rK  rL  entryrP  base	candidates	            r1   _read_unsloth_vision_sourcer[  
  s    5LKK9%++E%--u:?uU23Z'8L'H ??g?>>	 , D#((#MM<0	&&'&:: $ \
]]   s(   "C C C +C <C C('C(c                   ^  [        5       n[        R                  " U5      nUR                   H  n[	        U[        R
                  5      (       aH  [        U 4S jUR                   5       5      (       a"  [        R                  " UR                  5      s  $ Mj  [	        U[        R                  5      (       d  M  [        UR                  SS5      T :X  d  M  [        R                  " UR                  5      s  $    [        T  S35      e)zF
Parse VLLM_SUPPORTED_VLM from unsloth/models/vision.py as a literal.
c              3   D   >#    U  H  n[        US S5      T:H  v   M     g7f)idN)r  )r\   r   	_VAR_NAMEs     r1   r_   )get_vllm_supported_vlm.<locals>.<genexpr>
  s     M171dD)Y6s    r^  Nz3 not found as a literal in unsloth/models/vision.py)r[  astparser  r   Assignre   targetsliteral_evalvalue	AnnAssignr  targetr	  )r_  srctreenodes   `   r1   get_vllm_supported_vlmrl  
  s     &
'C99S>D 		dCJJ''MMMM''

33 Ncmm,,t{{D$/9<''

33  	{"UV
WWr4   ffffff?r?  c
                    [         R                  " 5         [        R                  R	                  5         SSKJn
  U
R                  U S S SSS9nXl        [        X5      (       d  [        SU  S35      eSSKJnJn  S nU R                  5       R                  S	5      =(       d    UnU(       a	  U" S
S
SUS9n [        5       nU(       a  XS'   [!        X5      n[#        5         [%        USS5      nUS:g  nU	(       d  UnO=['        5       nUU;   a  SS Kn[%        UUR(                  S   5      nO[+        SU S35      e[-        SU 35        UR                  " U 4SSS
S.UD6nUR/                  5        H  nUR1                  S5        M     [3        US 5      u  nnUR5                  5         [7        5         [9        U UUUUUUUU	US9
n[;        US
UU	S9u  nn[=        UR?                  5       U5        [A        UXU	S9n[C        UU5        U	(       d  SSKJ"n  UR                  U 5      nSSS./SSS./SSS./SSS./SS S./SS!S./SS"S./SS#S.//U-  nURG                  USS
S
S$9nSS%K$J%n   U " S&S'SSS(S)9n!U(       dM  [M        UURN                  5      n"/ n#U" H-  n$URQ                  U$U!5      n%U#RS                  S* U% 5       5        M/      A#U" US   SS+S,9n&U&S-   RU                  S.S
S/9n&[W        UUU&5        O%SS0KJ,n'  U'R                  U 5      n([[        UUU(S5        []        U5         S UR^                  R`                  l1        S UR^                  R`                  l1        [e        [g        UR^                  Rh                  5      5       H5  n)S UR^                  Rh                  U)'   S UR^                  Rh                  U)'   M7      S UR^                  Rj                  l1        S UR^                  Rj                  l1        S URl                  l1        S URl                  l1        S Ul/        S Ul/        AA[-        S15        [e        S25       H6  n[         R                  " 5         [        R                  R	                  5         M8     g !    NW= f)3Nr   )
AutoConfigFsdpa)tokenrevisiontrust_remote_codeattn_implementationzUnsloth: Dynamic quant of z not supported in vLLM)AutoModelForCausalLMr   r)  Tnf4)r  rj  rl  r   r  r  r  r*  zUnsloth: Model type z  not supported for vision modelszLoading model with type 
sequential)
device_maprt  low_cpu_mem_usage)
r  r   r  r|   r  r  r!  r   r  r  )r  r   r  )r  )AutoTokenizerr)  z2Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,r*  z"Write a long poem about the world.z+What is the capital of France? Describe it.zWhy is the sky blue?z%Explain Newton's third law of motion.zWhy is spacetime bent?zExplain heliocentricism.zEDerive the formula for an infinite sum of 1, 1/2, 1/4, 1/8 and so on.)r-  r.  padding)SamplingParamsr'  r  r  )temperaturetop_plogprobsprompt_logprobs
max_tokensc              3   ^   #    U  H#  oR                     H  o"R                  v   M     M%     g 7fr-   )r  	token_ids)r\   completionsr   s      r1   r_   ,_test_get_vllm_state_dict.<locals>.<genexpr>P  s      %nwZmZmSVmmZmmws   +-r/  )add_special_tokensr1  r  rL   r  )AutoProcessorzTest passed!r  )7rT  rU  rM   rL   rV  transformersro  from_pretrainedr  r   r  ru  r   r  r  r  r!   r$  r  rl  r  r	  r   rq  r  r#   r,  r   r   r   r   r  r   r'  rz  r6  rW   r|  r   r  r  extendr   r#  r  rF  r	   r  r  r   rg   rh   r   r3  r  )*r  r|   r  countsr  r  r!  r  skip_generationr  ro  r   ru  r   r  rB   r  r  model_classVLLM_SUPPORTED_VLMr  r  paramrW  r  r  r  r  rz  rH   r>  r  r|  sampling_paramsr  completion_idsr  r  r  r  r<  rc   s*                                             r1   _test_get_vllm_state_dictr  
  s    JJL	JJ'''!$ ( F #'
;;!$>zlJ`"abbEJ##%..{;K|L'(,(,(-(-	

 	VFZ12e,F"${;J(K*35++!,0D0DQ0GHK3J<?_`aa	$[M
23''*$" E !!#U# $(5HE1	JJL L
!+!'!7!&!1!0!5!-!0!,C $7 )	$ J  5++-z:+,<f_noI%+ .!11*=	)]^`)MNP)VWY)?@B)PQS)ABD)CDF)pqs	
 	 ..$(	 / 
 	(( 
 $VS-D-DEGN ,,uo>%%%nw%nn !  fQieVZ[	k*--fT-J		95 	/!11*=	%Iu=*.  '.2	$$+s5;;--./A$(EKKq!(,IOO""1% 0 	"&&*	###'	 	 		L1X




  s   C>Q/ /Q3c                     [        5         [        5       u  p/ SQn[        R                  nUS:  a  U/ SQ-  n[        R                  n [        U5       H  u  nu  pV[        R                  " 5         [        R                  R                  5         US-  S:X  a  [        R                  OUn[        SU SU S35        U[        R                  :X  a  US	-  nS
nSn	Sn
OSnSn	Sn
 [        UUU
UUU	[        S9  [        R                  " 5         [        R                  R                  5         M     g ! [         a!  n[        U5      n[        SU SU 35      eS nAff = f)N)z&unsloth/Llama-3.2-1B-Instruct-bnb-4bitr?  )z.unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bitr?  )r  2   l    _ % ))zunsloth/Qwen2.5-3B-Instructr  r  )z3unsloth/meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit   )z$unsloth/Qwen2.5-7B-Instruct-bnb-4bitr  ry   r   z##### Testing z with dtype = z #####r  r'  Tg      ?r  rm  )r  r|   r  r  r  r  r!  r  rY  )r   rV   rM   rd  r  r   rT  rU  rL   rV  r   r  r!  r  r   r  )rT   rU   model_namesbfloat16_dtyperc   r  r  r|   r  r  r  re  s               r1   test_get_vllm_state_dictr  }  sQ   L ,KK
 ]]N.. 
 	
 $-k$:  J




 !"Q!zl.vFGU]]*q[F""O%("""O%("	;%')?#3"1'; 	



 9 %;: 	  	;JE:,c%9::	;s   #D..
E8EE)r   r-   )T)FNF)
FFr3  r'  Tr  r   FTr  )r  )r  Nr'  r+  NTFr   Tr  r   FFFFTr  r  r   TFFFr  )r  )Fr  )r  )__all__r  r   r   r   r   r   importlib.utilrf  r  collectionsr   numpyr  r;  r   r  rT  r   ra  rS  rM   r   ro  r  rj  r  r  r"  r   utilsr   r   r   empty_modelhf_utilsr    r!   r"   patching_utilsr#   temporary_patches.commonr$   r%   rb  r'   r   r(   rF  Filterr*   rC   rF   rI   rP   rV   rg  rh  rW   rc  r   vllm_loggerr   r   r   rl   ro   rr   4vllm.model_executor.model_loader.bitsandbytes_loaderr@  r   model_loaderbitsandbytes_loaderBitsAndBytesModelLoaderr   r   r   r}   r"  Moduler  r   rs   r   r   rd  r   r   r   vllm_lora_requestr   r   vllm_lora_worker_managerr   r   r   r   r   r   r   bitsandbytes.functionalbitsandbytes.utilsr   r   classmethodrN   r  r  r#  r  r$  r'  r)  r  r  r   r  r   r   r  r  r   r   r  r  r   r  r  floatr   r|   r   r   r
   r  r  r  r  peftr  r   r  r   r   r   r	   r#  r'  rF  r[  rl  r  r  r?   r4   r1   <module>r     s%  "" 4 3  	 #    	 	 
 
  .       6 6  
 6  $ B B  1 4 H,
% >>F#/0 "	F!!"45P"QR	T!!"45P"QR 	+S + 	
C,,@@
 
 R_D,,@@N,,@@XX
 
 kyD,,@@XXg
 @""//<<  ,0	2	88??2	 ||2	 5<<(	2	
 \\2	f 	 ,,33@@MM\\
 ,0	$	88??$	 ||$	 5<<(	$	
 \\$	J 	" 		""//<<OO	 	).  	9
 	$ 	D& 	7 	 	  >>N+7"M 5S#X 5 5 5 5l 	"\__,,77  	8 	 	 	 k?X LJZ &  
@ U"`(B	  ,	 ,	Z BG--^bv{ s sh 
   cCH   & 	 I4 I4T  &V!%(%)+/$($)$%$($&$%$)$)$)$)$($%%($%$($)$)$)#&1  # !	
 #[[ " " ! " ! ! " " " "  "!" !#$ #%& !'( ")* "+, "-. "/0 !1@   L L    	  IT &    .   MM (  3 3h !D . DJ  *Rcf ^S ^( X   AMM  x! x!r 3	h SZ	
	Ps%   -W# W+ <B;W3 #W(+W03W8