
    oir                        S SK Jr  S SKJr  S SK Jr  S SKrS SKrS SKJr  S SK	J
r
  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJrJrJr  S SKJr  / S	Qr\" S
5      rSS/r\" \ V s/ s H  o R;                  5       PM     sn \ V s/ s H  o R;                  5       S-   PM     sn -   5      rSR=                  \5      \R>                  S'   SSR=                  \R>                  RA                  5       5      -   r!S\!;   r"S\!;   r#Sr$C!S&S jr%S r& S'S jr'Sr(Sr)S r* S'S jr+S r,      S(S jr-      S(S jr.S r/S r0S r1     S)S  jr2S SK3r3S S!K3J4r4  S SK5r5S SK6r5S S"K67  S S"K77   S S#K6J8r8  S% r9gs  sn f s  sn f !   S$ r8 N= f)*    AutoTokenizer)convert_slow_tokenizer)PreTrainedTokenizerFastN)logger)PeftModelForCausalLM)mean_of_trained_tokensadd_new_tokensfix_untrained_tokens)fix_zero_training_loss)load_correct_tokenizerfix_sentencepiece_tokenizercheck_tokenizerr
   fix_sentencepiece_gguf)CodeLlamaTokenizerFastCodeLlamaTokenizerz#unsloth/Qwen2.5-Coder-1.5B-Instructz!unsloth/Qwen2.5-Coder-7B-Instructz	-bnb-4bit
UNSLOTH_IGNORED_TOKENIZER_NAMESz
COLAB_z
KAGGLE_z/tmpc                    [        U S5      (       a  U R                  nO[        U 5      nUR                  5       nSnU(       d  XC;   a  UR	                  USS5      n[        U 5      nU Vs/ s H3  ofR                  S5      (       d  M  UR                  S5      S:X  d  M1  UPM5     nnU H  n[        XS 5      n	U	c  M  [        XS-   S 5      n
SU
 S	3nUR                  U5      [        U5      -   nUS
:X  a  MN  UR                  SU5      nX<U nX:w  d  Mk  U U S3nU U	 S3nUR	                  UUS5      nSU SU
 S3nSU	 SU
 S3nUR	                  UUS5      nM     UR                  U5      nU$ s  snf )N
_tokenizeru#   {"type":"Prepend","prepend":"▁"},    _token__idz"id":z,"content":"z","z":,)hasattrr   r   to_strreplacedirendswithcountgetattrfindlenfrom_str)	tokenizerprependconverted_tokenizertokenizer_stringprepend_text	dir_namesxtoken_names
token_nametokentoken_id	find_textstartend	bad_tokenbad_text	good_textfixed_tokenizers                     Q/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/tokenizer_utils.pytry_fix_tokenizerr<   K   s   y,'''224Y?*113 9L|7+33L"aHII'Vi::h+?1AGGCLTUDU1iKV!
	t4=95&8$? H:\2	 %%i03y>AB;##D%0$3/	#YKr2H$+eWB/I/77)QO 9+Rz3HE7"XJa0I/77)QO/ "2 *223CDO9 Ws   *E0E0E0c                     [        U R                  5       5      nU R                  5        VVs0 s H  u  p#X2_M	     nnn0 nU H  nXB   nX%U'   M     U$ s  snnf N)sortedvaluesitems)
dictionarysorted_keyskeyvalueinverted_dictionarysorted_dictionarys         r;   get_sorted_dictrH   y   sg    **,-K8B8H8H8JK8J*#5:8JK#(#&%    Ls   Ac                    [        U SS5      nU(       a  U $  U R                  R                  nUR                  5       nUR	                  S5      (       a"  US [        S5      *  n[        SU SU S35      nO[        n UR                  nXwR                  S5      S  n[        R                  " SU[        R                  S	9nU V	s/ s H  oR	                  S
5      (       a  M  U	PM     nn	[        R                  nXwR                  S5      S  n[        R                  " SU[        R                  S	9n
U
 V	s/ s H  oR	                  S
5      (       a  M  U	PM     n
n	[        [        X-   5      5      n0 nU H  n[        XS 5      X'   M     [        U SS9US'   U" S0 UD6n[!        U R#                  5       5      n[!        UR#                  5       5      nX:H  nU R$                  UR$                  :H  nU(       a  U(       d  U $ ['        X5      (       d'  [        U SS9US'   U" S0 UD6n['        X5      (       d  U $ U R(                  R+                  SS5      n[,        R.                  R1                  U5      (       d  [,        R2                  " U5        U SU 3nU R5                  U5        UR5                  U5        [6        R8                  " U5      n['        X5      (       a  U$ U $ !   [        n GNj= fs  sn	f s  sn	f )Nis_fastFr)   z!__import__(f"transformers.models.z").FastzArgs:z\n[\s]+([^\s]{1,}) \(flags_fileT)r*   tokenizer_object/r    )r%   	__class____name__lowerr#   r'   evalr   __doc__r&   refindall	MULTILINElistsetr<   rH   	get_vocaball_special_tokensassert_same_tokenizationname_or_pathr!   ospathexistsmakedirssave_pretrainedr   from_pretrained)slow_tokenizertemporary_locationrJ   tokenizer_namelowered_tokenizer_name
class_nameFastTokenizerdocsargsr/   args2kwargsargfast_tokenizersorted_slow_tokenizersorted_fast_tokenizercheck_vocabcheck_specialnamenew_locations                       r;   convert_to_fast_tokenizerrx      s    ni7G0'11::!/!5!5!7!**;77/0C3{3C2CDJ 3J<s>BRRVWM 4M
   D		'"$%D::.bllKD7t!::g#6AtD7 #**D		'"$%DJJ/r||LE91ZZ%8QE9DL!"DFn48 !2>T!RF",V,N ,N,D,D,FG+N,D,D,FG'@K))^-N-NN 
 m $NCC%6~QV%W!"&00'GG!! &&..sC8D77>>,--
&'()4&1L""<0""<0 #22<@N??q0/ 8 :s*   AK 6K K*!K*4K/K/K'a  {% if messages[0]['role'] == 'system' %}{% if messages[1]['role'] == 'user' %}{{ '[INST] ' + messages[0]['content'] + ' ' + messages[1]['content'] + ' [/INST]' }}{% set loop_messages = messages[2:] %}{% else %}{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}{% set loop_messages = messages[1:] %}{% endif %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}a  {% if messages[0]['role'] == 'system' %}{% if messages[1]['role'] == 'user' %}{{ '[INST] <<SYS>>
' + messages[0]['content'] + '
<</SYS>>

' + messages[1]['content'] + ' [/INST]' }}{% set loop_messages = messages[2:] %}{% else %}{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}{% set loop_messages = messages[1:] %}{% endif %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'].strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'].strip() + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}c                 l  ^  [        T S5      (       d  g[        T 5      n[        [        S U 4S jU 5       5      5      n[        [	        UT R
                  -   5      5      nSR                  S5      nU Vs/ s H  ofU:w  d  M
  UPM     nnSnSnSn	 U=(       a    U=(       a    U	n
 SR                  U5      S-   SR                  U5      -   nT " U5      R                  U" U5      R                  :H  nU
=(       a    U$ s  snf !   T R                  5       R                  S	S
5      S   [        ;   a  U
s $  g= f)Nr]   Tc              3      >#    U  H@  nUR                  S 5      (       d  M  UR                  S5      S:X  d  M2  [        TU5      v   MB     g7fr   r   r   N)r#   r$   r%   ).0r/   rf   s     r;   	<genexpr>+assert_same_tokenization.<locals>.<genexpr>
  sE      "A::h' +,-GGCLA,= +**"s   AAAs   ï¿½utf-8r   z5A quick brown fox jumps over the lazy dog!!

Hi</s>

r   (r   r   F)r   r"   rZ   filterr[   r]   decodejoin	input_ids__repr__splitIGNORED_TOKENIZER_CHECKING)rf   rq   r.   special_tokensr]   replacement_charr/   check_chat_template1check_chat_template2check_chat_template3check_chat_templatestringcheck_special_tokenss   `            r;   r^   r^     s`   >#788N#I"	
	N c.>3T3T"TUV 399'B%7Q%7@P;P!%7Q  $L 	N!5N:N 
II()IJgg()* 	 6",,v0F0P0PP 	 #;';;w Rx ""$**32159SS&&s   4	C;C;%AD   /D3c           
      z    SSK Jn  U" 5       n[        R                  R                  U5      (       d  [        R                  " U5        [        R                  R                  U S35      (       d  U$ U R!                  U5        WR#                  5       n
U
R%                  ['        U S3S	5      R)                  5       5        UR!                  U5        UR+                  5        Ho  u  pU " U/S
S9R,                  nUS   n[/        U5      S:w  a  [1        SU SU SU S35        MB  US   n U
R2                  U   nUR4                  U:X  d   eXl        Mq     ['        U S3S5       nUR7                  U
R9                  5       5        S S S 5        SSKJn  UR>                  " UUR@                  URB                  S9nU$ ! [         a^  n SS KnSSKJn  U" UR                  R                  5      n	X" S5      :  a  [        SU	 S35      e S nAGN!   SSK	J
n    S nAGN	= fS nAff = f!    GMI  = f! , (       d  f       N= f)Nr   )import_protobuf)Versionz3.20.3z!Unsloth: Your protobuf version = zR is too new.
Please downgrade via `pip install --force-reinstall protobuf==3.20.3`sentencepiece_model_pb2/tokenizer.modelrbF)add_special_tokensr   zSkip mapping z to z since z is already in the tokenizer!wbr   )	eos_token	pad_token)"#transformers.convert_slow_tokenizerr   	Exceptiongoogle.protobufunsloth_zoo.utilsr   protobuf__version__RuntimeErrortransformers.utilsr   r`   ra   rb   rc   isfilerd   
ModelProtoParseFromStringopenreadrA   r   r'   printpiecespiecewriteSerializeToStringtransformersr   re   r   r   )old_tokenizernew_tokenizertoken_mappingrg   r   r   egoogler   protobuf_versiontokenizer_file	old_token	new_tokenidstokenizer_piecefiler   r)   s                     r;   r   r   [  sF   CG"1"3  77>>,--
&' 77>>/00@ABB !!"45,779N"""##34d;@@B
 !!"45 !. 3 3 5	YKeDNN!fs8q=	{$ykKhi !f	,33C8O $$	111 )! !6& 
#$$45t	<

>3356 
= +--!++!++I
 A  C	C"1&v'B'BC'("33"78H7I J\ ]  4
	CBBC^	
 
=	<sB   F8 (H#" H,8
H A H		HHHH #H),
H:c           	      b   SSK Jn  SSKJn  SSKnSSKJn   " S SU5      nUR                  5       n[        R                  R                  U  S35      (       d  gUR                  [        U  S3S	5      R                  5       5        [        UR                  5      n[        R                  R                  U  S
35      (       d  g[        U  S
3SSS9 nUR!                  U5      n	SSS5        [        W	5      S:X  a  g[#        [%        U	R'                  5       S S95      n	U[        U	5      -   n
[(        R*                  " [-        U	R/                  5       5      5      n[(        R0                  " U5      nUR3                  5       S:w  d  UR5                  5       S:w  a  gUR3                  5       U:w  a  g[6        R8                  " SU  SU SU
 S35        U" UR                  [        U5      * S 5      n[;        XR=                  5       5       H3  u  pUR?                  S5      Ul         SUl!        URD                  Ul#        M5     UR                  RI                  U5        [        U  S3S5       nURK                  URM                  5       5        SSS5        g! , (       d  f       GN= f! , (       d  f       g= f)z
Fixes sentencepiece tokenizers which did not extend the vocabulary with
user defined tokens.
Inspiration from https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py
r   )deepcopyr   N)IntEnumc                   ,    \ rS rSrSrSrSrSrSrSr	Sr
g	)
7fix_sentencepiece_gguf.<locals>.SentencePieceTokenTypesi  r                  rQ   N)rS   
__module____qualname____firstlineno__NORMALUNKNOWNCONTROLUSER_DEFINEDUNUSEDBYTE__static_attributes__rQ       r;   SentencePieceTokenTypesr     s     r   r   r   r   z/added_tokens.jsonrr   )encodingc                     U S   $ )Nr   rQ   )items    r;   <lambda>(fix_sentencepiece_gguf.<locals>.<lambda>  s    T!Wr   )rD   r   zUnsloth: Extending zP/tokenizer.model with added_tokens.json.
Originally tokenizer.model is of size (z6).
But we need to extend to sentencepiece vocab size (z).g     @r   )'copyr   r   r   jsonenumr   r   r`   ra   r   r   r   r   r'   r   loaddictr?   rA   nparrayrZ   r@   diffminmaxr   warningzipkeysencoder   scorer   typeextendr   r   )saved_locationr   r   r   r   r   r   sentence_piece_sizer   added_tokens_jsonnew_sizeadded_tokens_idsr   
new_tokensr   added_tokens                   r;   r   r     su    :'  -779N77>>^,,<=>>""/0$7<<> n334 77>>^,,>?@@	  23SW	MQU IIdO 
N
" &&(0DE #S):%;;H xx%6%=%=%? @A77#$DxxzQ$((*/!44 NN
n- .22E1F G>>FZr	K
 .//5E1F0F0HIJJ"%j2H2H2J"K	%,,W5	!	0==	 #L
   ,	  014	8D

>3356 
9 M 
N	M@ 
9	8 s   J% J 
J 
J.c                    [         (       a  UnO2[        (       a%  [        R                  R	                  [
        U5      nOS nS n [        R                  " U UUUUSSSUS9	n[        U5      [        L a  S n[        R                  " U UUUUUS9nU(       a
  U [        ;   a  U$ SU R                  5       ;   a  U$ SU R                  5       ;   a  U$ Ub  [        US5      (       a"  [        US5      (       a  UR                  Ul        [        US5      (       a"  [        US5      (       a  UR                  Ul        [        Xx5      (       a  U$ [         R"                  " S	U  S
35        [%        U5      $ U$ !   S n GN= f)NFT)model_max_lengthpadding_sider2   trust_remote_codeuse_fastlegacy	from_slow	cache_dir)r   r   r2   r   r   mistralzphi-4add_bos_tokenadd_eos_tokenzUnsloth: Will load z as a legacy tokenizer.)IS_COLAB_ENVIRONMENTIS_KAGGLE_ENVIRONMENTr`   ra   r   
KAGGLE_TMPr   re   r   boolIGNORED_TOKENIZER_NAMESrT   r   r   r   r^   r   r   rx   )	rh   r   r   r2   r   r   fix_tokenizerrf   rq   s	            r;   _load_correct_tokenizerr     s    			 GGLLY7		 N&66/' 1!
& Nt#"22+#-N N.EE	n**,	,	N((*	*		#>?33O9
 9
 ,:+G+GN(>?33O9
 9
 ,:+G+GN( $NCC!!NN%n%55LM -^<< _s   E. .E5c           
         [        U UUUUUUS9n[        USS 5      nS[        [        USS5      5      R                  5       ;   a  Un	O;Ub  SU;   a  SU;   a  SU;   a	  S	U;   a  Un	O[	        U5      n	Ub  U	c  [        S
5      e Xl        U$ )N)rh   r   r   r2   r   r   r   chat_templater   r_   r   z[/INST]z[INST]	bos_tokenr   zHUnsloth: Fixing chat template failed - please file a report immediately!)r   r%   strrT   fix_chat_templater   r  )
rh   r   r   r2   r   r   r   r)   old_chat_templater  s
             r;   r   r   F  s     ('+#-%I  	?DA C	>2>?EEGG) 	%**)),,,,) *)4(]-BZ  	+r   c                 v    U R                  U5      nU R                  U5      nX4s=:X  a  S:X  a   g   X4:  a  U$ U$ )Nr   )r&   )templateendforendifwhere_endforwhere_endifs        r;   _find_end_positionr  v  sA    ==(L--&K(b( )		#r   c                    SnSn[        XU5      nUc  SnSn[        XU5      nUc  U $ U R                  U5      nX[        U5      -   S  nUR                  S5      (       a  SOSnSU-   S	-   U;  a  SU-   S
-   U;  ax  UR                  S5      (       ab  UR	                  S5      (       aL  UR                  S5      S:X  a7  UR                  S5      S:X  a"  SU-   S-   U-   U-   nU S U[        U5      -    U-   n U $ )Nz{% endfor %}z{% endif %}z{%- endfor %}z{%- endif %}z{%--r   z{%z ifz set z{{z}}r   z if add_generation_prompt %})r  r&   r'   
startswithr#   r$   )r  r  r	  
chosen_endwhereafter_endfordashs          r;   _fix_chat_templater    s(   FE#M5AJ 'uE
z*E Z!8!:;L''..3BD 	te</4K'!5##D))!!$''t$)t$) 4K88<G%O 	 &&?J(?@<Or   c                 H   [        U SS 5      nUc  g S n SSS./nU R                  USSS9  SnUc  U$ U(       d  SSS.OSSS./nU R                  USSS9nU R                  US	SS9nXE:X  a  S
U;  al  SU;  af  [        U5      nS
U;  a,  SU;  a&  [        SU R                   SU R                   S35      e[
        R                  " SU R                   S35        Un U$ [        SU R                   S35      eU$ !    SSS./nU R                  USSS9  S	n N!   S n  N= f= f)Nr  userzWho are you?)rolecontentF)add_generation_prompttokenizehuman)fromrE   Tz{% if add_generation_prompt %}z{%- if add_generation_prompt %}zUnsloth: The tokenizer `zz`
does not have a {% if add_generation_prompt %} for generation purposes.
Please file a bug report to the maintainers of `z` - thanks!zUnsloth: We successfully patched the tokenizer to add a {% if add_generation_prompt %} to the chat_template.
This is not a bug, but please notify the maintainers of `z`
has a {% if add_generation_prompt %} for generation purposes, but wasn't provided correctly.
Please file a bug report immediately - thanks!)r%   apply_chat_templater  r   r_   r   warning_once)r)   r  is_sharegptmessagesnoyesnew_chat_templates          r;   r  r    s   I=M K7
 	%%e 	& 	
  
  N37H
 
	&	&%E 
' 
B 
'
'$5 ( C 
y -MA1F !3= A08II5=NN".y/E/E.F GGGPG]G]F^^ik  ##PPYPfPfOggrt !2  *9+A+A*B CA A 
 u
		 >:H ))%E *  K	Ks#   C9 9D!<DDD!D!c                 p   UR                  5       R                  SS5      S   [        ;   a  U$ U R                  R                  R
                  R                  S   nUR                  nUR                  5        V	V
s0 s H  u  pU	[        U
5      _M     nn	n
[        U5      nU Vs0 s H  oX   _M	     nn[        UR                  5       5       GH  u  pX:  d  M  [        UR                  5       5      US  n[        UR                  5       5      US  nU(       Gd  UR                  R                  5        Vs/ s H  n[        U5      PM     nnUR                  nSS Kn[#        UR$                  R'                  S UR                  5        5       5      5      nU Vs/ s H  nUU;  d  M  UPM     nnU Vs/ s H%  nUUR(                  R                  5       ;   d  M#  UPM'     nn[+        U5      [+        U5      :H  =(       a    [+        U5      [+        U5      :H  nSn/ nU(       d  [-        U5      nS U 5       nU Vs/ s H  nU[/        UUS 5      4PM     nn/ nU H9  nU H0  u  nnUU:X  d  M  UR1                  U5        UR1                  U5        M2     M;     [+        U5      [+        U5      :H  nU(       a  SnUnU(       a  [        U5       Hf  u  nnUR(                  U   nUR2                  U	 UR(                  U	 U(       d  M8  WU   U:X  d  MC  [5        UUU   S 5        [5        UUU   S-   S 5        Mh     [7        UR                  R                  5       5      U:  a0  [8        R:                  " S	U S
U SU SU S3	5        [=        U5      s  $ [?        SU SU SU SU S3	5      e[@        (       d  [B        (       a  Sn OS n  [D        RF                  " UUUUSSSU S9n[I        U UUUUUSS9s  $    [=        U5      $ s  sn
n	f s  snf s  snf s  snf s  snf s  snf !   [8        R:                  " S5        Us s  $ = f)Nr   r   r   c              3   P   #    U  H  n[        U5      [        L a  U/OUv   M     g 7fr>   )r   r  r|   r/   s     r;   r}   "check_tokenizer.<locals>.<genexpr>  s%      2>UtAw#~14>Us   $&Fc              3      #    U  H5  oR                  S 5      (       d  M  UR                  S5      S:X  d  M1  Uv   M7     g7fr{   )r#   r$   r&  s     r;   r}   r'  *  s0      #(aJJx,@QWWS\UVEV5s   ??	?Tr   z#Unsloth loaded a broken tokenizer `z$`, but managed to repair it!
Tokens z
 with ids z exceeds the max vocab size of zX.
We removed these bad tokens. If you think this is incorrect, fix your tokenizer first.zUnsloth tried to load `z`, but cannot succeed.
Tokens zG.
Fix your tokenizer since it'll perform out of bounds memory accesses.huggingface_tokenizers_cache)r   r   r2   r   r   r   r   )modelr)   
model_namer   r   r2   _reloadzUnsloth: Tokenizer is most likely buggy, and Unsloth failed to repair it.
It will still work, but beware of out of bounds memory accesses.
Please file an issue on the model owner's repo about this issue.)%r   r   r   r*  embed_tokensweightshapeadded_tokens_decoderrA   r  r?   	enumerater   rZ   r@   special_tokens_map	itertools	frozensetchainfrom_iterable_added_tokens_encoderr'   r"   r%   append_added_tokens_decodersetattrr   r   r  rx   r   r   r   r   re   r   )!r*  r)   r+  r   r   r2   r,  max_embedding_sizeadded_tokens_fastindexrE   rC   rD   jbad_indices
bad_tokensr/   added_tokensr   r3  can_be_removed1can_be_removed2can_be_removedremove_generic
try_mappernamesgeneric_tokenstry_removal
name_tokencheck_tokenr7   	remove_idr   s!                                    r;   r   r     s     !!#q)!,0JJ1188>>qA!66.?.E.E.G.Gles5z.G   *+K@KL/44L/4467&05578<K/6689!"=J090N0N0U0U0WX0W1A0WX!*!=!= !*OO11 2>L>S>S>U2 "
 /9"TjA^<S1j"T -#,I;;@@BB ,   # #&o"6#j/"I "(C
O; 
 "'
%	NE#(E QV%VPU1q')Q*E&FPUN%V"$K!+7E3J*e3 + 2 25 9 * 1 1* = 8F ", &)%5Z%HN%)-&0O!(1/(B9$-$C$CI$N	%;;IF%;;IF)>{1~/J#Iz!}dC#Iz!}u/DdK )C 999>>@ADVV++A* N&&0\K=Hghzg{ |uu
  9CC #-j\ :(\K=@_`r_s t\]  $#'<'<:	 	!)99'7#/!$" $ )
	 '! )!+'7#/!# A 8f %Y//s M  Y #U#& &WN! ##W
 ! s<   :O8$O>6P
P(P4"PP:P?)PP5)	getsource)*)neftune_post_forward_hookc                 0   U R                   (       a  [        R                  " UR                  S5      UR                  S5      -  5      nU R                  [        R
                  " U5      -  nU[        R                  " U5      R                  U* U5      -   nU$ )a
  
Implements the NEFTune forward pass for the model using forward hooks. Note this works only for
torch.nn.Embedding layers. This method is slightly adapted from the original source code
that can be found here: https://github.com/neelsjain/NEFTune

Simply add it to your model as follows:
```python
model = ...
model.embed_tokens.neftune_noise_alpha = 0.1
model.embed_tokens.register_forward_hook(neftune_post_forward_hook)
```

Args:
    module (`torch.nn.Module`):
        The embedding module where the hook is attached. Note that you need to set
        `module.neftune_noise_alpha` to the desired noise alpha value.
    input (`torch.Tensor`):
        The input tensor to the model.
    output (`torch.Tensor`):
        The output tensor of the model (i.e. the embeddings).
r   r   )trainingtorchtensorsizeneftune_noise_alphasqrt
zeros_likeuniform_)moduleinputoutputdimsmag_norms        r;   rO  rO    sp    , ??<<AQ ?@D11EJJt4DDHe..v6??	8TTFr   c            	        ^  [        S5      n [        [        R                  R                  5      nS GH  u  nn[        X5      (       d  M  [        [        SU 35      5      nUR                  S5      mUR                  S5      nSR                  U4S jU 5       5      nSnUR                  S5      nSR                  U4S	 jU 5       5      nUR                  5       S-   nUcf  [        R                  " S
U 3S-   U[        R                  [        R                  -  S9n[        U5      S:X  a  M  US   nUR!                  X3U-   5      nOUR!                  X5U-   5      nU Vs/ s H  ofU;   d  M
  UPM     snn[#        SSR                  U5       S3[%        5       5        [#        U[%        5       ['        5       5        [#        SU SU 3['        5       5        GM     S H  nSu  p#[        [        SU SU 35      5      nUR                  S5      mUR                  S5      nSR                  U4S jU 5       5      nSnUS-  nUS-  nUS-  nUR                  S5      nSR                  U4S jU 5       5      nUR!                  X5U-   5      n[#        U['        5       5        [#        SU SU SU 3['        5       5        M     g!    g= fs  snf )z"
Patches the trainer with changes
z"trl.trainer.sft_trainer.SFTTrainerN))_prepare_non_packed_dataloaderN)_prepare_datasetNzsft_trainer.defr   c              3   ,   >#    U  H	  oTS  v   M     g 7fr>   rQ   r|   r/   r  s     r;   r}   .patch_sft_trainer_tokenizer.<locals>.<genexpr>       91uvY   a  
if 'tokenizer'          not in locals(): tokenizer = processing_class
if 'formatting_func'    not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `formatting_func` does not exist!')
if 'dataset_text_field' not in locals() and 'args' in locals(): dataset_text_field = args.dataset_text_field
if 'dataset_text_field' not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `dataset_text_field` does not exist!')
test_text = dataset[0][dataset_text_field] if (formatting_func is None and dataset_text_field is not None) else formatting_func(dataset[0])[0]
chat_template = getattr(tokenizer, 'chat_template', None)
chat_template = '' if chat_template is None else chat_template
has_bos_token_already = (test_text.startswith(tokenizer.bos_token) or tokenizer.bos_token in chat_template) if getattr(tokenizer, 'bos_token', None) is not None else False
if 'add_special_tokens' not in locals() and has_bos_token_already:
    from functools import partial
    tokenizer = partial(tokenizer, add_special_tokens = False)
    processing_class = tokenizer
else:
    add_special_tokens = False if has_bos_token_already else add_special_tokens

c              3   4   >#    U  H  nS T-  U-   v   M     g7f NrQ   rc  s     r;   r}   rd         C
1sU{Q
   zdef z\(.*?\).*?\:\nrL   r   z%from trl.trainer.sft_trainer import (r   )z#trl.trainer.sft_trainer.SFTTrainer.z = )zsft_trainer.SFTTrainerzdpo_trainer.DPOTrainerzkto_trainer.KTOTrainer)trainz#if resume_from_checkpoint is False:ztrl.trainer..c              3   ,   >#    U  H	  oTS  v   M     g 7fr>   rQ   rc  s     r;   r}   rd     re  rf  a  
import subprocess, re, gc, numpy as np
a = np.array([0,])
try:
    a = subprocess.check_output('nvidia-smi --query-gpu=memory.used --format=csv', shell = True)
    a = re.findall(rb'([\d]{1,})[\s]{1,}M', a)
    a = np.array([int(x.decode('utf-8'))/1024 for x in a])
except:
    if not torch.cuda.is_available():
        raise RuntimeError('Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!')
if ((a - PRE_CHECK) >= 1).sum() > 1:
    raise RuntimeError('Unsloth currently does not support multi GPU setups - but we are working on it!')
for _ in range(3):
    gc.collect()
    torch.cuda.empty_cache()
pass

tokenizer = self.processing_class if hasattr(self, 'processing_class') else self.tokenizer
fix_untrained_tokens(self.model, tokenizer, self.train_dataset, IGNORED_TOKENIZER_NAMES, eps = 1e-16)

fix_zero_training_loss(self.model, tokenizer, self.train_dataset)

ao  
try:
    gradient_accumulation_steps = self.args.gradient_accumulation_steps
    if type(gradient_accumulation_steps) is int and gradient_accumulation_steps > 1:
        from transformers import __version__ as transformers_version
        from packaging.version import Version
        if Version(transformers_version) <= Version('4.45.2'):
            print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n'\
                  '`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`')
except:
    pass


a  
if hasattr(self, 'neftune_hook_handle'):
    self.neftune_hook_handle.remove()
    if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle

if getattr(self, 'neftune_noise_alpha', None) is not None:
    self.model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
    self.neftune_hook_handle = self.model.get_input_embeddings().register_forward_hook(neftune_post_forward_hook)
pass

a  
if hasattr(self.train_dataset, 'column_names'):
    column_names = set(self.train_dataset.column_names)
    check = ['chosen', 'rejected', 'prompt', 'chosen_input_ids', 'chosen_attention_mask',
        'chosen_labels', 'rejected_input_ids', 'rejected_attention_mask', 'rejected_labels',
        'prompt_input_ids', 'prompt_attention_mask']
    if all(x in column_names for x in check):
        self.train_dataset = self.train_dataset.remove_columns(['chosen', 'rejected', 'prompt'])
    del check, column_names

c              3   4   >#    U  H  nS T-  U-   v   M     g7frh  rQ   rc  s     r;   r}   rd  F  rj  rk  )rU   r"   trltrainersft_trainerr   rM  r&   r   r   rstriprW   rX   rY   DOTALLr'   r!   execlocalsglobals)	rs  all_importsfunction_namereplacerfunction
check_textr/   path_to_trainerr  s	           @r;   patch_sft_trainer_tokenizerr    s   ?A ckk--.K

	 {22TL"@ABe$>>$'99999b 	&  %%d+
YYC
CC
&&(4/
zz}o&)::ryy0H
 8}!{H''Z2GHH''x2GHH#51H}Q54SXXa[MCVXNXvx+1-M?SI	
y
D
 #QTL0A=/"RSTe$>>$'99999T 	0 		

  			

 			

  %%d+
YYC
CC
##H8.CDXwy!?+1]O3}oNI	
iS~ 6s   K "	K	/K	K)T)_unsloth_sentencepiece_temp)NrightNFr)  T)zunsloth/llama-2-7b-bnb-4biti   r  NT):r   r   r   r   r   rW   r`   (transformers.models.llama.modeling_llamar   peftr   rR  r3  collectionsnumpyr   gc
subprocesspsutilunsloth_zoo.tokenizer_utilsr	   r
   r   unsloth_zoo.training_utilsr   __all__r4  r   r   rT   r   environr   keynamesr   r   r   r<   rH   rx   mistral_templatellama_templater^   r   r   r   r   r  r  r  r   inspectrM  rq  trl.trainer.sft_trainertransformers.trainerrO  r  )r/   s   0r;   <module>r     s   ' F 0 	 	 ; %     	   
 '  *' 
 $/0/1WWY/0(?@(?1wwy;(?@A  15		:Q0R

, - $))BJJOO-..!X- #x/ 
+\ 7J^ 2 0Vz 7	L^ET .Ql .-`BJ` /N0b   
  % "A@f
U 1@Ts   )E2E7
(E< <F