
    oi=                         S /r SSKrSSKrSSKJr  SSKrSSKrS\R                  S'   SSKrSSK	r	SSK
r
SSKrSSKrSSKJrJrJr  SSKJr  SSKrSS	KJr  SS
\R,                  4S jjr " S S5      r " S S 5      rg)SyntheticDataKit    N)deque1HF_HUB_ENABLE_HF_TRANSFER)	load_vllm
patch_vllmdelete_vllm)logger   )synthetic_qa_configprocc           	         U b  U R                  5       b  g  SS KnUR                  U R                  5      nUR	                  SS9 H  nUR                  5         M     UR                  5         UR                  US-  S9  g !    O= f[        R                  S:X  aF   [        R                  " SSS	S
[        U R                  5      /SSS9  U R                  SS9  g !    O= fU R                  5          U R                  SS9  g !    g = f)Nr   T)	recursive   timeoutnttaskkillz/Tz/Fz/PID   )capture_outputr   r   )pollpsutilProcesspidchildren	terminatewaitosname
subprocessrunstrkill)r   r   r   parentchilds        T/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/dataprep/synthetic.pyterminate_treer'   +   s    |tyy{.
)___6EOO 7gk*	ww$		NNT4TXX?!%
 III"	IIK		A	s$   A'A? ?B=C C1D Dc                   Z    \ rS rSrSr       SS jrS rSS jrS rSS jr	SS	 jr
S
rg)PipeCaptureN   zNon blocking pipe captureNc	                    Xl         [        US9U l        [        R                  " 5       U l        X0l        X@l        XPl        X`l	        Xpl
        [        R                  " 5       U l        [        R                  " 5       U l        S U l        Ub-  [        US5      (       d  [         R"                  " U5      nXl        [        R$                  " U R&                  SS9U l        U R(                  R+                  5         g )N)maxlensearchT)targetdaemon)piper   buf	threadingLocklockechor   textencodingerrorsEventready_eventclosed_eventready_regexhasattrrecompileThread_readertstart)	selfr0   
keep_linesr5   r   r6   r7   r8   r<   s	            r&   __init__PipeCapture.__init__Q   s     	*-NN$				 $??,%OO-";11 jj5*!!4<<$G    c                     U R                   (       a  SOSn[        U R                  R                  U5       H  nU R                   (       d'  UR	                  U R
                  U R                  5      nOUnUR                  S5      nU R                  (       a   SU;  a  [        U R                   SU 35        U R                     U R                  R                  U5        S S S 5        U R                  c  M  U R                  R                  U5      (       d  M  U R                   R#                  5         M      U R                  R%                  5         U R(                  R#                  5         g ! , (       d  f       N= f! [&         a     N8f = f!  U R                  R%                  5         O! [&         a     Of = fU R(                  R#                  5         f = f)N rH   z
zplatform isz: )r6   iterr0   readlinedecoder7   r8   rstripr5   printr   r4   r1   appendr<   r-   r:   setclose	Exceptionr;   )rD   sentinelraw_linelines       r&   rA   PipeCapture._readerq   sS   	$!YYrCH !3!3X>yy#??4==$++FD#D{{6*99$D02dV45YYHHOOD)  ##/D4D4D4K4KD4Q4Q$$((* ?"		! !!# Y  		! !!#sf   C F E+F 5F F 6E< +
E9	5F <
F	F	GF)(G)
F63G5F66Gc                 8    U R                   R                  U5      $ N)r:   r   rD   r   s     r&   wait_for_readyPipeCapture.wait_for_ready   s    $$W--rH   c                 6    U R                   R                  5       $ rY   )r;   is_setrD   s    r&   
has_closedPipeCapture.has_closed   s      ''))rH   c                 8    U R                   R                  U5      $ rY   )r;   r   rZ   s     r&   wait_until_closedPipeCapture.wait_until_closed   s      %%g..rH   c                     U R                      SR                  [        U R                  5      U* S  5      sS S S 5        $ ! , (       d  f       g = f)N
)r4   joinlistr1   )rD   ns     r&   tailPipeCapture.tail   s0    YY99T$((^QBC01 YYs	   (?
A)r1   r;   r5   r7   r8   r4   r   r0   r:   r<   rB   r6   )  FrJ   Tutf-8replaceNrY   )   )__name__
__module____qualname____firstlineno____doc__rF   rA   r[   r`   rc   rj   __static_attributes__ rH   r&   r)   r)   N   s;    #
 @$4.*/2rH   r)   c                       \ rS rSr       SS jr\      SS j5       r\S 5       rS rS r	S r
S	 rSS
 jr         SS jrSrg)r      Nc                 	   [        U5      [        L d   e[        U5      [        L d   e[        U5      [        L d   e[        U5      [        L d   e[        U5      [        L d   eUb  [        U5      [        L d   eXl        X l        SSKJn	J	n
  U	R                  UUS9U l        U
R                  UUS9U l        [        SS9  [        S1UU R                  UUSUUSSSSS.UD6nS	U;   a  US	   nU[        R                   :X  a  S
nO-U[        R"                  :X  a  SnOU[        R$                  :X  a  SnXS	'   ['        US5      (       a  UR(                  US	'   OB[+        U[        5      (       a-  UR-                  S5      (       a  UR/                  S5      S   US	'   1 SknUS	   U;  a  SUS	'   SU;   a  US	 SU;   a  US	 SS[        U5      /nUR1                  5        Hf  u  nnUR3                  SS5      nUS:X  a  M   [        U5      R3                  SS5      nUS:X  a  USU-   /-  nML  US:X  a  MT  US:X  a  M\  USU-   U/-  nMh     [4        R6                  " U5        [8        R:                  " U[8        R<                  [8        R<                  SS9n[>        R@                  " S 5      nUU l!        [E        URF                  S!SS"USS#9U l$        [E        URJ                  S$SS%S SS#9U l&        U RH                  RO                  US&9nU(       d  U RH                  RQ                  5       (       d  U RB                  RS                  5       bV  [U        S'5        [U        S(U RH                  RW                  S)5      5        [U        S*U RL                  RW                  S)5      5        OY[U        S+U S,35        [U        S(U RH                  RW                  S)5      5        [U        S*U RL                  RW                  S)5      5        [Y        U RB                  5        g [U        S-5        SnU R[                  5       (       d  US.:  ak  [U        S/5        [U        S(U RH                  RW                  S)5      5        [U        S*U RL                  RW                  S)5      5        [Y        U RB                  5        g US0-  n[\        R^                  " S05        U R[                  5       (       d  M  g )2Nr   )
AutoConfigAutoTokenizer)tokenF)debugT   )
model_nameconfiggpu_memory_utilizationmax_seq_lengthdisable_log_statsfloat8_kv_cacheconservativenessreturn_argsenable_lorause_bitsandbytescompilation_configdtypefloat16bfloat16float32r   ztorch..>   autohalffloatr   r   r   r   devicemodelvllmserve_-r   rJ   Truez--FalseNone)stdoutstderrstart_new_sessionz*Starting vLLM API server(?:\s+\d+)?\s+on\bi  zvLLM STDOUT)rE   r5   r   r<   r6   rl   zvLLM STDERRr   z6Stdout stream ended before readiness message detected.z
--- stdout tail ---
2   z
--- stderr tail ---
z/Unsloth: vllm_process failed to load! (timeout=)zvLLM Server Ready Detectedd   z%Unsloth: vllm_process failed to load!r   rv   )0typer"   intr   boolr   r   transformersrz   r{   from_pretrainedr   	tokenizerr   r   torchr   r   r   r=   r   
isinstance
startswithsplititemsrn   r
   infor    PopenPIPEr>   r?   vllm_processr)   r   stdout_capturer   stderr_capturer[   r`   r   rO   rj   r'   check_vllm_statustimesleep)rD   r   r   r   r   r   r|   r   kwargsrz   r{   engine_args	dtype_valvalid_dtypessubprocess_commandskeyvalueflagwhichr   ready_rereadytrials                          r&   rF   SyntheticDataKit.__init__   s    J3&&&N#s****+u444O$,,,$%...}Us 222$,: 00 1 
 '66 7 
 	5! 
#[[%;+ $-/$!"
 
 k!#G,IEMM)%	enn,&	emm+%	#, y&))'0~~G$Is++	0D0DX0N0N'0s';B'?G$VL7#<7'-G${"H%k!G$ 
O

 &++-JC;;sC(D** J&&x4E#4K( # '!&#4K( #) .0 	'(!''____ $	
 ::KL() "
 * 
 ##22W2E""--//43D3D3I3I3K3WNO/1D1D1I1I"1MN/1D1D1I1I"1MNGyPQRS/1D1D1I1I"1MN/1D1D1I1I"1MN4,,-./((**|=>/1D1D1I1I"1MN/1D1D1I1I"1MNt001QJEJJqM ((** 	rH   c           
      &    [        SU UUUUUS.UD6$ )N)r   r   r   r   r   r|   rv   )r   )r   r   r   r   r   r|   r   s          r&   r    SyntheticDataKit.from_pretrained2  s2       
#+%;-/
 
 	
rH   c                       [         R                  " S5      n U R                  S:X  a  gg ! [         R                  R                   a     gf = f)Nzhttp://localhost:8000/metricsro   TF)requestsgetstatus_code
exceptionsConnectionError)responses    r&   r   "SyntheticDataKit.check_vllm_statusF  sJ    	||$CDH##s* +""22 		s   &* A
Ac                    [        U S5      (       d  g U R                  n[        S5         UR                  5         UR	                  SS9  [        S5        [        S5       H6  n[        R                  R                  5         [        R                  " 5         M8     [!        S S9  g ! [
        R                   a9    [        S5        UR                  5         UR	                  5         [        S5         N[         a  n[        SU 35         UR                  5       c6  [        S	5        UR                  5         UR	                  5         [        S
5         S nAN! [         a  n[        SU 35         S nA S nAGN!S nAff = fS nAff = f)Nr   z5Attempting to terminate the VLLM server gracefully...
   r   zServer terminated gracefully.zEServer did not terminate gracefully after 10 seconds. Forcing kill...zServer killed forcefully.z4An error occurred while trying to stop the process: z(Attempting forceful kill due to error...z%Server killed forcefully after error.zError during forceful kill: )llm)r=   r   rO   r   r   r    TimeoutExpiredr#   rS   r   ranger   cudaempty_cachegccollectr	   )rD   r   ekill_er   s        r&   cleanupSyntheticDataKit.cleanupO  sB   t^,,((EF	?""$+12$ rAJJ""$JJL 
 	$- (( 	/W -. 		?HLM?$$&.DE %%' %%'AB ?4VH=>>?		?sC   *B$ $A
F0	F9E>AE
E;E6,E>6E;;E>>Fc                     U $ rY   rv   r_   s    r&   	__enter__SyntheticDataKit.__enter__q  s    rH   c                 $    U R                  5         g rY   r   )rD   excs     r&   __exit__SyntheticDataKit.__exit__t      rH   c                 $    U R                  5         g rY   r   r_   s    r&   __del__SyntheticDataKit.__del__w  r   rH   c                 R   Uc   e[         R                  R                  U5      (       d   e[        U S5      (       d   e[        U S5      (       d  [	        S5      e[        U S5      (       a  [        U S5      (       d  [	        S5      e[        USSS	9 nUR                  5       nS S S 5        U R                  U R                  S
-  -
  S-
  nUS::  a  [	        S5      eU R                  WSS9R                  n[        U5      n[        [        R                  " XdU R                  -
  -  5      5      n[        R                  " [        R                   " SX`R                  -
  U5      5      R#                  [        5      n[        R$                  " US S XR                  -   SS  45      R&                  n[        R(                  " X5      R+                  5       n[         R                  R-                  U5      u  pUR/                  S5      (       a  US S n/ n
[1        U5       Hd  u  nu  pU R                  R3                  X\U 5      nU SU U	 3nU
R5                  U5        [        USSS	9 nUR7                  U5        S S S 5        Mf     U
$ ! , (       d  f       GN= f! , (       d  f       M  = f)Nr   r   z7Please use SynthetidDataKit.from_pretrained(...) first!overlapmax_generation_tokensz'Please use prepare_qa_generation first!rrm   r7   r      r   z"Generation length is way too long!F)add_special_tokensr   r   r   /r   w)r   pathexistsr=   RuntimeErroropenreadr   r   r   	input_idslenr   npceilr   linspaceastypestackTminimumtolistsplitextendswith	enumeraterM   rP   write)rD   filenamefr6   
max_tokensr   lengthn_chunks
boundaries	extensionall_filenamesileftrightchunked_textnew_filenames                   r&   
chunk_dataSyntheticDataKit.chunk_dataz  sW   ###ww~~h''''t[))))t-..I  tY''wt=T/U/UHII(CG4668D 5 $"<"<q"@@3F 	 ?CDDNN4eNDNN	 Yrwwvdll)BCDEWWR[[F\\,A8LMTT

 XXz#2ll1JAB0OPQSS
ZZ
3::<
 !gg..x8S!!}H )* 5A}>>001FGL&ZqI;7L  .lCG<% =<	 !6 ? 54: =<s   J%J
J
J&	c
                     [        U S5      (       d   e[        U S5      (       d   eX R                  :  d   eSn
U
R                  S5      n
U
 H5  n[        R                  " [        R
                  R                  X5      SS9  M7     X l        [        R                  " S[        U5      5      R                  S[        U R                  5      5      R                  S	[        U5      5      R                  S
[        U5      5      R                  S[        U R                  US-  -
  S-
  5      5      R                  S[        U5      5      R                  S[        U5      5      R                  S[        U5      5      R                  S[        U5      5      R                  S[        U5      5      R                  S[        U	5      5      n[        SSSS9 nUR                  U5        S S S 5        XPl        g ! , (       d  f       N= f)Nr   r   z<pdf,html,youtube,docx,ppt,txt,output,generated,cleaned,final,T)exist_okz{data_output_location}z{model_name}z{temperature}z{top_p}z{chunk_size}r   z	{overlap}z{max_tokens}z{default_num_pairs}z{cleanup_threshold}z{cleanup_batch_size}z{cleanup_temperature}zsynthetic_data_kit_config.yamlr   rm   r   )r=   r   r   r   makedirsr   rg   r   r   rn   r"   r   r   r  r   )rD   output_folderr   temperaturetop_pr   default_num_pairscleanup_thresholdcleanup_batch_sizecleanup_temperature	locationsr   r   r  s                 r&   prepare_qa_generation&SyntheticDataKit.prepare_qa_generation  s    t\****t-....$':'::::R	OOC(	DKK]9dK  &;"  ''(@#mBTUW^S%9:W_c+&67WYE
+WD$7$7:ORS:S$SVW$W X W[#g,/W^S)>%?@W*C0A,BCW*C0A,BCW+S1C-DEW,c2E.FG 	  2CGLPQGGFO M  MLs   G//
G=)	r   r   r   r   r   r   r   r   r   ).unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit   g\(\?F      ?Ni  )r  r   g?Fr!  NrY   )	datai   gffffff?gffffff?@      r!     g333333?)rp   rq   rr   rs   rF   staticmethodr   r   r   r   r   r   r  r  ru   rv   rH   r&   r   r      s     F!%Vp E!$
 
&    D+^  #!*rH   )   )__all__r    r2   collectionsr   r   r   environr   r   r   r>   unsloth_zoo.vllm_utilsr   r   r	   unsloth_zoo.logr
   numpyr   synthetic_configsr   r   r'   r)   r   rv   rH   r&   <module>r/     s          	*-

& '   	  	 
 # 
 ))  FH2 H2Vx xrH   