
    ^hYf                         S SK r S SKJrJrJrJrJrJr  S SKrS SK	r	S SK	J
r
  S SKJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJrJr  \(       a	  S SKJr  \rO\r " S S\5      r " S S\5      rg)    N)TYPE_CHECKINGAnyDictListOptionalUnion)token_counter)verbose_loggerverbose_router_logger)	DualCache)CustomLogger)!_get_parent_otel_span_from_kwargs)RouterErrors)LiteLLMPydanticObjectBase)get_utc_datetimeprint_verbose)Spanc                   $    \ rS rSr% Sr\\S'   Srg)RoutingArgs   <   ttl N)__name__
__module____qualname____firstlineno__r   int__annotations____static_attributes__r       c/home/james-whalen/.local/lib/python3.13/site-packages/litellm/router_strategy/lowest_tpm_rpm_v2.pyr   r      s    Cr!   r   c                      \ rS rSr% SrSr\\S'   Sr\	\S'   Sr
\	\S'   Sr\	\S	'   0 4S
\S\S\4S jjrS\S\\   4S jrS\S\\   S\\   4S jrS rS r  S"S\S\S\S\\   S\S\\   S\\\\\4         S\\\\4      S\\   4S jjr  S"S\S\S\\\\\4         S\\\\4      4S jjr   S#S\S\S\\\\\4         S\\\\4      S\\   4
S  jjrS!rg)$LowestTPMLoggingHandler_v2   z
Updated version of TPM/RPM Logging.

Meant to work across instances.

Caches individual models, not model_groups

Uses batch get (redis.mget)

Increments tpm/rpm limit using redis.incr
F	test_flagr   logged_successlogged_failurei  default_cache_time_secondsrouter_cache
model_listrouting_argsc                 <    Xl         X l        [        S0 UD6U l        g )Nr   )r*   r+   r   r,   )selfr*   r+   r,   s       r"   __init__#LowestTPMLoggingHandler_v2.__init__0   s     )$'7,7r!   
deploymentreturnc                     [        5       nUR                  S5      nUR                  S0 5      R                  S5      nU SU 3nU R                  R	                  USS9nSnUc  UR                  S5      nUc!  UR                  S	0 5      R                  S5      nUc!  UR                  S0 5      R                  S5      nUc  [        S
5      nUb  Xg:  a  [        R                  " SR                  Xv5      SUR                  S	0 5      R                  S5      [        R                  " SSR                  [        R                  R                  UUUUR                  SS5      5      [        R                  " SSS9S9S9eU R                  R                  USU R                   R"                  S9nUb  X:  a  [        R                  " SR                  Xx5      SUR                  S	0 5      R                  S5      [        R                  " SSR                  [        R                  R                  UU5      [        R                  " SSS9S9S9eU$ ! [$         a-  n	['        U	[        R                  5      (       a  U	eUs Sn	A	$ Sn	A	ff = f)zw
Pre-call check + update model rpm

Returns - deployment

Raises - RateLimitError if deployment over defined RPM limit
%H-%M
model_infoid:rpm:Tkey
local_onlyNrpmlitellm_paramsinf6Deployment over defined rpm limit={}. current usage={} model  zr{} rpm limit={}. current usage={}. id={}, model_group={}. Get the model info by calling 'router.get_model_info(id)
model_nametpm_rpm_limits"https://github.com/BerriAI/litellmmethodurl)status_codecontentrequestmessagellm_providerr@   response   r9   valuer   !{} rpm limit={}. current usage={})r   strftimegetr*   	get_cachefloatlitellmRateLimitErrorformathttpxResponser   user_defined_ratelimit_errorrQ   Requestincrement_cacher,   r   	Exception
isinstance)
r.   r1   dtcurrent_minutemodel_idrpm_keylocal_resultdeployment_rpmresultes
             r"   pre_call_check)LowestTPMLoggingHandler_v2.pre_call_check7   s   D	
 "#B[[1N!~~lB7;;DAH!
%'78G,,66 7 L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'L,J,,T[[& "$$..)92>BB7K"^^$' !U  !\  !\(EEKK*($&NN<<! !&5EKo p
 ( **::qd.?.?.C.C ;  %&*A!00 X _ _*! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 %*MM9IOs$t"    	!W3344	s   II 
I?"I:4I?:I?parent_otel_spanc                 D  #     [        5       nUR                  S5      nUR                  S0 5      R                  S5      nU SU 3nU R                  R	                  USS9I Sh  vN nSnUc  UR                  S5      nUc!  UR                  S	0 5      R                  S5      nUc!  UR                  S0 5      R                  S5      nUc  [        S
5      nUb  Xx:  a  [        R                  " SR                  X5      SUR                  S	0 5      R                  S5      [        R                  " SSR                  [        R                  R                  UU5      S[        S5      0[        R                  " SSS9S9S9eU R                  R!                  USU R"                  R$                  US9I Sh  vN n	U	b  X:  a  [        R                  " SR                  X5      SUR                  S	0 5      R                  S5      [        R                  " SSR                  [        R                  R                  UU	5      S[        S5      0[        R                  " SSS9S9S9eU$  GN N! [&         a-  n
[)        U
[        R                  5      (       a  U
eUs Sn
A
$ Sn
A
ff = f7f)a  
Pre-call check + update model rpm
- Used inside semaphore
- raise rate limit error if deployment over limit

Why? solves concurrency issue - https://github.com/BerriAI/litellm/issues/2994

Returns - deployment

Raises - RateLimitError if deployment over defined RPM limit
r4   r5   r6   r7   Tr8   Nr;   r<   r=   r>   r?   r@   rA   rR   retry-afterr   rC   rD   rE   rH   rI   headersrJ   rK   rO   r9   rQ   r   rk   )r   rS   rT   r*   async_get_cacherV   rW   rX   rY   rZ   r[   r   r\   rQ   strr]   async_increment_cacher,   r   r_   r`   )r.   r1   rk   ra   rb   rc   rd   re   rf   rg   rh   s              r"   async_pre_call_check/LowestTPMLoggingHandler_v2.async_pre_call_check   s    G	
 "#B[[1N!~~lB7;;DAH!
%'78G!%!2!2!B!B "C " L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'L,J,,T[[& "$$..)92>BB7K"^^$' C J J(EEKK*(!
 "/B 8 %5EKo p	 &  $00FF))--%5	  G    %&*A!00 X _ _*! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 &3CG$<$)MM9IOs$t	" $ uB4  	!W3344	s[   J A I& $I!%EI& 1I$2B.I&  J !I& $I& &
J0"JJJ JJ c                      US   R                  S5      c  g US   S   R                  SS 5      nUS   R                  S0 5      R                  SS 5      nUb  Uc  g [        U[        5      (       a  [        U5      nUS   S   n[	        5       nUR                  S5      n	U S	U	 3n
U R                  R                  XU R                  R                  S
9  U R                  (       a  U =R                  S-  sl        g g ! [         a8  n[        R                  " SR                  [        U5      5      5         S nAg S nAff = f)Nr<   metadatamodel_groupr5   r6   usagetotal_tokensr4   :tpm:rP   rO   `litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {})rT   r`   r   rr   r   rS   r*   r^   r,   r   r&   r'   r_   r
   	exceptionrY   )r.   kwargsresponse_obj
start_timeend_timerx   r6   rz   ra   rb   tpm_keyrh   s               r"   log_success_event,LowestTPMLoggingHandler_v2.log_success_event   sR   .	 &'++J7?$%56zBFF!4 ,-11,CGGdS&"*C((RB+G4^D
 &'!#"  Dn%56 !!119J9J9N9N 2  >>''1,' " 	$$ryyF
 	s$   C= AC= BC= =
D?.D::D?c                   #      US   R                  S5      c  g US   S   R                  SS 5      nUS   R                  S0 5      R                  SS 5      nUb  Uc  g [        U[        5      (       a  [        U5      nUS   S   n[	        5       nUR                  S5      n	U S	U	 3n
[        U5      nU R                  R                  U
UU R                  R                  US
9I S h  vN   U R                  (       a  U =R                  S-  sl        g g  N,! [         a8  n[        R                  " SR!                  [        U5      5      5         S nAg S nAff = f7f)Nr<   rw   rx   r5   r6   ry   rz   r4   r{   rp   rO   r|   )rT   r`   r   rr   r   rS   r   r*   rs   r,   r   r&   r'   r_   r
   r}   rY   )r.   r~   r   r   r   rx   r6   rz   ra   rb   r   rk   rh   s                r"   async_log_success_event2LowestTPMLoggingHandler_v2.async_log_success_event  sx    2	 &'++J7?$%56zBFF!4 ,-11,CGGdS&"*C((RB+G4^D
 &'!#"  Dn%56
 $EV#L ''==&))--%5	 >    >>''1,' "  	$$ryyF
 	sX   ED EAD EBD 'D(*D ED 
E .EEEENrx   healthy_deploymentstpm_keys
tpm_valuesrpm_keys
rpm_valuesmessagesinputc	                 $   Ub  Uc  g0 n	[        U5       H  u  pXJ   XU
   '   M     0 n[        U5       H  u  pXj   XU
   '   M      [        XxS9n[        R                  " SU 35        [        S5      nU	c  0 n	U H  nSXS   S   '   M     OD[        5       nUR                  S5      nU H#  nUS   S    S	U 3nUU	;  d  U	U   b  M  SU	U'   M%     U	n/ nUR                  5        GHd  u  nnSnUR                  S
5      S   nU H  nUUS   S   :X  d  M  UnM     Uc  M;  Uc  M@  SnUc  UR                  S5      nUc!  UR                  S0 5      R                  S5      nUc!  UR                  S0 5      R                  S5      nUc  [        S5      nSnUc  UR                  S5      nUc!  UR                  S0 5      R                  S5      nUc!  UR                  S0 5      R                  S5      nUc  [        S5      nUU-   U:  a  GM$  Ub  UU;   a  UU   S-   U:  a  GM<  UU:X  a  UR                  U5        GMV  UU:  d  GM_  UnU/nGMg     [        S5        [        U5      S:  a  [        R                  " U5      $ g! [         a    Sn GN>f = f)zQ
Common checks for get available deployment, across sync + async implementations
N)r   textr   zinput_tokens=r=   r5   r6   r4   r{   :tpmr<   r;   rO   z+returning picked lowest tpm/rpm deployment.)	enumerater	   r_   r   debugrV   r   rS   itemssplitrT   appendr   lenrandomchoice)r.   rx   r   r   r   r   r   r   r   tpm_dictidxr9   rpm_dictinput_tokens
lowest_tpmr1   ra   rb   dr   all_deploymentspotential_deploymentsitemitem_tpm_deploymentm_deployment_tpm_deployment_rpms                               r"   #_common_checks_available_deployment>LowestTPMLoggingHandler_v2._common_checks_available_deploymentB  s    !3!(+HC&0oHc]# , !(+HC&0oHc]# ,	((GL 	##mL>$BC 5\
H1
;<L1$78 2 "#B[[N )|_T2358HI(*hw.?.G()HW%	 ) # "-335ND(K::c?1%D(1\?400"#K ) "!"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',,&8&48+;"o5Z'%,,[9J&%
)4%S 6T 	CD$%)==!677U  	L	s   	I? ?JJc                   #    [         R                  " SU SU 35        [        5       nUR                  S5      n/ n/ nU H  n	[	        U	[
        5      (       d  M  U	R                  S0 5      R                  S5      n
SR                  X5      nSR                  X5      nUR                  U5        UR                  U5        M     Xx-   nU R                  R                  US9I S	h  vN nUb  US	[        U5       nU[        U5      S	 nOS	nS	nU R                  UUUUUUUUS
9n Uc   eU$  NF! [         Ga    0 n[        U5       GHC  u  nn[	        U[
        5      (       d  M  UR                  S0 5      R                  S5      n
S	nUc  UR                  SS	5      nUc"  UR                  S0 5      R                  SS	5      nUc"  UR                  S0 5      R                  SS	5      nUc  [        S5      nU(       a  UU   OSnS	nUc  UR                  SS	5      nUc"  UR                  S0 5      R                  SS	5      nUc"  UR                  S0 5      R                  SS	5      nUc  [        S5      nU(       a  UU   OSnUUUUS.UU
'   GMF     [         R"                  " [$        R&                  R(                   SU SU 3SU[*        R,                  " SSS[/        S5      0[*        R0                  " SSS9S9S9ef = f7f)zb
Async implementation of get deployments.

Reduces time to retrieve the tpm/rpm values from cache
6get_available_deployments - Usage Based. model_group: , healthy_deployments: r4   r5   r6   	{}:tpm:{}	{}:rpm:{})keysNrx   r   r   r   r   r   r   r   r   r<   r=   r   r;   current_tpm	tpm_limitcurrent_rpm	rpm_limitz. 12345 Passed model=. Deployments=r?   rA   rm   r   rC   rD   rE   rn   rK   )r   r   r   rS   r`   dictrT   rY   r   r*   async_batch_get_cacher   r   r_   r   rV   rW   rX   r   no_deployments_availablerQ   rZ   r[   rr   r]   )r.   rx   r   r   r   ra   rb   r   r   r   r6   r   rd   combined_tpm_rpm_keyscombined_tpm_rpm_valuesr   r   r1   deployment_dictindexr   r   r   r   r   s                            r"   async_get_available_deployments:LowestTPMLoggingHandler_v2.async_get_available_deployments  si     	##D[MQhi|h}~	
 W-$A!T""UU<,00 &,,R@%,,R@(( % !) 3(,(9(9(O(O& )P )
 #
 #.03x=AJ0XAJJJ==# 3!! > 	

=	)))1#
2  :	 O&/0C&D"{k400$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ (('@@FFGG\]h\iiw  yH  xI  J! #*CG4!MM1AGkl		
 
a:	s8   AK:B	K:#D*$>K:#D, )K:,2K7"FK77K:c                 @   [         R                  " SU SU 35        [        5       nUR                  S5      n/ n/ n	U H  n
[	        U
[
        5      (       d  M  U
R                  S0 5      R                  S5      nSR                  X5      nSR                  X5      nUR                  U5        U	R                  U5        M     U R                  R                  XS9nU R                  R                  XS9nU R                  UUUUU	UUUS	9n Uc   eU$ ! [         Ga    0 n[        U5       GHC  u  nn[	        U[
        5      (       d  M  UR                  S0 5      R                  S5      nS
nUc  UR                  SS
5      nUc"  UR                  S0 5      R                  SS
5      nUc"  UR                  S0 5      R                  SS
5      nUc  [        S5      nU(       a  UU   OSnS
nUc  UR                  SS
5      nUc"  UR                  S0 5      R                  SS
5      nUc"  UR                  S0 5      R                  SS
5      nUc  [        S5      nU(       a  UU   OSnUUUUS.UU'   GMF     [        [         R"                  R$                   SU SU 35      ef = f)z5
Returns a deployment with the lowest TPM/RPM usage.
r   r   r4   r5   r6   r   r   )r   rk   r   Nr   r<   r=   r   r;   r   z. Passed model=r   )r   r   r   rS   r`   r   rT   rY   r   r*   batch_get_cacher   r_   r   rV   
ValueErrorr   r   rQ   )r.   rx   r   r   r   rk   ra   rb   r   r   r   r6   r   rd   r   r   r1   r   r   r   r   r   r   r   s                           r"   get_available_deployments4LowestTPMLoggingHandler_v2.get_available_deployments!  s    	##D[MQhi|h}~	
 W-$A!T""UU<,00 &,,R@%,,R@(( % &&66 7 

 &&66 7 

 ==# 3!! > 	

5	))) 2	 O&/0C&D"{k400$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ 88>>?{m[ijyiz{ a2	s   
D 2JEJ)r+   r*   r,   )NN)NNN)r   r   r   r   __doc__r&   boolr   r'   r   r(   r)   r   listr   r/   r   r   ri   r   rt   r   r   rr   r   r   r   r   r   r    r   r!   r"   r$   r$      s   
 ItNCNC&11 OQ8%8378GK8L L(4. L\UU2:4.U	$Un/b3z 48,0ee "e 	e
 TNe e TNe 4S#X/0e c4i()e 
$eV 48,0vv "v 4S#X/0	v
 c4i()vx 48,0+/ff "f 4S#X/0	f
 c4i()f #4.f fr!   r$   ) r   typingr   r   r   r   r   r   rZ   rW   r	   litellm._loggingr
   r   litellm.caching.cachingr   "litellm.integrations.custom_loggerr   'litellm.litellm_core_utils.core_helpersr   litellm.types.routerr   litellm.types.utilsr   litellm.utilsr   r   opentelemetry.tracer   _Spanr   r$   r   r!   r"   <module>r      sY     B B   ! B - ; U - 9 91DD+ i	 i	r!   