
    ^h(              "       $   S SK JrJrJr  S SKJrJr  S SKrS SKJ	r	  S SK
Jr  SSKJr  / SSSSSSSSSSSSSSSS	4S
\S\S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   4 S jjrS rS rg)    )FIRST_COMPLETEDThreadPoolExecutorwait)ListOptionalN)print_verbose)get_optional_params   )handleriX  d   modelmessages	functionsfunction_calltemperaturetop_pnstream
max_tokenspresence_penaltyfrequency_penalty
logit_biasuserrequest_timeouttimeoutmax_workersc                 p   [        5       nUn/ nU n SnU R                  SS5      S   [        R                  ;   a*  U R                  SS5      S   nU R                  SS5      S   n US:X  aF  [	        UUUUUU=(       d    SUU	U
UUUU US9n[
        R                  " U U[        R                  US9nU$ S	 n[        US
9 nU" US5       H  nU Hv  nUR                  5       nUR                  S5        UUS'   0 nSU;   a  UR                  S5      nUR                  " [        R                  40 UDUD6nUR                  U5        Mx     M     SSS5        / nU H#  n UR                  UR                  5       5        M%     U$ ! , (       d  f       N;= f! [         a  n UR                  U 5         Sn A M[  Sn A ff = f)a  
Batch litellm.completion function for a given model.

Args:
    model (str): The model to use for generating completions.
    messages (List, optional): List of messages to use as input for generating completions. Defaults to [].
    functions (List, optional): List of functions to use as input for generating completions. Defaults to [].
    function_call (str, optional): The function call to use as input for generating completions. Defaults to "".
    temperature (float, optional): The temperature parameter for generating completions. Defaults to None.
    top_p (float, optional): The top-p parameter for generating completions. Defaults to None.
    n (int, optional): The number of completions to generate. Defaults to None.
    stream (bool, optional): Whether to stream completions or not. Defaults to None.
    stop (optional): The stop parameter for generating completions. Defaults to None.
    max_tokens (float, optional): The maximum number of tokens to generate. Defaults to None.
    presence_penalty (float, optional): The presence penalty for generating completions. Defaults to None.
    frequency_penalty (float, optional): The frequency penalty for generating completions. Defaults to None.
    logit_bias (dict, optional): The logit bias for generating completions. Defaults to {}.
    user (str, optional): The user string for generating completions. Defaults to "".
    deployment_id (optional): The deployment ID for generating completions. Defaults to None.
    request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
    max_workers (int,optional): The maximum number of threads to use for parallel processing.

Returns:
    list: A list of completion results.
N/   r   vllmF)r   r   r   r   r   r   stopr   r   r   r   r   r   custom_llm_provider)r   r   custom_prompt_dictoptional_paramsc              3   V   #    [        S[        U 5      U5       H  nXX!-    v   M     g7f)z)Yield successive n-sized chunks from lst.r   N)rangelen)lstr   is      W/home/james-whalen/.local/lib/python3.13/site-packages/litellm/batch_completion/main.pychunks batch_completion.<locals>.chunks_   s(     1c#h*aen$ +s   ')r   r   r   r   kwargs)localssplitlitellmprovider_listr	   vllm_handlerbatch_completionsr#   r   copypopsubmit
completionappendresult	Exception)!r   r   r   r   r   r   r   r   r!   r   r   r   r   r   deployment_idr   r   r   r.   argsbatch_messagescompletionsr"   r$   resultsr+   executor	sub_batchmessage_listkwargs_modifiedoriginal_kwargsfutureexcs!                                    r*   batch_completionrH      s   ` 8DNKE{{31!6!66#kk#q1!4C#A&f$-'#?U!-/! 3
" 00#&99+	
L N;	%
  K8H#NC8	$-L&*iikO#''62>OJ/&(O?2*9*=*=h*G%__**.=APF  &&v. %. 9 9" !F$v}}/ " N1 98*  $s##$s%   <BE>F>
F
F5F00F5c                  8  ^ SU;   a  UR                  S5        SU;   a  US   mUR                  S5        0 n[        [        T5      S9 nT H+  nUR                  " [        R
                  /U Q7SU0UD6X$'   M-     [        UR                  5       U4S jS9 H1  u  pEUR                  5       c  M  UR                  5       s  sSSS5        $    SSS5        gSU;   Ga2  US   nUR                  S5        UR                  S5        UR                  S	0 5      n0 n[        [        U5      S9 nU HS  nUR                  5        H  n	X;  d  M
  X   X'   M     0 UEUEnUR                  " [        R
                  40 UD6X(S   '   MU     U(       a{  [        S
5        [        UR                  5       [        S9u  p[        SU
 35        U
 H  n UR                  5       nUs  sSSS5        $    [        S5        [        U5        U(       a  M{  SSS5        gg! , (       d  f       g= f! [         a^    [        S5        [        U5        0 nUR                  5        H  u  pX^:X  a  [        SU	 35        M  XU	'   M      Un[        SU 35         M  f = f! , (       d  f       g= f)a  
Send a request to multiple language models concurrently and return the response
as soon as one of the models responds.

Args:
    *args: Variable-length positional arguments passed to the completion function.
    **kwargs: Additional keyword arguments:
        - models (str or list of str): The language models to send requests to.
        - Other keyword arguments to be passed to the completion function.

Returns:
    str or None: The response from one of the language models, or None if no response is received.

Note:
    This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
    It sends requests concurrently and returns the response from the first model that responds.
r   modelsr-   c                 ,   > TR                  U S   5      $ )Nr   )index)xrJ   s    r*   <lambda>)batch_completion_models.<locals>.<lambda>   s    v||AaD/A    )keyNdeployments
model_listr.   z

 waiting for next result

)return_whenz
done list
z3

got an exception, ignoring, removing from futureszremoving keyznew futuresz 

done looping through futures

)r6   r   r'   r7   r1   r8   sorteditemsr:   keysr   r   valuesr   r;   )r=   r.   futuresrA   r   rF   rR   nested_kwargs
deploymentrQ   done_r:   new_futuresvaluerJ   s                  @r*   batch_completion_modelsr`      s   & &

76!

8CK8H!)&&")-"5:">D"  
 "(%A" ==?.!==?* 98" 9t _ 
&	 ]+

=!

< 

8R0C,<=)
!;;=C-*0+
	 )
 9J8-8/7&&0*007+, * @Aw~~/_MD623"F!!'%) >=" #* DEg&7 ' >R u 98t ) % !%S &g.&(*1--/JC% -SE.B C (38C 0 +: #.%G9&=> !+ >=R sW   	A"H/H
H)J	A=JH J$J
H A$JJJJ
Jc                     SSK nSU;   a  UR                  S5        SU;   a  US   nUR                  S5        O[        S5      e/ nUR                  R	                  [        U5      S9 n[        U5       H]  u  pgUR                  " [        R                  /U Q7SU0UD6nUR                  5       c  M>  UR                  UR                  5       5        M_     SSS5        U$ ! , (       d  f       U$ = f)a  
Send a request to multiple language models concurrently and return a list of responses
from all models that respond.

Args:
    *args: Variable-length positional arguments passed to the completion function.
    **kwargs: Additional keyword arguments:
        - models (str or list of str): The language models to send requests to.
        - Other keyword arguments to be passed to the completion function.

Returns:
    list: A list of responses from the language models that responded.

Note:
    This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
    It sends requests concurrently and collects responses from all models that respond.
r   Nr   rJ   z'models' param not in kwargsr-   )concurrent.futuresr6   r;   rY   r   r'   	enumerater7   r1   r8   r:   r9   )	r=   r.   
concurrentrJ   	responsesrA   idxr   rF   s	            r*   %batch_completion_models_all_responsesrg      s    $  &

76!

8677I				.	.3v;	.	G8#F+JC__W%7%7V$VeVvVF}}*  1 , 
H  
H	G s   (AC2#C
C.)rb   r   r   r   typingr   r   r1   litellm._loggingr   litellm.utilsr	   llms.vllm.completionr   r3   strfloatintbooldictrH   r`   rg    rP   r*   <module>rr      sH   H H !  * - :  $#'#'!!	 $(,)-!%%) !$'qq q ~	q
 C=q %q E?q }q TNq q uoq  q q 3-q" c]#q$ c]%q& #'qlSl&rP   