
    ^h'                         S r SSKrSSKJrJrJrJrJrJr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJrJrJrJrJrJr  S
SKJr   " S S\5      r " S S\5      r  " S S\ 5      r! " S S\5      r"g)z\
Translates from OpenAI's `/v1/chat/completions` endpoint to Triton's `/generate` endpoint.
    N)AnyDictListLiteralOptionalUnion)HeadersResponse)prompt_factory)BaseModelResponseIterator)
BaseConfigBaseLLMExceptionLiteLLMLoggingObj)AllMessageValues)ChatCompletionToolCallChunkChatCompletionUsageBlockChoicesGenericStreamingChunkMessageModelResponse   TritonErrorc                   4   \ rS rSrSrS\S\S\\\	4   S\
4S jr SS\S	\S
\\   S\S\\   S\4S jjrS	\S\4S jrS\S\S	\S\S\4
S jr  S S	\S\S\S\S\S
\\   S\S\S\S\\   S\\   S\4S jjrS	\S
\\   S\S\S\S\4S jrS\S\S   4S jrSrg)!TritonConfig   zl
Base class for Triton configurations.

Handles routing between /infer and /generate triton completion llms
error_messagestatus_codeheadersreturnc                     [        X!US9$ )N)r   messager   r   )selfr   r   r   s       g/home/james-whalen/.local/lib/python3.13/site-packages/litellm/llms/triton/completion/transformation.pyget_error_classTritonConfig.get_error_class%   s     #G
 	
    Nmodelmessagesoptional_paramsapi_keyc                 
    SS0$ )NzContent-Typezapplication/json )r#   r   r(   r)   r*   r+   s         r$   validate_environment!TritonConfig.validate_environment,   s      233r'   c                 
    SS/$ N
max_tokensmax_completion_tokensr-   )r#   r(   s     r$   get_supported_openai_params(TritonConfig.get_supported_openai_params6   s    566r'   non_default_paramsdrop_paramsc                 \    UR                  5        H  u  pVUS:X  d  US:X  d  M  XbU'   M     U$ r1   )items)r#   r6   r*   r(   r7   paramvalues          r$   map_openai_paramsTritonConfig.map_openai_params9   s9     /446LE$1H(H).& 7 r'   raw_responsemodel_responselogging_objrequest_datalitellm_paramsencoding	json_modec                     UR                  SS5      nU R                  U5      nUS:X  a!  [        5       R                  UUUUUUUUU	U
US9$ US:X  a!  [	        5       R                  UUUUUUUUU	U
US9$ U$ )Napi_base generate)r(   r>   r?   r@   rA   r)   r*   rB   rC   r+   rD   infer)get_get_triton_llm_typeTritonGenerateConfigtransform_responseTritonInferConfig)r#   r(   r>   r?   r@   rA   r)   r*   rB   rC   r+   rD   rF   llm_types                 r$   rM   TritonConfig.transform_responseE   s     "%%j"5,,X6z!')<<)-')! /-!# =    $&99)-')! /-!# :   r'   c                     UR                  SS5      nU R                  U5      nUS:X  a  [        5       R                  UUUUUS9$ US:X  a  [	        5       R                  UUUUUS9$ 0 $ )NrF   rG   rH   )r(   r)   r*   rB   r   rI   )rJ   rK   rL   transform_requestrN   )r#   r(   r)   r*   rB   r   rF   rO   s           r$   rR   TritonConfig.transform_requests   s     "%%j"5,,X6z!');;! /- <    $&88! /- 9   	r'   rF   )rH   rI   c                 z    UR                  S5      (       a  gUR                  S5      (       a  g[        SU 35      e)Nz	/generaterH   z/inferrI   zInvalid Triton API base: )endswith
ValueError)r#   rF   s     r$   rK   !TritonConfig._get_triton_llm_type   s=    [))x((8
CDDr'   r-   )NNN)__name__
__module____qualname____firstlineno____doc__strintr   r   r	   r   r%   r   r   r   r.   r4   boolr<   r
   r   r   r   rM   dictrR   r   rK   __static_attributes__r-   r'   r$   r   r      s   
 
/2
=B4==Q
	
 "&44 4 '(	4
 4 #4 
47 7 7
 
 
 	

 
 

. "&$(,, , &	,
 ', , '(, , , , #, D>, 
,\ '( 	
   
8ES EW=P5Q Er'   r   c                       \ rS rSrSrS\S\\   S\S\S\S\4S	 jr	  SS\S\
S\S\S\S\\   S\S\S\S\\   S\\   S\4S jjrSrg
)rL      zH
Transformations for triton /generate endpoint (This is a trtllm model)
r(   r)   r*   rB   r   r    c                     UR                  5       nUR                  SS5      n[        XS9[        UR	                  SS5      5      S/S/S.[        U5      S.nUS	   R                  U5        U$ )
NstreamF)r(   r)   r2   i  rG   )r2   	bad_words
stop_words)
text_input
parametersrf   rj   )copypopr   r_   rJ   r`   update)	r#   r(   r)   r*   rB   r   inference_paramsrf   data_for_tritons	            r$   rR   &TritonGenerateConfig.transform_request   s}     +//1!%%h6(uH!/"5"5lD"IJ T!d
 6l+
 	%,,-=>r'   Nr>   r?   r@   rA   rC   r+   rD   c                      UR                  5       n[        S[        US   S9S9/Ul        U$ ! [         a    [        UR                  UR                  S9ef = f)Nr"   r   r   text_outputcontentindexr"   )json	Exceptionr   textr   r   r   choices)r#   r(   r>   r?   r@   rA   r)   r*   rB   rC   r+   rD   raw_response_jsons                r$   rM   'TritonGenerateConfig.transform_response   sn    	 , 1 1 3 !W5F}5U%VW"
   	$))|7O7O 	s	   . )Ar-   rX   rY   rZ   r[   r\   r]   r^   r   r   ra   rR   r
   r   r   r   r   r   r`   rM   rb   r-   r'   r$   rL   rL      s     '( 	
   
B "&$(  &	
 '  '(    # D> 
 r'   rL   c                       \ rS rSrSrS\S\\   S\S\S\S\4S	 jr	  SS\S\
S\S\S\S\\   S\S\S\S\\   S\\   S\4S jjrSrg
)rN      zb
Transformations for triton /infer endpoint (his is an infer model with a custom model on triton)
r(   r)   r*   rB   r   r    c                 t   US   R                  SS5      nSSS/SU/S./0nUR                  5        Ha  u  pUS	:X  a  M  US
:X  a  M  [        U	[        5      (       a  SOSn
[        U	[        5      (       a  SOU
n
US   R                  US/X/S.5        Mc     SU;  a  US   R                  SS/SS/S.5        U$ )Nr   ru   rG   inputsri      BYTES)nameshapedatatypedatarf   max_retriesINT32FP32r2      )rJ   r9   
isinstancer_   floatappend)r#   r(   r)   r*   rB   r   ri   ro   kvr   s              r$   rR   #TritonInferConfig.transform_request   s     a[__Y3
(S ''L		
 $))+DAMQ-%7&0C&8&87g%/5%9%96x)00!(CP	 , .H%,,(S 'D	 r'   Nr>   r?   r@   rA   rC   r+   rD   c                     UR                  5       nUS   S   S   nS n[        U[        5      (       a  SR                  U5      nOUn[        S[        US9S9/Ul
        U$ ! [         a    [        UR                  UR                  S9ef = f)Nrr   outputsr   r   rG   rt   rv   )rx   ry   r   rz   r   r   listjoinr   r   r{   )r#   r(   r>   r?   r@   rA   r)   r*   rB   rC   r+   rD   r|   _triton_response_datatriton_response_datas                  r$   rM   $TritonInferConfig.transform_response   s    	 , 1 1 3 !2) <Q ? G.2+T22#%77+@#A #8  (<="
 '  	$))|7O7O 	s   A! !)B
r-   rX   r~   r-   r'   r$   rN   rN      s    && '(& 	&
 & & 
&f "&$(## # &	#
 '# # '(# # # # ## D># 
# #r'   rN   c                   &    \ rS rSrS\S\4S jrSrg)TritonResponseIteratori!  chunkr    c           
      (    SnS nSnSnS nS n[        UR                  SS5      5      nUR                  SS5      nUR                  SS5      nUR                  SS5      n[        UUUUUUUS9$ ! [        R                   a    [        S	U 35      ef = f)
NrG   Frw   r   rs   stop_reasonis_finished)rz   tool_user   finish_reasonusagerw   provider_specific_fieldsz"Failed to decode JSON from chunk: )r_   rJ   r   rx   JSONDecodeErrorrV   )	r#   r   rz   r   r   r   r   r   rw   s	            r$   chunk_parser#TritonResponseIterator.chunk_parser"  s    	KD>BHKM8<E'+$		'1-.E 99]B/D!IImR8M))M59K(!'+)A  ## 	KA%IJJ	Ks   A+A. .#Br-   N)rY   rZ   r[   r\   ra   r   r   rb   r-   r'   r$   r   r   !  s    K$ K+@ Kr'   r   )#r]   rx   typingr   r   r   r   r   r   httpxr	   r
   3litellm.litellm_core_utils.prompt_templates.factoryr   )litellm.llms.base_llm.base_model_iteratorr   )litellm.llms.base_llm.chat.transformationr   r   r   litellm.types.llms.openair   litellm.types.utilsr   r   r   r   r   r   common_utilsr   r   rL   rN   r   r-   r'   r$   <module>r      sy     < < # N O 
 7  'wE: wEt3< 3lP, PfK6 Kr'   