
    h+                        d Z ddlZddlmZmZmZmZmZmZm	Z	m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZmZmZmZm Z  ddl!m"Z"  G d de      Z# G d de#      Z$ G d de#      Z% G d de      Z&y)z\
Translates from OpenAI's `/v1/chat/completions` endpoint to Triton's `/generate` endpoint.
    N)AnyAsyncIteratorDictIteratorListLiteralOptionalUnion)HeadersResponse)DEFAULT_MAX_TOKENS_FOR_TRITON)prompt_factory)BaseModelResponseIterator)
BaseConfigBaseLLMExceptionLiteLLMLoggingObj)AllMessageValues)ChatCompletionToolCallChunkChatCompletionUsageBlockChoicesGenericStreamingChunkMessageModelResponse   TritonErrorc                      e Zd ZdZdededeeef   de	fdZ
	 	 d"deded	ee   d
ededee   dee   defdZdedefdZded
edededef
dZ	 d#dee   dee   ded
ededee   defdZ	 	 d"dededededed	ee   d
edededee   dee   defdZded	ee   d
edededefdZdeded   fdZ	 d$deee   ee   ef   d edee   defd!Zy)%TritonConfigzx
    Base class for Triton configurations.

    Handles routing between /infer and /generate triton completion llms
    error_messagestatus_codeheadersreturnc                     t        |||      S )N)r    messager!   r   )selfr   r    r!   s       i/var/www/Befach/backend/env/lib/python3.12/site-packages/litellm/llms/triton/completion/transformation.pyget_error_classzTritonConfig.get_error_class&   s     #]G
 	
    Nmodelmessagesoptional_paramslitellm_paramsapi_keyapi_basec                 
    ddiS )NzContent-Typezapplication/json )r%   r!   r)   r*   r+   r,   r-   r.   s           r&   validate_environmentz!TritonConfig.validate_environment-   s      233r(   c                 
    ddgS N
max_tokensmax_completion_tokensr0   )r%   r)   s     r&   get_supported_openai_paramsz(TritonConfig.get_supported_openai_params9   s    566r(   non_default_paramsdrop_paramsc                 V    |j                         D ]  \  }}|dk(  s|dk(  s|||<    |S r3   )items)r%   r7   r+   r)   r8   paramvalues          r&   map_openai_paramszTritonConfig.map_openai_params<   s=     /446LE5$1H(H).& 7 r(   streamc                 Z    |t        d      | j                  |      }|dk(  r|r|dz   S |S )Nzapi_base is requiredgenerate_stream)
ValueError_get_triton_llm_type)r%   r.   r-   r)   r+   r,   r>   llm_types           r&   get_complete_urlzTritonConfig.get_complete_urlH   s@     344,,X6z!fi''r(   raw_responsemodel_responselogging_objrequest_dataencoding	json_modec                     |j                  dd      }| j                  |      }|dk(  r$t               j                  |||||||||	|
|      S |dk(  r$t	               j                  |||||||||	|
|      S |S )Nr.    r@   )r)   rF   rG   rH   rI   r*   r+   r,   rJ   r-   rK   infer)getrC   TritonGenerateConfigtransform_responseTritonInferConfig)r%   r)   rF   rG   rH   rI   r*   r+   r,   rJ   r-   rK   r.   rD   s                 r&   rQ   zTritonConfig.transform_responseX   s     "%%j"5,,X6z!')<<)-')! /-!# =    $&99)-')! /-!# :   r(   c                     |j                  dd      }| j                  |      }|dk(  rt               j                  |||||      S |dk(  rt	               j                  |||||      S i S )Nr.   rM   r@   )r)   r*   r+   r,   r!   rN   )rO   rC   rP   transform_requestrR   )r%   r)   r*   r+   r,   r!   r.   rD   s           r&   rT   zTritonConfig.transform_request   s     "%%j"5,,X6z!');;! /- <    $&88! /- 9   	r(   )r@   rN   c                 f    |j                  d      ry|j                  d      ryt        d|       )Nz	/generater@   z/inferrN   zInvalid Triton API base: )endswithrB   )r%   r.   s     r&   rC   z!TritonConfig._get_triton_llm_type   s7    [)x(8
CDDr(   streaming_responsesync_streamc                     t        |||      S )N)rW   rX   rK   )TritonResponseIterator)r%   rW   rX   rK   s       r&   get_model_response_iteratorz(TritonConfig.get_model_response_iterator   s     &1#
 	
r(   NN)N)F)__name__
__module____qualname____doc__strintr
   r   r   r   r'   r   r   dictr	   r1   r6   boolr=   rE   r   r   r   r   rQ   rT   r   rC   r   r   r[   r0   r(   r&   r   r      s|   
 
/2
=B4==Q
	
 "&"&
4
4 
4 '(	
4
 
4 
4 #
4 3-
4 

47 7 7
 
 
 	

 
 

& "&3- # 	
    
6 "&$(,, , &	,
 ', , '(, , , , #, D>, 
,\ '( 	
   
8ES EW=P5Q E %*	

!(3-s1C]"RS

 

 D>	


 


r(   r   c                       e Zd ZdZdedee   dedededefdZ	 	 dded
e	de
dededee   dedededee   dee   de
fdZy	)rP   zP
    Transformations for triton /generate endpoint (This is a trtllm model)
    r)   r*   r+   r,   r!   r"   c                     |j                         }|j                  dd      }t        ||      dt        |j	                  dt
                    it        |      d}|d   j                  |       |S )Nr>   F)r)   r*   r4   )
text_input
parametersr>   rh   )copypopr   rb   rO   r   rd   update)	r%   r)   r*   r+   r,   r!   inference_paramsr>   data_for_tritons	            r&   rT   z&TritonGenerateConfig.transform_request   s{     +//1!%%h6(uxHc#''6ST
 6l+
 	%,,-=>r(   NrF   rG   rH   rI   rJ   r-   rK   c                     	 |j                         }t        dt        |d               g|_        |S # t        $ r" t        |j                  |j                        w xY w)Nr$   r    r   text_outputcontentindexr$   )json	Exceptionr   textr    r   r   choices)r%   r)   rF   rG   rH   rI   r*   r+   r,   rJ   r-   rK   raw_response_jsons                r&   rQ   z'TritonGenerateConfig.transform_response   sn    	 , 1 1 3 !W5F}5U%VW"
   	$))|7O7O 	s	   4 +Ar\   r]   r^   r_   r`   ra   r   r   rc   rT   r   r   r   r   r   r	   rd   rQ   r0   r(   r&   rP   rP      s     '( 	
   
B "&$(  &	
 '  '(    # D> 
r(   rP   c                       e Zd ZdZdedee   dedededefdZ	 	 dded
e	de
dededee   dedededee   dee   de
fdZy	)rR   zj
    Transformations for triton /infer endpoint (his is an infer model with a custom model on triton)
    r)   r*   r+   r,   r!   r"   c                 T   |d   j                  dd      }dddgd|gdgi}|j                         D ]T  \  }}	|d	k(  r|d
k(  rt        |	t              rdnd}
t        |	t              rdn|
}
|d   j                  |dg|
|	gd       V d|vr|d   j                  ddgddgd       |S )Nr   rr   rM   inputsrg      BYTES)nameshapedatatypedatar>   max_retriesINT32FP32r4      )rO   r:   
isinstancerb   floatappend)r%   r)   r*   r+   r,   r!   rg   rm   kvr   s              r&   rT   z#TritonInferConfig.transform_request   s     a[__Y3
(S ''L		
 $))+DAqMQ-%7&0C&87g%/5%96x)00!(QCP	 , .H%,,(S 'D	 r(   NrF   rG   rH   rI   rJ   r-   rK   c                     	 |j                         }|d   d   d   }d }t        |t              rdj                  |      }n|}t        dt        |            g|_
        |S # t        $ r" t        |j                  |j                        w xY w)Nro   outputsr   r   rM   rq   rs   )ru   rv   r   rw   r    r   listjoinr   r   rx   )r%   r)   rF   rG   rH   rI   r*   r+   r,   rJ   r-   rK   ry   _triton_response_datatriton_response_datas                  r&   rQ   z$TritonInferConfig.transform_response  s    	 , 1 1 3 !2) <Q ? G.2+T2#%77+@#A #8  (<="
 '  	$))|7O7O 	s   A" "+Br\   rz   r0   r(   r&   rR   rR      s    %% '(% 	%
 % % 
%d "&$(## # &	#
 '# # '(# # # # ## D># 
#r(   rR   c                       e Zd ZdedefdZy)rZ   chunkr"   c           	      ,   	 d}d }d}d}d }d }t        |j                  dd            }|j                  dd      }|j                  dd      }|j                  dd      }t        |||||||      S # t        j                  $ r t        d	|       w xY w)
NrM   Frt   r   rp   stop_reasonis_finished)rw   tool_user   finish_reasonusagert   provider_specific_fieldsz"Failed to decode JSON from chunk: )rb   rO   r   ru   JSONDecodeErrorrB   )	r%   r   rw   r   r   r   r   r   rt   s	            r&   chunk_parserz#TritonResponseIterator.chunk_parser@  s    	KD>BHKM8<E'+$		'1-.E 99]B/D!IImR8M))M59K(!'+)A  ## 	KA%IJJ	Ks   A.A1 1"BN)r]   r^   r_   rc   r   r   r0   r(   r&   rZ   rZ   ?  s    K$ K+@ Kr(   rZ   )'r`   ru   typingr   r   r   r   r   r   r	   r
   httpxr   r   litellm.constantsr   3litellm.litellm_core_utils.prompt_templates.factoryr   )litellm.llms.base_llm.base_model_iteratorr   )litellm.llms.base_llm.chat.transformationr   r   r   litellm.types.llms.openair   litellm.types.utilsr   r   r   r   r   r   common_utilsr   r   rP   rR   rZ   r0   r(   r&   <module>r      s|     U U U # ; N O 
 7  'U
: U
p3< 3lO OdK6 Kr(   