
    h:k                        d dl Z d dlmZmZmZmZmZmZ d dlZd dl	Z	d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ ddlmZ erd dlmZ  ee ef   ZneZ G d de      Z! G d dee      Z"y)    N)TYPE_CHECKINGAnyDictListOptionalUnion)token_counter)verbose_loggerverbose_router_logger)	DualCache)CustomLogger)!_get_parent_otel_span_from_kwargs)RouterErrors)LiteLLMPydanticObjectBaseStandardLoggingPayload)get_utc_datetimeprint_verbose   )BaseRoutingStrategy)Spanc                       e Zd ZU dZeed<   y)RoutingArgs<   ttlN)__name__
__module____qualname__r   int__annotations__     e/var/www/Befach/backend/env/lib/python3.12/site-packages/litellm/router_strategy/lowest_tpm_rpm_v2.pyr   r      s    Cr!   r   c                      e Zd ZU dZdZeed<   dZeed<   dZ	eed<   dZ
eed<   i fd	ed
edefdZdedee   fdZdedee   dee   fdZd Zd Zdee   dededefdZ	 	 d$dedededee   dedee   deeeeef         d eeeef      dee   fd!Z	 	 d$dededeeeeef         d eeeef      fd"Z	 	 	 d%dededeeeeef         d eeeef      dee   f
d#Zy)&LowestTPMLoggingHandler_v2z
    Updated version of TPM/RPM Logging.

    Meant to work across instances.

    Caches individual models, not model_groups

    Uses batch get (redis.mget)

    Increments tpm/rpm limit using redis.incr
    F	test_flagr   logged_successlogged_failurei  default_cache_time_secondsrouter_cache
model_listrouting_argsc                 r    || _         || _        t        di || _        t	        j
                  | |dd       y )NTg?)
dual_cacheshould_batch_redis_writesdefault_sync_intervalr    )r)   r*   r   r+   r   __init__)selfr)   r*   r+   s       r"   r0   z#LowestTPMLoggingHandler_v2.__init__2   s=     )$'7,7$$#&*"%		
r!   
deploymentreturnc                 \   	 t               }|j                  d      }|j                  di       j                  d      }|j                  di       j                  d      }| d| d| }| j                  j	                  |d	      }d
}||j                  d      }|!|j                  di       j                  d      }|!|j                  di       j                  d      }|t        d      }|||k\  rt        j                  dj                  ||      d|j                  di       j                  d      t        j                  ddj                  t        j                  j                  ||||j                  dd            t        j                  dd                  | j                  j                  |d| j                   j"                        }	|	|	|kD  rt        j                  dj                  ||	      d|j                  di       j                  d      t        j                  ddj                  t        j                  j                  ||	      t        j                  dd                  |S # t$        $ r(}
t'        |
t        j                        r|
|cY d
}
~
S d
}
~
ww xY w)z
        Pre-call check + update model rpm

        Returns - deployment

        Raises - RateLimitError if deployment over defined RPM limit
        %H-%M
model_infoidlitellm_paramsmodel::rpm:Tkey
local_onlyNrpminf6Deployment over defined rpm limit={}. current usage={}   zr{} rpm limit={}. current usage={}. id={}, model_group={}. Get the model info by calling 'router.get_model_info(id)
model_nametpm_rpm_limits"https://github.com/BerriAI/litellmmethodurl)status_codecontentrequestmessagellm_providerr9   responser   r=   valuer   !{} rpm limit={}. current usage={})r   strftimegetr)   	get_cachefloatlitellmRateLimitErrorformathttpxResponser   user_defined_ratelimit_errorrR   Requestincrement_cacher+   r   	Exception
isinstance)r1   r2   dtcurrent_minutemodel_iddeployment_namerpm_keylocal_resultdeployment_rpmresultes              r"   pre_call_checkz)LowestTPMLoggingHandler_v2.pre_call_check?   s   G	
 "#B[[1N!~~lB7;;DAH(nn-=rBFFwOO!
!O#4E.9IJG,,66 7 L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'LN,J,,T[[& "$$..)92>BB7K"^^$' !U  !\  !\(EEKK*($&NN<<! !&5EKo p
 * **::qd.?.?.C.C ;  %&>*A!00 X _ _*F! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 %*MM9IOs$t"    	!W334	s   I7I: :	J+J& J+&J+parent_otel_spanc                   K   	 t               }|j                  d      }|j                  di       j                  d      }|j                  di       j                  d      }| d| d| }| j                  j	                  |d	       d
{   }d
}	|	|j                  d      }	|	!|j                  di       j                  d      }	|	!|j                  di       j                  d      }	|	t        d      }	|||	k\  rt        j                  dj                  |	|      d|j                  di       j                  d      t        j                  ddj                  t        j                  j                  |	|      dt        d      it        j                  dd            |j                  d            | j!                  |d| j"                  j$                         d
{   }
|
|
|	kD  rt        j                  dj                  |	|
      d|j                  di       j                  d      t        j                  ddj                  t        j                  j                  |	|
      dt        d      it        j                  dd            |j                  d            |S 7 7 # t&        $ r(}t)        |t        j                        r||cY d
}~S d
}~ww xY ww)aI  
        Pre-call check + update model rpm
        - Used inside semaphore
        - raise rate limit error if deployment over limit

        Why? solves concurrency issue - https://github.com/BerriAI/litellm/issues/2994

        Returns - deployment

        Raises - RateLimitError if deployment over defined RPM limit
        r5   r6   r7   r8   r9   r:   r;   Tr<   Nr?   r@   rA   rB   rC   rS   retry-afterr   rE   rF   rG   rJ   rK   headersrL   num_retries)rN   rO   r9   rP   rq   r   rQ   )r   rT   rU   r)   async_get_cacherW   rX   rY   rZ   r[   r\   r   r]   rR   strr^   "_increment_value_in_current_windowr+   r   r`   ra   )r1   r2   rl   rb   rc   rd   re   rf   rg   rh   ri   rj   s               r"   async_pre_call_checkz/LowestTPMLoggingHandler_v2.async_pre_call_check   s    F	 "#B[[1N!~~lB7;;DAH(nn-=rBFFwOO!
!O#4E.9IJG!%!2!2!B!B "C " L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'LN,J,,T[[& "$$..)92>BB7K"^^$' C J J(EEKK*(!
 "/B 8 %5EKo p	 !+} =! (  $FFqd.?.?.C.C  G    %&>*A!00 X _ _*F! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 &3CG$<$)MM9IOs$t	" %/NN=$A! $ qD.  	!W334	s[   K!BJ- J(EJ- "J+#CJ- 'K!(J- +J- -	K6KKK!KK!c                    	 	 |j                  d      }|t        d      |j                  d      }|d   j                  d      }|j                  d      }|||y t        |t              rt	        |      }|j                  d      }	t               }
|
j                  d      }| d	| d
| }| j                  j                  ||	| j                  j                         | j                  r| xj                  dz  c_        y y # t        $ r7}t        j                  dj!                  t	        |                   Y d }~y d }~ww xY w)Nstandard_logging_object&standard_logging_object not passed in.model_grouphidden_paramslitellm_model_namerd   total_tokensr5   r:   :tpm:rQ   r   zUlitellm.proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {})rU   
ValueErrorra   r   rs   r   rT   r)   r_   r+   r   r%   r&   r`   r
   	exceptionrZ   )r1   kwargsresponse_obj
start_timeend_timerw   ry   r9   r7   r|   rb   rc   tpm_keyrj   s                 r"   log_success_eventz,LowestTPMLoggingHandler_v2.log_success_event   sZ   .	 IO

)I# '. !IJJ155mDK+O<@@AUVE(,,Z8B"bjEMB$W266~FL
 "#B[[N AeWE.)9:G --<T5F5F5J5J .  ~~##q(#  	$$gnnF
 	s   AD B$D 	E-D??Ec                   K   	 	 |j                  d      }|t        d      |j                  d      }|d   d   }|j                  d      }||y t        |t              rt	        |      }|j                  d      }	t               }
|
j                  d      }| d	| d
| }t        |      }| j                  j                  ||	| j                  j                  |       d {    | j                  r| xj                  dz  c_        y y 7 '# t        $ r7}t        j                   dj#                  t	        |                   Y d }~y d }~ww xY ww)Nrw   rx   ry   rz   r{   rd   r|   r5   r:   r}   )r=   rR   r   rl   r   z[litellm.proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {})rU   r~   ra   r   rs   r   rT   r   r)   async_increment_cacher+   r   r%   r&   r`   r
   r   rZ   )r1   r   r   r   r   rw   ry   r9   r7   r|   rb   rc   r   rl   rj   s                  r"   async_log_success_eventz2LowestTPMLoggingHandler_v2.async_log_success_event  su    0	 IO

)I# '. !IJJ155mDK+O<=QRE(,,Z8B"bjB$W266~FL "#B[[N AeWE.)9:G
  AH##99"%%))!1	 :    ~~##q(#   	$$mttF
 	sN   EAD EBD $D%%D 
ED 	E-E	E	EEhealthy_deploymentsall_deploymentsinput_tokensrpm_dictc                    t        d      }g }|j                         D ]M  \  }}d }	|j                  d      d   }|D ]  }
||
d   d   k(  s|
}	 |	5|8d }||	j                  d      }|!|	j                  di       j                  d      }|!|	j                  di       j                  d      }|t        d      }d }||	j                  d      }|!|	j                  di       j                  d      }|!|	j                  di       j                  d      }|t        d      }||z   |kD  r|||v r||   ||   d	z   |k\  r*||k(  r|j	                  |	       B||k  sI|}|	g}P |S )
Nr@   r:   r   r6   r7   tpmr8   r?   r   )rW   itemssplitrU   append)r1   r   r   r   r   
lowest_tpmpotential_deploymentsitemitem_tpm_deploymentm_deployment_tpm_deployment_rpms                r"   _return_potential_deploymentsz8LowestTPMLoggingHandler_v2._return_potential_deploymentsJ  s    5\
 "-335ND(K::c?1%D(1\?400"#K ) "!"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',,&8%$(*:TN.d^a'?:Z'%,,[9J&%
)4%W 6X %$r!   Nry   tpm_keys
tpm_valuesrpm_keys
rpm_valuesmessagesinputc	                 8   ||yi }	t        |      D ]"  \  }
}||
   |	||
   j                  d      d   <   $ i }t        |      D ]"  \  }
}||
   |||
   j                  d      d   <   $ 	 t        ||      }t	        j
                  d|        |	i }	|D ]  }d|	|d   d   <    n|D ]  }|d   d   }||	vs|	|   d|	|<    |	}| j                  ||||      }t        d	       t        |      dkD  rt        j                  |      S y# t        $ r d}Y w xY w)
za
        Common checks for get available deployment, across sync + async implementations
        Nr:   r   )r   textzinput_tokens=r6   r7   )r   r   r   r   z+returning picked lowest tpm/rpm deployment.)	enumerater   r	   r`   r   debugr   r   lenrandomchoice)r1   ry   r   r   r   r   r   r   r   tpm_dictidxr=   r   r   r2   dr   r   r   s                      r"   #_common_checks_available_deploymentz>LowestTPMLoggingHandler_v2._common_checks_available_deployment  s}    !3!(+HC4>sOHXc]((-a01 , !(+HC4>sOHXc]((-a01 ,	((GL 	##mL>$BC
 H1
;<L1$78 2 )L/$/(*hw.?.G()HW%	 ) # $ B B 3+%	 !C !
 	CD$%)==!677?  	L	s   +D DDc                   K   t        j                  d| d|        t               }|j                  d      }g }g }|D ]  }	t	        |	t
              s|	j                  di       j                  d      }
|	j                  di       j                  d      }dj                  |
||      }d	j                  |
||      }|j                  |       |j                  |        ||z   }| j                  j                  |
       d{   }||dt        |       }|t        |      d }nd}d}| j                  ||||||||      }	 |J |S 7 G# t        $ r i }t        |      D ]*  \  }}t	        |t
              s|j                  di       j                  d      }
d}||j                  dd      }|"|j                  di       j                  dd      }|"|j                  di       j                  dd      }|t        d      }|r||   nd}d}||j                  dd      }|"|j                  di       j                  dd      }|"|j                  di       j                  dd      }|t        d      }|r||   nd}||||d||
<   - t!        j"                  t$        j&                  j(                   d| d| d|t+        j,                  dddt/        d      it+        j0                  dd                  w xY ww)zz
        Async implementation of get deployments.

        Reduces time to retrieve the tpm/rpm values from cache
        6get_available_deployments - Usage Based. model_group: , healthy_deployments: r5   r6   r7   r8   r9   {}:{}:tpm:{}{}:{}:rpm:{})keysNry   r   r   r   r   r   r   r   r   r@   r   r?   current_tpm	tpm_limitcurrent_rpm	rpm_limit. Passed model=. Deployments=rB   rC   rn   r   rE   rF   rG   ro   rM   )r   r   r   rT   ra   dictrU   rZ   r   r)   async_batch_get_cacher   r   r`   r   rW   rX   rY   r   no_deployments_availablerR   r[   r\   rs   r^   )r1   ry   r   r   r   rb   rc   r   r   r   r7   re   r   rf   combined_tpm_rpm_keyscombined_tpm_rpm_valuesr   r   r2   deployment_dictindexr   r   r   r   r   s                             r"   async_get_available_deploymentsz:LowestTPMLoggingHandler_v2.async_get_available_deployments  s     	##D[MQhi|h}~	
 W-$A!T"UU<,00 #$%%(8""="A"A'"J(//O^T(//O^T(( % !)8 3(,(9(9(O(O& )P )
 #
 #.03x=AJ0XAJJJ==# 3!! > 	

=	)))1#
2  :	 O&/0C&D"{k40$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ (('@@FFGWbVccq  sB  rC  D! #*CG4!MM1AGkl		
 
a:	s9   ALB0LEA LE 
L.L<FLLc           
      V   t        j                  d| d|        t               }|j                  d      }g }g }	|D ]  }
t	        |
t
              s|
j                  di       j                  d      }|
j                  di       j                  d      }dj                  |||      }d	j                  |||      }|j                  |       |	j                  |        | j                  j                  ||
      }| j                  j                  |	|
      }| j                  |||||	|||      }	 |J |S # t        $ rg i }t        |      D ]*  \  }}t	        |t
              s|j                  di       j                  d      }d}||j                  dd      }|"|j                  di       j                  dd      }|"|j                  di       j                  dd      }|t        d      }|r||   nd}d}||j                  dd      }|"|j                  di       j                  dd      }|"|j                  di       j                  dd      }|t        d      }|r||   nd}||||d||<   - t        t         j"                  j$                   d| d|       w xY w)zE
        Returns a deployment with the lowest TPM/RPM usage.
        r   r   r5   r6   r7   r8   r9   r   r   )r   rl   r   Nr   r@   r   r?   r   r   r   )r   r   r   rT   ra   r   rU   rZ   r   r)   batch_get_cacher   r`   r   rW   r~   r   r   rR   )r1   ry   r   r   r   rl   rb   rc   r   r   r   r7   re   r   rf   r   r   r2   r   r   r   r   r   r   r   s                            r"   get_available_deploymentsz4LowestTPMLoggingHandler_v2.get_available_deployments7  s    	##D[MQhi|h}~	
 W-$A!T"UU<,00 #$%%(8""="A"A'"J(//O^T(//O^T(( % &&66,< 7 

 &&66,< 7 

 ==# 3!! > 	

5	))) 2	 O&/0C&D"{k40$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ 88>>?{m[ijyiz{ a2	s   1D7 7.J(&EJ()NN)NNN)r   r   r   __doc__r%   boolr   r&   r   r'   r(   r   listr   r0   r   r   rk   r   ru   r   r   r   r   rs   r   r   r   r   r    r!   r"   r$   r$       s,   
 ItNCNC&11 OQ
%
37
GK
O O(4. ObTT2:4.T	$Tl/b1f5%!$Z5% 5% 	5%
 5%~ 48,0;; "; 	;
 TN; ; TN; 4S#X/0; c4i(); 
$;B 48,0ww "w 4S#X/0	w
 c4i()wz 48,0+/gg "g 4S#X/0	g
 c4i()g #4.gr!   r$   )#r   typingr   r   r   r   r   r   r[   rX   r	   litellm._loggingr
   r   litellm.caching.cachingr   "litellm.integrations.custom_loggerr   'litellm.litellm_core_utils.core_helpersr   litellm.types.routerr   litellm.types.utilsr   r   litellm.utilsr   r   base_routing_strategyr   opentelemetry.tracer   _Spanr   r$   r    r!   r"   <module>r      sh     B B   ! B - ; U - Q 9 61DD+ ~	!4l ~	r!   