
    hs                     T   d Z ddlZddlmZ ddlmZmZmZmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ erdd
lmZ ddlmZ ddlmZ eeef   ZeZneZeZdZ G d de
d      Z  G d de
      Z! G d de
      Z" G d de
      Z# G d de
      Z$ G d de      Z%y)z
This is a rate limiter implementation based on a similar one by Envoy proxy. 

This is currently in development and not yet ready for production.
    N)datetime)	TYPE_CHECKINGAnyDictListLiteralOptional	TypedDictUnioncast)HTTPException)	DualCache)verbose_proxy_logger)CustomLogger)UserAPIKeyAuth)Span)InternalUsageCacheRedisPipelineIncrementOperationa  
local results = {}
local now = tonumber(ARGV[1])
local window_size = tonumber(ARGV[2])

-- Process each window/counter pair
for i = 1, #KEYS, 2 do
    local window_key = KEYS[i]
    local counter_key = KEYS[i + 1]
    local increment_value = 1

    -- Check if window exists and is valid
    local window_start = redis.call('GET', window_key)
    if not window_start or (now - tonumber(window_start)) >= window_size then
        -- Reset window and counter
        redis.call('SET', window_key, tostring(now))
        redis.call('SET', counter_key, increment_value)
        redis.call('EXPIRE', window_key, window_size)
        redis.call('EXPIRE', counter_key, window_size)
        table.insert(results, tostring(now)) -- window_start
        table.insert(results, increment_value) -- counter
    else
        local counter = redis.call('INCR', counter_key)
        table.insert(results, window_start) -- window_start
        table.insert(results, counter) -- counter
    end
end

return results
c                   N    e Zd ZU ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)"RateLimitDescriptorRateLimitObjectrequests_per_unittokens_per_unitmax_parallel_requestswindow_sizeN)__name__
__module____qualname__r	   int__annotations__     k/var/www/Befach/backend/env/lib/python3.12/site-packages/litellm/proxy/hooks/parallel_request_limiter_v3.pyr   r   G   s*    }$c]"#C=(#r"   r   F)totalc                   2    e Zd ZU eed<   eed<   ee   ed<   y)RateLimitDescriptorkeyvalue
rate_limitN)r   r   r   strr    r	   r   r!   r"   r#   r&   r&   N   s    	HJ;<<r"   r&   c                   F    e Zd ZU eed<   eed<   eed<   ed   ed<   eed<   y)RateLimitStatuscodecurrent_limitlimit_remainingrequeststokensr   rate_limit_typedescriptor_keyN)r   r   r   r*   r    r   r   r!   r"   r#   r,   r,   T   s%    
IJKKr"   r,   c                   (    e Zd ZU eed<   ee   ed<   y)RateLimitResponseoverall_codestatusesN)r   r   r   r*   r    r   r,   r!   r"   r#   r6   r6   \   s    ?##r"   r6   c                   (    e Zd ZU ee   ed<   eed<   y) RateLimitResponseWithDescriptorsdescriptorsresponseN)r   r   r   r   r&   r    r6   r!   r"   r#   r:   r:   a   s    )**r"   r:   c                       e Zd ZdefdZdee   dededee   fdZ	ded	ed
e
d   defdZdee   dee   deeef   defdZ	 	 d#dee   dee   dedefdZdedededefdZded	ed
e
d   deded   f
dZde
d   fdZd  Zd! Zdedefd"Zy)$$_PROXY_MaxParallelRequestsHandler_v3internal_usage_cachec                    || _         | j                   j                  j                  9| j                   j                  j                  j                  t              | _        nd | _        t        t        j                  dd            | _	        y )NLITELLM_RATE_LIMIT_WINDOW_SIZE<   )
r?   
dual_cacheredis_cacheasync_register_scriptBATCH_RATE_LIMITER_SCRIPTbatch_rate_limiter_scriptr   osgetenvr   )selfr?   s     r#   __init__z-_PROXY_MaxParallelRequestsHandler_v3.__init__g   sp    $8!$$//;;G))44@@VV- * .2D*ryy)I2NOr"   keysnow_intr   returnc                    K   g }t        dt        |      d      D ]R  }||   }||dz      }d}| j                  j                  |dd       d{   }	|	|t	        |	      z
  |k\  r| j                  j                  |t        |      |dd       d{    | j                  j                  |||dd       d{    |j                  t        |             |j                  |       | j                  j                  |dd       d{   }
|
t	        |
      nd|z   }| j                  j                  |||dd       d{    |j                  |	       |j                  |       U |S 7 $7 7 7 l7 4w)z
        Implement sliding window rate limiting logic using in-memory cache operations.
        This follows the same logic as the Redis Lua script but uses async cache operations.
        r         NT)r'   litellm_parent_otel_span
local_onlyr'   r(   ttlrR   rS   )rangelenr?   async_get_cacher   async_set_cacher*   append)rJ   rL   rM   r   resultsi
window_keycounter_keyincrement_valuewindow_startcurrent_counternew_counter_values               r#   in_memory_cache_sliding_windowzC_PROXY_MaxParallelRequestsHandler_v3.in_memory_cache_sliding_windowt   s       q#d)Q'AaJq1u+KO "&!:!:!J!J)- "K " L ##l2C(C'S//??"g,#-1# @    //??#)#-1# @    s7|,/ )-(A(A(Q(Q#-1# )R ) # -<,GC(Q#%$! //??#+#-1# @    |,01c (f [#s\   AE>E3AE>E6'E>;E8<AE>E:9E>E<,E>6E>8E>:E><E>r'   r(   r3   r0   c                     d| d| d| }|S )zI
        Create the rate limit keys for the given key and value.
        {:z}:r!   )rJ   r'   r(   r3   r^   s        r#   create_rate_limit_keysz;_PROXY_MaxParallelRequestsHandler_v3.create_rate_limit_keys   s#     3%qs?*;<r"   keys_to_fetchcache_valueskey_metadatac           	         g }d}t        dt        |      d      D ]  }d}||   }||dz      }	||dz      }
||   d   }||   d   }||   d   }d}d}|	j                  d	      r|}d
}n+|	j                  d      r|}d}n|	j                  d      r|}d}||||
t        |
      dz   |kD  rd}d}|
|t        |
      z
  n|}|j	                  ||||||   d   d        t        ||      S )z?
        Check if the cache values are over the limit.
        OKr   rP   rQ   requests_limitmax_parallel_requests_limittokens_limitNz	:requestsr1   z:max_parallel_requestsr   z:tokensr2   
OVER_LIMITr4   )r-   r.   r/   r3   r4   )r7   r8   )rV   rW   endswithr   rZ   r6   )rJ   rh   ri   rj   r8   r7   r\   	item_coder]   r^   counter_valuerm   rn   ro   r.   r3   r/   s                    r#   is_cache_list_over_limitz=_PROXY_MaxParallelRequestsHandler_v3.is_cache_list_over_limit   sm    +-q#l+Q/AI&q)J'A.K(Q/M)*56FGN*6z*B-+' (
3NCL ,0M   ##K0 .",%%&>? ;"9%%i0 ,"*$(?(S-?!-Cm-S+(	
 !, M 22"  OO%%2'6'6&2:&>?O&PQ 0d !lXNNr"   Nr;   parent_otel_span	read_onlyc                   K   t        j                         j                         }t        |      }g }i }|D ]=  }|d   }	|d   }
|j	                  di       xs i }|j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      xs | j
                  }d|	 d	|
 d
}d}|(| j                  |	|
d      }|j                  ||g       d}|(| j                  |	|
d      }|j                  ||g       d}|(| j                  |	|
d      }|j                  ||g       d}|s|t        |      nd|t        |      nd|t        |      ndt        |      |	d||<   @ | j                  j                  ||d       d{   }|| j                  |||      }|d   dk(  r|S | j                  | j                  ||| j
                  g       d{   }t        dt        |      d      D ]  }||   }||dz      }||   }||dz      }| j                  j                  ||| j
                  |d       d{    | j                  j                  ||| j
                  |d       d{     n&| j                  ||| j
                         d{   }| j                  |||      }|S 7 ,7 7 y7 I7 "w)z
        Check if any of the rate limit descriptors should be rate limited.
        Returns a RateLimitResponse with the overall code and status for each descriptor.
        Uses batch operations for Redis to improve performance.
        r'   r(   r)   r   r   r   r   re   rf   z}:windowFNr1   Tr2   )rm   ro   rn   r   r4   )rL   ru   rS   r7   rp   )rL   argsr   rP   rQ   rT   )rL   rM   r   )r   now	timestampr   getr   rg   extendr?   async_batch_get_cachert   rG   rV   rW   rY   rc   )rJ   r;   ru   rv   ry   rM   rh   rj   
descriptorr4   descriptor_valuer)   rm   ro   rn   r   r]   rate_limit_setrpm_keytpm_keymax_parallel_requests_keyri   rate_limit_responser\   r^   window_valuers   s                              r#   should_rate_limitz6_PROXY_MaxParallelRequestsHandler_v3.should_rate_limit  s     lln&&(c( $&%J'.N)'2#b9?RJ'^^,?@N%>>*;<L*4..9P*Q'$..7K4;K;KKn-Q/?.@	JJ"N)55"$4j $$j'%:;!%'55"$4h $$j'%:;!%*6,0,G,G"$46M-) $$j2K%LM!%! "- #&n"55A5ML 1SW.: 033N/O";/"0
(L$E &^ "66LL- M 
 
 #"&"?"?|\# #>2lB** ))5!%!?!?"t//0 "@ " L 1c,/3*1-
+AE2+A ,QU 3//??#'((-=# @    //??"&((-=# @    4( "&!D!D" ,, "E " L #;;<
 #"e
s\   FKKAK*K+A$KK	1KK(K*K+KK	KKKuser_api_key_dictcachedata	call_typec                 \  K   ddl m}m} t        j                  d       g }|j
                  rT|j                  t        d|j
                  |j                  |j                  |j                  | j                  d             |j                  rI|j                  t        d|j                  |j                  |j                  | j                  d             |j                  rI|j                  t        d	|j                  |j                   |j"                  | j                  d             |j$                  rI|j                  t        d
|j$                  |j&                  |j(                  | j                  d             |j+                  dd      }|r ||       ||      y ||      xs i }	 ||      xs i }
d}||	v rd}n||
v rd}|rPd}d}||	v r|	|   }||
v r|
|   }|j                  t        d|j
                   d| ||| j                  d             | j-                  ||j.                         d{   }|d   dk(  rYt1        |d         D ]G  \  }}|d   dk(  s||   }t3        dd|d    d|d    d|d    dt5        | j                        i       y||d<   y7 kw) zP
        Pre-call hook to check rate limits before making the API call.
        r   )get_key_model_rpm_limitget_key_model_tpm_limitzInside Rate Limit Pre-Call Hookapi_key)r   r   r   r   )r'   r(   r)   user)r   r   r   teamend_usermodelNFTmodel_per_keyrf   )r;   ru   r7   rp   r8   r-   i  zRate limit exceeded for r'   z: r(   z. Remaining: r/   zretry-after)status_codedetailheaders!litellm_proxy_rate_limit_response)litellm.proxy.auth.auth_utilsr   r   r   debugr   rZ   r&   	rpm_limit	tpm_limitr   r   user_iduser_rpm_limituser_tpm_limitteam_idteam_rpm_limitteam_tpm_limitend_user_idend_user_rpm_limitend_user_tpm_limitr{   r   ru   	enumerater   r*   )rJ   r   r   r   r   r   r   r;   requested_model_tpm_limit_for_key_model_rpm_limit_for_key_modelshould_check_rate_limitmodel_specific_tpm_limitmodel_specific_rpm_limitr<   r\   statusr~   s                     r#   async_pre_call_hookz8_PROXY_MaxParallelRequestsHandler_v3.async_pre_call_hookx  s&    	

 	""#DE  $$#!+33->-H-H+<+F+F1B1X1X'+'7'7	 	 $$#+33->-M-M+<+K+K'+'7'7 
 $$#+33->-M-M+<+K+K'+'7'7 
 ((#"+77->-Q-Q+<+O+O'+'7'7 
 ((7D1#$56B&'89E'>?P'Q'WUW$'>?P'Q'WUW$&+#"::*.' $<<*.'&:>(:>("&>>/G/X,"&>>/G/X,""'+!2!:!: ;1_<MN1I/G+/+;+;$
 //#.?? 0 
 

 N#|3&x
';<	6&>\1!,QJ'$'!9*U:K9LBzZaObNccpqw  yJ  rK  qL   M)3t/?/?+@!  = 9AD45)
s   H<J,>J*?(J,(AJ,total_tokensr   c                     ddl m} g }| j                  ||d      }|j                   |||| j                               |S )z?
        Create pipeline operations for TPM increments
        r   r   r2   r'   r(   r3   r'   r_   rU   )litellm.types.cachingr   rg   rZ   r   )rJ   r'   r(   r3   r   r   pipeline_operationsr^   s           r#   _create_pipeline_operationsz@_PROXY_MaxParallelRequestsHandler_v3._create_pipeline_operations  s[     	JEG11$ 2 

 	""+ ,$$	
 #"r"   outputinputr$   c                 D    ddl m} |j                  dd      }|r|dvry|S )Nr   )general_settingstoken_rate_limit_typer   r   r$   )litellm.proxy.proxy_serverr   r{   )rJ   r   specified_rate_limit_types      r#   get_rate_limit_typez8_PROXY_MaxParallelRequestsHandler_v3.get_rate_limit_type  s9    ?$4$8$8#X%
! ),E N
 -

 ((r"   c                   K   ddl m} ddlm} ddlm} ddlm}m}	 | j                         }
 ||      }	 t        j                  d       |d   d   j                  d	      }|d   d   j                  d
      }|d   d   j                  d      }|j                  d      xs |d   d   j                  d      } ||      }d}t        ||      rPt        |dd      }|rAt        ||	      r5|
dk(  r|j                  }n#|
dk(  r|j                   }n|
dk(  r|j"                  }g }|r\| j%                  d|d      }|j'                   ||d| j(                               |j+                  | j-                  d|d|             |r$|j+                  | j-                  d|d|             |r$|j+                  | j-                  d|d|             |r$|j+                  | j-                  d|d|             |r+|r)|j+                  | j-                  d| d| d|             |r0| j.                  j0                  j3                  ||       d{    yy7 # t4        $ r+}t        j6                  dt9        |              Y d}~yd}~ww xY ww) zb
        Update TPM usage on successful API calls by incrementing counters using pipeline
        r   !_get_parent_otel_span_from_kwargs)#get_model_group_from_litellm_kwargsr   )ModelResponseUsagez5INSIDE parallel request limiter ASYNC SUCCESS LOGGINGlitellm_paramsmetadatauser_api_keyuser_api_key_user_iduser_api_key_team_idr   user_api_key_end_user_idusageNr   r   r$   r   r   r   r   r2   )r'   r(   r3   r   r   r   r   rf   increment_listrR   z#Error in rate limit success event: )'litellm.litellm_core_utils.core_helpersr   )litellm.proxy.common_utils.callback_utilsr   r   r   litellm.types.utilsr   r   r   r   r   r{   
isinstancegetattrcompletion_tokensprompt_tokensr   rg   rZ   r   r|   r   r?   rC   async_increment_cache_pipeline	Exception	exceptionr*   )rJ   kwargsresponse_obj
start_timeend_timer   r   r   r   r   r3   rR   r   r   r   r   model_groupr   _usager   r^   es                         r#   async_log_success_eventz<_PROXY_MaxParallelRequestsHandler_v3.async_log_success_event'  s    	
	
 	J<2246W7
 p	 &&G
 ""23J?CCNSL#)*:#;J#G#K#K&$  $**:#;J#G#K#K&$  (.zz&'9 (.VDT=U>c,- % >fEK L,6 w=j7&(2'-'?'?(G3'-';';(G3'-':': JL "99!&$; : 
 $**3'(* ,, $**44%*(0%1	 5  $#**44"2(0%1	 5  $#**44"2(0%1	 5  (#**44&6(0%1	 5  |#**44+!-a}=(0%1	 5  #//::YY#6-E Z    #
  	 **5c!fX> 	sA   3JHI II JI 	J!J>JJJc                   K   ddl m} ddlm} 	  ||      }|d   d   j	                  d      }g }	|r8| j                  d|d	      }
|	j                   ||
d
| j                               |	r0| j                  j                  j                  |	|       d{    yy7 # t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY ww)zI
        Decrement max parallel requests counter for the API Key
        r   r   r   r   r   r   r   r   r   r   r   r   Nz#Error in rate limit failure event: )r   r   r   r   r{   rg   rZ   r   r?   rC   r   r   r   r   r*   )rJ   r   r   r   r   r   r   rR   r   r   r^   r   s               r#   async_log_failure_eventz<_PROXY_MaxParallelRequestsHandler_v3.async_log_failure_event  s    	
 	J	 2&9 % ""23J?CCNSLIK"99!&$; : 
 $**3'(* ,, #//::YY#6-E Z    #  	 **5c!fX> 	sA   CBB  BB  CB   	C)!C
CCCc                 @  K   	 ddl m} t        t        t           |j                  dd            }|t        |d      rt        |d      }nd}|t        ||      st        |t              rxt        ||      r|j                         }|j                  di       xs i }|d   D ]*  }d|d	    }	|d
   ||	 d|d    <   |d   ||	 d|d    <   , t        |di |d|i       yyyy# t        $ r+}
t        j                  dt        |
              Y d}
~
yd}
~
ww xY ww)zN
        Post-call hook to update rate limit headers in the response.
        r   )	BaseModelr   N_hidden_paramsadditional_headersr8   zx-ratelimit-r4   r/   z-remaining-r3   r.   z-limit-z$Error in rate limit post-call hook: )pydanticr   r   r	   r6   r{   hasattrr   r   dict
model_dumpsetattrr   r   r   r*   )rJ   r   r   r<   r   r   r   _additional_headersr   prefixr   s              r#   async_post_call_success_hookzA_PROXY_MaxParallelRequestsHandler_v3.async_post_call_success_hook  s    -	*04*+<dC1-
 1<8%56%,X7G%HN%)N!-~y9!.$7!.)<)7)B)B)D '**+?DJ (
 #DJ"O#/7G0H/I!J ##45 ,%hk&9J2K1LM
 #?3 ,%hgf5F.G-HI #P  (U>U+?ATU' 8 . =D  	 **6s1vh? 	s/   DCC' #D'	D0!DDDD)NF)r   r   r   r   rK   r   r*   r   r   rc   r   rg   r   r6   rt   r&   r	   r   boolr   r   r   r   r   r   r   r   r   r   r!   r"   r#   r>   r>   f   s   P-? P@3i@ @ 	@
 
c@D  !!NO	
 
>OCy>O 3i>O 38n	>O
 
>OF ,0	s#-.s# #4.s# 	s#
 
s#jCA)CA CA 	CA
 CAJ## # !!NO	#
 # 
/	0#8)W-G%H )BH(T33-;3r"   r>   )&__doc__rH   r   typingr   r   r   r   r   r	   r
   r   r   fastapir   litellmr   litellm._loggingr   "litellm.integrations.custom_loggerr   litellm.proxy._typesr   opentelemetry.tracer   _Spanlitellm.proxy.utilsr   _InternalUsageCacher   r   rF   r   r&   r,   r6   r:   r>   r!   r"   r#   <module>r      s   
 
 
 
 
 "  1 ; /1MED,D @% =) =i $	 $
 y  
b
< b
r"   