
    hA                         d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ ddlmZ  G d	 d
e      Zy)a  
Redis Semantic Cache implementation for LiteLLM

The RedisSemanticCache provides semantic caching functionality using Redis as a backend.
This cache stores responses based on the semantic similarity of prompts rather than
exact matching, allowing for more flexible caching of LLM responses.

This implementation uses RedisVL's SemanticCache to find semantically similar prompts
and their cached responses.
    N)AnyDictListOptionalTuplecast)print_verbose)get_str_from_messages)EmbeddingResponse   )	BaseCachec                   D   e Zd ZU dZdZeed<   	 	 	 	 	 	 	 ddee   dee   dee   dee   d	ee   d
edee   fdZ	dee
   fdZdedee   fdZdedefdZdededdfdZdedefdZdedee   fdZdededdfdZdedefdZdeeef   fdZdeeeef      ddfdZy)RedisSemanticCachea  
    Redis-backed semantic cache for LLM responses.

    This cache uses vector similarity to find semantically similar prompts that have been
    previously sent to the LLM, allowing for cache hits even when prompts are not identical
    but carry similar meaning.
    litellm_semantic_cache_indexDEFAULT_REDIS_INDEX_NAMENhostportpassword	redis_urlsimilarity_thresholdembedding_model
index_namec                 "   ddl m}	 ddlm}
 || j                  }t        d|        |t        d      || _        d|z
  | _        || _	        |Q	 |xs t        j                  d   }|xs t        j                  d	   }|xs t        j                  d
   }d| d| d| }t        d|         |
| j                        } |	|||| j                  d      | _        y# t        $ r'}|j                  d   }t        d| d| d      |d}~ww xY w)a-  
        Initialize the Redis Semantic Cache.

        Args:
            host: Redis host address
            port: Redis port
            password: Redis password
            redis_url: Full Redis URL (alternative to separate host/port/password)
            similarity_threshold: Threshold for semantic similarity (0.0 to 1.0)
                where 1.0 requires exact matches and 0.0 accepts any match
            embedding_model: Model to use for generating embeddings
            index_name: Name for the Redis index
            ttl: Default time-to-live for cache entries in seconds
            **kwargs: Additional arguments passed to the Redis client

        Raises:
            Exception: If similarity_threshold is not provided or required Redis
                connection information is missing
        r   )SemanticCache)CustomTextVectorizerNz*Redis semantic-cache initializing index - z2similarity_threshold must be provided, passed Noner   
REDIS_HOST
REDIS_PORTREDIS_PASSWORDz&Missing required Redis configuration: z
. Provide z or redis_url.z	redis://:@:z Redis semantic-cache redis_url: F)namer   
vectorizerdistance_threshold	overwrite)redisvl.extensions.llmcacher   redisvl.utils.vectorizer   r   r	   
ValueErrorr   r#   r   osenvironKeyErrorargs_get_embeddingllmcache)selfr   r   r   r   r   r   r   kwargsr   r   emissing_varcache_vectorizers                 `/var/www/Befach/backend/env/lib/python3.12/site-packages/litellm/caching/redis_semantic_cache.py__init__zRedisSemanticCache.__init__'   sP   < 	>@66JB:,OP  'QRR %9!
 #$&:":. 7rzz,77rzz,7#Crzz2B'C $H:QtfAdV<I8DE 00C0CD%'#66
  ffQi <[M J*m>; s   AC 	D'"D		Dreturnc                 B    |j                  d      }|t        |      }|S )a  
        Get the TTL (time-to-live) value for cache entries.

        Args:
            **kwargs: Keyword arguments that may contain a custom TTL

        Returns:
            Optional[int]: The TTL value in seconds, or None if no TTL should be applied
        ttl)getint)r.   r/   r7   s      r3   _get_ttlzRedisSemanticCache._get_ttlx   s%     jj?c(C
    promptc           
          t        t        t        j                  | j                  |ddd            }|d   d   d   }|S )z
        Generate an embedding vector for the given prompt using the configured embedding model.

        Args:
            prompt: The text to generate an embedding for

        Returns:
            List[float]: The embedding vector
        Tzno-storezno-cachemodelinputcachedatar   	embedding)r   r   litellmrD   r   )r.   r<   embedding_responserD   s       r3   r,   z!RedisSemanticCache._get_embedding   sP     "**#'T:
 'v.q1+>	r;   cached_responsec                 D   ||S t        |t              r|j                  d      }	 t        j                  |      }|S # t        j
                  $ rN 	 t        j                  |      }Y |S # t        t        f$ r"}t        dt        |              Y d}~Y yd}~ww xY ww xY w)z
        Process the cached response to prepare it for use.

        Args:
            cached_response: The raw cached response

        Returns:
            The processed cache response, or None if input was None
        Nzutf-8zError parsing cached response: )
isinstancebytesdecodejsonloadsJSONDecodeErrorastliteral_evalr'   SyntaxErrorr	   str)r.   rG   r0   s      r3   _get_cache_logicz#RedisSemanticCache._get_cache_logic   s     """ ou--44W=O	"jj9O  ## 	"%"2"2?"C
 	 ,  ?AxHI	s-   > BA**B9BBBBkeyvaluec           	         t        d|        d}	 |j                  dg       }|st        d       yt        |      }t        |      } | j                  di |}|(| j
                  j                  ||t        |             y| j
                  j                  ||       y# t        $ r(}t        d|xs | dt        |              Y d}~yd}~ww xY w)	a>  
        Store a value in the semantic cache.

        Args:
            key: The cache key (not directly used in semantic caching)
            value: The response value to cache
            **kwargs: Additional arguments including 'messages' for the prompt
                and optional 'ttl' for time-to-live
        z(Redis semantic-cache set_cache, kwargs: Nmessages)No messages provided for semantic caching)r7   zError setting z in the Redis semantic cache:  )	r	   r8   r
   rR   r:   r-   storer9   	Exception)	r.   rT   rU   r/   	value_strrW   r<   r7   r0   s	            r3   	set_cachezRedisSemanticCache.set_cache   s     	@IJ#'		zz*b1HIJ*84FE
I  $--)&)C##FI3s8#D##FI6 	 !3e 44RSVWXSYRZ[ 	s#   B! AB! B! !	C*CCc           
         t        d|        	 |j                  dg       }|st        d       yt        |      }| j                  j	                  |      }|sy|d   }t        |d         }d|z
  }|d	   }	|d
   }
t        d| j                   d| d| d|	        | j                  |
      S # t        $ r!}t        dt        |              Y d}~yd}~ww xY w)aN  
        Retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        z(Redis semantic-cache get_cache, kwargs: rW   .No messages provided for semantic cache lookupN)r<   r   vector_distancer   r<   response!Cache hit: similarity threshold: , actual similarity: , current prompt: , cached prompt: rG   z,Error retrieving from Redis semantic cache: )
r	   r8   r
   r-   checkfloatr   rS   r[   rR   )r.   rT   r/   rW   r<   results	cache_hitr`   
similaritycached_promptrG   r0   s               r3   	get_cachezRedisSemanticCache.get_cache   s    	@IJ$	Szz*b1HNO*84Fmm)))8G   
I#I.?$@AO
 _,J%h/M'
3O3D4M4M3N O&&0\ 2##)( +""/2 (((II 	SHQQRR	Ss#   B/ )B/ AB/ /	C8CCc                 N  K   ddl m}m} ||D cg c]  }|d   	 c}ng }	 || j                  |v rq|j	                  di       j	                  dd      }|j                  | j                  |ddd	|d|j	                  di       j	                  d
d      d       d{   }n-t        j
                  | j                  |ddd	       d{   }|d   d   d   S c c}w 7 B7 # t        $ r4}	t        dt        |	              t        dt        |	             |	d}	~	ww xY ww)a  
        Asynchronously generate an embedding for the given prompt.

        Args:
            prompt: The text to generate an embedding for
            **kwargs: Additional arguments that may contain metadata

        Returns:
            List[float]: The embedding vector
        r   )llm_model_list
llm_routerN
model_namemetadatauser_api_key Tr>   trace_id)rs   zsemantic-cache-embeddingru   )r@   rA   rB   rr   r?   rC   rD   z"Error generating async embedding: zFailed to generate embedding: )litellm.proxy.proxy_serverro   rp   r   r8   
aembeddingrE   r[   r	   rR   r'   )
r.   r<   r/   ro   rp   mrouter_model_namesrs   rF   r0   s
             r3   _get_async_embeddingz'RedisSemanticCache._get_async_embedding  sP     	J
 ) '55Q|_5 		O%$*>*>BT*T%zz*b9==nbQ+5+@+@.. '+>(448$*JJz2$>$B$B:t$T	 ,A 	, 	&" ,3+=+=.. '+>, &" &f-a0==9 6	&&  	O>s1vhGH=c!fXFGQN	OsW   D%CD%A;C% C!-C% C#C% D%!C% #C% %	D"./DD""D%c                   K   t        d|        	 |j                  dg       }|st        d       yt        |      }t        |      } | j                  |fi | d{   } | j
                  di |}|(| j                  j                  ||||       d{    y| j                  j                  |||       d{    y7 g7 .7 	# t        $ r!}	t        dt        |	              Y d}	~	yd}	~	ww xY ww)	aM  
        Asynchronously store a value in the semantic cache.

        Args:
            key: The cache key (not directly used in semantic caching)
            value: The response value to cache
            **kwargs: Additional arguments including 'messages' for the prompt
                and optional 'ttl' for time-to-live
        z.Async Redis semantic-cache set_cache, kwargs: rW   rX   N)vectorr7   )r|   zError in async_set_cache: rY   )	r	   r8   r
   rR   rz   r:   r-   astorer[   )
r.   rT   rU   r/   rW   r<   r\   prompt_embeddingr7   r0   s
             r3   async_set_cachez"RedisSemanticCache.async_set_cache?  s     	FvhOP	Azz*b1HIJ*84FE
I &?T%>%>v%P%PP  $--)&)Cmm**+	 +    mm**+ +     Q

  	A6s1vh?@@	Asz   C9C C9,C C:C CC C9!C  C
C C9C C 
C 	C6C1,C91C66C9c           
        K   t        d|        	 |j                  dg       }|s!t        d       d|j                  di       d<   yt        |      } | j                  |fi | d{   }| j
                  j                  ||       d{   }|sd|j                  di       d<   y|d	   }t        |d
         }d|z
  }	|d   }
|d   }|	|j                  di       d<   t        d| j                   d|	 d| d|
        | j                  |      S 7 7 # t        $ r6}t        dt        |              d|j                  di       d<   Y d}~yd}~ww xY ww)a]  
        Asynchronously retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        z.Async Redis semantic-cache get_cache, kwargs: rW   r_   g        rr   zsemantic-similarityN)r<   r|   r   r`   r   r<   ra   rb   rc   rd   re   rf   zError in async_get_cache: )r	   r8   
setdefaultr
   rz   r-   acheckrh   r   rS   r[   rR   )r.   rT   r/   rW   r<   r~   ri   rj   r`   rk   rl   rG   r0   s                r3   async_get_cachez"RedisSemanticCache.async_get_cachej  s     	FvhOP/	Kzz*b1HNOKN!!*b12GH*84F &?T%>%>v%P%PP !MM00GW0XXG   !!*b1) 
I#I.?$@AO
 _,J%h/M'
3O HRFj"-.CD3D4M4M3N O&&0\ 2##)( +""/2 (((IIC  Q Y>  	K6s1vh?@GJFj"-.CD	Ksk   E4D E!D (D)$D DD )E*A)D ED D 	E!,EEEEc                    K   | j                   j                          d{   }|j                          d{   S 7 7 w)z
        Get information about the Redis index.

        Returns:
            Dict[str, Any]: Information about the Redis index
        N)r-   _get_async_indexinfo)r.   aindexs     r3   _index_infozRedisSemanticCache._index_info  s5      }}5577[[]"" 8"s   A=A?AA
cache_listc                   K   	 g }|D ]+  }|j                   | j                  |d   |d   fi |       - t        j                  |  d{    y7 # t        $ r!}t        dt        |              Y d}~yd}~ww xY ww)z
        Asynchronously store multiple values in the semantic cache.

        Args:
            cache_list: List of (key, value) tuples to cache
            **kwargs: Additional arguments
        r   r   Nz#Error in async_set_cache_pipeline: )appendr   asynciogatherr[   r	   rR   )r.   r   r/   tasksvalr0   s         r3   async_set_cache_pipelinez+RedisSemanticCache.async_set_cache_pipeline  s~     	JE!1T11#a&#a&KFKL "..%((( 	J?AxHII	JsA   B AA AA B A 	A=A83B 8A==B )NNNNNztext-embedding-ada-002N)__name__
__module____qualname____doc__r   rR   __annotations__r   rh   r4   r9   r:   r   r,   r   rS   r]   rm   rz   r   r   r   r   r   r   rY   r;   r3   r   r      s    %CcB #""&#'047$(O
smO
 smO
 3-	O

 C=O
 'uoO
 O
 SMO
bHSM S T%[ ,  : S    4  D1SS 1Ss 1Sf.O .O4; .O`)A )AS )At )AV<K <K3 <K|#4S> #JuS#X/J	Jr;   r   )r   rO   r   rL   r(   typingr   r   r   r   r   r   rE   litellm._loggingr	   8litellm.litellm_core_utils.prompt_templates.common_utilsr
   litellm.types.utilsr   
base_cacher   r   rY   r;   r3   <module>r      sA   	    	 9 9  * 2 !fJ fJr;   