
    hh                        d dl Z d dlZd dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z  d dl!m"Z"m#Z# d dl$m%Z& d dl'm(Z(m)Z)m*Z*m+Z+m,Z,  G d de      Z-y)    N)datetime)	AnyAsyncGeneratorDictListLiteralOptionalTupleUnioncast)
get_secret)verbose_proxy_logger)	DualCache)BlockedPiiEntityError)CustomGuardrail)UserAPIKeyAuth)GuardrailEventHooksLitellmParams	PiiActionPiiEntityTypePresidioPerRequestConfig)PresidioAnalyzeRequestPresidioAnalyzeResponseItem)	CallTypes)EmbeddingResponseImageResponseModelResponseModelResponseStreamStreamingChoicesc                   f    e Zd ZdZdZ	 	 	 	 	 	 	 	 	 d+dedee   dee   dee   dee   dee   dee   d	ee	e
ef      d
ee   f fdZ	 	 d,dee   dee   fdZdedee   dedefdZdedee   dedeee   e	f   fdZdededede	eef   def
dZdeee   e	f   fdZdededee   dedef
dZdedededefdZdedededeeef   fdZdedededeeef   fd Zdeded!ee e!e"f   fd"Z#ded!edede$e%df   fd#Z&dedee   fd$Z'd% Z(	 	 d,ded&ee   d'eee
      defd(Z)d)e*ddfd*Z+ xZ,S )-_OPTIONAL_PresidioPIIMaskingNmock_testingmock_redacted_textpresidio_analyzer_api_basepresidio_anonymizer_api_baseoutput_parse_piipresidio_ad_hoc_recognizerslogging_onlypii_entities_configpresidio_languagec
                 P   |du rd| _         t        j                   |
d<   t        |   di |
 i | _        || _        |xs d| _        |xs i | _        |	xs d| _        |du ry |}|0	 t        |d      5 }t        j                  |      | _        d d d        | j#                  ||
       y # 1 sw Y   xY w# t        $ r t        d|       t        j                  $ r}t        dt!        |       d|       d }~wt        $ r}t        d	t!        |       d|       d }~ww xY w)NT
event_hookFenrzFile not found. file_path=zError decoding JSON file: z, file_path=zAn error occurred: )r$   r%    )r(   r   super__init__
pii_tokensr#   r&   r)   r*   openjsonloadad_hoc_recognizersFileNotFoundError	ExceptionJSONDecodeErrorstrvalidate_environment)selfr"   r#   r$   r%   r&   r'   r(   r)   r*   kwargsr6   filee	__class__s                 m/var/www/Befach/backend/env/lib/python3.12/site-packages/litellm/proxy/guardrails/guardrail_hooks/presidio.pyr1   z%_OPTIONAL_PresidioPIIMasking.__init__>   sb    4 $D#6#C#CF< "6" 	 #5 0 9E%2 	  "3!:d48),c2 >d.2iioD+> 	!!'A)E 	" 	
> >$ S"<=O<P QRR'' 0QEWDXY   )#a&>P=QR s<   (B7 4B+B7 +B40B7 7)D% C::D%D  D%c                    |xs t        dd       | _        |xs t        j                   dd       | _        | j                  t	        d      | j                  j                  d      s| xj                  dz  c_        | j                  j                  d      s/| j                  j                  d      sd| j                  z   | _        | j                  t	        d      | j                  j                  d      s| xj                  dz  c_        | j                  j                  d      s1| j                  j                  d      sd| j                  z   | _        y y y )NPRESIDIO_ANALYZER_API_BASEPRESIDIO_ANONYMIZER_API_BASEz5Missing `PRESIDIO_ANALYZER_API_BASE` from environment/zhttp://zhttps://z7Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment)r   r$   litellmr%   r8   endswith
startswith)r<   r$   r%   s      rA   r;   z1_OPTIONAL_PresidioPIIMasking.validate_environmento   sN    ' 
*($+
 	' ) 
G,>,>*D-
 	) **2STT..77<++s2+++66yA..99*E D;;; + ,,4UVV0099#>--4---88C00;;JG D=== - H D    textpresidio_configrequest_datareturnc                    t        || j                        }| j                  | j                  |d<   | j                  r&t	        | j                  j                               |d<   |r|j                  r|j                  |d<   t        t        |      }|j                  | j                  |             t        t         |      S )z
        Construct the payload for the Presidio analyze request

        API Ref: https://microsoft.github.io/presidio/api-docs/api-docs.html#tag/Analyzer/paths/~1analyze/post
        )rJ   languager6   entitiesrO   )rL   )r   r*   r6   r)   listkeysrO   r   dictupdate)get_guardrail_dynamic_request_body_params)r<   rJ   rK   rL   analyze_payloadcasted_analyze_payloads         rA   %_get_presidio_analyze_request_payloadzB_OPTIONAL_PresidioPIIMasking._get_presidio_analyze_request_payload   s     3I++3
 "".484K4KO01##*.t/G/G/L/L/N*OOJ' 77*9*B*BOJ''+D/'B%%:::U	
 *,BCCrI   c           
        K   	 t        j                         4 d{   }| j                  | j                  cddd      d{    S | j                   d}| j	                  |||      }t        j                  d||       |j                  ||      4 d{   }|j                          d{   }t        j                  d|       g }	|D ]  }
|	j                  t        di |
        |	cddd      d{    cddd      d{    S 7 7 7 }7 g7 7 # 1 d{  7  sw Y   nxY wddd      d{  7   y# 1 d{  7  sw Y   yxY w# t        $ r}|d}~ww xY ww)zV
        Send text to the Presidio analyzer endpoint and get analysis results
        NanalyzerJ   rK   rL   z&Making request to: %s with payload: %sr4   analyze_results: %sr/   )aiohttpClientSessionr#   r$   rX   r   debugpostr4   appendr   r8   )r<   rJ   rK   rL   sessionanalyze_urlrV   responseanalyze_resultsfinal_resultsitemr?   s               rA   analyze_textz)_OPTIONAL_PresidioPIIMasking.analyze_text   sx    	,,. ) )'**622) ) )
 "&!@!@ AI >>!(7%1 ?    %**<# #<</<J ) )h,4MMO&;O(../DoV$&M /%,,-H-P4-PQ !0() ) )+) ) ) )*)&;)+)*) ) )+) ) ) ) )8  	G	s  E0E DE EE DE 	E0
AEDED!3D4>D!2E>D?EE DE E0E E ED!EE !D3	'D*(D3	/E6E EE E0EEEE E0E 	E-&E((E--E0rf   masked_entity_countc                 p  K   	 t        j                         4 d{   }| j                   d}t        j                  d|       ||d}|j                  ||      4 d{   }|j                          d{   }	ddd      d{    |}
	t        j                  d|	       |	d   D ]  }|d   }|d	   }|d
   }|d   dk(  rD|du r@|| j                  v r |t        t        j                               z   }|
|| | j                  |<   |
d| |z   |
|d z   }
|j                  dd      }||j                  |d      dz   ||<    |	d
   cddd      d{    S t        d|	       7 Q7 7 7 # 1 d{  7  sw Y   xY w7 2# 1 d{  7  sw Y   yxY w# t        $ r}|d}~ww xY ww)z`
        Send analysis results to the Presidio anonymizer endpoint to get redacted text
        N	anonymizezMaking request to: %s)rJ   analyzer_resultsr\   zredacted_text: %sitemsstartendrJ   operatorreplaceTentity_typer      zInvalid anonymizer response: )r^   r_   r%   r   r`   ra   r4   r2   r:   uuiduuid4getr8   )r<   rJ   rf   r&   rj   rc   anonymize_urlanonymize_payloadre   redacted_textnew_textrh   ro   rp   replacementrs   r?   s                    rA   anonymize_textz+_OPTIONAL_PresidioPIIMasking.anonymize_text   s    *	,,. 'U 'U'#'#D#D"EY O$**+BMR (7%!
 #<<!(9 (  : :*2--/$9M: :
   ,(../BMR -g 6 $W"5k&*6l
+y8=MQU=U  +doo=.9C

<M.M;C %c<DOOK8 $,FU#3k#AHSTN#R&*hh}d&C&2 3 7 7Q G! K 0<# !7( )0K'U 'U 'UN $&CM?$STTO'U: %:: : : :'U 'U 'U 'UP  	G	s   F6F# E-F# AF"E0#F&E7:E3;E7?F
E5B#F/FF# FF# F6F-F# 0F3E75F7F		=F >F		FF# F FF F# F6 F# #	F3,F..F33F6c                 "   | j                   yt        |t              ry|D ]l  }|j                  d      }|st	        t
        |      }|| j                   v s6| j                   |   t        j                  k(  sWt        || j                         y)zE
        Raise an exception if blocked entities are detected
        Nrs   )rs   guardrail_name)
r)   
isinstancer   rw   r   r   r   BLOCKr   r   )r<   rf   resultrs   casted_entity_types        rA   ,raise_exception_if_blocked_entities_detectedzI_OPTIONAL_PresidioPIIMasking.raise_exception_if_blocked_entities_detected  s     ##+ot, %F **]3K484T"&$*B*BB001CD	W/$/'+':':  &rI   c                 Z  K   t        j                         }d}d}i }d}		 | j                  | j                  }
n| j                  |||       d{   }t	        j
                  d|       | j                  |       | j                  ||||       d{   i }|dk(  r)t        |t              r|D cg c]  }t        |       }}n|	}| j                  ||||j                         t        j                         j                         t        j                         |z
  j                         |       S |
d	   i }|dk(  r)t        |t              r|D cg c]  }t        |       }}n|	}| j                  ||||j                         t        j                         j                         t        j                         |z
  j                         |       S 7 7 Cc c}w c c}w # t        $ r}d
}t        |      }	|d}~ww xY w# i }|dk(  r/t        |t              r!|D cg c]  }t        |       nc c}w }}n|	}| j                  ||||j                         t        j                         j                         t        j                         |z
  j                         |       w xY ww)zY
        Calls Presidio Analyze + Anonymize endpoints for PII Analysis + Masking
        Nsuccess r[   r]   )rf   )rJ   rf   r&   rj   )guardrail_json_responserL   guardrail_status
start_timeend_timedurationrj   rJ   failure)r   nowr#   ri   r   r`   r   r}   r   r   rS   :add_standard_logging_guardrail_information_to_request_data	timestamptotal_secondsr8   r:   )r<   rJ   r&   rK   rL   r   rf   statusrj   exception_strrz   r   rh   r?   s                 rA   	check_piiz&_OPTIONAL_PresidioPIIMasking.check_pii:  s     \\^
TX09.02	&&2 $ 7 7 )-(9(9$3!- ): ) # %**+@/R
 AA$3 B 
 "00$3%5(;	 1   PR#"ot4FU.VdtDz.V+.V*7'KK(?)!'%//1!113",,.:5DDF$7 L  !( PR#"ot4FU.VdtDz.V+.V*7'KK(?)!'%//1!113",,.:5DDF$7 L K# $ /W.V  	FFMG	 PR#"ot4FU.VdtDz.V.V+.V*7'KK(?)!'%//1!113",,.:5DDF$7 L s   J+0G% GAG% GG% J+4GA/J+5G% 9J+G &A/J+G% G% 
J+%	H.G==HH J(!H43A5J((J+user_api_key_dictcachedata	call_typec           	        K   	 |j                  dd      }t        j                  d|       | j                  |      }|t        j
                  j                  t        j                  j                  fv r|d   }g }|D ]V  }	|	j                  dd      }
|
t        |
t              s)|j                  | j                  |
| j                  ||             X t        j                  |  d{   }t        |      D ]6  \  }}||   j                  dd      }
|
t        |
t              s/|||   d<   8 t        j                   d|d           ||d<   |S t        j                  d|        |S 7 # t"        $ r}|d}~ww xY ww)	a  
        - Check if request turned off pii
            - Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')

        - Take the request data
        - Call /analyze -> get the results
        - Call /anonymize w/ the analyze results -> get the redacted text

        For multiple messages in /chat/completions, we'll need to call them in parallel.
        content_safetyNzcontent_safety: %smessagescontentrJ   r&   rK   rL   ,Presidio PII Masking: Redacted pii message: z.Not running async_pre_call_hook for call_type=)rw   r   r`   'get_presidio_settings_from_request_dataLitellmCallTypes
completionvalueacompletionr   r:   rb   r   r&   asynciogather	enumerateinfor8   )r<   r   r   r   r   r   rK   r   tasksmr   	responsesindexr.   r?   s                  rA   async_pre_call_hookz0_OPTIONAL_PresidioPIIMasking.async_pre_call_hook}  s    $,	!XX&6=N &&';^L"JJ4PO ++11 ,,22 
  
+!AeeIt4G !'3/ NN%,151F1F0?-1	 +  " #*..%"88	 )) 4HE1&uo11)TBG !'3/  !% !5 %))B4
CSBTU $,Z 
 K %**DYKP K# 9$  	G	sT   FB E1 %AE1 *E/+=E1 )+E1 FE1 .F/E1 1	F:E<<FFr=   r   c                      ddl m}  fd}	 t        j                         } |d      5 }|j	                  |      }|j                         cd d d        S # 1 sw Y   y xY w# t        $ r
  |       cY S w xY w)Nr   )ThreadPoolExecutorc                  <   t        j                         } 	 t        j                  |        | j                  j	                              | j                          t        j                  d       S # | j                          t        j                  d       w xY w)z9Run the coroutine in a new event loop within this thread.)r=   r   r   N)r   new_event_loopset_event_looprun_until_completeasync_logging_hookclose)new_loopr   r=   r   r<   s    rA   run_in_new_loopzB_OPTIONAL_PresidioPIIMasking.logging_hook.<locals>.run_in_new_loop  s    --/H	-&&x022++%f	 ,   &&t,  &&t,s   7A4 4'Brt   )max_workers)concurrent.futuresr   r   get_running_loopsubmitr   RuntimeError)	r<   r=   r   r   r   r   _executorfutures	   ````     rA   logging_hookz)_OPTIONAL_PresidioPIIMasking.logging_hook  sq     	:	-	%((*A $2 'h!9}}' ' '  	%"$$	%s.   A( !A	A( A%!A( %A( (A;:A;c           	         K   |dk(  s|dk(  r|j                  dd      }g }|||fS | j                  |      }|D ]P  }d}|j                  dd      }	|	t        |	t              s+|	}|j	                  | j                  |d||             R t        j                  |  d{   }
t        |
      D ]6  \  }}||   j                  dd      }	|	t        |	t              s/|||   d<   8 t        j                  d	|        ||d<   ||fS 7 iw)
zK
        Masks the input before logging to langfuse, datadog, etc.
        r   r   r   Nr   r   Fr   r   )rw   r   r   r:   rb   r   r   r   r   r   r   )r<   r=   r   r   r   r   rK   r   text_strr   r   r   r.   s                rA   r   z/_OPTIONAL_PresidioPIIMasking.async_logging_hook  sD     %m)C'-zz*d'CHEv~%"JJ6RO%%	40?gs+&HLL!)-2,;)/	 '   &nne44I%i0q"5/--i>?gs+  UO! 1 !%%>xjI "*F:v~ 5s   A"D%=D"D#=D!,Dre   c                   K   t        j                  d| j                   dt        |              | j                  du rt        j                  du r|S t        |t              rt        |j                  d   t              st        |j                  d   j                  j                  t              rt        j                  d| j                   d|j                  d   j                  j                          | j                  j                         D ]T  \  }}|j                  d   j                  j                  j                  ||      |j                  d   j                  _
        V |S w)ze
        Output parse the response object to replace the masked tokens with user sent values
        z(PII Masking Args: self.output_parse_pii=z; type of response=Fr   zself.pii_tokens: z; initial response: )r   r`   r&   typerF   r   r   choicesr   messager   r:   r2   rn   rr   )r<   r   r   re   keyr   s         rA   async_post_call_success_hookz9_OPTIONAL_PresidioPIIMasking.async_post_call_success_hook  s<     	""6t7L7L6MM`aefnao`pq	
   E)g.F.F%.OOh.zQ!18
 (**1-55==sC$**''88LXM]M]^_M`MhMhMpMpLqr #'//"7"7"9JC:B:J:J;gggggc5&9 $$Q'//7 #: s   E)E+c                   K   | j                   r| j                  s|2 3 d{   }| ddlm} ddlm}m} 	 d}d}	|2 3 d{   }|}	t        |d      s|j                  s%t        |j                  d   d      s?t        |j                  d   j                  d      sct        |j                  d   j                  j                  t              s||j                  d   j                  j                  z  }7 6 y7 6 |	s|2 3 d{  7  }| 6 y| j                  j                         D ]  \  }
}|j                  |
|      }  |t        |	j                   |	j"                  |	j$                  |	j&                   | |d	|
      dd      g      d      }|2 3 d{  7  }| 6 y# t(        $ r>}t+        j,                  dt        |              |2 3 d{  7  }| 6 Y d}~yd}~ww xY ww)a  
        Process streaming response chunks to unmask PII tokens when needed.

        If PII processing is enabled, this collects all chunks, applies PII unmasking,
        and returns a reconstructed stream. Otherwise, it passes through the original stream.
        Nr   )MockResponseIterator)ChoicesMessager   r   deltar   	assistant)roler   stop)r   r   finish_reason)idobjectcreatedmodelr   F)model_response	json_modez#Error in PII streaming processing: )r&   r2   )litellm.llms.base_llm.base_model_iteratorr   litellm.types.utilsr   r   hasattrr   r   r   r   r:   rn   rr   r   r   r   r   r   r8   r   error)r<   r   re   rL   chunkr   r   r   collected_content
last_chunktokenoriginal_textmock_responser?   s                 rA   'async_post_call_streaming_iterator_hookzD_OPTIONAL_PresidioPIIMasking.async_post_call_streaming_iterator_hook*  s     %%$//'  e 	S89	 "J' H He"
 E9-a 0':a 0 6 6	B"5==#3#9#9#A#A3G%q)9)?)?)G)GG%1xHx #+    %K $, )-(=(=(?$}$5$=$=e]$S! )@ 1,!}}%,,&..$**$+%0(9% #$*0	    #M*  -  e  -  	 &&)LSQRVH'UV'  e  (	s   G>C?C=C?G>F4 DDD	F4 F4 &F4  #F4 $1F4 'F4 =C??G>DF4 DDDF4 G>BF4 "F2&F)'F2+F4 3G>4	G;=#G6 G0$G'%G0)G61G>6G;;G>c                 v    d|v r5|j                  dd       }|y |j                  d      }|rt        di |}|S y )Nmetadataguardrail_configr/   )rw   r   )r<   r   	_metadata_guardrail_config_presidio_configs        rA   r   zD_OPTIONAL_PresidioPIIMasking.get_presidio_settings_from_request_data{  sP     T2I  ).@ A #;#P>O#P ''rI   c                     	 t        j                  |       t        j                  rt	        |       y y # t
        $ r Y y w xY w)N)r   r`   rF   set_verboseprintr8   )r<   print_statements     rA   print_verbosez*_OPTIONAL_PresidioPIIMasking.print_verbose  s=    	 &&7""o& # 		s   04 	A A rO   rP   c                 `   K   | j                  || j                  di        d{   }|S 7 w)a  
        UI will call this function to check:
            1. If the connection to the guardrail is working
            2. When Testing the guardrail with some text, this function will be called with the input text and returns a text after applying the guardrail
        Nr   )r   r&   )r<   rJ   rO   rP   s       rA   apply_guardrailz,_OPTIONAL_PresidioPIIMasking.apply_guardrail  s>      ^^!22 	 $ 
 
 
s   #.,.litellm_paramsc                 @    |j                   r|j                   | _         yy)z@
        Update the guardrails litellm params in memory
        N)r)   )r<   r   s     rA   update_in_memory_litellm_paramsz<_OPTIONAL_PresidioPIIMasking.update_in_memory_litellm_params  s      --'5'I'ID$ .rI   )	FNNNFNNNN)NN)-__name__
__module____qualname__user_api_key_cacher6   boolr	   rS   r:   r   r   r   r1   r;   r   r   rX   r   r   r   ri   r   intr}   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)r@   s   @rA   r!   r!   9   s_   
 #-1486:+059'+HL+//
/
 %TN/
 %-SM	/

 '/sm/
 #4./
 &.c]/
 tn/
 &d=)+C&DE/
 $C=/
f 596:($,SM( '/sm(T$D$D "":;$D 	$D
 
 $DL'' "":;' 	'
 
t/0$6	7'R44 4 	4
 "#s(^4 
4l$T*E%F%LM8AA A "":;	A
 A 
AF>)> > 	>
 >@%%$'%47%	tSy	%@..$'.47.	tSy	.` * '8-GH	:O)O O 	O
 
+T1	2Ob	*	+ #'26	 3- 4./	
 
&Jm JPT JrI   r!   ).r   r4   ru   r   typingr   r   r   r   r   r	   r
   r   r   r^   rF   r   litellm._loggingr   litellm.caching.cachingr   litellm.exceptionsr   %litellm.integrations.custom_guardrailr   litellm.proxy._typesr   litellm.types.guardrailsr   r   r   r   r   7litellm.types.proxy.guardrails.guardrail_hooks.presidior   r   r   r   r   litellm.utilsr   r   r   r   r   r!   r/   rI   rA   <module>r     sj       
 
 
    1 - 4 A /  > p	J? p	JrI   