22from abc import abstractmethod
33from typing import List , Optional , Tuple
44
5+ from codegate .extract_snippets .factory import MessageCodeExtractorFactory
56import structlog
67from litellm import ChatCompletionRequest , ChatCompletionSystemMessage , ModelResponse
78from litellm .types .utils import Delta , StreamingChoices
89
910from codegate .config import Config
1011from codegate .pipeline .base import (
1112 AlertSeverity ,
13+ CodeSnippet ,
1214 PipelineContext ,
1315 PipelineResult ,
1416 PipelineStep ,
@@ -44,7 +46,9 @@ def _hide_secret(self, match: Match) -> str:
4446 pass
4547
4648 @abstractmethod
47- def _notify_secret (self , match : Match , protected_text : List [str ]) -> None :
49+ def _notify_secret (
50+ self , match : Match , code_snippet : Optional [CodeSnippet ], protected_text : List [str ]
51+ ) -> None :
4852 """
4953 Notify about a found secret
5054 TODO: If the secret came from a CodeSnippet we should notify about that. This would
@@ -106,7 +110,9 @@ def _get_surrounding_secret_lines(
106110 end_line = min (secret_line + surrounding_lines , len (lines ))
107111 return "\n " .join (lines [start_line :end_line ])
108112
109- def obfuscate (self , text : str ) -> tuple [str , List [Match ]]:
113+ def obfuscate (self , text : str , snippet : Optional [CodeSnippet ]) -> tuple [str , List [Match ]]:
114+ if snippet :
115+ text = snippet .code
110116 matches = CodegateSignatures .find_in_string (text )
111117 if not matches :
112118 return text , []
@@ -147,13 +153,14 @@ def obfuscate(self, text: str) -> tuple[str, List[Match]]:
147153 logger .info (
148154 f"\n Service: { match .service } "
149155 f"\n Type: { match .type } "
156+ f"\n Key: { match .secret_key } "
150157 f"\n Original: { match .value } "
151158 f"\n Encrypted: { hidden_secret } "
152159 )
153160
154161 # Second pass. Notify the secrets in DB over the complete protected text.
155162 for _ , _ , match in absolute_matches :
156- self ._notify_secret (match , protected_text )
163+ self ._notify_secret (match , code_snippet = snippet , protected_text = protected_text )
157164
158165 # Convert back to string
159166 protected_string = "" .join (protected_text )
@@ -184,11 +191,23 @@ def _hide_secret(self, match: Match) -> str:
184191 )
185192 return f"REDACTED<${ encrypted_value } >"
186193
def _notify_secret(
    self, match: Match, code_snippet: Optional[CodeSnippet], protected_text: List[str]
) -> None:
    """
    Raise a critical alert describing a detected secret.

    The alert text lists the service, type, key and line number taken from
    `match`, followed by the lines surrounding the secret as returned by
    `_get_surrounding_secret_lines` for the protected text.

    Args:
        match: The secret match to report.
        code_snippet: Passed through to the alert as `code_snippet`; may be None.
        protected_text: The protected text the surrounding context lines are
            taken from.
    """
    context_lines = self._get_surrounding_secret_lines(protected_text, match.line_number)
    # A missing/empty key is reported with a readable placeholder
    key_label = match.secret_key or "(Unknown)"
    alert_text = (
        f"**Secret Detected** 🔒\n"
        f"- Service: {match.service}\n"
        f"- Type: {match.type}\n"
        f"- Key: {key_label}\n"
        f"- Line Number: {match.line_number}\n"
        f"- Context:\n```\n{context_lines}\n```"
    )
    self._context.add_alert(
        self._name,
        trigger_string=alert_text,
        severity_category=AlertSeverity.CRITICAL,
        code_snippet=code_snippet,
    )
193212
194213
@@ -205,7 +224,9 @@ def _hide_secret(self, match: Match) -> str:
205224 """
206225 return "*" * 32
207226
208- def _notify_secret (self , match : Match , protected_text : List [str ]) -> None :
227+ def _notify_secret (
228+ self , match : Match , code_snippet : Optional [CodeSnippet ], protected_text : List [str ]
229+ ) -> None :
209230 pass
210231
211232
@@ -227,7 +248,12 @@ def name(self) -> str:
227248 return "codegate-secrets"
228249
229250 def _redact_text (
230- self , text : str , secrets_manager : SecretsManager , session_id : str , context : PipelineContext
251+ self ,
252+ text : str ,
253+ snippet : Optional [CodeSnippet ],
254+ secrets_manager : SecretsManager ,
255+ session_id : str ,
256+ context : PipelineContext ,
231257 ) -> tuple [str , List [Match ]]:
232258 """
233259 Find and encrypt secrets in the given text.
@@ -242,7 +268,7 @@ def _redact_text(
242268 """
243269 # Find secrets in the text
244270 text_encryptor = SecretsEncryptor (secrets_manager , context , session_id )
245- return text_encryptor .obfuscate (text )
271+ return text_encryptor .obfuscate (text , snippet )
246272
247273 async def process (
248274 self , request : ChatCompletionRequest , context : PipelineContext
@@ -273,40 +299,74 @@ async def process(
273299
274300 # get last user message block to get index for the first relevant user message
275301 last_user_message = self .get_last_user_message_block (new_request , context .client )
276- last_assistant_idx = - 1
277- if last_user_message :
278- _ , user_idx = last_user_message
279- last_assistant_idx = user_idx - 1
302+ last_assistant_idx = last_user_message [1 ] - 1 if last_user_message else - 1
280303
281304 # Process all messages
282305 for i , message in enumerate (new_request ["messages" ]):
283306 if "content" in message and message ["content" ]:
284- # Protect the text
285- protected_string , secrets_matched = self ._redact_text (
286- str (message ["content" ]), secrets_manager , session_id , context
307+ redacted_content , secrets_matched = self ._redact_message_content (
308+ message ["content" ], secrets_manager , session_id , context
287309 )
288- new_request ["messages" ][i ]["content" ] = protected_string
289-
290- # Append the matches for messages after the last assistant message
310+ new_request ["messages" ][i ]["content" ] = redacted_content
291311 if i > last_assistant_idx :
292312 total_matches += secrets_matched
313+ new_request = self ._finalize_redaction (context , total_matches , new_request )
314+ return PipelineResult (request = new_request , context = context )
315+
def _redact_message_content(self, message_content, secrets_manager, session_id, context):
    """
    Redact secrets in a single message, treating code snippets separately.

    The message is walked left to right. Each extracted snippet is located in
    the message; the plain text before it is redacted without a snippet, the
    snippet itself is redacted with the snippet attached (so notifications can
    reference it), and whatever follows the last snippet is redacted as plain
    text.

    Args:
        message_content: The message content. Non-string content is converted
            with `str()` before processing.
        secrets_manager: Forwarded to `_redact_text`.
        session_id: Forwarded to `_redact_text`.
        context: Pipeline context; `context.client` selects the snippet
            extractor and the context is forwarded to `_redact_text`.

    Returns:
        A tuple of the redacted message text and the list of all secret
        matches found in it.
    """
    # The snippet extraction and str.find below need a string
    if not isinstance(message_content, str):
        message_content = str(message_content)

    extractor = MessageCodeExtractorFactory.create_snippet_extractor(context.client)
    snippets = extractor.extract_snippets(message_content)

    redacted_parts = []
    total_matches = []
    last_end = 0

    for snippet in snippets:
        snippet_text = snippet.code
        start_index = message_content.find(snippet_text, last_end) if snippet_text else -1
        if start_index < 0:
            # The snippet cannot be located verbatim after the current position.
            # Skip it instead of corrupting the offsets; its text is still
            # redacted as part of the surrounding plain text.
            continue

        if start_index > last_end:
            redacted_part, secrets_matched = self._redact_text(
                message_content[last_end:start_index],
                None,
                secrets_manager,
                session_id,
                context,
            )
            redacted_parts.append(redacted_part)
            total_matches.extend(secrets_matched)

        # Pass the snippet's code as the text and the snippet object alongside it
        redacted_snippet, secrets_matched = self._redact_text(
            snippet_text, snippet, secrets_manager, session_id, context
        )
        redacted_parts.append(redacted_snippet)
        total_matches.extend(secrets_matched)
        last_end = start_index + len(snippet_text)

    if last_end < len(message_content):
        redacted_remaining, secrets_matched = self._redact_text(
            message_content[last_end:], None, secrets_manager, session_id, context
        )
        redacted_parts.append(redacted_remaining)
        total_matches.extend(secrets_matched)

    return "".join(redacted_parts), total_matches
356+
def _finalize_redaction(self, context, total_matches, new_request):
    """
    Record the redaction result on the context and flag the request if needed.

    Sets `context.secrets_found` and `context.metadata["redacted_secrets_count"]`
    from the number of distinct secret values in `total_matches`, and logs that
    number. When at least one secret was redacted, the configured
    `secrets_redacted` prompt is added to the request as a system message.

    Args:
        context: Pipeline context to update.
        total_matches: Secret matches collected for the request.
        new_request: The request whose messages were redacted.

    Returns:
        The request returned by `add_or_update_system_message` when secrets
        were redacted, otherwise `new_request` unchanged.
    """
    # Repeated occurrences of the same secret value are counted once
    distinct_values = {found.value for found in total_matches}
    total_redacted = len(distinct_values)
    any_redacted = total_redacted > 0

    context.secrets_found = any_redacted
    logger.info(f"Total secrets redacted since last assistant message: {total_redacted}")
    context.metadata["redacted_secrets_count"] = total_redacted

    if not any_redacted:
        return new_request

    redaction_notice = ChatCompletionSystemMessage(
        content=Config.get_config().prompts.secrets_redacted,
        role="system",
    )
    return add_or_update_system_message(new_request, redaction_notice, context)
310370
311371
312372class SecretUnredactionStep (OutputPipelineStep ):
@@ -450,14 +510,13 @@ async def process_chunk(
450510 or input_context .metadata .get ("redacted_secrets_count" , 0 ) == 0
451511 ):
452512 return [chunk ]
513+
453514 tool_name = next (
454515 (
455516 tool .lower ()
456517 for tool in ["Cline" , "Kodu" ]
457518 for message in input_context .alerts_raised or []
458519 if tool in str (message .trigger_string or "" )
459- and "If you are Kodu"
460- not in str (message .trigger_string or "" ) # this comes from our prompts
461520 ),
462521 "" ,
463522 )
0 commit comments