feat(google-genai): Set system instruction attribute (#5354)

alexander-alderman-webb · web-flow · commit 68daea3030d4 · 2026-01-23T13:16:08.000+01:00
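Set the system instructions attribute (`SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS`) on `ai_chat` and `invoke_agent` spans in the `GoogleGenAIIntegration`, instead of prepending the instructions as a `system` message to the request messages. Handle every text-bearing form of `ContentUnion` and `ContentUnionDict` (a plain string, a `Part`, a `Content`, their dict equivalents, and lists of parts), and skip non-text types.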
diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py
@@ -32,16 +32,20 @@
     event_from_exception,
     safe_serialize,
 )
-from google.genai.types import GenerateContentConfig
+from google.genai.types import GenerateContentConfig, Part, Content
+from itertools import chain
 
 if TYPE_CHECKING:
     from sentry_sdk.tracing import Span
+    from sentry_sdk._types import TextPart
     from google.genai.types import (
         GenerateContentResponse,
         ContentListUnion,
+        ContentUnionDict,
         Tool,
         Model,
         EmbedContentResponse,
+        ContentUnion,
     )
 
 
@@ -720,6 +724,62 @@ def extract_finish_reasons(
     return finish_reasons if finish_reasons else None
 
 
+def _transform_system_instruction_one_level(
+    system_instructions: "Union[ContentUnionDict, ContentUnion]",
+    can_be_content: bool,
+) -> "list[TextPart]":
+    text_parts: "list[TextPart]" = []
+
+    if isinstance(system_instructions, str):
+        return [{"type": "text", "content": system_instructions}]
+
+    if isinstance(system_instructions, Part) and system_instructions.text:
+        return [{"type": "text", "content": system_instructions.text}]
+
+    if can_be_content and isinstance(system_instructions, Content):
+        if isinstance(system_instructions.parts, list):
+            for part in system_instructions.parts:
+                if isinstance(part.text, str):
+                    text_parts.append({"type": "text", "content": part.text})
+        return text_parts
+
+    if isinstance(system_instructions, dict) and system_instructions.get("text"):
+        return [{"type": "text", "content": system_instructions["text"]}]
+
+    elif can_be_content and isinstance(system_instructions, dict):
+        parts = system_instructions.get("parts", [])
+        for part in parts:
+            if isinstance(part, Part) and isinstance(part.text, str):
+                text_parts.append({"type": "text", "content": part.text})
+            elif isinstance(part, dict) and isinstance(part.get("text"), str):
+                text_parts.append({"type": "text", "content": part["text"]})
+        return text_parts
+
+    return text_parts
+
+
+def _transform_system_instructions(
+    system_instructions: "Union[ContentUnionDict, ContentUnion]",
+) -> "list[TextPart]":
+    text_parts: "list[TextPart]" = []
+
+    if isinstance(system_instructions, list):
+        text_parts = list(
+            chain.from_iterable(
+                _transform_system_instruction_one_level(
+                    instructions, can_be_content=False
+                )
+                for instructions in system_instructions
+            )
+        )
+
+        return text_parts
+
+    return _transform_system_instruction_one_level(
+        system_instructions, can_be_content=True
+    )
+
+
 def set_span_data_for_request(
     span: "Span",
     integration: "Any",
@@ -741,27 +801,19 @@ def set_span_data_for_request(
         messages = []
 
         # Add system instruction if present
+        system_instructions = None
         if config and hasattr(config, "system_instruction"):
-            system_instruction = config.system_instruction
-            if system_instruction:
-                system_messages = extract_contents_messages(system_instruction)
-                # System instruction should be a single system message
-                # Extract text from all messages and combine into one system message
-                system_texts = []
-                for msg in system_messages:
-                    content = msg.get("content")
-                    if isinstance(content, list):
-                        # Extract text from content parts
-                        for part in content:
-                            if isinstance(part, dict) and part.get("type") == "text":
-                                system_texts.append(part.get("text", ""))
-                    elif isinstance(content, str):
-                        system_texts.append(content)
-
-                if system_texts:
-                    messages.append(
-                        {"role": "system", "content": " ".join(system_texts)}
-                    )
+            system_instructions = config.system_instruction
+        elif isinstance(config, dict) and "system_instruction" in config:
+            system_instructions = config.get("system_instruction")
+
+        if system_instructions is not None:
+            set_data_normalized(
+                span,
+                SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                _transform_system_instructions(system_instructions),
+                unpack=False,
+            )
 
         # Extract messages from contents
         contents_messages = extract_contents_messages(contents)
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
@@ -4,6 +4,7 @@
 
 from google import genai
 from google.genai import types as genai_types
+from google.genai.types import Content, Part
 
 from sentry_sdk import start_transaction
 from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
@@ -106,11 +107,6 @@ def create_test_config(
     if seed is not None:
         config_dict["seed"] = seed
     if system_instruction is not None:
-        # Convert string to Content for system instruction
-        if isinstance(system_instruction, str):
-            system_instruction = genai_types.Content(
-                parts=[genai_types.Part(text=system_instruction)], role="system"
-            )
         config_dict["system_instruction"] = system_instruction
     if tools is not None:
         config_dict["tools"] = tools
@@ -186,6 +182,7 @@ def test_nonstreaming_generate_content(
         response_texts = json.loads(response_text)
         assert response_texts == ["Hello! How can I help you today?"]
     else:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span["data"]
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"]
 
@@ -202,8 +199,41 @@ def test_nonstreaming_generate_content(
     assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100
 
 
+@pytest.mark.parametrize("generate_content_config", (False, True))
+@pytest.mark.parametrize(
+    "system_instructions,expected_texts",
+    [
+        (None, None),
+        ({}, []),
+        (Content(role="system", parts=[]), []),
+        ({"parts": []}, []),
+        ("You are a helpful assistant.", ["You are a helpful assistant."]),
+        (Part(text="You are a helpful assistant."), ["You are a helpful assistant."]),
+        (
+            Content(role="system", parts=[Part(text="You are a helpful assistant.")]),
+            ["You are a helpful assistant."],
+        ),
+        ({"text": "You are a helpful assistant."}, ["You are a helpful assistant."]),
+        (
+            {"parts": [Part(text="You are a helpful assistant.")]},
+            ["You are a helpful assistant."],
+        ),
+        (
+            {"parts": [{"text": "You are a helpful assistant."}]},
+            ["You are a helpful assistant."],
+        ),
+        (["You are a helpful assistant."], ["You are a helpful assistant."]),
+        ([Part(text="You are a helpful assistant.")], ["You are a helpful assistant."]),
+        ([{"text": "You are a helpful assistant."}], ["You are a helpful assistant."]),
+    ],
+)
 def test_generate_content_with_system_instruction(
-    sentry_init, capture_events, mock_genai_client
+    sentry_init,
+    capture_events,
+    mock_genai_client,
+    generate_content_config,
+    system_instructions,
+    expected_texts,
 ):
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
@@ -218,25 +248,35 @@ def test_generate_content_with_system_instruction(
         mock_genai_client._api_client, "request", return_value=mock_http_response
     ):
         with start_transaction(name="google_genai"):
-            config = create_test_config(
-                system_instruction="You are a helpful assistant",
-                temperature=0.5,
-            )
+            config = {
+                "system_instruction": system_instructions,
+                "temperature": 0.5,
+            }
+
+            if generate_content_config:
+                config = create_test_config(**config)
+
             mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash", contents="What is 2+2?", config=config
+                model="gemini-1.5-flash",
+                contents="What is 2+2?",
+                config=config,
             )
 
     (event,) = events
     invoke_span = event["spans"][0]
 
-    # Check that system instruction is included in messages
+    if expected_texts is None:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
+        return
+
     # (PII is enabled and include_prompts is True in this test)
-    messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-    # Parse the JSON string to verify content
-    messages = json.loads(messages_str)
-    assert len(messages) == 2
-    assert messages[0] == {"role": "system", "content": "You are a helpful assistant"}
-    assert messages[1] == {"role": "user", "content": "What is 2+2?"}
+    system_instructions = json.loads(
+        invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
+    )
+
+    assert system_instructions == [
+        {"type": "text", "content": text} for text in expected_texts
+    ]
 
 
 def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client):
@@ -933,10 +973,8 @@ def test_google_genai_message_truncation(
         with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash",
-                contents=small_content,
-                config=create_test_config(
-                    system_instruction=large_content,
-                ),
+                contents=[large_content, small_content],
+                config=create_test_config(),
             )
 
     (event,) = events