Skip to content

Commit 68daea3

Browse files
feat(google-genai): Set system instruction attribute (#5354)
Set the system instruction attribute on `ai_chat` and `invoke_agent` spans in the `GoogleGenAIIntegration`. Handle all text message types in `ContentUnion` and `ContentUnionDict`, and skip non-text types.
1 parent ed8489a commit 68daea3

File tree

2 files changed

+133
-43
lines changed

2 files changed

+133
-43
lines changed

sentry_sdk/integrations/google_genai/utils.py

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,20 @@
3232
event_from_exception,
3333
safe_serialize,
3434
)
35-
from google.genai.types import GenerateContentConfig
35+
from google.genai.types import GenerateContentConfig, Part, Content
36+
from itertools import chain
3637

3738
if TYPE_CHECKING:
3839
from sentry_sdk.tracing import Span
40+
from sentry_sdk._types import TextPart
3941
from google.genai.types import (
4042
GenerateContentResponse,
4143
ContentListUnion,
44+
ContentUnionDict,
4245
Tool,
4346
Model,
4447
EmbedContentResponse,
48+
ContentUnion,
4549
)
4650

4751

@@ -720,6 +724,62 @@ def extract_finish_reasons(
720724
return finish_reasons if finish_reasons else None
721725

722726

727+
def _transform_system_instruction_one_level(
728+
system_instructions: "Union[ContentUnionDict, ContentUnion]",
729+
can_be_content: bool,
730+
) -> "list[TextPart]":
731+
text_parts: "list[TextPart]" = []
732+
733+
if isinstance(system_instructions, str):
734+
return [{"type": "text", "content": system_instructions}]
735+
736+
if isinstance(system_instructions, Part) and system_instructions.text:
737+
return [{"type": "text", "content": system_instructions.text}]
738+
739+
if can_be_content and isinstance(system_instructions, Content):
740+
if isinstance(system_instructions.parts, list):
741+
for part in system_instructions.parts:
742+
if isinstance(part.text, str):
743+
text_parts.append({"type": "text", "content": part.text})
744+
return text_parts
745+
746+
if isinstance(system_instructions, dict) and system_instructions.get("text"):
747+
return [{"type": "text", "content": system_instructions["text"]}]
748+
749+
elif can_be_content and isinstance(system_instructions, dict):
750+
parts = system_instructions.get("parts", [])
751+
for part in parts:
752+
if isinstance(part, Part) and isinstance(part.text, str):
753+
text_parts.append({"type": "text", "content": part.text})
754+
elif isinstance(part, dict) and isinstance(part.get("text"), str):
755+
text_parts.append({"type": "text", "content": part["text"]})
756+
return text_parts
757+
758+
return text_parts
759+
760+
761+
def _transform_system_instructions(
    system_instructions: "Union[ContentUnionDict, ContentUnion]",
) -> "list[TextPart]":
    """
    Flatten a system instruction value into a list of text parts.

    A list is processed element by element with ``Content`` unpacking
    disabled, and the per-element results are concatenated in order. Any
    other value is passed on as a single instruction with ``Content``
    unpacking enabled.
    """
    if not isinstance(system_instructions, list):
        return _transform_system_instruction_one_level(
            system_instructions, can_be_content=True
        )

    return [
        text_part
        for instruction in system_instructions
        for text_part in _transform_system_instruction_one_level(
            instruction, can_be_content=False
        )
    ]
781+
782+
723783
def set_span_data_for_request(
724784
span: "Span",
725785
integration: "Any",
@@ -741,27 +801,19 @@ def set_span_data_for_request(
741801
messages = []
742802

743803
# Add system instruction if present
804+
system_instructions = None
744805
if config and hasattr(config, "system_instruction"):
745-
system_instruction = config.system_instruction
746-
if system_instruction:
747-
system_messages = extract_contents_messages(system_instruction)
748-
# System instruction should be a single system message
749-
# Extract text from all messages and combine into one system message
750-
system_texts = []
751-
for msg in system_messages:
752-
content = msg.get("content")
753-
if isinstance(content, list):
754-
# Extract text from content parts
755-
for part in content:
756-
if isinstance(part, dict) and part.get("type") == "text":
757-
system_texts.append(part.get("text", ""))
758-
elif isinstance(content, str):
759-
system_texts.append(content)
760-
761-
if system_texts:
762-
messages.append(
763-
{"role": "system", "content": " ".join(system_texts)}
764-
)
806+
system_instructions = config.system_instruction
807+
elif isinstance(config, dict) and "system_instruction" in config:
808+
system_instructions = config.get("system_instruction")
809+
810+
if system_instructions is not None:
811+
set_data_normalized(
812+
span,
813+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
814+
_transform_system_instructions(system_instructions),
815+
unpack=False,
816+
)
765817

766818
# Extract messages from contents
767819
contents_messages = extract_contents_messages(contents)

tests/integrations/google_genai/test_google_genai.py

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from google import genai
66
from google.genai import types as genai_types
7+
from google.genai.types import Content, Part
78

89
from sentry_sdk import start_transaction
910
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
@@ -106,11 +107,6 @@ def create_test_config(
106107
if seed is not None:
107108
config_dict["seed"] = seed
108109
if system_instruction is not None:
109-
# Convert string to Content for system instruction
110-
if isinstance(system_instruction, str):
111-
system_instruction = genai_types.Content(
112-
parts=[genai_types.Part(text=system_instruction)], role="system"
113-
)
114110
config_dict["system_instruction"] = system_instruction
115111
if tools is not None:
116112
config_dict["tools"] = tools
@@ -186,6 +182,7 @@ def test_nonstreaming_generate_content(
186182
response_texts = json.loads(response_text)
187183
assert response_texts == ["Hello! How can I help you today?"]
188184
else:
185+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
189186
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span["data"]
190187
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"]
191188

@@ -202,8 +199,41 @@ def test_nonstreaming_generate_content(
202199
assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100
203200

204201

202+
@pytest.mark.parametrize("generate_content_config", (False, True))
203+
@pytest.mark.parametrize(
204+
"system_instructions,expected_texts",
205+
[
206+
(None, None),
207+
({}, []),
208+
(Content(role="system", parts=[]), []),
209+
({"parts": []}, []),
210+
("You are a helpful assistant.", ["You are a helpful assistant."]),
211+
(Part(text="You are a helpful assistant."), ["You are a helpful assistant."]),
212+
(
213+
Content(role="system", parts=[Part(text="You are a helpful assistant.")]),
214+
["You are a helpful assistant."],
215+
),
216+
({"text": "You are a helpful assistant."}, ["You are a helpful assistant."]),
217+
(
218+
{"parts": [Part(text="You are a helpful assistant.")]},
219+
["You are a helpful assistant."],
220+
),
221+
(
222+
{"parts": [{"text": "You are a helpful assistant."}]},
223+
["You are a helpful assistant."],
224+
),
225+
(["You are a helpful assistant."], ["You are a helpful assistant."]),
226+
([Part(text="You are a helpful assistant.")], ["You are a helpful assistant."]),
227+
([{"text": "You are a helpful assistant."}], ["You are a helpful assistant."]),
228+
],
229+
)
205230
def test_generate_content_with_system_instruction(
206-
sentry_init, capture_events, mock_genai_client
231+
sentry_init,
232+
capture_events,
233+
mock_genai_client,
234+
generate_content_config,
235+
system_instructions,
236+
expected_texts,
207237
):
208238
sentry_init(
209239
integrations=[GoogleGenAIIntegration(include_prompts=True)],
@@ -218,25 +248,35 @@ def test_generate_content_with_system_instruction(
218248
mock_genai_client._api_client, "request", return_value=mock_http_response
219249
):
220250
with start_transaction(name="google_genai"):
221-
config = create_test_config(
222-
system_instruction="You are a helpful assistant",
223-
temperature=0.5,
224-
)
251+
config = {
252+
"system_instruction": system_instructions,
253+
"temperature": 0.5,
254+
}
255+
256+
if generate_content_config:
257+
config = create_test_config(**config)
258+
225259
mock_genai_client.models.generate_content(
226-
model="gemini-1.5-flash", contents="What is 2+2?", config=config
260+
model="gemini-1.5-flash",
261+
contents="What is 2+2?",
262+
config=config,
227263
)
228264

229265
(event,) = events
230266
invoke_span = event["spans"][0]
231267

232-
# Check that system instruction is included in messages
268+
if expected_texts is None:
269+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
270+
return
271+
233272
# (PII is enabled and include_prompts is True in this test)
234-
messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
235-
# Parse the JSON string to verify content
236-
messages = json.loads(messages_str)
237-
assert len(messages) == 2
238-
assert messages[0] == {"role": "system", "content": "You are a helpful assistant"}
239-
assert messages[1] == {"role": "user", "content": "What is 2+2?"}
273+
system_instructions = json.loads(
274+
invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
275+
)
276+
277+
assert system_instructions == [
278+
{"type": "text", "content": text} for text in expected_texts
279+
]
240280

241281

242282
def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client):
@@ -933,10 +973,8 @@ def test_google_genai_message_truncation(
933973
with start_transaction(name="google_genai"):
934974
mock_genai_client.models.generate_content(
935975
model="gemini-1.5-flash",
936-
contents=small_content,
937-
config=create_test_config(
938-
system_instruction=large_content,
939-
),
976+
contents=[large_content, small_content],
977+
config=create_test_config(),
940978
)
941979

942980
(event,) = events

0 commit comments

Comments
 (0)