Skip to content

Commit f749ae4

Browse files
Merge branch 'master' into webb/openai-separate-input-handling
2 parents f304bec + 68daea3 commit f749ae4

File tree

13 files changed

+482
-172
lines changed

13 files changed

+482
-172
lines changed

.github/workflows/changelog-preview.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
permissions:
1212
contents: write
1313
pull-requests: write
14+
statuses: write
1415

1516
jobs:
1617
changelog-preview:

sentry_sdk/_types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,7 @@ class SDKInfo(TypedDict):
359359
)
360360

361361
HttpStatusCodeRange = Union[int, Container[int]]
362+
363+
class TextPart(TypedDict):
    """
    A single piece of plain text tagged with its part type.
    """

    # Discriminator; the only permitted value is the literal "text".
    type: Literal["text"]
    # The text carried by this part.
    content: str

sentry_sdk/consts.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,12 @@ class SPANDATA:
542542
Example: 2048
543543
"""
544544

545+
GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
546+
"""
547+
The system instructions passed to the model.
548+
Example: [{"type": "text", "content": "You are a helpful assistant."},{"type": "text", "content": "Be concise and clear."}]
549+
"""
550+
545551
GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages"
546552
"""
547553
The messages passed to the model. The "content" can be a string or an array of objects.

sentry_sdk/integrations/anthropic.py

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,14 @@
3939
from anthropic.resources import AsyncMessages, Messages
4040

4141
if TYPE_CHECKING:
42-
from anthropic.types import MessageStreamEvent
42+
from anthropic.types import MessageStreamEvent, TextBlockParam
4343
except ImportError:
4444
raise DidNotEnable("Anthropic not installed")
4545

4646
if TYPE_CHECKING:
4747
from typing import Any, AsyncIterator, Iterator, List, Optional, Union
4848
from sentry_sdk.tracing import Span
49+
from sentry_sdk._types import TextPart
4950

5051

5152
class AnthropicIntegration(Integration):
@@ -177,44 +178,53 @@ def _transform_anthropic_content_block(
177178
return result if result is not None else content_block
178179

179180

181+
def _transform_system_instructions(
182+
system_instructions: "Union[str, Iterable[TextBlockParam]]",
183+
) -> "list[TextPart]":
184+
if isinstance(system_instructions, str):
185+
return [
186+
{
187+
"type": "text",
188+
"content": system_instructions,
189+
}
190+
]
191+
192+
return [
193+
{
194+
"type": "text",
195+
"content": instruction["text"],
196+
}
197+
for instruction in system_instructions
198+
if isinstance(instruction, dict) and "text" in instruction
199+
]
200+
201+
180202
def _set_input_data(
181203
span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration"
182204
) -> None:
183205
"""
184206
Set input data for the span based on the provided keyword arguments for the anthropic message creation.
185207
"""
186208
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
187-
system_prompt = kwargs.get("system")
209+
system_instructions: "Union[str, Iterable[TextBlockParam]]" = kwargs.get("system") # type: ignore
188210
messages = kwargs.get("messages")
189211
if (
190212
messages is not None
191213
and len(messages) > 0
192214
and should_send_default_pii()
193215
and integration.include_prompts
194216
):
195-
normalized_messages = []
196-
if system_prompt:
197-
system_prompt_content: "Optional[Union[str, List[dict[str, Any]]]]" = None
198-
if isinstance(system_prompt, str):
199-
system_prompt_content = system_prompt
200-
elif isinstance(system_prompt, Iterable):
201-
system_prompt_content = []
202-
for item in system_prompt:
203-
if (
204-
isinstance(item, dict)
205-
and item.get("type") == "text"
206-
and item.get("text")
207-
):
208-
system_prompt_content.append(item.copy())
209-
210-
if system_prompt_content:
211-
normalized_messages.append(
212-
{
213-
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM,
214-
"content": system_prompt_content,
215-
}
216-
)
217+
if isinstance(system_instructions, str) or isinstance(
218+
system_instructions, Iterable
219+
):
220+
set_data_normalized(
221+
span,
222+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
223+
_transform_system_instructions(system_instructions),
224+
unpack=False,
225+
)
217226

227+
normalized_messages = []
218228
for message in messages:
219229
if (
220230
message.get("role") == GEN_AI_ALLOWED_MESSAGE_ROLES.USER

sentry_sdk/integrations/google_genai/utils.py

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,20 @@
3232
event_from_exception,
3333
safe_serialize,
3434
)
35-
from google.genai.types import GenerateContentConfig
35+
from google.genai.types import GenerateContentConfig, Part, Content
36+
from itertools import chain
3637

3738
if TYPE_CHECKING:
3839
from sentry_sdk.tracing import Span
40+
from sentry_sdk._types import TextPart
3941
from google.genai.types import (
4042
GenerateContentResponse,
4143
ContentListUnion,
44+
ContentUnionDict,
4245
Tool,
4346
Model,
4447
EmbedContentResponse,
48+
ContentUnion,
4549
)
4650

4751

@@ -720,6 +724,62 @@ def extract_finish_reasons(
720724
return finish_reasons if finish_reasons else None
721725

722726

727+
def _transform_system_instruction_one_level(
728+
system_instructions: "Union[ContentUnionDict, ContentUnion]",
729+
can_be_content: bool,
730+
) -> "list[TextPart]":
731+
text_parts: "list[TextPart]" = []
732+
733+
if isinstance(system_instructions, str):
734+
return [{"type": "text", "content": system_instructions}]
735+
736+
if isinstance(system_instructions, Part) and system_instructions.text:
737+
return [{"type": "text", "content": system_instructions.text}]
738+
739+
if can_be_content and isinstance(system_instructions, Content):
740+
if isinstance(system_instructions.parts, list):
741+
for part in system_instructions.parts:
742+
if isinstance(part.text, str):
743+
text_parts.append({"type": "text", "content": part.text})
744+
return text_parts
745+
746+
if isinstance(system_instructions, dict) and system_instructions.get("text"):
747+
return [{"type": "text", "content": system_instructions["text"]}]
748+
749+
elif can_be_content and isinstance(system_instructions, dict):
750+
parts = system_instructions.get("parts", [])
751+
for part in parts:
752+
if isinstance(part, Part) and isinstance(part.text, str):
753+
text_parts.append({"type": "text", "content": part.text})
754+
elif isinstance(part, dict) and isinstance(part.get("text"), str):
755+
text_parts.append({"type": "text", "content": part["text"]})
756+
return text_parts
757+
758+
return text_parts
759+
760+
761+
def _transform_system_instructions(
762+
system_instructions: "Union[ContentUnionDict, ContentUnion]",
763+
) -> "list[TextPart]":
764+
text_parts: "list[TextPart]" = []
765+
766+
if isinstance(system_instructions, list):
767+
text_parts = list(
768+
chain.from_iterable(
769+
_transform_system_instruction_one_level(
770+
instructions, can_be_content=False
771+
)
772+
for instructions in system_instructions
773+
)
774+
)
775+
776+
return text_parts
777+
778+
return _transform_system_instruction_one_level(
779+
system_instructions, can_be_content=True
780+
)
781+
782+
723783
def set_span_data_for_request(
724784
span: "Span",
725785
integration: "Any",
@@ -741,27 +801,19 @@ def set_span_data_for_request(
741801
messages = []
742802

743803
# Add system instruction if present
804+
system_instructions = None
744805
if config and hasattr(config, "system_instruction"):
745-
system_instruction = config.system_instruction
746-
if system_instruction:
747-
system_messages = extract_contents_messages(system_instruction)
748-
# System instruction should be a single system message
749-
# Extract text from all messages and combine into one system message
750-
system_texts = []
751-
for msg in system_messages:
752-
content = msg.get("content")
753-
if isinstance(content, list):
754-
# Extract text from content parts
755-
for part in content:
756-
if isinstance(part, dict) and part.get("type") == "text":
757-
system_texts.append(part.get("text", ""))
758-
elif isinstance(content, str):
759-
system_texts.append(content)
760-
761-
if system_texts:
762-
messages.append(
763-
{"role": "system", "content": " ".join(system_texts)}
764-
)
806+
system_instructions = config.system_instruction
807+
elif isinstance(config, dict) and "system_instruction" in config:
808+
system_instructions = config.get("system_instruction")
809+
810+
if system_instructions is not None:
811+
set_data_normalized(
812+
span,
813+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
814+
_transform_system_instructions(system_instructions),
815+
unpack=False,
816+
)
765817

766818
# Extract messages from contents
767819
contents_messages = extract_contents_messages(contents)

sentry_sdk/integrations/langchain.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from uuid import UUID
3737

3838
from sentry_sdk.tracing import Span
39+
from sentry_sdk._types import TextPart
3940

4041

4142
try:
@@ -189,6 +190,40 @@ def _get_current_agent() -> "Optional[str]":
189190
return None
190191

191192

193+
def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
194+
system_instructions = []
195+
196+
for list_ in messages:
197+
for message in list_:
198+
# type of content: str | list[str | dict] | None
199+
if message.type == "system" and isinstance(message.content, str):
200+
system_instructions.append(message.content)
201+
202+
elif message.type == "system" and isinstance(message.content, list):
203+
for item in message.content:
204+
if isinstance(item, str):
205+
system_instructions.append(item)
206+
207+
elif isinstance(item, dict) and item.get("type") == "text":
208+
instruction = item.get("text")
209+
if isinstance(instruction, str):
210+
system_instructions.append(instruction)
211+
212+
return system_instructions
213+
214+
215+
def _transform_system_instructions(
216+
system_instructions: "List[str]",
217+
) -> "List[TextPart]":
218+
return [
219+
{
220+
"type": "text",
221+
"content": instruction,
222+
}
223+
for instruction in system_instructions
224+
]
225+
226+
192227
class LangchainIntegration(Integration):
193228
identifier = "langchain"
194229
origin = f"auto.ai.{identifier}"
@@ -430,9 +465,21 @@ def on_chat_model_start(
430465
_set_tools_on_span(span, all_params.get("tools"))
431466

432467
if should_send_default_pii() and self.include_prompts:
468+
system_instructions = _get_system_instructions(messages)
469+
if len(system_instructions) > 0:
470+
set_data_normalized(
471+
span,
472+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
473+
_transform_system_instructions(system_instructions),
474+
unpack=False,
475+
)
476+
433477
normalized_messages = []
434478
for list_ in messages:
435479
for message in list_:
480+
if message.type == "system":
481+
continue
482+
436483
normalized_messages.append(
437484
self._normalize_langchain_message(message)
438485
)

sentry_sdk/integrations/openai_agents/spans/invoke_agent.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@
1818
import agents
1919
from typing import Any, Optional
2020

21+
from sentry_sdk._types import TextPart
22+
23+
24+
def _transform_system_instruction(system_instructions: "str") -> "list[TextPart]":
25+
return [
26+
{
27+
"type": "text",
28+
"content": system_instructions,
29+
}
30+
]
31+
2132

2233
def invoke_agent_span(
2334
context: "agents.RunContextWrapper", agent: "agents.Agent", kwargs: "dict[str, Any]"
@@ -35,16 +46,16 @@ def invoke_agent_span(
3546
if should_send_default_pii():
3647
messages = []
3748
if agent.instructions:
38-
message = (
49+
system_instruction = (
3950
agent.instructions
4051
if isinstance(agent.instructions, str)
4152
else safe_serialize(agent.instructions)
4253
)
43-
messages.append(
44-
{
45-
"content": [{"text": message, "type": "text"}],
46-
"role": "system",
47-
}
54+
set_data_normalized(
55+
span,
56+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
57+
_transform_system_instruction(system_instruction),
58+
unpack=False,
4859
)
4960

5061
original_input = kwargs.get("original_input")

0 commit comments

Comments
 (0)