Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,38 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role


def parse_data_uri(url: str) -> "Tuple[str, str]":
    """
    Parse a data URI and return (mime_type, content).

    Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>

    The "data:" scheme prefix is matched case-insensitively, because URI
    schemes are case-insensitive. If the prefix is absent, the whole text
    before the first comma is treated as the media type section.

    Examples:
        data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
        data:text/plain,Hello → ("text/plain", "Hello")
        data:;base64,SGVsbG8= → ("", "SGVsbG8=")
        DATA:image/png;base64,iVBOR... → ("image/png", "iVBOR...")

    Args:
        url: The data URI to parse.

    Returns:
        A (mime_type, content) tuple. mime_type is the text between the
        scheme prefix and the first ";" (an empty string when no media type
        is given); content is everything after the first comma, returned
        as-is without any decoding.

    Raises:
        ValueError: If the URL is not a valid data URI (missing comma separator)
    """
    # Only the first comma separates header from payload; any later commas
    # belong to the content itself.
    header, separator, content = url.partition(",")
    if not separator:
        raise ValueError("Invalid data URI: missing comma separator")

    # Header format: "data:<mime>[;param1][;param2]..." e.g. "data:image/jpeg;base64"
    # Drop the scheme prefix regardless of its letter case, so "DATA:" and
    # "Data:" do not leak into the returned mime type.
    scheme = "data:"
    if header[: len(scheme)].lower() == scheme:
        header = header[len(scheme) :]

    # The mime type is everything before the first parameter separator.
    mime_type = header.split(";", 1)[0]

    return mime_type, content


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
Expand Down
85 changes: 85 additions & 0 deletions tests/test_ai_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
truncate_and_annotate_messages,
truncate_messages_by_size,
_find_truncation_index,
parse_data_uri,
redact_blob_message_parts,
)
from sentry_sdk.serializer import serialize
Expand Down Expand Up @@ -757,3 +758,87 @@ def test_handles_non_dict_content_items(self):

# Should return same list since no blobs
assert result is messages


class TestParseDataUri:
    """Behavioral checks for parse_data_uri: header parsing and error cases."""

    def test_parses_base64_image_data_uri(self):
        """A base64 JPEG data URI yields its mime type and raw payload."""
        parsed_mime, payload = parse_data_uri("data:image/jpeg;base64,/9j/4AAQSkZJRg==")

        assert (parsed_mime, payload) == ("image/jpeg", "/9j/4AAQSkZJRg==")

    def test_parses_png_data_uri(self):
        """A base64 PNG data URI yields its mime type and raw payload."""
        parsed_mime, payload = parse_data_uri("data:image/png;base64,iVBORw0KGgo=")

        assert (parsed_mime, payload) == ("image/png", "iVBORw0KGgo=")

    def test_parses_plain_text_data_uri(self):
        """A data URI without the base64 marker is still split at the comma."""
        parsed_mime, payload = parse_data_uri("data:text/plain,Hello World")

        assert (parsed_mime, payload) == ("text/plain", "Hello World")

    def test_parses_data_uri_with_empty_mime_type(self):
        """An omitted media type before ";base64" gives an empty mime type."""
        parsed_mime, payload = parse_data_uri("data:;base64,SGVsbG8=")

        assert (parsed_mime, payload) == ("", "SGVsbG8=")

    def test_parses_data_uri_with_only_data_prefix(self):
        """A header consisting solely of "data:" gives an empty mime type."""
        parsed_mime, payload = parse_data_uri("data:,Hello")

        assert (parsed_mime, payload) == ("", "Hello")

    def test_raises_on_missing_comma(self):
        """Input lacking the comma separator is rejected with ValueError."""
        malformed = "data:image/jpeg;base64"
        with pytest.raises(ValueError, match="missing comma separator"):
            parse_data_uri(malformed)

    def test_raises_on_empty_string(self):
        """The empty string has no comma and is rejected with ValueError."""
        malformed = ""
        with pytest.raises(ValueError, match="missing comma separator"):
            parse_data_uri(malformed)

    def test_handles_content_with_commas(self):
        """Only the first comma splits; later commas stay in the content."""
        parsed_mime, payload = parse_data_uri("data:text/plain,Hello,World,With,Commas")

        assert (parsed_mime, payload) == ("text/plain", "Hello,World,With,Commas")

    def test_parses_data_uri_with_multiple_parameters(self):
        """Header parameters after the mime type are not part of the result."""
        parsed_mime, payload = parse_data_uri(
            "data:text/plain;charset=utf-8;base64,SGVsbG8="
        )

        assert (parsed_mime, payload) == ("text/plain", "SGVsbG8=")

    def test_parses_audio_data_uri(self):
        """A base64 WAV data URI yields its mime type and raw payload."""
        parsed_mime, payload = parse_data_uri("data:audio/wav;base64,UklGRiQA")

        assert (parsed_mime, payload) == ("audio/wav", "UklGRiQA")

    def test_handles_uri_without_data_prefix(self):
        """Input without the "data:" prefix is parsed from the bare header."""
        parsed_mime, payload = parse_data_uri("image/jpeg;base64,/9j/4AAQ")

        assert (parsed_mime, payload) == ("image/jpeg", "/9j/4AAQ")
Loading