Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies = [
"sentry-arroyo>=2.38.7",
"sentry-conventions>=0.3.0",
"sentry-kafka-schemas>=2.1.24",
"sentry-protos>=0.8.14",
"sentry-protos>=0.12.0",
"sentry-redis-tools>=0.5.1",
"sentry-relay>=0.9.25",
"sentry-sdk>=2.35.0",
Expand Down
20 changes: 18 additions & 2 deletions snuba/web/rpc/v1/trace_item_attribute_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from snuba.query.data_source.simple import Entity, LogicalDataSource
from snuba.query.dsl import Functions as f
from snuba.query.dsl import column
from snuba.query.expressions import Expression
from snuba.query.expressions import Expression, FunctionCall
from snuba.query.logical import Query
from snuba.query.query_settings import HTTPQuerySettings
from snuba.request import Request as SnubaRequest
Expand Down Expand Up @@ -128,10 +128,20 @@ def _build_query(
name="attr_value",
expression=f.distinct(column(attr_value.alias, alias="attr_value")),
),
SelectedExpression(
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant distinct wrapper with GROUP BY clause

Low Severity

f.distinct() wrapping attr_value is now redundant because the new groupby=[column("attr_value")] already guarantees unique values. In ClickHouse, this generates SELECT DISTINCT ... GROUP BY ... where the DISTINCT is a no-op. Keeping both deduplication mechanisms is confusing and obscures the query's actual semantics — GROUP BY is the operative deduplication here (needed for count()).

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit bf90686. Configure here.

name="count()",
expression=FunctionCall(
alias="count()",
function_name="count",
parameters=(),
),
),
],
order_by=[
OrderBy(direction=OrderByDirection.DESC, expression=column("count()")),
OrderBy(direction=OrderByDirection.ASC, expression=column("attr_value")),
],
groupby=[column("attr_value")],
limit=request.limit,
offset=(request.page_token.offset if request.page_token.HasField("offset") else 0),
)
Expand Down Expand Up @@ -184,6 +194,7 @@ def _execute(self, in_msg: TraceItemAttributeValuesRequest) -> TraceItemAttribut
if in_msg.key.name == "sentry.item_id" and in_msg.value_substring_match:
return TraceItemAttributeValuesResponse(
values=[in_msg.value_substring_match],
counts=[1],
page_token=None,
)
in_msg.limit = in_msg.limit or 1000
Expand All @@ -193,14 +204,19 @@ def _execute(self, in_msg: TraceItemAttributeValuesRequest) -> TraceItemAttribut
request=snuba_request,
timer=self._timer,
)
values = [r["attr_value"] for r in res.result.get("data", [])]
values, counts = [], []
for row in res.result.get("data", []):
values.append(row["attr_value"])
counts.append(row.get("count()", 0))
if len(values) == 0:
return TraceItemAttributeValuesResponse(
values=values,
counts=counts,
page_token=None,
)
return TraceItemAttributeValuesResponse(
values=values,
counts=counts,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cursor-based page token incompatible with new sort order

Medium Severity

The response page_token constructs a filter_offset cursor using attr_value > values[-1], which assumes results are sorted alphabetically by attr_value. The PR changed the primary sort to count() DESC, attr_value ASC, so values[-1] is no longer the alphabetically greatest value — it's just the last item in count-descending order. This makes the cursor semantically incorrect: it would skip values that belong on the next page or include values that don't. The offset-based path also doesn't account for the non-deterministic nature of count-based ordering across requests.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit bf90686. Configure here.

page_token=(
PageToken(offset=in_msg.page_token.offset + len(values))
if in_msg.page_token.HasField("offset") or len(values) == 0
Expand Down
14 changes: 13 additions & 1 deletion tests/web/rpc/v1/test_trace_item_attribute_values_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ def setup_teardown(eap: None, redis_db: None) -> Generator[List[bytes], None, No
"tag2": AnyValue(string_value="derp"),
},
),
gen_item_message(
start_timestamp=start_timestamp,
attributes={
"tag1": AnyValue(string_value="derpderp"),
"tag2": AnyValue(string_value="derp"),
},
),
gen_item_message(
start_timestamp=start_timestamp,
attributes={"tag2": AnyValue(string_value="hehe")},
Expand Down Expand Up @@ -135,7 +142,8 @@ def test_simple_case(self, setup_teardown: Any) -> None:
key=AttributeKey(name="tag1", type=AttributeKey.TYPE_STRING),
)
response = AttributeValuesRequest().execute(message)
assert response.values == ["blah", "derpderp", "durp", "herp", "herpderp"]
assert response.values == ["derpderp", "blah", "durp", "herp", "herpderp"]
assert response.counts == [2, 1, 1, 1, 1]

def test_with_value_substring_match(self, setup_teardown: Any) -> None:
message = TraceItemAttributeValuesRequest(
Expand All @@ -146,6 +154,7 @@ def test_with_value_substring_match(self, setup_teardown: Any) -> None:
)
response = AttributeValuesRequest().execute(message)
assert response.values == ["derpderp", "herp", "herpderp"]
assert response.counts == [2, 1, 1]

def test_empty_results(self) -> None:
req = TraceItemAttributeValuesRequest(
Expand All @@ -162,6 +171,7 @@ def test_empty_results(self) -> None:
)
res = AttributeValuesRequest().execute(req)
assert res.values == []
assert res.counts == []

def test_item_id_substring_match(self, setup_teardown: List[bytes]) -> None:
first_msg_bytes = setup_teardown[0]
Expand All @@ -182,6 +192,7 @@ def test_item_id_substring_match(self, setup_teardown: List[bytes]) -> None:
)
res = AttributeValuesRequest().execute(req)
assert res.values == [item_id]
assert res.counts == [1]

def test_deprecated_alias_attribute(self) -> None:
"""db.system.name request returns values stored only under deprecated key db.system."""
Expand Down Expand Up @@ -218,3 +229,4 @@ def test_deprecated_alias_attribute(self) -> None:
)
response = AttributeValuesRequest().execute(message)
assert sorted(response.values) == ["postgresql", "redis"]
assert response.counts == [1, 1]
6 changes: 3 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading