Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: patch
changes:
fixed:
- Fixed `_get_blob_by_metadata_version` to return the newest blob (highest generation) when multiple blobs match the requested version.
19 changes: 13 additions & 6 deletions policyengine/utils/data/version_aware_storage_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,16 +112,23 @@ def _get_blob_by_metadata_version(
f"Searching for blob {bucket.name}/{key} with metadata version '{version}'"
)
versions = bucket.list_blobs(prefix=key, versions=True)
matching_blobs = []
for blob in versions:
if blob.metadata and blob.metadata.get("version") == version:
logger.info(
f"Found blob {bucket.name}/{key} with metadata version '{version}'"
)
return blob
matching_blobs.append(blob)

raise ValueError(
f"No blob found with version '{version}' for {bucket.name}/{key}"
if not matching_blobs:
raise ValueError(
f"No blob found with version '{version}' for {bucket.name}/{key}"
)

# Return the blob with the highest generation number (newest)
newest_blob = max(matching_blobs, key=lambda b: b.generation)
logger.info(
f"Found blob {bucket.name}/{key} with metadata version '{version}' "
f"(generation {newest_blob.generation})"
)
return newest_blob

def crc32c(
self,
Expand Down
43 changes: 43 additions & 0 deletions tests/utils/data/test_version_aware_storage_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,13 @@ def test_get_blob__metadata_version_searches_blobs(
# Create mock blobs with different metadata versions
blob1 = MagicMock()
blob1.metadata = {"version": "1.0.0"}
blob1.generation = 100
blob2 = MagicMock()
blob2.metadata = {"version": "1.2.3"}
blob2.generation = 200
blob3 = MagicMock()
blob3.metadata = None
blob3.generation = 150

bucket.list_blobs.return_value = [blob1, blob3, blob2]

Expand All @@ -177,6 +180,45 @@ def test_get_blob__metadata_version_searches_blobs(
assert result == blob2
bucket.list_blobs.assert_called_with(prefix="test_key", versions=True)

@patch(
"policyengine.utils.data.version_aware_storage_client.Client",
autospec=True,
)
def test_get_blob__metadata_version_returns_newest_when_multiple_match(
self, mock_client_class
):
"""Test that when multiple blobs have the same version, the newest (highest generation) is returned."""
mock_instance = mock_client_class.return_value
bucket = mock_instance.bucket.return_value

# Create mock blobs with the SAME metadata version but different generations
oldest_blob = MagicMock()
oldest_blob.metadata = {"version": "1.2.3"}
oldest_blob.generation = 100

middle_blob = MagicMock()
middle_blob.metadata = {"version": "1.2.3"}
middle_blob.generation = 200

newest_blob = MagicMock()
newest_blob.metadata = {"version": "1.2.3"}
newest_blob.generation = 300

# Return blobs in non-sorted order to ensure we're not relying on order
bucket.list_blobs.return_value = [
middle_blob,
oldest_blob,
newest_blob,
]

client = VersionAwareStorageClient()
result = client.get_blob("test_bucket", "test_key", version="1.2.3")

# Should return the blob with the highest generation number
assert result == newest_blob
assert result.generation == 300
bucket.list_blobs.assert_called_with(prefix="test_key", versions=True)

@patch(
"policyengine.utils.data.version_aware_storage_client.Client",
autospec=True,
Expand Down Expand Up @@ -217,6 +259,7 @@ def test_get_blob__generation_fallback_to_metadata(
# Set up metadata-based lookup to succeed
metadata_blob = MagicMock()
metadata_blob.metadata = {"version": "999"}
metadata_blob.generation = 100
bucket.list_blobs.return_value = [metadata_blob]

client = VersionAwareStorageClient()
Expand Down
Loading