Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 45 additions & 6 deletions src/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
from typing import Optional, Dict, Any, Union

Expand Down Expand Up @@ -172,13 +173,19 @@ def find_nearby_entities(
..., ge=-180, le=180, description="Center longitude in degrees"
),
radius_meters: float = Query(..., gt=0, description="Search radius in meters"),
filter_json: Optional[str] = Query(
None,
description="Optional JSON string containing MongoDB-style filter criteria to refine search results",
),
) -> EntitiesResponse:
r"""Find entities within a specified radius of a geographic point using MongoDB's $near operator.

This endpoint uses MongoDB's geospatial $near query which requires a 2dsphere index
on the coordinates field for optimal performance.
on the coordinates field for optimal performance. An optional filter_json parameter can be
provided as a JSON string to further refine the results.

Example: /bertron/geo/nearby?latitude=47.6062&longitude=-122.3321&radius_meters=10000
Example with filter: /bertron/geo/nearby?latitude=47.6062&longitude=-122.3321&radius_meters=10000&filter_json={"type":"sample"}
"""
db = mongo_client[cfg.mongo_database]

Expand All @@ -205,8 +212,21 @@ def find_nearby_entities(
}
}

# Execute find with geospatial filter
cursor = collection.find(filter=geo_filter)
# Parse and combine with optional filter if provided
final_filter = geo_filter
if filter_json:
try:
additional_filter = json.loads(filter_json)
# Combine geospatial and additional filters using $and
final_filter = {"$and": [geo_filter, additional_filter]}
except json.JSONDecodeError:
raise HTTPException(
status_code=400,
detail="Invalid JSON format in filter_json parameter",
)

# Execute find with combined filter
cursor = collection.find(filter=final_filter)

# Convert cursor to list and convert to Entity objects
documents = list(cursor)
Expand Down Expand Up @@ -234,13 +254,19 @@ def find_entities_in_bounding_box(
northeast_lng: float = Query(
..., ge=-180, le=180, description="Northeast corner longitude"
),
filter_json: Optional[str] = Query(
None,
description="Optional JSON string containing MongoDB-style filter criteria to refine search results",
),
) -> EntitiesResponse:
r"""Find entities within a bounding box using MongoDB's $geoWithin operator.

This endpoint finds all entities whose coordinates fall within the specified
rectangular bounding box defined by southwest and northeast corners.
rectangular bounding box defined by southwest and northeast corners. An optional
filter_json parameter can be provided as a JSON string to further refine the results.

Example: /bertron/geo/bbox?southwest_lat=47.5&southwest_lng=-122.4&northeast_lat=47.7&northeast_lng=-122.2
Example with filter: /bertron/geo/bbox?southwest_lat=47.5&southwest_lng=-122.4&northeast_lat=47.7&northeast_lng=-122.2&filter_json={"type":"sample"}
"""
db = mongo_client[cfg.mongo_database]

Expand Down Expand Up @@ -278,8 +304,21 @@ def find_entities_in_bounding_box(
}
}

# Execute find with geospatial filter
cursor = collection.find(filter=geo_filter)
# Parse and combine with optional filter if provided
final_filter = geo_filter
if filter_json:
try:
additional_filter = json.loads(filter_json)
# Combine geospatial and additional filters using $and
final_filter = {"$and": [geo_filter, additional_filter]}
except json.JSONDecodeError:
raise HTTPException(
status_code=400,
detail="Invalid JSON format in filter_json parameter",
)

# Execute find with combined filter
cursor = collection.find(filter=final_filter)

# Convert cursor to list and convert to Entity objects
documents = list(cursor)
Expand Down
158 changes: 158 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Dict, Any
import json

from fastapi.testclient import TestClient
from pymongo.database import Database
Expand Down Expand Up @@ -292,6 +293,163 @@ def test_geo_bounding_box_invalid_coordinates(
error_data = response.json()
assert "latitude" in error_data["detail"].lower()

def test_geo_nearby_search_with_filter(self, test_client: TestClient, seeded_db: Database):
    """Test geographic nearby search with an additional ``filter_json`` filter.

    Sends a nearby query (100 km radius around latitude 34.0 / longitude 118.0)
    whose ``filter_json`` restricts results to ``ber_data_source == "EMSL"``.
    The test requires the response to contain at least one document and every
    returned document to satisfy the filter.
    """
    params = {
        "latitude": 34.0,
        "longitude": 118.0,
        "radius_meters": 100000,  # 100km radius
        "filter_json": json.dumps({"ber_data_source": "EMSL"}),
    }

    response = test_client.get("/bertron/geo/nearby", params=params)

    assert response.status_code == status.HTTP_200_OK
    entities_data = response.json()

    assert "documents" in entities_data
    assert "count" in entities_data

    # Establish that there is something to inspect BEFORE looping: a loop over
    # an empty list passes vacuously and would hide a filter that matches nothing.
    assert entities_data["count"] > 0, "Should find at least one entity"
    assert entities_data["documents"], "Positive count but no documents returned"

    # All returned entities should be from EMSL
    for entity in entities_data["documents"]:
        assert entity["ber_data_source"] == "EMSL"
        self._verify_entity_structure(entity)
Comment on lines +315 to +321
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick (optional): Swap the order of these blocks (assert there are items, then check the items).


def test_geo_nearby_search_with_invalid_filter_json(
    self, test_client: TestClient, seeded_db: Database
):
    """A ``filter_json`` value that is not valid JSON must yield HTTP 400."""
    # Deliberately malformed payload: not parseable as JSON.
    malformed_filter = "invalid json {"
    query = dict(
        latitude=34.0,
        longitude=118.0,
        radius_meters=100000,
        filter_json=malformed_filter,
    )

    result = test_client.get("/bertron/geo/nearby", params=query)

    assert result.status_code == status.HTTP_400_BAD_REQUEST

def test_geo_bbox_search_with_filter(
    self, test_client: TestClient, seeded_db: Database
):
    """Bounding-box search narrowed by a ``filter_json`` data-source filter.

    Every document in the response must report ``ber_data_source`` of
    ``"ESS-DIVE"`` and carry coordinates inside the requested box.
    """
    # Box corners (the original author describes this as "around Alaska");
    # named once so the request and the range checks cannot drift apart.
    south, west = 64.0, -166.0
    north, east = 66.0, -163.0

    query = {
        "southwest_lat": south,
        "southwest_lng": west,
        "northeast_lat": north,
        "northeast_lng": east,
        "filter_json": json.dumps({"ber_data_source": "ESS-DIVE"}),
    }

    result = test_client.get("/bertron/geo/bbox", params=query)
    assert result.status_code == status.HTTP_200_OK

    payload = result.json()
    assert "documents" in payload
    assert "count" in payload

    for doc in payload["documents"]:
        # The extra filter must hold for every hit...
        assert doc["ber_data_source"] == "ESS-DIVE"
        # ...and the geospatial constraint must still apply.
        doc_lat = doc["coordinates"]["latitude"]
        doc_lng = doc["coordinates"]["longitude"]
        assert south <= doc_lat <= north
        assert west <= doc_lng <= east
        self._verify_entity_structure(doc)

def test_geo_bbox_search_with_empty_filter(
    self, test_client: TestClient, seeded_db: Database
):
    """Bounding-box search with ``filter_json`` set to an empty JSON object.

    An empty filter adds no restrictions, so the request is expected to
    succeed and return structurally valid entities.
    """
    no_op_filter = json.dumps({})  # serialises to "{}"
    query = {
        "southwest_lat": 64.0,
        "southwest_lng": -166.0,
        "northeast_lat": 66.0,
        "northeast_lng": -163.0,
        "filter_json": no_op_filter,
    }

    result = test_client.get("/bertron/geo/bbox", params=query)
    assert result.status_code == status.HTTP_200_OK

    payload = result.json()
    assert "documents" in payload
    assert "count" in payload

    # No data-source check here: the empty filter should not narrow results.
    for doc in payload["documents"]:
        self._verify_entity_structure(doc)
Comment on lines +385 to +387
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the comment mentions the data source, I suggest adding an assertion that multiple data sources are represented in the response. The test can get a list of data sources directly from the database beforehand, for comparison.


def test_geo_search_filter_with_complex_query(self, test_client: TestClient, seeded_db: Database):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this illustrative test.

"""Test geospatial search with more complex filter query."""

# Test with a more complex filter using MongoDB operators
params = {
"latitude": 34.0,
"longitude": 118.0,
"radius_meters": 500000, # Larger radius to catch more entities
"filter_json": json.dumps({
"entity_type": {"$in": ["sample", "study"]},
"ber_data_source": {"$ne": "JGI"} # Exclude JGI data
})
}

response = test_client.get("/bertron/geo/nearby", params=params)

assert response.status_code == status.HTTP_200_OK
entities_data = response.json()

assert "documents" in entities_data
assert "count" in entities_data

# Verify filter conditions are met
for entity in entities_data["documents"]:
# Should have entity_type containing "sample" or "study"
entity_types = entity.get("entity_type", [])
assert any(et in ["sample", "study"] for et in entity_types)
# Should not be from JGI
assert entity["ber_data_source"] != "JGI"
self._verify_entity_structure(entity)

def test_geosearch_with_properties(self, test_client: TestClient, seeded_db: Database):
"""Test searching entities by properties."""

# Search for entities with a specific property label
params = {
"latitude": 28.125842,
"longitude": -81.434174,
"radius_meters": 1000000, # 1000km radius to include NMDC entity
"filter_json": json.dumps({
"properties.attribute.label": "depth",
"properties.numeric_value": 24
})
}

response = test_client.get("/bertron/geo/nearby", params=params)

assert response.status_code == status.HTTP_200_OK
entities_data = response.json()

assert "documents" in entities_data
assert "count" in entities_data

# Should find at least the NMDC entity with depth property
found_nmdc = False
for entity in entities_data["documents"]:
properties = [ prop["attribute"]["label"] for prop in entity.get("properties", []) ]
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Extraneous whitespace immediately inside the brackets of the list comprehension. Should be [prop["attribute"]["label"] for prop in entity.get("properties", [])] for consistency with Python style guidelines (PEP 8).

Suggested change
properties = [ prop["attribute"]["label"] for prop in entity.get("properties", []) ]
properties = [prop["attribute"]["label"] for prop in entity.get("properties", [])]

Copilot uses AI. Check for mistakes.
if "depth" in properties and entity["id"] == "nmdc:bsm-11-bsf8yq62":
found_nmdc = True
self._verify_entity_structure(entity)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an assertion that the value of the retrieved property is 24 (like in the specified filter).

assert found_nmdc, "Should find NMDC entity with depth property"


def _verify_entity_structure(self, entity: Dict[str, Any]):
"""Helper method to verify entity structure matches schema."""
required_fields = [
Expand Down