Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions backend/grievance_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import joblib
import os
import logging

Expand All @@ -10,22 +9,38 @@ class GrievanceClassifier:
def __init__(self):
self.model = None
self._initialized = False
# Lazy load logic moved to load_model

def load_model(self):
    """
    Try to load the grievance model from MODEL_PATH into self.model.

    self.model is set to None when joblib / scikit-learn cannot be imported
    or when loading the file raises. When the file does not exist only a
    warning is logged and self.model is left untouched.
    """
    # Optional dependencies are imported here so that their absence merely
    # disables the classifier instead of breaking module import.
    try:
        import joblib
        # The existing note says joblib imports scikit-learn while deserializing
        # the model; importing it explicitly surfaces a missing install up front.
        import sklearn
    except ImportError as e:
        logger.warning(f"Grievance classifier disabled: Missing dependency {e}. This is expected on lightweight deployments.")
        self.model = None
        return

    # Guard clause: nothing to load if the model file is absent.
    if not os.path.exists(MODEL_PATH):
        logger.warning(f"Grievance model not found at {MODEL_PATH}")
        return

    try:
        self.model = joblib.load(MODEL_PATH)
        logger.info("Grievance model loaded successfully.")
    except Exception as e:
        # Deserialization problems (e.g. from sklearn) and any other failure
        # end up here; the classifier then stays without a model.
        logger.error(f"Failed to load grievance model: {e}")
        self.model = None

def predict(self, text: str):
if not self.model:
# Try reloading if it failed previously or file was created later
self.load_model()
# Try reloading (once) if it wasn't initialized
if not self._initialized:
self.load_model()
self._initialized = True

if not self.model:
return "Unknown (Model Unavailable)"

Expand Down
6 changes: 0 additions & 6 deletions backend/requirements-render.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,9 @@ google-generativeai
python-multipart
psycopg2-binary
async-lru
huggingface-hub
httpx
python-magic
pywebpush
Pillow
firebase-functions
firebase-admin
a2wsgi
scikit-learn
numpy
python-jose[cryptography]
passlib[bcrypt]
6 changes: 0 additions & 6 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,6 @@ pywebpush
torch
transformers
Pillow
firebase-functions
firebase-admin
a2wsgi
# Spatial deduplication dependencies
scikit-learn
numpy
pytest
python-jose[cryptography]
passlib[bcrypt]
66 changes: 16 additions & 50 deletions backend/spatial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
"""
import math
from typing import List, Tuple, Optional
from sklearn.cluster import DBSCAN
import numpy as np

from backend.models import Issue

Expand Down Expand Up @@ -56,6 +54,20 @@ def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> fl
return R * c


def equirectangular_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the distance between two points using the Equirectangular approximation.
    This is much faster than Haversine and accurate enough for small distances (< 10km).

    The longitude difference is normalized to [-180, 180) degrees, so two points
    on opposite sides of the antimeridian (e.g. 179.999 and -179.999) are treated
    as close together rather than almost 360 degrees apart.

    Args:
        lat1: Latitude of the first point in degrees.
        lon1: Longitude of the first point in degrees.
        lat2: Latitude of the second point in degrees.
        lon2: Longitude of the second point in degrees.

    Returns:
        Approximate distance in meters.
    """
    R = 6371000.0  # mean Earth radius in meters
    # Wrap the longitude delta so crossing +/-180 degrees does not inflate it.
    dlon = (lon2 - lon1 + 180.0) % 360.0 - 180.0
    # Scale the east-west component by the cosine of the mean latitude.
    x = math.radians(dlon) * math.cos(math.radians((lat1 + lat2) / 2))
    y = math.radians(lat2 - lat1)
    return R * math.hypot(x, y)
Comment on lines +64 to +68
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equirectangular_distance hard-codes Earth radius again (R = 6371000.0) and uses manual sqrt(x*x + y*y). Consider defining a module-level constant for Earth radius and reusing it across haversine_distance/equirectangular_distance (and other helpers) to avoid inconsistencies, and using math.hypot(x, y) for clearer, numerically stable distance computation.

Copilot uses AI. Check for mistakes.
Comment on lines +57 to +68
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Antimeridian (±180° longitude) wrapping not handled — correctness regression from Haversine.

If two points straddle the antimeridian (e.g., lon1=179.999°, lon2=−179.999°), lon2 - lon1 yields ≈ −360° instead of ≈ −0.002°, massively overestimating the distance. Haversine is immune to this because sin²(Δλ/2) is periodic, but the linear subtraction here is not.

For your stated use case (civic issues, small radii) this is unlikely, but if the app ever serves locations near the antimeridian (Fiji, Tonga, far-east Russia), nearby duplicates would be missed silently.

A minimal fix is to normalize the longitude delta:

Proposed fix
     R = 6371000.0
-    x = math.radians(lon2 - lon1) * math.cos(math.radians((lat1 + lat2) / 2))
+    dlon = (lon2 - lon1 + 180) % 360 - 180  # normalize to [-180, 180]
+    x = math.radians(dlon) * math.cos(math.radians((lat1 + lat2) / 2))
     y = math.radians(lat2 - lat1)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def equirectangular_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""
Calculate the distance between two points using the Equirectangular approximation.
This is much faster than Haversine and accurate enough for small distances (< 10km).
Returns distance in meters.
"""
R = 6371000.0
# Convert difference to radians directly
x = math.radians(lon2 - lon1) * math.cos(math.radians((lat1 + lat2) / 2))
y = math.radians(lat2 - lat1)
return R * math.sqrt(x*x + y*y)
def equirectangular_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""
Calculate the distance between two points using the Equirectangular approximation.
This is much faster than Haversine and accurate enough for small distances (< 10km).
Returns distance in meters.
"""
R = 6371000.0
# Convert difference to radians directly
dlon = (lon2 - lon1 + 180) % 360 - 180 # normalize to [-180, 180]
x = math.radians(dlon) * math.cos(math.radians((lat1 + lat2) / 2))
y = math.radians(lat2 - lat1)
return R * math.sqrt(x*x + y*y)
🤖 Prompt for AI Agents
In `@backend/spatial_utils.py` around lines 57 - 68, The equirectangular_distance
function fails to handle antimeridian wrapping causing huge deltas for
longitudes crossing ±180°; fix by normalizing the longitude difference in
equirectangular_distance (compute delta_lon = (lon2 - lon1 + 180) % 360 - 180 or
equivalent) before converting to radians and using it in x = radians(delta_lon)
* cos(radians((lat1+lat2)/2)); keep all other math the same so distances near
the antimeridian are computed correctly.



def find_nearby_issues(
issues: List[Issue],
target_lat: float,
Expand All @@ -80,7 +92,8 @@ def find_nearby_issues(
if issue.latitude is None or issue.longitude is None:
continue

distance = haversine_distance(
# Use Equirectangular approximation for faster filtering
distance = equirectangular_distance(
target_lat, target_lon,
issue.latitude, issue.longitude
)
Expand All @@ -94,53 +107,6 @@ def find_nearby_issues(
return nearby_issues


def cluster_issues_dbscan(issues: List[Issue], eps_meters: float = 30.0) -> List[List[Issue]]:
    """
    Cluster issues using DBSCAN algorithm based on spatial proximity.

    Args:
        issues: List of Issue objects with latitude/longitude
        eps_meters: Maximum distance between two samples for one to be considered
                    as in the neighborhood of the other (default 30m)

    Returns:
        List of clusters, where each cluster is a list of Issue objects.
        Issues without coordinates are ignored; an empty list is returned
        when no issue has valid coordinates.
    """
    # Filter issues with valid coordinates
    valid_issues = [
        issue for issue in issues
        if issue.latitude is not None and issue.longitude is not None
    ]

    if not valid_issues:
        return []

    # Convert to numpy array for DBSCAN
    coordinates = np.array([
        [issue.latitude, issue.longitude] for issue in valid_issues
    ])

    # The haversine metric operates on [lat, lon] in radians and measures
    # distance as an angle in radians, so eps must be an angle too:
    # meters divided by the Earth's radius (not meters per degree).
    earth_radius_m = 6371000.0
    eps_radians = eps_meters / earth_radius_m

    # Perform DBSCAN clustering
    db = DBSCAN(eps=eps_radians, min_samples=1, metric='haversine').fit(
        np.radians(coordinates)
    )

    # Group issues by cluster label, keeping first-seen label order
    clusters = {}
    for issue, label in zip(valid_issues, db.labels_):
        clusters.setdefault(label, []).append(issue)

    # Return clusters as list of lists (exclude noise points labeled as -1)
    return [cluster for label, cluster in clusters.items() if label != -1]


def get_cluster_representative(cluster: List[Issue]) -> Issue:
"""
Get the representative issue from a cluster.
Expand Down
63 changes: 63 additions & 0 deletions backend/tests/test_spatial_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

import pytest
import math
Comment on lines +2 to +3
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pytest and math are imported but not used in this test module. Removing unused imports keeps the test suite clean and avoids failing builds if linting is enabled.

Suggested change
import pytest
import math

Copilot uses AI. Check for mistakes.
from backend.spatial_utils import haversine_distance, equirectangular_distance, find_nearby_issues
from backend.models import Issue

def test_equirectangular_accuracy_small_distance():
    """Equirectangular and Haversine distances agree within 0.1% for a short hop (< 1km)."""
    start = (18.5204, 73.8567)
    # Both coordinates move by 0.001 degrees, i.e. a diagonal step of about 150 m.
    end = (18.5214, 73.8577)

    reference = haversine_distance(*start, *end)
    approximation = equirectangular_distance(*start, *end)

    # Relative error against the Haversine reference must stay below 0.1%.
    relative_error = abs(reference - approximation) / reference
    assert relative_error < 0.001

def test_equirectangular_accuracy_larger_distance():
"""Test that equirectangular approximation is reasonably accurate for 10km."""
lat1, lon1 = 18.5204, 73.8567
# 0.1 degrees is roughly 10km
lat2, lon2 = 18.6204, 73.9567
Comment on lines +8 to +23
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comments/docstrings describing the test distances are misleading: changing both lat and lon by 0.001° yields a diagonal distance (~150m at this latitude), not “roughly 100 meters”, and 0.1°/0.1° is closer to ~15km than 10km. This can confuse future readers about the intent/coverage of these accuracy tests—either adjust the coordinate deltas or update the comments/docstrings.

Copilot uses AI. Check for mistakes.

h_dist = haversine_distance(lat1, lon1, lat2, lon2)
e_dist = equirectangular_distance(lat1, lon1, lat2, lon2)

# Allow 1% error margin for larger distances (still very good)
assert abs(h_dist - e_dist) / h_dist < 0.01

def test_find_nearby_issues():
    """find_nearby_issues keeps only issues inside the radius and lists the nearest first."""
    origin_lat, origin_lon = 18.5204, 73.8567

    # id 3 sits exactly on the target point.
    same_spot = Issue(id=3, latitude=origin_lat, longitude=origin_lon)
    # id 1 is shifted by 0.0001 deg on both axes (about 15 m diagonally).
    close_by = Issue(id=1, latitude=origin_lat + 0.0001, longitude=origin_lon + 0.0001)
    # id 2 is shifted by 0.1 deg on both axes (about 15 km diagonally).
    far_away = Issue(id=2, latitude=origin_lat + 0.1, longitude=origin_lon + 0.1)

    # With a 50 m radius only the same-spot and close-by issues qualify.
    nearby = find_nearby_issues(
        [close_by, far_away, same_spot],
        origin_lat,
        origin_lon,
        radius_meters=50.0,
    )

    assert len(nearby) == 2

    # Each result is an (issue, distance) pair; the exact match (distance 0)
    # must come first, followed by the close one, and the far one is absent.
    found_ids = [issue.id for issue, _ in nearby]
    assert found_ids == [3, 1]

def test_find_nearby_issues_empty():
    """An empty issue list yields an empty result."""
    assert find_nearby_issues([], 0, 0) == []

def test_find_nearby_issues_invalid_coords():
    """An issue whose latitude and longitude are both None is not returned."""
    without_coords = Issue(id=1, latitude=None, longitude=None)
    assert find_nearby_issues([without_coords], 0, 0) == []