Shared-Reality-Lab · jeffbl · Nov 16, 2025 · Nov 16, 2025
diff --git a/preprocessors/object-detection-llm/object-detection-llm.py b/preprocessors/object-detection-llm/object-detection-llm.py
@@ -61,10 +61,10 @@ def normalize_bbox(bbox, width, height):
     """
     x1, y1, x2, y2 = bbox
     return [
-        max(0.0, min(x1 / width, 1.0)),
-        max(0.0, min(y1 / height, 1.0)),
-        max(0.0, min(x2 / width, 1.0)),
-        max(0.0, min(y2 / height, 1.0))
+        max(0.0, min(x1 / 1000, 1.0)),
+        max(0.0, min(y1 / 1000, 1.0)),
+        max(0.0, min(x2 / 1000, 1.0)),
+        max(0.0, min(y2 / 1000, 1.0))
     ]
 
 

diff --git a/utils/llm/client.py b/utils/llm/client.py
@@ -68,7 +68,7 @@ def chat_completion(
         image_base64: Optional[str] = None,
         json_schema: Optional[Dict[str, Any]] = None,
         schema_name: str = "response-format",
-        temperature: float = 0.5,
+        temperature: float = 0.0,
         max_tokens: Optional[int] = None,
         response_format: Optional[Dict[str, str]] = None,
         system_prompt: Optional[str] = None,

diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py
@@ -40,11 +40,11 @@
 Ensure that the bounding boxes are in the format [x1, y1, x2, y2].
 
 Rules:
-1. The graphic can contain any number of objects, from zero to many.
-2. If no objects are detected, return an empty list: {"objects": []}.
-3. Use simple and common object labels (e.g., "car", "person", "tree").
-4. Include only objects that are clearly visible and identifiable.
-5. Focus on the major and important objects in the image.
+1. Focus ONLY on the major and important objects in the image.
+2. The graphic can contain any number of objects, from zero to many.
+3. If no objects are detected, return an empty list: {"objects": []}.
+4. Use simple and common object labels (e.g., "car", "person", "tree").
+5. Include ONLY objects that are clearly visible and identifiable.
 6. Multiple objects can have the same confidence score.
 """
 ###