openmpf · jrobble · Apr 3, 2025 · Apr 3, 2025 · Apr 3, 2025 · Apr 3, 2025
diff --git a/python/ArgosTranslation/tests/data/spanish_long.txt b/python/ArgosTranslation/tests/data/spanish_long.txt
diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py
@@ -41,21 +41,8 @@
 CHINESE_SHORT_SAMPLE = "你好，你叫什么名字？"
 SHORT_OUTPUT = "Where's the library?"
 
-# Note: Argos-Chinese translations have improved over time.
 SHORT_OUTPUT_CHINESE = "Hello. What's your name?"
 
-LONG_OUTPUT = (
-    "We hold as evident these truths: that all men are created equal, "
-    "that they are endowed by their Creator with certain inalienable rights, "
-    "which among them are life, liberty and the pursuit of happiness. "
-    "That in order to nurture these rights, governments are instituted among men, "
-    "which derive their legitimate powers from the consent of the governed. "
-    "Whenever a form of government becomes destroyer of these principles, "
-    "the people have the right to reform or abolish it and to institute a new government "
-    "that is founded on those principles, and to organize their powers in the way that in "
-    "their opinion will offer the greatest chance of achieving their security and happiness."
-)
-
 MED_OUTPUT = (
     "Considering that the recognition of the inherent dignity and equal and "
     "inalienable rights of all members of the human family is the foundation "
@@ -150,29 +137,6 @@ def test_language_behavior(self):
         self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[1].detection_properties['TRANSLATION'])
         self.assertEqual(SHORT_OUTPUT_CHINESE, result[0].frame_locations[2].detection_properties['TRANSLATION'])
 
-    def test_large_text(self):
-        comp = ArgosTranslationComponent()
-        job = mpf.GenericJob(
-            job_name='Test Sentence Length',
-            data_uri=str(TEST_DATA / 'spanish_long.txt'),
-            job_properties=dict(DEFAULT_SOURCE_LANGUAGE='ES'),
-            media_properties={},
-            feed_forward_track=None
-        )
-
-        result = comp.get_detections_from_generic(job)
-
-        self.assertEqual(1, len(result))
-        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
-
-        trans_result = result[0].detection_properties['TRANSLATION'].replace("nullify","nurture")
-        trans_result = trans_result.replace("founded on these principles","founded on those principles")
-        trans_result = trans_result.replace("That whenever a form of government", "Whenever a form of government")
-
-        # TODO: Identify why the 1.0 spanish model occasionally switches words.
-        # In this case,  words for nurture/nullify, and these/those are sometimes switched depending on build environment.
-        self.assertEqual(LONG_OUTPUT, trans_result)
-
     def test_medium_text(self):
         comp = ArgosTranslationComponent()
         job = mpf.GenericJob(

diff --git a/python/AzureTranslation/README.md b/python/AzureTranslation/README.md
@@ -77,6 +77,11 @@ must be provided. Neither has a default value.
   default to translating from Spanish, whenever Azure's language detector fails
   to identify the source language of the incoming text.
 
+- `ACS_SUBSCRIPTION_REGION`: Optional property that specifies the subscription
+  region for the Azure Cognitive Services resource, such as 'eastus'. Required
+  for some Azure deployments. If provided, the value is sent in the
+  'Ocp-Apim-Subscription-Region' request header.
+
 
 # Text Splitter Job Properties
 The following settings control the behavior of dividing input text into acceptable chunks

diff --git a/python/AzureTranslation/acs_translation_component/acs_translation_component.py b/python/AzureTranslation/acs_translation_component/acs_translation_component.py
@@ -170,6 +170,8 @@ class TranslationClient:
 
     def __init__(self, job_properties: Mapping[str, str], sentence_model: TextSplitterModel):
         self._subscription_key = get_required_property('ACS_SUBSCRIPTION_KEY', job_properties)
+        self._subscription_region = job_properties.get('ACS_SUBSCRIPTION_REGION', '')
+
         self._http_retry = mpf_util.HttpRetry.from_properties(job_properties, log.warning)
 
         url_builder = AcsTranslateUrlBuilder(job_properties)
@@ -331,7 +333,7 @@ def _send_translation_request(self, text: str,
         ]
         encoded_body = json.dumps(request_body).encode('utf-8')
         request = urllib.request.Request(url, encoded_body,
-                                         get_acs_headers(self._subscription_key))
+                                         get_acs_headers(self._subscription_key, self._subscription_region))
         log.info(f'Sending POST to {url}')
         log_json(request_body)
         with self._http_retry.urlopen(
@@ -436,7 +438,7 @@ def _send_detect_request(self, text) -> 'AcsResponses.Detect':
         ]
         encoded_body = json.dumps(request_body).encode('utf-8')
         request = urllib.request.Request(self._detect_url, encoded_body,
-                                         get_acs_headers(self._subscription_key))
+                                         get_acs_headers(self._subscription_key, self._subscription_region))
         log.info(f'Sending POST {self._detect_url}')
         log_json(request_body)
         with self._http_retry.urlopen(request) as response:
@@ -561,10 +563,15 @@ def set_query_params(url: str, query_params: Mapping[str, str]) -> str:
 
 
 
-def get_acs_headers(subscription_key: str) -> Dict[str, str]:
-    return {'Ocp-Apim-Subscription-Key': subscription_key,
-            'Content-type': 'application/json; charset=UTF-8',
-            'X-ClientTraceId': str(uuid.uuid4())}
+def get_acs_headers(subscription_key: str, region: Optional[str] = None) -> Dict[str, str]:
+    headers = {
+        'Ocp-Apim-Subscription-Key': subscription_key,
+        'Content-type': 'application/json; charset=UTF-8',
+        'X-ClientTraceId': str(uuid.uuid4())
+    }
+    if region:
+        headers['Ocp-Apim-Subscription-Region'] = region
+    return headers
 
 
 class AcsTranslateUrlBuilder:

diff --git a/python/AzureTranslation/plugin-files/descriptor/descriptor.json b/python/AzureTranslation/plugin-files/descriptor/descriptor.json
@@ -33,6 +33,12 @@
           "type": "STRING",
           "defaultValue": ""
         },
+        {
+          "name": "ACS_SUBSCRIPTION_REGION",
+          "description": "Azure region for the Cognitive Services resource, such as 'eastus'. Required for some Azure deployments. If provided, will be set in the 'Ocp-Apim-Subscription-Region' request header.",
+          "type": "STRING",
+          "defaultValue": ""
+        },
         {
           "name": "FEED_FORWARD_PROP_TO_PROCESS",
           "description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection to consider translating. If the first property listed is present, then that property will be translated. If it's not, then the next property in the list is considered. At most, one property will be translated.",

diff --git a/python/ClipDetection/Dockerfile b/python/ClipDetection/Dockerfile
@@ -29,11 +29,10 @@
 ARG MODELS_REGISTRY=openmpf/
 ARG BUILD_REGISTRY
 ARG BUILD_TAG=latest
-FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:8.0.0 as models
-FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG}
 
-COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt
-COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt
+FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:8.0.0 AS models
+
+FROM ${BUILD_REGISTRY}openmpf_python_component_build:${BUILD_TAG} AS build
 
 RUN --mount=type=tmpfs,target=/var/cache/apt \
     --mount=type=tmpfs,target=/var/lib/apt/lists  \
@@ -43,21 +42,30 @@ RUN --mount=type=tmpfs,target=/var/cache/apt \
 
 RUN pip3 install --upgrade pip
 
-RUN pip3 install ftfy regex tqdm 'setuptools<70'
-
 RUN --mount=type=tmpfs,target=/tmp \
-        mkdir /tmp/CLIP; \
-        wget -O- 'https://github.com/openai/CLIP/tarball/master' \
-            | tar --extract --gzip --directory /tmp/CLIP; \
-        cd /tmp/CLIP/*; \
-        pip3 install . 'torchvision==0.14.1'
+    mkdir /tmp/CLIP; \
+    wget -O- 'https://github.com/openai/CLIP/tarball/master' \
+        | tar --extract --gzip --directory /tmp/CLIP; \
+    cd /tmp/CLIP/*; \
+    pip3 install . 'torchvision==0.14.1' 'tritonclient[grpc]==2.40'
 
 ARG RUN_TESTS=false
 
 RUN --mount=target=.,readwrite \
+    --mount=from=models,source=/models,target=/models \
     install-component.sh; \
     if [ "${RUN_TESTS,,}" == true ]; then python tests/test_clip.py; fi
 
+
+FROM ${BUILD_REGISTRY}openmpf_python_executor:${BUILD_TAG}
+
+COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt
+COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt
+
+COPY --from=build $COMPONENT_VIRTUALENV $COMPONENT_VIRTUALENV
+
+COPY --from=build $PLUGINS_DIR/ClipDetection $PLUGINS_DIR/ClipDetection
+
 LABEL org.label-schema.license="Apache 2.0" \
       org.label-schema.name="OpenMPF CLIP Detection" \
       org.label-schema.schema-version="1.0" \

diff --git a/python/GeminiDetection/Dockerfile b/python/GeminiDetection/Dockerfile
@@ -45,13 +45,13 @@ RUN --mount=type=tmpfs,target=/var/cache/apt \
 # Create separate venv for Python 3.9 subprocess
 RUN mkdir -p /gemini-subprocess/venv; \
     python3.9 -m venv /gemini-subprocess/venv; \
-    /gemini-subprocess/venv/bin/pip3 install google-genai
+    /gemini-subprocess/venv/bin/pip3 install google-genai pillow numpy
 
-COPY gemini-process-image.py /gemini-subprocess
+COPY gemini-process-image.py gemini_component/resource_tracker_monkeypatch.py /gemini-subprocess
 
 RUN pip3 install --upgrade pip
 
-RUN pip3 install opencv-python
+RUN pip3 install tenacity opencv-python
 
 ARG RUN_TESTS=false
 

diff --git a/python/GeminiDetection/README.md b/python/GeminiDetection/README.md
@@ -6,10 +6,16 @@ This component utilizes a config file that contains any number of prompts for an
 
 # Job Properties
 
-The following are the properties that can be specified for the component. Each property has a default value and so none of them necessarily need to be specified for processing jobs.
+The following are the properties that can be specified for the component. All properties except for GEMINI_API_KEY and CLASSIFICATION have default values, making them optional to set.
 
-- `PROMPT_CONFIGURATION_PATH`: Path to JSON file which contains prompts for specified classifications.
 - `GEMINI_API_KEY`: Your API key to send requests to Google Gemini
+- `CLASSIFICATION`: The class of the object(s) in the media. Used to determine the prompt(s). Examples: PERSON and VEHICLE.
+- `PROMPT_CONFIGURATION_PATH`: The path to a JSON file which contains prompts for specified classifications.
+- `JSON_PROMPT_CONFIGURATION_PATH`: The path to a JSON file which contains classes and prompts that instruct Gemini to return a JSON object.
+- `ENABLE_JSON_PROMPT_FORMAT`: Enables returning a JSON-formatted response from Gemini, using the prompts in the file specified by the `JSON_PROMPT_CONFIGURATION_PATH` job property. Defaults to false.
+- `GENERATE_FRAME_RATE_CAP`: The maximum number of frames to process per second of native video time within a video segment.
+- `MODEL_NAME`: The model to use for Gemini inference. By default it is set to `"gemma-3-27b-it"`.
+- `GENERATION_MAX_ATTEMPTS`: The maximum number of times the component will attempt to generate valid JSON output.
 
 # Config File
 
@@ -54,6 +60,4 @@ Once the responses are generated, they are added onto the `detection_properties`
 
 # TODO
 
-- Implement feed forward jobs
-- Implement JSON response jobs
-- Fix mocking unittests so API key isn't required to test component
+- Add functionality for generic class property detection
diff --git a/python/GeminiDetection/gemini-process-image.py b/python/GeminiDetection/gemini-process-image.py
@@ -1,26 +1,83 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
 import argparse
+import json
+import sys
+import numpy as np
+
 from google import genai
+from multiprocessing.shared_memory import SharedMemory
+from google.genai.errors import ClientError
 from PIL import Image
-import sys
+
+from resource_tracker_monkeypatch import remove_shm_from_resource_tracker 
 
 def main():
     parser = argparse.ArgumentParser(description='Sends image and prompt to Gemini Client for processing.')
 
-    parser.add_argument("--model", "-m", type=str, default="gemini-1.5-pro", help="The name of the Gemini model to use.")
-    parser.add_argument("--filepath", "-f", type=str, required=True, help="Path to the media file to process with Gemini.")
+    parser.add_argument("--model", "-m", type=str, default="gemma-3-27b-it", help="The name of the Gemini model to use.")
+    parser.add_argument("--shm-name", type=str, required=True, help="Shared memory name for image data.")
+    parser.add_argument("--shm-shape", type=str, required=True, help="Shape of the image in shared memory (JSON list).")
+    parser.add_argument("--shm-dtype", type=str, required=True, help="Numpy dtype of the image in shared memory.")
     parser.add_argument("--prompt", "-p", type=str, required=True, help="The prompt you want to use with the image.")
     parser.add_argument("--api_key", "-a", type=str, required=True, help="Your API key for Gemini.")
     args = parser.parse_args()
-
+
+    remove_shm_from_resource_tracker()
+
+    shm = None
+
     try:
+        shape = tuple(json.loads(args.shm_shape))
+        dtype = np.dtype(args.shm_dtype)
+        shm = SharedMemory(name=args.shm_name)
+
+        np_img = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
+        image = Image.fromarray(np_img)
+
         client = genai.Client(api_key=args.api_key)
-        content = client.models.generate_content(model=args.model, contents=[args.prompt, Image.open(args.filepath)])
+        content = client.models.generate_content(model=args.model, contents=[args.prompt, image])
         print(content.text)
         sys.exit(0)
+
+    except ClientError as e:
+        if hasattr(e, 'code') and e.code == 429:
+            print("Caught a ResourceExhausted error (429 Too Many Requests)", file=sys.stderr)
+        else:
+            print(e, file=sys.stderr)
+        sys.exit(1)
+
     except Exception as e:
-        print(e)
+        print(e, file=sys.stderr)
         sys.exit(1)
 
+    finally:
+        if shm:
+            shm.close()
 
 if __name__ == "__main__":
     main()