Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
913cee3
Now include prompt files in module.
jrobble Apr 3, 2025
389390e
Improve prompts.
jrobble Apr 3, 2025
fb9ad28
Enable generating JSON output for people and vehicles.
jrobble Apr 3, 2025
5d65585
Allow features with a value of "true".
jrobble Apr 3, 2025
4e2b197
Ignore unsure prefixes.
jrobble Apr 3, 2025
0814cd2
Flatten output for vehicles.
jrobble Apr 4, 2025
0f2d7ba
Always add "ANNOTATED BY GEMINI".
jrobble Apr 4, 2025
b5bf823
Output CLASSIFICATION property.
jrobble Apr 8, 2025
c8f2932
Create LLaMA video summarization component. (#398)
jrobble May 21, 2025
89c14af
Merge branch 'master' into merge/llama-video
jrobble May 21, 2025
912c084
Added support for MODEL_NAME and backoff for model rate limit errors
Jun 3, 2025
4ae286f
Fixed issues from pr 401, besides those relating to backoff
Jun 6, 2025
f4f3862
Altered the exception for 429 to catch the error code
Jun 6, 2025
2c44b0a
Removing redundant assignments
Jun 10, 2025
0e7d8db
Merge develop changes to kburkewv/feat/gemini-detection
Jun 10, 2025
74de3b5
Merge branch develop into kburkewv/feat/gemini-detection
Jun 10, 2025
8ddb41b
Image and video processing are supported through SharedMemory
Jun 13, 2025
9dd2c3c
Feed forward implementation
Jun 13, 2025
6fed115
Added ff support for images
Jun 16, 2025
231bb47
Added geminidetection ff with markup
Jun 17, 2025
9c80941
Added unittests and data
Jun 17, 2025
382ed65
Merge w/ markup change
Jun 17, 2025
7de957a
Structured output prompts for person and vehicle
Jun 18, 2025
ad69298
Updating to include 'Ocp-Apim-Subscription-Region' header option. Rem…
hhuangMITRE Jul 7, 2025
ec8aa6a
Fix clip dockerfile (#403)
brosenberg42 Jul 14, 2025
4cfcd09
Merge branch 'master' into hf-merge/clip-build-failure
brosenberg42 Jul 14, 2025
52ee803
Merge remote-tracking branch 'origin/develop' into kmburke/feat/gemin…
Jul 17, 2025
954fce4
Bug fixes for color and SHM
Jul 17, 2025
3fd8f6d
Fixed a bug where if an image is too large it caused SharedMemory to …
Jul 18, 2025
f80d252
Removed the close() call in gemini-process-image
Jul 18, 2025
c9b437b
Changes to prompt and image processing
Jul 18, 2025
1608600
Updated prompts and finalized details
Jul 21, 2025
18767fa
Use OpenCV to convert color profile.
jrobble Jul 29, 2025
d9431d5
Improve shared memory resource cleanup.
jrobble Jul 30, 2025
161dd01
Add monkey patch for resource tracker.
jrobble Jul 30, 2025
11fb07b
Added GENERATION_MAX_ATTEMPTS and removed redundant code
Jul 31, 2025
d86f086
Removed obsolete file
Jul 31, 2025
ecb92a3
Updated the README
Jul 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/ArgosTranslation/tests/data/spanish_long.txt

This file was deleted.

36 changes: 0 additions & 36 deletions python/ArgosTranslation/tests/test_argos_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,8 @@
CHINESE_SHORT_SAMPLE = "你好,你叫什么名字?"
SHORT_OUTPUT = "Where's the library?"

# Note: Argos-Chinese translations have improved over time.
SHORT_OUTPUT_CHINESE = "Hello. What's your name?"

LONG_OUTPUT = (
"We hold as evident these truths: that all men are created equal, "
"that they are endowed by their Creator with certain inalienable rights, "
"which among them are life, liberty and the pursuit of happiness. "
"That in order to nurture these rights, governments are instituted among men, "
"which derive their legitimate powers from the consent of the governed. "
"Whenever a form of government becomes destroyer of these principles, "
"the people have the right to reform or abolish it and to institute a new government "
"that is founded on those principles, and to organize their powers in the way that in "
"their opinion will offer the greatest chance of achieving their security and happiness."
)

MED_OUTPUT = (
"Considering that the recognition of the inherent dignity and equal and "
"inalienable rights of all members of the human family is the foundation "
Expand Down Expand Up @@ -150,29 +137,6 @@ def test_language_behavior(self):
self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[1].detection_properties['TRANSLATION'])
self.assertEqual(SHORT_OUTPUT_CHINESE, result[0].frame_locations[2].detection_properties['TRANSLATION'])

def test_large_text(self):
comp = ArgosTranslationComponent()
job = mpf.GenericJob(
job_name='Test Sentence Length',
data_uri=str(TEST_DATA / 'spanish_long.txt'),
job_properties=dict(DEFAULT_SOURCE_LANGUAGE='ES'),
media_properties={},
feed_forward_track=None
)

result = comp.get_detections_from_generic(job)

self.assertEqual(1, len(result))
self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])

trans_result = result[0].detection_properties['TRANSLATION'].replace("nullify","nurture")
trans_result = trans_result.replace("founded on these principles","founded on those principles")
trans_result = trans_result.replace("That whenever a form of government", "Whenever a form of government")

# TODO: Identify why the 1.0 spanish model occasionally switches words.
# In this case, words for nurture/nullify, and these/those are sometimes switched depending on build environment.
self.assertEqual(LONG_OUTPUT, trans_result)

def test_medium_text(self):
comp = ArgosTranslationComponent()
job = mpf.GenericJob(
Expand Down
5 changes: 5 additions & 0 deletions python/AzureTranslation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ must be provided. Neither has a default value.
default to translating from Spanish, whenever Azure's language detector fails
to identify the source language of the incoming text.

- `ACS_SUBSCRIPTION_REGION`: Optional property that specifies the subscription
region for the Azure Cognitive Services resource, such as 'eastus'. Required
for some Azure deployments. If provided, will be set in the
'Ocp-Apim-Subscription-Region' request header.


# Text Splitter Job Properties
The following settings control the behavior of dividing input text into acceptable chunks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ class TranslationClient:

def __init__(self, job_properties: Mapping[str, str], sentence_model: TextSplitterModel):
self._subscription_key = get_required_property('ACS_SUBSCRIPTION_KEY', job_properties)
self._subscription_region = job_properties.get('ACS_SUBSCRIPTION_REGION', '')

self._http_retry = mpf_util.HttpRetry.from_properties(job_properties, log.warning)

url_builder = AcsTranslateUrlBuilder(job_properties)
Expand Down Expand Up @@ -331,7 +333,7 @@ def _send_translation_request(self, text: str,
]
encoded_body = json.dumps(request_body).encode('utf-8')
request = urllib.request.Request(url, encoded_body,
get_acs_headers(self._subscription_key))
get_acs_headers(self._subscription_key, self._subscription_region))
log.info(f'Sending POST to {url}')
log_json(request_body)
with self._http_retry.urlopen(
Expand Down Expand Up @@ -436,7 +438,7 @@ def _send_detect_request(self, text) -> 'AcsResponses.Detect':
]
encoded_body = json.dumps(request_body).encode('utf-8')
request = urllib.request.Request(self._detect_url, encoded_body,
get_acs_headers(self._subscription_key))
get_acs_headers(self._subscription_key, self._subscription_region))
log.info(f'Sending POST {self._detect_url}')
log_json(request_body)
with self._http_retry.urlopen(request) as response:
Expand Down Expand Up @@ -561,10 +563,15 @@ def set_query_params(url: str, query_params: Mapping[str, str]) -> str:



def get_acs_headers(subscription_key: str) -> Dict[str, str]:
return {'Ocp-Apim-Subscription-Key': subscription_key,
'Content-type': 'application/json; charset=UTF-8',
'X-ClientTraceId': str(uuid.uuid4())}
def get_acs_headers(subscription_key: str, region: Optional[str] = None) -> Dict[str, str]:
    """Build the HTTP headers for an Azure Cognitive Services request.

    A new random ``X-ClientTraceId`` is generated on every call.

    :param subscription_key: Value sent in the ``Ocp-Apim-Subscription-Key``
        header.
    :param region: Optional subscription region, such as ``'eastus'``. When it
        is ``None``, empty, or only whitespace, the
        ``Ocp-Apim-Subscription-Region`` header is omitted.
    :return: A new dictionary of request headers.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Content-type': 'application/json; charset=UTF-8',
        'X-ClientTraceId': str(uuid.uuid4())
    }
    # The region comes from a user-supplied job property, so strip stray
    # whitespace to avoid sending a blank or padded header value.
    region = region.strip() if region else ''
    if region:
        headers['Ocp-Apim-Subscription-Region'] = region
    return headers


class AcsTranslateUrlBuilder:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
"type": "STRING",
"defaultValue": ""
},
{
"name": "ACS_SUBSCRIPTION_REGION",
"description": "Azure region for the Cognitive Services resource, such as 'eastus'. Required for some Azure deployments. If provided, will be set in the 'Ocp-Apim-Subscription-Region' request header.",
"type": "STRING",
"defaultValue": ""
},
{
"name": "FEED_FORWARD_PROP_TO_PROCESS",
"description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection to consider translating. If the first property listed is present, then that property will be translated. If it's not, then the next property in the list is considered. At most, one property will be translated.",
Expand Down
30 changes: 19 additions & 11 deletions python/ClipDetection/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@
ARG MODELS_REGISTRY=openmpf/
ARG BUILD_REGISTRY
ARG BUILD_TAG=latest
FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:8.0.0 as models
FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG}

COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt
COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt
FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:8.0.0 AS models

FROM ${BUILD_REGISTRY}openmpf_python_component_build:${BUILD_TAG} AS build

RUN --mount=type=tmpfs,target=/var/cache/apt \
--mount=type=tmpfs,target=/var/lib/apt/lists \
Expand All @@ -43,21 +42,30 @@ RUN --mount=type=tmpfs,target=/var/cache/apt \

RUN pip3 install --upgrade pip

RUN pip3 install ftfy regex tqdm 'setuptools<70'

RUN --mount=type=tmpfs,target=/tmp \
mkdir /tmp/CLIP; \
wget -O- 'https://github.com/openai/CLIP/tarball/master' \
| tar --extract --gzip --directory /tmp/CLIP; \
cd /tmp/CLIP/*; \
pip3 install . 'torchvision==0.14.1'
mkdir /tmp/CLIP; \
wget -O- 'https://github.com/openai/CLIP/tarball/master' \
| tar --extract --gzip --directory /tmp/CLIP; \
cd /tmp/CLIP/*; \
pip3 install . 'torchvision==0.14.1' 'tritonclient[grpc]==2.40'

ARG RUN_TESTS=false

RUN --mount=target=.,readwrite \
--mount=from=models,source=/models,target=/models \
install-component.sh; \
if [ "${RUN_TESTS,,}" == true ]; then python tests/test_clip.py; fi


FROM ${BUILD_REGISTRY}openmpf_python_executor:${BUILD_TAG}

COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt
COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt

COPY --from=build $COMPONENT_VIRTUALENV $COMPONENT_VIRTUALENV

COPY --from=build $PLUGINS_DIR/ClipDetection $PLUGINS_DIR/ClipDetection

LABEL org.label-schema.license="Apache 2.0" \
org.label-schema.name="OpenMPF CLIP Detection" \
org.label-schema.schema-version="1.0" \
Expand Down
6 changes: 3 additions & 3 deletions python/GeminiDetection/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ RUN --mount=type=tmpfs,target=/var/cache/apt \
# Create separate venv for Python 3.9 subprocess
RUN mkdir -p /gemini-subprocess/venv; \
python3.9 -m venv /gemini-subprocess/venv; \
/gemini-subprocess/venv/bin/pip3 install google-genai
/gemini-subprocess/venv/bin/pip3 install google-genai pillow numpy

COPY gemini-process-image.py /gemini-subprocess
COPY gemini-process-image.py gemini_component/resource_tracker_monkeypatch.py /gemini-subprocess

RUN pip3 install --upgrade pip

RUN pip3 install opencv-python
RUN pip3 install tenacity opencv-python

ARG RUN_TESTS=false

Expand Down
14 changes: 9 additions & 5 deletions python/GeminiDetection/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,16 @@ This component utilizes a config file that contains any number of prompts for an

# Job Properties

The following are the properties that can be specified for the component. Each property has a default value and so none of them necessarily need to be specified for processing jobs.
The following are the properties that can be specified for the component. All properties except for GEMINI_API_KEY and CLASSIFICATION have default values, making them optional to set.

- `PROMPT_CONFIGURATION_PATH`: Path to JSON file which contains prompts for specified classifications.
- `GEMINI_API_KEY`: Your API key to send requests to Google Gemini
- `CLASSIFICATION`: The class of the object(s) in the media. Used to determine the prompt(s). Examples: PERSON and VEHICLE.
- `PROMPT_CONFIGURATION_PATH`: The path to a JSON file that contains prompts for the specified classifications.
- `JSON_PROMPT_CONFIGURATION_PATH`: The path to a JSON file that contains classes and prompts that instruct Gemini to return a JSON object.
- `ENABLE_JSON_PROMPT_FORMAT`: Enables returning a JSON-formatted response from Gemini, using the prompts in the file specified by the `JSON_PROMPT_CONFIGURATION_PATH` job property. Set to false by default.
- `GENERATE_FRAME_RATE_CAP`: The maximum number of frames to process per second of native video time within the video segment.
- `MODEL_NAME`: The model to use for Gemini inference. By default it is set to `"gemma-3-27b-it"`.
- `GENERATION_MAX_ATTEMPTS`: The maximum number of times the component will attempt to generate valid JSON output.

# Config File

Expand Down Expand Up @@ -54,6 +60,4 @@ Once the responses are generated, they are added onto the `detection_properties`

# TODO

- Implement feed forward jobs
- Implement JSON response jobs
- Fix mocking unittests so API key isn't required to test component
- Add functionality for generic class property detection
69 changes: 63 additions & 6 deletions python/GeminiDetection/gemini-process-image.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,26 +1,83 @@
#############################################################################
# NOTICE #
# #
# This software (or technical data) was produced for the U.S. Government #
# under contract, and is subject to the Rights in Data-General Clause #
# 52.227-14, Alt. IV (DEC 2007). #
# #
# Copyright 2024 The MITRE Corporation. All Rights Reserved. #
#############################################################################

#############################################################################
# Copyright 2024 The MITRE Corporation #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#############################################################################

import argparse
import json
import sys
import numpy as np

from google import genai
from multiprocessing.shared_memory import SharedMemory
from google.genai.errors import ClientError
from PIL import Image
import sys

from resource_tracker_monkeypatch import remove_shm_from_resource_tracker

def main():
parser = argparse.ArgumentParser(description='Sends image and prompt to Gemini Client for processing.')

parser.add_argument("--model", "-m", type=str, default="gemini-1.5-pro", help="The name of the Gemini model to use.")
parser.add_argument("--filepath", "-f", type=str, required=True, help="Path to the media file to process with Gemini.")
parser.add_argument("--model", "-m", type=str, default="gemma-3-27b-it", help="The name of the Gemini model to use.")
parser.add_argument("--shm-name", type=str, required=True, help="Shared memory name for image data.")
parser.add_argument("--shm-shape", type=str, required=True, help="Shape of the image in shared memory (JSON list).")
parser.add_argument("--shm-dtype", type=str, required=True, help="Numpy dtype of the image in shared memory.")
parser.add_argument("--prompt", "-p", type=str, required=True, help="The prompt you want to use with the image.")
parser.add_argument("--api_key", "-a", type=str, required=True, help="Your API key for Gemini.")
args = parser.parse_args()


remove_shm_from_resource_tracker()

shm = None

try:
shape = tuple(json.loads(args.shm_shape))
dtype = np.dtype(args.shm_dtype)
shm = SharedMemory(name=args.shm_name)

np_img = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
image = Image.fromarray(np_img)

client = genai.Client(api_key=args.api_key)
content = client.models.generate_content(model=args.model, contents=[args.prompt, Image.open(args.filepath)])
content = client.models.generate_content(model=args.model, contents=[args.prompt, image])
print(content.text)
sys.exit(0)

except ClientError as e:
if hasattr(e, 'code') and e.code == 429:
print("Caught a ResourceExhausted error (429 Too Many Requests)", file=sys.stderr)
else:
print(e, file=sys.stderr)
sys.exit(1)

except Exception as e:
print(e)
print(e, file=sys.stderr)
sys.exit(1)

finally:
if shm:
shm.close()

if __name__ == "__main__":
main()
Loading