Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions preprocessors/text-followup/text-followup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 IMAGE Project, Shared Reality Lab, McGill University
# Copyright (c) 2025 IMAGE Project, Shared Reality Lab, McGill University
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down Expand Up @@ -452,13 +452,16 @@ def followup():
{"error": "Failed to process focus area on image"}
), 500

# get followup prompt from env as an override if it exists
followup_prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT)

if not focus:
system_prompt = FOLLOWUP_PROMPT
system_prompt = followup_prompt
else:
system_prompt = FOLLOWUP_PROMPT + FOLLOWUP_PROMPT_FOCUS
system_prompt = followup_prompt + FOLLOWUP_PROMPT_FOCUS

system_message = {
"role": "developer",
"role": "system",
"content": system_prompt
}

Expand Down Expand Up @@ -508,7 +511,9 @@ def followup():

followup_response_json = llm_client.chat_completion(
prompt="", # Empty since we're using full messages via kwargs
json_schema=FOLLOWUP_RESPONSE_SCHEMA,
system_prompt=system_prompt,
json_schema=None, # qwen3 wants json_object not rigid schema
response_format={"type": "json_object"},
temperature=0.0,
messages=messages, # Pass full conversation history via kwargs
parse_json=True,
Expand All @@ -518,7 +523,7 @@ def followup():
if followup_response_json is None:
logging.error("Failed to receive response from LLM.")
return jsonify(
{"error": "Failed to get graphic caption from LLM"}
{"error": "Failed to receive response from LLM"}
), 500

response_text, token_usage = followup_response_json
Expand Down
4 changes: 3 additions & 1 deletion utils/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def chat_completion(
# Add system prompt if provided
if system_prompt:
messages.append(
{"role": "developer",
{"role": "system",
"content": system_prompt}
)

Expand All @@ -140,6 +140,8 @@ def chat_completion(

messages.append({"role": "user", "content": user_content})

logging.pii(messages)

# Build API call parameters
params = {
"model": self.model,
Expand Down
3 changes: 2 additions & 1 deletion utils/llm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@
"response_brief": "One sentence response to the user request.",
"response_full": "Further details. Maximum three sentences."
}

"""
OLD_END_OF_FOLLOWUP_PROMPT = """
The user may add a note to focus on a specific part of the image
and an updated picture with the area of interest marked with a red rectangle.
In this case, answer the question ONLY about the contents
Expand Down