feat(google-vertex): add new models [bot]#977
Conversation
/test-models
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
❌ Bugbot Autofix is OFF. To automatically fix reported issues with cloud agents, enable autofix in the Cursor dashboard.
Reviewed by Cursor Bugbot for commit 877b9a7. Configure here.
Gateway test results
Failures (10)
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
response_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"date": {"type": "string"},
"participants": {
"type": "array",
"items": {"type": "string"},
},
},
"required": ["name", "date", "participants"],
}
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="Alice and Bob are going to a science fair on Friday.")]),
]
config = types.GenerateContentConfig(
system_instruction="Extract the event information as a structured CalendarEvent JSON object.",
response_mime_type="application/json",
response_json_schema=response_schema,
)
response = client.models.generate_content(
model=_model_id,
contents=contents,
config=config,
)
print(response.text)
import json as _json
_text = response.text
print(_text)
if not _text:
raise Exception("VALIDATION FAILED: structured-output - GenAI response text is empty")
_parsed = _json.loads(_text)
print(_json.dumps(_parsed, indent=2))
if "name" not in _parsed or "date" not in _parsed or "participants" not in _parsed:
raise Exception("VALIDATION FAILED: structured-output - missing expected fields (name, date, participants)")
if not isinstance(_parsed.get("participants"), list):
raise Exception("VALIDATION FAILED: structured-output - 'participants' is not a list, schema not enforced")
print("VALIDATION: structured-output SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
get_weather = types.FunctionDeclaration(
name="get_weather",
description="Get the current weather for a location.",
parameters_json_schema={
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name, e.g. London",
},
},
"required": ["location"],
},
)
tool = types.Tool(function_declarations=[get_weather])
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="Use the get_weather tool to check the weather in London. You must call the tool, do not respond with plain text.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant with access to tools. You MUST strictly use the provided tools to answer. Never respond with plain text when a tool is available.",
tools=[tool],
)
response = client.models.generate_content(
model=_model_id,
contents=contents,
config=config,
)
for part in response.candidates[0].content.parts:
if part.function_call:
print(f"Tool: {part.function_call.name}")
print(f"Args: {part.function_call.args}")
elif part.text:
print(part.text)
_parts = response.candidates[0].content.parts
_function_calls = [p for p in _parts if p.function_call]
if _function_calls:
for _fc in _function_calls:
print(f"Tool: {_fc.function_call.name}")
print(f"Args: {_fc.function_call.args}")
else:
_text_parts = [p.text for p in _parts if p.text]
print("\n".join(_text_parts))
if not _function_calls:
raise Exception("VALIDATION FAILED: tool-call - no function calls in GenAI response")
print("VALIDATION: tool-call SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="How to calculate 3^3^3^3? Think step by step and show all reasoning.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps.",
thinking_config=types.ThinkingConfig(
include_thoughts=True,
thinking_budget=5000,
),
)
_chunks = []
for chunk in client.models.generate_content_stream(
model=_model_id,
contents=contents,
config=config,
):
_chunks.append(chunk)
if chunk.candidates and chunk.candidates[0].content and chunk.candidates[0].content.parts:
for part in chunk.candidates[0].content.parts:
if not part.text:
continue
if part.thought:
print(f"[Thinking] {part.text}", end="", flush=True)
else:
print(part.text, end="", flush=True)
_thought_detected = False
for _chunk in _chunks:
if not _chunk.candidates or not _chunk.candidates[0].content:
continue
for _part in _chunk.candidates[0].content.parts:
if not _part.text:
continue
if _part.thought:
_thought_detected = True
print(_part.text, end="", flush=True)
else:
print(_part.text, end="", flush=True)
if not _thought_detected:
_usage = getattr(_chunks[-1], "usage_metadata", None) if _chunks else None
if _usage and getattr(_usage, "thoughts_token_count", 0):
_thought_detected = True
if not _thought_detected:
raise Exception("VALIDATION FAILED: reasoning stream - no thinking information in GenAI stream")
print("\nVALIDATION: reasoning stream SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="How to calculate 3^3^3^3? Think step by step and show all reasoning.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps.",
thinking_config=types.ThinkingConfig(
include_thoughts=True,
thinking_budget=5000,
),
)
response = client.models.generate_content(
model=_model_id,
contents=contents,
config=config,
)
for part in response.candidates[0].content.parts:
if not part.text:
continue
if part.thought:
print(f"[Thinking] {part.text}")
else:
print(part.text)
_parts = response.candidates[0].content.parts
_thought_detected = False
for _part in _parts:
if not _part.text:
continue
if _part.thought:
_thought_detected = True
print(f"Thinking: {_part.text[:200]}...")
else:
print(_part.text)
_usage = getattr(response, "usage_metadata", None)
if _usage and getattr(_usage, "thoughts_token_count", 0):
_thought_detected = True
if not _thought_detected:
print("Response: ", response)
raise Exception("VALIDATION FAILED: reasoning - no thinking information in GenAI response")
print("VALIDATION: reasoning SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="What is the capital of France?")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant.",
max_output_tokens=256,
temperature=0.7,
)
_chunks = []
for chunk in client.models.generate_content_stream(
model=_model_id,
contents=contents,
config=config,
):
_chunks.append(chunk)
if chunk.text:
print(chunk.text, end="", flush=True)
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
response_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"date": {"type": "string"},
"participants": {
"type": "array",
"items": {"type": "string"},
},
},
"required": ["name", "date", "participants"],
}
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="Alice and Bob are going to a science fair on Friday.")]),
]
config = types.GenerateContentConfig(
system_instruction="Extract the event information as a structured CalendarEvent JSON object.",
response_mime_type="application/json",
response_json_schema=response_schema,
)
_chunks = []
for chunk in client.models.generate_content_stream(
model=_model_id,
contents=contents,
config=config,
):
_chunks.append(chunk)
if chunk.text:
print(chunk.text, end="", flush=True)
import json as _json
_accumulated = ""
for _chunk in _chunks:
if _chunk.text:
_accumulated += _chunk.text
if not _accumulated:
raise Exception("VALIDATION FAILED: structured-output stream - no content received from GenAI stream")
_parsed = _json.loads(_accumulated)
print(_json.dumps(_parsed, indent=2))
if "name" not in _parsed or "date" not in _parsed or "participants" not in _parsed:
raise Exception("VALIDATION FAILED: structured-output stream - missing expected fields (name, date, participants)")
if not isinstance(_parsed.get("participants"), list):
raise Exception("VALIDATION FAILED: structured-output stream - 'participants' is not a list, schema not enforced")
print("\nVALIDATION: structured-output stream SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="List 3 colors with their hex codes in JSON.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant. Respond in JSON format.",
response_mime_type="application/json",
)
response = client.models.generate_content(
model=_model_id,
contents=contents,
config=config,
)
print(response.text)
import json as _json
_text = response.text
print(_text)
if not _text:
raise Exception("VALIDATION FAILED: json-output - GenAI response text is empty")
_json.loads(_text)
print("VALIDATION: json-output SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="What is the capital of France?")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant.",
max_output_tokens=256,
temperature=0.7,
)
response = client.models.generate_content(
model=_model_id,
contents=contents,
config=config,
)
for part in response.candidates[0].content.parts:
if part.text:
print(part.text)
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="List 3 colors with their hex codes in JSON.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant. Respond in JSON format.",
response_mime_type="application/json",
)
_chunks = []
for chunk in client.models.generate_content_stream(
model=_model_id,
contents=contents,
config=config,
):
_chunks.append(chunk)
if chunk.text:
print(chunk.text, end="", flush=True)
import json as _json
_accumulated = ""
for _chunk in _chunks:
if _chunk.text:
_accumulated += _chunk.text
if not _accumulated:
raise Exception("VALIDATION FAILED: json-output stream - no content received from GenAI stream")
_json.loads(_accumulated)
print("\nVALIDATION: json-output stream SUCCESS")
Error: Code snippet
from google import genai
from google.genai import types
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_full_model = "test-v2-vertex/google/gemini-3.1-flash-lite"
_parts = _full_model.split("/")
_provider_account = _parts[0]
_model_id = "/".join(_parts[1:])
if "/" in _model_id:
_model_id = _model_id.rsplit("/", 1)[-1]
_base_url = f"{_endpoint}/gemini/{_provider_account}/proxy"
client = genai.Client(
api_key=_api_key,
http_options=types.HttpOptions(base_url=_base_url),
)
get_weather = types.FunctionDeclaration(
name="get_weather",
description="Get the current weather for a location.",
parameters_json_schema={
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name, e.g. London",
},
},
"required": ["location"],
},
)
tool = types.Tool(function_declarations=[get_weather])
contents = [
types.Content(role="user", parts=[types.Part.from_text(text="Hi")]),
types.Content(role="model", parts=[types.Part.from_text(text="Hi, how can I help you")]),
types.Content(role="user", parts=[types.Part.from_text(text="Use the get_weather tool to check the weather in London. You must call the tool, do not respond with plain text.")]),
]
config = types.GenerateContentConfig(
system_instruction="You are a helpful assistant with access to tools. You MUST strictly use the provided tools to answer. Never respond with plain text when a tool is available.",
tools=[tool],
)
_chunks = []
for chunk in client.models.generate_content_stream(
model=_model_id,
contents=contents,
config=config,
):
_chunks.append(chunk)
if chunk.candidates and chunk.candidates[0].content and chunk.candidates[0].content.parts:
for part in chunk.candidates[0].content.parts:
if part.function_call:
print(f"Tool: {part.function_call.name}", flush=True)
print(f"Args: {part.function_call.args}", flush=True)
elif part.text:
print(part.text, end="", flush=True)
_tool_use_detected = False
for _chunk in _chunks:
if not _chunk.candidates or not _chunk.candidates[0].content:
continue
for _part in _chunk.candidates[0].content.parts:
if _part.function_call:
_tool_use_detected = True
print(f"Tool: {_part.function_call.name}", flush=True)
print(f"Args: {_part.function_call.args}", flush=True)
elif _part.text:
print(_part.text, end="", flush=True)
if not _tool_use_detected:
raise Exception("VALIDATION FAILED: tool-call stream - no function calls in GenAI stream")
print("\nVALIDATION: tool-call stream SUCCESS")

Auto-generated by model-addition-agent for provider google-vertex.
Note
Low Risk
Low risk: this PR only adds new model metadata/config YAMLs and does not change runtime logic.
Overview
Adds new Google Vertex model definition YAMLs for `google/gemini-3.1-flash-lite` and `moonshotai/kimi-k2-6`. The `gemini-3.1-flash-lite` config includes pricing by region, supported features/modalities, large context/token limits, and marks the model as preview with `thinking` enabled; `kimi-k2-6` is added as a minimal stub with `mode: unknown`.
Reviewed by Cursor Bugbot for commit 877b9a7. Bugbot is set up for automated code reviews on this repo. Configure here.