Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
23 changes: 22 additions & 1 deletion .github/scripts/autogen/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,10 @@ export interface components {
*/
Mode: "audio_transcription" | "audio_translation" | "chat" | "completion" | "embedding" | "image" | "moderation" | "realtime" | "rerank" | "responses" | "text_to_speech" | "unknown" | "unsupported" | "video";
ModelConfig: {
/** @description Pricing entries per region; use "*" for global/uniform pricing */
/**
* @description Pricing entries per region; use "*" for global/uniform pricing
* @deprecated Prefer `pricing` (kept for backward compatibility)
*/
costs?: components["schemas"]["CostWithRegion"][];
/** @description Date after which the model is considered deprecated (YYYY-MM-DD) */
deprecationDate?: string;
Expand All @@ -112,6 +115,7 @@ export interface components {
model: string;
/** @description Param overrides or additions relative to the provider default */
params?: components["schemas"]["ModelParam"][];
pricing?: components["schemas"]["Pricing"];
provisioning?: components["schemas"]["Provisioning"];
/** @description Param keys to remove from the provider default */
removeParams?: components["schemas"]["ModelParamKey"][];
Expand All @@ -136,6 +140,11 @@ export interface components {
ModelParamKey: "json_schema" | "max_completion_tokens" | "max_tokens" | "min_tokens" | "n" | "parallel_tool_calls" | "reasoning" | "reasoning_effort" | "response_format" | "seed" | "stop" | "stream" | "temperature" | "thinking" | "tool_choice" | "top_k" | "top_p" | "verbosity";
/** @enum {string} */
ModelParamType: "array-of-strings" | "boolean" | "json" | "number" | "string";
/**
* @description Pricing grouped by service tier, with one optional key per tier.
* Keys are enumerated explicitly because `cue def --out openapi` drops typed key constraints.
*/
Pricing: {
/** @description Cost entries for the "priority" tier, as a list of `CostWithRegion` */
priority?: components["schemas"]["CostWithRegion"][];
/** @description Cost entries for the "standard" tier, as a list of `CostWithRegion` */
standard?: components["schemas"]["CostWithRegion"][];
};
/**
* @description How the model prices long context tokens
* marginal: remaining tokens after long context are priced under long context pricing
Expand All @@ -153,6 +162,16 @@ export interface components {
* @enum {string}
*/
Provisioning: "serverless" | "provisioned";
/**
* @description Service tier names as a fixed two-element tuple; the same names appear as the keys of `Pricing`.
* @default [
* "priority",
* "standard"
* ]
*/
ServiceTiers: [
"priority",
"standard"
];
/**
* @description Lifecycle status of a model
* @enum {string}
Expand Down Expand Up @@ -197,9 +216,11 @@ export type ModelConfig = components['schemas']['ModelConfig'];
export type ModelParam = components['schemas']['ModelParam'];
export type ModelParamKey = components['schemas']['ModelParamKey'];
export type ModelParamType = components['schemas']['ModelParamType'];
export type Pricing = components['schemas']['Pricing'];
export type PricingMode = components['schemas']['PricingMode'];
export type PricingTier = components['schemas']['PricingTier'];
export type Provisioning = components['schemas']['Provisioning'];
export type ServiceTiers = components['schemas']['ServiceTiers'];
export type Status = components['schemas']['Status'];
export type TieredPricing = components['schemas']['TieredPricing'];
export type VertexRegion = components['schemas']['VertexRegion'];
13 changes: 13 additions & 0 deletions .github/test/model.cue
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,11 @@ package model

#ModelConfig: {
// Pricing entries per region; use "*" for global/uniform pricing
// Deprecated: prefer `pricing` (kept for backward compatibility)
costs?: [...#CostWithRegion]
// Pricing entries keyed by service tier; each entry holds an array of
// region-scoped cost entries (same shape as the legacy `costs` field)
pricing?: #Pricing
// Date after which the model is considered deprecated (YYYY-MM-DD)
deprecationDate?: string & =~"^\\d{4}-\\d{2}-\\d{2}$"
// Feature flags for capabilities like function calling, prompt caching, etc.
Expand Down Expand Up @@ -287,6 +291,13 @@ package model
thinking?: bool
}

// Pricing keyed by service tier: one optional field is generated for each
// entry in #ServiceTiers, and each field holds a list of #CostWithRegion entries.
// Keys enumerated explicitly because cue def --out openapi drops typed key constraints.
#Pricing: {
for tier in #ServiceTiers {
"\(tier)"?: [...#CostWithRegion]
}
}

#PricingTier: {
cost_per_token: number & >= 0
from: int & >= 0
Expand All @@ -304,6 +315,8 @@ package model
"serverless" | // Managed API, pay-per-token/request
"provisioned" // Dedicated capacity or user-deployed instance

// Service tier names; #Pricing generates one optional field per entry.
#ServiceTiers: ["priority", "standard"]

// Lifecycle status of a model
#Status:
"active" | // Model is fully supported and recommended for use (aka stable, ga)
Expand Down
5 changes: 5 additions & 0 deletions providers/ai21/j2-light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,10 @@ limits:
max_tokens: 4096
mode: completion
model: j2-light
pricing:
standard:
- input_cost_per_token: 0.000003
output_cost_per_token: 0.000003
region: "*"
removeParams:
- stream
5 changes: 5 additions & 0 deletions providers/ai21/j2-mid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@ limits:
max_tokens: 4096
mode: completion
model: j2-mid
pricing:
standard:
- input_cost_per_token: 0.00001
output_cost_per_token: 0.00001
region: "*"
removeParams:
- stream
5 changes: 5 additions & 0 deletions providers/ai21/j2-ultra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@ limits:
max_tokens: 4096
mode: completion
model: j2-ultra
pricing:
standard:
- input_cost_per_token: 0.000015
output_cost_per_token: 0.000015
region: "*"
removeParams:
- stream
5 changes: 5 additions & 0 deletions providers/ai21/jamba-1.5-large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 4096
mode: chat
model: jamba-1.5-large
pricing:
standard:
- input_cost_per_token: 0.000002
output_cost_per_token: 0.000008
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-1.5-large@001.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-1.5-large@001
pricing:
standard:
- input_cost_per_token: 0.000002
output_cost_per_token: 0.000008
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-1.5-mini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 4096
mode: chat
model: jamba-1.5-mini
pricing:
standard:
- input_cost_per_token: 2e-7
output_cost_per_token: 4e-7
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-1.5-mini@001.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-1.5-mini@001
pricing:
standard:
- input_cost_per_token: 2e-7
output_cost_per_token: 4e-7
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-1.5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-1.5
pricing:
standard:
- input_cost_per_token: 2e-7
output_cost_per_token: 4e-7
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-large-1.6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-large-1.6
pricing:
standard:
- input_cost_per_token: 0.000002
output_cost_per_token: 0.000008
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-large-1.7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ params:
key: "n"
maxValue: 16
minValue: 1
pricing:
standard:
- input_cost_per_token: 0.000002
output_cost_per_token: 0.000008
region: "*"
provisioning: serverless
removeParams:
- top_k
Expand Down
5 changes: 5 additions & 0 deletions providers/ai21/jamba-mini-1.6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-mini-1.6
pricing:
standard:
- input_cost_per_token: 2e-7
output_cost_per_token: 4e-7
region: "*"
5 changes: 5 additions & 0 deletions providers/ai21/jamba-mini-1.7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ limits:
max_tokens: 256000
mode: chat
model: jamba-mini-1.7
pricing:
standard:
- input_cost_per_token: 2e-7
output_cost_per_token: 4e-7
region: "*"
9 changes: 9 additions & 0 deletions providers/anthropic/claude-3-5-haiku-20241022.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,12 @@ model: claude-3-5-haiku-20241022
params:
- key: max_tokens
maxValue: 8192
pricing:
standard:
- cache_creation_input_token_cost: 0.000001
cache_read_input_token_cost: 8e-8
input_cost_per_token: 8e-7
input_cost_per_token_batches: 4e-7
output_cost_per_token: 0.000004
output_cost_per_token_batches: 0.000002
region: "*"
7 changes: 7 additions & 0 deletions providers/anthropic/claude-3-5-sonnet-20240620.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@ model: claude-3-5-sonnet-20240620
params:
- key: max_tokens
maxValue: 8192
pricing:
standard:
- cache_creation_input_token_cost: 0.00000375
cache_read_input_token_cost: 3e-7
input_cost_per_token: 0.000003
output_cost_per_token: 0.000015
region: "*"
7 changes: 7 additions & 0 deletions providers/anthropic/claude-3-5-sonnet-20241022.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@ model: claude-3-5-sonnet-20241022
params:
- key: max_tokens
maxValue: 8192
pricing:
standard:
- cache_creation_input_token_cost: 0.00000375
cache_read_input_token_cost: 3e-7
input_cost_per_token: 0.000003
output_cost_per_token: 0.000015
region: "*"
7 changes: 7 additions & 0 deletions providers/anthropic/claude-3-5-sonnet-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@ model: claude-3-5-sonnet-latest
params:
- key: max_tokens
maxValue: 8192
pricing:
standard:
- cache_creation_input_token_cost: 0.00000375
cache_read_input_token_cost: 3e-7
input_cost_per_token: 0.000003
output_cost_per_token: 0.000015
region: "*"
9 changes: 9 additions & 0 deletions providers/anthropic/claude-3-7-sonnet-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,12 @@ params:
maxValue: 128000
- defaultValue: null
key: thinking
pricing:
standard:
- cache_creation_input_token_cost: 0.00000375
cache_read_input_token_cost: 3e-7
input_cost_per_token: 0.000003
input_cost_per_token_batches: 0.0000015
output_cost_per_token: 0.000015
output_cost_per_token_batches: 0.0000075
region: "*"
9 changes: 9 additions & 0 deletions providers/anthropic/claude-3-haiku-20240307.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@ model: claude-3-haiku-20240307
params:
- key: max_tokens
maxValue: 4096
pricing:
standard:
- cache_creation_input_token_cost: 3e-7
cache_read_input_token_cost: 3e-8
input_cost_per_token: 2.5e-7
input_cost_per_token_batches: 1.25e-7
output_cost_per_token: 0.00000125
output_cost_per_token_batches: 6.25e-7
region: "*"
9 changes: 9 additions & 0 deletions providers/anthropic/claude-3-opus-20240229.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@ model: claude-3-opus-20240229
params:
- key: max_tokens
maxValue: 4096
pricing:
standard:
- cache_creation_input_token_cost: 0.00001875
cache_read_input_token_cost: 0.0000015
input_cost_per_token: 0.000015
input_cost_per_token_batches: 0.0000075
output_cost_per_token: 0.000075
output_cost_per_token_batches: 0.0000375
region: "*"
9 changes: 9 additions & 0 deletions providers/anthropic/claude-3-opus-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ params:
maxValue: 64000
- defaultValue: null
key: thinking
pricing:
standard:
- cache_creation_input_token_cost: 0.00001875
cache_read_input_token_cost: 0.0000015
input_cost_per_token: 0.000015
input_cost_per_token_batches: 0.0000075
output_cost_per_token: 0.000075
output_cost_per_token_batches: 0.0000375
region: "*"
removeParams:
- temperature
- top_p
Expand Down
9 changes: 9 additions & 0 deletions providers/anthropic/claude-4-opus-20250514.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ params:
maxValue: 64000
- defaultValue: null
key: thinking
pricing:
standard:
- cache_creation_input_token_cost: 0.00001875
cache_read_input_token_cost: 0.0000015
input_cost_per_token: 0.000015
input_cost_per_token_batches: 0.0000075
output_cost_per_token: 0.000075
output_cost_per_token_batches: 0.0000375
region: "*"
removeParams:
- temperature
- top_p
Expand Down
9 changes: 9 additions & 0 deletions providers/anthropic/claude-4-sonnet-20250514.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ params:
key: max_tokens
maxValue: 64000
minValue: 1
pricing:
standard:
- cache_creation_input_token_cost: 0.00000375
cache_read_input_token_cost: 3e-7
input_cost_per_token: 0.000003
input_cost_per_token_batches: 0.0000015
output_cost_per_token: 0.000015
output_cost_per_token_batches: 0.0000075
region: "*"
status: active
supportedModes:
- chat
Expand Down
10 changes: 10 additions & 0 deletions providers/anthropic/claude-haiku-4-5-20251001.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ params:
maxValue: 64000
- defaultValue: null
key: thinking
pricing:
standard:
- cache_creation_input_token_cost: 0.00000125
cache_creation_input_token_cost_per_hour: 0.000002
cache_read_input_token_cost: 1e-7
input_cost_per_token: 0.000001
input_cost_per_token_batches: 5e-7
output_cost_per_token: 0.000005
output_cost_per_token_batches: 0.0000025
region: "*"
provisioning: serverless
removeParams:
- top_p
Expand Down
10 changes: 10 additions & 0 deletions providers/anthropic/claude-haiku-4-5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ params:
key: max_tokens
maxValue: 64000
minValue: 1
pricing:
standard:
- cache_creation_input_token_cost: 0.00000125
cache_creation_input_token_cost_per_hour: 0.000002
cache_read_input_token_cost: 1e-7
input_cost_per_token: 0.000001
input_cost_per_token_batches: 5e-7
output_cost_per_token: 0.000005
output_cost_per_token_batches: 0.0000025
region: "*"
provisioning: serverless
status: active
supportedModes:
Expand Down
10 changes: 10 additions & 0 deletions providers/anthropic/claude-opus-4-1-20250805.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ params:
maxValue: 32000
- defaultValue: null
key: thinking
pricing:
standard:
- cache_creation_input_token_cost: 0.00001875
cache_creation_input_token_cost_per_hour: 0.00003
cache_read_input_token_cost: 0.0000015
input_cost_per_token: 0.000015
input_cost_per_token_batches: 0.0000075
output_cost_per_token: 0.000075
output_cost_per_token_batches: 0.0000375
region: "*"
provisioning: serverless
removeParams:
- temperature
Expand Down
Loading
Loading