truefoundry · harshiv-26 · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 27, 2026
diff --git a/.github/scripts/autogen/types.ts b/.github/scripts/autogen/types.ts
@@ -96,7 +96,10 @@ export interface components {
          */
         Mode: "audio_transcription" | "audio_translation" | "chat" | "completion" | "embedding" | "image" | "moderation" | "realtime" | "rerank" | "responses" | "text_to_speech" | "unknown" | "unsupported" | "video";
         ModelConfig: {
-            /** @description Pricing entries per region; use "*" for global/uniform pricing */
+            /**
+             * @description Pricing entries per region; use "*" for global/uniform pricing
+             *     Deprecated: prefer `pricing` (kept for backward compatibility)
+             */
             costs?: components["schemas"]["CostWithRegion"][];
             /** @description Date after which the model is considered deprecated (YYYY-MM-DD) */
             deprecationDate?: string;
@@ -112,6 +115,7 @@ export interface components {
             model: string;
             /** @description Param overrides or additions relative to the provider default */
             params?: components["schemas"]["ModelParam"][];
+            pricing?: components["schemas"]["Pricing"];
             provisioning?: components["schemas"]["Provisioning"];
             /** @description Param keys to remove from the provider default */
             removeParams?: components["schemas"]["ModelParamKey"][];
@@ -136,6 +140,11 @@ export interface components {
         ModelParamKey: "json_schema" | "max_completion_tokens" | "max_tokens" | "min_tokens" | "n" | "parallel_tool_calls" | "reasoning" | "reasoning_effort" | "response_format" | "seed" | "stop" | "stream" | "temperature" | "thinking" | "tool_choice" | "top_k" | "top_p" | "verbosity";
         /** @enum {string} */
         ModelParamType: "array-of-strings" | "boolean" | "json" | "number" | "string";
+        /** @description Keys enumerated explicitly because cue def --out openapi drops typed key constraints. */
+        Pricing: {
+            priority?: components["schemas"]["CostWithRegion"][];
+            standard?: components["schemas"]["CostWithRegion"][];
+        };
         /**
          * @description How the model prices long context tokens
          *     marginal: remaining tokens after long context are priced under long context pricing
@@ -153,6 +162,16 @@ export interface components {
          * @enum {string}
          */
         Provisioning: "serverless" | "provisioned";
+        /**
+         * @default [
+         *       "priority",
+         *       "standard"
+         *     ]
+         */
+        ServiceTiers: [
+            "priority",
+            "standard"
+        ];
         /**
          * @description Lifecycle status of a model
          * @enum {string}
@@ -197,9 +216,11 @@ export type ModelConfig = components['schemas']['ModelConfig'];
 export type ModelParam = components['schemas']['ModelParam'];
 export type ModelParamKey = components['schemas']['ModelParamKey'];
 export type ModelParamType = components['schemas']['ModelParamType'];
+export type Pricing = components['schemas']['Pricing'];
 export type PricingMode = components['schemas']['PricingMode'];
 export type PricingTier = components['schemas']['PricingTier'];
 export type Provisioning = components['schemas']['Provisioning'];
+export type ServiceTiers = components['schemas']['ServiceTiers'];
 export type Status = components['schemas']['Status'];
 export type TieredPricing = components['schemas']['TieredPricing'];
 export type VertexRegion = components['schemas']['VertexRegion'];
diff --git a/.github/test/model.cue b/.github/test/model.cue
@@ -252,7 +252,11 @@ package model
 
 #ModelConfig: {
 	// Pricing entries per region; use "*" for global/uniform pricing
+	// Deprecated: prefer `pricing` (kept for backward compatibility)
 	costs?: [...#CostWithRegion]
+	// Pricing entries keyed by service tier; each entry holds an array of
+	// region-scoped cost entries (same shape as the legacy `costs` field)
+	pricing?: #Pricing
 	// Date after which the model is considered deprecated (YYYY-MM-DD)
 	deprecationDate?: string & =~"^\\d{4}-\\d{2}-\\d{2}$"
 	// Feature flags for capabilities like function calling, prompt caching, etc.
@@ -287,6 +291,13 @@ package model
 	thinking?: bool
 }
 
+// Keys enumerated explicitly because cue def --out openapi drops typed key constraints.
+#Pricing: {
+	for tier in #ServiceTiers {
+		"\(tier)"?: [...#CostWithRegion]
+	}
+}
+
 #PricingTier: {
 	cost_per_token: number & >= 0
 	from:           int & >= 0
@@ -304,6 +315,8 @@ package model
 	"serverless" |   // Managed API, pay-per-token/request
 	"provisioned"    // Dedicated capacity or user-deployed instance
 
+#ServiceTiers: ["priority", "standard"]
+
 // Lifecycle status of a model
 #Status:
 	"active" |      // Model is fully supported and recommended for use (aka stable, ga)

diff --git a/providers/ai21/j2-light.yaml b/providers/ai21/j2-light.yaml
@@ -8,5 +8,10 @@ limits:
     max_tokens: 4096
 mode: completion
 model: j2-light
+pricing:
+    standard:
+        - input_cost_per_token: 0.000003
+          output_cost_per_token: 0.000003
+          region: "*"
 removeParams:
     - stream
diff --git a/providers/ai21/j2-mid.yaml b/providers/ai21/j2-mid.yaml
@@ -9,5 +9,10 @@ limits:
     max_tokens: 4096
 mode: completion
 model: j2-mid
+pricing:
+    standard:
+        - input_cost_per_token: 0.00001
+          output_cost_per_token: 0.00001
+          region: "*"
 removeParams:
     - stream
diff --git a/providers/ai21/j2-ultra.yaml b/providers/ai21/j2-ultra.yaml
@@ -9,5 +9,10 @@ limits:
     max_tokens: 4096
 mode: completion
 model: j2-ultra
+pricing:
+    standard:
+        - input_cost_per_token: 0.000015
+          output_cost_per_token: 0.000015
+          region: "*"
 removeParams:
     - stream
diff --git a/providers/ai21/jamba-1.5-large.yaml b/providers/ai21/jamba-1.5-large.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 4096
 mode: chat
 model: jamba-1.5-large
+pricing:
+    standard:
+        - input_cost_per_token: 0.000002
+          output_cost_per_token: 0.000008
+          region: "*"
diff --git a/providers/ai21/jamba-1.5-large@001.yaml b/providers/ai21/jamba-1.5-large@001.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-1.5-large@001
+pricing:
+    standard:
+        - input_cost_per_token: 0.000002
+          output_cost_per_token: 0.000008
+          region: "*"
diff --git a/providers/ai21/jamba-1.5-mini.yaml b/providers/ai21/jamba-1.5-mini.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 4096
 mode: chat
 model: jamba-1.5-mini
+pricing:
+    standard:
+        - input_cost_per_token: 2e-7
+          output_cost_per_token: 4e-7
+          region: "*"
diff --git a/providers/ai21/jamba-1.5-mini@001.yaml b/providers/ai21/jamba-1.5-mini@001.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-1.5-mini@001
+pricing:
+    standard:
+        - input_cost_per_token: 2e-7
+          output_cost_per_token: 4e-7
+          region: "*"
diff --git a/providers/ai21/jamba-1.5.yaml b/providers/ai21/jamba-1.5.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-1.5
+pricing:
+    standard:
+        - input_cost_per_token: 2e-7
+          output_cost_per_token: 4e-7
+          region: "*"
diff --git a/providers/ai21/jamba-large-1.6.yaml b/providers/ai21/jamba-large-1.6.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-large-1.6
+pricing:
+    standard:
+        - input_cost_per_token: 0.000002
+          output_cost_per_token: 0.000008
+          region: "*"
diff --git a/providers/ai21/jamba-large-1.7.yaml b/providers/ai21/jamba-large-1.7.yaml
@@ -31,6 +31,11 @@ params:
       key: "n"
       maxValue: 16
       minValue: 1
+pricing:
+    standard:
+        - input_cost_per_token: 0.000002
+          output_cost_per_token: 0.000008
+          region: "*"
 provisioning: serverless
 removeParams:
     - top_k

diff --git a/providers/ai21/jamba-mini-1.6.yaml b/providers/ai21/jamba-mini-1.6.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-mini-1.6
+pricing:
+    standard:
+        - input_cost_per_token: 2e-7
+          output_cost_per_token: 4e-7
+          region: "*"
diff --git a/providers/ai21/jamba-mini-1.7.yaml b/providers/ai21/jamba-mini-1.7.yaml
@@ -11,3 +11,8 @@ limits:
     max_tokens: 256000
 mode: chat
 model: jamba-mini-1.7
+pricing:
+    standard:
+        - input_cost_per_token: 2e-7
+          output_cost_per_token: 4e-7
+          region: "*"
diff --git a/providers/anthropic/claude-3-5-haiku-20241022.yaml b/providers/anthropic/claude-3-5-haiku-20241022.yaml
@@ -26,3 +26,12 @@ model: claude-3-5-haiku-20241022
 params:
     - key: max_tokens
       maxValue: 8192
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.000001
+          cache_read_input_token_cost: 8e-8
+          input_cost_per_token: 8e-7
+          input_cost_per_token_batches: 4e-7
+          output_cost_per_token: 0.000004
+          output_cost_per_token_batches: 0.000002
+          region: "*"
diff --git a/providers/anthropic/claude-3-5-sonnet-20240620.yaml b/providers/anthropic/claude-3-5-sonnet-20240620.yaml
@@ -24,3 +24,10 @@ model: claude-3-5-sonnet-20240620
 params:
     - key: max_tokens
       maxValue: 8192
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000375
+          cache_read_input_token_cost: 3e-7
+          input_cost_per_token: 0.000003
+          output_cost_per_token: 0.000015
+          region: "*"
diff --git a/providers/anthropic/claude-3-5-sonnet-20241022.yaml b/providers/anthropic/claude-3-5-sonnet-20241022.yaml
@@ -24,3 +24,10 @@ model: claude-3-5-sonnet-20241022
 params:
     - key: max_tokens
       maxValue: 8192
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000375
+          cache_read_input_token_cost: 3e-7
+          input_cost_per_token: 0.000003
+          output_cost_per_token: 0.000015
+          region: "*"
diff --git a/providers/anthropic/claude-3-5-sonnet-latest.yaml b/providers/anthropic/claude-3-5-sonnet-latest.yaml
@@ -24,3 +24,10 @@ model: claude-3-5-sonnet-latest
 params:
     - key: max_tokens
       maxValue: 8192
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000375
+          cache_read_input_token_cost: 3e-7
+          input_cost_per_token: 0.000003
+          output_cost_per_token: 0.000015
+          region: "*"
diff --git a/providers/anthropic/claude-3-7-sonnet-latest.yaml b/providers/anthropic/claude-3-7-sonnet-latest.yaml
@@ -28,3 +28,12 @@ params:
       maxValue: 128000
     - defaultValue: null
       key: thinking
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000375
+          cache_read_input_token_cost: 3e-7
+          input_cost_per_token: 0.000003
+          input_cost_per_token_batches: 0.0000015
+          output_cost_per_token: 0.000015
+          output_cost_per_token_batches: 0.0000075
+          region: "*"
diff --git a/providers/anthropic/claude-3-haiku-20240307.yaml b/providers/anthropic/claude-3-haiku-20240307.yaml
@@ -25,3 +25,12 @@ model: claude-3-haiku-20240307
 params:
     - key: max_tokens
       maxValue: 4096
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 3e-7
+          cache_read_input_token_cost: 3e-8
+          input_cost_per_token: 2.5e-7
+          input_cost_per_token_batches: 1.25e-7
+          output_cost_per_token: 0.00000125
+          output_cost_per_token_batches: 6.25e-7
+          region: "*"
diff --git a/providers/anthropic/claude-3-opus-20240229.yaml b/providers/anthropic/claude-3-opus-20240229.yaml
@@ -25,3 +25,12 @@ model: claude-3-opus-20240229
 params:
     - key: max_tokens
       maxValue: 4096
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00001875
+          cache_read_input_token_cost: 0.0000015
+          input_cost_per_token: 0.000015
+          input_cost_per_token_batches: 0.0000075
+          output_cost_per_token: 0.000075
+          output_cost_per_token_batches: 0.0000375
+          region: "*"
diff --git a/providers/anthropic/claude-3-opus-latest.yaml b/providers/anthropic/claude-3-opus-latest.yaml
@@ -27,6 +27,15 @@ params:
       maxValue: 64000
     - defaultValue: null
       key: thinking
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00001875
+          cache_read_input_token_cost: 0.0000015
+          input_cost_per_token: 0.000015
+          input_cost_per_token_batches: 0.0000075
+          output_cost_per_token: 0.000075
+          output_cost_per_token_batches: 0.0000375
+          region: "*"
 removeParams:
     - temperature
     - top_p

diff --git a/providers/anthropic/claude-4-opus-20250514.yaml b/providers/anthropic/claude-4-opus-20250514.yaml
@@ -36,6 +36,15 @@ params:
       maxValue: 64000
     - defaultValue: null
       key: thinking
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00001875
+          cache_read_input_token_cost: 0.0000015
+          input_cost_per_token: 0.000015
+          input_cost_per_token_batches: 0.0000075
+          output_cost_per_token: 0.000075
+          output_cost_per_token_batches: 0.0000375
+          region: "*"
 removeParams:
     - temperature
     - top_p

diff --git a/providers/anthropic/claude-4-sonnet-20250514.yaml b/providers/anthropic/claude-4-sonnet-20250514.yaml
@@ -34,6 +34,15 @@ params:
       key: max_tokens
       maxValue: 64000
       minValue: 1
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000375
+          cache_read_input_token_cost: 3e-7
+          input_cost_per_token: 0.000003
+          input_cost_per_token_batches: 0.0000015
+          output_cost_per_token: 0.000015
+          output_cost_per_token_batches: 0.0000075
+          region: "*"
 status: active
 supportedModes:
     - chat

diff --git a/providers/anthropic/claude-haiku-4-5-20251001.yaml b/providers/anthropic/claude-haiku-4-5-20251001.yaml
@@ -37,6 +37,16 @@ params:
       maxValue: 64000
     - defaultValue: null
       key: thinking
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000125
+          cache_creation_input_token_cost_per_hour: 0.000002
+          cache_read_input_token_cost: 1e-7
+          input_cost_per_token: 0.000001
+          input_cost_per_token_batches: 5e-7
+          output_cost_per_token: 0.000005
+          output_cost_per_token_batches: 0.0000025
+          region: "*"
 provisioning: serverless
 removeParams:
     - top_p

diff --git a/providers/anthropic/claude-haiku-4-5.yaml b/providers/anthropic/claude-haiku-4-5.yaml
@@ -36,6 +36,16 @@ params:
       key: max_tokens
       maxValue: 64000
       minValue: 1
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00000125
+          cache_creation_input_token_cost_per_hour: 0.000002
+          cache_read_input_token_cost: 1e-7
+          input_cost_per_token: 0.000001
+          input_cost_per_token_batches: 5e-7
+          output_cost_per_token: 0.000005
+          output_cost_per_token_batches: 0.0000025
+          region: "*"
 provisioning: serverless
 status: active
 supportedModes:

diff --git a/providers/anthropic/claude-opus-4-1-20250805.yaml b/providers/anthropic/claude-opus-4-1-20250805.yaml
@@ -36,6 +36,16 @@ params:
       maxValue: 32000
     - defaultValue: null
       key: thinking
+pricing:
+    standard:
+        - cache_creation_input_token_cost: 0.00001875
+          cache_creation_input_token_cost_per_hour: 0.00003
+          cache_read_input_token_cost: 0.0000015
+          input_cost_per_token: 0.000015
+          input_cost_per_token_batches: 0.0000075
+          output_cost_per_token: 0.000075
+          output_cost_per_token_batches: 0.0000375
+          region: "*"
 provisioning: serverless
 removeParams:
     - temperature