Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go/adk/pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM,
Model: modelName,
Region: region,
AdditionalModelRequestFields: m.AdditionalModelRequestFields,
PromptCaching: m.PromptCaching,
}
return models.NewBedrockModelWithLogger(ctx, cfg, log)

Expand Down
37 changes: 35 additions & 2 deletions go/adk/pkg/models/bedrock.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ type BedrockConfig struct {
Temperature *float64
TopP *float64
AdditionalModelRequestFields map[string]any
// PromptCaching, when true, appends a default CachePoint block at the
// end of the Converse request's system content array and the end of
// the tools array. Bedrock caches up to and including those markers
// across requests in the same region; the cached prefix is billed at a
// reduced rate. The marker is silently ignored by Bedrock for models
// that do not support prompt caching.
PromptCaching bool
}

// BedrockModel implements model.LLM for Amazon Bedrock using the Converse API.
Expand Down Expand Up @@ -151,7 +158,7 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
var toolConfig *types.ToolConfiguration
nameMap := make(map[string]string)
if req.Config != nil && len(req.Config.Tools) > 0 {
tools, nm := convertGenaiToolsToBedrock(req.Config.Tools)
tools, nm := convertGenaiToolsToBedrock(req.Config.Tools, m.Config.PromptCaching)
nameMap = nm
if len(tools) > 0 {
toolConfig = &types.ToolConfiguration{
Expand Down Expand Up @@ -193,6 +200,16 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
Value: systemInstruction,
})
}
// If prompt caching is enabled, mark the end of the system content
// as a cache breakpoint. Bedrock caches everything up to and including
// this point for ~5 minutes; subsequent requests with the same prefix
// hit the cache. Skipped for empty systems — caching nothing is a no-op
// that wastes a marker.
if m.Config.PromptCaching && len(systemPrompt) > 0 {
systemPrompt = append(systemPrompt, &types.SystemContentBlockMemberCachePoint{
Value: types.CachePointBlock{Type: types.CachePointTypeDefault},
})
}

additionalFields := m.buildAdditionalModelRequestFields()

Expand Down Expand Up @@ -568,7 +585,12 @@ func convertGenaiContentsToBedrockMessages(contents []*genai.Content, nameMap ma
// It sanitizes tool names to satisfy Bedrock's [a-zA-Z0-9_-]+ constraint and
// returns the original->sanitized name mapping so callers can apply it to
// conversation history and reverse it when restoring names from responses.
func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]string) {
//
// When promptCaching is true, a CachePoint marker is appended after the
// last tool spec — Bedrock then caches the entire (typically large) tool
// definitions array for ~5 minutes, billing the prefix at a reduced rate
// on cache hits.
func convertGenaiToolsToBedrock(tools []*genai.Tool, promptCaching bool) ([]types.Tool, map[string]string) {
if len(tools) == 0 {
return nil, nil
}
Expand Down Expand Up @@ -625,6 +647,17 @@ func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]s
}
}

// If prompt caching is enabled, append a CachePoint at the END of the
// tool list. Bedrock caches the entire tool definitions array up to
// this marker; this is usually the biggest single chunk of static
// prefix in an agent conversation and benefits most from caching.
// Skipped when there are no tools — a cache marker by itself is a no-op.
if promptCaching && len(bedrockTools) > 0 {
bedrockTools = append(bedrockTools, &types.ToolMemberCachePoint{
Value: types.CachePointBlock{Type: types.CachePointTypeDefault},
})
}

return bedrockTools, nameMap
}

Expand Down
55 changes: 51 additions & 4 deletions go/adk/pkg/models/bedrock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
},
}}}}

bt1, nm1 := convertGenaiToolsToBedrock(tools)
bt1, nm1 := convertGenaiToolsToBedrock(tools, false)
schema := extractSchema(t, bt1, nm1)

props := schema["properties"].(map[string]any)
Expand Down Expand Up @@ -190,7 +190,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
},
}}}}

bt2, nm2 := convertGenaiToolsToBedrock(tools)
bt2, nm2 := convertGenaiToolsToBedrock(tools, false)
schema := extractSchema(t, bt2, nm2)
props, ok := schema["properties"].(map[string]any)
if !ok || len(props) == 0 {
Expand All @@ -211,7 +211,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
ParametersJsonSchema: s,
}}}}

bt3, nm3 := convertGenaiToolsToBedrock(tools)
bt3, nm3 := convertGenaiToolsToBedrock(tools, false)
schema := extractSchema(t, bt3, nm3)
props, ok := schema["properties"].(map[string]any)
if !ok || len(props) == 0 {
Expand Down Expand Up @@ -366,7 +366,7 @@ func TestConvertGenaiToolsToBedrockSanitizesNames(t *testing.T) {
{Name: "filesystem:read_file", Description: "Read a file"},
}}}

bedrockTools, nameMap := convertGenaiToolsToBedrock(tools)
bedrockTools, nameMap := convertGenaiToolsToBedrock(tools, false)
if len(bedrockTools) != 2 {
t.Fatalf("expected 2 tools, got %d", len(bedrockTools))
}
Expand Down Expand Up @@ -424,3 +424,50 @@ func TestStreamingToolCallParseArgs(t *testing.T) {
})
}
}

// TestConvertGenaiToolsToBedrockPromptCaching checks how the promptCaching
// argument of convertGenaiToolsToBedrock shapes the converted tool list:
// no marker when disabled, one trailing default CachePoint when enabled,
// and an empty result when there are no tools to convert.
func TestConvertGenaiToolsToBedrockPromptCaching(t *testing.T) {
	decls := []*genai.FunctionDeclaration{
		{Name: "get_weather", Description: "lookup weather"},
		{Name: "list_pods", Description: "list pods"},
	}
	tools := []*genai.Tool{{FunctionDeclarations: decls}}

	t.Run("disabled: no cache marker appended", func(t *testing.T) {
		converted, _ := convertGenaiToolsToBedrock(tools, false)
		if got := len(converted); got != 2 {
			t.Fatalf("expected 2 tools, got %d", got)
		}
		// No slot may hold a cache marker when the feature is off.
		for idx := range converted {
			if _, isMarker := converted[idx].(*types.ToolMemberCachePoint); isMarker {
				t.Fatalf("did not expect a CachePoint at index %d when caching disabled", idx)
			}
		}
	})

	t.Run("enabled: cache marker appended at the END of the tool list", func(t *testing.T) {
		converted, _ := convertGenaiToolsToBedrock(tools, true)
		if got := len(converted); got != 3 {
			t.Fatalf("expected 3 entries (2 tools + 1 CachePoint), got %d", got)
		}
		// Everything ahead of the trailing slot must still be a ToolSpec, in order.
		for idx, entry := range converted[:2] {
			if _, isSpec := entry.(*types.ToolMemberToolSpec); !isSpec {
				t.Fatalf("entry %d: expected ToolMemberToolSpec, got %T", idx, entry)
			}
		}
		// The final slot must hold a CachePoint whose type is the default one.
		last := converted[2]
		marker, isMarker := last.(*types.ToolMemberCachePoint)
		if !isMarker {
			t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", last)
		}
		if marker.Value.Type != types.CachePointTypeDefault {
			t.Errorf("expected CachePointType=default, got %v", marker.Value.Type)
		}
	})

	t.Run("enabled but no tools: no cache marker (skipped)", func(t *testing.T) {
		converted, _ := convertGenaiToolsToBedrock(nil, true)
		if got := len(converted); got != 0 {
			t.Fatalf("expected empty slice for no tools, got %d entries", got)
		}
	})
}
5 changes: 5 additions & 0 deletions go/api/adk/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ type Bedrock struct {
// additionalModelRequestFields in the Converse API. Use this for provider-specific
// options outside the standard InferenceConfiguration block.
AdditionalModelRequestFields map[string]any `json:"additional_model_request_fields,omitempty"`
// PromptCaching enables Bedrock prompt caching by appending a CachePoint
// block to the end of the system content array and the end of the tools
// array in the Converse request. See the v1alpha2.BedrockConfig CRD doc
// for context.
PromptCaching bool `json:"prompt_caching,omitempty"`
}

func (b *Bedrock) MarshalJSON() ([]byte, error) {
Expand Down
18 changes: 18 additions & 0 deletions go/api/config/crd/bases/kagent.dev_modelconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,24 @@ spec:
Claude extended thinking or top_k. Values are forwarded as-is to the API.
Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}}
x-kubernetes-preserve-unknown-fields: true
promptCaching:
default: false
description: |-
PromptCaching enables Bedrock prompt caching by appending a CachePoint
block at the end of the Converse request's `system` content array and
the end of the `tools` array. Bedrock will cache the prefix up to and
including those cache points across requests in the same region for
roughly 5 minutes after first use, billing the cached portion at a
reduced rate on cache hits.

Recommended for tool-using agents that make many Converse calls per
task with a stable system prompt and tool set — the per-call input
token count can drop by 70-90% on hit. Has no effect on models that
don't support caching; the marker is ignored by Bedrock for those.

See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
for the current list of supported models and minimum prefix sizes.
type: boolean
region:
description: AWS region where the Bedrock model is available (e.g.,
us-east-1, us-west-2)
Expand Down
18 changes: 18 additions & 0 deletions go/api/v1alpha2/modelconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,24 @@ type BedrockConfig struct {
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
AdditionalModelRequestFields *apiextensionsv1.JSON `json:"additionalModelRequestFields,omitempty"`

// PromptCaching enables Bedrock prompt caching by appending a CachePoint
// block at the end of the Converse request's `system` content array and
// the end of the `tools` array. Bedrock will cache the prefix up to and
// including those cache points across requests in the same region for
// roughly 5 minutes after first use, billing the cached portion at a
// reduced rate on cache hits.
Comment on lines +262 to +265
//
// Recommended for tool-using agents that make many Converse calls per
// task with a stable system prompt and tool set — the per-call input
// token count can drop by 70-90% on hit. Has no effect on models that
// don't support caching; the marker is ignored by Bedrock for those.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
// for the current list of supported models and minimum prefix sizes.
// +optional
// +kubebuilder:default=false
PromptCaching bool `json:"promptCaching,omitempty"`
}

// SAPAICoreConfig contains SAP AI Core-specific configuration options.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC
},
Region: model.Spec.Bedrock.Region,
AdditionalModelRequestFields: additionalFields,
PromptCaching: model.Spec.Bedrock.PromptCaching,
}

// Populate TLS fields in BaseModel
Expand Down
18 changes: 18 additions & 0 deletions helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,24 @@ spec:
Claude extended thinking or top_k. Values are forwarded as-is to the API.
Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}}
x-kubernetes-preserve-unknown-fields: true
promptCaching:
default: false
description: |-
PromptCaching enables Bedrock prompt caching by appending a CachePoint
block at the end of the Converse request's `system` content array and
the end of the `tools` array. Bedrock will cache the prefix up to and
including those cache points across requests in the same region for
roughly 5 minutes after first use, billing the cached portion at a
reduced rate on cache hits.

Recommended for tool-using agents that make many Converse calls per
task with a stable system prompt and tool set — the per-call input
token count can drop by 70-90% on hit. Has no effect on models that
don't support caching; the marker is ignored by Bedrock for those.

See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
for the current list of supported models and minimum prefix sizes.
type: boolean
region:
description: AWS region where the Bedrock model is available (e.g.,
us-east-1, us-west-2)
Expand Down
17 changes: 17 additions & 0 deletions python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,12 @@ class KAgentBedrockLlm(KAgentTLSMixin, BaseLlm):

extra_headers: Optional[dict[str, str]] = None
additional_model_request_fields: Optional[dict[str, Any]] = None
# When True, append a CachePoint block to the end of the Converse
# request's `system` content array and the end of the `toolConfig.tools`
# array. Bedrock caches the prefix up to and including those markers
# across requests in the same region; the cached portion is billed at a
# reduced rate on hit. See AWS docs for supported models / minimums.
prompt_caching: bool = False

model_config = {"arbitrary_types_allowed": True}

Expand Down Expand Up @@ -288,12 +294,23 @@ async def generate_content_async(
text = "\n".join(p.text for p in si.parts or [] if p.text)
if text:
kwargs["system"] = [{"text": text}]
# If prompt caching is on, mark the end of the system content as
# a cache breakpoint. Bedrock caches everything up to and including
# this point for ~5 minutes; subsequent requests with the same
# prefix hit the cache. No-op if we didn't produce any system text.
if self.prompt_caching and kwargs.get("system"):
kwargs["system"].append({"cachePoint": {"type": "default"}})
Comment on lines +297 to +302

if llm_request.config and llm_request.config.tools:
genai_tools = [t for t in llm_request.config.tools if hasattr(t, "function_declarations")]
if genai_tools:
converse_tools = _convert_tools_to_converse(genai_tools, tool_name_map, tool_name_counter)
if converse_tools:
# CachePoint at the END of the tool list: tool definitions
# are usually the biggest static chunk of an agent request
# and benefit most from caching.
if self.prompt_caching:
converse_tools.append({"cachePoint": {"type": "default"}})
kwargs["toolConfig"] = {"tools": converse_tools}
Comment on lines +309 to 314

# Reverse map lets us restore original tool names from sanitized names in Bedrock responses.
Expand Down
6 changes: 6 additions & 0 deletions python/packages/kagent-adk/src/kagent/adk/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ class Bedrock(BaseLLM):
# additionalModelRequestFields in the Converse API. Use this for provider-specific
# options outside the standard InferenceConfiguration block.
additional_model_request_fields: dict | None = None
# prompt_caching enables Bedrock prompt caching: a CachePoint marker is
# appended to the end of the Converse request's system content array and
# toolConfig.tools array. Bedrock caches the prefix across requests in the
# same region; the cached portion is billed at a reduced rate on a cache hit.
prompt_caching: bool = False
type: Literal["bedrock"]


Expand Down Expand Up @@ -600,6 +605,7 @@ def _create_llm_from_model_config(model_config: ModelUnion):
model=model_config.model,
extra_headers=extra_headers,
additional_model_request_fields=model_config.additional_model_request_fields,
prompt_caching=model_config.prompt_caching,
**_transport_kwargs(model_config),
)
if model_config.type == "sap_ai_core":
Expand Down