Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions charts/examples/maestro-two-resources/adapter-task-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,19 @@ preconditions:
- name: is_deleting
expression: "has(clusterStatus.deleted_time)"
- expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"
name: readyConditionStatus
name: reconciledConditionStatus
- name: placementClusterName
expression: '"cluster1"' # TBC coming from placement adapter
conditions:
- field: readyConditionStatus
- field: reconciledConditionStatus
operator: equals
value: "False"
name: clusterStatus
- expression: |
readyConditionStatus == "False"
reconciledConditionStatus == "False"
name: validationCheck

resources:
Expand Down
4 changes: 2 additions & 2 deletions charts/examples/maestro/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ helm install <name> ./charts -f charts/examples/values.yaml \
## How It Works

1. The adapter receives a CloudEvent with a cluster ID and generation
2. **Preconditions**: Fetches cluster status from the Hyperfleet API and captures the cluster name, generation, and ready condition
3. **Validation**: Checks that the cluster's Ready condition is "False" before proceeding
2. **Preconditions**: Fetches cluster status from the Hyperfleet API and captures the cluster name, generation, and reconciled condition
3. **Validation**: Checks that the cluster's Reconciled condition is "False" before proceeding
4. **Resource creation**: Creates resources in order:
- Namespace named with the cluster ID
- ServiceAccount in the new namespace
Expand Down
10 changes: 5 additions & 5 deletions charts/examples/maestro/adapter-task-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,21 @@ preconditions:
- name: is_deleting
expression: "has(clusterStatus.deleted_time)"
- expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"
name: readyConditionStatus
name: reconciledConditionStatus
- name: placementClusterName
expression: '"cluster1"' # TBC coming from placement adapter
# Structured conditions with valid operators
conditions:
- field: readyConditionStatus
- field: reconciledConditionStatus
operator: equals
value: "False"
name: clusterStatus
- # Valid CEL expression
expression: |
readyConditionStatus == "False"
reconciledConditionStatus == "False"
name: validationCheck
# Resources with valid K8s manifests
resources:
Expand Down
16 changes: 8 additions & 8 deletions configs/adapter-task-config-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
# =================
# Option 1: CEL expression (direct field on precondition)
# expression: |
# readyConditionStatus == "False"
# reconciledConditionStatus == "False"
#
# Option 2: Structured conditions (direct field on precondition)
# conditions:
# - field: "readyConditionStatus"
# - field: "reconciledConditionStatus"
# operator: "equals"
# value: "Terminating"
#
Expand Down Expand Up @@ -118,10 +118,10 @@ preconditions:
# Simple dot notation
- name: "clusterName"
field: "name"
- name: "readyConditionStatus"
- name: "reconciledConditionStatus"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"
- name: "generationId"
field: "generation"
Expand All @@ -146,12 +146,12 @@ preconditions:

# Conditions to check. SCOPE: Full execution context
# You can access:
# - Captured values: readyConditionStatus, clusterName, etc.
# - Captured values: reconciledConditionStatus, clusterName, etc.
# - Full API response: clusterStatus.status.conditions, clusterStatus.spec.nodeCount
# - Params: clusterId, hyperfleetApiBaseUrl, etc.
conditions:
# Using captured value
- field: "readyConditionStatus"
- field: "reconciledConditionStatus"
operator: "equals"
value: "True"

Expand All @@ -162,7 +162,7 @@ preconditions:

# Alternative: CEL expression with full access
# expression: |
# clusterStatus.status.conditions.filter(c, c.type == "Ready")[0].status == "True" &&
# clusterStatus.status.conditions.filter(c, c.type == "Reconciled")[0].status == "True" &&
# clusterStatus.spec.nodeCount > 0

# ============================================================================
Expand Down
92 changes: 46 additions & 46 deletions docs/adapter-authoring-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ sequenceDiagram
Adapter->>API: GET /clusters/{id}
Adapter->>Adapter: Create/update resources
Adapter->>API: POST status {observed_generation: N+1}
API->>API: All adapters at N+1 → Ready=True
API->>API: All adapters at N+1 → Reconciled=True
```

### Anemic events
Expand Down Expand Up @@ -257,10 +257,10 @@ After the API call, capture values from the response for use in later phases. Tw
field: "name"

# CEL expression for computed values
- name: "readyStatus"
- name: "reconciledStatus"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"

# JSONPath with filter
Expand All @@ -278,7 +278,7 @@ After captures, evaluate conditions to decide whether to proceed. Two syntaxes a

```yaml
conditions:
- field: "readyStatus"
- field: "reconciledStatus"
operator: "equals"
value: "False"
```
Expand All @@ -287,7 +287,7 @@ After captures, evaluate conditions to decide whether to proceed. Two syntaxes a

```yaml
expression: |
readyStatus == "False" && clusterStatus.spec.nodeCount > 0
reconciledStatus == "False" && clusterStatus.spec.nodeCount > 0
```

> **Scope:** Conditions see the **full execution context**: all params, all captured fields, and the full API response accessible via the precondition name (e.g., `clusterStatus.status.conditions`).
Expand Down Expand Up @@ -325,10 +325,10 @@ preconditions:
api_call:
url: "/api/hyperfleet/v1/clusters/{{ .clusterId }}/statuses"
capture:
- name: "lzReady"
- name: "lzReconciled"
field: "{.items[?(@.adapter=='landing-zone')].data.namespace.status}"
conditions:
- field: "lzReady"
- field: "lzReconciled"
operator: "equals"
value: "Active"
```
Expand All @@ -341,10 +341,10 @@ When a condition is **not met**, the adapter skips the resources phase but still

Adapter preconditions typically need to handle two scenarios:

1. **Initial deployment** — Deploy resources when the cluster is NOT Ready
2. **Self-healing** — Detect and recreate accidentally deleted resources when the cluster IS Ready
1. **Initial deployment** — Deploy resources when the cluster is NOT Reconciled
2. **Self-healing** — Detect and recreate accidentally deleted resources when the cluster IS Reconciled

A condition-only precondition (e.g., "only run when cluster is NOT Ready") handles scenario 1 but breaks scenario 2:
A condition-only precondition (e.g., "only run when cluster is NOT Reconciled") handles scenario 1 but breaks scenario 2:

```yaml
# Condition-only pattern - INCOMPLETE
Expand All @@ -353,31 +353,31 @@ preconditions:
api_call:
url: "/api/hyperfleet/v1/clusters/{{ .clusterId }}"
capture:
- name: "readyStatus"
- name: "reconciledStatus"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"
conditions:
- field: "readyStatus"
- field: "reconciledStatus"
operator: "equals"
value: "False" # Only runs resource phase when NOT Ready
value: "False" # Only runs resource phase when NOT Reconciled
```

**Problem:** If a resource is accidentally deleted while the cluster is Ready, the adapter skips the resource operation phase because the precondition is `False`. The adapter still runs and reports status, but it cannot detect or recreate the deleted resource because it never executes the resource phase.
**Problem:** If a resource is accidentally deleted while the cluster is Reconciled, the adapter skips the resource operation phase because the precondition is not met. The adapter still runs and reports status, but it cannot detect or recreate the deleted resource because it never executes the resource phase.

**Solution:** Add a time-based stability check to enable both scenarios:

- Run resource phase when cluster is **NOT Ready**
- Run resource phase when cluster is **Ready AND stable for >5 minutes** (periodic self-healing)
- Run resource phase when cluster is **NOT Reconciled**
- Run resource phase when cluster is **Reconciled AND stable for >5 minutes** (periodic self-healing)

#### Understanding `last_transition_time` vs `last_updated_time`

To implement time-based stability checks, you need to know how long a cluster has been in its current state. Each condition provides two timestamp fields:

| Field | Updates when | Use for |
|-------|-------------|---------|
| **`last_transition_time`** | Condition status **changes** (True→False or False→True) | **Stability windows** — "cluster has been Ready for N minutes" |
| **`last_transition_time`** | Condition status **changes** (True→False or False→True) | **Stability windows** — "cluster has been Reconciled for N minutes" |
| **`last_updated_time`** | Adapter **reports status** (every POST, even if unchanged) | **Liveness checks** — "adapter reported recently" |

**Critical:** For stability windows, always use `last_transition_time`. The `last_updated_time` field has special aggregation behavior that makes it unsuitable for measuring state duration.
Expand All @@ -390,30 +390,30 @@ preconditions:
api_call:
url: "/api/hyperfleet/v1/clusters/{{ .clusterId }}"
capture:
- name: "clusterNotReady"
- name: "clusterNotReconciled"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status != "True"
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True"
: true
- name: "clusterReadyTTL"
- name: "clusterReconciledTTL"
expression: |
(timestamp(now()) - timestamp(
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].last_transition_time
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time
: now()
)).getSeconds() > 300

- name: "validationCheck"
# Precondition passes if cluster is NOT Ready OR if cluster is Ready and stable for >300 seconds since last transition (enables self-healing)
# Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing)
expression: |
clusterNotReady || clusterReadyTTL
clusterNotReconciled || clusterReconciledTTL
```

**What this does:**

- `clusterNotReady` → Captures whether the cluster is NOT Ready (true when Ready condition is missing or not "True")
- `clusterReadyTTL` → Captures whether the cluster has been Ready for >5 minutes (300 seconds) since the last status transition
- `validationCheck` → Evaluates both conditions: run resource phase when cluster is NOT Ready OR when cluster has been Ready and stable for >5 minutes (self-healing)
- `clusterNotReconciled` → Captures whether the cluster is NOT Reconciled (true when Reconciled condition is missing or not "True")
- `clusterReconciledTTL` → Captures whether the cluster has been Reconciled for >5 minutes (300 seconds) since the last status transition
- `validationCheck` → Evaluates both conditions: run resource phase when cluster is NOT Reconciled OR when cluster has been Reconciled and stable for >5 minutes (self-healing)

**Important notes:**

Expand Down Expand Up @@ -1014,10 +1014,10 @@ Because the adapter reads status at a point in time, the overall flow is a **con

1. First cycle: adapter creates resources, discovers them immediately — status may be `Pending` or `Unknown`
2. Adapter reports `Applied=True, Available=Unknown` to the API
3. Sentinel detects the cluster is not yet Ready (generation mismatch or max-age exceeded)
3. Sentinel detects the cluster is not yet Reconciled (generation mismatch or max-age exceeded)
4. Next cycle: adapter discovers the same resources — status has progressed to `Active` or `Complete`
5. Adapter reports `Applied=True, Available=True`
6. API aggregates: all adapters at current generation with `Available=True` → cluster is `Ready`
6. API aggregates: all adapters at current generation with `Available=True` → cluster is `Reconciled`

This means your adapter does not need to poll or wait. The framework and Sentinel handle retry timing. Your job is to write CEL expressions that correctly read the current state, whatever it may be.

Expand Down Expand Up @@ -1178,7 +1178,7 @@ Optionally attach adapter-specific metrics extracted from your resources:
When your adapter reports status, the API aggregates across **all registered adapters**:

- **Available** = all adapters report `Available=True` at *any* generation (last known good)
- **Ready** = all adapters report `Available=True` at the *current* generation (fully reconciled)
- **Reconciled** = all adapters report `Available=True` at the *current* generation (fully reconciled)

Your adapter name must be registered in the `HYPERFLEET_CLUSTER_ADAPTERS` environment variable on the API for it to participate in aggregation.

Expand Down Expand Up @@ -1250,7 +1250,7 @@ Mock responses matched by HTTP method and URL regex. Supports sequential respons
"generation": 5,
"status": {
"conditions": [
{ "type": "Ready", "status": "False" }
{ "type": "Reconciled", "status": "False" }
]
}
}
Expand Down Expand Up @@ -1313,7 +1313,7 @@ Phase 2: Preconditions ..................... SUCCESS (MET)
[1/1] fetch-cluster PASS
API Call: GET /api/hyperfleet/v1/clusters/abc123 -> 200
Captured: clusterName = "my-cluster"
Captured: readyStatus = "False"
Captured: reconciledStatus = "False"

Phase 3: Resources ........................ SUCCESS
[1/2] namespace0 CREATE
Expand Down Expand Up @@ -1342,7 +1342,7 @@ Use `--dry-run-verbose` to see rendered manifests and full API request/response
2. Create mock files for a representative cluster
3. Run dry-run, inspect the trace
4. Fix config issues, re-run
5. Test edge cases: change mock API responses to simulate different cluster states (Ready=True, missing fields, error responses)
5. Test edge cases: change mock API responses to simulate different cluster states (Reconciled=True, missing fields, error responses)
6. Deploy when the trace shows the expected behavior

---
Expand Down Expand Up @@ -1382,13 +1382,13 @@ preconditions:
capture:
- name: "generation"
field: "generation"
- name: "readyStatus"
- name: "reconciledStatus"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"
conditions:
- field: "readyStatus"
- field: "reconciledStatus"
operator: "equals"
value: "False"

Expand Down Expand Up @@ -1470,7 +1470,7 @@ The adapter will run preconditions, skip straight to post-actions, and report st

1. **Register your adapter name** in the HyperFleet API's `HYPERFLEET_CLUSTER_ADAPTERS` (or `HYPERFLEET_NODEPOOL_ADAPTERS`) environment variable. Without this, the API won't include your adapter in status aggregation.

- The API will compute the `Ready` condition of the managed object as when all registered adapters have reported `True` as their `Available` condition status.
- The API will compute the `Reconciled` condition of the managed object as `True` when all registered adapters have reported `True` as their `Available` condition status.

1. **Create the AdapterConfig** with your environment's API endpoint, broker subscription, and client settings:

Expand Down Expand Up @@ -1519,14 +1519,14 @@ resources.?clusterNamespace.?status.?phase.orValue("")
has(resources.clusterNamespace)

# Array filtering — find a condition by type
status.conditions.filter(c, c.type == "Ready")
status.conditions.filter(c, c.type == "Reconciled")

# Array existence check
status.conditions.exists(c, c.type == "Ready" && c.status == "True")
status.conditions.exists(c, c.type == "Reconciled" && c.status == "True")

# Get first matching element with fallback
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"

# Ternary
Expand Down
8 changes: 4 additions & 4 deletions internal/configloader/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ Captures values from API responses. Supports two modes (mutually exclusive):

```yaml
capture:
# CEL expression for Ready condition status
- name: "readyConditionStatus"
# CEL expression for Reconciled condition status
- name: "reconciledConditionStatus"
expression: |
status.conditions.filter(c, c.type == "Ready").size() > 0
? status.conditions.filter(c, c.type == "Ready")[0].status
status.conditions.filter(c, c.type == "Reconciled").size() > 0
? status.conditions.filter(c, c.type == "Reconciled")[0].status
: "False"

# JSONPath for complex extraction
Expand Down
2 changes: 1 addition & 1 deletion internal/configloader/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ func TestValidateCELExpressions(t *testing.T) {
t.Run("valid CEL with has() function", func(t *testing.T) {
cfg := withExpression(
`has(cluster.status) && ` +
`cluster.status.conditions.exists(c, c.type == "Ready" && c.status == "True")`,
`cluster.status.conditions.exists(c, c.type == "Reconciled" && c.status == "True")`,
)
v := newTaskValidator(cfg)
require.NoError(t, v.ValidateStructure())
Expand Down
Loading