Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .hack/devnet/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,17 @@ ENCLAVE_NAME="${ENCLAVE_NAME:-assertoor}"
if kurtosis enclave inspect "$ENCLAVE_NAME" > /dev/null; then
echo "Kurtosis enclave '$ENCLAVE_NAME' is already up."
else
kurtosis run github.com/ethpandaops/ethereum-package --enclave "$ENCLAVE_NAME" --args-file "$args_file" --non-blocking-tasks --image-download always
# Base flags for the `kurtosis run` call below.
kurtosis_run_flags=(--non-blocking-tasks --image-download always)

# Disruptoor needs --privileged to manage iptables in the enclave.
# Detect it as a YAML block-list entry in the args file (the form used by
# ethereum-package's `additional_services`), either bare or quoted:
#   - disruptoor      - "disruptoor"      - 'disruptoor'
# Whitespace is required after the dash and before a trailing `#` comment, so
# `-disruptoor` and `- disruptoor#x` (which are different YAML scalars) do not
# match. Mapping keys and inline flow lists are still not detected — extend
# if needed.
disruptoor_entry_re="^[[:space:]]*-[[:space:]]+(disruptoor|\"disruptoor\"|'disruptoor')[[:space:]]*([[:space:]]#.*)?\$"
if grep -Eq -- "$disruptoor_entry_re" "$args_file"; then
  kurtosis_run_flags+=(--privileged)
fi

kurtosis run github.com/ethpandaops/ethereum-package --enclave "$ENCLAVE_NAME" --args-file "$args_file" "${kurtosis_run_flags[@]}"

# Stop assertoor instance within ethereum-package if running
kurtosis service stop "$ENCLAVE_NAME" assertoor > /dev/null || true
Expand Down
27 changes: 25 additions & 2 deletions playbooks/_index.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Auto-generated playbook index
# Generated: 2026-05-14T00:40:37Z
# Generated: 2026-05-19T19:23:49Z
# DO NOT EDIT MANUALLY - regenerate via `make generate-playbook-index`.

generated: 2026-05-14T00:40:37Z
generated: 2026-05-19T19:23:49Z
folders:
- path: dev
name: Development & Utilities
Expand Down Expand Up @@ -214,6 +214,29 @@ playbooks:
- consensus
- execution
timeout: 12h
- file: dev/two-way-network-split.yaml
  id: two-way-network-split
  name: Two-Way Network Split Finality Test
  description: |-
    Splits a Kurtosis-launched devnet into two halves through the disruptoor
    HTTP API, verifies that finality stops for two epochs, heals the split,
    waits two more epochs, and verifies finality recovers.

    The participant groups are computed dynamically from the assertoor client
    pool: nodes 1..floor(N/2) form the left half, nodes floor(N/2)+1..N form
    the right half. For odd N the right half gets the extra node. The playbook
    requires at least `minClientCount` clients (default 2); choosing a topology
    where one side retains a 2/3 finality majority (e.g. 3 nodes split 1/2) is
    the operator's call. A disruptoor service is expected at `disruptoorUrl`
    (default `http://disruptoor:7700`).
  version: 1.0.0
  tags:
    - disruptoor
    - kurtosis
    - finality
    - network-split
    - consensus
  timeout: 45m
- file: dev/validator-lifecycle-test.yaml
id: validator-lifecycle-test
name: Validator Lifecycle Test (Un-finality Stress)
Expand Down
169 changes: 169 additions & 0 deletions playbooks/dev/two-way-network-split.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Playbook identity and metadata.
id: two-way-network-split
name: 'Two-Way Network Split Finality Test'
description: |
  Splits a Kurtosis-launched devnet into two halves through the disruptoor
  HTTP API, verifies that finality stops for two epochs, heals the split,
  waits two more epochs, and verifies finality recovers.

  The participant groups are computed dynamically from the assertoor client
  pool: nodes 1..floor(N/2) form the left half, nodes floor(N/2)+1..N form
  the right half. For odd N the right half gets the extra node. The playbook
  requires at least `minClientCount` clients (default 2); choosing a topology
  where one side retains a 2/3 finality majority (e.g. 3 nodes split 1/2) is
  the operator's call. A disruptoor service is expected at `disruptoorUrl`
  (default `http://disruptoor:7700`).
version: '1.0.0'
tags:
  - disruptoor
  - kurtosis
  - finality
  - network-split
  - consensus
timeout: 45m
# Default variables; the tasks below reference them by name.
config:
  # Base URL of the disruptoor HTTP API called by the run_shell tasks.
  disruptoorUrl: 'http://disruptoor:7700'
  # Forwarded to check_clients_are_healthy as its minClientCount.
  minClientCount: 2
  # Used as the "client-type" selector of both partition groups.
  partitionClientTypes:
    - execution
    - beacon
  # Epochs the split stays active before non-finality is asserted.
  splitObservationEpochs: 2
  # Epochs to wait after clearing the split before the recovery check.
  recoveryEpochs: 2
  # Forwarded as maxUnfinalizedEpochs to the final finality check.
  recoveredMaxUnfinalizedEpochs: 3
tasks:
# Later wait tasks read specs.SLOTS_PER_EPOCH from this task's outputs.
- name: get_consensus_specs
  id: get_specs
  title: 'Get consensus chain specs'

# Fixed pause before the first disruptoor API call.
- name: sleep
  title: 'Wait for disruptoor API to come up'
  config:
    duration: 10s

# Probe the disruptoor health endpoint before touching the network.
# A single probe is racy when the service starts slowly, so poll a bounded
# number of times. Worst case is 6 attempts x (5s probe + 3s pause) = 48s,
# which stays inside the 1m task timeout.
- name: run_shell
  title: 'Check disruptoor API health'
  timeout: 1m
  config:
    envVars:
      # The script decodes this value with `jq -r .` before use.
      DISRUPTOOR_URL: disruptoorUrl
    command: |
      set -euo pipefail
      disruptoor_url=$(echo "$DISRUPTOOR_URL" | jq -r .)
      attempts=6
      healthy=0
      for ((attempt = 1; attempt <= attempts; attempt++)); do
        if curl -fsS --max-time 5 "${disruptoor_url}/v1/healthz" >/dev/null; then
          healthy=1
          break
        fi
        echo "disruptoor not healthy yet (attempt ${attempt}/${attempts})" >&2
        sleep 3
      done
      if [ "$healthy" -ne 1 ]; then
        echo "disruptoor API did not become healthy at ${disruptoor_url}" >&2
        exit 1
      fi

# Client health gate. The split task reads this task's totalCount output to
# size the two halves.
- name: check_clients_are_healthy
  id: client_check
  title: 'Wait for all devnet clients to be healthy'
  timeout: 10m
  configVars:
    minClientCount: 'minClientCount'
  config:
    maxUnhealthyCount: 0

# Baseline finality check. Its unfinalizedEpochs output is reused by the
# non-finality check that runs while the split is active.
- name: check_consensus_finality
  id: initial_finality
  title: 'Wait for initial finality'
  timeout: 20m
  config:
    minFinalizedEpochs: 2
    maxUnfinalizedEpochs: 3

# Runs with an empty config. The split observation wait reads this task's
# currentSlot output as its starting point.
- name: check_consensus_slot_range
  id: split_start
  title: 'Capture split start slot'
  timeout: 1m
  config: {}

# Install one symmetric partition with two groups through the disruptoor API.
# With N = client_check totalCount, the left group is node indices
# 1..floor(N/2) and the right group is floor(N/2)+1..N.
- name: run_shell
  title: 'Split devnet into two halves'
  timeout: 1m
  config:
    envVars:
      # The script decodes every value below with jq before use.
      DISRUPTOOR_URL: disruptoorUrl
      SPLIT_LEFT_PARTICIPANTS: '| [range(1; ((.tasks.client_check.outputs.totalCount / 2) | floor) + 1)]'
      SPLIT_RIGHT_PARTICIPANTS: '| [range(((.tasks.client_check.outputs.totalCount / 2) | floor) + 1; .tasks.client_check.outputs.totalCount + 1)]'
      PARTITION_CLIENT_TYPES: partitionClientTypes
    command: |
      set -euo pipefail
      disruptoor_url=$(echo "$DISRUPTOOR_URL" | jq -r .)
      left_participants=$(echo "$SPLIT_LEFT_PARTICIPANTS" | jq -c .)
      right_participants=$(echo "$SPLIT_RIGHT_PARTICIPANTS" | jq -c .)
      client_types=$(echo "$PARTITION_CLIENT_TYPES" | jq -c .)
      left_count=$(echo "$left_participants" | jq 'length')
      right_count=$(echo "$right_participants" | jq 'length')
      echo "Splitting ${left_count} left (${left_participants}) vs ${right_count} right (${right_participants})"
      if [ "$left_count" -lt 1 ] || [ "$right_count" -lt 1 ]; then
        echo "Refusing to split: each side must have at least one node" >&2
        exit 1
      fi
      state_file=$(mktemp)
      trap 'rm -f "$state_file"' EXIT

      jq -n \
        --argjson left "$left_participants" \
        --argjson right "$right_participants" \
        --argjson clientTypes "$client_types" \
        '{
          partitions: [
            {
              name: "assertoor-two-half-split",
              groups: [
                {"node-index": $left, "client-type": $clientTypes},
                {"node-index": $right, "client-type": $clientTypes}
              ],
              scope: ["el_p2p", "cl_p2p"],
              symmetric: true
            }
          ]
        }' >"$state_file"

      curl -fsS -X PUT "${disruptoor_url}/v1/state" \
        -H "Content-Type: application/json" \
        --data @"$state_file"
      curl -fsS "${disruptoor_url}/v1/state" | jq -e '.partitions | length == 1' >/dev/null

# Target slot = split_start currentSlot + splitObservationEpochs * SLOTS_PER_EPOCH.
- name: check_consensus_slot_range
  title: 'Wait split observation epochs with the split active'
  configVars:
    minSlotNumber: '| (.tasks.split_start.outputs.currentSlot | tonumber) + ((.splitObservationEpochs | tonumber) * (.tasks.get_specs.outputs.specs.SLOTS_PER_EPOCH | tonumber))'

# Requires at least (baseline unfinalizedEpochs + splitObservationEpochs)
# unfinalized epochs while the split is active.
- name: check_consensus_finality
  title: 'Check non-finality after split observation'
  timeout: 1m
  configVars:
    minUnfinalizedEpochs: '| (.tasks.initial_finality.outputs.unfinalizedEpochs | tonumber) + (.splitObservationEpochs | tonumber)'
  config:
    # NOTE(review): presumably makes a missed check fail the task instead of
    # waiting for a later pass — confirm against the assertoor task docs.
    failOnCheckMiss: true

# Heal the network: POST the clear endpoint, then require that the reported
# state has no partitions and no shaping entries left.
- name: run_shell
  title: 'Clear disruptoor network split'
  timeout: 1m
  config:
    envVars:
      # The script decodes this value with `jq -r .` before use.
      DISRUPTOOR_URL: disruptoorUrl
    command: |
      set -euo pipefail
      disruptoor_url=$(echo "$DISRUPTOOR_URL" | jq -r .)
      curl -fsS -X POST "${disruptoor_url}/v1/state/clear"
      curl -fsS "${disruptoor_url}/v1/state" | jq -e '(.partitions | length) == 0 and (.shaping | length) == 0' >/dev/null

# Runs with an empty config. The recovery wait reads this task's currentSlot
# output as its starting point.
- name: check_consensus_slot_range
  id: recovery_start
  title: 'Capture recovery start slot'
  timeout: 1m
  config: {}

# Target slot = recovery_start currentSlot + recoveryEpochs * SLOTS_PER_EPOCH.
- name: check_consensus_slot_range
  title: 'Wait recovery epochs after clearing the split'
  configVars:
    minSlotNumber: '| (.tasks.recovery_start.outputs.currentSlot | tonumber) + ((.recoveryEpochs | tonumber) * (.tasks.get_specs.outputs.specs.SLOTS_PER_EPOCH | tonumber))'

# Final assertion: maxUnfinalizedEpochs is taken from the
# recoveredMaxUnfinalizedEpochs variable.
- name: check_consensus_finality
  title: 'Check finality recovered'
  timeout: 4m
  configVars:
    maxUnfinalizedEpochs: 'recoveredMaxUnfinalizedEpochs'

cleanupTasks:
# Best-effort reset of disruptoor state; a failing clear request is ignored
# through `|| true`.
- name: run_shell
  title: 'Clear disruptoor state'
  timeout: 1m
  config:
    envVars:
      # The script decodes this value with `jq -r .` before use.
      DISRUPTOOR_URL: disruptoorUrl
    command: |
      set -euo pipefail
      disruptoor_url=$(echo "$DISRUPTOOR_URL" | jq -r .)
      curl -fsS -X POST "${disruptoor_url}/v1/state/clear" || true