Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions agent/06_agent_create_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,55 @@ function run_agent_test_cases() {

echo "Finished fixing DNS through agent-tui"
fi

if [[ "${AGENT_TEST_CASES:-}" =~ "copy_network" ]]; then
  # copy_network scenario: needs two masters and an IPv4-only stack.
  if [[ ${NUM_MASTERS} -lt 2 ]]; then
    echo "ERROR: copy_network test case requires at least 2 master nodes (NUM_MASTERS=${NUM_MASTERS}). Not supported with SNO topology."
    exit 1
  fi
  if [[ "${IP_STACK}" != "v4" ]]; then
    echo "ERROR: copy_network test case only supports IPv4 (IP_STACK=${IP_STACK})."
    exit 1
  fi
  echo "Running test scenario: inject static network keyfile on master_0 and master_1"

  # A NetworkManager keyfile is injected over SSH into the live environment of
  # master_0 and master_1, simulating what a user would create via the
  # agent-tui. The keyfile must be in place before
  # agent-set-host-copy-network-arg.service runs; the helper script waits for
  # SSH to become available on each node.
  #  - master_0 is the rendezvous/bootstrap node whose IP is recorded in the
  #    etcd cluster during installation, so its current DHCP IP is reused as
  #    the static IP to keep etcd membership intact after reboot.
  #  - master_1 is intentionally included as a non-rendezvous node (the
  #    scenario the bug affected) and gets an address that has no DNS host
  #    entry in the libvirt network.
  subnet_prefix=${EXTERNAL_SUBNET_V4#*/}
  master0_hostname=$(printf "${MASTER_HOSTNAME_FORMAT}" 0)

  # Dump the libvirt network definition once and reuse it for every lookup.
  # Failing here avoids mistaking a virsh error for "address is unused".
  if ! network_xml=$(sudo virsh net-dumpxml "${BAREMETAL_NETWORK_NAME}"); then
    echo "ERROR: could not dump libvirt network ${BAREMETAL_NETWORK_NAME}"
    exit 1
  fi

  master0_dhcp_ip=$(xmllint --xpath \
    "string(//dns[*]/host/hostname[. = '${master0_hostname}']/../@ip)" - <<<"${network_xml}")
  if [[ -z "${master0_dhcp_ip}" ]]; then
    echo "ERROR: could not resolve the IP of ${master0_hostname} on network ${BAREMETAL_NETWORK_NAME}"
    exit 1
  fi

  # Pick the first offset in 90..254 without a DNS host entry in the network
  # definition. validate-copy-network.sh repeats the same scan to find this
  # address again, so the range and the rule must stay identical in both places.
  master1_static_ip=""
  for offset in {90..254}; do
    candidate=$(nth_ip "${EXTERNAL_SUBNET_V4}" "${offset}")
    # xmllint exits non-zero when the XPath node set is empty, i.e. no host
    # entry uses this address.
    if ! xmllint --xpath "//dns[*]/host[@ip = '${candidate}']" - <<<"${network_xml}" &>/dev/null; then
      master1_static_ip=${candidate}
      break
    fi
  done
  if [[ -z "${master1_static_ip}" ]]; then
    echo "ERROR: could not find an unused IP in ${EXTERNAL_SUBNET_V4} for master-1 static config"
    exit 1
  fi
  echo "Using static IP ${master1_static_ip} for master-1"

  # Map node index -> static address (with prefix length) handed to the helper.
  declare -A COPY_NETWORK_STATIC_IPS=(
    [0]="${master0_dhcp_ip}/${subnet_prefix}"
    [1]="${master1_static_ip}/${subnet_prefix}"
  )
  for node_index in 0 1; do
    echo "Injecting keyfile on master_${node_index}"
    ./agent/e2e/agent-tui/test-copy-network.sh "${node_index}" "${COPY_NETWORK_STATIC_IPS[$node_index]}"
  done

  echo "Finished injecting network keyfiles"
fi
}

# Setup the environment to allow iPXE booting, by reusing libvirt native features
Expand Down
5 changes: 5 additions & 0 deletions agent/08_agent_post_install_validation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ if (( $NUM_MASTERS != $installed_control_plane_nodes )); then
fi

oc get clusterversion

# When the copy_network test case is selected, run the dedicated validator
# script after the regular post-install checks.
case "${AGENT_TEST_CASES:-}" in
  *copy_network*)
    echo "Validating static network config persistence after installation"
    ./agent/e2e/agent-tui/validate-copy-network.sh
    ;;
esac
75 changes: 75 additions & 0 deletions agent/e2e/agent-tui/test-copy-network.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash
#
# Injects a static NetworkManager keyfile into the live environment of one
# master node over SSH.
#
# Usage: test-copy-network.sh [NODE_INDEX] [STATIC_IP/PREFIX]
#   NODE_INDEX        index substituted into MASTER_HOSTNAME_FORMAT (default: 0)
#   STATIC_IP/PREFIX  address written to the keyfile (default: 192.168.111.90/24)
#
# Environment:
#   MASTER_HOSTNAME_FORMAT, BAREMETAL_NETWORK_NAME  read below; presumably
#       provided by common.sh or the caller's environment.
#   COPY_NETWORK_SSH_WAIT_TIMEOUT  seconds to wait for SSH on the node
#       (default: 1800).
#
# Exits non-zero if the node IP cannot be resolved, SSH never becomes
# available, or the interface MAC, gateway or DNS server cannot be determined.
set -euxo pipefail

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )"
source "${SCRIPTDIR}/common.sh"

NODE_INDEX=${1:-0}
STATIC_IP=${2:-"192.168.111.90/24"}
CONNECTION_NAME="copy-network-static"

# Kept as an array so each option stays a separate word without relying on
# word splitting. ConnectTimeout bounds every single connection attempt.
SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -q)

# Derive the DNS hostname from MASTER_HOSTNAME_FORMAT (e.g. "master-0")
node_hostname=$(printf "${MASTER_HOSTNAME_FORMAT}" "${NODE_INDEX}")

# Look up the IP recorded for that hostname in the libvirt network definition;
# it is the address used to SSH into the live environment.
node_ip=$(sudo virsh net-dumpxml "${BAREMETAL_NETWORK_NAME}" | xmllint --xpath \
  "string(//dns[*]/host/hostname[. = '${node_hostname}']/../@ip)" -)

if [[ -z "${node_ip}" ]]; then
  echo "ERROR: Could not resolve IP for ${node_hostname} on network ${BAREMETAL_NETWORK_NAME}"
  exit 1
fi

# Poll every 10 seconds until SSH answers, but give up after the configured
# deadline so a node that never boots cannot hang the run forever.
ssh_wait_timeout=${COPY_NETWORK_SSH_WAIT_TIMEOUT:-1800}
ssh_wait_deadline=$(( SECONDS + ssh_wait_timeout ))
echo "Waiting for live environment SSH on ${node_hostname} (${node_ip})..."
until ssh "${SSH_OPTS[@]}" "core@${node_ip}" true 2>/dev/null; do
  if (( SECONDS >= ssh_wait_deadline )); then
    echo "ERROR: Timed out after ${ssh_wait_timeout}s waiting for SSH on ${node_hostname} (${node_ip})"
    exit 1
  fi
  sleep 10
done

echo "SSH available on ${node_hostname}, injecting static network keyfile"

# Determine the MAC address of the interface that has the current IP,
# and the default gateway and first nameserver from the live environment.
iface_mac=$(ssh "${SSH_OPTS[@]}" "core@${node_ip}" \
  "ip -j addr show | jq -r '.[] | select(.addr_info[]? | .local == \"${node_ip}\") | .address'")
gateway=$(ssh "${SSH_OPTS[@]}" "core@${node_ip}" \
  "ip route show default | awk '/default/ {print \$3; exit}'")
dns=$(ssh "${SSH_OPTS[@]}" "core@${node_ip}" \
  "awk '/^nameserver/ {print \$2; exit}' /etc/resolv.conf")

echo "Interface MAC: ${iface_mac}, Gateway: ${gateway}, DNS: ${dns}, Static IP: ${STATIC_IP}"

# All three values end up in the keyfile; an empty one would produce a broken
# static configuration, so stop before writing anything.
if [[ -z "${iface_mac}" || -z "${gateway}" || -z "${dns}" ]]; then
  echo "ERROR: Could not determine interface MAC, gateway or DNS server on ${node_hostname}"
  exit 1
fi

# Write a static NetworkManager keyfile carrying STATIC_IP (the caller decides
# whether that differs from the node's current address). The connection is
# bound to the primary interface by MAC address and uses
# autoconnect-priority=1 so it is preferred over auto-generated DHCP
# connections. umask 177 makes the file readable and writable by root only.
ssh "${SSH_OPTS[@]}" "core@${node_ip}" \
  "sudo bash -c 'umask 177; cat > /etc/NetworkManager/system-connections/${CONNECTION_NAME}.nmconnection'" << EOF
[connection]
id=${CONNECTION_NAME}
type=ethernet
autoconnect=true
autoconnect-priority=1

[ethernet]
mac-address=${iface_mac}

[ipv4]
address1=${STATIC_IP},${gateway}
dns=${dns};
method=manual

[ipv6]
method=disabled

[proxy]
EOF

echo "Injected static keyfile '${CONNECTION_NAME}.nmconnection' on ${node_hostname}"
echo " Static IP: ${STATIC_IP}, Gateway: ${gateway}, DNS: ${dns}"
74 changes: 74 additions & 0 deletions agent/e2e/agent-tui/validate-copy-network.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash
#
# Post-install check for the copy_network test case: verifies on master 0 and
# master 1 that the injected NetworkManager connection is present in the
# installed OS and uses a manual (static) IPv4 method.
#
# Environment read here: EXTERNAL_SUBNET_V4, MASTER_HOSTNAME_FORMAT,
# BAREMETAL_NETWORK_NAME; presumably provided by the sourced common scripts or
# the caller's environment. nth_ip is expected to come from the sourced files.
#
# Exits 1 if any check fails on any node, 0 otherwise.
set -euxo pipefail

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )"
source "${SCRIPTDIR}/common.sh"
source "${SCRIPTDIR}/agent/common.sh"
source "${SCRIPTDIR}/network.sh"

CONNECTION_NAME="copy-network-static"
KEYFILE_PATH="/etc/NetworkManager/system-connections/${CONNECTION_NAME}.nmconnection"

# Kept as an array so each option stays a separate word. ConnectTimeout makes
# a missing static IP fail quickly instead of waiting for the TCP default.
SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -q)

# master-0 keeps its DHCP IP as the static IP (to preserve etcd membership),
# so it is reached via the IP recorded in the libvirt network definition.
# master-1 uses a distinct static IP, so SSHing to that IP directly proves the
# installed OS is using the static config.
master0_hostname=$(printf "${MASTER_HOSTNAME_FORMAT}" 0)

# Dump the libvirt network definition once and reuse it for every lookup.
# Failing here avoids mistaking a virsh error for "address is unused".
if ! network_xml=$(sudo virsh net-dumpxml "${BAREMETAL_NETWORK_NAME}"); then
  echo "ERROR: could not dump libvirt network ${BAREMETAL_NETWORK_NAME}"
  exit 1
fi

master0_ip=$(xmllint --xpath \
  "string(//dns[*]/host/hostname[. = '${master0_hostname}']/../@ip)" - <<<"${network_xml}")
if [[ -z "${master0_ip}" ]]; then
  echo "ERROR: could not resolve the IP of ${master0_hostname} on network ${BAREMETAL_NETWORK_NAME}"
  exit 1
fi

# Repeat the scan used when the keyfile was injected: the first offset in
# 90..254 without a DNS host entry is the address that was given to master-1.
master1_ip=""
for offset in {90..254}; do
  candidate=$(nth_ip "${EXTERNAL_SUBNET_V4}" "${offset}")
  # xmllint exits non-zero when the XPath node set is empty, i.e. no host
  # entry uses this address.
  if ! xmllint --xpath "//dns[*]/host[@ip = '${candidate}']" - <<<"${network_xml}" &>/dev/null; then
    master1_ip=${candidate}
    break
  fi
done
if [[ -z "${master1_ip}" ]]; then
  echo "ERROR: could not find the static IP for master-1 in ${EXTERNAL_SUBNET_V4}"
  exit 1
fi

declare -A NODE_IPS=([0]="${master0_ip}" [1]="${master1_ip}")

# Check every node even after a failure so the log shows the full picture;
# the overall result is decided once the loop is done.
failed=0
for node_index in 0 1; do
  node_hostname=$(printf "${MASTER_HOSTNAME_FORMAT}" "${node_index}")
  node_ip=${NODE_IPS[$node_index]}

  echo "Checking ${node_hostname} at ${node_ip} for connection '${CONNECTION_NAME}'..."

  if ! ssh "${SSH_OPTS[@]}" "core@${node_ip}" true 2>/dev/null; then
    echo "FAIL: Cannot SSH to ${node_hostname} at ${node_ip}"
    failed=1
    continue
  fi

  # Verify the exact NetworkManager keyfile exists in the installed OS
  if ! ssh "${SSH_OPTS[@]}" "core@${node_ip}" "sudo test -f '${KEYFILE_PATH}'"; then
    echo "FAIL: Connection keyfile '${CONNECTION_NAME}' not found on ${node_hostname}"
    failed=1
    continue
  fi

  # Verify nmcli reports the connection with static method - this is the proof
  # that --copy-network copied the user-created keyfile to the installed OS
  if ! ssh "${SSH_OPTS[@]}" "core@${node_ip}" \
    "sudo nmcli -f ipv4.method connection show '${CONNECTION_NAME}' | grep -q 'manual'"; then
    echo "FAIL: Connection '${CONNECTION_NAME}' does not have static IPv4 method on ${node_hostname}"
    failed=1
    continue
  fi

  echo "PASS: ${node_hostname} has connection '${CONNECTION_NAME}' with method=manual"
done

if (( failed != 0 )); then
  echo "FAIL: Network config persistence validation failed on one or more nodes"
  exit 1
fi

echo "PASS: Static network config persisted after installation on all nodes"
12 changes: 12 additions & 0 deletions config_example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,19 @@ set -x
# then the wait-for commands should timeout and fail.
# This test case is only supported when IP_STACK=v4.
#
# 2. 'copy_network' test case:
# Validates that static network connections created manually in the live environment during
# boot (as a user would via the agent-tui) persist into the installed OS (OCPBUGS-63472).
# Use a DHCP scenario so that no static networking is pre-configured in the manifests.
# During boot, the test injects a static NetworkManager keyfile over SSH on master_0 and
# master_1, simulating a connection created via nmtui. After installation, the post-install
# validation checks that the connection keyfile and nmcli entry are present on both nodes,
# confirming that --copy-network was set per-host by agent-set-host-copy-network-arg.service
# and coreos-installer copied the keyfile into the OS.
# Requires: AGENT_E2E_TEST_SCENARIO=COMPACT_IPV4_DHCP (or any HA_IPV4_DHCP), IP_STACK=v4.
# Not supported with SNO topology (requires at least 2 master nodes) or IPv6.
#
# export AGENT_TEST_CASES='bad_dns'
# export AGENT_TEST_CASES='copy_network'

# Uncomment the following line to deploy the cluster using the appliance model
# The appliance model boots the host using the unconfigured ignition. It then mounts
Expand Down