1 change: 1 addition & 0 deletions changelog.d/3-deploy-builds/wiab-staging
@@ -2,3 +2,4 @@ Fixed: offline-vm-setup script to use ubuntu cloud image, local seed iso and VM
Changed: Add ansible playbook for wiab-staging VM provisioning
Fixed: offline-deploy.sh for SSH_AUTH_SOCK handling and remove defunct passwords for postgresql
Added: terraform resources for wiab-staging
Added: cd_staging script to verify the default build bundle
184 changes: 184 additions & 0 deletions offline/cd_staging.sh
@@ -0,0 +1,184 @@
#!/usr/bin/env bash
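# cd_staging.sh: provision a temporary wiab-staging environment on Hetzner Cloud
# with Terraform (retrying when server types are unavailable), then run the
# offline Wire deployment against it to verify the default build bundle.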

set -euo pipefail

CD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TF_DIR="${CD_DIR}/../terraform/examples/wiab-staging-hetzner"
ARTIFACTS_DIR="${CD_DIR}/default-build/output"
VALUES_DIR="${CD_DIR}/../values"
COMMIT_HASH="${GITHUB_SHA}"
ARTIFACT="wire-server-deploy-static-${COMMIT_HASH}"

# Retry configuration
MAX_RETRIES=3
RETRY_DELAY=30
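# Each failed 'terraform apply' is followed by a destroy of the partial
# deployment, a ${RETRY_DELAY}s pause and a reshuffle of the preferred
# server types before the next attempt.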

echo "Wire Offline Deployment with Retry Logic"
echo "========================================"

function cleanup {
    (cd "$TF_DIR" && terraform destroy -auto-approve)
    echo "Cleanup completed"
}
trap cleanup EXIT
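# The EXIT trap above guarantees the Hetzner resources are destroyed on any
# exit path, whether the deployment succeeds or fails.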

cd "$TF_DIR"
terraform init

# Retry loop for terraform apply
echo "Starting deployment with automatic retry on resource unavailability..."
for attempt in $(seq 1 $MAX_RETRIES); do
    echo ""
    echo "Deployment attempt $attempt of $MAX_RETRIES"
    date

    if terraform apply -auto-approve; then
        echo "Infrastructure deployment successful on attempt $attempt!"
        break
    else
        echo "Infrastructure deployment failed on attempt $attempt"

        if [[ $attempt -lt $MAX_RETRIES ]]; then
            echo "Will retry with different configuration..."

            # Clean up partial deployment
            echo "Cleaning up partial deployment..."
            terraform destroy -auto-approve || true

            # Wait for resources to potentially become available
            echo "Waiting ${RETRY_DELAY}s for resources to become available..."
            sleep $RETRY_DELAY

            # Modify configuration for better availability
            echo "Adjusting server type preferences for attempt $((attempt + 1))..."
            case $attempt in
                1)
                    # Attempt 2: Prioritize cpx22 and cx53
                    sed -i.bak 's/"cx33", "cpx22", "cx43"/"cpx22", "cx43", "cx33"/' main.tf
                    sed -i.bak 's/"cx43", "cx53", "cpx42"/"cx53", "cpx42", "cx43"/' main.tf
                    echo " -> Prioritizing cpx22 and cx53 server types"
                    ;;
                2)
                    # Attempt 3: Use biggest available types
                    sed -i.bak 's/"cpx22", "cx43", "cx33"/"cx43", "cx33", "cpx22"/' main.tf
                    sed -i.bak 's/"cx53", "cpx42", "cx43"/"cpx42", "cx43", "cx53"/' main.tf
                    echo " -> Using biggest available server types"
                    ;;
            esac

            terraform init -reconfigure
        else
            echo "All deployment attempts failed after $MAX_RETRIES tries"
            echo ""
            echo "This usually means:"
            echo " 1. High demand for Hetzner Cloud resources in EU regions"
            echo " 2. Your account may have resource limits"
            echo " 3. Try again later when resources become available"
            echo ""
            echo "Manual solutions:"
            echo " 1. Check Hetzner Console for resource limits"
            echo " 2. Try different server types manually"
            echo " 3. Contact Hetzner support for resource availability"

            # Restore original config
            if [[ -f main.tf.bak ]]; then
                mv main.tf.bak main.tf
                terraform init -reconfigure
            fi

            exit 1
        fi
    fi
done

# Restore original config after successful deployment
if [[ -f main.tf.bak ]]; then
    mv main.tf.bak main.tf
    terraform init -reconfigure
fi

echo ""
echo "Infrastructure ready! Proceeding with application deployment..."

# Continue with the rest of the original cd.sh logic
adminhost=$(terraform output -raw adminhost)
ssh_private_key=$(terraform output ssh_private_key)
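# Load the private key into a forwardable ssh-agent and also persist it to
# disk so ansible-playbook can reference it via --private-key.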

eval "$(ssh-agent)"
ssh-add - <<< "$ssh_private_key"
rm -f ssh_private_key || true
echo "$ssh_private_key" > ssh_private_key
chmod 400 ssh_private_key
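# Export the Terraform-generated static inventory and convert it to YAML so
# Ansible can consume it.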

terraform output -json static-inventory > inventory.json
yq eval -o=yaml '.' inventory.json > inventory.yml

echo "Running ansible playbook setup_nodes.yml via adminhost ($adminhost)..."
ansible-playbook -i inventory.yml setup_nodes.yml --private-key "ssh_private_key"

# The 'demo' user must already exist on the adminhost
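# Download the offline bundle built for this commit from the public Wire
# artifacts bucket and unpack it on the adminhost.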
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=10 \
"demo@$adminhost" wget -q "https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/${ARTIFACT}.tgz"

ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=10 \
"demo@$adminhost" tar xzf "$ARTIFACT.tgz"

# Override the ingress-nginx-controller values with the Hetzner example for this environment (see $TF_DIR/setup_nodes.yml)
scp -A "$VALUES_DIR/ingress-nginx-controller/hetzner-ci.example.yaml" "demo@$adminhost:./values/ingress-nginx-controller/prod-values.example.yaml"
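# Build a secondary inventory: copy the static staging template and override
# its host IPs, network interfaces and kube-node vars with the values from
# the Terraform-generated inventory.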

# Source and target files
SOURCE="inventory.yml"
cp "${CD_DIR}/../ansible/inventory/offline/staging.yml" "inventory-secondary.yml"
TARGET="inventory-secondary.yml"
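# The Terraform-generated inventory is expected to have roughly this shape
# (host names, addresses and interface name below are illustrative only):
#
#   assethost:
#     hosts:
#       assethost:
#         ansible_host: 192.0.2.10
#   kube-node:
#     hosts:
#       kubenode-0:
#         ansible_host: 192.0.2.11
#     vars:             # copied verbatim into the secondary inventory below
#       ...
#   datanode:
#     hosts:
#       datanode-0:
#         ansible_host: 192.0.2.21
#     vars:
#       datanode_network_interface: enp7s0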

# Read assethost IP
ASSETHOST_IP=$(yq eval '.assethost.hosts.assethost.ansible_host' "$SOURCE")
yq eval -i ".assethost.hosts.assethost.ansible_host = \"$ASSETHOST_IP\"" "$TARGET"

# Read kube-node IPs using to_entries
KUBENODE1_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[0].value.ansible_host' "$SOURCE")
KUBENODE2_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[1].value.ansible_host' "$SOURCE")
KUBENODE3_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[2].value.ansible_host' "$SOURCE")

yq eval -i ".kube-node.hosts.kubenode1.ansible_host = \"$KUBENODE1_IP\"" "$TARGET"
yq eval -i ".kube-node.hosts.kubenode2.ansible_host = \"$KUBENODE2_IP\"" "$TARGET"
yq eval -i ".kube-node.hosts.kubenode3.ansible_host = \"$KUBENODE3_IP\"" "$TARGET"

# Read datanode IPs using to_entries
DATANODE1_IP=$(yq eval '.datanode.hosts | to_entries | .[0].value.ansible_host' "$SOURCE")
DATANODE2_IP=$(yq eval '.datanode.hosts | to_entries | .[1].value.ansible_host' "$SOURCE")
DATANODE3_IP=$(yq eval '.datanode.hosts | to_entries | .[2].value.ansible_host' "$SOURCE")

# Set datanodes IPs
yq eval -i ".datanodes.hosts.datanode1.ansible_host = \"$DATANODE1_IP\"" "$TARGET"
yq eval -i ".datanodes.hosts.datanode2.ansible_host = \"$DATANODE2_IP\"" "$TARGET"
yq eval -i ".datanodes.hosts.datanode3.ansible_host = \"$DATANODE3_IP\"" "$TARGET"

# Override network_interface from SOURCE to TARGET for all service groups
NETWORK_INTERFACE=$(yq eval '.datanode.vars.datanode_network_interface' "$SOURCE")
yq eval -i ".cassandra.vars.cassandra_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET"
yq eval -i ".elasticsearch.vars.elasticsearch_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET"
yq eval -i ".minio.vars.minio_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET"
yq eval -i ".rabbitmq.vars.rabbitmq_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET"
yq eval -i ".postgresql.vars.postgresql_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET"

# Extract all kube-node vars from SOURCE and merge into TARGET
KUBE_NODE_VARS_FILE=$(mktemp)
yq eval '.["kube-node"].vars' "$SOURCE" > "$KUBE_NODE_VARS_FILE"
yq eval -i '.kube-node.vars |= load("'"$KUBE_NODE_VARS_FILE"'")' "$TARGET"
rm -f "$KUBE_NODE_VARS_FILE"

yq eval -i ".all.vars.ansible_ssh_private_key_file = \"ssh/ssh_private_key\"" "$TARGET"

echo "Created secondary inventory file $TARGET successfully"
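# Copy the merged inventory to the adminhost's offline inventory location
# used by the deployment below.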

scp "$TARGET" "demo@$adminhost":./ansible/inventory/offline/inventory.yml

ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=10 "demo@$adminhost" cat ./ansible/inventory/offline/inventory.yml || true

# NOTE: the SSH agent is forwarded so that the adminhost can provision the other boxes
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=10 -A "demo@$adminhost" ./bin/offline-deploy.sh

echo ""
echo "Wire offline deployment completed successfully!"
cleanup