Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions tests/integration-tests/clusters_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def start(self, wait_running=False):
logging.error("Failed starting cluster with error:\n%s\nand output:\n%s", e.stderr, e.stdout)
raise

def stop(self):
def stop(self, wait_stopped=False):
"""Run pcluster stop and return the result."""
cmd_args = ["pcluster", "update-compute-fleet", "--cluster-name", self.name, "--status"]
scheduler = self.config["Scheduling"]["Scheduler"]
Expand All @@ -203,7 +203,12 @@ def stop(self):
try:
result = run_pcluster_command(cmd_args, log_error=False, custom_cli_credentials=self.custom_cli_credentials)
logging.info("Cluster {0} stopped successfully".format(self.name))

if wait_stopped:
retry(
wait_fixed=seconds(10),
stop_max_delay=minutes(2),
retry_on_result=lambda describe_result: "STOPPED" != describe_result["status"],
)(self.describe_compute_fleet)()
return result.stdout
except subprocess.CalledProcessError as e:
logging.error("Failed stopping cluster with error:\n%s\nand output:\n%s", e.stderr, e.stdout)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import re
import tarfile
import tempfile
import time

import boto3
import botocore
Expand Down Expand Up @@ -295,14 +294,13 @@ def _test_pcluster_compute_fleet(cluster, expected_num_nodes):
last_stop_time = compute_fleet["lastStatusUpdatedTime"]

logging.info("Testing pcluster start functionalities")
# Do a complicated sequence of start and stop and see if commands will still work
cluster.start()
time.sleep(15)
cluster.stop()
time.sleep(15)
cluster.stop()
time.sleep(30)
cluster.start()
# Do a complicated sequence of start and stop and see if commands will still work.
# We must wait for terminal states between commands to avoid racing with clusterstatusmgtd's
# ~60s poll loop, which transitions *_REQUESTED -> *ING and would cause conditional DDB write failures.
cluster.start(wait_running=True)
cluster.stop(wait_stopped=True)
cluster.stop(wait_stopped=True) # idempotent on already-stopped fleet
cluster.start(wait_running=True)
compute_fleet = cluster.describe_compute_fleet()
last_start_time = compute_fleet["lastStatusUpdatedTime"]
logging.info("Checking last status update time is updated")
Expand Down
Loading