simplyblock_core/cluster_ops.py (8 additions, 7 deletions)

@@ -458,7 +458,6 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn
     cluster.strict_node_anti_affinity = strict_node_anti_affinity

     default_cluster = clusters[0]
-    cluster.mode = default_cluster.mode
     cluster.db_connection = default_cluster.db_connection
     cluster.grafana_secret = monitoring_secret if default_cluster.mode == "kubernetes" else default_cluster.grafana_secret
     cluster.grafana_endpoint = default_cluster.grafana_endpoint
@@ -1132,6 +1131,7 @@ def get_logs(cluster_id, limit=50, **kwargs) -> t.List[dict]:
         if record.event in ["device_status", "node_status"]:
             msg = msg+f" ({record.count})"

+        logger.debug(record)
         out.append({
             "Date": record.get_date_string(),
             "NodeId": record.node_id,
@@ -1154,10 +1154,6 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None,

     logger.info("Updating mgmt cluster")
     if cluster.mode == "docker":
-        sbcli=constants.SIMPLY_BLOCK_CLI_NAME
-        subprocess.check_call(f"pip install {sbcli} --upgrade".split(' '))
-        logger.info(f"{sbcli} upgraded")
-
         cluster_docker = utils.get_docker_client(cluster_id)
         logger.info(f"Pulling image {constants.SIMPLY_BLOCK_DOCKER_IMAGE}")
         pull_docker_image_with_retry(cluster_docker, constants.SIMPLY_BLOCK_DOCKER_IMAGE)
@@ -1171,7 +1167,7 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None,
         for service in cluster_docker.services.list():
             if image_parts in service.attrs['Spec']['Labels']['com.docker.stack.image'] or \
                     "simplyblock" in service.attrs['Spec']['Labels']['com.docker.stack.image']:
-                if service.name == "app_CachingNodeMonitor":
+                if service.name in ["app_CachingNodeMonitor", "app_CachedLVolStatsCollector"]:
                     logger.info(f"Removing service {service.name}")
                     service.remove()
                 else:
@@ -1281,7 +1277,12 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None,
             logger.info(f"Restarting node: {node.get_id()} with SPDK image: {spdk_image}")
         else:
             logger.info(f"Restarting node: {node.get_id()}")
-        storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image)
+        try:
+            storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image)
+        except Exception as e:
+            logger.debug(e)
+            logger.error(f"Failed to restart node: {node.get_id()}")
+            return

     logger.info("Done")

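For reference, the error handling added around the node restart gives the rolling-restart loop the following overall shape. This is a minimal sketch: restart_storage_node and the log messages come from the diff, while the wrapper function, the logger setup, and the import path are assumed for illustration.

import logging

from simplyblock_core import storage_node_ops  # assumed import path

logger = logging.getLogger(__name__)


def restart_nodes(nodes, spdk_image=None):
    """Restart each node in turn; abort the rollout on the first failure."""
    for node in nodes:
        try:
            storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image)
        except Exception as e:
            # Log the exception detail at debug level, surface a concise
            # error, and stop so remaining nodes are not restarted into a
            # cluster that is already degraded.
            logger.debug(e)
            logger.error(f"Failed to restart node: {node.get_id()}")
            return False
    return True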
simplyblock_core/constants.py (2 additions, 2 deletions)

@@ -139,7 +139,6 @@ def get_config_var(name, default=None):
 CLIENT_QPAIR_COUNT=3
 NVME_TIMEOUT_US=8000000
 NVMF_MAX_SUBSYSTEMS=50000
-HA_JM_COUNT=3
 KATO=10000
 ACK_TO=11
 BDEV_RETRY=0
@@ -225,4 +224,5 @@ def get_config_var(name, default=None):

 qos_class_meta_and_migration_weight_percent = 25

-MIG_PARALLEL_JOBS = 64
+MIG_PARALLEL_JOBS = 64
+MIG_JOB_SIZE = 64
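Constants of this shape typically bound how a migration workload is chunked and dispatched. The helper below is a hypothetical illustration only: the two constant names come from the diff, but split_into_jobs, its docstring, and the example usage are not from the repository.

MIG_PARALLEL_JOBS = 64  # assumed meaning: upper bound on concurrently running migration jobs
MIG_JOB_SIZE = 64       # assumed meaning: units of work handled by a single migration job


def split_into_jobs(total_units, job_size=MIG_JOB_SIZE):
    """Chunk total_units of migration work into fixed-size (start, end) jobs."""
    return [(start, min(start + job_size, total_units))
            for start in range(0, total_units, job_size)]


# Example: 1000 units yield 16 jobs of up to 64 units each, of which at
# most MIG_PARALLEL_JOBS would be in flight at any one time.
jobs = split_into_jobs(1000)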
simplyblock_core/controllers/device_controller.py (0 additions, 75 deletions)

@@ -653,81 +653,6 @@ def add_device(device_id, add_migration_task=True):
         tasks_controller.add_new_device_mig_task(device_id)
     return device_id

-#
-# # create partitions
-# partitions = snode.num_partitions_per_dev
-# rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)
-# # look for partitions
-# partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj)
-# logger.debug("partitioned_devices")
-# logger.debug(partitioned_devices)
-# if len(partitioned_devices) == partitions+1:
-#     logger.info("Partitioned devices found")
-# else:
-#     logger.info(f"Creating partitions for {device_obj.nvme_bdev}")
-#     storage_node_ops._create_device_partitions(rpc_client, device_obj, snode, partitions, snode.jm_percent)
-#     partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj)
-#     if len(partitioned_devices) == partitions+1:
-#         logger.info("Device partitions created")
-#     else:
-#         logger.error("Failed to create partitions")
-#         return False
-#
-# jm_part = partitioned_devices.pop(0)
-# new_devices = []
-# dev_order = storage_node_ops.get_next_cluster_device_order(db_controller, snode.cluster_id)
-# for dev in partitioned_devices:
-#     new_device = storage_node_ops._create_storage_device_stack(rpc_client, dev, snode, after_restart=False)
-#     if not new_device:
-#         logger.error("failed to create dev stack")
-#         continue
-#
-#     new_device.cluster_device_order = dev_order
-#     dev_order += 1
-#     device_events.device_create(new_device)
-#     new_devices.append(new_device)
-#
-# if new_devices:
-#     snode.nvme_devices.remove(device_obj)
-#     snode.nvme_devices.extend(new_devices)
-#     snode.write_to_db(db_controller.kv_store)
-# else:
-#     logger.error("failed to create devices")
-#     return False
-#
-# for dev in new_devices:
-#     distr_controller.send_cluster_map_add_device(dev, snode)
-#
-# logger.info("Make other nodes connect to the node devices")
-# snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id)
-# for node in snodes:
-#     if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE:
-#         continue
-#     node.remote_devices = storage_node_ops._connect_to_remote_devs(node)
-#     node.write_to_db()
-#     for dev in new_devices:
-#         distr_controller.send_cluster_map_add_device(dev, node)
-#
-# for dev in new_devices:
-#     tasks_controller.add_new_device_mig_task(dev.get_id())
-#
-# # add to jm raid
-# if snode.jm_device and snode.jm_device.raid_bdev and jm_part:
-#     # looking for jm partition
-#     jm_dev_part = jm_part.nvme_bdev
-#     ret = rpc_client.get_bdevs(jm_dev_part)
-#     if ret:
-#         logger.info(f"JM part found: {jm_dev_part}")
-#     if snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_REMOVED]:
-#         restart_jm_device(snode.jm_device.get_id(), force=True, format_alceml=True)
-#
-#     if snode.jm_device.status == JMDevice.STATUS_ONLINE and \
-#             jm_dev_part not in snode.jm_device.jm_nvme_bdev_list:
-#         remove_jm_device(snode.jm_device.get_id(), force=True)
-#         restart_jm_device(snode.jm_device.get_id(), force=True)
-#
-# return "Done"
-

 def device_set_failed_and_migrated(device_id):
     db_controller = DBController()