Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
ef488f9
add KMS integration and configuration for Docker and Kubernetes deplo…
Hamdy-khader Mar 7, 2026
d59a0f3
refactor: improve KMS configuration logic and add initial kms_config.hcl
Hamdy-khader Mar 17, 2026
74476ba
feat: add KMS configuration for Docker and Kubernetes deployments
Hamdy-khader Mar 17, 2026
b4f181a
fix: update KMS container name prefix in get_kms_cont function
Hamdy-khader Mar 17, 2026
851d406
fix: enhance KMS initialization and error handling in configuration
Hamdy-khader Mar 17, 2026
e046091
fix: add database write operation after cluster creation and improve …
Hamdy-khader Mar 17, 2026
f4f8f4e
fix: add pool UUID and name assignment in logical volume creation
Hamdy-khader Mar 17, 2026
bbd498e
fix: update KMS decryption process to handle new key structure
Hamdy-khader Mar 17, 2026
530c8c7
fix: implement key deletion functionality in KMS client and integrate…
Hamdy-khader Mar 17, 2026
57ac519
fix: add update_pool_key method in KMS client and integrate with pool…
Hamdy-khader Mar 18, 2026
215ef04
fix: add missing newline at end of kms_config.hcl for proper formatting
Hamdy-khader Mar 18, 2026
36a8d3a
fix: remove unused crypto key parameters from backup and logical volu…
Hamdy-khader Mar 19, 2026
f373293
fix: update security context configuration in app_k8s.yaml and adjust…
Hamdy-khader Mar 24, 2026
1b91a31
feat: add new configuration files and scripts for KMS setup and manag…
Hamdy-khader Mar 24, 2026
825cacf
fix: update KMS container command and adjust hostPath type in app_k8s…
Hamdy-khader Mar 24, 2026
3107b96
fix: update KMS init container image to use the correct repository
Hamdy-khader Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions simplyblock_cli/cli-reference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,12 @@ commands:
Network interface name from client to use for LVol connection.
dest: client_data_nic
type: str
- name: "--no-kms"
help: "Disable KMS deployment and integration"
dest: no_kms
type: bool
default: false
action: store_true
- name: add
help: "Adds a new cluster"
arguments:
Expand Down Expand Up @@ -1446,14 +1452,6 @@ commands:
dest: encrypt
type: bool
action: store_true
- name: "--crypto-key1"
help: "Hex value of key1 to be used for logical volume encryption"
dest: crypto_key1
type: str
- name: "--crypto-key2"
help: "Hex value of key2 to be used for logical volume encryption"
dest: crypto_key2
type: str
- name: "--max-rw-iops"
help: "Maximum Read Write IO Per Second"
dest: max_rw_iops
Expand Down
3 changes: 1 addition & 2 deletions simplyblock_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ def init_cluster__create(self, subparser):
argument = subcommand.add_argument('--nvmf-base-port', help='Base port for all NVMe-oF listeners (lvol, hublvol, device). Default: 4420', type=int, default=4420, dest='nvmf_base_port')
argument = subcommand.add_argument('--rpc-base-port', help='Base port for SPDK JSON-RPC. Default: 8080', type=int, default=8080, dest='rpc_base_port')
argument = subcommand.add_argument('--snode-api-port', help='SNodeAPI/firewall port (one per host IP). Default: 50001', type=int, default=50001, dest='snode_api_port')
argument = subcommand.add_argument('--no-kms', help='Disable KMS deployment and integration', default=False, dest='no_kms', action='store_true')

def init_cluster__add(self, subparser):
subcommand = self.add_sub_command(subparser, 'add', 'Adds a new cluster')
Expand Down Expand Up @@ -604,8 +605,6 @@ def init_volume__add(self, subparser):
argument = subcommand.add_argument('--max-size', help='Logical volume max size', type=size_type(), default='1000T', dest='max_size')
argument = subcommand.add_argument('--host-id', help='Primary storage node id or Hostname', type=str, dest='host_id')
argument = subcommand.add_argument('--encrypt', help='Use inline data encryption and decryption on the logical volume', dest='encrypt', action='store_true')
argument = subcommand.add_argument('--crypto-key1', help='Hex value of key1 to be used for logical volume encryption', type=str, dest='crypto_key1')
argument = subcommand.add_argument('--crypto-key2', help='Hex value of key2 to be used for logical volume encryption', type=str, dest='crypto_key2')
argument = subcommand.add_argument('--max-rw-iops', help='Maximum Read Write IO Per Second', type=int, dest='max_rw_iops')
argument = subcommand.add_argument('--max-rw-mbytes', help='Maximum Read Write Megabytes Per Second', type=int, dest='max_rw_mbytes')
argument = subcommand.add_argument('--max-r-mbytes', help='Maximum Read Megabytes Per Second', type=int, dest='max_r_mbytes')
Expand Down
5 changes: 2 additions & 3 deletions simplyblock_cli/clibase.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,6 @@ def volume__add(self, sub_command, args):
args.max_w_mbytes,
with_snapshot=with_snapshot,
max_size=max_size,
crypto_key1=args.crypto_key1,
crypto_key2=args.crypto_key2,
lvol_priority_class=lvol_priority_class,
uid=args.uid, pvc_name=args.pvc_name, namespace=args.namespace,
max_namespace_per_subsys=args.max_namespace_per_subsys, ndcs=ndcs, npcs=npcs, fabric=args.fabric,
Expand Down Expand Up @@ -990,6 +988,7 @@ def cluster_create(self, args):
is_single_node = args.is_single_node
fabric = args.fabric
client_data_nic = args.client_data_nic
deploy_kms = not bool(args.no_kms)

nvmeof_tls_config = None
if args.host_sec:
Expand All @@ -1014,7 +1013,7 @@ def cluster_create(self, args):
ifname, mgmt_ip, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint,
distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode, enable_node_affinity,
qpair_count, client_qpair_count, max_queue_size, inflight_io_threshold, disable_monitoring,
strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic,
strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic, deploy_kms,
nvmeof_tls_config=nvmeof_tls_config, max_fault_tolerance=max_fault_tolerance,
backup_config=backup_config,
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port)
Expand Down
53 changes: 15 additions & 38 deletions simplyblock_core/cluster_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, mgmt_ip, log_del_interval, metrics_retention_period,
contact_point, grafana_endpoint, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode,
enable_node_affinity, qpair_count, client_qpair_count, max_queue_size, inflight_io_threshold, disable_monitoring, strict_node_anti_affinity, name,
tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic,
tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic, deploy_kms=True,
nvmeof_tls_config=None, max_fault_tolerance=1, backup_config=None,
nvmf_base_port=4420, rpc_base_port=8080, snode_api_port=50001) -> str:

Expand All @@ -246,6 +246,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
scripts.install_deps(mode)
logger.info("Installing dependencies > Done")

db_connection = None
if mode == "docker":
if not ifname:
ifname = "eth0"
Expand Down Expand Up @@ -328,8 +329,8 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
cluster.grafana_endpoint = grafana_endpoint
elif ingress_host_source == "hostip":
cluster.grafana_endpoint = f"http://{dev_ip}/grafana"
else:
cluster.grafana_endpoint = f"http://{dns_name}/grafana"

cluster.deploy_kms = bool(deploy_kms)
cluster.enable_node_affinity = enable_node_affinity
cluster.qpair_count = qpair_count or constants.QPAIR_COUNT
cluster.client_qpair_count = client_qpair_count or constants.CLIENT_QPAIR_COUNT
Expand Down Expand Up @@ -361,7 +362,8 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
logger.info("Deploying swarm stack ...")
log_level = "DEBUG" if constants.LOG_WEB_DEBUG else "INFO"
scripts.deploy_stack(cli_pass, dev_ip, constants.SIMPLY_BLOCK_DOCKER_IMAGE, cluster.secret, cluster.uuid,
log_del_interval, metrics_retention_period, log_level, cluster.grafana_endpoint, str(disable_monitoring))
log_del_interval, metrics_retention_period, log_level, cluster.grafana_endpoint,
str(disable_monitoring), str(cluster.deploy_kms))
logger.info("Deploying swarm stack > Done")

logger.info("Configuring DB...")
Expand All @@ -377,10 +379,6 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
logger.info("Patching prometheus configmap...")
utils.patch_prometheus_configmap(cluster.uuid, cluster.secret)

if not disable_monitoring:
if ingress_host_source == "hostip":
dns_name = dev_ip

_set_max_result_window(dns_name)

_add_graylog_input(dns_name, monitoring_secret)
Expand All @@ -397,6 +395,14 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,

mgmt_node_ops.add_mgmt_node(dev_ip, mode, cluster.uuid)

# configure kms vault
if cluster.deploy_kms:
if mode == "docker":
utils.configure_kms_on_docker(cluster, dev_ip)
elif mode == "kubernetes":
utils.configure_kms_on_k8s(cluster)
cluster.write_to_db(db_controller.kv_store)

logger.info("New Cluster has been created")
logger.info(cluster.uuid)
return cluster.uuid
Expand Down Expand Up @@ -426,35 +432,6 @@ def _cleanup_nvme(mount_point, nqn_value) -> None:
logger.info(f"Removed mount point: {mount_point}")


def _run_fio(mount_point) -> None:
    """Run a short verifying random read/write fio job inside ``mount_point``.

    The job description is written to ``fio.cfg`` in the current working
    directory, executed through ``sudo fio`` and removed again afterwards,
    whether or not fio succeeded. fio's output is logged at INFO level.

    Args:
        mount_point: Directory fio should create its test files in. It is
            created if it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If fio exits with a non-zero status.
        OSError: If the directory or the job file cannot be created.
    """
    if not os.path.exists(mount_point):
        os.makedirs(mount_point, exist_ok=True)

    # Bound before the try block so the cleanup in ``finally`` can never hit
    # an unbound name and mask the original error.
    config_file = "fio.cfg"

    # NOTE: the Linux native asynchronous I/O engine is called "libaio";
    # "aiolib" is not a known fio engine and makes the job fail to start.
    fio_config = textwrap.dedent(f"""
        [test]
        ioengine=libaio
        direct=1
        iodepth=4
        readwrite=randrw
        bs=4K
        nrfiles=4
        size=1G
        verify=md5
        numjobs=3
        directory={mount_point}
    """).strip()

    try:
        with open(config_file, "w") as f:
            f.write(fio_config)

        logger.info(subprocess.check_output(["sudo", "fio", config_file], text=True))
    finally:
        if os.path.exists(config_file):
            os.remove(config_file)
            logger.info("fio configuration file removed.")


def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, qpair_count,
max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric="tcp",
Expand All @@ -475,7 +452,7 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn
raise ValueError("max_fault_tolerance > 1 requires distr_npcs >= 2")

monitoring_secret = os.environ.get("MONITORING_SECRET", "")

logger.info("Adding new cluster")
cluster = Cluster()
cluster.uuid = str(uuid.uuid4())
Expand Down
2 changes: 0 additions & 2 deletions simplyblock_core/controllers/backup_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,6 @@ def restore_backup(backup_id, lvol_name, pool_id_or_name, cluster_id=None,
max_w_mbytes=0,
host_id_or_name=restore_node_id,
ha_type="default",
crypto_key1=None,
crypto_key2=None,
use_comp=False,
distr_vuid=0,
lvol_priority_class=0,
Expand Down
96 changes: 77 additions & 19 deletions simplyblock_core/controllers/lvol_controller.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# coding=utf-8
import base64
import logging as lg
import json
import math
Expand All @@ -12,6 +13,7 @@
from simplyblock_core import utils, constants
from simplyblock_core.controllers import snapshot_controller, pool_controller, lvol_events
from simplyblock_core.db_controller import DBController
from simplyblock_core.kms_client import KMSClient
from simplyblock_core.models.pool import Pool
from simplyblock_core.models.lvol_model import LVol
from simplyblock_core.models.storage_node import StorageNode
Expand Down Expand Up @@ -74,6 +76,7 @@ def _create_crypto_lvol(rpc_client, name, base_name, key1, key2):
if not ret:
logger.error(f"Failed to find LVol bdev {base_name}")
return False

key_name = f'key_{name}'
ret = rpc_client.lvol_crypto_key_create(key_name, key1, key2)
if not ret:
Expand All @@ -85,6 +88,39 @@ def _create_crypto_lvol(rpc_client, name, base_name, key1, key2):
return False
return ret

def _create_crypto_lvol_kms(snode, lvol, cluster):
    """Create the crypto bdev of ``lvol`` on ``snode`` with keys fetched from the KMS.

    The wrapped keys are looked up in the KMS under the crypto bdev name,
    decrypted with the key belonging to the lvol's pool and then registered
    on the node before the crypto bdev is created on top of the lvol bdev.

    If the KMS has no keys for the lvol but the lvol record still carries
    ``crypto_key1``/``crypto_key2``, those DB keys are used instead.

    Args:
        snode: Storage node the crypto bdev is created on; must provide
            ``rpc_client()``.
        lvol: Logical volume providing ``crypto_bdev``, ``lvs_name``,
            ``lvol_bdev``, ``pool_uuid`` and optionally ``crypto_key1`` /
            ``crypto_key2``.
        cluster: Cluster the lvol belongs to; its id selects the KMS client.

    Returns:
        The truthy result of ``lvol_crypto_create`` on success, ``False`` on
        any failure (missing base bdev, missing keys, failed decryption or a
        failed RPC).
    """
    rpc_client = snode.rpc_client()
    name = lvol.crypto_bdev
    base_name = f"{lvol.lvs_name}/{lvol.lvol_bdev}"
    if not rpc_client.get_bdevs(base_name):
        logger.error(f"Failed to find LVol bdev {base_name}")
        return False

    kms_client = KMSClient(cluster.get_id())
    lvol_keys, err = kms_client.get_keys(name)
    if not lvol_keys:
        logger.error(f"Failed to get keys for lvol: {name} from KMS")
        if err:
            logger.error(err)
        if lvol.crypto_key1 and lvol.crypto_key2:
            logger.warning(f"Using keys from DB for lvol: {name}")
            return _create_crypto_lvol(rpc_client, name, base_name, lvol.crypto_key1, lvol.crypto_key2)
        # No KMS keys and no DB fallback: nothing to build the crypto bdev
        # with, so stop here instead of indexing into an empty result.
        return False

    plain_keys = []
    for slot in ("key1", "key2"):
        try:
            ciphertext = lvol_keys[slot][0]['ciphertext']
        except (KeyError, IndexError, TypeError):
            logger.error(f"KMS record for lvol: {name} has no usable {slot}")
            return False

        decrypted, err = kms_client.decrypt(lvol.pool_uuid, ciphertext)
        try:
            # NOTE(review): the value is passed to SPDK exactly as returned by
            # the KMS; confirm the encoding matches what the RPC expects.
            plain_keys.append(decrypted['plaintext'])
        except (KeyError, TypeError):
            logger.error(f"Failed to decrypt {slot} for lvol: {name}: {err}")
            return False

    original_key1, original_key2 = plain_keys

    key_name = f'key_{name}'
    ret = rpc_client.lvol_crypto_key_create(key_name, original_key1, original_key2)
    if not ret:
        logger.error("failed to create crypto key")
        return False
    ret = rpc_client.lvol_crypto_create(name, base_name, key_name)
    if not ret:
        logger.error(f"failed to create crypto LVol {name}")
        return False
    return ret

def _create_compress_lvol(rpc_client, base_bdev_name):
pm_path = constants.PMEM_DIR
Expand Down Expand Up @@ -295,7 +331,7 @@ def validate_aes_xts_keys(key1: str, key2: str) -> Tuple[bool, str]:

def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp, use_crypto,
distr_vuid, max_rw_iops, max_rw_mbytes, max_r_mbytes, max_w_mbytes,
with_snapshot=False, max_size=0, crypto_key1=None, crypto_key2=None, lvol_priority_class=0,
with_snapshot=False, max_size=0, lvol_priority_class=0,
uid=None, pvc_name=None, namespace=None, max_namespace_per_subsys=1, fabric="tcp", ndcs=0, npcs=0,
allowed_hosts=None, sec_options=None):

Expand Down Expand Up @@ -469,7 +505,8 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp,
lvol.guid = utils.generate_hex_string(16)
lvol.vuid = vuid
lvol.lvol_bdev = f"LVOL_{vuid}"

lvol.pool_uuid = pool.get_id()
lvol.pool_name = pool.pool_name
lvol.crypto_bdev = ''
lvol.comp_bdev = ''

Expand Down Expand Up @@ -544,28 +581,33 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp,
lvol.bdev_stack = [lvol_dict]

if use_crypto:
if crypto_key1 is None or crypto_key2 is None:
return False, "encryption keys for lvol not provided"
else:
success, err = validate_aes_xts_keys(crypto_key1, crypto_key2)
if not success:
return False, err
crypto_key1 = utils.generate_hex_string(32)
crypto_key2 = utils.generate_hex_string(32)

lvol.crypto_bdev = f"crypto_{lvol.lvol_bdev}"
lvol.bdev_stack.append({
"type": "crypto",
"name": lvol.crypto_bdev,
"params": {
"name": lvol.crypto_bdev,
"base_name": lvol.top_bdev,
"key1": crypto_key1,
"key2": crypto_key2,
"base_name": lvol.top_bdev
}
})
lvol.lvol_type += ',crypto'
lvol.top_bdev = lvol.crypto_bdev
lvol.crypto_key1 = crypto_key1
lvol.crypto_key2 = crypto_key2

if cl.deploy_kms:
kms_client = KMSClient(cl.get_id())
encrypted_key1 = kms_client.encrypt(pool.get_id(), crypto_key1)
encrypted_key2 = kms_client.encrypt(pool.get_id(), crypto_key2)
ret, err = kms_client.save_keys(lvol.crypto_bdev, encrypted_key1, encrypted_key2)
if ret:
logger.info(ret)
if err:
logger.error(err)
else:
lvol.crypto_key1 = crypto_key1
lvol.crypto_key2 = crypto_key2

# Process allowed hosts (for host restriction and/or DH-HMAC-CHAP authentication)
if allowed_hosts and not namespace:
Expand Down Expand Up @@ -670,8 +712,6 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp,
lvol.remove(db_controller.kv_store)
return False, error

lvol.pool_uuid = pool.get_id()
lvol.pool_name = pool.pool_name
lvol.status = LVol.STATUS_ONLINE
lvol.write_to_db(db_controller.kv_store)
lvol_events.lvol_create(lvol)
Expand Down Expand Up @@ -702,7 +742,13 @@ def _create_bdev_stack(lvol, snode, is_primary=True):
ret = rpc_client.ultra21_lvol_mount_lvol(**params)

elif type == "crypto":
ret = _create_crypto_lvol(rpc_client, **params)
db_controller = DBController()
cluster = db_controller.get_cluster_by_id(snode.cluster_id)
if cluster.deploy_kms:
ret = _create_crypto_lvol_kms(snode, lvol, cluster)
else:
ret = _create_crypto_lvol(rpc_client, lvol.crypto_bdev, f"{lvol.lvs_name}/{lvol.lvol_bdev}",
lvol.crypto_key1, lvol.crypto_key2)

elif type == "bdev_lvstore":
ret = rpc_client.create_lvstore(**params)
Expand Down Expand Up @@ -831,13 +877,17 @@ def is_node_leader(snode, lvs_name):
return False

def recreate_lvol_on_node(lvol, snode, ha_inode_self=0, ana_state=None):
db_controller = DBController()
rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)

base=f"{lvol.lvs_name}/{lvol.lvol_bdev}"

if "crypto" in lvol.lvol_type:
ret = _create_crypto_lvol(
rpc_client, lvol.crypto_bdev, base, lvol.crypto_key1, lvol.crypto_key2)
cluster = db_controller.get_cluster_by_id(snode.cluster_id)
if cluster.deploy_kms:
ret = _create_crypto_lvol_kms(snode, lvol, cluster)
else:
ret = _create_crypto_lvol(
rpc_client, lvol.crypto_bdev, base, lvol.crypto_key1, lvol.crypto_key2)
if not ret:
msg=f"Failed to create crypto lvol on node {snode.get_id()}"
logger.error(msg)
Expand Down Expand Up @@ -1159,6 +1209,14 @@ def delete_lvol(id_or_name, force_delete=False):
except KeyError:
pass # already deleted

cl = db_controller.get_cluster_by_id(snode.cluster_id)
if cl.deploy_kms:
kms_client = KMSClient(cl.get_id())
ret, err = kms_client.delete_key(lvol.crypto_bdev)
if ret:
logger.info(ret)
if err:
logger.error(err)
logger.info("Done")
return True

Expand Down
Loading
Loading