Skip to content

Commit 790dfde

Browse files
committed
Merge remote-tracking branch 'origin/main' into control-center-integration
2 parents 4502bec + 01c98f3 commit 790dfde

19 files changed

Lines changed: 600 additions & 321 deletions

docs/k8s_mgmt.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# SimplyBlock Management Node on Kubernetes
2+
3+
This guide explains how to deploy the SimplyBlock Management Node on a Kubernetes cluster using the provided bootstrap scripts.
4+
5+
---
6+
7+
## Prerequisites
8+
9+
- A Linux host or VM with access to the required bootstrap scripts:
10+
- `./bootstrap-k3s.sh`
11+
- `./bootstrap-cluster.sh`
12+
- Access to the Kubernetes cluster
13+
14+
---
15+
16+
## Step-by-Step Guide
17+
18+
### 1. Bootstrap the Kubernetes Cluster with Worker Node Support
19+
20+
Run the following command to deploy a K3s-based Kubernetes cluster with support for storage worker nodes:
21+
22+
```bash
23+
./bootstrap-k3s.sh --k8s-snode
24+
```
25+
26+
### 2. Prepare an Administrative Host
27+
28+
Once the cluster is bootstrapped, copy the generated kubeconfig file (`~/.kube/config` or the one output by K3s) to a Linux host where you will perform SimplyBlock cluster administrative tasks, using the command below. Afterwards, update the server IP in the copied kubeconfig file from `127.0.0.1` to `<mgmt-worker-node-ip>`.
29+
30+
```bash
31+
scp /etc/rancher/k3s/k3s.yaml <admin-host>:/home/<user>/.kube/config
32+
```
33+
34+
> **Important:** The administrative host must be a **Linux machine** that:
35+
>
36+
> - Has access to the Kubernetes `kubeconfig` file
37+
> - Can reach the Kubernetes **worker nodes over the network**, including the following ports:
38+
> - `6443` for Kubernetes API server
39+
> - `4500` for FoundationDB
40+
> - `80` and `443` for HTTP and HTTPS access (if applicable)
41+
> - Is used for managing and operating the SimplyBlock cluster (not necessarily running the Management Node itself)
42+
43+
#### Install the SimplyBlock CLI and FoundationDB Client

44+
On the administrative host, install the following tools:
45+
46+
SimplyBlock CLI (sbctl):
47+
48+
```bash
49+
pip install sbctl
50+
```
51+
52+
FoundationDB Client (for RPM-based systems like CentOS/RHEL):
53+
54+
```bash
55+
sudo yum install -y https://github.com/apple/foundationdb/releases/download/7.3.3/foundationdb-clients-7.3.3-1.el7.x86_64.rpm
56+
```
57+
58+
59+
### 3. Deploy the Cluster in Kubernetes Mode
60+
Now run the cluster bootstrap script:
61+
```bash
62+
./bootstrap-cluster.sh --mode kubernetes
63+
```
64+
65+
### 4. Add the FDB Configuration File on the Administrative Host
66+
67+
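Create the FoundationDB configuration directory (writing under `/etc` normally requires root privileges, so prefix the command with `sudo` if you are not running as root):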
68+
69+
```bash
70+
mkdir /etc/foundationdb
71+
```
72+
73+
Retrieve the cluster configuration from the `simplyblock-config` ConfigMap and write it to `/etc/foundationdb/fdb.cluster`:
74+
75+
```bash
76+
kubectl -n simplyblock get cm simplyblock-config \
77+
-o jsonpath="{.data.FDB_CLUSTER_FILE_CONTENTS}" \
78+
| sudo tee /etc/foundationdb/fdb.cluster > /dev/null
79+
```
80+
81+
Optional: Verify the contents
82+
83+
```bash
84+
cat /etc/foundationdb/fdb.cluster
85+
```
86+
87+
### 5. Verification
88+
You can verify that the Management Node is running by checking the pods in the namespace (e.g., simplyblock):
89+
90+
```bash
91+
kubectl get pods -n simplyblock
92+
```
93+
94+
List the bootstrapped cluster:
95+
96+
```bash
97+
sbctl cluster list
98+
```

simplyblock_cli/cli-reference.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,10 @@ commands:
701701
help: "Management interface name, e.g. eth0"
702702
dest: ifname
703703
type: str
704+
- name: "--mgmt-ip"
705+
help: "Management IP address to use for the node (e.g., 192.168.1.10)"
706+
dest: mgmt_ip
707+
type: str
704708
- name: "--log-del-interval"
705709
help: "Logging retention policy, default: 3d"
706710
dest: log_del_interval
@@ -1526,10 +1530,14 @@ commands:
15261530
help: "Cluster secret"
15271531
dest: cluster_secret
15281532
type: str
1529-
- name: "ifname"
1533+
- name: "--ifname"
15301534
help: "Management interface name"
15311535
dest: ifname
15321536
type: str
1537+
- name: "--mgmt-ip"
1538+
help: "Management IP address to use for the node (e.g., 192.168.1.10)"
1539+
dest: mgmt_ip
1540+
type: str
15331541
- name: "--mode"
15341542
help: "Environment to deploy management services, default: docker "
15351543
dest: mode

simplyblock_cli/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ def init_cluster__create(self, subparser):
341341
argument = subcommand.add_argument('--prov-cap-warn', help='Capacity warning level in percent, default: 250', type=int, default=250, dest='prov_cap_warn')
342342
argument = subcommand.add_argument('--prov-cap-crit', help='Capacity critical level in percent, default: 500', type=int, default=500, dest='prov_cap_crit')
343343
argument = subcommand.add_argument('--ifname', help='Management interface name, e.g. eth0', type=str, dest='ifname')
344+
argument = subcommand.add_argument('--mgmt-ip', help='Management IP address to use for the node (e.g., 192.168.1.10)', type=str, dest='mgmt_ip')
344345
argument = subcommand.add_argument('--log-del-interval', help='Logging retention policy, default: 3d', type=str, default='3d', dest='log_del_interval')
345346
argument = subcommand.add_argument('--metrics-retention-period', help='Retention period for I/O statistics (Prometheus), default: 7d', type=str, default='7d', dest='metrics_retention_period')
346347
argument = subcommand.add_argument('--contact-point', help='Email or slack webhook url to be used for alerting', type=str, default='', dest='contact_point')
@@ -633,7 +634,8 @@ def init_control_plane__add(self, subparser):
633634
subcommand.add_argument('cluster_ip', help='Cluster IP address', type=str)
634635
subcommand.add_argument('cluster_id', help='Cluster id', type=str)
635636
subcommand.add_argument('cluster_secret', help='Cluster secret', type=str)
636-
subcommand.add_argument('ifname', help='Management interface name', type=str)
637+
argument = subcommand.add_argument('--ifname', help='Management interface name', type=str, dest='ifname')
638+
argument = subcommand.add_argument('--mgmt-ip', help='Management IP address to use for the node (e.g., 192.168.1.10)', type=str, dest='mgmt_ip')
637639
argument = subcommand.add_argument('--mode', help='Environment to deploy management services, default: docker ', type=str, default='docker', dest='mode', choices=['docker','kubernetes',])
638640

639641
def init_control_plane__list(self, subparser):

simplyblock_cli/clibase.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,9 @@ def control_plane__add(self, sub_command, args):
551551
cluster_ip = args.cluster_ip
552552
cluster_secret = args.cluster_secret
553553
ifname = args.ifname
554+
mgmt_ip = args.mgmt_ip
554555
mode = args.mode
555-
return mgmt_ops.deploy_mgmt_node(cluster_ip, cluster_id, ifname, cluster_secret, mode)
556+
return mgmt_ops.deploy_mgmt_node(cluster_ip, cluster_id, ifname, mgmt_ip, cluster_secret, mode)
556557

557558
def control_plane__list(self, sub_command, args):
558559
return mgmt_ops.list_mgmt_nodes(args.json)
@@ -664,6 +665,7 @@ def cluster_create(self, args):
664665
prov_cap_warn = args.prov_cap_warn
665666
prov_cap_crit = args.prov_cap_crit
666667
ifname = args.ifname
668+
mgmt_ip = args.mgmt_ip
667669
distr_ndcs = args.distr_ndcs
668670
distr_npcs = args.distr_npcs
669671
distr_bs = args.distr_bs
@@ -687,7 +689,7 @@ def cluster_create(self, args):
687689
return cluster_ops.create_cluster(
688690
blk_size, page_size_in_blocks,
689691
CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
690-
ifname, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint,
692+
ifname, mgmt_ip, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint,
691693
distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode, enable_node_affinity,
692694
qpair_count, max_queue_size, inflight_io_threshold, enable_qos, disable_monitoring,
693695
strict_node_anti_affinity, name, refresh_token_secret)

simplyblock_core/cluster_ops.py

Lines changed: 72 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,19 @@ def _create_update_user(cluster_id, grafana_url, grafana_secret, user_secret, up
7979

8080

8181
def _add_graylog_input(cluster_ip, password):
82-
url = f"http://{cluster_ip}/graylog/api/system/inputs"
82+
base_url = f"http://{cluster_ip}/graylog/api"
83+
input_url = f"{base_url}/system/inputs"
8384

8485
retries = 30
85-
reachable=False
86-
while retries > 0:
86+
reachable = False
87+
session = requests.session()
88+
session.auth = ("admin", password)
89+
headers = {
90+
'X-Requested-By': 'setup-script',
91+
'Content-Type': 'application/json',
92+
}
8793

94+
while retries > 0:
8895
payload = json.dumps({
8996
"title": "spdk log input",
9097
"type": "org.graylog2.inputs.gelf.tcp.GELFTCPInput",
@@ -99,30 +106,61 @@ def _add_graylog_input(cluster_ip, password):
99106
},
100107
"global": True
101108
})
102-
headers = {
103-
'X-Requested-By': '',
104-
'Content-Type': 'application/json',
105-
}
106-
session = requests.session()
107-
session.auth = ("admin", password)
108-
response = session.request("POST", url, headers=headers, data=payload)
109+
110+
response = session.post(input_url, headers=headers, data=payload)
109111
if response.status_code == 201:
110112
logger.info("Graylog input created...")
111-
reachable=True
113+
reachable = True
112114
break
113115

114116
logger.debug(response.text)
115117
retries -= 1
116118
time.sleep(5)
119+
117120
if not reachable:
118121
logger.error(f"Failed to create graylog input: {response.text}")
119122
return False
120123

121-
return response.status_code == 201
124+
inputs_response = session.get(input_url, headers=headers)
125+
if inputs_response.status_code != 200:
126+
logger.error(f"Failed to retrieve inputs: {inputs_response.text}")
127+
return False
128+
129+
input_id = None
130+
for item in inputs_response.json()["inputs"]:
131+
if item["title"] == "spdk log input":
132+
input_id = item["id"]
133+
break
134+
135+
if not input_id:
136+
logger.error("Could not find created input to add extractor.")
137+
return False
138+
139+
extractor_url = f"{input_url}/{input_id}/extractors"
140+
extractor_payload = {
141+
"title": "Extract Kubernetes JSON",
142+
"type": "json",
143+
"converters": [],
144+
"order": 0,
145+
"cursor_strategy": "copy",
146+
"source_field": "message",
147+
"target_field": "",
148+
"extractor_config": {},
149+
"condition_type": "none",
150+
"condition_value": ""
151+
}
152+
153+
extractor_response = session.post(extractor_url, headers=headers, data=json.dumps(extractor_payload))
154+
if extractor_response.status_code != 201:
155+
logger.error(f"Failed to add JSON extractor: {extractor_response.text}")
156+
return False
157+
158+
logger.info("JSON extractor added successfully.")
159+
return True
122160

123161
def _set_max_result_window(cluster_ip, max_window=100000):
124162

125-
url_existing_indices = f"http://{cluster_ip}:9200/_all/_settings"
163+
url_existing_indices = f"http://{cluster_ip}/opensearch/_all/_settings"
126164

127165
retries = 30
128166
reachable=False
@@ -149,7 +187,7 @@ def _set_max_result_window(cluster_ip, max_window=100000):
149187
logger.error(f"Failed to update settings for existing indices: {response.text}")
150188
return False
151189

152-
url_template = f"http://{cluster_ip}:9200/_template/all_indices_template"
190+
url_template = f"http://{cluster_ip}/opensearch/_template/all_indices_template"
153191
payload_template = json.dumps({
154192
"index_patterns": ["*"],
155193
"settings": {
@@ -166,7 +204,7 @@ def _set_max_result_window(cluster_ip, max_window=100000):
166204

167205

168206
def create_cluster(blk_size, page_size_in_blocks, cli_pass,
169-
cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
207+
cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, mgmt_ip, log_del_interval, metrics_retention_period,
170208
contact_point, grafana_endpoint, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode,
171209
enable_node_affinity, qpair_count, max_queue_size, inflight_io_threshold, enable_qos, disable_monitoring,
172210
strict_node_anti_affinity, name, refresh_token_secret) -> str:
@@ -178,17 +216,16 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
178216
scripts.install_deps(mode)
179217
logger.info("Installing dependencies > Done")
180218

181-
if not ifname:
182-
ifname = "eth0"
183-
184-
dev_ip = utils.get_iface_ip(ifname)
185-
if not dev_ip:
186-
raise ValueError(f"Error getting interface ip: {ifname}")
219+
if mode == "docker":
220+
if not ifname:
221+
ifname = "eth0"
187222

188-
db_connection = f"{utils.generate_string(8)}:{utils.generate_string(32)}@{dev_ip}:4500"
189-
scripts.set_db_config(db_connection)
223+
dev_ip = utils.get_iface_ip(ifname)
224+
if not dev_ip:
225+
raise ValueError(f"Error getting interface ip: {ifname}")
190226

191-
if mode == "docker":
227+
db_connection = f"{utils.generate_string(8)}:{utils.generate_string(32)}@{dev_ip}:4500"
228+
scripts.set_db_config(db_connection)
192229
logger.info(f"Node IP: {dev_ip}")
193230
scripts.configure_docker(dev_ip)
194231
logger.info("Configuring docker swarm...")
@@ -218,6 +255,14 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
218255
logger.info(f"Labeled node '{hostname}' with app=graylog")
219256
else:
220257
logger.warning("Could not find current node for labeling")
258+
elif mode == "kubernetes":
259+
dev_ip = mgmt_ip
260+
if not dev_ip:
261+
raise ValueError("Error getting ip: For Kubernetes-based deployments, please supply --mgmt-ip.")
262+
263+
current_node = utils.get_node_name_by_ip(dev_ip)
264+
utils.label_node_as_mgmt_plane(current_node)
265+
db_connection = f"{utils.generate_string(8)}:{utils.generate_string(32)}@{dev_ip}:4500"
221266

222267
if not cli_pass:
223268
cli_pass = utils.generate_string(10)
@@ -286,7 +331,8 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
286331
log_level = "DEBUG" if constants.LOG_WEB_DEBUG else "INFO"
287332
scripts.deploy_k8s_stack(cli_pass, dev_ip, constants.SIMPLY_BLOCK_DOCKER_IMAGE, cluster.secret, cluster.uuid,
288333
log_del_interval, metrics_retention_period, log_level, cluster.grafana_endpoint,
289-
contact_point, constants.K8S_NAMESPACE, str(disable_monitoring), refresh_token_secret)
334+
contact_point, db_connection, constants.K8S_NAMESPACE, str(disable_monitoring),
335+
refresh_token_secret)
290336
logger.info("Deploying helm stack > Done")
291337

292338
logger.info("Configuring DB...")
@@ -307,7 +353,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
307353

308354
cluster_events.cluster_create(cluster)
309355

310-
mgmt_node_ops.add_mgmt_node(dev_ip, cluster.uuid)
356+
mgmt_node_ops.add_mgmt_node(dev_ip, mode, cluster.uuid)
311357

312358
logger.info("New Cluster has been created")
313359
logger.info(cluster.uuid)

0 commit comments

Comments
 (0)