forked from OCP-on-NERC/python-batchtools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild_yaml.py
More file actions
95 lines (91 loc) · 2.97 KB
/
build_yaml.py
File metadata and controls
95 lines (91 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from imports import *
def build_job_body(
    job_name: str,
    queue_name: str,
    image: str,
    container_name: str,
    cmdline: str,
    max_sec: int,
    gpu: str,
    gpu_req: int,
    gpu_lim: int,
    context: int,
    devpod_name: str,
    devcontainer: str,
    context_dir: str,
    jobs_dir: str,
    job_workspace: str,
    getlist_path: str,
) -> dict[str, Any]:
    """
    Build a batch/v1 Job as a dict to pass to oc.create().

    Args:
        job_name: Value for the Job's ``metadata.name``; also used as the
            stem of the ``<job_name>.log`` file written by the context script.
        queue_name: Value for the ``kueue.x-k8s.io/queue-name`` label.
        image: Container image for the single job container.
        container_name: Name of the single job container.
        cmdline: Shell command line to execute; it is interpolated verbatim
            into a ``/bin/sh -c`` script, so it must come from a trusted
            source.
        max_sec: Used for ``activeDeadlineSeconds`` on the Job and for
            ``maximumExecutionTimeSeconds`` on the pod template spec.
        gpu: The literal string ``"none"`` selects a fixed 1 CPU / 1Gi
            request and limit; any other value selects ``nvidia.com/gpu``
            resources instead.
        gpu_req: ``nvidia.com/gpu`` request (ignored when ``gpu == "none"``).
        gpu_lim: ``nvidia.com/gpu`` limit (ignored when ``gpu == "none"``).
        context: When truthy, wrap ``cmdline`` in a script that rsyncs files
            from ``devpod_name`` before running and rsyncs results back
            afterwards. When falsy, ``cmdline`` is run as-is.
        devpod_name: rsync remote host name (reached through ``oc rsh``).
        devcontainer: Container name passed to ``oc rsh -c``.
        context_dir: Remote directory the input files are copied from.
        jobs_dir: Remote directory the workspace is copied back into.
        job_workspace: Local directory (inside the job container) that
            receives the inputs and in which ``cmdline`` is executed.
        getlist_path: Remote path of the file listing which inputs to copy.

    Returns:
        The Job manifest as a plain nested dict.
    """
    if gpu == "none":
        resources = {
            "requests": {"cpu": "1", "memory": "1Gi"},
            "limits": {"cpu": "1", "memory": "1Gi"},
        }
    else:
        resources = {
            "requests": {"nvidia.com/gpu": gpu_req},
            "limits": {"nvidia.com/gpu": gpu_lim},
        }

    # With a falsy `context` the provided command is simply run via /bin/sh;
    # otherwise it is wrapped in the copy-in / run / copy-out script below.
    if context:
        rsync_verbose = "-q"
        print("running with context 1")
        command = [
            "/bin/sh",
            "-c",
            (
                # Make rsync tunnel through `oc rsh` into the dev container.
                f"export RSYNC_RSH='oc rsh -c {devcontainer}'; "
                f"mkdir -p {job_workspace} && "
                # Step 1: fetch the list of input files.
                f"rsync {rsync_verbose} --archive --no-owner --no-group "
                f"--omit-dir-times --numeric-ids "
                f"{devpod_name}:{getlist_path} {job_workspace}/getlist >/dev/null 2>&1 && "
                # Step 2: fetch the files named in that list.
                f"rsync {rsync_verbose} -r --archive --no-owner --no-group "
                f"--omit-dir-times --numeric-ids "
                f"--files-from={job_workspace}/getlist "
                f"{devpod_name}:{context_dir}/ {job_workspace}/ && "
                # Step 3: record what was copied in, so it can be excluded
                # from the copy-out. This must use the same directory that
                # --exclude-from reads below (job_workspace, not job_name).
                f"find {job_workspace} -mindepth 1 -maxdepth 1 > {job_workspace}/gotlist && "
                # Step 4: run the command, capturing stdout+stderr to a log.
                # `2>&1 |` is the POSIX spelling of bash's `|&`, which is a
                # syntax error when /bin/sh is not bash.
                # NOTE(review): `cd ..` only returns to the starting directory
                # when job_workspace is a single relative path component —
                # confirm against the caller.
                f"cd {job_workspace} && {cmdline} 2>&1 | tee {job_name}.log; cd ..; "
                # Step 5: copy the workspace back, minus the original inputs.
                f"rsync {rsync_verbose} --archive --no-owner --no-group "
                f"--omit-dir-times --no-relative --numeric-ids "
                f"--exclude-from={job_workspace}/gotlist "
                f"{job_workspace} {devpod_name}:{jobs_dir}"
            ),
        ]
    else:
        command = ["/bin/sh", "-c", cmdline]

    body = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": job_name,
            "labels": {
                "kueue.x-k8s.io/queue-name": queue_name,
                "test_name": "kueue_test",
            },
        },
        "spec": {
            "parallelism": 1,
            "completions": 1,
            "backoffLimit": 0,
            "activeDeadlineSeconds": max_sec,
            "template": {
                "spec": {
                    # NOTE(review): this key is kept as in the original;
                    # confirm the target cluster accepts it inside a pod
                    # template spec.
                    "maximumExecutionTimeSeconds": max_sec,
                    "restartPolicy": "Never",
                    "containers": [
                        {
                            "name": container_name,
                            "image": image,
                            "command": command,
                            "resources": resources,
                        }
                    ],
                }
            },
        },
    }
    return body