-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathMakefile
More file actions
318 lines (289 loc) · 13.7 KB
/
Makefile
File metadata and controls
318 lines (289 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
#
# ---------------------------------------------------------------------------
# User-configurable settings. Every variable here is assigned with '?=', so a
# value coming from the environment or the make command line takes precedence.
# ---------------------------------------------------------------------------
# Installation prefixes
ROCM_PATH ?= /opt/rocm
CUDA_PATH ?= /usr/local/cuda
MPI_PATH  ?= /usr/local/openmpi

# Default compilers, located below the prefixes above
HIPCC     ?= $(ROCM_PATH)/bin/amdclang++
NVCC      ?= $(CUDA_PATH)/bin/nvcc

# 0 selects the optimized build; any other value selects the -O0 debug build
DEBUG     ?= 0
# Optional features (set to 0 to disable, 1 to enable)
# DISABLE_NIC_EXEC: Disable RDMA/NIC executor support (default: 0)
# DISABLE_MPI_COMM: Disable MPI communicator support (default: 0)
# DISABLE_DMA_BUF: Disable DMA-BUF support for GPU Direct RDMA (default: 1)
# DISABLE_AMD_SMI: Disable AMD-SMI pod membership checking support (default: 0)
# DISABLE_NVML: Disable NVML pod membership detection for CUDA builds (default: 0)
# DISABLE_POD_COMM: Disable pod communication support (default: 0)
# DISABLE_CUMEM: Disable CUDA driver API (also disables pod on CUDA) (default: 0)
# ROCm device libraries can live in different locations depending on packaging.
# hipcc/clang needs to find the amdgcn bitcode directory at link time.
#
# A non-empty ROCM_DEVICE_LIB_PATH supplied by the user (environment or command
# line) is kept unchanged. Auto-detection runs only when nothing was supplied
# and picks the first of these directories that exists:
#   1) ROCM_PATH/amdgcn/bitcode
#   2) ROCM_PATH/lib/llvm/amdgcn/bitcode
# If neither exists the variable stays empty.
ROCM_DEVICE_LIB_PATH ?=
ifeq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
ROCM_DEVICE_LIB_PATH := $(firstword $(wildcard $(ROCM_PATH)/amdgcn/bitcode $(ROCM_PATH)/lib/llvm/amdgcn/bitcode))
endif
# Set SINGLE_KERNEL=1 to compile with a single GFX kernel, which reduces
# compilation time
SINGLE_KERNEL ?= 0

# GPU target(s) to compile for; may be a space separated list.
# The default, 'native', selects the native GPU target.
GPU_TARGETS ?= native

# Executable name (not referenced by the rules visible in this file)
EXE = TransferBench
# Only perform this check if 'make clean' is not the target
ifeq ($(filter clean,$(MAKECMDGOALS)),)
ifeq ($(MAKECMDGOALS),TransferBenchCuda)
  # CUDA build: the nvcc binary named by NVCC has to exist, otherwise abort.
  $(info Building TransferBenchCuda)
  ifeq ($(shell test -e $(NVCC) && echo found),found)
    $(info Compiling TransferBenchCuda using $(NVCC))
  else
    $(error "Could not find $(NVCC). Please set CUDA_PATH appropriately")
  endif
  NVFLAGS = -x cu -lnuma -arch=native
else
  # HIP build: use HIPCC when it exists; otherwise take the first compiler
  # that exists among the known locations below ROCM_PATH, and abort if none
  # of them is present.
  ifneq ($(shell test -e $(HIPCC) && echo found),found)
    ifeq ($(shell test -e $(ROCM_PATH)/llvm/bin/amdclang++ && echo found),found)
      CXX = $(ROCM_PATH)/llvm/bin/amdclang++
    else ifeq ($(shell test -e $(ROCM_PATH)/llvm/bin/clang++ && echo found),found)
      CXX = $(ROCM_PATH)/llvm/bin/clang++
    else ifeq ($(shell test -e $(ROCM_PATH)/bin/hipcc && echo found),found)
      CXX = $(ROCM_PATH)/bin/hipcc
    else
      $(error "Could not find a HIP compiler. Tried: $(HIPCC), $(ROCM_PATH)/llvm/bin/amdclang++, $(ROCM_PATH)/llvm/bin/clang++, $(ROCM_PATH)/bin/hipcc. Check if ROCM_PATH is correct")
    endif
    $(info "Could not find $(HIPCC). Using fallback to $(CXX)")
  else
    CXX = $(HIPCC)
  endif

  # One --offload-arch option per requested GPU target
  GPU_TARGETS_FLAGS = $(foreach arch,$(GPU_TARGETS),"--offload-arch=$(arch)")
  $(info Compiling for $(GPU_TARGETS) architecture(s). Can modify this by setting GPU_TARGETS)

  # Include paths, link libraries and HIP compile options
  CXXFLAGS   = -I. -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
  HIPLDFLAGS = -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64
  HIPFLAGS   = -Wall -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)

  # Pass the device library directory only when one is known
  ifneq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
    HIPFLAGS += --rocm-device-lib-path=$(ROCM_DEVICE_LIB_PATH)
  endif
endif
# Flags shared by the HIP and CUDA compile commands.
# SINGLE_KERNEL=1 defines the SINGLE_KERNEL preprocessor macro.
ifeq ($(SINGLE_KERNEL), 1)
  COMMON_FLAGS += -DSINGLE_KERNEL
endif

# DEBUG=0 builds with -O3 and debug symbols; any other value builds with -O0
# and extended gdb debug information.
ifneq ($(DEBUG), 0)
  COMMON_FLAGS += -O0 -g -ggdb3
else
  COMMON_FLAGS += -O3 -g
endif

# Project include directories and the pthread library
COMMON_FLAGS += -I./src/header -I./src/client -I./src/client/Presets
LDFLAGS      += -lpthread
# ---------------------------------------------------------------------------
# NIC (RDMA) executor support
# The RDMA executor is compiled in only if
#   1) DISABLE_NIC_EXEC is not set to 1
#   2) IBVerbs is found in the Dynamic Linker cache
#   3) infiniband/verbs.h is found in the default include path
# When it is compiled in, DMA-BUF support is additionally probed if
# DISABLE_DMA_BUF is set to 0 (it defaults to 1, i.e. no DMA-BUF).
# ---------------------------------------------------------------------------
NIC_ENABLED = 0
DISABLE_NIC_EXEC ?= 0

# Literal number sign for the header probes below. GNU Make releases before
# 4.3 treat an unescaped number sign inside a function call as the start of a
# comment, while 4.3 and later pass a backslash-escaped one through with the
# backslash intact. Expanding a variable gives a plain number sign on both.
tb_hash := \#

ifneq ($(DISABLE_NIC_EXEC),1)
  $(info Attempting to build with NIC executor support)
  ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
    $(info - ibverbs library not found)
  else ifeq ("$(shell echo '$(tb_hash)include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
    $(info - infiniband/verbs.h not found)
  else
    COMMON_FLAGS += -DNIC_EXEC_ENABLED
    LDFLAGS += -libverbs
    NIC_ENABLED = 1
    # Disable DMA-BUF support by default (set DISABLE_DMA_BUF=0 to enable)
    DISABLE_DMA_BUF ?= 1
    ifeq ($(DISABLE_DMA_BUF), 0)
      # Check for both ibv_reg_dmabuf_mr and ROCm DMA-BUF export support.
      # Each probe counts the lines of the preprocessed header that mention
      # the symbol, so 0 means the symbol is not declared.
      HAVE_IBV_DMABUF := $(shell echo '$(tb_hash)include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'ibv_reg_dmabuf_mr')
      HAVE_ROCM_DMABUF := $(shell echo '$(tb_hash)include <hsa/hsa_ext_amd.h>' | $(CXX) -I$(ROCM_PATH)/include -E - 2>/dev/null | grep -c 'hsa_amd_portable_export_dmabuf')
      ifeq ($(HAVE_IBV_DMABUF):$(HAVE_ROCM_DMABUF), 0:0)
        $(info Building without DMA-BUF support: missing both ibv_reg_dmabuf_mr and ROCm DMA-BUF export)
      else ifeq ($(HAVE_IBV_DMABUF), 0)
        $(info Building without DMA-BUF support: missing ibv_reg_dmabuf_mr)
      else ifeq ($(HAVE_ROCM_DMABUF), 0)
        $(info Building without DMA-BUF support: missing ROCm DMA-BUF export)
      else
        COMMON_FLAGS += -DHAVE_DMABUF_SUPPORT
        $(info Building with DMA-BUF support)
      endif
    else
      $(info Building with DMA-BUF support disabled (DISABLE_DMA_BUF=1))
    endif
  endif

  # Summary of the outcome of the checks above
  ifeq ($(NIC_ENABLED), 0)
    $(info - Building without NIC executor support)
    $(info - To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed)
  else
    $(info - Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable)
  endif
endif
# ---------------------------------------------------------------------------
# MPI communicator support
# Compiled in when
#   1) DISABLE_MPI_COMM is not set to 1
#   2) mpi.h exists in MPI_PATH/include
# ---------------------------------------------------------------------------
MPI_ENABLED = 0
DISABLE_MPI_COMM ?= 0
ifneq ($(DISABLE_MPI_COMM), 1)
  $(info Attempting to build with MPI communicator support)
  ifneq ($(wildcard $(MPI_PATH)/include/mpi.h),)
    # Header present: add the define, the include path and the MPI library
    MPI_ENABLED = 1
    COMMON_FLAGS += -DMPI_COMM_ENABLED -I$(MPI_PATH)/include
    LDFLAGS += -L$(MPI_PATH)/lib -L$(MPI_PATH)/lib64 -lmpi
  else
    $(info - Unable to find mpi.h at $(MPI_PATH)/include. Please specify appropriate MPI_PATH)
  endif

  # Report the result of the detection
  ifneq ($(MPI_ENABLED), 0)
    $(info - Building with MPI communicator support. Can set DISABLE_MPI_COMM=1 to disable)
  else
    $(info - Building without MPI communicator support)
    $(info - To use TransferBench with MPI support, install MPI libraries and specify appropriate MPI_PATH)
  endif
endif
# ---------------------------------------------------------------------------
# NVML support (pod membership detection on NVIDIA platforms)
# Compiled in when
#   1) the goal is TransferBenchCuda
#   2) DISABLE_NVML is not set to 1
#   3) nvml.h exists in CUDA_PATH/include
# ---------------------------------------------------------------------------
NVML_ENABLED = 0
DISABLE_NVML ?= 0
ifeq ($(MAKECMDGOALS),TransferBenchCuda)
  ifneq ($(DISABLE_NVML), 1)
    $(info Attempting to build with NVML support)
    ifeq ($(wildcard $(CUDA_PATH)/include/nvml.h),)
      $(info - nvml.h not found at $(CUDA_PATH)/include. Building without NVML support)
      $(info - Pod membership may be forced by setting TB_FORCE_SINGLE_POD=1)
    else
      COMMON_FLAGS += -DNVML_ENABLED
      LDFLAGS += -lnvidia-ml
      NVML_ENABLED = 1
      $(info - Building with NVML support for pod membership detection)
    endif
  endif
endif
# CUDA driver API (libcuda) for the TransferBenchCuda goal.
# It is independent of pod support, but pod support on CUDA requires it, so
# DISABLE_CUMEM=1 also makes pod communication unavailable there.
DISABLE_CUMEM ?= 0
ifeq ($(MAKECMDGOALS),TransferBenchCuda)
  ifeq ($(DISABLE_CUMEM),1)
    $(info - CUDA driver API disabled (DISABLE_CUMEM=1); POD comm unavailable on CUDA)
  else
    $(info - Building with CUMEM_ENABLED (CUDA driver API, -lcuda))
    COMMON_FLAGS += -DCUMEM_ENABLED
    LDFLAGS += -lcuda
  endif
endif
# ---------------------------------------------------------------------------
# Pod communication support
# Compile with pod support if
#   1) DISABLE_POD_COMM is not set to 1
#   2) For HIP:  a small probe program that uses hipMemFabricHandle_t,
#                hipMemExportToShareableHandle, and hipMemImportFromShareableHandle
#                compiles and links successfully against amdhip64
#      For CUDA: CUDA Version >= 12.2, and DISABLE_CUMEM is not set to 1
# On HIP, AMD-SMI is additionally enabled when DISABLE_AMD_SMI is not set to 1
# and the AMD-SMI header, library and fabric-info API are all usable.
# ---------------------------------------------------------------------------
POD_ENABLED = 0
AMD_SMI_ENABLED = 0
DISABLE_POD_COMM ?= 0
DISABLE_AMD_SMI ?= 0

# Literal number sign for the probe programs below. GNU Make releases before
# 4.3 treat an unescaped number sign inside a function call as the start of a
# comment, while 4.3 and later pass a backslash-escaped one through with the
# backslash intact. Expanding a variable gives a plain number sign on both.
tb_hash := \#

ifneq ($(DISABLE_POD_COMM), 1)
  $(info Attempting to build with pod communication support)
  ifeq ($(MAKECMDGOALS),TransferBenchCuda)
    # Check for appropriate CUDA support for MNNVL
    CUDA_MIN_MAJOR := 12
    CUDA_MIN_MINOR := 2
    # Reduce the "release X.Y" line of the nvcc version banner to "X Y"
    CUDA_VERSION_STR := $(shell $(NVCC) --version | grep release | sed -E 's/.*release ([0-9]+)\.([0-9]+).*/\1 \2/')
    CUDA_MAJOR := $(word 1,$(CUDA_VERSION_STR))
    CUDA_MINOR := $(word 2,$(CUDA_VERSION_STR))
    # "yes" when (major, minor) is at least (CUDA_MIN_MAJOR, CUDA_MIN_MINOR)
    CUDA_VERSION_OK := $(shell \
      if [ $(CUDA_MAJOR) -gt $(CUDA_MIN_MAJOR) ] || \
      [ $(CUDA_MAJOR) -eq $(CUDA_MIN_MAJOR) -a $(CUDA_MINOR) -ge $(CUDA_MIN_MINOR) ]; then \
      echo yes; \
      else \
      echo no; \
      fi)
    ifeq ($(CUDA_VERSION_OK),yes)
      $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which has MNNVL support)
      ifeq ($(DISABLE_CUMEM),1)
        $(info - Pod communication skipped on CUDA: requires CUMEM_ENABLED (DISABLE_CUMEM=1))
      else
        COMMON_FLAGS += -DPOD_COMM_ENABLED
        POD_ENABLED = 1
      endif
    else
      $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which does not have MNNVL support)
      $(info - Pod support will require CUDA version of at least $(CUDA_MIN_MAJOR).$(CUDA_MIN_MINOR))
    endif
  else
    # Check for the HIP fabric API functions used by TransferBench at runtime.
    # The probe source is piped to the compiler on stdin and linked against
    # amdhip64; the result is "yes" only if compiling and linking succeed.
    HIP_HAS_FABRIC := $(shell \
      printf '%s\n' \
      '$(tb_hash)include <hip/hip_runtime_api.h>' \
      'int main() {' \
      ' hipMemFabricHandle_t fabricHandle = {};' \
      ' hipMemGenericAllocationHandle_t allocationHandle = {};' \
      ' hipMemExportToShareableHandle(&fabricHandle, allocationHandle, hipMemHandleTypeFabric, 0);' \
      ' hipMemImportFromShareableHandle(&allocationHandle, &fabricHandle, hipMemHandleTypeFabric);' \
      ' return 0;' \
      '}' | \
      $(CXX) -I$(ROCM_PATH)/include -D__HIP_PLATFORM_AMD__ -x c++ - \
      -L$(ROCM_PATH)/lib -L$(ROCM_PATH)/lib64 -lamdhip64 -o /dev/null 2>/dev/null && echo yes || echo no)
    ifeq ($(HIP_HAS_FABRIC),yes)
      $(info - HIP fabric API found; enabling pod communication support)
      COMMON_FLAGS += -DPOD_COMM_ENABLED
      POD_ENABLED = 1
      ifeq ($(DISABLE_AMD_SMI), 1)
        $(info - AMD-SMI disabled via DISABLE_AMD_SMI=1; set TB_FORCE_SINGLE_POD=1 at runtime to override pod membership)
      else
        # Prefer AMD-SMI for pod membership queries; fall back to TB_FORCE_SINGLE_POD=1 at runtime.
        AMD_SMI_HEADER := $(ROCM_PATH)/include/amd_smi/amdsmi.h
        # First libamd_smi.so found under ROCM_PATH/lib, then ROCM_PATH/lib64
        AMD_SMI_LIB := $(firstword $(wildcard $(ROCM_PATH)/lib/libamd_smi.so $(ROCM_PATH)/lib64/libamd_smi.so))
        ifneq ($(wildcard $(AMD_SMI_HEADER)),)
          ifneq ($(AMD_SMI_LIB),)
            # Check for the AMD-SMI functions used by TransferBench at runtime.
            # "yes" only if the probe compiles and links against libamd_smi.
            AMDSMI_HAS_FABRIC := $(shell \
              printf '%s\n' \
              '$(tb_hash)include <amd_smi/amdsmi.h>' \
              'int main() {' \
              ' amdsmi_bdf_t bdf = {};' \
              ' amdsmi_processor_handle h;' \
              ' amdsmi_get_processor_handle_from_bdf(bdf, &h);' \
              ' amdsmi_fabric_info_t fi;' \
              ' amdsmi_get_gpu_fabric_info(h, &fi);' \
              ' (void)fi.fabric_info.fabric_version.v1.ppod_id;' \
              ' (void)fi.fabric_info.fabric_version.v1.vpod_id;' \
              ' return 0;' \
              '}' | \
              $(CXX) -I$(ROCM_PATH)/include -x c++ - \
              -L$(dir $(AMD_SMI_LIB)) -lamd_smi -o /dev/null 2>/dev/null && echo yes || echo no)
            ifeq ($(AMDSMI_HAS_FABRIC),yes)
              $(info - AMD-SMI fabric API found; using AMD-SMI for pod membership queries)
              COMMON_FLAGS += -DAMD_SMI_ENABLED
              LDFLAGS += -L$(dir $(AMD_SMI_LIB)) -lamd_smi
              AMD_SMI_ENABLED = 1
            else
              $(info - AMD-SMI fabric API not found; set TB_FORCE_SINGLE_POD=1 at runtime to override pod membership)
            endif
          else
            $(info - libamd_smi not found under $(ROCM_PATH)/lib or $(ROCM_PATH)/lib64; set TB_FORCE_SINGLE_POD=1 at runtime to override pod membership)
          endif
        else
          $(info - amd_smi/amdsmi.h not found under $(ROCM_PATH)/include; set TB_FORCE_SINGLE_POD=1 at runtime to override pod membership)
        endif
      endif
    else
      $(info - HIP fabric API not found; disabling pod communication support)
    endif
  endif
endif
# Git metadata passed to the compiler as string macros (branch + short commit
# hash). Each value is resolved in this order:
#   1) git rev-parse, run against the directory of the first makefile read
#   2) the GIT_VERSION file in that directory (populated by packaging
#      scripts): line 1 for the branch, line 2 for the commit
#   3) the literal string "unknown"
_TB_DIR := $(dir $(abspath $(firstword $(MAKEFILE_LIST))))

# Helper for the two lookups below.
#   argument 1: rev-parse option placed before HEAD
#   argument 2: line number to read from GIT_VERSION as the fallback
tb_git_query = $(shell git -C "$(_TB_DIR)" rev-parse $(1) HEAD 2>/dev/null || sed -n '$(2)p' "$(_TB_DIR)GIT_VERSION" 2>/dev/null || echo unknown)

TB_GIT_BRANCH := $(call tb_git_query,--abbrev-ref,1)
TB_GIT_COMMIT := $(call tb_git_query,--short,2)
COMMON_FLAGS += -DTB_GIT_BRANCH='"$(TB_GIT_BRANCH)"' -DTB_GIT_COMMIT='"$(TB_GIT_COMMIT)"'
endif
# Every .hpp file below the current directory is a prerequisite of both
# executables, so touching any header triggers a rebuild. The list is gathered
# once at parse time; the explicit '.' start path and the -name test avoid the
# GNU-only "find without a path" form and the stray backslash of the previous
# regular expression, while selecting the same files.
TB_HEADERS := $(shell find . -name '*.hpp')

.PHONY: all clean

# Default goal: the HIP/ROCm executable
all: TransferBench

# HIP/ROCm build of the client; only Client.cpp ($<) is passed to the compiler
TransferBench: ./src/client/Client.cpp $(TB_HEADERS)
	$(CXX) $(CXXFLAGS) $(HIPFLAGS) $(COMMON_FLAGS) $< -o $@ $(HIPLDFLAGS) $(LDFLAGS)

# CUDA build of the same client source, compiled with nvcc
TransferBenchCuda: ./src/client/Client.cpp $(TB_HEADERS)
	$(NVCC) $(NVFLAGS) $(COMMON_FLAGS) $< -o $@ $(LDFLAGS)

# Remove both executables
clean:
	rm -f ./TransferBench ./TransferBenchCuda