Skip to content

Commit 62b2fb4

Browse files
committed
Add: memfd-based SO loading for all runtimes
- Add memfd_loader.h for in-memory SO loading using memfd_create
- Integrate memfd loading into the AICPU executors across all runtimes
- Try memfd first; fall back to file-based loading if memfd fails
- Avoids leaving temporary SO files in the /tmp directory (the memfd path only creates a short-lived symlink there, removed right after dlopen)
- Provides consistent loading performance without filesystem overhead
1 parent d210a99 commit 62b2fb4

File tree

6 files changed

+612
-120
lines changed

6 files changed

+612
-120
lines changed

src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
#include "runtime.h"
3030
#include "spin_hint.h"
3131

32+
// memfd-based SO loading
33+
#include "memfd_loader.h"
34+
3235
// Runtime headers (full struct definition for create/destroy + PTO2_SCOPE)
3336
#include "pto_runtime2.h"
3437
#include "pto_runtime2_types.h"
@@ -237,6 +240,7 @@ struct AicpuExecutor {
237240
// Orchestration SO handle - defer dlclose until all tasks complete
238241
void *orch_so_handle_{nullptr};
239242
char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup
243+
int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based)
240244

241245
// Shared orchestration function pointer (loaded by first orch thread, used by all)
242246
DeviceOrchestrationFunc orch_func_{nullptr};
@@ -1596,50 +1600,69 @@ int32_t AicpuExecutor::run(Runtime *runtime) {
15961600
return -1;
15971601
}
15981602

1599-
// Try multiple paths that may allow execution on AICPU
1603+
// Try memfd first, fall back to file-based
16001604
char so_path[256];
1601-
bool file_created = false;
1602-
const char *candidate_dirs[] = {
1603-
"/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp"
1604-
};
1605-
const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]);
1606-
1607-
for (int32_t i = 0; i < num_candidates && !file_created; i++) {
1608-
snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid());
1609-
int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755);
1610-
if (fd < 0) {
1611-
DEV_INFO(
1612-
"Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno
1613-
);
1614-
continue;
1615-
}
1616-
ssize_t written = write(fd, so_data, so_size);
1617-
close(fd);
1618-
if (written != static_cast<ssize_t>(so_size)) {
1619-
DEV_INFO(
1620-
"Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno
1621-
);
1622-
unlink(so_path);
1623-
continue;
1624-
}
1625-
file_created = true;
1626-
DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size);
1627-
}
1605+
void *handle = nullptr;
1606+
int memfd = -1;
16281607

1629-
if (!file_created) {
1630-
DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx);
1631-
return -1;
1608+
// Attempt memfd-based loading first
1609+
int memfd_rc = load_orchestration_so_with_memfd(
1610+
so_data, so_size, thread_idx, &handle, so_path, &memfd
1611+
);
1612+
1613+
if (memfd_rc == 0 && handle != nullptr) {
1614+
// memfd loading succeeded, use memfd-loaded handle
1615+
orch_so_memfd_ = memfd;
16321616
}
16331617

1634-
dlerror();
1635-
void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL);
1636-
const char *dlopen_err = dlerror();
16371618
if (handle == nullptr) {
1638-
DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown");
1639-
unlink(so_path);
1640-
return -1;
1619+
// memfd failed or unavailable - use file-based loading
1620+
orch_so_memfd_ = -1;
1621+
1622+
// Try multiple paths that may allow execution on AICPU
1623+
bool file_created = false;
1624+
const char *candidate_dirs[] = {
1625+
"/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp"
1626+
};
1627+
const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]);
1628+
1629+
for (int32_t i = 0; i < num_candidates && !file_created; i++) {
1630+
snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid());
1631+
int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755);
1632+
if (fd < 0) {
1633+
DEV_INFO(
1634+
"Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno
1635+
);
1636+
continue;
1637+
}
1638+
ssize_t written = write(fd, so_data, so_size);
1639+
close(fd);
1640+
if (written != static_cast<ssize_t>(so_size)) {
1641+
DEV_INFO(
1642+
"Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno
1643+
);
1644+
unlink(so_path);
1645+
continue;
1646+
}
1647+
file_created = true;
1648+
DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size);
1649+
}
1650+
1651+
if (!file_created) {
1652+
DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx);
1653+
return -1;
1654+
}
1655+
1656+
dlerror();
1657+
handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL);
1658+
const char *dlopen_err = dlerror();
1659+
if (handle == nullptr) {
1660+
DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown");
1661+
unlink(so_path);
1662+
return -1;
1663+
}
1664+
DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle);
16411665
}
1642-
DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle);
16431666

16441667
dlerror();
16451668
auto config_func =
@@ -1970,8 +1993,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) {
19701993
// Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path)
19711994
if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) {
19721995
pto2_runtime_destroy(rt);
1973-
dlclose(orch_so_handle_);
1974-
unlink(orch_so_path_);
1996+
// Handle cleanup based on loading method
1997+
if (orch_so_memfd_ >= 0) {
1998+
// memfd-based: close fd AFTER dlclose
1999+
cleanup_memfd_so(orch_so_memfd_, orch_so_handle_);
2000+
} else {
2001+
// File-based: dlclose handle and unlink file
2002+
dlclose(orch_so_handle_);
2003+
unlink(orch_so_path_);
2004+
}
19752005
}
19762006
DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx);
19772007
}
@@ -2029,6 +2059,7 @@ void AicpuExecutor::deinit(Runtime *runtime) {
20292059
orch_args_cached_ = nullptr;
20302060
orch_so_handle_ = nullptr;
20312061
orch_so_path_[0] = '\0';
2062+
orch_so_memfd_ = -1;
20322063

20332064
// Reset register-related state
20342065
for (int32_t i = 0; i < MAX_CORES_PER_THREAD; i++) {
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Copyright (c) PyPTO Contributors.
3+
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
4+
* CANN Open Software License Agreement Version 2.0 (the "License").
5+
* Please refer to the License for details. You may not use this file except in compliance with the License.
6+
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED,
7+
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
8+
* See LICENSE in the root of the software repository for the full text of the License.
9+
* -----------------------------------------------------------------------------------------------------------
10+
*/
11+
12+
/**
13+
* @file memfd_loader.h
14+
* @brief Memory file descriptor based SO loading for AICPU environment
15+
*/
16+
17+
// Enable GNU extensions for memfd_create and MFD_CLOEXEC
18+
#ifndef _GNU_SOURCE
19+
#define _GNU_SOURCE
20+
#endif
21+
22+
#ifndef MEMFD_LOADER_H
23+
#define MEMFD_LOADER_H
24+
25+
#ifdef __cplusplus
26+
extern "C" {
27+
#endif
28+
29+
#include <dlfcn.h>
30+
#include <fcntl.h>
31+
#include <unistd.h>
32+
#include <sys/mman.h>
33+
#include <cstring>
34+
#include <cstdio>
35+
36+
#include "aicpu/device_log.h"
37+
38+
/**
39+
* Load orchestration SO using memfd
40+
*/
41+
static inline int load_orchestration_so_with_memfd(
42+
const void *so_data,
43+
size_t so_size,
44+
int orch_thread_num,
45+
void **out_handle,
46+
char *out_so_path,
47+
int *out_memfd
48+
) {
49+
*out_handle = nullptr;
50+
*out_memfd = -1;
51+
out_so_path[0] = '\0';
52+
53+
if (so_data == nullptr || so_size == 0) {
54+
return -1;
55+
}
56+
57+
// Create memfd
58+
int fd = memfd_create("libdevice_orch", MFD_CLOEXEC);
59+
60+
if (fd < 0) {
61+
DEV_INFO("memfd_create failed: errno=%d", errno);
62+
return -1;
63+
}
64+
65+
// Write SO data to memfd
66+
ssize_t written = write(fd, so_data, so_size);
67+
68+
if (written < 0) {
69+
DEV_INFO("memfd write failed: errno=%d", errno);
70+
close(fd);
71+
return -1;
72+
}
73+
if (written != static_cast<ssize_t>(so_size)) {
74+
DEV_INFO("memfd partial write: %zd/%zu", written, so_size);
75+
close(fd);
76+
return -1;
77+
}
78+
79+
// Reset file position to beginning before dlopen
80+
lseek(fd, 0, SEEK_SET);
81+
82+
// Construct /proc/self/fd/N path for symlink target
83+
char proc_fd_path[256];
84+
snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%d", fd);
85+
86+
// Create a symlink to /proc/self/fd/N with a "normal" path
87+
// This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths
88+
char link_path[256];
89+
snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num);
90+
91+
int symlink_rc = symlink(proc_fd_path, link_path);
92+
if (symlink_rc != 0) {
93+
DEV_INFO("symlink failed: errno=%d", errno);
94+
close(fd);
95+
return -1;
96+
}
97+
98+
snprintf(out_so_path, 256, "%s", link_path);
99+
100+
// Try dlopen from the symlink
101+
dlerror();
102+
void *handle = dlopen(out_so_path, RTLD_LAZY | RTLD_LOCAL);
103+
104+
// Clean up symlink immediately after dlopen (dlopen has its own reference)
105+
unlink(link_path);
106+
107+
if (handle == nullptr) {
108+
const char *dl_err = dlerror();
109+
DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown");
110+
close(fd);
111+
return -1;
112+
}
113+
114+
*out_handle = handle;
115+
*out_memfd = fd;
116+
return 0;
117+
}
118+
119+
/**
 * Release the resources of a memfd-loaded SO.
 *
 * The library handle is dlclose()d first and the backing descriptor is
 * closed afterwards. A null handle or a negative descriptor is skipped.
 *
 * @param memfd   Descriptor to close; negative values are ignored.
 * @param handle  dlopen handle to close; nullptr is ignored.
 */
static inline void cleanup_memfd_so(int memfd, void *handle) {
    if (handle) {
        dlclose(handle);
    }

    if (memfd < 0) {
        return;
    }
    close(memfd);
}
130+
131+
#ifdef __cplusplus
132+
}
133+
#endif
134+
135+
#endif // MEMFD_LOADER_H

0 commit comments

Comments
 (0)