|
29 | 29 | #include "runtime.h" |
30 | 30 | #include "spin_hint.h" |
31 | 31 |
|
| 32 | +// memfd-based SO loading |
| 33 | +#include "memfd_loader.h" |
| 34 | + |
32 | 35 | // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) |
33 | 36 | #include "pto_runtime2.h" |
34 | 37 | #include "pto_runtime2_types.h" |
@@ -237,6 +240,7 @@ struct AicpuExecutor { |
237 | 240 | // Orchestration SO handle - defer dlclose until all tasks complete |
238 | 241 | void *orch_so_handle_{nullptr}; |
239 | 242 | char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup |
| 243 | + int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based) |
240 | 244 |
|
241 | 245 | // Shared orchestration function pointer (loaded by first orch thread, used by all) |
242 | 246 | DeviceOrchestrationFunc orch_func_{nullptr}; |
@@ -1596,50 +1600,69 @@ int32_t AicpuExecutor::run(Runtime *runtime) { |
1596 | 1600 | return -1; |
1597 | 1601 | } |
1598 | 1602 |
|
1599 | | - // Try multiple paths that may allow execution on AICPU |
| 1603 | + // Try memfd first, fall back to file-based |
1600 | 1604 | char so_path[256]; |
1601 | | - bool file_created = false; |
1602 | | - const char *candidate_dirs[] = { |
1603 | | - "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" |
1604 | | - }; |
1605 | | - const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); |
1606 | | - |
1607 | | - for (int32_t i = 0; i < num_candidates && !file_created; i++) { |
1608 | | - snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); |
1609 | | - int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); |
1610 | | - if (fd < 0) { |
1611 | | - DEV_INFO( |
1612 | | - "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno |
1613 | | - ); |
1614 | | - continue; |
1615 | | - } |
1616 | | - ssize_t written = write(fd, so_data, so_size); |
1617 | | - close(fd); |
1618 | | - if (written != static_cast<ssize_t>(so_size)) { |
1619 | | - DEV_INFO( |
1620 | | - "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno |
1621 | | - ); |
1622 | | - unlink(so_path); |
1623 | | - continue; |
1624 | | - } |
1625 | | - file_created = true; |
1626 | | - DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); |
1627 | | - } |
| 1605 | + void *handle = nullptr; |
| 1606 | + int memfd = -1; |
1628 | 1607 |
|
1629 | | - if (!file_created) { |
1630 | | - DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); |
1631 | | - return -1; |
| 1608 | + // Attempt memfd-based loading first |
| 1609 | + int memfd_rc = load_orchestration_so_with_memfd( |
| 1610 | + so_data, so_size, thread_idx, &handle, so_path, &memfd |
| 1611 | + ); |
| 1612 | + |
| 1613 | + if (memfd_rc == 0 && handle != nullptr) { |
| 1614 | + // memfd loading succeeded, use memfd-loaded handle |
| 1615 | + orch_so_memfd_ = memfd; |
1632 | 1616 | } |
1633 | 1617 |
|
1634 | | - dlerror(); |
1635 | | - void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); |
1636 | | - const char *dlopen_err = dlerror(); |
1637 | 1618 | if (handle == nullptr) { |
1638 | | - DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); |
1639 | | - unlink(so_path); |
1640 | | - return -1; |
| 1619 | + // memfd failed or unavailable - use file-based loading |
| 1620 | + orch_so_memfd_ = -1; |
| 1621 | + |
| 1622 | + // Try multiple paths that may allow execution on AICPU |
| 1623 | + bool file_created = false; |
| 1624 | + const char *candidate_dirs[] = { |
| 1625 | + "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" |
| 1626 | + }; |
| 1627 | + const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); |
| 1628 | + |
| 1629 | + for (int32_t i = 0; i < num_candidates && !file_created; i++) { |
| 1630 | + snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); |
| 1631 | + int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); |
| 1632 | + if (fd < 0) { |
| 1633 | + DEV_INFO( |
| 1634 | + "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno |
| 1635 | + ); |
| 1636 | + continue; |
| 1637 | + } |
| 1638 | + ssize_t written = write(fd, so_data, so_size); |
| 1639 | + close(fd); |
| 1640 | + if (written != static_cast<ssize_t>(so_size)) { |
| 1641 | + DEV_INFO( |
| 1642 | + "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno |
| 1643 | + ); |
| 1644 | + unlink(so_path); |
| 1645 | + continue; |
| 1646 | + } |
| 1647 | + file_created = true; |
| 1648 | + DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); |
| 1649 | + } |
| 1650 | + |
| 1651 | + if (!file_created) { |
| 1652 | + DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); |
| 1653 | + return -1; |
| 1654 | + } |
| 1655 | + |
| 1656 | + dlerror(); |
| 1657 | + handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); |
| 1658 | + const char *dlopen_err = dlerror(); |
| 1659 | + if (handle == nullptr) { |
| 1660 | + DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); |
| 1661 | + unlink(so_path); |
| 1662 | + return -1; |
| 1663 | + } |
| 1664 | + DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); |
1641 | 1665 | } |
1642 | | - DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); |
1643 | 1666 |
|
1644 | 1667 | dlerror(); |
1645 | 1668 | auto config_func = |
@@ -1970,8 +1993,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) { |
1970 | 1993 | // Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path) |
1971 | 1994 | if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { |
1972 | 1995 | pto2_runtime_destroy(rt); |
1973 | | - dlclose(orch_so_handle_); |
1974 | | - unlink(orch_so_path_); |
| 1996 | + // Handle cleanup based on loading method |
| 1997 | + if (orch_so_memfd_ >= 0) { |
| 1998 | + // memfd-based: close fd AFTER dlclose |
| 1999 | + cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); |
| 2000 | + } else { |
| 2001 | + // File-based: dlclose handle and unlink file |
| 2002 | + dlclose(orch_so_handle_); |
| 2003 | + unlink(orch_so_path_); |
| 2004 | + } |
1975 | 2005 | } |
1976 | 2006 | DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx); |
1977 | 2007 | } |
@@ -2029,6 +2059,7 @@ void AicpuExecutor::deinit(Runtime *runtime) { |
2029 | 2059 | orch_args_cached_ = nullptr; |
2030 | 2060 | orch_so_handle_ = nullptr; |
2031 | 2061 | orch_so_path_[0] = '\0'; |
| 2062 | + orch_so_memfd_ = -1; |
2032 | 2063 |
|
2033 | 2064 | // Reset register-related state |
2034 | 2065 | for (int32_t i = 0; i < MAX_CORES_PER_THREAD; i++) { |
|
0 commit comments