5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -28,6 +28,7 @@ option(SNMALLOC_IPO "Link with IPO/LTO support" OFF)
option(SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS "Build tests and ld_preload for individual mitigations" OFF)
option(SNMALLOC_ENABLE_DYNAMIC_LOADING "Build such that snmalloc can be dynamically loaded. This is not required for LD_PRELOAD, and will harm performance if enabled." OFF)
option(SNMALLOC_ENABLE_WAIT_ON_ADDRESS "Use wait on address backoff strategy if it is available" ON)
option(SNMALLOC_PTHREAD_FORK_PROTECTION "Guard against forking while allocator locks are held using pthread_atfork hooks" OFF)
option(SNMALLOC_ENABLE_FUZZING "Enable fuzzing instrumentation tests" OFF)
option(SNMALLOC_USE_SELF_VENDORED_STL "Avoid using system STL" OFF)
# Options that apply only if we're not building the header-only library
@@ -133,6 +134,9 @@ int main() {
}
" SNMALLOC_PTHREAD_ATFORK_WORKS)

if (SNMALLOC_PTHREAD_FORK_PROTECTION AND NOT SNMALLOC_PTHREAD_ATFORK_WORKS)
message(FATAL_ERROR "SNMALLOC_PTHREAD_FORK_PROTECTION requires working pthread_atfork support")
endif()

if (NOT MSVC AND NOT (SNMALLOC_CLEANUP STREQUAL CXX11_DESTRUCTORS))
# If the target compiler doesn't support -nostdlib++ then we must enable C at
@@ -333,6 +337,7 @@ endfunction()
add_as_define(SNMALLOC_QEMU_WORKAROUND)
add_as_define(SNMALLOC_TRACING)
add_as_define(SNMALLOC_CI_BUILD)
add_as_define(SNMALLOC_PTHREAD_FORK_PROTECTION)
add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY)
add_as_define(SNMALLOC_PTHREAD_ATFORK_WORKS)
add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H)
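For context, a minimal sketch of how the new option is expected to reach the C++ sources, assuming add_as_define turns each enabled option into a compile definition of the same name (as the surrounding add_as_define calls suggest):

#ifdef SNMALLOC_PTHREAD_FORK_PROTECTION
  // Real pthread_atfork-based PreventFork is compiled in.
#else
  // Zero-cost dummy PreventFork is used instead.
#endif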
5 changes: 3 additions & 2 deletions src/snmalloc/ds_aal/flaglock.h
@@ -116,15 +116,16 @@ namespace snmalloc
public:
FlagLock(FlagWord& lock) : lock(lock)
{
while (lock.flag.exchange(true, stl::memory_order_acquire))
while (
SNMALLOC_UNLIKELY(lock.flag.exchange(true, stl::memory_order_acquire)))
{
// assert_not_owned_by_current_thread is only called when the first
// acquisition fails, which means the lock is already held somewhere
// else.
lock.assert_not_owned_by_current_thread();
// This loop is better for spin-waiting because it won't issue an
// expensive write operation (xchg, for example).
while (lock.flag.load(stl::memory_order_relaxed))
while (SNMALLOC_UNLIKELY(lock.flag.load(stl::memory_order_relaxed)))
{
Aal::pause();
}
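The pattern above is a test-and-test-and-set lock: the expensive exchange runs once, and contended waiters then spin on a plain load. A self-contained sketch using standard C++ atomics (std::this_thread::yield stands in for Aal::pause; this is an illustration, not snmalloc's actual FlagLock):

#include <atomic>
#include <thread>

class SpinFlag
{
  std::atomic<bool> flag{false};

public:
  void lock()
  {
    // Try the read-modify-write first; on the uncontended fast path the
    // lock is free and a single exchange acquires it.
    while (flag.exchange(true, std::memory_order_acquire))
    {
      // Contended: spin on a relaxed load. A plain load keeps the cache
      // line in shared state, avoiding the coherence traffic a repeated
      // xchg would generate.
      while (flag.load(std::memory_order_relaxed))
        std::this_thread::yield(); // stand-in for Aal::pause()
    }
  }

  void unlock()
  {
    flag.store(false, std::memory_order_release);
  }
};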
17 changes: 15 additions & 2 deletions src/snmalloc/ds_aal/prevent_fork.h
@@ -10,6 +10,8 @@

namespace snmalloc
{

#ifdef SNMALLOC_PTHREAD_FORK_PROTECTION
// This is a simple implementation of a class that can be
// used to prevent a process from forking. Holding a lock
// in the allocator while forking can lead to deadlocks.
@@ -43,15 +45,15 @@ namespace snmalloc
// calls would be ignored.
static void ensure_init()
{
#ifdef SNMALLOC_PTHREAD_ATFORK_WORKS
# ifdef SNMALLOC_PTHREAD_ATFORK_WORKS
static stl::Atomic<bool> initialised{false};

if (initialised.load(stl::memory_order_acquire))
return;

pthread_atfork(prefork, postfork_parent, postfork_child);
initialised.store(true, stl::memory_order_release);
#endif
# endif
Comment on lines 46 to +56 (Copilot AI, Feb 6, 2026):

When SNMALLOC_PTHREAD_FORK_PROTECTION is enabled, ensure_init() only calls pthread_atfork under #ifdef SNMALLOC_PTHREAD_ATFORK_WORKS. If that macro is not consistently defined by the build (and it currently isn’t exported from CMake), fork protection silently becomes a no-op even though the option is ON. Consider making the pthread_atfork path depend solely on SNMALLOC_PTHREAD_FORK_PROTECTION (given the CMake fatal-error guard), or ensure the SNMALLOC_PTHREAD_ATFORK_WORKS define is always provided when detected.
};

public:
@@ -158,4 +160,15 @@ namespace snmalloc
threads_preventing_fork--;
}
};
#else
// Fork protection can be costly and is generally not required.
// This is a dummy implementation of the PreventFork class that does nothing.
class PreventFork
{
public:
PreventFork() {}

~PreventFork() {}
};
#endif
} // namespace snmalloc
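For reference, a minimal sketch of the pthread_atfork pattern the real PreventFork builds on, with a std::mutex standing in for the allocator lock; the actual class uses its own atomic counters and spin logic rather than a C++ mutex:

#include <mutex>
#include <pthread.h>

static std::mutex alloc_lock; // stand-in for the allocator's internal lock

static void prefork() { alloc_lock.lock(); }           // no fork mid-critical-section
static void postfork_parent() { alloc_lock.unlock(); } // parent resumes normally
static void postfork_child() { alloc_lock.unlock(); }  // child starts unlocked

static void ensure_init()
{
  // pthread_atfork registers handlers that run around every fork(), so a
  // child never observes the allocator lock held by a thread that does
  // not exist in the child. The static local registers exactly once.
  static int registered =
    pthread_atfork(prefork, postfork_parent, postfork_child);
  (void)registered;
}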
9 changes: 5 additions & 4 deletions src/snmalloc/global/threadalloc.h
@@ -93,7 +93,7 @@ namespace snmalloc
// we need to record if we are already in that state as we will not
// receive another teardown call, so each operation needs to release
// the underlying data structures after the call.
static inline thread_local bool teardown_called{false};
static inline thread_local size_t times_teardown_called{0};

public:
/**
@@ -114,8 +114,9 @@
if (alloc == &default_alloc)
return;

teardown_called = true;
alloc->flush();
times_teardown_called++;
if (bits::is_pow2(times_teardown_called) || times_teardown_called < 128)
alloc->flush();
AllocPool<Config>::release(alloc);
alloc = const_cast<Alloc*>(&default_alloc);
Comment on lines +117 to 121 (Copilot AI, Feb 6, 2026):

ThreadAlloc::teardown() now sometimes releases an allocator back to AllocPool without calling alloc->flush(). Since flush() is what drains fast free lists, processes the message queue, and posts the remote-dealloc cache, skipping it can leave remote frees unposted and memory retained in an allocator that may sit idle in the pool for a long time. If the intent is to amortize teardown cost, consider introducing a cheaper “post-teardown flush” that at least processes/publishes remote deallocations (and/or document why it’s safe to skip a full flush before pooling).
}
@@ -131,7 +132,7 @@
template<typename Restart, typename... Args>
SNMALLOC_SLOW_PATH static auto check_init_slow(Restart r, Args... args)
{
bool post_teardown = teardown_called;
bool post_teardown = times_teardown_called > 0;

alloc = AllocPool<Config>::acquire();

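The new flush policy can be read in isolation: the first 128 teardowns always flush, and after that only power-of-two counts do, so n repeated teardown/reacquire cycles after main() pay O(log n) full flushes instead of O(n). A standalone sketch of the schedule (is_pow2 written out here; snmalloc's bits::is_pow2 is assumed equivalent):

#include <cstddef>
#include <cstdio>

static bool is_pow2(size_t x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

static bool should_flush(size_t times_teardown_called)
{
  // Flush eagerly early on; amortise once teardown happens repeatedly.
  return is_pow2(times_teardown_called) || times_teardown_called < 128;
}

int main()
{
  // Teardowns 1..128 all flush; afterwards only 256, 512, 1024, ...
  for (size_t n : {size_t(1), size_t(100), size_t(128), size_t(129),
                   size_t(200), size_t(256)})
    std::printf("%zu -> %s\n", n, should_flush(n) ? "flush" : "skip");
}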
26 changes: 16 additions & 10 deletions src/snmalloc/mem/corealloc.h
@@ -1169,6 +1169,10 @@ namespace snmalloc
template<bool check_slabs = false>
SNMALLOC_SLOW_PATH void dealloc_local_slabs(smallsizeclass_t sizeclass)
{
if constexpr (!check_slabs)
if (alloc_classes[sizeclass].unused == 0)
return;

// Return unused slabs of sizeclass_t back to global allocator
alloc_classes[sizeclass].available.iterate([this, sizeclass](auto* meta) {
auto domesticate =
@@ -1420,18 +1424,20 @@
for (smallsizeclass_t sizeclass = 0; sizeclass < NUM_SMALL_SIZECLASSES;
sizeclass++)
{
dealloc_local_slabs<true>(sizeclass);
dealloc_local_slabs<mitigations(freelist_teardown_validate)>(sizeclass);
}

laden.iterate(
[domesticate](BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA {
if (!meta->is_large())
{
meta->free_queue.validate(
freelist::Object::key_root, meta->as_key_tweak(), domesticate);
}
});

if constexpr (mitigations(freelist_teardown_validate))
{
laden.iterate(
[domesticate](BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA {
if (!meta->is_large())
{
meta->free_queue.validate(
freelist::Object::key_root, meta->as_key_tweak(), domesticate);
}
});
}
// Set the remote_dealloc_cache to immediately slow path.
remote_dealloc_cache.capacity = 0;

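The mitigations(freelist_teardown_validate) toggle is a compile-time predicate, so if constexpr drops the validation code entirely from builds where the mitigation is off. An illustrative standalone version of the pattern (the enum, predicate, and macro here are hypothetical, not snmalloc's actual mitigation machinery):

#include <cstdio>

enum class Mitigation { freelist_teardown_validate /* ... */ };

constexpr bool mitigation_enabled(Mitigation m)
{
  // In a real build this would be driven by build-time configuration.
#ifdef ENABLE_TEARDOWN_VALIDATE
  return m == Mitigation::freelist_teardown_validate;
#else
  (void)m;
  return false;
#endif
}

void teardown_example()
{
  if constexpr (mitigation_enabled(Mitigation::freelist_teardown_validate))
  {
    // Expensive free-queue validation: compiled in only when requested.
    std::puts("validating free queues");
  }
  // Fast-path teardown work continues either way.
}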
12 changes: 10 additions & 2 deletions src/snmalloc/mem/pooled.h
@@ -51,13 +51,21 @@ namespace snmalloc
public:
void set_in_use()
{
if (in_use.exchange(true))
#ifndef NDEBUG
if (in_use.exchange(true, stl::memory_order_acq_rel))
error("Critical error: double use of Pooled Type!");
#else
in_use.store(true, stl::memory_order_relaxed);
#endif
}

void reset_in_use()
{
in_use.store(false);
#ifndef NDEBUG
in_use.store(false, stl::memory_order_release);
#else
in_use.store(false, stl::memory_order_relaxed);
#endif
}

bool debug_is_in_use()
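The intent of the change above, restated as a self-contained sketch: debug builds pay for an acquire-release exchange to detect double acquisition, while release builds downgrade to relaxed stores on the assumption that the pool's own synchronisation orders the handover (simplified from Pooled<T>):

#include <atomic>
#include <cstdio>
#include <cstdlib>

struct PooledFlag
{
  std::atomic<bool> in_use{false};

  void set_in_use()
  {
#ifndef NDEBUG
    // exchange returns the previous value, so a second setter is caught.
    if (in_use.exchange(true, std::memory_order_acq_rel))
    {
      std::fputs("double use of pooled object\n", stderr);
      std::abort();
    }
#else
    in_use.store(true, std::memory_order_relaxed);
#endif
  }

  void reset_in_use()
  {
#ifndef NDEBUG
    in_use.store(false, std::memory_order_release);
#else
    in_use.store(false, std::memory_order_relaxed);
#endif
  }
};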
50 changes: 50 additions & 0 deletions src/test/perf/post_teardown/post-teardown.cc
@@ -0,0 +1,50 @@
#include <cstdlib>
#include <snmalloc/snmalloc.h>
#include <test/measuretime.h>
#include <test/setup.h>
#include <vector>

using namespace snmalloc;

void fill(std::vector<void*>& out, size_t count, size_t size)
{
out.reserve(count);
for (size_t i = 0; i < count; i++)
{
out.push_back(snmalloc::alloc<Uninit>(size));
}
}

void drain(const char* label, std::vector<void*>& vec, size_t size)
{
MeasureTime m;
m << label << " (" << vec.size() << " x " << size << " B)";
for (void* p : vec)
{
snmalloc::dealloc(p, size);
}
vec.clear();
}

int main(int, char**)
{
setup();
// Issue #809: perf when many objects are freed after the allocator has
// already been finalised (e.g. static/global teardown). Keep counts equal
// for baseline and post-teardown to isolate the teardown cost.
constexpr size_t alloc_count = 1 << 18;
constexpr size_t obj_size = 64;

std::vector<void*> ptrs;
fill(ptrs, alloc_count, obj_size);
drain("Baseline dealloc before finalise", ptrs, obj_size);

// Simulate the allocator already being torn down before remaining frees
// (post-main / static destruction path from #809).
ThreadAlloc::teardown();

fill(ptrs, alloc_count, obj_size);
drain("Immediate dealloc after teardown", ptrs, obj_size);

return 0;
}