Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ddprof-lib/src/main/cpp/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ class Dictionary {
void clear();

bool check(const char* key);
// NOT signal-safe: the inserting lookup overloads call malloc/calloc on miss
// (see allocateKey and the calloc in dictionary.cpp). Signal handlers must use
// bounded_lookup(key, length, 0) instead, which never inserts and returns
// INT_MAX on miss.
unsigned int lookup(const char *key);
unsigned int lookup(const char *key, size_t length);
unsigned int bounded_lookup(const char *key, size_t length, int size_limit);
Expand Down
19 changes: 13 additions & 6 deletions ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,9 +1152,10 @@ void Recording::writeCpool(Buffer *buf) {
// constant pool count - bump each time a new pool is added
buf->put8(12);

// Profiler::instance()->classMap() provides access to non-locked _class_map
// instance The non-locked access is ok here as this code will never run
// concurrently to _class_map.clear()
// _class_map is shared across the dump (this thread) and the JVMTI shared-lock
// writers (Profiler::lookupClass and friends). writeClasses() takes
// _class_map_lock shared while reading; the exclusive _class_map.clear() in
// Profiler::dump runs only after this method returns.
Lookup lookup(this, &_method_map, Profiler::instance()->classMap());
writeFrameTypes(buf);
writeThreadStates(buf);
Expand Down Expand Up @@ -1368,9 +1369,15 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) {

void Recording::writeClasses(Buffer *buf, Lookup *lookup) {
std::map<u32, const char *> classes;
// no need to lock _classes as this code will never run concurrently with
// resetting that dictionary
lookup->_classes->collect(classes);
// Hold _class_map_lock shared while reading. JVMTI writers
// (Profiler::lookupClass, ObjectSampler, LivenessTracker) also take it
// shared and use CAS-based inserts that are safe against concurrent shared
// readers; the exclusive _class_map.clear() in Profiler::dump runs only
// after writeClasses() returns and is blocked here.
{
SharedLockGuard guard(Profiler::instance()->classMapLock());
lookup->_classes->collect(classes);
}

buf->putVar64(T_CLASS);
buf->putVar64(classes.size());
Expand Down
20 changes: 18 additions & 2 deletions ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

#include <climits>
#include <cstdlib>
#include <setjmp.h>
#include "asyncSampleMutex.h"
Expand Down Expand Up @@ -530,8 +531,23 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex
uintptr_t receiver = frame.jarg0();
if (receiver != 0) {
VMSymbol* symbol = VMKlass::fromOop(receiver)->name();
u32 class_id = profiler->classMap()->lookup(symbol->body(), symbol->length());
fillFrame(frames[depth++], BCI_ALLOC, class_id);
// walkVM runs in a signal handler. _class_map is mutated
// under _class_map_lock (shared by Profiler::lookupClass
// inserters, exclusive by _class_map.clear() in the dump
// path between unlockAll() and lock()). bounded_lookup
// with size_limit=0 never inserts (no malloc), but it
// still traverses row->next and reads row->keys, which
// clear() concurrently frees. Take the lock shared via
// try-lock; if an exclusive clear() is in progress, drop
// the synthetic frame rather than read freed memory.
OptionalSharedLockGuard guard(profiler->classMapLock());
if (guard.ownsLock()) {
u32 class_id = profiler->classMap()->bounded_lookup(
symbol->body(), symbol->length(), 0);
if (class_id != INT_MAX) {
fillFrame(frames[depth++], BCI_ALLOC, class_id);
}
}
}
}

Expand Down
1 change: 1 addition & 0 deletions ddprof-lib/src/main/cpp/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class alignas(alignof(SpinLock)) Profiler {
Engine *wallEngine() { return _wall_engine; }

Dictionary *classMap() { return &_class_map; }
SpinLock *classMapLock() { return &_class_map_lock; }
Dictionary *stringLabelMap() { return &_string_label_map; }
Dictionary *contextValueMap() { return &_context_value_map; }
u32 numContextAttributes() { return _num_context_attributes; }
Expand Down
168 changes: 168 additions & 0 deletions ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#include <gtest/gtest.h>
#include <atomic>
#include <chrono>
#include <climits>
#include <cstdio>
#include <cstring>
#include <map>
#include <string>
#include <thread>
#include <vector>
Comment thread
jbachorik marked this conversation as resolved.
Comment thread
jbachorik marked this conversation as resolved.

#include "dictionary.h"
#include "spinLock.h"
#include "../../main/cpp/gtest_crash_handler.h"

// PROF-14549 regression tests.
//
// These tests pin down two contracts:
// (1) bounded_lookup(key, length, 0) is read-only (no malloc/calloc) and
// returns INT_MAX on miss. This is the contract walkVM relies on at
// hotspotSupport.cpp:388 to be async-signal-safe.
// (2) Dictionary::collect, when externally guarded by a SpinLock taken
// shared, can run concurrently with shared-lock inserters and is
// serialized against an exclusive-lock Dictionary::clear() — matching
// the protocol Recording::writeClasses now uses around _class_map_lock.
//
// Name reported by the crash handler when a test in this binary crashes.
static constexpr char DICTIONARY_CONCURRENT_TEST_NAME[] = "DictionaryConcurrent";

namespace {

// RAII scope: installs the gtest crash handler for the lifetime of the test
// binary and restores the default signal handlers on teardown.
struct CrashHandlerScope {
  CrashHandlerScope() {
    installGtestCrashHandler<DICTIONARY_CONCURRENT_TEST_NAME>();
  }
  ~CrashHandlerScope() {
    restoreDefaultSignalHandlers();
  }
};

// File-scope instance: constructed before main() runs, destroyed after gtest
// finishes, so the crash handler covers every test in this translation unit.
static CrashHandlerScope crash_handler_scope;

} // namespace

// (1a) bounded_lookup with size_limit=0 must not insert on miss.
TEST(DictionaryConcurrent, BoundedLookupMissReturnsSentinelAndDoesNotInsert) {
  Dictionary dict(/*id=*/0);

  // Snapshot helper: collects the dictionary's current (id, key) contents so
  // each miss below can be checked against an empty table.
  auto snapshot = [&dict]() {
    std::map<unsigned int, const char*> contents;
    dict.collect(contents);
    return contents;
  };

  ASSERT_TRUE(snapshot().empty());

  // A read-only miss must report the INT_MAX sentinel and leave the table
  // untouched (no entry inserted, no allocation observable via collect()).
  const char* cold_key = "Lorg/example/Cold;";
  EXPECT_EQ(static_cast<unsigned int>(INT_MAX),
            dict.bounded_lookup(cold_key, strlen(cold_key), 0));
  EXPECT_TRUE(snapshot().empty());

  // A second miss on a different key must also leave the dictionary empty.
  const char* other_key = "Lorg/example/Other;";
  EXPECT_EQ(static_cast<unsigned int>(INT_MAX),
            dict.bounded_lookup(other_key, strlen(other_key), 0));
  EXPECT_TRUE(snapshot().empty());
}

// (1b) bounded_lookup with size_limit=0 must return the existing id when the
// key is already present (e.g. previously inserted from a non-signal context).
TEST(DictionaryConcurrent, BoundedLookupHitReturnsExistingId) {
  Dictionary dict(/*id=*/0);

  const char* key = "Ljava/util/HashMap;";
  const size_t key_len = strlen(key);

  // Seed the entry through the inserting overload (non-signal context).
  const unsigned int seeded_id = dict.lookup(key, key_len);
  ASSERT_NE(0u, seeded_id);
  ASSERT_NE(static_cast<unsigned int>(INT_MAX), seeded_id);

  // The read-only overload must resolve the same key to the very same id.
  EXPECT_EQ(seeded_id, dict.bounded_lookup(key, key_len, 0));
}

// (2) Stress test: shared-lock inserters + exclusive-lock clear + shared-lock
// collect, all driven from separate threads. This mirrors the discipline that
// Recording::writeClasses (shared-lock collect) and Profiler::dump (exclusive-lock
// clear) follow around _class_map_lock. Without the lock, this pattern races
// (and SIGSEGVs on dictionary corruption); with the lock it is well-formed and
// the test passes cleanly under TSan/ASan.
TEST(DictionaryConcurrent, ConcurrentInsertCollectClearWithExternalLock) {
  Dictionary dict(/*id=*/0);
  SpinLock lock;  // external lock guarding dict, as _class_map_lock guards _class_map

  constexpr int kWriters = 4;
  // Keys cycle modulo kKeysPerWriter so writers mix fresh inserts (right after
  // a clear()) with hits on already-present keys.
  constexpr int kKeysPerWriter = 256;
  const auto kDuration = std::chrono::milliseconds(500);

  std::atomic<bool> stop{false};
  // Relaxed counters: only read after all threads are joined, so no ordering
  // beyond the join itself is needed.
  std::atomic<long> total_inserts{0};
  std::atomic<long> total_collects{0};
  std::atomic<long> total_clears{0};

  // Writers model Profiler::lookupClass and friends: shared lock + inserting
  // lookup. NOTE(review): EXPECT_* is called from non-main threads; gtest
  // assertions are documented thread-safe only where pthreads is available.
  std::vector<std::thread> writers;
  writers.reserve(kWriters);
  for (int w = 0; w < kWriters; ++w) {
    writers.emplace_back([&, w]() {
      char buf[64];
      int counter = 0;
      while (!stop.load(std::memory_order_relaxed)) {
        snprintf(buf, sizeof(buf), "Lcom/example/W%d/K%d;",
                 w, counter % kKeysPerWriter);
        size_t len = strlen(buf);
        lock.lockShared();
        unsigned int id = dict.lookup(buf, len);
        lock.unlockShared();
        // The inserting overload must always yield a real id, never the
        // bounded_lookup miss sentinel.
        EXPECT_NE(static_cast<unsigned int>(INT_MAX), id);
        total_inserts.fetch_add(1, std::memory_order_relaxed);
        ++counter;
      }
    });
  }

  // Collector models Recording::writeClasses: shared lock around collect().
  std::thread collector([&]() {
    while (!stop.load(std::memory_order_relaxed)) {
      std::map<unsigned int, const char*> snapshot;
      lock.lockShared();
      dict.collect(snapshot);
      lock.unlockShared();
      // The map may be empty if a clear() just ran; either way it must
      // be a well-formed map of (id, key) pairs that we can iterate.
      // Pointer-compare only — a concurrent clear() may free the key
      // storage once the shared lock is dropped, so dereferencing
      // it->second here would be a use-after-free.
      for (auto it = snapshot.begin(); it != snapshot.end(); ++it) {
        ASSERT_NE(nullptr, it->second);
      }
      total_collects.fetch_add(1, std::memory_order_relaxed);
    }
  });

  // Clearer models the exclusive _class_map.clear() in Profiler::dump. The
  // sleep keeps clears rare enough that writers/collector make progress
  // between them while still exercising many insert/clear interleavings.
  std::thread clearer([&]() {
    while (!stop.load(std::memory_order_relaxed)) {
      std::this_thread::sleep_for(std::chrono::milliseconds(20));
      lock.lock();
      dict.clear();
      lock.unlock();
      total_clears.fetch_add(1, std::memory_order_relaxed);
    }
  });

  std::this_thread::sleep_for(kDuration);
  stop.store(true, std::memory_order_relaxed);

  for (auto& t : writers) {
    t.join();
  }
  collector.join();
  clearer.join();

  // Sanity: each role made progress. With a 500ms run and 20ms clear cadence,
  // all three counters should be comfortably positive on any scheduler.
  EXPECT_GT(total_inserts.load(), 0L);
  EXPECT_GT(total_collects.load(), 0L);
  EXPECT_GT(total_clears.load(), 0L);
}
Loading