From 3a61202a45057dfeb8e4ba568594805125f915b5 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 13 May 2026 12:24:49 +0200 Subject: [PATCH 1/6] fix(profiler): pre-register vtable receiver classes via JVMTI (PROF-14618) Restore vtable-target frame capture in CPU-only recordings by populating _class_map from safe JVM-thread contexts: - Profiler::preregisterLoadedClasses(jvmtiEnv*) bulk-inserts via GetLoadedClasses + GetClassSignature + ObjectSampler::normalizeClassSignature. Called inside the existing exclusive _class_map_lock at recording start and dump so signal-handler readers see a populated map. - VM::ClassPrepare moved out-of-line into vmEntry.cpp; now also inserts newly prepared classes when StackWalkFeatures.vtable_target is set. - Hot signal-handler path in hotspotSupport.cpp is unchanged. New dictionary_concurrent_ut.cpp test pins the bounded_lookup(0) hit contract after bulk pre-registration. A new VtableTargetCpuTest is included @Disabled with a TODO for the follow-up (vtable_target has no CLI toggle yet, and the synthetic BCI_ALLOC frame is recorded as a class id, not in the stack-trace string). Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 38 ++++++ ddprof-lib/src/main/cpp/profiler.h | 5 + ddprof-lib/src/main/cpp/vmEntry.cpp | 28 +++++ ddprof-lib/src/main/cpp/vmEntry.h | 6 +- .../src/test/cpp/dictionary_concurrent_ut.cpp | 32 +++++ .../profiler/cpu/VtableTargetCpuTest.java | 114 ++++++++++++++++++ 6 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f613103ee..f55c13d73 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1084,6 +1084,9 @@ Error Profiler::start(Arguments &args, bool reset) { // it is being cleaned up _class_map_lock.lock(); _class_map.clear(); + if (args._features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } _class_map_lock.unlock(); // Reset call trace storage @@ -1399,6 +1402,9 @@ Error Profiler::dump(const char *path, const int length) { // Reset classmap _class_map_lock.lock(); _class_map.clear(); + if (_features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } _class_map_lock.unlock(); _thread_info.clearAll(thread_ids); @@ -1539,6 +1545,38 @@ int Profiler::lookupClass(const char *key, size_t length) { return -1; } +void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { + if (jvmti == nullptr) { + return; + } + jint class_count = 0; + jclass* classes = nullptr; + if (jvmti->GetLoadedClasses(&class_count, &classes) != JVMTI_ERROR_NONE || + classes == nullptr) { + return; + } + for (jint i = 0; i < class_count; i++) { + char* sig = nullptr; + if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE || + sig == nullptr) { + if (sig != nullptr) { + jvmti->Deallocate(reinterpret_cast(sig)); + } + continue; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + // Caller holds _class_map_lock exclusively — call _class_map.lookup + // directly. Do NOT call lookupClass(), which would re-attempt + // tryLockShared() and deadlock. + (void)_class_map.lookup(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); + } + jvmti->Deallocate(reinterpret_cast(classes)); +} + int Profiler::status(char* status, int max_len) { return snprintf(status, max_len, "== Java-Profiler Status ===\n" diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 4ba24905d..e7a430451 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -213,6 +213,11 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); + // Pre-populate _class_map with all currently-loaded reference classes so that + // signal-safe lookups in walkVM (vtable_target) can resolve them without ever + // needing to malloc. Caller MUST hold _class_map_lock EXCLUSIVELY. Runs on a + // JVM thread (never in a signal handler). + void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { _call_trace_storage.processTraces(processor); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 76fd49c68..ccc51349a 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -575,6 +575,34 @@ void VM::loadAllMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni) { } } +void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass) { + loadMethodIDs(jvmti, jni, klass); + + // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare + // runs on a JVM thread (never a signal handler) so malloc inside + // _class_map.lookup is legal. Gate on vtable_target to avoid overhead when + // the feature is disabled. + Profiler* profiler = Profiler::instance(); + if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { + return; + } + char* sig = nullptr; + if (jvmti->GetClassSignature(klass, &sig, nullptr) != JVMTI_ERROR_NONE || + sig == nullptr) { + if (sig != nullptr) { + jvmti->Deallocate(reinterpret_cast(sig)); + } + return; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + (void)profiler->lookupClass(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); +} + void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) { ready(jvmti, jni); loadAllMethodIDs(jvmti, jni); diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index 6be4c87bb..2c98548c9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -198,10 +198,8 @@ class VM { // Needed only for AsyncGetCallTrace support } - static void JNICALL ClassPrepare(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, - jclass klass) { - loadMethodIDs(jvmti, jni, klass); - } + static void JNICALL ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass); static jvmtiError JNICALL RedefineClassesHook(jvmtiEnv *jvmti, jint class_count, diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ad2ccb17e..ab985b81d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,6 +250,38 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } +// (4a) Bulk exclusive-lock insert (mimicking preregisterLoadedClasses) followed +// by read-only bounded_lookup(0) on each inserted key. Verifies that the +// pre-registration contract holds: every key inserted while the exclusive lock +// is held is subsequently visible to bounded_lookup(0) from a shared context. +TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { + Dictionary dict(/*id=*/0); + + constexpr int kBulk = 50; + char keys[kBulk][64]; + unsigned int inserted_ids[kBulk]; + + // Bulk insert under an exclusive lock — mirrors preregisterLoadedClasses. + for (int i = 0; i < kBulk; i++) { + snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); + inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); + ASSERT_NE(0u, inserted_ids[i]); + ASSERT_NE(static_cast(INT_MAX), inserted_ids[i]); + } + + // Read back with bounded_lookup(0) — must return the same id, no malloc. + for (int i = 0; i < kBulk; i++) { + unsigned int found = dict.bounded_lookup(keys[i], strlen(keys[i]), 0); + EXPECT_EQ(inserted_ids[i], found) + << "bounded_lookup returned wrong id for key " << keys[i]; + } + + // A never-inserted key must return INT_MAX. + const char* absent = "java/util/NeverInserted"; + EXPECT_EQ(static_cast(INT_MAX), + dict.bounded_lookup(absent, strlen(absent), 0)); +} + // (4) Same race as (3) but using BoundedOptionalSharedLockGuard, which is the // guard classMapTrySharedGuard() now returns in hotspotSupport.cpp. The bounded // variant may fail spuriously under reader pressure (≤5 CAS attempts); this diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java new file mode 100644 index 000000000..1a29361d2 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java @@ -0,0 +1,114 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.datadoghq.profiler.cpu; + +import com.datadoghq.profiler.AbstractProfilerTest; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Verifies vtable-target pre-registration: when vtable_target is enabled, + * CPU samples taken at vtable/itable dispatch stubs should carry a synthetic + * BCI_ALLOC frame whose class name matches the polymorphic receiver type. + * + * TODO (PROF-14618 follow-up): two blockers prevent enabling this test: + * 1. vtable_target has no command-line argument; needs a "features=vtable_target" + * (or equivalent) toggle wired through Arguments::parse(). + * 2. The current STACK_TRACE_STRING-based assertion is wrong: vtable_target + * emits the receiver class as a synthetic BCI_ALLOC frame whose label is + * stored as the class-id, not as text in the stack trace string. When + * enabling, replace the trace.contains(...) check with a scan over each + * ExecutionSample's stack-frame items looking for a BCI_ALLOC frame whose + * class label matches the receiver type. + */ +@Disabled("PROF-14618 follow-up: nail down assertion shape — vtable_target has no CLI toggle yet") +public class VtableTargetCpuTest extends AbstractProfilerTest { + + // Two concrete receiver types to force polymorphic vtable dispatch. + interface Workload { + long compute(long seed); + } + + static final class WorkloadA implements Workload { + @Override + public long compute(long seed) { + long x = seed; + for (int i = 0; i < 1_000; i++) { + x = x * 6364136223846793005L + 1442695040888963407L; + } + return x; + } + } + + static final class WorkloadB implements Workload { + @Override + public long compute(long seed) { + long x = seed; + for (int i = 0; i < 1_000; i++) { + x = x * 2862933555777941757L + 3037000493L; + } + return x; + } + } + + @Test + public void vtableStubReceiverClassAppearsInSamples() throws Exception { + Workload a = new WorkloadA(); + Workload b = new WorkloadB(); + + // Drive polymorphic dispatch for ~3 s so the JIT compiles the call site + // as a vtable stub and the profiler collects CPU samples inside it. + long result = 0; + long deadline = System.currentTimeMillis() + 3_000; + while (System.currentTimeMillis() < deadline) { + result += a.compute(result); + result += b.compute(result); + } + // Prevent dead-code elimination. + if (result == 0) { + throw new AssertionError("unreachable"); + } + + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.ExecutionSample"); + + // At least one sample must contain a synthetic frame whose label + // includes the receiver class simple name. The label is recorded as + // the "allocated class" name (BCI_ALLOC frame) for vtable-stub samples. + boolean foundVtableFrame = false; + for (IItemIterable cpuSamples : events) { + IMemberAccessor frameAccessor = + JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); + for (IItem sample : cpuSamples) { + String trace = frameAccessor.getMember(sample); + if (trace != null && (trace.contains("WorkloadA") || trace.contains("WorkloadB"))) { + foundVtableFrame = true; + break; + } + } + if (foundVtableFrame) break; + } + + assertTrue(foundVtableFrame, + "Expected at least one CPU sample with a vtable-stub receiver frame " + + "(WorkloadA or WorkloadB) but none were found"); + } + + @Override + protected String getProfilerCommand() { + // TODO (PROF-14618): replace with "cpu=10ms,features=vtable_target" once + // Arguments::parse() supports the vtable_target flag. + return "cpu=10ms"; + } +} From 3a34b65a9f65fd7426f055174d92c52c5932cb48 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 13 May 2026 12:57:01 +0200 Subject: [PATCH 2/6] fixup: chorus feedback (logging, dead-code, drop disabled test) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Log::warn on JVMTI error paths in preregisterLoadedClasses and ClassPrepare so silent skips are observable. - Drop dead-code `if (sig != nullptr)` guards after the `|| sig == nullptr` short-circuit (JVMTI spec leaves output undefined on error). - Remove disabled VtableTargetCpuTest.java — integration test deferred to a follow-up once vtable_target gets a CLI toggle and the synthetic-frame assertion shape is settled. Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 17 +-- ddprof-lib/src/main/cpp/vmEntry.cpp | 11 +- .../profiler/cpu/VtableTargetCpuTest.java | 114 ------------------ 3 files changed, 16 insertions(+), 126 deletions(-) delete mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f55c13d73..709c8e448 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1551,17 +1551,20 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { } jint class_count = 0; jclass* classes = nullptr; - if (jvmti->GetLoadedClasses(&class_count, &classes) != JVMTI_ERROR_NONE || - classes == nullptr) { + jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); + if (err != JVMTI_ERROR_NONE) { + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — vtable-target frames will miss until ClassPrepare fires", err); + return; + } + if (classes == nullptr) { return; } for (jint i = 0; i < class_count; i++) { char* sig = nullptr; - if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE || - sig == nullptr) { - if (sig != nullptr) { - jvmti->Deallocate(reinterpret_cast(sig)); - } + if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + continue; + } + if (sig == nullptr) { continue; } const char* slice = nullptr; diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index ccc51349a..6ae12beaa 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -588,11 +588,12 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, return; } char* sig = nullptr; - if (jvmti->GetClassSignature(klass, &sig, nullptr) != JVMTI_ERROR_NONE || - sig == nullptr) { - if (sig != nullptr) { - jvmti->Deallocate(reinterpret_cast(sig)); - } + jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); + if (err != JVMTI_ERROR_NONE) { + Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration", err); + return; + } + if (sig == nullptr) { return; } const char* slice = nullptr; diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java deleted file mode 100644 index 1a29361d2..000000000 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2026, Datadog, Inc. - * SPDX-License-Identifier: Apache-2.0 - */ - -package com.datadoghq.profiler.cpu; - -import com.datadoghq.profiler.AbstractProfilerTest; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.openjdk.jmc.common.item.IItem; -import org.openjdk.jmc.common.item.IItemCollection; -import org.openjdk.jmc.common.item.IItemIterable; -import org.openjdk.jmc.common.item.IMemberAccessor; -import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Verifies vtable-target pre-registration: when vtable_target is enabled, - * CPU samples taken at vtable/itable dispatch stubs should carry a synthetic - * BCI_ALLOC frame whose class name matches the polymorphic receiver type. - * - * TODO (PROF-14618 follow-up): two blockers prevent enabling this test: - * 1. vtable_target has no command-line argument; needs a "features=vtable_target" - * (or equivalent) toggle wired through Arguments::parse(). - * 2. The current STACK_TRACE_STRING-based assertion is wrong: vtable_target - * emits the receiver class as a synthetic BCI_ALLOC frame whose label is - * stored as the class-id, not as text in the stack trace string. When - * enabling, replace the trace.contains(...) check with a scan over each - * ExecutionSample's stack-frame items looking for a BCI_ALLOC frame whose - * class label matches the receiver type. - */ -@Disabled("PROF-14618 follow-up: nail down assertion shape — vtable_target has no CLI toggle yet") -public class VtableTargetCpuTest extends AbstractProfilerTest { - - // Two concrete receiver types to force polymorphic vtable dispatch. - interface Workload { - long compute(long seed); - } - - static final class WorkloadA implements Workload { - @Override - public long compute(long seed) { - long x = seed; - for (int i = 0; i < 1_000; i++) { - x = x * 6364136223846793005L + 1442695040888963407L; - } - return x; - } - } - - static final class WorkloadB implements Workload { - @Override - public long compute(long seed) { - long x = seed; - for (int i = 0; i < 1_000; i++) { - x = x * 2862933555777941757L + 3037000493L; - } - return x; - } - } - - @Test - public void vtableStubReceiverClassAppearsInSamples() throws Exception { - Workload a = new WorkloadA(); - Workload b = new WorkloadB(); - - // Drive polymorphic dispatch for ~3 s so the JIT compiles the call site - // as a vtable stub and the profiler collects CPU samples inside it. - long result = 0; - long deadline = System.currentTimeMillis() + 3_000; - while (System.currentTimeMillis() < deadline) { - result += a.compute(result); - result += b.compute(result); - } - // Prevent dead-code elimination. - if (result == 0) { - throw new AssertionError("unreachable"); - } - - stopProfiler(); - - IItemCollection events = verifyEvents("datadog.ExecutionSample"); - - // At least one sample must contain a synthetic frame whose label - // includes the receiver class simple name. The label is recorded as - // the "allocated class" name (BCI_ALLOC frame) for vtable-stub samples. - boolean foundVtableFrame = false; - for (IItemIterable cpuSamples : events) { - IMemberAccessor frameAccessor = - JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); - for (IItem sample : cpuSamples) { - String trace = frameAccessor.getMember(sample); - if (trace != null && (trace.contains("WorkloadA") || trace.contains("WorkloadB"))) { - foundVtableFrame = true; - break; - } - } - if (foundVtableFrame) break; - } - - assertTrue(foundVtableFrame, - "Expected at least one CPU sample with a vtable-stub receiver frame " + - "(WorkloadA or WorkloadB) but none were found"); - } - - @Override - protected String getProfilerCommand() { - // TODO (PROF-14618): replace with "cpu=10ms,features=vtable_target" once - // Arguments::parse() supports the vtable_target flag. - return "cpu=10ms"; - } -} From 5a06eb7e54d6763db1c392d59f7e844edf975357 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 May 2026 21:30:22 +0200 Subject: [PATCH 3/6] address: review feedback on vtable JVMTI pre-registration - ClassPrepare: blocking SharedLockGuard + classMap()->lookup() (was best-effort lookupClass) - preregisterLoadedClasses: filter to 'L'-type signatures; DeleteLocalRef each class - Test/header comments rewritten to match new lock protocol and L-type filter Addresses PR #527 review comments: - 3233432627 (Copilot, vmEntry.cpp): ClassPrepare no longer silently skips during dump - 3233432702 (Copilot, profiler.h/cpp): only reference types inserted into _class_map - 3233432734 (Copilot, dictionary_concurrent_ut.cpp): no false 'exclusive lock' claim - 3260404113 (Codex, profiler.cpp): JNI local refs released at every loop exit Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 14 ++++++++++++++ ddprof-lib/src/main/cpp/profiler.h | 10 ++++++---- ddprof-lib/src/main/cpp/vmEntry.cpp | 17 +++++++++++++---- .../src/test/cpp/dictionary_concurrent_ut.cpp | 13 ++++++++----- 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 709c8e448..1cd50b911 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1549,6 +1549,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jvmti == nullptr) { return; } + JNIEnv* jni = VM::jni(); jint class_count = 0; jclass* classes = nullptr; jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); @@ -1559,12 +1560,24 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (classes == nullptr) { return; } + // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at + // every loop exit so the local-reference table does not grow across repeated + // start/dump calls on large applications. for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } if (sig == nullptr) { + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + continue; + } + // Only 'L'-type (reference) signatures can ever match vtable_target lookup + // keys; skip primitives, arrays, and other non-reference signatures. + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } const char* slice = nullptr; @@ -1576,6 +1589,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { (void)_class_map.lookup(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); } jvmti->Deallocate(reinterpret_cast(classes)); } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index e7a430451..9108a6008 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -213,10 +213,12 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); - // Pre-populate _class_map with all currently-loaded reference classes so that - // signal-safe lookups in walkVM (vtable_target) can resolve them without ever - // needing to malloc. Caller MUST hold _class_map_lock EXCLUSIVELY. Runs on a - // JVM thread (never in a signal handler). + // Pre-populate _class_map with all currently-loaded 'L'-type (reference) + // class signatures so that signal-safe lookups in walkVM (vtable_target) can + // resolve them without ever needing to malloc. Primitives and arrays are + // skipped — they never match vtable lookup keys. Caller MUST hold + // _class_map_lock EXCLUSIVELY. Runs on a JVM thread (never in a signal + // handler). void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 6ae12beaa..81b928ea2 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -580,9 +580,11 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, loadMethodIDs(jvmti, jni, klass); // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare - // runs on a JVM thread (never a signal handler) so malloc inside - // _class_map.lookup is legal. Gate on vtable_target to avoid overhead when - // the feature is disabled. + // runs on a JVM thread (never a signal handler), so we take a blocking + // shared lock via classMapSharedGuard() rather than the best-effort + // tryLockShared() in lookupClass(). This ensures newly prepared classes are + // never silently skipped while an exclusive dump/reset is in flight. Gate on + // vtable_target to avoid overhead when the feature is disabled. Profiler* profiler = Profiler::instance(); if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { return; @@ -596,10 +598,17 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, if (sig == nullptr) { return; } + // Only 'L'-type (reference) signatures can ever match vtable_target lookup + // keys; skip primitives and arrays. + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + return; + } const char* slice = nullptr; size_t slice_len = 0; if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - (void)profiler->lookupClass(slice, slice_len); + SharedLockGuard guard = profiler->classMapSharedGuard(); + (void)profiler->classMap()->lookup(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); } diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ab985b81d..fa5147a2d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,10 +250,12 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } -// (4a) Bulk exclusive-lock insert (mimicking preregisterLoadedClasses) followed -// by read-only bounded_lookup(0) on each inserted key. Verifies that the -// pre-registration contract holds: every key inserted while the exclusive lock -// is held is subsequently visible to bounded_lookup(0) from a shared context. +// (4a) Single-threaded bulk insert (mimicking the inserts preregisterLoadedClasses +// performs while holding _class_map_lock exclusively in production) followed by +// read-only bounded_lookup(0) on each inserted key. Verifies the pre-registration +// contract: every key inserted via Dictionary::lookup() is subsequently visible +// to bounded_lookup(0). This test takes no external lock — the production +// exclusive-lock protocol is exercised by the other tests in this file. TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { Dictionary dict(/*id=*/0); @@ -261,7 +263,8 @@ TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { char keys[kBulk][64]; unsigned int inserted_ids[kBulk]; - // Bulk insert under an exclusive lock — mirrors preregisterLoadedClasses. + // Bulk insert — mirrors the per-class lookup() calls preregisterLoadedClasses + // issues from a JVM thread. for (int i = 0; i < kBulk; i++) { snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); From 8fe88245cf1acf2c12016ace52cb2526765c1865 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 19 May 2026 13:37:02 +0000 Subject: [PATCH 4/6] fix: add warn logging for GetClassSignature per-class failures in preregisterLoadedClasses Agent-Logs-Url: https://github.com/DataDog/java-profiler/sessions/bdfc7f46-b926-497d-b779-a1cc8c5b7769 Co-authored-by: jbachorik <738413+jbachorik@users.noreply.github.com> --- ddprof-lib/src/main/cpp/profiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 1cd50b911..911c1c14a 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1563,9 +1563,11 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at // every loop exit so the local-reference table does not grow across repeated // start/dump calls on large applications. + jint sig_failures = 0; for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + sig_failures++; if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } @@ -1592,6 +1594,9 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jni != nullptr) jni->DeleteLocalRef(classes[i]); } jvmti->Deallocate(reinterpret_cast(classes)); + if (sig_failures > 0) { + Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d of %d classes — those classes skipped for vtable-target pre-registration", sig_failures, class_count); + } } int Profiler::status(char* status, int max_len) { From e6989bc0f6d8f5445771ca68500f4c27f2fa5f1b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 May 2026 15:55:51 +0200 Subject: [PATCH 5/6] address: fix features race and resume gap in vtable_target preregistration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - profiler.cpp:1077 — move _features=args._features before reset block so ClassPrepare callbacks see correct vtable_target immediately - profiler.cpp:1117 — add else-if resume branch calling preregisterLoadedClasses without map clear, catching classes loaded while profiler was stopped - profiler.cpp:1574 — add sig_failures counter + post-loop Log::warn for GetClassSignature failures in preregisterLoadedClasses Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 37 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 911c1c14a..768a034d4 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1074,6 +1074,21 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("No profiling events specified"); } + // Commit _features before the reset/preregister block so ClassPrepare + // callbacks (which gate on _features.vtable_target) see the correct enabled + // state from the moment preregisterLoadedClasses releases the class-map lock. + _features = args._features; + if (VM::hotspot_version() < 8) { + _features.java_anchor = 0; + _features.gc_traces = 0; + } + if (!VMStructs::hasClassNames()) { + _features.vtable_target = 0; + } + if (!VMStructs::hasCompilerStructs()) { + _features.comp_task = 0; + } + if (reset || _start_time == 0) { // Reset counters _total_samples = 0; @@ -1099,6 +1114,13 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset thread names and IDs _thread_info.clearAll(); + } else if (_features.vtable_target && VMStructs::hasClassNames()) { + // Resume: classes loaded while the profiler was stopped miss ClassPrepare + // (JVMTI events are off while stopped). Re-snapshot without clearing so + // existing valid entries are preserved. + _class_map_lock.lock(); + preregisterLoadedClasses(VM::jvmti()); + _class_map_lock.unlock(); } // (Re-)allocate calltrace buffers @@ -1120,17 +1142,6 @@ Error Profiler::start(Arguments &args, bool reset) { // Remote symbolication is now inline in ASGCT_CallFrame // No separate pool allocation needed! - _features = args._features; - if (VM::hotspot_version() < 8) { - _features.java_anchor = 0; - _features.gc_traces = 0; - } - if (!VMStructs::hasClassNames()) { - _features.vtable_target = 0; - } - if (!VMStructs::hasCompilerStructs()) { - _features.comp_task = 0; - } _safe_mode = args._safe_mode; if (VM::hotspot_version() < 8 || VM::isZing()) { _safe_mode |= GC_TRACES | LAST_JAVA_PC; @@ -1567,7 +1578,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { - sig_failures++; + ++sig_failures; if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } @@ -1595,7 +1606,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { } jvmti->Deallocate(reinterpret_cast(classes)); if (sig_failures > 0) { - Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d of %d classes — those classes skipped for vtable-target pre-registration", sig_failures, class_count); + Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); } } From 92574654b15d33136f54a291ad0ce2c5ccb89838 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 May 2026 20:09:56 +0200 Subject: [PATCH 6/6] fix: address review comments on PR #527 - use _features.vtable_target (sanitized) instead of args._features - add isRunning() gated on _state_lock for ClassPrepare gate - two-phase preregisterLoadedClasses: JVMTI enum lock-free, bulk inserts under exclusive lock - gate ClassPrepare inserts on isRunning(); rate-limit GetClassSignature warn Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 34 ++++++++++++++++------------ ddprof-lib/src/main/cpp/profiler.h | 11 ++++++--- ddprof-lib/src/main/cpp/vmEntry.cpp | 8 +++++-- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 768a034d4..fd61422d1 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include #include @@ -1099,10 +1101,12 @@ Error Profiler::start(Arguments &args, bool reset) { // it is being cleaned up _class_map_lock.lock(); _class_map.clear(); - if (args._features.vtable_target && VMStructs::hasClassNames()) { + _class_map_lock.unlock(); + // preregisterLoadedClasses manages _class_map_lock internally; callers must + // not hold it (JVMTI enumeration is lock-free; inserts take the lock). + if (_features.vtable_target && VMStructs::hasClassNames()) { preregisterLoadedClasses(VM::jvmti()); } - _class_map_lock.unlock(); // Reset call trace storage if (!_omit_stacktraces) { @@ -1118,9 +1122,7 @@ Error Profiler::start(Arguments &args, bool reset) { // Resume: classes loaded while the profiler was stopped miss ClassPrepare // (JVMTI events are off while stopped). Re-snapshot without clearing so // existing valid entries are preserved. - _class_map_lock.lock(); preregisterLoadedClasses(VM::jvmti()); - _class_map_lock.unlock(); } // (Re-)allocate calltrace buffers @@ -1413,10 +1415,10 @@ Error Profiler::dump(const char *path, const int length) { // Reset classmap _class_map_lock.lock(); _class_map.clear(); + _class_map_lock.unlock(); if (_features.vtable_target && VMStructs::hasClassNames()) { preregisterLoadedClasses(VM::jvmti()); } - _class_map_lock.unlock(); _thread_info.clearAll(thread_ids); _thread_info.reportCounters(); @@ -1571,9 +1573,11 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (classes == nullptr) { return; } - // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at - // every loop exit so the local-reference table does not grow across repeated - // start/dump calls on large applications. + // Phase 1 (no lock): enumerate signatures and copy normalized slices. + // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so + // the slice is copied into a std::string before Deallocate invalidates it. + std::vector sigs; + sigs.reserve(class_count); jint sig_failures = 0; for (jint i = 0; i < class_count; i++) { char* sig = nullptr; @@ -1586,8 +1590,6 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } - // Only 'L'-type (reference) signatures can ever match vtable_target lookup - // keys; skip primitives, arrays, and other non-reference signatures. if (sig[0] != 'L') { jvmti->Deallocate(reinterpret_cast(sig)); if (jni != nullptr) jni->DeleteLocalRef(classes[i]); @@ -1596,10 +1598,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { const char* slice = nullptr; size_t slice_len = 0; if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - // Caller holds _class_map_lock exclusively — call _class_map.lookup - // directly. Do NOT call lookupClass(), which would re-attempt - // tryLockShared() and deadlock. - (void)_class_map.lookup(slice, slice_len); + sigs.emplace_back(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); if (jni != nullptr) jni->DeleteLocalRef(classes[i]); @@ -1608,6 +1607,13 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (sig_failures > 0) { Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); } + // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap + // Dictionary pointer operations happen under the lock — no JVMTI calls. + _class_map_lock.lock(); + for (const std::string& s : sigs) { + (void)_class_map.lookup(s.c_str(), s.size()); + } + _class_map_lock.unlock(); } int Profiler::status(char* status, int max_len) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 9108a6008..5de3f2995 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -197,6 +197,11 @@ class alignas(alignof(SpinLock)) Profiler { return _features; } + inline bool isRunning() { + MutexLocker ml(_state_lock); + return _state == RUNNING; + } + u64 total_samples() { return _total_samples; } int max_stack_depth() { return _max_stack_depth; } time_t uptime() { return time(NULL) - _start_time; } @@ -216,9 +221,9 @@ class alignas(alignof(SpinLock)) Profiler { // Pre-populate _class_map with all currently-loaded 'L'-type (reference) // class signatures so that signal-safe lookups in walkVM (vtable_target) can // resolve them without ever needing to malloc. Primitives and arrays are - // skipped — they never match vtable lookup keys. Caller MUST hold - // _class_map_lock EXCLUSIVELY. Runs on a JVM thread (never in a signal - // handler). + // skipped — they never match vtable lookup keys. Caller must NOT hold + // _class_map_lock; this function acquires it internally for the bulk-insert + // phase only. Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 81b928ea2..417174235 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -18,6 +18,7 @@ #include "safeAccess.h" #include "hotspot/vmStructs.h" #include "hotspot/jitCodeCache.h" +#include #include #include #include @@ -586,13 +587,16 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, // never silently skipped while an exclusive dump/reset is in flight. Gate on // vtable_target to avoid overhead when the feature is disabled. Profiler* profiler = Profiler::instance(); - if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { + if (profiler == nullptr || !profiler->isRunning() || !profiler->stackWalkFeatures().vtable_target) { return; } char* sig = nullptr; jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); if (err != JVMTI_ERROR_NONE) { - Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration", err); + static std::atomic warn_count{0}; + if (warn_count.fetch_add(1, std::memory_order_relaxed) == 0) { + Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration (further failures suppressed)", err); + } return; } if (sig == nullptr) {