diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f613103ee..fd61422d1 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include #include @@ -1074,6 +1076,21 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("No profiling events specified"); } + // Commit _features before the reset/preregister block so ClassPrepare + // callbacks (which gate on _features.vtable_target) see the correct enabled + // state from the moment preregisterLoadedClasses releases the class-map lock. + _features = args._features; + if (VM::hotspot_version() < 8) { + _features.java_anchor = 0; + _features.gc_traces = 0; + } + if (!VMStructs::hasClassNames()) { + _features.vtable_target = 0; + } + if (!VMStructs::hasCompilerStructs()) { + _features.comp_task = 0; + } + if (reset || _start_time == 0) { // Reset counters _total_samples = 0; @@ -1085,6 +1102,11 @@ Error Profiler::start(Arguments &args, bool reset) { _class_map_lock.lock(); _class_map.clear(); _class_map_lock.unlock(); + // preregisterLoadedClasses manages _class_map_lock internally; callers must + // not hold it (JVMTI enumeration is lock-free; inserts take the lock). + if (_features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } // Reset call trace storage if (!_omit_stacktraces) { @@ -1096,6 +1118,11 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset thread names and IDs _thread_info.clearAll(); + } else if (_features.vtable_target && VMStructs::hasClassNames()) { + // Resume: classes loaded while the profiler was stopped miss ClassPrepare + // (JVMTI events are off while stopped). Re-snapshot without clearing so + // existing valid entries are preserved. + preregisterLoadedClasses(VM::jvmti()); } // (Re-)allocate calltrace buffers @@ -1117,17 +1144,6 @@ Error Profiler::start(Arguments &args, bool reset) { // Remote symbolication is now inline in ASGCT_CallFrame // No separate pool allocation needed! - _features = args._features; - if (VM::hotspot_version() < 8) { - _features.java_anchor = 0; - _features.gc_traces = 0; - } - if (!VMStructs::hasClassNames()) { - _features.vtable_target = 0; - } - if (!VMStructs::hasCompilerStructs()) { - _features.comp_task = 0; - } _safe_mode = args._safe_mode; if (VM::hotspot_version() < 8 || VM::isZing()) { _safe_mode |= GC_TRACES | LAST_JAVA_PC; @@ -1400,6 +1416,9 @@ Error Profiler::dump(const char *path, const int length) { _class_map_lock.lock(); _class_map.clear(); _class_map_lock.unlock(); + if (_features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } _thread_info.clearAll(thread_ids); _thread_info.reportCounters(); @@ -1539,6 +1558,64 @@ int Profiler::lookupClass(const char *key, size_t length) { return -1; } +void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { + if (jvmti == nullptr) { + return; + } + JNIEnv* jni = VM::jni(); + jint class_count = 0; + jclass* classes = nullptr; + jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); + if (err != JVMTI_ERROR_NONE) { + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — vtable-target frames will miss until ClassPrepare fires", err); + return; + } + if (classes == nullptr) { + return; + } + // Phase 1 (no lock): enumerate signatures and copy normalized slices. + // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so + // the slice is copied into a std::string before Deallocate invalidates it. + std::vector sigs; + sigs.reserve(class_count); + jint sig_failures = 0; + for (jint i = 0; i < class_count; i++) { + char* sig = nullptr; + if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + ++sig_failures; + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + continue; + } + if (sig == nullptr) { + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + continue; + } + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + continue; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + sigs.emplace_back(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + } + jvmti->Deallocate(reinterpret_cast(classes)); + if (sig_failures > 0) { + Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); + } + // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap + // Dictionary pointer operations happen under the lock — no JVMTI calls. + _class_map_lock.lock(); + for (const std::string& s : sigs) { + (void)_class_map.lookup(s.c_str(), s.size()); + } + _class_map_lock.unlock(); +} + int Profiler::status(char* status, int max_len) { return snprintf(status, max_len, "== Java-Profiler Status ===\n" diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 4ba24905d..5de3f2995 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -197,6 +197,11 @@ class alignas(alignof(SpinLock)) Profiler { return _features; } + inline bool isRunning() { + MutexLocker ml(_state_lock); + return _state == RUNNING; + } + u64 total_samples() { return _total_samples; } int max_stack_depth() { return _max_stack_depth; } time_t uptime() { return time(NULL) - _start_time; } @@ -213,6 +218,13 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); + // Pre-populate _class_map with all currently-loaded 'L'-type (reference) + // class signatures so that signal-safe lookups in walkVM (vtable_target) can + // resolve them without ever needing to malloc. Primitives and arrays are + // skipped — they never match vtable lookup keys. Caller must NOT hold + // _class_map_lock; this function acquires it internally for the bulk-insert + // phase only. Runs on a JVM thread (never in a signal handler). + void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { _call_trace_storage.processTraces(processor); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 76fd49c68..417174235 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -18,6 +18,7 @@ #include "safeAccess.h" #include "hotspot/vmStructs.h" #include "hotspot/jitCodeCache.h" +#include #include #include #include @@ -575,6 +576,47 @@ void VM::loadAllMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni) { } } +void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass) { + loadMethodIDs(jvmti, jni, klass); + + // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare + // runs on a JVM thread (never a signal handler), so we take a blocking + // shared lock via classMapSharedGuard() rather than the best-effort + // tryLockShared() in lookupClass(). This ensures newly prepared classes are + // never silently skipped while an exclusive dump/reset is in flight. Gate on + // vtable_target to avoid overhead when the feature is disabled. + Profiler* profiler = Profiler::instance(); + if (profiler == nullptr || !profiler->isRunning() || !profiler->stackWalkFeatures().vtable_target) { + return; + } + char* sig = nullptr; + jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); + if (err != JVMTI_ERROR_NONE) { + static std::atomic warn_count{0}; + if (warn_count.fetch_add(1, std::memory_order_relaxed) == 0) { + Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration (further failures suppressed)", err); + } + return; + } + if (sig == nullptr) { + return; + } + // Only 'L'-type (reference) signatures can ever match vtable_target lookup + // keys; skip primitives and arrays. + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + return; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + SharedLockGuard guard = profiler->classMapSharedGuard(); + (void)profiler->classMap()->lookup(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); +} + void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) { ready(jvmti, jni); loadAllMethodIDs(jvmti, jni); diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index 6be4c87bb..2c98548c9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -198,10 +198,8 @@ class VM { // Needed only for AsyncGetCallTrace support } - static void JNICALL ClassPrepare(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, - jclass klass) { - loadMethodIDs(jvmti, jni, klass); - } + static void JNICALL ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass); static jvmtiError JNICALL RedefineClassesHook(jvmtiEnv *jvmti, jint class_count, diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ad2ccb17e..fa5147a2d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,6 +250,41 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } +// (4a) Single-threaded bulk insert (mimicking the inserts preregisterLoadedClasses +// performs while holding _class_map_lock exclusively in production) followed by +// read-only bounded_lookup(0) on each inserted key. Verifies the pre-registration +// contract: every key inserted via Dictionary::lookup() is subsequently visible +// to bounded_lookup(0). This test takes no external lock — the production +// exclusive-lock protocol is exercised by the other tests in this file. +TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { + Dictionary dict(/*id=*/0); + + constexpr int kBulk = 50; + char keys[kBulk][64]; + unsigned int inserted_ids[kBulk]; + + // Bulk insert — mirrors the per-class lookup() calls preregisterLoadedClasses + // issues from a JVM thread. + for (int i = 0; i < kBulk; i++) { + snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); + inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); + ASSERT_NE(0u, inserted_ids[i]); + ASSERT_NE(static_cast(INT_MAX), inserted_ids[i]); + } + + // Read back with bounded_lookup(0) — must return the same id, no malloc. + for (int i = 0; i < kBulk; i++) { + unsigned int found = dict.bounded_lookup(keys[i], strlen(keys[i]), 0); + EXPECT_EQ(inserted_ids[i], found) + << "bounded_lookup returned wrong id for key " << keys[i]; + } + + // A never-inserted key must return INT_MAX. + const char* absent = "java/util/NeverInserted"; + EXPECT_EQ(static_cast(INT_MAX), + dict.bounded_lookup(absent, strlen(absent), 0)); +} + // (4) Same race as (3) but using BoundedOptionalSharedLockGuard, which is the // guard classMapTrySharedGuard() now returns in hotspotSupport.cpp. The bounded // variant may fail spuriously under reader pressure (≤5 CAS attempts); this